Fix destructor NoneType is not callable error
parent 01cb3a0381
commit ca4cb88351
3 changed files with 45 additions and 30 deletions
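The underlying bug: these wrappers free their native handles in __del__, and __del__ can run during interpreter shutdown, when module-level names (for example the llava_cpp module's clip_free, or attributes reached through llama_cpp._lib) may already have been cleared to None. Calling through such a name then raises TypeError: 'NoneType' object is not callable. The fix captures each native free function as an instance attribute while __init__ runs, and has __del__ skip cleanup when either the handle or the captured callable is None. Below is a minimal sketch of that pattern; _free_resource and Wrapper are hypothetical stand-ins, not llama-cpp-python code.

def _free_resource(handle):
    # Stand-in for a native cleanup call such as llama_free_model or clip_free.
    print(f"freed {handle}")


class Wrapper:
    # Class-level default so the attribute always exists, even if __init__
    # never completed or shutdown has already cleared it.
    _free = None

    def __init__(self, handle):
        self.handle = handle
        # Capture the cleanup callable on the instance while it is still valid.
        self._free = _free_resource

    def __del__(self):
        # Guard both the handle and the callable: during interpreter shutdown
        # either one may already be None.
        if self.handle is not None and self._free is not None:
            self._free(self.handle)
            self.handle = None


w = Wrapper("model-handle")
del w  # prints: freed model-handle

Keeping the reference on the instance means __del__ never has to look anything up through a module that may already have been torn down, which is why each class below gains a _llama_free_model / _llama_free / _llama_batch_free / _clip_free attribute set in __init__.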
@@ -213,7 +213,7 @@ class _LlamaModel:

     NOTE: For stability it's recommended you use the Llama class instead."""

-    _llama_free_model = llama_cpp._lib.llama_free_model  # type: ignore
+    _llama_free_model = None

     def __init__(
         self,
@@ -226,6 +226,8 @@ class _LlamaModel:
         self.params = params
         self.verbose = verbose

+        self._llama_free_model = llama_cpp._lib.llama_free_model  # type: ignore
+
         if not os.path.exists(path_model):
             raise ValueError(f"Model path does not exist: {path_model}")

@@ -236,7 +238,7 @@ class _LlamaModel:

     def __del__(self):
         with suppress_stdout_stderr(disable=self.verbose):
-            if self.model is not None:
+            if self.model is not None and self._llama_free_model is not None:
                 self._llama_free_model(self.model)
                 self.model = None

@@ -396,7 +398,7 @@ class _LlamaContext:

     NOTE: For stability it's recommended you use the Llama class instead."""

-    _llama_free = llama_cpp._lib.llama_free  # type: ignore
+    _llama_free = None

     def __init__(
         self,
@@ -409,6 +411,8 @@ class _LlamaContext:
         self.params = params
         self.verbose = verbose

+        self._llama_free = llama_cpp._lib.llama_free  # type: ignore
+
         with suppress_stdout_stderr(disable=self.verbose):
             self.ctx = llama_cpp.llama_new_context_with_model(
                 self.model.model, self.params
@@ -416,7 +420,7 @@ class _LlamaContext:

     def __del__(self):
         with suppress_stdout_stderr(disable=self.verbose):
-            if self.ctx is not None:
+            if self.ctx is not None and self._llama_free is not None:
                 self._llama_free(self.ctx)
                 self.ctx = None

@@ -645,7 +649,7 @@ class _LlamaContext:


 class _LlamaBatch:
-    _llama_batch_free = llama_cpp._lib.llama_batch_free  # type: ignore
+    _llama_batch_free = None

     def __init__(
         self, *, n_tokens: int, embd: int, n_seq_max: int, verbose: bool = True
@@ -655,6 +659,8 @@ class _LlamaBatch:
         self.n_seq_max = n_seq_max
         self.verbose = verbose

+        self._llama_batch_free = llama_cpp._lib.llama_batch_free  # type: ignore
+
         with suppress_stdout_stderr(disable=self.verbose):
             self.batch = llama_cpp.llama_batch_init(
                 self.n_tokens, self.embd, self.n_seq_max
@@ -662,7 +668,7 @@ class _LlamaBatch:

     def __del__(self):
         with suppress_stdout_stderr(disable=self.verbose):
-            if self.batch is not None:
+            if self.batch is not None and self._llama_batch_free is not None:
                 self._llama_batch_free(self.batch)
                 self.batch = None

@@ -9,6 +9,8 @@ import llama_cpp.llama as llama
 import llama_cpp.llama_types as llama_types
 import llama_cpp.llama_grammar as llama_grammar

+from ._utils import suppress_stdout_stderr
+

 class LlamaChatCompletionHandler(Protocol):
     def __call__(
@@ -775,20 +777,26 @@ def functionary_chat_handler(


 class Llava15ChatHandler:
-    def __init__(self, clip_model_path: str):
+    _clip_free = None
+
+    def __init__(self, clip_model_path: str, verbose: bool = False):
         import llama_cpp.llava_cpp as llava_cpp

         self._llava_cpp = llava_cpp
         self.clip_model_path = clip_model_path
+        self.verbose = verbose
+        self._clip_free = self._llava_cpp._libllava.clip_free  # type: ignore

-        self.clip_ctx = self._llava_cpp.clip_model_load(
-            self.clip_model_path.encode(), 0
-        )
+        with suppress_stdout_stderr(disable=self.verbose):
+            self.clip_ctx = self._llava_cpp.clip_model_load(
+                self.clip_model_path.encode(), 0
+            )

     def __del__(self):
-        if self.clip_ctx is not None:
-            self._llava_cpp.clip_free(self.clip_ctx)
-            self.clip_ctx = None
+        with suppress_stdout_stderr(disable=self.verbose):
+            if self.clip_ctx is not None and self._clip_free is not None:
+                self._clip_free(self.clip_ctx)
+                self.clip_ctx = None

     def load_image(self, image_url: str) -> bytes:
         if image_url.startswith("data:"):
@@ -881,27 +889,28 @@ class Llava15ChatHandler:
                             c_ubyte_ptr = (
                                 ctypes.c_ubyte * len(data_array)
                             ).from_buffer(data_array)
-                            embed = self._llava_cpp.llava_image_embed_make_with_bytes(
-                                ctx_clip=self.clip_ctx,
-                                n_threads=llama.context_params.n_threads,
-                                image_bytes=c_ubyte_ptr,
-                                image_bytes_length=len(image_bytes),
-                            )
-                            # image_bytes_p = (ctypes.c_uint8 * len(image_bytes)).from_buffer_copy(image_bytes)
-                            # embed = self._llava_cpp.llava_image_embed_make_with_bytes(ctx_clip=self.clip_ctx, n_threads=1, image_bytes=image_bytes_p, image_bytes_length=len(image_bytes))
+                            with suppress_stdout_stderr(disable=self.verbose):
+                                embed = self._llava_cpp.llava_image_embed_make_with_bytes(
+                                    ctx_clip=self.clip_ctx,
+                                    n_threads=llama.context_params.n_threads,
+                                    image_bytes=c_ubyte_ptr,
+                                    image_bytes_length=len(image_bytes),
+                                )
                             try:
                                 n_past = ctypes.c_int(llama.n_tokens)
                                 n_past_p = ctypes.pointer(n_past)
-                                self._llava_cpp.llava_eval_image_embed(
-                                    ctx_llama=llama.ctx,
-                                    embed=embed,
-                                    n_batch=llama.n_batch,
-                                    n_past=n_past_p,
-                                )
+                                with suppress_stdout_stderr(disable=self.verbose):
+                                    self._llava_cpp.llava_eval_image_embed(
+                                        ctx_llama=llama.ctx,
+                                        embed=embed,
+                                        n_batch=llama.n_batch,
+                                        n_past=n_past_p,
+                                    )
                                 assert llama.n_ctx() >= n_past.value
                                 llama.n_tokens = n_past.value
                             finally:
-                                self._llava_cpp.llava_image_embed_free(embed)
+                                with suppress_stdout_stderr(disable=self.verbose):
+                                    self._llava_cpp.llava_image_embed_free(embed)
             if message["role"] == "assistant" and message["content"] is not None:
                 llama.eval(
                     llama.tokenize(
@@ -910,7 +919,7 @@ class Llava15ChatHandler:
                 )
         llama.eval(llama.tokenize(f"{assistant_role}".encode("utf8"), add_bos=False))

-        prompt = llama._input_ids.tolist()
+        prompt = llama.input_ids[:llama.n_tokens].tolist()

         return _convert_completion_to_chat(
             llama.create_completion(

@@ -384,7 +384,7 @@ def create_app(settings: Optional[Settings] = None):
     chat_handler = None
     if settings.chat_format == "llava-1-5":
         assert settings.clip_model_path is not None
-        chat_handler = llama_cpp.llama_chat_format.Llava15ChatHandler(clip_model_path=settings.clip_model_path)
+        chat_handler = llama_cpp.llama_chat_format.Llava15ChatHandler(clip_model_path=settings.clip_model_path, verbose=settings.verbose)
     ##

     llama = llama_cpp.Llama(
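For reference, a hedged usage sketch of the updated Llava15ChatHandler signature. Only clip_model_path and the new verbose parameter come from this diff; the file paths are placeholders, and wiring the handler into Llama follows the library's usual llava setup rather than anything changed in this commit.

from llama_cpp import Llama
from llama_cpp.llama_chat_format import Llava15ChatHandler

# verbose=False routes the clip/llava native output through
# suppress_stdout_stderr, matching the new default added in this commit.
chat_handler = Llava15ChatHandler(
    clip_model_path="models/mmproj-model-f16.gguf",  # placeholder path
    verbose=False,
)

llm = Llama(
    model_path="models/llava-v1.5-7b.Q4_K_M.gguf",  # placeholder path
    chat_handler=chat_handler,  # assumed wiring, not part of this diff
    logits_all=True,            # assumed requirement for llava handlers
)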