Fix "'NoneType' object is not callable" error in destructors

2023-11-08 11:05:45 -05:00 · 2023-11-08 11:05:45 -05:00 · ca4cb88351
commit ca4cb88351
parent 01cb3a0381
3 changed files with 45 additions and 30 deletions
--- a/llama_cpp/llama.py
+++ b/llama_cpp/llama.py
@@ -213,7 +213,7 @@ class _LlamaModel:

    NOTE: For stability it's recommended you use the Llama class instead."""

-    _llama_free_model = llama_cpp._lib.llama_free_model  # type: ignore
+    _llama_free_model = None

    def __init__(
        self,
@@ -226,6 +226,8 @@ class _LlamaModel:
        self.params = params
        self.verbose = verbose

+        self._llama_free_model = llama_cpp._lib.llama_free_model  # type: ignore
+
        if not os.path.exists(path_model):
            raise ValueError(f"Model path does not exist: {path_model}")

@@ -236,7 +238,7 @@ class _LlamaModel:

    def __del__(self):
        with suppress_stdout_stderr(disable=self.verbose):
-            if self.model is not None:
+            if self.model is not None and self._llama_free_model is not None:
                self._llama_free_model(self.model)
                self.model = None

@@ -396,7 +398,7 @@ class _LlamaContext:

    NOTE: For stability it's recommended you use the Llama class instead."""

-    _llama_free = llama_cpp._lib.llama_free  # type: ignore
+    _llama_free = None

    def __init__(
        self,
@@ -409,6 +411,8 @@ class _LlamaContext:
        self.params = params
        self.verbose = verbose

+        self._llama_free = llama_cpp._lib.llama_free  # type: ignore
+
        with suppress_stdout_stderr(disable=self.verbose):
            self.ctx = llama_cpp.llama_new_context_with_model(
                self.model.model, self.params
@@ -416,7 +420,7 @@ class _LlamaContext:

    def __del__(self):
        with suppress_stdout_stderr(disable=self.verbose):
-            if self.ctx is not None:
+            if self.ctx is not None and self._llama_free is not None:
                self._llama_free(self.ctx)
                self.ctx = None

@@ -645,7 +649,7 @@ class _LlamaContext:


 class _LlamaBatch:
-    _llama_batch_free = llama_cpp._lib.llama_batch_free  # type: ignore
+    _llama_batch_free = None

    def __init__(
        self, *, n_tokens: int, embd: int, n_seq_max: int, verbose: bool = True
@@ -655,6 +659,8 @@ class _LlamaBatch:
        self.n_seq_max = n_seq_max
        self.verbose = verbose

+        self._llama_batch_free = llama_cpp._lib.llama_batch_free  # type: ignore
+
        with suppress_stdout_stderr(disable=self.verbose):
            self.batch = llama_cpp.llama_batch_init(
                self.n_tokens, self.embd, self.n_seq_max
@@ -662,7 +668,7 @@ class _LlamaBatch:

    def __del__(self):
        with suppress_stdout_stderr(disable=self.verbose):
-            if self.batch is not None:
+            if self.batch is not None and self._llama_batch_free is not None:
                self._llama_batch_free(self.batch)
                self.batch = None

--- a/llama_cpp/llama_chat_format.py
+++ b/llama_cpp/llama_chat_format.py
@@ -9,6 +9,8 @@ import llama_cpp.llama as llama
 import llama_cpp.llama_types as llama_types
 import llama_cpp.llama_grammar as llama_grammar

+from ._utils import suppress_stdout_stderr
+

 class LlamaChatCompletionHandler(Protocol):
    def __call__(
@@ -775,19 +777,25 @@ def functionary_chat_handler(


 class Llava15ChatHandler:
-    def __init__(self, clip_model_path: str):
+    _clip_free = None
+
+    def __init__(self, clip_model_path: str, verbose: bool = False):
        import llama_cpp.llava_cpp as llava_cpp

        self._llava_cpp = llava_cpp
        self.clip_model_path = clip_model_path
+        self.verbose = verbose
+        self._clip_free = self._llava_cpp._libllava.clip_free # type: ignore

+        with suppress_stdout_stderr(disable=self.verbose):
            self.clip_ctx = self._llava_cpp.clip_model_load(
                self.clip_model_path.encode(), 0 
            )

    def __del__(self):
-        if self.clip_ctx is not None:
-            self._llava_cpp.clip_free(self.clip_ctx)
+        with suppress_stdout_stderr(disable=self.verbose):
+            if self.clip_ctx is not None and self._clip_free is not None:
+                self._clip_free(self.clip_ctx)
                self.clip_ctx = None

    def load_image(self, image_url: str) -> bytes:
@@ -881,17 +889,17 @@ class Llava15ChatHandler:
                            c_ubyte_ptr = (
                                ctypes.c_ubyte * len(data_array)
                            ).from_buffer(data_array)
+                            with suppress_stdout_stderr(disable=self.verbose):
                                embed = self._llava_cpp.llava_image_embed_make_with_bytes(
                                    ctx_clip=self.clip_ctx,
                                    n_threads=llama.context_params.n_threads,
                                    image_bytes=c_ubyte_ptr,
                                    image_bytes_length=len(image_bytes),
                                )
-                            # image_bytes_p = (ctypes.c_uint8 * len(image_bytes)).from_buffer_copy(image_bytes)
-                            # embed = self._llava_cpp.llava_image_embed_make_with_bytes(ctx_clip=self.clip_ctx, n_threads=1, image_bytes=image_bytes_p, image_bytes_length=len(image_bytes))
                            try:
                                n_past = ctypes.c_int(llama.n_tokens)
                                n_past_p = ctypes.pointer(n_past)
+                                with suppress_stdout_stderr(disable=self.verbose):
                                    self._llava_cpp.llava_eval_image_embed(
                                        ctx_llama=llama.ctx,
                                        embed=embed,
@@ -901,6 +909,7 @@ class Llava15ChatHandler:
                                assert llama.n_ctx() >= n_past.value
                                llama.n_tokens = n_past.value
                            finally:
+                                with suppress_stdout_stderr(disable=self.verbose):
                                    self._llava_cpp.llava_image_embed_free(embed)
            if message["role"] == "assistant" and message["content"] is not None:
                llama.eval(
@@ -910,7 +919,7 @@ class Llava15ChatHandler:
                )
        llama.eval(llama.tokenize(f"{assistant_role}".encode("utf8"), add_bos=False))

-        prompt = llama._input_ids.tolist()
+        prompt = llama.input_ids[:llama.n_tokens].tolist()

        return _convert_completion_to_chat(
            llama.create_completion(
--- a/llama_cpp/server/app.py
+++ b/llama_cpp/server/app.py
@@ -384,7 +384,7 @@ def create_app(settings: Optional[Settings] = None):
    chat_handler = None
    if settings.chat_format == "llava-1-5":
        assert settings.clip_model_path is not None
-        chat_handler = llama_cpp.llama_chat_format.Llava15ChatHandler(clip_model_path=settings.clip_model_path)
+        chat_handler = llama_cpp.llama_chat_format.Llava15ChatHandler(clip_model_path=settings.clip_model_path, verbose=settings.verbose)
    ##

    llama = llama_cpp.Llama(