From a05d90446fea83426fe8dc0d6c78afe6c3dd0894 Mon Sep 17 00:00:00 2001 From: Connor Date: Sun, 11 Feb 2024 10:57:57 -0800 Subject: [PATCH] fix: Circular dependency preventing early Llama object free (#1176) commit 901827013b732d74f1f67033062d13a6204a62bd introduced a cyclic dependency within Llama objects. That change causes old models to linger in memory longer than necessary, thereby creating memory bloat in most applications attempting to switch between models at runtime. This patch simply removes the problematic line, allowing models to deallocate without relying on GC. One might also consider combining `weakref.ref` with a `@property` if the `llama` attribute is absolutely necessary to expose in the tokenizer class. --- llama_cpp/llama_tokenizer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/llama_cpp/llama_tokenizer.py b/llama_cpp/llama_tokenizer.py index 0ad3c3a..c2aad47 100644 --- a/llama_cpp/llama_tokenizer.py +++ b/llama_cpp/llama_tokenizer.py @@ -27,7 +27,6 @@ class BaseLlamaTokenizer(abc.ABC): class LlamaTokenizer(BaseLlamaTokenizer): def __init__(self, llama: llama_cpp.Llama): - self.llama = llama self._model = llama._model # type: ignore def tokenize(