From a05d90446fea83426fe8dc0d6c78afe6c3dd0894 Mon Sep 17 00:00:00 2001 From: Connor Date: Sun, 11 Feb 2024 10:57:57 -0800 Subject: [PATCH] fix: Circular dependency preventing early Llama object free (#1176) commit 901827013b732d74f1f67033062d13a6204a62bd introduced a cyclic dependency within Llama objects. That change causes old models to linger in memory longer than necessary, thereby creating memory bloat in most applications attempting to switch between models at runtime. This patch simply removes the problematic line, allowing models to deallocate without relying on GC. One might also consider combining `weakref.ref` with a `@property` if the `llama` attribute is absolutely necessary to expose in the tokenizer class. --- llama_cpp/llama_tokenizer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/llama_cpp/llama_tokenizer.py b/llama_cpp/llama_tokenizer.py index 0ad3c3a..c2aad47 100644 --- a/llama_cpp/llama_tokenizer.py +++ b/llama_cpp/llama_tokenizer.py @@ -27,7 +27,6 @@ class BaseLlamaTokenizer(abc.ABC): class LlamaTokenizer(BaseLlamaTokenizer): def __init__(self, llama: llama_cpp.Llama): - self.llama = llama self._model = llama._model # type: ignore def tokenize(