diff --git a/llama_cpp/_internals.py b/llama_cpp/_internals.py
index 651cd4c..3a71ef0 100644
--- a/llama_cpp/_internals.py
+++ b/llama_cpp/_internals.py
@@ -18,8 +18,6 @@ from .llama_grammar import LlamaGrammar
 
 import llama_cpp.llama_cpp as llama_cpp
 
-from ._utils import suppress_stdout_stderr
-
 
 # Python wrappers over llama.h structs
 
@@ -30,7 +28,6 @@ class _LlamaModel:
 
     _llama_free_model = None
     # NOTE: this must be "saved" here to avoid exceptions when calling __del__
-    _suppress_stdout_stderr = suppress_stdout_stderr
 
     def __init__(
         self,
@@ -48,16 +45,14 @@ class _LlamaModel:
         if not os.path.exists(path_model):
             raise ValueError(f"Model path does not exist: {path_model}")
 
-        with self._suppress_stdout_stderr(disable=self.verbose):
-            self.model = llama_cpp.llama_load_model_from_file(
-                self.path_model.encode("utf-8"), self.params
-            )
+        self.model = llama_cpp.llama_load_model_from_file(
+            self.path_model.encode("utf-8"), self.params
+        )
 
     def __del__(self):
-        with self._suppress_stdout_stderr(disable=self.verbose):
-            if self.model is not None and self._llama_free_model is not None:
-                self._llama_free_model(self.model)
-                self.model = None
+        if self.model is not None and self._llama_free_model is not None:
+            self._llama_free_model(self.model)
+            self.model = None
 
     def vocab_type(self) -> int:
         assert self.model is not None
@@ -240,8 +235,6 @@ class _LlamaContext:
     NOTE: For stability it's recommended you use the Llama class instead."""
 
     _llama_free = None
-    # NOTE: this must be "saved" here to avoid exceptions when calling __del__
-    _suppress_stdout_stderr = suppress_stdout_stderr
 
     def __init__(
         self,
@@ -256,16 +249,16 @@
 
         self._llama_free = llama_cpp._lib.llama_free  # type: ignore
 
-        with self._suppress_stdout_stderr(disable=self.verbose):
-            self.ctx = llama_cpp.llama_new_context_with_model(
-                self.model.model, self.params
-            )
+        assert self.model.model is not None
+
+        self.ctx = llama_cpp.llama_new_context_with_model(
+            self.model.model, self.params
+        )
 
     def __del__(self):
-        with self._suppress_stdout_stderr(disable=self.verbose):
-            if self.ctx is not None and self._llama_free is not None:
-                self._llama_free(self.ctx)
-                self.ctx = None
+        if self.ctx is not None and self._llama_free is not None:
+            self._llama_free(self.ctx)
+            self.ctx = None
 
     def n_ctx(self) -> int:
         assert self.ctx is not None
@@ -493,8 +486,6 @@ class _LlamaContext:
 
 class _LlamaBatch:
     _llama_batch_free = None
-    # NOTE: this must be "saved" here to avoid exceptions when calling __del__
-    _suppress_stdout_stderr = suppress_stdout_stderr
 
     def __init__(
         self, *, n_tokens: int, embd: int, n_seq_max: int, verbose: bool = True
@@ -506,16 +497,14 @@
 
         self._llama_batch_free = llama_cpp._lib.llama_batch_free  # type: ignore
 
-        with self._suppress_stdout_stderr(disable=self.verbose):
-            self.batch = llama_cpp.llama_batch_init(
-                self.n_tokens, self.embd, self.n_seq_max
-            )
+        self.batch = llama_cpp.llama_batch_init(
+            self.n_tokens, self.embd, self.n_seq_max
+        )
 
     def __del__(self):
-        with self._suppress_stdout_stderr(disable=self.verbose):
-            if self.batch is not None and self._llama_batch_free is not None:
-                self._llama_batch_free(self.batch)
-                self.batch = None
+        if self.batch is not None and self._llama_batch_free is not None:
+            self._llama_batch_free(self.batch)
+            self.batch = None
 
     def set_batch(self, batch: Sequence[int], n_past: int, logits_all: bool):
         assert self.batch is not None
diff --git a/llama_cpp/_logger.py b/llama_cpp/_logger.py
new file mode 100644
index 0000000..7638170
--- /dev/null
+++ b/llama_cpp/_logger.py
@@ -0,0 +1,37 @@
+import sys
+import ctypes
+import logging
+
+import llama_cpp
+
+# enum ggml_log_level {
+#     GGML_LOG_LEVEL_ERROR = 2,
+#     GGML_LOG_LEVEL_WARN  = 3,
+#     GGML_LOG_LEVEL_INFO  = 4,
+#     GGML_LOG_LEVEL_DEBUG = 5
+# };
+GGML_LOG_LEVEL_TO_LOGGING_LEVEL = {
+    2: logging.ERROR,
+    3: logging.WARNING,
+    4: logging.INFO,
+    5: logging.DEBUG,
+}
+
+logger = logging.getLogger("llama-cpp-python")
+
+
+@llama_cpp.llama_log_callback
+def llama_log_callback(
+    level: int,
+    text: bytes,
+    user_data: ctypes.c_void_p,
+):
+    if logger.level <= GGML_LOG_LEVEL_TO_LOGGING_LEVEL[level]:
+        print(text.decode("utf-8"), end="", flush=True, file=sys.stderr)
+
+
+llama_cpp.llama_log_set(llama_log_callback, ctypes.c_void_p(0))
+
+
+def set_verbose(verbose: bool):
+    logger.setLevel(logging.DEBUG if verbose else logging.ERROR)
diff --git a/llama_cpp/llama.py b/llama_cpp/llama.py
index f00fd4f..85943db 100644
--- a/llama_cpp/llama.py
+++ b/llama_cpp/llama.py
@@ -35,7 +35,6 @@ from llama_cpp.llama_speculative import LlamaDraftModel
 
 import numpy as np
 import numpy.typing as npt
 
-from ._utils import suppress_stdout_stderr
 from ._internals import (
     _LlamaModel,  # type: ignore
@@ -44,6 +43,7 @@ from ._internals import (
     _LlamaSamplingParams,  # type: ignore
     _LlamaSamplingContext,  # type: ignore
 )
+from ._logger import set_verbose
 
 
 class Llama:
@@ -169,10 +169,11 @@
         """
         self.verbose = verbose
 
+        set_verbose(verbose)
+
         self.numa = numa
         if not Llama.__backend_initialized:
-            with suppress_stdout_stderr(disable=self.verbose):
-                llama_cpp.llama_backend_init(self.numa)
+            llama_cpp.llama_backend_init(self.numa)
             Llama.__backend_initialized = True
 
         self.model_path = model_path
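
Usage note (a minimal sketch, not part of the diff): the callback registered in llama_cpp/_logger.py gates llama.cpp's native messages on the level of the "llama-cpp-python" logger and writes matching lines directly to stderr, so downstream code can tune that logger rather than relying on stdout/stderr suppression. The model path below is a placeholder.

import logging

from llama_cpp import Llama

# Llama.__init__ calls set_verbose(), which maps verbose=True to DEBUG
# and verbose=False to ERROR on the "llama-cpp-python" logger.
llm = Llama(model_path="./models/example.gguf", verbose=False)  # placeholder path

# The level can still be raised or lowered afterwards; the registered
# callback prints any llama.cpp message at or above this level to stderr.
logging.getLogger("llama-cpp-python").setLevel(logging.INFO)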