fix: Use llama_log_callback to avoid suppress_stdout_stderr

commit 59760c85ed
parent 3553b14670

3 changed files with 61 additions and 34 deletions
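In outline, the change registers a llama_log_set callback (new file llama_cpp/_logger.py) that echoes llama.cpp's native log lines to stderr only when a standard logging logger named "llama-cpp-python" permits them, and Llama.__init__ now calls set_verbose(verbose) instead of wrapping native calls in suppress_stdout_stderr. A hedged caller-side sketch of the resulting behaviour (the model path is a placeholder):

    import logging

    from llama_cpp import Llama

    # verbose=False now sets the "llama-cpp-python" logger to logging.ERROR, so the
    # log callback filters llama.cpp's load-time chatter instead of the library
    # redirecting the process's stdout/stderr around each native call.
    llm = Llama(model_path="./models/model.gguf", verbose=False)  # placeholder path

    # Raising the level afterwards makes the native messages visible again.
    logging.getLogger("llama-cpp-python").setLevel(logging.DEBUG)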
llama_cpp/_internals.py

@@ -18,8 +18,6 @@ from .llama_grammar import LlamaGrammar
 
 import llama_cpp.llama_cpp as llama_cpp
 
-from ._utils import suppress_stdout_stderr
-
 
 # Python wrappers over llama.h structs
@@ -30,7 +28,6 @@ class _LlamaModel:
 
     _llama_free_model = None
     # NOTE: this must be "saved" here to avoid exceptions when calling __del__
-    _suppress_stdout_stderr = suppress_stdout_stderr
 
     def __init__(
         self,
@@ -48,13 +45,11 @@ class _LlamaModel:
         if not os.path.exists(path_model):
             raise ValueError(f"Model path does not exist: {path_model}")
 
-        with self._suppress_stdout_stderr(disable=self.verbose):
-            self.model = llama_cpp.llama_load_model_from_file(
-                self.path_model.encode("utf-8"), self.params
-            )
+        self.model = llama_cpp.llama_load_model_from_file(
+            self.path_model.encode("utf-8"), self.params
+        )
 
     def __del__(self):
-        with self._suppress_stdout_stderr(disable=self.verbose):
-            if self.model is not None and self._llama_free_model is not None:
-                self._llama_free_model(self.model)
-                self.model = None
+        if self.model is not None and self._llama_free_model is not None:
+            self._llama_free_model(self.model)
+            self.model = None
@@ -240,8 +235,6 @@ class _LlamaContext:
     NOTE: For stability it's recommended you use the Llama class instead."""
 
     _llama_free = None
-    # NOTE: this must be "saved" here to avoid exceptions when calling __del__
-    _suppress_stdout_stderr = suppress_stdout_stderr
 
     def __init__(
         self,
@@ -256,13 +249,13 @@ class _LlamaContext:
 
         self._llama_free = llama_cpp._lib.llama_free  # type: ignore
 
-        with self._suppress_stdout_stderr(disable=self.verbose):
-            self.ctx = llama_cpp.llama_new_context_with_model(
-                self.model.model, self.params
-            )
+        assert self.model.model is not None
+
+        self.ctx = llama_cpp.llama_new_context_with_model(
+            self.model.model, self.params
+        )
 
     def __del__(self):
-        with self._suppress_stdout_stderr(disable=self.verbose):
-            if self.ctx is not None and self._llama_free is not None:
-                self._llama_free(self.ctx)
-                self.ctx = None
+        if self.ctx is not None and self._llama_free is not None:
+            self._llama_free(self.ctx)
+            self.ctx = None
@@ -493,8 +486,6 @@ class _LlamaContext:
 
 
 class _LlamaBatch:
     _llama_batch_free = None
-    # NOTE: this must be "saved" here to avoid exceptions when calling __del__
-    _suppress_stdout_stderr = suppress_stdout_stderr
 
     def __init__(
         self, *, n_tokens: int, embd: int, n_seq_max: int, verbose: bool = True
@@ -506,13 +497,11 @@ class _LlamaBatch:
 
         self._llama_batch_free = llama_cpp._lib.llama_batch_free  # type: ignore
 
-        with self._suppress_stdout_stderr(disable=self.verbose):
-            self.batch = llama_cpp.llama_batch_init(
-                self.n_tokens, self.embd, self.n_seq_max
-            )
+        self.batch = llama_cpp.llama_batch_init(
+            self.n_tokens, self.embd, self.n_seq_max
+        )
 
     def __del__(self):
-        with self._suppress_stdout_stderr(disable=self.verbose):
-            if self.batch is not None and self._llama_batch_free is not None:
-                self._llama_batch_free(self.batch)
-                self.batch = None
+        if self.batch is not None and self._llama_batch_free is not None:
+            self._llama_batch_free(self.batch)
+            self.batch = None
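The NOTE comments above explain why the native free functions are cached on the class: by the time __del__ runs during interpreter shutdown, module-level names (including the llama_cpp module itself) may already have been cleared, so the destructor keeps its own reference. An illustrative stand-alone sketch of the same pattern (the _TempFile class and os.remove are stand-ins, not part of the diff):

    import os

    class _TempFile:
        # Saved at class-definition time, mirroring _llama_free_model above, so
        # __del__ does not depend on module globals still being alive.
        _remove = os.remove

        def __init__(self, path: str):
            self.path = path
            open(self.path, "w").close()

        def __del__(self):
            if self.path is not None and self._remove is not None:
                self._remove(self.path)
                self.path = None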
llama_cpp/_logger.py  (new file)

@@ -0,0 +1,37 @@
+import sys
+import ctypes
+import logging
+
+import llama_cpp
+
+# enum ggml_log_level {
+#     GGML_LOG_LEVEL_ERROR = 2,
+#     GGML_LOG_LEVEL_WARN = 3,
+#     GGML_LOG_LEVEL_INFO = 4,
+#     GGML_LOG_LEVEL_DEBUG = 5
+# };
+GGML_LOG_LEVEL_TO_LOGGING_LEVEL = {
+    2: logging.ERROR,
+    3: logging.WARNING,
+    4: logging.INFO,
+    5: logging.DEBUG,
+}
+
+logger = logging.getLogger("llama-cpp-python")
+
+
+@llama_cpp.llama_log_callback
+def llama_log_callback(
+    level: int,
+    text: bytes,
+    user_data: ctypes.c_void_p,
+):
+    if logger.level <= GGML_LOG_LEVEL_TO_LOGGING_LEVEL[level]:
+        print(text.decode("utf-8"), end="", flush=True, file=sys.stderr)
+
+
+llama_cpp.llama_log_set(llama_log_callback, ctypes.c_void_p(0))
+
+
+def set_verbose(verbose: bool):
+    logger.setLevel(logging.DEBUG if verbose else logging.ERROR)
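Two details of the new module are worth noting: llama.cpp keeps a single global log callback, and the ctypes callback object must stay referenced (here as a module-level name), otherwise the native side would be left calling a garbage-collected function pointer. A hedged sketch of how an application could install its own callback through the same bindings to capture the text instead of printing it (the name captured_logs is illustrative):

    import ctypes

    import llama_cpp

    captured_logs = []

    @llama_cpp.llama_log_callback
    def _capture(level: int, text: bytes, user_data: ctypes.c_void_p):
        # Collect (ggml level, decoded text); llama.cpp may emit partial lines.
        captured_logs.append((level, text.decode("utf-8")))

    # This replaces the callback installed by _logger.py, since only one log
    # callback is active at a time.
    llama_cpp.llama_log_set(_capture, ctypes.c_void_p(0))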
llama_cpp/llama.py

@@ -35,7 +35,6 @@ from llama_cpp.llama_speculative import LlamaDraftModel
 import numpy as np
 import numpy.typing as npt
 
-from ._utils import suppress_stdout_stderr
 from ._internals import (
     _LlamaModel,  # type: ignore
     _LlamaContext,  # type: ignore
@@ -44,6 +43,7 @@ from ._internals import (
     _LlamaSamplingParams,  # type: ignore
     _LlamaSamplingContext,  # type: ignore
 )
+from ._logger import set_verbose
 
 
 class Llama:
@@ -169,9 +169,10 @@ class Llama:
         """
         self.verbose = verbose
 
+        set_verbose(verbose)
+
         self.numa = numa
         if not Llama.__backend_initialized:
-            with suppress_stdout_stderr(disable=self.verbose):
-                llama_cpp.llama_backend_init(self.numa)
+            llama_cpp.llama_backend_init(self.numa)
             Llama.__backend_initialized = True
 