fix: Use llama_log_callback to avoid suppress_stdout_stderr

Andrei Betlen 2024-02-05 21:52:12 -05:00
parent 3553b14670
commit 59760c85ed
3 changed files with 61 additions and 34 deletions

llama_cpp/_internals.py

@@ -18,8 +18,6 @@ from .llama_grammar import LlamaGrammar
 import llama_cpp.llama_cpp as llama_cpp
 
-from ._utils import suppress_stdout_stderr
-
 # Python wrappers over llama.h structs
@@ -30,7 +28,6 @@ class _LlamaModel:
     _llama_free_model = None
 
     # NOTE: this must be "saved" here to avoid exceptions when calling __del__
-    _suppress_stdout_stderr = suppress_stdout_stderr
 
     def __init__(
         self,
@@ -48,13 +45,11 @@ class _LlamaModel:
 
         if not os.path.exists(path_model):
             raise ValueError(f"Model path does not exist: {path_model}")
 
-        with self._suppress_stdout_stderr(disable=self.verbose):
-            self.model = llama_cpp.llama_load_model_from_file(
-                self.path_model.encode("utf-8"), self.params
-            )
+        self.model = llama_cpp.llama_load_model_from_file(
+            self.path_model.encode("utf-8"), self.params
+        )
 
     def __del__(self):
-        with self._suppress_stdout_stderr(disable=self.verbose):
-            if self.model is not None and self._llama_free_model is not None:
-                self._llama_free_model(self.model)
-                self.model = None
+        if self.model is not None and self._llama_free_model is not None:
+            self._llama_free_model(self.model)
+            self.model = None
@@ -240,8 +235,6 @@ class _LlamaContext:
     NOTE: For stability it's recommended you use the Llama class instead."""
 
     _llama_free = None
-    # NOTE: this must be "saved" here to avoid exceptions when calling __del__
-    _suppress_stdout_stderr = suppress_stdout_stderr
 
     def __init__(
         self,
@@ -256,13 +249,13 @@ class _LlamaContext:
 
         self._llama_free = llama_cpp._lib.llama_free  # type: ignore
 
-        with self._suppress_stdout_stderr(disable=self.verbose):
-            self.ctx = llama_cpp.llama_new_context_with_model(
-                self.model.model, self.params
-            )
+        assert self.model.model is not None
+
+        self.ctx = llama_cpp.llama_new_context_with_model(
+            self.model.model, self.params
+        )
 
     def __del__(self):
-        with self._suppress_stdout_stderr(disable=self.verbose):
-            if self.ctx is not None and self._llama_free is not None:
-                self._llama_free(self.ctx)
-                self.ctx = None
+        if self.ctx is not None and self._llama_free is not None:
+            self._llama_free(self.ctx)
+            self.ctx = None
@@ -493,8 +486,6 @@ class _LlamaContext:
 
 class _LlamaBatch:
     _llama_batch_free = None
-    # NOTE: this must be "saved" here to avoid exceptions when calling __del__
-    _suppress_stdout_stderr = suppress_stdout_stderr
 
     def __init__(
         self, *, n_tokens: int, embd: int, n_seq_max: int, verbose: bool = True
@@ -506,13 +497,11 @@ class _LlamaBatch:
 
         self._llama_batch_free = llama_cpp._lib.llama_batch_free  # type: ignore
 
-        with self._suppress_stdout_stderr(disable=self.verbose):
-            self.batch = llama_cpp.llama_batch_init(
-                self.n_tokens, self.embd, self.n_seq_max
-            )
+        self.batch = llama_cpp.llama_batch_init(
+            self.n_tokens, self.embd, self.n_seq_max
+        )
 
     def __del__(self):
-        with self._suppress_stdout_stderr(disable=self.verbose):
-            if self.batch is not None and self._llama_batch_free is not None:
-                self._llama_batch_free(self.batch)
-                self.batch = None
+        if self.batch is not None and self._llama_batch_free is not None:
+            self._llama_batch_free(self.batch)
+            self.batch = None
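
Aside: the NOTE comments kept in these hunks describe a cleanup pattern worth spelling out. __del__ may run during interpreter shutdown, when module-level globals can already be cleared, so each wrapper saves the C free function on the object ahead of time and checks for None before calling it. A minimal sketch of that pattern in isolation, with a hypothetical _fake_free standing in for the real llama_cpp._lib functions:

# Hypothetical free function standing in for e.g. llama_cpp._lib.llama_free.
def _fake_free(handle) -> None:
    print(f"freeing {handle!r}")


class _Handle:
    # Class-level default so __del__ never raises even if __init__ bailed early.
    _free = None

    def __init__(self):
        # Save the callable now: resolving it through module globals from
        # inside __del__ can fail at interpreter exit, when those globals
        # may already have been set to None.
        self._free = _fake_free
        self.ptr = object()  # stand-in for a pointer returned by the C API

    def __del__(self):
        # Guard both the handle and the saved function before freeing.
        if self.ptr is not None and self._free is not None:
            self._free(self.ptr)
            self.ptr = None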

llama_cpp/_logger.py (new file)

@@ -0,0 +1,37 @@
+import sys
+import ctypes
+import logging
+
+import llama_cpp
+
+# enum ggml_log_level {
+#     GGML_LOG_LEVEL_ERROR = 2,
+#     GGML_LOG_LEVEL_WARN  = 3,
+#     GGML_LOG_LEVEL_INFO  = 4,
+#     GGML_LOG_LEVEL_DEBUG = 5
+# };
+GGML_LOG_LEVEL_TO_LOGGING_LEVEL = {
+    2: logging.ERROR,
+    3: logging.WARNING,
+    4: logging.INFO,
+    5: logging.DEBUG,
+}
+
+logger = logging.getLogger("llama-cpp-python")
+
+
+@llama_cpp.llama_log_callback
+def llama_log_callback(
+    level: int,
+    text: bytes,
+    user_data: ctypes.c_void_p,
+):
+    if logger.level <= GGML_LOG_LEVEL_TO_LOGGING_LEVEL[level]:
+        print(text.decode("utf-8"), end="", flush=True, file=sys.stderr)
+
+
+llama_cpp.llama_log_set(llama_log_callback, ctypes.c_void_p(0))
+
+
+def set_verbose(verbose: bool):
+    logger.setLevel(logging.DEBUG if verbose else logging.ERROR)
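
With this file in place, llama.cpp's native log lines are routed through the callback above and gated by the level of the "llama-cpp-python" logger; note the callback writes straight to stderr and only uses the logger as a level filter. A minimal usage sketch, assuming the module is imported as llama_cpp._logger exactly as added here:

import logging

from llama_cpp._logger import logger, set_verbose

# Allow only llama.cpp warnings and errors through to stderr.
logger.setLevel(logging.WARNING)

# Or use the helper added above: DEBUG when verbose, ERROR otherwise.
set_verbose(False)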

llama_cpp/llama.py

@@ -35,7 +35,6 @@ from llama_cpp.llama_speculative import LlamaDraftModel
 import numpy as np
 import numpy.typing as npt
 
-from ._utils import suppress_stdout_stderr
 from ._internals import (
     _LlamaModel,  # type: ignore
     _LlamaContext,  # type: ignore
@@ -44,6 +43,7 @@ from ._internals import (
     _LlamaSamplingParams,  # type: ignore
     _LlamaSamplingContext,  # type: ignore
 )
+from ._logger import set_verbose
 
 
 class Llama:
@@ -169,9 +169,10 @@ class Llama:
         """
         self.verbose = verbose
 
+        set_verbose(verbose)
+
         self.numa = numa
 
         if not Llama.__backend_initialized:
-            with suppress_stdout_stderr(disable=self.verbose):
-                llama_cpp.llama_backend_init(self.numa)
+            llama_cpp.llama_backend_init(self.numa)
             Llama.__backend_initialized = True
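
Net effect for callers: passing verbose=False to Llama now calls set_verbose(False) before the backend is initialized, so llama.cpp startup chatter is filtered by the log callback instead of by temporarily redirecting the process's stdout/stderr. A rough caller-side sketch; the model path is a placeholder:

from llama_cpp import Llama

# verbose=False sets the "llama-cpp-python" logger to ERROR, so only
# llama.cpp errors reach stderr; Python's own stdout/stderr are untouched.
llm = Llama(model_path="./models/example.gguf", verbose=False)  # placeholder path

output = llm("Q: Name the planets in the solar system. A:", max_tokens=32)
print(output["choices"][0]["text"])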