fix: Use llama_log_callback to avoid suppress_stdout_stderr
This commit is contained in:
parent
3553b14670
commit
59760c85ed
3 changed files with 61 additions and 34 deletions
|
@ -18,8 +18,6 @@ from .llama_grammar import LlamaGrammar
|
|||
|
||||
import llama_cpp.llama_cpp as llama_cpp
|
||||
|
||||
from ._utils import suppress_stdout_stderr
|
||||
|
||||
|
||||
# Python wrappers over llama.h structs
|
||||
|
||||
|
@ -30,7 +28,6 @@ class _LlamaModel:
|
|||
|
||||
_llama_free_model = None
|
||||
# NOTE: this must be "saved" here to avoid exceptions when calling __del__
|
||||
_suppress_stdout_stderr = suppress_stdout_stderr
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
|
@ -48,16 +45,14 @@ class _LlamaModel:
|
|||
if not os.path.exists(path_model):
|
||||
raise ValueError(f"Model path does not exist: {path_model}")
|
||||
|
||||
with self._suppress_stdout_stderr(disable=self.verbose):
|
||||
self.model = llama_cpp.llama_load_model_from_file(
|
||||
self.path_model.encode("utf-8"), self.params
|
||||
)
|
||||
self.model = llama_cpp.llama_load_model_from_file(
|
||||
self.path_model.encode("utf-8"), self.params
|
||||
)
|
||||
|
||||
def __del__(self):
|
||||
with self._suppress_stdout_stderr(disable=self.verbose):
|
||||
if self.model is not None and self._llama_free_model is not None:
|
||||
self._llama_free_model(self.model)
|
||||
self.model = None
|
||||
if self.model is not None and self._llama_free_model is not None:
|
||||
self._llama_free_model(self.model)
|
||||
self.model = None
|
||||
|
||||
def vocab_type(self) -> int:
|
||||
assert self.model is not None
|
||||
|
@ -240,8 +235,6 @@ class _LlamaContext:
|
|||
NOTE: For stability it's recommended you use the Llama class instead."""
|
||||
|
||||
_llama_free = None
|
||||
# NOTE: this must be "saved" here to avoid exceptions when calling __del__
|
||||
_suppress_stdout_stderr = suppress_stdout_stderr
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
|
@ -256,16 +249,16 @@ class _LlamaContext:
|
|||
|
||||
self._llama_free = llama_cpp._lib.llama_free # type: ignore
|
||||
|
||||
with self._suppress_stdout_stderr(disable=self.verbose):
|
||||
self.ctx = llama_cpp.llama_new_context_with_model(
|
||||
self.model.model, self.params
|
||||
)
|
||||
assert self.model.model is not None
|
||||
|
||||
self.ctx = llama_cpp.llama_new_context_with_model(
|
||||
self.model.model, self.params
|
||||
)
|
||||
|
||||
def __del__(self):
|
||||
with self._suppress_stdout_stderr(disable=self.verbose):
|
||||
if self.ctx is not None and self._llama_free is not None:
|
||||
self._llama_free(self.ctx)
|
||||
self.ctx = None
|
||||
if self.ctx is not None and self._llama_free is not None:
|
||||
self._llama_free(self.ctx)
|
||||
self.ctx = None
|
||||
|
||||
def n_ctx(self) -> int:
|
||||
assert self.ctx is not None
|
||||
|
@ -493,8 +486,6 @@ class _LlamaContext:
|
|||
|
||||
class _LlamaBatch:
|
||||
_llama_batch_free = None
|
||||
# NOTE: this must be "saved" here to avoid exceptions when calling __del__
|
||||
_suppress_stdout_stderr = suppress_stdout_stderr
|
||||
|
||||
def __init__(
|
||||
self, *, n_tokens: int, embd: int, n_seq_max: int, verbose: bool = True
|
||||
|
@ -506,16 +497,14 @@ class _LlamaBatch:
|
|||
|
||||
self._llama_batch_free = llama_cpp._lib.llama_batch_free # type: ignore
|
||||
|
||||
with self._suppress_stdout_stderr(disable=self.verbose):
|
||||
self.batch = llama_cpp.llama_batch_init(
|
||||
self.n_tokens, self.embd, self.n_seq_max
|
||||
)
|
||||
self.batch = llama_cpp.llama_batch_init(
|
||||
self.n_tokens, self.embd, self.n_seq_max
|
||||
)
|
||||
|
||||
def __del__(self):
|
||||
with self._suppress_stdout_stderr(disable=self.verbose):
|
||||
if self.batch is not None and self._llama_batch_free is not None:
|
||||
self._llama_batch_free(self.batch)
|
||||
self.batch = None
|
||||
if self.batch is not None and self._llama_batch_free is not None:
|
||||
self._llama_batch_free(self.batch)
|
||||
self.batch = None
|
||||
|
||||
def set_batch(self, batch: Sequence[int], n_past: int, logits_all: bool):
|
||||
assert self.batch is not None
|
||||
|
|
37
llama_cpp/_logger.py
Normal file
37
llama_cpp/_logger.py
Normal file
|
@ -0,0 +1,37 @@
|
|||
import sys
|
||||
import ctypes
|
||||
import logging
|
||||
|
||||
import llama_cpp
|
||||
|
||||
# enum ggml_log_level {
|
||||
# GGML_LOG_LEVEL_ERROR = 2,
|
||||
# GGML_LOG_LEVEL_WARN = 3,
|
||||
# GGML_LOG_LEVEL_INFO = 4,
|
||||
# GGML_LOG_LEVEL_DEBUG = 5
|
||||
# };
|
||||
GGML_LOG_LEVEL_TO_LOGGING_LEVEL = {
|
||||
2: logging.ERROR,
|
||||
3: logging.WARNING,
|
||||
4: logging.INFO,
|
||||
5: logging.DEBUG,
|
||||
}
|
||||
|
||||
logger = logging.getLogger("llama-cpp-python")
|
||||
|
||||
|
||||
@llama_cpp.llama_log_callback
def llama_log_callback(
    level: int,
    text: bytes,
    user_data: ctypes.c_void_p,
):
    """Write a llama.cpp log message to stderr if the module logger allows it.

    The message is printed when ``logger.level`` is at or below the Python
    logging level that ``GGML_LOG_LEVEL_TO_LOGGING_LEVEL`` maps ``level`` to.

    Args:
        level: Numeric ggml log level of the message.
        text: Raw message bytes; written without adding a trailing newline.
        user_data: Opaque pointer supplied at registration; unused here.
    """
    # This function runs as a ctypes callback, where a raised exception is
    # reported and discarded rather than propagated to a caller. Avoid raising:
    # a level missing from the table is treated as ERROR so that the message
    # is still shown instead of being lost to a KeyError.
    threshold = GGML_LOG_LEVEL_TO_LOGGING_LEVEL.get(level, logging.ERROR)
    if logger.level <= threshold:
        # errors="replace" keeps malformed UTF-8 in a message from raising
        # UnicodeDecodeError; undecodable bytes are shown as U+FFFD.
        message = text.decode("utf-8", errors="replace")
        print(message, end="", flush=True, file=sys.stderr)
|
||||
|
||||
|
||||
llama_cpp.llama_log_set(llama_log_callback, ctypes.c_void_p(0))
|
||||
|
||||
|
||||
def set_verbose(verbose: bool):
    """Set the module logger's level from a verbosity flag.

    Args:
        verbose: When true the logger level becomes ``logging.DEBUG``;
            otherwise it becomes ``logging.ERROR``.
    """
    if verbose:
        chosen_level = logging.DEBUG
    else:
        chosen_level = logging.ERROR
    logger.setLevel(chosen_level)
|
|
@ -35,7 +35,6 @@ from llama_cpp.llama_speculative import LlamaDraftModel
|
|||
import numpy as np
|
||||
import numpy.typing as npt
|
||||
|
||||
from ._utils import suppress_stdout_stderr
|
||||
from ._internals import (
|
||||
_LlamaModel, # type: ignore
|
||||
_LlamaContext, # type: ignore
|
||||
|
@ -44,6 +43,7 @@ from ._internals import (
|
|||
_LlamaSamplingParams, # type: ignore
|
||||
_LlamaSamplingContext, # type: ignore
|
||||
)
|
||||
from ._logger import set_verbose
|
||||
|
||||
|
||||
class Llama:
|
||||
|
@ -169,10 +169,11 @@ class Llama:
|
|||
"""
|
||||
self.verbose = verbose
|
||||
|
||||
set_verbose(verbose)
|
||||
|
||||
self.numa = numa
|
||||
if not Llama.__backend_initialized:
|
||||
with suppress_stdout_stderr(disable=self.verbose):
|
||||
llama_cpp.llama_backend_init(self.numa)
|
||||
llama_cpp.llama_backend_init(self.numa)
|
||||
Llama.__backend_initialized = True
|
||||
|
||||
self.model_path = model_path
|
||||
|
|
Loading…
Reference in a new issue