Improve logging messages
commit 9e5b6d675a (parent 43f2907e3a)
1 changed file with 5 additions and 3 deletions
@@ -396,7 +396,7 @@ class Llama:
             and tuple(self.eval_tokens) == tuple(tokens[: len(self.eval_tokens)])
         ):
             if self.verbose:
-                print("generate cache hit", file=sys.stderr)
+                print("Llama.generate: cache hit", file=sys.stderr)
             reset = False
             tokens = tokens[len(self.eval_tokens) :]

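For context, the renamed message is printed inside Llama.generate's prompt-reuse check shown above. A minimal, self-contained sketch of that kind of prefix comparison (can_reuse_prefix and its arguments are illustrative names, not part of the library):

import sys
from collections import deque
from typing import Deque, List

def can_reuse_prefix(eval_tokens: Deque[int], tokens: List[int], verbose: bool = False) -> bool:
    # True when every token already evaluated matches the start of the new prompt.
    hit = len(eval_tokens) > 0 and tuple(eval_tokens) == tuple(tokens[: len(eval_tokens)])
    if verbose and hit:
        print("Llama.generate: cache hit", file=sys.stderr)
    return hit

# Only the suffix past the shared prefix would need to be re-evaluated.
cached = deque([1, 2, 3])
prompt = [1, 2, 3, 4, 5]
if can_reuse_prefix(cached, prompt, verbose=True):
    prompt = prompt[len(cached):]  # prompt is now [4, 5]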
@@ -518,7 +518,7 @@ class Llama:

         if self.cache and prompt_tokens in self.cache:
             if self.verbose:
-                print("cache hit", file=sys.stderr)
+                print("Llama._create_completion: cache hit", file=sys.stderr)
             self.load_state(self.cache[prompt_tokens])

         finish_reason = "length"
@@ -538,7 +538,7 @@ class Llama:
             if self.cache and len(completion_tokens) == 0:
                 if prompt_tokens not in self.cache:
                     if self.verbose:
-                        print("cache miss", file=sys.stderr)
+                        print("Llama._create_completion: cache miss", file=sys.stderr)
                     self.cache[prompt_tokens] = self.save_state()

             completion_tokens.append(token)
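Both _create_completion messages belong to the prompt-keyed state cache: a hit restores saved state before generation, and on a miss the state is saved the first time a token is sampled. A rough sketch of that save-on-first-token pattern, using a plain dict and a caller-supplied save_state callable as stand-ins (maybe_cache_state, cache, and save_state here are illustrative, not the library's API):

import sys
from typing import Callable, Dict, List, Tuple

cache: Dict[Tuple[int, ...], bytes] = {}  # stand-in for the Llama prompt cache

def maybe_cache_state(prompt_tokens: List[int],
                      completion_tokens: List[int],
                      save_state: Callable[[], bytes],
                      verbose: bool = False) -> None:
    # Save the model state once, when the first completion token for this prompt arrives.
    key = tuple(prompt_tokens)
    if len(completion_tokens) == 0 and key not in cache:
        if verbose:
            print("Llama._create_completion: cache miss", file=sys.stderr)
        cache[key] = save_state()

maybe_cache_state([1, 2, 3], [], save_state=lambda: b"fake-state", verbose=True)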
@@ -957,6 +957,8 @@ class Llama:
             raise RuntimeError("Failed to copy llama state data")
         llama_state_compact = (llama_cpp.c_uint8 * int(n_bytes))()
         llama_cpp.ctypes.memmove(llama_state_compact, llama_state, int(n_bytes))
+        if self.verbose:
+            print(f"Llama.save_state: saving {n_bytes} bytes of llama state", file=sys.stderr)
         return LlamaState(
             eval_tokens=self.eval_tokens.copy(),
             eval_logits=self.eval_logits.copy(),
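The new save_state log reports n_bytes, the size of the compacted buffer built two lines earlier. A standalone illustration of that ctypes pattern, copying an over-allocated scratch buffer into one sized exactly to the data (the sizes here are made up):

import ctypes

n_max = 16    # capacity of the over-allocated scratch buffer (illustrative)
n_bytes = 10  # bytes actually used by the state (illustrative)

scratch = (ctypes.c_uint8 * n_max)(*range(n_max))
compact = (ctypes.c_uint8 * n_bytes)()        # destination sized exactly to the data
ctypes.memmove(compact, scratch, n_bytes)     # raw byte copy, as in C's memmove

assert bytes(compact) == bytes(scratch)[:n_bytes]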