Temporarily disable cache until save state bug is fixed.
parent be0403da98
commit 0da655b3be

2 changed files with 7 additions and 3 deletions
CHANGELOG.md

@@ -12,7 +12,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - (build-system) Migrate from scikit-build to scikit-build-core

 ### Fixed

 - Truncate max_tokens in create_completion so requested tokens doesn't exceed context size.
+
+- Temporarily disable cache for completion requests

 ## [v0.1.59]
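For reference, the pre-existing "Truncate max_tokens" entry above describes capping the requested completion length so that prompt plus completion fits inside the context window. Below is a minimal standalone sketch of that clamp; all names (clamp_max_tokens, requested_max_tokens, prompt_len, n_ctx) are hypothetical illustrations, not the library's internals.

# Illustrative only: cap the completion length so prompt + completion fits in the context window.
def clamp_max_tokens(requested_max_tokens: int, prompt_len: int, n_ctx: int) -> int:
    remaining = max(n_ctx - prompt_len, 0)
    return min(requested_max_tokens, remaining)

# A 512-token context with a 500-token prompt leaves room for at most 12 new tokens.
assert clamp_max_tokens(128, 500, 512) == 12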
llama_cpp/llama.py

@@ -831,7 +831,9 @@ class Llama:
                 "logprobs is not supported for models created with logits_all=False"
             )

-        if self.cache:
+        # Temporarily disable usage of the cache
+        # See: https://github.com/abetlen/llama-cpp-python/issues/348#issuecomment-1583072408
+        if self.cache and False:
             try:
                 cache_item = self.cache[prompt_tokens]
                 cache_prefix_len = Llama.longest_token_prefix(
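The change above relies on Python's short-circuiting boolean logic: with `and False` appended, `self.cache and False` can never be truthy, so the prompt-cache lookup is skipped even when a cache is configured, and re-enabling it later only requires reverting the guard to `if self.cache:`. A tiny standalone sketch of the kill-switch pattern (illustrative values only, not library code):

# `cache` stands in for self.cache.
cache = {(1, 2, 3): "saved state"}

if cache and False:
    # Never reached: `and False` makes the condition falsy even with a populated cache,
    # so the cached-prompt lookup is skipped while the save_state bug is investigated.
    print("cache lookup")
else:
    print("cache disabled")  # always taken while the guard is in place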
@@ -1069,14 +1071,14 @@ class Llama:
                     }
                 ],
             }
-            if self.cache:
+            if self.cache and False:
                 if self.verbose:
                     print("Llama._create_completion: cache save", file=sys.stderr)
                 self.cache[prompt_tokens + completion_tokens] = self.save_state()
                 print("Llama._create_completion: cache saved", file=sys.stderr)
             return

-        if self.cache:
+        if self.cache and False:
             if self.verbose:
                 print("Llama._create_completion: cache save", file=sys.stderr)
             self.cache[prompt_tokens + completion_tokens] = self.save_state()
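The two guards above disable saving model state into the prompt cache after a completion. For orientation, here is a simplified, self-contained sketch of the save/lookup pattern those branches implement. ToyPromptCache is a hypothetical stand-in, not the library's actual cache class, and the stored "state" here is an arbitrary value rather than the result of save_state().

from typing import Any, Dict, List, Tuple

class ToyPromptCache:
    """Toy prompt cache keyed by token tuples; reuses the entry with the longest matching prefix."""

    def __init__(self) -> None:
        self._store: Dict[Tuple[int, ...], Any] = {}

    @staticmethod
    def longest_token_prefix(a: List[int], b: List[int]) -> int:
        # Length of the common prefix between two token sequences.
        n = 0
        for x, y in zip(a, b):
            if x != y:
                break
            n += 1
        return n

    def save(self, tokens: List[int], state: Any) -> None:
        # Analogous to: self.cache[prompt_tokens + completion_tokens] = self.save_state()
        self._store[tuple(tokens)] = state

    def lookup(self, tokens: List[int]) -> Tuple[int, Any]:
        # Return the cached state whose key shares the longest prefix with `tokens`.
        best_len, best_state = 0, None
        for key, state in self._store.items():
            n = self.longest_token_prefix(list(key), tokens)
            if n > best_len:
                best_len, best_state = n, state
        return best_len, best_state

cache = ToyPromptCache()
cache.save([1, 2, 3, 4], state="state-after-4-tokens")
print(cache.lookup([1, 2, 3, 4, 5]))  # -> (4, 'state-after-4-tokens')

In the real code the key is prompt_tokens + completion_tokens and the value is self.save_state(), which is exactly the path this commit turns off until the save-state bug is resolved.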