Merge branch 'abetlen:main' into main

This commit is contained in:
Maximilian Winter 2023-05-31 12:55:51 +02:00 committed by GitHub
commit 9ea7a379d3
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 118 additions and 69 deletions

View file

@ -57,7 +57,17 @@ Please provide detailed steps for reproducing the issue. We are not sitting in f
3. step 3 3. step 3
4. etc. 4. etc.
**Note: Many issues seem to be regarding performance issues / differences with `llama.cpp`. In these cases we need to confirm that you're comparing against the version of `llama.cpp` that was built with your python package, and which parameters you're passing to the context.** **Note: Many issues seem to be regarding functional or performance issues / differences with `llama.cpp`. In these cases we need to confirm that you're comparing against the version of `llama.cpp` that was built with your python package, and which parameters you're passing to the context.**
Try the following:
1. `git clone https://github.com/abetlen/llama-cpp-python`
2. `cd llama-cpp-python`
3. `rm -rf _skbuild/` # delete any old builds
4. `python setup.py develop`
5. `cd ./vendor/llama.cpp`
6. Follow [llama.cpp's instructions](https://github.com/ggerganov/llama.cpp#build) to build llama.cpp with `cmake`
7. Run llama.cpp's `./main` with the same arguments you previously passed to llama-cpp-python and see if you can reproduce the issue. If you can, [log an issue with llama.cpp](https://github.com/ggerganov/llama.cpp/issues)
# Failure Logs # Failure Logs
@ -73,8 +83,14 @@ commit 47b0aa6e957b93dbe2c29d53af16fbae2dd628f2
llama-cpp-python$ python3 --version llama-cpp-python$ python3 --version
Python 3.10.10 Python 3.10.10
llama-cpp-python$ pip list | egrep "uvicorn|fastapi|sse-starlette" llama-cpp-python$ pip list | egrep "uvicorn|fastapi|sse-starlette|numpy"
fastapi 0.95.0 fastapi 0.95.0
numpy 1.24.3
sse-starlette 1.3.3 sse-starlette 1.3.3
uvicorn 0.21.1 uvicorn 0.21.1
llama-cpp-python/vendor/llama.cpp$ git log | head -3
commit 66874d4fbcc7866377246efbcee938e8cc9c7d76
Author: Kerfuffle <44031344+KerfuffleV2@users.noreply.github.com>
Date: Thu May 25 20:18:01 2023 -0600
``` ```

View file

@ -7,9 +7,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased] ## [Unreleased]
## [v0.1.56]
### Added ### Added
- Added first version of the changelog - Added first version of the changelog
- Server: Use async routes
- Use numpy for internal buffers to reduce memory usage and improve performance.
### Fixed ### Fixed

View file

@ -22,6 +22,9 @@ import diskcache
from . import llama_cpp from . import llama_cpp
from .llama_types import * from .llama_types import *
import numpy as np
import numpy.typing as npt
class LlamaCache: class LlamaCache:
@ -76,11 +79,15 @@ class LlamaState:
self, self,
eval_tokens: Deque[int], eval_tokens: Deque[int],
eval_logits: Deque[List[float]], eval_logits: Deque[List[float]],
input_ids: npt.NDArray[np.intc],
scores: npt.NDArray[np.single],
llama_state, # type: llama_cpp.Array[llama_cpp.c_uint8] llama_state, # type: llama_cpp.Array[llama_cpp.c_uint8]
llama_state_size: int, llama_state_size: int,
): ):
self.eval_tokens = eval_tokens self.eval_tokens = eval_tokens
self.eval_logits = eval_logits self.eval_logits = eval_logits
self.input_ids = input_ids
self.scores = scores
self.llama_state = llama_state self.llama_state = llama_state
self.llama_state_size = llama_state_size self.llama_state_size = llama_state_size
@ -210,20 +217,17 @@ class Llama:
self._n_vocab = self.n_vocab() self._n_vocab = self.n_vocab()
self._n_ctx = self.n_ctx() self._n_ctx = self.n_ctx()
data = (llama_cpp.llama_token_data * self._n_vocab)(
*[
llama_cpp.llama_token_data(
id=llama_cpp.llama_token(i),
logit=llama_cpp.c_float(0.0),
p=llama_cpp.c_float(0.0),
)
for i in range(self._n_vocab)
]
)
size = llama_cpp.c_size_t(self._n_vocab) size = llama_cpp.c_size_t(self._n_vocab)
sorted = False sorted = llama_cpp.c_bool(False)
self._candidates_data = np.array(
[],
dtype=np.dtype(
[("id", np.intc), ("logit", np.single), ("p", np.single)], align=True
),
)
self._candidates_data.resize(3, self._n_vocab)
candidates = llama_cpp.llama_token_data_array( candidates = llama_cpp.llama_token_data_array(
data=data, data=self._candidates_data.ctypes.data_as(llama_cpp.llama_token_data_p),
size=size, size=size,
sorted=sorted, sorted=sorted,
) )
@ -231,6 +235,9 @@ class Llama:
self._token_nl = Llama.token_nl() self._token_nl = Llama.token_nl()
self._token_eos = Llama.token_eos() self._token_eos = Llama.token_eos()
self._input_ids = np.array([], dtype=np.intc)
self._scores = np.ndarray((0, self._n_vocab), dtype=np.single)
def tokenize(self, text: bytes, add_bos: bool = True) -> List[int]: def tokenize(self, text: bytes, add_bos: bool = True) -> List[int]:
"""Tokenize a string. """Tokenize a string.
@ -298,6 +305,8 @@ class Llama:
"""Reset the model state.""" """Reset the model state."""
self.eval_tokens.clear() self.eval_tokens.clear()
self.eval_logits.clear() self.eval_logits.clear()
self._input_ids = np.array([], dtype=np.intc)
self._scores = np.ndarray((0, self._n_vocab), dtype=np.single)
def eval(self, tokens: Sequence[int]): def eval(self, tokens: Sequence[int]):
"""Evaluate a list of tokens. """Evaluate a list of tokens.
@ -309,7 +318,7 @@ class Llama:
n_ctx = self._n_ctx n_ctx = self._n_ctx
for i in range(0, len(tokens), self.n_batch): for i in range(0, len(tokens), self.n_batch):
batch = tokens[i : min(len(tokens), i + self.n_batch)] batch = tokens[i : min(len(tokens), i + self.n_batch)]
n_past = min(n_ctx - len(batch), len(self.eval_tokens)) n_past = min(n_ctx - len(batch), len(self._input_ids))
n_tokens = len(batch) n_tokens = len(batch)
return_code = llama_cpp.llama_eval( return_code = llama_cpp.llama_eval(
ctx=self.ctx, ctx=self.ctx,
@ -322,6 +331,9 @@ class Llama:
raise RuntimeError(f"llama_eval returned {return_code}") raise RuntimeError(f"llama_eval returned {return_code}")
# Save tokens # Save tokens
self.eval_tokens.extend(batch) self.eval_tokens.extend(batch)
self._input_ids: npt.NDArray[np.intc] = np.concatenate(
(self._input_ids, np.array(batch, dtype=np.intc)), axis=0
)
# Save logits # Save logits
rows = n_tokens if self.params.logits_all else 1 rows = n_tokens if self.params.logits_all else 1
n_vocab = self._n_vocab n_vocab = self._n_vocab
@ -329,6 +341,9 @@ class Llama:
logits_view = llama_cpp.llama_get_logits(self.ctx) logits_view = llama_cpp.llama_get_logits(self.ctx)
logits = [logits_view[i * cols : (i + 1) * cols] for i in range(rows)] logits = [logits_view[i * cols : (i + 1) * cols] for i in range(rows)]
self.eval_logits.extend(logits) self.eval_logits.extend(logits)
self._scores: npt.NDArray[np.single] = np.concatenate(
(self._scores, np.array(logits, dtype=np.single)), axis=0
)
def _sample( def _sample(
self, self,
@ -349,6 +364,7 @@ class Llama:
): ):
assert self.ctx is not None assert self.ctx is not None
assert len(self.eval_logits) > 0 assert len(self.eval_logits) > 0
assert self._scores.shape[0] > 0
n_vocab = self._n_vocab n_vocab = self._n_vocab
n_ctx = self._n_ctx n_ctx = self._n_ctx
top_k = llama_cpp.c_int(n_vocab) if top_k.value <= 0 else top_k top_k = llama_cpp.c_int(n_vocab) if top_k.value <= 0 else top_k
@ -357,18 +373,23 @@ class Llama:
if last_n_tokens_size.value < 0 if last_n_tokens_size.value < 0
else last_n_tokens_size else last_n_tokens_size
) )
logits = self.eval_logits[-1] logits: npt.NDArray[np.single] = self._scores[-1, :]
if logits_processor is not None: if logits_processor is not None:
logits = logits_processor(list(self.eval_tokens), logits) logits = np.array(
self.eval_logits[-1] = logits logits_processor(self._input_ids.tolist(), logits.tolist()),
dtype=np.single,
)
self._scores[-1, :] = logits
self.eval_logits[-1] = logits.tolist()
nl_logit = logits[self._token_nl] nl_logit = logits[self._token_nl]
candidates = self._candidates candidates = self._candidates
for i, logit in enumerate(logits): candidates_data = self._candidates_data
candidates.data[i].id = llama_cpp.llama_token(i) candidates_data["id"] = np.arange(n_vocab, dtype=np.intc) # type: ignore
candidates.data[i].logit = llama_cpp.c_float(logit) candidates_data["logit"] = logits
candidates.data[i].p = llama_cpp.c_float(0.0) candidates_data["p"] = np.zeros(n_vocab, dtype=np.single)
candidates.data = candidates_data.ctypes.data_as(llama_cpp.llama_token_data_p)
candidates.sorted = llama_cpp.c_bool(False) candidates.sorted = llama_cpp.c_bool(False)
candidates.size = llama_cpp.c_size_t(n_vocab) candidates.size = llama_cpp.c_size_t(n_vocab)
llama_cpp.llama_sample_repetition_penalty( llama_cpp.llama_sample_repetition_penalty(
@ -486,8 +507,8 @@ class Llama:
""" """
assert self.ctx is not None assert self.ctx is not None
last_n_tokens_data = [llama_cpp.llama_token(0)] * max( last_n_tokens_data = [llama_cpp.llama_token(0)] * max(
0, self.last_n_tokens_size - len(self.eval_tokens) 0, self.last_n_tokens_size - len(self._input_ids)
) + list(self.eval_tokens)[-self.last_n_tokens_size :] ) + self._input_ids[-self.last_n_tokens_size :].tolist()
return self._sample( return self._sample(
last_n_tokens_data=(llama_cpp.llama_token * self.last_n_tokens_size)( last_n_tokens_data=(llama_cpp.llama_token * self.last_n_tokens_size)(
*last_n_tokens_data *last_n_tokens_data
@ -545,9 +566,9 @@ class Llama:
""" """
assert self.ctx is not None assert self.ctx is not None
if reset and len(self.eval_tokens) > 0: if reset and len(self._input_ids) > 0:
longest_prefix = 0 longest_prefix = 0
for a, b in zip(self.eval_tokens, tokens[:-1]): for a, b in zip(self._input_ids, tokens[:-1]):
if a == b: if a == b:
longest_prefix += 1 longest_prefix += 1
else: else:
@ -557,6 +578,8 @@ class Llama:
print("Llama.generate: prefix-match hit", file=sys.stderr) print("Llama.generate: prefix-match hit", file=sys.stderr)
reset = False reset = False
tokens = tokens[longest_prefix:] tokens = tokens[longest_prefix:]
self._input_ids = self._input_ids[:longest_prefix]
self._scores = self._scores[:longest_prefix, :]
for _ in range(len(self.eval_tokens) - longest_prefix): for _ in range(len(self.eval_tokens) - longest_prefix):
self.eval_tokens.pop() self.eval_tokens.pop()
try: try:
@ -583,7 +606,7 @@ class Llama:
logits_processor=logits_processor, logits_processor=logits_processor,
) )
if stopping_criteria is not None and stopping_criteria( if stopping_criteria is not None and stopping_criteria(
list(self.eval_tokens), self.eval_logits[-1] self._input_ids.tolist(), self._scores[-1, :].tolist()
): ):
return return
tokens_or_none = yield token tokens_or_none = yield token
@ -718,10 +741,10 @@ class Llama:
try: try:
cache_item = self.cache[prompt_tokens] cache_item = self.cache[prompt_tokens]
cache_prefix_len = Llama.longest_token_prefix( cache_prefix_len = Llama.longest_token_prefix(
cache_item.eval_tokens, prompt_tokens cache_item.input_ids.tolist(), prompt_tokens
) )
eval_prefix_len = Llama.longest_token_prefix( eval_prefix_len = Llama.longest_token_prefix(
self.eval_tokens, prompt_tokens self._input_ids.tolist(), prompt_tokens
) )
if cache_prefix_len > eval_prefix_len: if cache_prefix_len > eval_prefix_len:
self.load_state(cache_item) self.load_state(cache_item)
@ -810,7 +833,7 @@ class Llama:
self.detokenize(completion_tokens[:returned_tokens]) self.detokenize(completion_tokens[:returned_tokens])
) )
token_offset = len(prompt_tokens) + returned_tokens token_offset = len(prompt_tokens) + returned_tokens
logits = self.eval_logits[token_offset - 1] logits = self._scores[token_offset - 1, :].tolist()
current_logprobs = Llama.logits_to_logprobs(logits) current_logprobs = Llama.logits_to_logprobs(logits)
sorted_logprobs = list( sorted_logprobs = list(
sorted( sorted(
@ -859,7 +882,7 @@ class Llama:
break break
if stopping_criteria is not None and stopping_criteria( if stopping_criteria is not None and stopping_criteria(
list(self.eval_tokens), self.eval_logits[-1] self._input_ids.tolist(), self._scores[-1, :].tolist()
): ):
text = self.detokenize(completion_tokens) text = self.detokenize(completion_tokens)
finish_reason = "stop" finish_reason = "stop"
@ -889,7 +912,7 @@ class Llama:
self.detokenize(completion_tokens[:returned_tokens]) self.detokenize(completion_tokens[:returned_tokens])
) )
token_offset = len(prompt_tokens) + returned_tokens - 1 token_offset = len(prompt_tokens) + returned_tokens - 1
logits = self.eval_logits[token_offset] logits = self._scores[token_offset, :].tolist()
current_logprobs = Llama.logits_to_logprobs(logits) current_logprobs = Llama.logits_to_logprobs(logits)
sorted_logprobs = list( sorted_logprobs = list(
sorted( sorted(
@ -991,8 +1014,7 @@ class Llama:
for token in all_tokens for token in all_tokens
] ]
all_logprobs = [ all_logprobs = [
Llama.logits_to_logprobs(list(map(float, row))) Llama.logits_to_logprobs(row.tolist()) for row in self._scores
for row in self.eval_logits
][token_offset:] ][token_offset:]
for token, token_str, logprobs_token in zip( for token, token_str, logprobs_token in zip(
all_tokens, all_token_strs, all_logprobs all_tokens, all_token_strs, all_logprobs
@ -1376,6 +1398,8 @@ class Llama:
return LlamaState( return LlamaState(
eval_tokens=self.eval_tokens.copy(), eval_tokens=self.eval_tokens.copy(),
eval_logits=self.eval_logits.copy(), eval_logits=self.eval_logits.copy(),
scores=self._scores.copy(),
input_ids=self._input_ids.copy(),
llama_state=llama_state_compact, llama_state=llama_state_compact,
llama_state_size=n_bytes, llama_state_size=n_bytes,
) )
@ -1384,6 +1408,8 @@ class Llama:
assert self.ctx is not None assert self.ctx is not None
self.eval_tokens = state.eval_tokens.copy() self.eval_tokens = state.eval_tokens.copy()
self.eval_logits = state.eval_logits.copy() self.eval_logits = state.eval_logits.copy()
self._scores = state.scores.copy()
self._input_ids = state.input_ids.copy()
state_size = state.llama_state_size state_size = state.llama_state_size
if llama_cpp.llama_set_state_data(self.ctx, state.llama_state) != state_size: if llama_cpp.llama_set_state_data(self.ctx, state.llama_state) != state_size:
raise RuntimeError("Failed to set llama state data") raise RuntimeError("Failed to set llama state data")

48
poetry.lock generated
View file

@ -1,4 +1,4 @@
# This file is automatically @generated by Poetry 1.4.2 and should not be changed by hand. # This file is automatically @generated by Poetry and should not be changed by hand.
[[package]] [[package]]
name = "anyio" name = "anyio"
@ -800,14 +800,14 @@ mkdocs = ">=1.1"
[[package]] [[package]]
name = "mkdocs-material" name = "mkdocs-material"
version = "9.1.14" version = "9.1.15"
description = "Documentation that simply works" description = "Documentation that simply works"
category = "dev" category = "dev"
optional = false optional = false
python-versions = ">=3.7" python-versions = ">=3.7"
files = [ files = [
{file = "mkdocs_material-9.1.14-py3-none-any.whl", hash = "sha256:b56a9f955ed32d38333715cbbf68ce38f683bf38610c65094fa4ef2db9f08bcd"}, {file = "mkdocs_material-9.1.15-py3-none-any.whl", hash = "sha256:b49e12869ab464558e2dd3c5792da5b748a7e0c48ee83b4d05715f98125a7a39"},
{file = "mkdocs_material-9.1.14.tar.gz", hash = "sha256:1ae74cc5464ef2f64574d4884512efed7f4db386fb9bc6af20fd427d7a702f49"}, {file = "mkdocs_material-9.1.15.tar.gz", hash = "sha256:8513ab847c9a541ed3d11a3a7eed556caf72991ee786c31c5aac6691a121088a"},
] ]
[package.dependencies] [package.dependencies]
@ -835,17 +835,18 @@ files = [
[[package]] [[package]]
name = "mkdocstrings" name = "mkdocstrings"
version = "0.21.2" version = "0.22.0"
description = "Automatic documentation from sources, for MkDocs." description = "Automatic documentation from sources, for MkDocs."
category = "dev" category = "dev"
optional = false optional = false
python-versions = ">=3.7" python-versions = ">=3.7"
files = [ files = [
{file = "mkdocstrings-0.21.2-py3-none-any.whl", hash = "sha256:949ef8da92df9d692ca07be50616459a6b536083a25520fd54b00e8814ce019b"}, {file = "mkdocstrings-0.22.0-py3-none-any.whl", hash = "sha256:2d4095d461554ff6a778fdabdca3c00c468c2f1459d469f7a7f622a2b23212ba"},
{file = "mkdocstrings-0.21.2.tar.gz", hash = "sha256:304e56a2e90595708a38a13a278e538a67ad82052dd5c8b71f77a604a4f3d911"}, {file = "mkdocstrings-0.22.0.tar.gz", hash = "sha256:82a33b94150ebb3d4b5c73bab4598c3e21468c79ec072eff6931c8f3bfc38256"},
] ]
[package.dependencies] [package.dependencies]
importlib-metadata = {version = ">=4.6", markers = "python_version < \"3.10\""}
Jinja2 = ">=2.11.1" Jinja2 = ">=2.11.1"
Markdown = ">=3.3" Markdown = ">=3.3"
MarkupSafe = ">=1.1" MarkupSafe = ">=1.1"
@ -1374,25 +1375,28 @@ jupyter = ["ipywidgets (>=7.5.1,<9)"]
[[package]] [[package]]
name = "scikit-build" name = "scikit-build"
version = "0.13.0" version = "0.17.5"
description = "Improved build system generator for Python C/C++/Fortran/Cython extensions" description = "Improved build system generator for Python C/C++/Fortran/Cython extensions"
category = "dev" category = "dev"
optional = false optional = false
python-versions = "*" python-versions = ">=3.7"
files = [ files = [
{file = "scikit-build-0.13.0.tar.gz", hash = "sha256:a6ca1b7f1cc8a718564c19f535014f3a71f34508f72e750d4221f987eed0f06d"}, {file = "scikit_build-0.17.5-py3-none-any.whl", hash = "sha256:18861286b34fd2d685327d3bec6ebf4d33303adfaef28a08dd856710d16cf20f"},
{file = "scikit_build-0.13.0-py2.py3-none-any.whl", hash = "sha256:f903fef5cd76aa81dee040fa9cf3daaeff5c71fccfe5fc0bf6a62e54b166d492"}, {file = "scikit_build-0.17.5.tar.gz", hash = "sha256:76856e7631d9e8887a7aa71913d5f184a6177246225391af96ce4801d89fa254"},
] ]
[package.dependencies] [package.dependencies]
distro = "*" distro = "*"
packaging = "*" packaging = "*"
setuptools = {version = ">=28.0.0", markers = "python_version >= \"3\""} setuptools = ">=42.0.0"
wheel = ">=0.29.0" tomli = {version = "*", markers = "python_version < \"3.11\""}
wheel = ">=0.32.0"
[package.extras] [package.extras]
cov = ["coverage[toml] (>=4.2)", "pytest-cov (>=2.7.1)"]
docs = ["pygments", "sphinx (>=4)", "sphinx-issues", "sphinx-rtd-theme (>=1.0)", "sphinxcontrib-moderncmakedomain (>=3.19)"] docs = ["pygments", "sphinx (>=4)", "sphinx-issues", "sphinx-rtd-theme (>=1.0)", "sphinxcontrib-moderncmakedomain (>=3.19)"]
test = ["build (>=0.5)", "codecov (>=2.0.5)", "coverage (>=4.2)", "cython (>=0.25.1)", "flake8 (>=3.0.4)", "path.py (>=11.5.0)", "pathlib2", "pytest (>=4.5.0)", "pytest-cov (>=2.7.1)", "pytest-mock (>=1.10.4)", "pytest-runner (>=5.1)", "pytest-virtualenv (>=1.2.5)", "requests", "six (>=1.10.0)", "ubelt (>=0.8.2)", "virtualenv", "xdoctest (>=0.10.0)"] doctest = ["ubelt (>=0.8.2)", "xdoctest (>=0.10.0)"]
test = ["build (>=0.7)", "cython (>=0.25.1)", "importlib-metadata", "pytest (>=6.0.0)", "pytest-mock (>=1.10.4)", "pytest-virtualenv (>=1.2.5)", "requests", "virtualenv"]
[[package]] [[package]]
name = "secretstorage" name = "secretstorage"
@ -1522,14 +1526,14 @@ urllib3 = ">=1.26.0"
[[package]] [[package]]
name = "typing-extensions" name = "typing-extensions"
version = "4.5.0" version = "4.6.2"
description = "Backported and Experimental Type Hints for Python 3.7+" description = "Backported and Experimental Type Hints for Python 3.7+"
category = "main" category = "main"
optional = false optional = false
python-versions = ">=3.7" python-versions = ">=3.7"
files = [ files = [
{file = "typing_extensions-4.5.0-py3-none-any.whl", hash = "sha256:fb33085c39dd998ac16d1431ebc293a8b3eedd00fd4a32de0ff79002c19511b4"}, {file = "typing_extensions-4.6.2-py3-none-any.whl", hash = "sha256:3a8b36f13dd5fdc5d1b16fe317f5668545de77fa0b8e02006381fd49d731ab98"},
{file = "typing_extensions-4.5.0.tar.gz", hash = "sha256:5cb5f4a79139d699607b3ef622a1dedafa84e115ab0024e0d9c044a9479ca7cb"}, {file = "typing_extensions-4.6.2.tar.gz", hash = "sha256:06006244c70ac8ee83fa8282cb188f697b8db25bc8b4df07be1873c43897060c"},
] ]
[[package]] [[package]]
@ -1552,14 +1556,14 @@ zstd = ["zstandard (>=0.18.0)"]
[[package]] [[package]]
name = "uvicorn" name = "uvicorn"
version = "0.21.1" version = "0.22.0"
description = "The lightning-fast ASGI server." description = "The lightning-fast ASGI server."
category = "main" category = "main"
optional = true optional = true
python-versions = ">=3.7" python-versions = ">=3.7"
files = [ files = [
{file = "uvicorn-0.21.1-py3-none-any.whl", hash = "sha256:e47cac98a6da10cd41e6fd036d472c6f58ede6c5dbee3dbee3ef7a100ed97742"}, {file = "uvicorn-0.22.0-py3-none-any.whl", hash = "sha256:e9434d3bbf05f310e762147f769c9f21235ee118ba2d2bf1155a7196448bd996"},
{file = "uvicorn-0.21.1.tar.gz", hash = "sha256:0fac9cb342ba099e0d582966005f3fdba5b0290579fed4a6266dc702ca7bb032"}, {file = "uvicorn-0.22.0.tar.gz", hash = "sha256:79277ae03db57ce7d9aa0567830bbb51d7a612f54d6e1e3e92da3ef24c2c8ed8"},
] ]
[package.dependencies] [package.dependencies]
@ -1653,9 +1657,9 @@ docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker
testing = ["big-O", "flake8 (<5)", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)"] testing = ["big-O", "flake8 (<5)", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)"]
[extras] [extras]
server = ["fastapi", "sse-starlette", "uvicorn"] server = ["uvicorn", "fastapi", "sse-starlette"]
[metadata] [metadata]
lock-version = "2.0" lock-version = "2.0"
python-versions = "^3.8.1" python-versions = "^3.8.1"
content-hash = "b1b158e4c9640e4dc197fe43e22c9f87e6e90945ec9b8bcba6042f81249d251e" content-hash = "f5aacb68729427e49bb796a598890fedd8ba1950af3fd577fb85edde2c27338f"

View file

@ -1,6 +1,6 @@
[tool.poetry] [tool.poetry]
name = "llama_cpp_python" name = "llama_cpp_python"
version = "0.1.55" version = "0.1.56"
description = "Python bindings for the llama.cpp library" description = "Python bindings for the llama.cpp library"
authors = ["Andrei Betlen <abetlen@gmail.com>"] authors = ["Andrei Betlen <abetlen@gmail.com>"]
license = "MIT" license = "MIT"
@ -14,8 +14,9 @@ include = [
[tool.poetry.dependencies] [tool.poetry.dependencies]
python = "^3.8.1" python = "^3.8.1"
typing-extensions = "^4.5.0" typing-extensions = "^4.6.2"
uvicorn = { version = "^0.21.1", optional = true } numpy = "^1.20.0"
uvicorn = { version = "^0.22.0", optional = true }
fastapi = { version = "^0.95.0", optional = true } fastapi = { version = "^0.95.0", optional = true }
sse-starlette = { version = "^1.3.3", optional = true } sse-starlette = { version = "^1.3.3", optional = true }
@ -23,11 +24,11 @@ sse-starlette = { version = "^1.3.3", optional = true }
black = "^23.3.0" black = "^23.3.0"
twine = "^4.0.2" twine = "^4.0.2"
mkdocs = "^1.4.3" mkdocs = "^1.4.3"
mkdocstrings = {extras = ["python"], version = "^0.21.2"} mkdocstrings = {extras = ["python"], version = "^0.22.0"}
mkdocs-material = "^9.1.14" mkdocs-material = "^9.1.15"
pytest = "^7.3.1" pytest = "^7.3.1"
httpx = "^0.24.1" httpx = "^0.24.1"
scikit-build = "0.13" scikit-build = "0.17.5"
[tool.poetry.extras] [tool.poetry.extras]
server = ["uvicorn", "fastapi", "sse-starlette"] server = ["uvicorn", "fastapi", "sse-starlette"]

View file

@ -10,15 +10,13 @@ setup(
description="A Python wrapper for llama.cpp", description="A Python wrapper for llama.cpp",
long_description=long_description, long_description=long_description,
long_description_content_type="text/markdown", long_description_content_type="text/markdown",
version="0.1.55", version="0.1.56",
author="Andrei Betlen", author="Andrei Betlen",
author_email="abetlen@gmail.com", author_email="abetlen@gmail.com",
license="MIT", license="MIT",
package_dir={"llama_cpp": "llama_cpp", "llama_cpp.server": "llama_cpp/server"}, package_dir={"llama_cpp": "llama_cpp", "llama_cpp.server": "llama_cpp/server"},
packages=["llama_cpp", "llama_cpp.server"], packages=["llama_cpp", "llama_cpp.server"],
install_requires=[ install_requires=["typing-extensions>=4.5.0", "numpy>=1.20.0"],
"typing-extensions>=4.5.0",
],
extras_require={ extras_require={
"server": ["uvicorn>=0.21.1", "fastapi>=0.95.0", "sse-starlette>=1.3.3"], "server": ["uvicorn>=0.21.1", "fastapi>=0.95.0", "sse-starlette>=1.3.3"],
}, },