Merge branch 'main' into fix-state-pickle

commit 877ca6d016
Andrei, 2023-06-23 15:13:07 -04:00 (committed via GitHub)
11 changed files with 234 additions and 127 deletions

CHANGELOG.md

@ -7,6 +7,28 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]
## [0.1.65]
### Fixed
- (llama.cpp) Fix struct misalignment bug
## [0.1.64]
### Added
- (llama.cpp) Update llama.cpp
- Fix docs for seed. Set -1 for random.
## [0.1.63]
### Added
- (llama.cpp) Add full gpu utilisation in CUDA
- (llama.cpp) Add get_vocab
- (llama.cpp) Add low_vram parameter
- (server) Add logit_bias parameter
## [0.1.62]
### Fixed

README.md

@ -17,6 +17,7 @@ This package provides:
Documentation is available at [https://abetlen.github.io/llama-cpp-python](https://abetlen.github.io/llama-cpp-python).
## Installation from PyPI (recommended)
Install from PyPI (requires a C compiler):
@ -25,7 +26,7 @@ Install from PyPI (requires a C compiler):
pip install llama-cpp-python
```
The above command will attempt to install the package and build build `llama.cpp` from source.
The above command will attempt to install the package and build `llama.cpp` from source.
This is the recommended installation method as it ensures that `llama.cpp` is built with the available optimizations for your system.
If you have previously installed `llama-cpp-python` through pip and want to upgrade your version or rebuild the package with different compiler options, please add the following flags to ensure that the package is rebuilt correctly:
@ -70,6 +71,8 @@ To install with Metal (MPS), set the `LLAMA_METAL=on` environment variable befor
CMAKE_ARGS="-DLLAMA_METAL=on" FORCE_CMAKE=1 pip install llama-cpp-python
```
Detailed MacOS Metal GPU install documentation is available at [docs/macos_install.md](docs/macos_install.md).
## High-level API
The high-level API provides a simple managed interface through the `Llama` class.
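For example, a minimal completion call looks like the following sketch (the model path is a placeholder; any local ggml model works):

```python
from llama_cpp import Llama

# Load a local ggml model (path is a placeholder).
llm = Llama(model_path="./models/7B/ggml-model-q4_0.bin")

# The call returns an OpenAI-style completion dict.
output = llm(
    "Q: Name the planets in the solar system. A: ",
    max_tokens=32,
    stop=["Q:", "\n"],
)
print(output["choices"][0]["text"])
```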

docs/macos_install.md (new file)

@ -0,0 +1,59 @@
# llama-cpp-python - MacOS Install with Metal GPU
**(1) Make sure you have Xcode installed... at least the command-line tools**
```
# check the path of your Xcode install
xcode-select -p
# with Xcode installed this returns e.g.
# /Applications/Xcode-beta.app/Contents/Developer
# if Xcode is missing then install it... it takes ages:
xcode-select --install
```
**(2) Install the conda version for MacOS that supports Metal GPU**
```
wget https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-MacOSX-arm64.sh
bash Miniforge3-MacOSX-arm64.sh
```
**(3) Make a conda environment**
```
conda create -n llama python=3.9.16
conda activate llama
```
**(4) Install the LATEST llama-cpp-python... which, as of just today, happily supports MacOS Metal GPU**
*(you need Xcode installed in order for pip to build/compile the C++ code)*
```
pip uninstall llama-cpp-python -y
CMAKE_ARGS="-DLLAMA_METAL=on" FORCE_CMAKE=1 pip install -U llama-cpp-python --no-cache-dir
pip install 'llama-cpp-python[server]'
# you should now have llama-cpp-python v0.1.62 installed
llama-cpp-python         0.1.62     
```
**(5) Download a v3 ggml model**
- **ggmlv3**
- file name ends with **q4_0.bin**, indicating 4-bit quantization with quantization method 0
https://huggingface.co/TheBloke/open-llama-7b-open-instruct-GGML
**(6) Run the llama-cpp-python API server with MacOS Metal GPU support**
```
# config your ggml model path
# make sure it is ggml v3
# make sure it is q4_0
export MODEL=[path to your llama.cpp ggml models]/[ggml-model-name]q4_0.bin
python3 -m llama_cpp.server --model $MODEL --n_gpu_layers 1
```
***Note:** if you omit `--n_gpu_layers 1`, the model runs on the CPU.*
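With the server running, it exposes an OpenAI-compatible REST API on the configured host/port. A minimal client sketch (endpoint, prompt, and the `requests` dependency are assumptions, not part of this install guide):

```python
import requests

# POST to the OpenAI-compatible completions endpoint; host/port match
# the server defaults assumed above.
resp = requests.post(
    "http://localhost:8000/v1/completions",
    json={"prompt": "The capital of France is", "max_tokens": 16},
)
print(resp.json()["choices"][0]["text"])
```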

llama_cpp/llama.py

@ -221,6 +221,7 @@ class Llama:
last_n_tokens_size: int = 64,
lora_base: Optional[str] = None,
lora_path: Optional[str] = None,
low_vram: bool = False,
verbose: bool = True,
):
"""Load a llama.cpp model from `model_path`.
@ -229,7 +230,7 @@ class Llama:
model_path: Path to the model.
n_ctx: Maximum context size.
n_parts: Number of parts to split the model into. If -1, the number of parts is automatically determined.
seed: Random seed. 0 for random.
seed: Random seed. -1 for random.
f16_kv: Use half-precision for key/value cache.
logits_all: Return logits for all tokens, not just the last token.
vocab_only: Only load the vocabulary, no weights.
@ -262,6 +263,7 @@ class Llama:
self.params.use_mmap = use_mmap if lora_path is None else False
self.params.use_mlock = use_mlock
self.params.embedding = embedding
self.params.low_vram = low_vram
self.last_n_tokens_size = last_n_tokens_size
self.n_batch = min(n_ctx, n_batch)
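To illustrate the two parameter changes above (the corrected `seed` docs and the new `low_vram` flag), a hedged constructor sketch; the model path is a placeholder:

```python
from llama_cpp import Llama

llm = Llama(
    model_path="./models/7B/ggml-model-q4_0.bin",  # placeholder path
    seed=-1,        # per the corrected docstring: -1 means a random seed
    low_vram=True,  # new flag: lower VRAM use at some performance cost
)
```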
@ -814,7 +816,7 @@ class Llama:
llama_cpp.llama_reset_timings(self.ctx)
if len(prompt_tokens) > self._n_ctx:
raise ValueError(f"Requested tokens exceed context window of {self._n_ctx}")
raise ValueError(f"Requested tokens ({len(prompt_tokens)}) exceed context window of {self._n_ctx}")
# Truncate max_tokens if requested tokens would exceed the context window
max_tokens = (
@ -1380,6 +1382,7 @@ class Llama:
mirostat_tau: float = 5.0,
mirostat_eta: float = 0.1,
model: Optional[str] = None,
logits_processor: Optional[LogitsProcessorList] = None,
) -> Union[ChatCompletion, Iterator[ChatCompletionChunk]]:
"""Generate a chat completion from a list of messages.
@ -1421,6 +1424,7 @@ class Llama:
mirostat_tau=mirostat_tau,
mirostat_eta=mirostat_eta,
model=model,
logits_processor=logits_processor,
)
if stream:
chunks: Iterator[CompletionChunk] = completion_or_chunks # type: ignore
@ -1447,6 +1451,7 @@ class Llama:
use_mmap=self.params.use_mmap,
use_mlock=self.params.use_mlock,
embedding=self.params.embedding,
low_vram=self.params.low_vram,
last_n_tokens_size=self.last_n_tokens_size,
n_batch=self.n_batch,
n_threads=self.n_threads,
@ -1470,6 +1475,7 @@ class Llama:
use_mmap=state["use_mmap"],
use_mlock=state["use_mlock"],
embedding=state["embedding"],
low_vram=state["low_vram"],
n_threads=state["n_threads"],
n_batch=state["n_batch"],
last_n_tokens_size=state["last_n_tokens_size"],
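Carrying `low_vram` through `__getstate__`/`__setstate__` is what keeps pickling consistent with the constructor; a sketch of the round-trip (the path is a placeholder, and the weights are re-read from `model_path` on unpickle):

```python
import pickle

from llama_cpp import Llama

llm = Llama(model_path="./models/7B/ggml-model-q4_0.bin", low_vram=True)

# __getstate__ captures the constructor arguments (now including low_vram);
# __setstate__ rebuilds the model from them.
llm2 = pickle.loads(pickle.dumps(llm))
assert bool(llm2.params.low_vram) == bool(llm.params.low_vram)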

llama_cpp/llama_cpp.py

@ -150,45 +150,43 @@ llama_progress_callback = ctypes.CFUNCTYPE(None, c_float, c_void_p)
# struct llama_context_params {
# int seed; // RNG seed, -1 for random
# int n_ctx; // text context
# int n_batch; // prompt processing batch size
# int n_gpu_layers; // number of layers to store in VRAM
# int main_gpu; // the GPU that is used for scratch and small tensors
# float tensor_split[LLAMA_MAX_DEVICES]; // how to split layers across multiple GPUs
# int seed; // RNG seed, -1 for random
# // called with a progress value between 0 and 1, pass NULL to disable
# llama_progress_callback progress_callback;
# // context pointer passed to the progress callback
# void * progress_callback_user_data;
# // Keep the booleans together to avoid misalignment during copy-by-value.
# bool low_vram; // if true, reduce VRAM usage at the cost of performance
# bool f16_kv; // use fp16 for KV cache
# bool logits_all; // the llama_eval() call computes all logits, not just the last one
# bool vocab_only; // only load the vocabulary, no weights
# bool use_mmap; // use mmap if possible
# bool use_mlock; // force system to keep model in RAM
# bool embedding; // embedding mode only
# // called with a progress value between 0 and 1, pass NULL to disable
# llama_progress_callback progress_callback;
# // context pointer passed to the progress callback
# void * progress_callback_user_data;
# };
class llama_context_params(Structure):
_fields_ = [
("seed", c_int),
("n_ctx", c_int),
("n_batch", c_int),
("n_gpu_layers", c_int),
("main_gpu", c_int),
("tensor_split", c_float * LLAMA_MAX_DEVICES.value),
("seed", c_int),
("progress_callback", llama_progress_callback),
("progress_callback_user_data", c_void_p),
("low_vram", c_bool),
("f16_kv", c_bool),
(
"logits_all",
c_bool,
),
("logits_all", c_bool),
("vocab_only", c_bool),
("use_mmap", c_bool),
("use_mlock", c_bool),
("embedding", c_bool),
("progress_callback", llama_progress_callback),
("progress_callback_user_data", c_void_p),
]
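The `_fields_` order must mirror the C struct exactly, since ctypes computes byte offsets from declaration order; that is the misalignment this commit fixes. A toy illustration (unrelated to the llama API):

```python
import ctypes

class Good(ctypes.Structure):
    _fields_ = [("a", ctypes.c_int), ("b", ctypes.c_float)]

class Swapped(ctypes.Structure):
    # Same fields, wrong order: offsets no longer match Good's layout.
    _fields_ = [("b", ctypes.c_float), ("a", ctypes.c_int)]

g = Good(a=1, b=2.0)
s = Swapped.from_buffer_copy(bytes(g))
print(s.a, s.b)  # garbage: the bytes are reinterpreted at wrong offsets
```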
@ -555,6 +553,26 @@ _lib.llama_n_embd.argtypes = [llama_context_p]
_lib.llama_n_embd.restype = c_int
# // Get the vocabulary as output parameters.
# // Returns number of results.
# LLAMA_API int llama_get_vocab(
# const struct llama_context * ctx,
# const char * * strings,
# float * scores,
# int capacity);
def llama_get_vocab(
ctx: llama_context_p,
strings, # type: Array[c_char_p] # type: ignore
scores, # type: Array[c_float] # type: ignore
capacity: c_int,
) -> int:
return _lib.llama_get_vocab(ctx, strings, scores, capacity)
_lib.llama_get_vocab.argtypes = [llama_context_p, POINTER(c_char_p), POINTER(c_float), c_int]  # pointer types to match char** / float*
_lib.llama_get_vocab.restype = c_int
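The wrapper expects caller-allocated output arrays. A hedged usage sketch — the model path is a placeholder, and `llama_init_from_file` is assumed to be the context constructor this module exposes at this version:

```python
import ctypes
import llama_cpp

params = llama_cpp.llama_context_default_params()
ctx = llama_cpp.llama_init_from_file(b"./models/7B/ggml-model-q4_0.bin", params)

capacity = 32
strings = (ctypes.c_char_p * capacity)()  # filled with token strings
scores = (ctypes.c_float * capacity)()    # filled with token scores

n = llama_cpp.llama_get_vocab(ctx, strings, scores, ctypes.c_int(capacity))
for i in range(n):
    print(strings[i], scores[i])
```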
# Token logits obtained from the last call to llama_eval()
# The logits for the last token are stored in the last row
# Can be mutated in order to change the probabilities of the next token
@ -596,7 +614,7 @@ _lib.llama_token_to_str.restype = c_char_p
# Special tokens
# LLAMA_API llama_token llama_token_bos();
# LLAMA_API llama_token llama_token_bos(); // beginning-of-sentence
def llama_token_bos() -> int:
return _lib.llama_token_bos()
@ -605,7 +623,7 @@ _lib.llama_token_bos.argtypes = []
_lib.llama_token_bos.restype = llama_token
# LLAMA_API llama_token llama_token_eos();
# LLAMA_API llama_token llama_token_eos(); // end-of-sentence
def llama_token_eos() -> int:
return _lib.llama_token_eos()
@ -614,7 +632,7 @@ _lib.llama_token_eos.argtypes = []
_lib.llama_token_eos.restype = llama_token
# LLAMA_API llama_token llama_token_nl();
# LLAMA_API llama_token llama_token_nl(); // next-line
def llama_token_nl() -> int:
return _lib.llama_token_nl()

llama_cpp/server/__main__.py

@ -46,5 +46,5 @@ if __name__ == "__main__":
app = create_app(settings=settings)
uvicorn.run(
app, host=os.getenv("HOST", "localhost"), port=int(os.getenv("PORT", 8000))
app, host=os.getenv("HOST", settings.host), port=int(os.getenv("PORT", settings.port))
)

llama_cpp/server/app.py

@ -30,6 +30,9 @@ class Settings(BaseSettings):
ge=0,
description="The number of layers to put on the GPU. The rest will be on the CPU.",
)
seed: int = Field(
default=1337, description="Random seed. -1 for random."
)
n_batch: int = Field(
default=512, ge=1, description="The batch size to use per eval."
)
@ -48,6 +51,10 @@ class Settings(BaseSettings):
description="Use mmap.",
)
embedding: bool = Field(default=True, description="Whether to use embeddings.")
low_vram: bool = Field(
default=False,
description="Whether to use less VRAM. This will reduce performance.",
)
last_n_tokens_size: int = Field(
default=64,
ge=0,
@ -72,6 +79,12 @@ class Settings(BaseSettings):
verbose: bool = Field(
default=True, description="Whether to print debug information."
)
host: str = Field(
default="localhost", description="Listen address"
)
port: int = Field(
default=8000, description="Listen port"
)
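Since `Settings` subclasses pydantic's `BaseSettings`, the new `host`/`port` fields can also be supplied via environment variables rather than keyword arguments; a sketch (the model path is a placeholder):

```python
import os

# BaseSettings reads environment variables (case-insensitively by default),
# so HOST/PORT override the declared defaults.
os.environ["HOST"] = "0.0.0.0"
os.environ["PORT"] = "8080"

from llama_cpp.server.app import Settings

settings = Settings(model="./models/7B/ggml-model-q4_0.bin")
print(settings.host, settings.port)  # -> 0.0.0.0 8080
```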
router = APIRouter()
@ -99,6 +112,7 @@ def create_app(settings: Optional[Settings] = None):
llama = llama_cpp.Llama(
model_path=settings.model,
n_gpu_layers=settings.n_gpu_layers,
seed=settings.seed,
f16_kv=settings.f16_kv,
use_mlock=settings.use_mlock,
use_mmap=settings.use_mmap,
@ -113,8 +127,12 @@ def create_app(settings: Optional[Settings] = None):
)
if settings.cache:
if settings.cache_type == "disk":
if settings.verbose:
print(f"Using disk cache with size {settings.cache_size}")
cache = llama_cpp.LlamaDiskCache(capacity_bytes=settings.cache_size)
else:
if settings.verbose:
print(f"Using ram cache with size {settings.cache_size}")
cache = llama_cpp.LlamaRAMCache(capacity_bytes=settings.cache_size)
cache = llama_cpp.LlamaCache(capacity_bytes=settings.cache_size)
@ -249,18 +267,19 @@ class CreateCompletionRequest(BaseModel):
)
presence_penalty: Optional[float] = presence_penalty_field
frequency_penalty: Optional[float] = frequency_penalty_field
logit_bias: Optional[Dict[str, float]] = Field(None)
logprobs: Optional[int] = Field(None)
# ignored or currently unsupported
model: Optional[str] = model_field
n: Optional[int] = 1
logprobs: Optional[int] = Field(None)
best_of: Optional[int] = 1
logit_bias: Optional[Dict[str, float]] = Field(None)
user: Optional[str] = Field(None)
# llama.cpp specific parameters
top_k: int = top_k_field
repeat_penalty: float = repeat_penalty_field
logit_bias_type: Optional[Literal["input_ids", "tokens"]] = Field(None)
class Config:
schema_extra = {
@ -274,6 +293,39 @@ class CreateCompletionRequest(BaseModel):
CreateCompletionResponse = create_model_from_typeddict(llama_cpp.Completion)
def make_logit_bias_processor(
llama: llama_cpp.Llama,
logit_bias: Dict[str, float],
logit_bias_type: Optional[Literal["input_ids", "tokens"]],
):
if logit_bias_type is None:
logit_bias_type = "input_ids"
to_bias: Dict[int, float] = {}
if logit_bias_type == "input_ids":
for input_id, score in logit_bias.items():
input_id = int(input_id)
to_bias[input_id] = score
elif logit_bias_type == "tokens":
for token, score in logit_bias.items():
token = token.encode('utf-8')
for input_id in llama.tokenize(token, add_bos=False):
to_bias[input_id] = score
def logit_bias_processor(
input_ids: List[int],
scores: List[float],
) -> List[float]:
new_scores = [None] * len(scores)
for input_id, score in enumerate(scores):
new_scores[input_id] = score + to_bias.get(input_id, 0.0)
return new_scores
return logit_bias_processor
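From a client's perspective, the new fields let a request bias specific tokens. A hedged sketch using the `tokens` variant (server address, prompt, and bias values are assumptions):

```python
import requests

# logit_bias maps token text to an additive score; logit_bias_type="tokens"
# tells the server to tokenize the keys instead of parsing them as token ids.
resp = requests.post(
    "http://localhost:8000/v1/completions",
    json={
        "prompt": "The capital of France is",
        "max_tokens": 16,
        "logit_bias": {" Paris": -10.0},
        "logit_bias_type": "tokens",
    },
)
print(resp.json()["choices"][0]["text"])
```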
@router.post(
"/v1/completions",
response_model=CreateCompletionResponse,
@ -291,9 +343,16 @@ async def create_completion(
"n",
"best_of",
"logit_bias",
"logit_bias_type",
"user",
}
kwargs = body.dict(exclude=exclude)
if body.logit_bias is not None:
kwargs['logits_processor'] = llama_cpp.LogitsProcessorList([
make_logit_bias_processor(llama, body.logit_bias, body.logit_bias_type),
])
if body.stream:
send_chan, recv_chan = anyio.create_memory_object_stream(10)
@ -372,16 +431,17 @@ class CreateChatCompletionRequest(BaseModel):
stream: bool = stream_field
presence_penalty: Optional[float] = presence_penalty_field
frequency_penalty: Optional[float] = frequency_penalty_field
logit_bias: Optional[Dict[str, float]] = Field(None)
# ignored or currently unsupported
model: Optional[str] = model_field
n: Optional[int] = 1
logit_bias: Optional[Dict[str, float]] = Field(None)
user: Optional[str] = Field(None)
# llama.cpp specific parameters
top_k: int = top_k_field
repeat_penalty: float = repeat_penalty_field
logit_bias_type: Optional[Literal["input_ids", "tokens"]] = Field(None)
class Config:
schema_extra = {
@ -413,9 +473,16 @@ async def create_chat_completion(
exclude = {
"n",
"logit_bias",
"logit_bias_type",
"user",
}
kwargs = body.dict(exclude=exclude)
if body.logit_bias is not None:
kwargs['logits_processor'] = llama_cpp.LogitsProcessorList([
make_logit_bias_processor(llama, body.logit_bias, body.logit_bias_type),
])
if body.stream:
send_chan, recv_chan = anyio.create_memory_object_stream(10)

poetry.lock (generated)

@ -1,10 +1,9 @@
# This file is automatically @generated by Poetry and should not be changed by hand.
# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand.
[[package]]
name = "anyio"
version = "3.6.2"
description = "High level compatibility layer for multiple asynchronous event loop implementations"
category = "main"
optional = false
python-versions = ">=3.6.2"
files = [
@ -25,7 +24,6 @@ trio = ["trio (>=0.16,<0.22)"]
name = "black"
version = "23.3.0"
description = "The uncompromising code formatter."
category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@ -75,7 +73,6 @@ uvloop = ["uvloop (>=0.15.2)"]
name = "bleach"
version = "6.0.0"
description = "An easy safelist-based HTML-sanitizing tool."
category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@ -94,7 +91,6 @@ css = ["tinycss2 (>=1.1.0,<1.2)"]
name = "certifi"
version = "2023.5.7"
description = "Python package for providing Mozilla's CA Bundle."
category = "dev"
optional = false
python-versions = ">=3.6"
files = [
@ -106,7 +102,6 @@ files = [
name = "cffi"
version = "1.15.1"
description = "Foreign Function Interface for Python calling C code."
category = "dev"
optional = false
python-versions = "*"
files = [
@ -183,7 +178,6 @@ pycparser = "*"
name = "charset-normalizer"
version = "3.1.0"
description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet."
category = "dev"
optional = false
python-versions = ">=3.7.0"
files = [
@ -268,7 +262,6 @@ files = [
name = "click"
version = "8.1.3"
description = "Composable command line interface toolkit"
category = "main"
optional = false
python-versions = ">=3.7"
files = [
@ -283,7 +276,6 @@ colorama = {version = "*", markers = "platform_system == \"Windows\""}
name = "colorama"
version = "0.4.6"
description = "Cross-platform colored terminal text."
category = "main"
optional = false
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
files = [
@ -295,7 +287,6 @@ files = [
name = "cryptography"
version = "40.0.2"
description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers."
category = "dev"
optional = false
python-versions = ">=3.6"
files = [
@ -333,11 +324,21 @@ test = ["iso8601", "pretend", "pytest (>=6.2.0)", "pytest-benchmark", "pytest-co
test-randomorder = ["pytest-randomly"]
tox = ["tox"]
[[package]]
name = "diskcache"
version = "5.6.1"
description = "Disk Cache -- Disk and file backed persistent cache."
optional = false
python-versions = ">=3"
files = [
{file = "diskcache-5.6.1-py3-none-any.whl", hash = "sha256:558c6a2d5d7c721bb00e40711803d6804850c9f76c426ed81ecc627fe9d2ce2d"},
{file = "diskcache-5.6.1.tar.gz", hash = "sha256:e4c978532feff5814c4cc00fe1e11e40501985946643d73220d41ee7737c72c3"},
]
[[package]]
name = "distro"
version = "1.8.0"
description = "Distro - an OS platform information API"
category = "dev"
optional = false
python-versions = ">=3.6"
files = [
@ -349,7 +350,6 @@ files = [
name = "docutils"
version = "0.20"
description = "Docutils -- Python Documentation Utilities"
category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@ -361,7 +361,6 @@ files = [
name = "exceptiongroup"
version = "1.1.1"
description = "Backport of PEP 654 (exception groups)"
category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@ -374,31 +373,26 @@ test = ["pytest (>=6)"]
[[package]]
name = "fastapi"
version = "0.96.0"
version = "0.97.0"
description = "FastAPI framework, high performance, easy to learn, fast to code, ready for production"
category = "main"
optional = true
python-versions = ">=3.7"
files = [
{file = "fastapi-0.96.0-py3-none-any.whl", hash = "sha256:b8e11fe81e81eab4e1504209917338e0b80f783878a42c2b99467e5e1019a1e9"},
{file = "fastapi-0.96.0.tar.gz", hash = "sha256:71232d47c2787446991c81c41c249f8a16238d52d779c0e6b43927d3773dbe3c"},
{file = "fastapi-0.97.0-py3-none-any.whl", hash = "sha256:95d757511c596409930bd20673358d4a4d709004edb85c5d24d6ffc48fabcbf2"},
{file = "fastapi-0.97.0.tar.gz", hash = "sha256:b53248ee45f64f19bb7600953696e3edf94b0f7de94df1e5433fc5c6136fa986"},
]
[package.dependencies]
pydantic = ">=1.6.2,<1.7 || >1.7,<1.7.1 || >1.7.1,<1.7.2 || >1.7.2,<1.7.3 || >1.7.3,<1.8 || >1.8,<1.8.1 || >1.8.1,<2.0.0"
pydantic = ">=1.7.4,<1.8 || >1.8,<1.8.1 || >1.8.1,<2.0.0"
starlette = ">=0.27.0,<0.28.0"
[package.extras]
all = ["email-validator (>=1.1.1)", "httpx (>=0.23.0)", "itsdangerous (>=1.1.0)", "jinja2 (>=2.11.2)", "orjson (>=3.2.1)", "python-multipart (>=0.0.5)", "pyyaml (>=5.3.1)", "ujson (>=4.0.1,!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0)", "uvicorn[standard] (>=0.12.0)"]
dev = ["pre-commit (>=2.17.0,<3.0.0)", "ruff (==0.0.138)", "uvicorn[standard] (>=0.12.0,<0.21.0)"]
doc = ["mdx-include (>=1.4.1,<2.0.0)", "mkdocs (>=1.1.2,<2.0.0)", "mkdocs-markdownextradata-plugin (>=0.1.7,<0.3.0)", "mkdocs-material (>=8.1.4,<9.0.0)", "pyyaml (>=5.3.1,<7.0.0)", "typer-cli (>=0.0.13,<0.0.14)", "typer[all] (>=0.6.1,<0.8.0)"]
test = ["anyio[trio] (>=3.2.1,<4.0.0)", "black (==23.1.0)", "coverage[toml] (>=6.5.0,<8.0)", "databases[sqlite] (>=0.3.2,<0.7.0)", "email-validator (>=1.1.1,<2.0.0)", "flask (>=1.1.2,<3.0.0)", "httpx (>=0.23.0,<0.24.0)", "isort (>=5.0.6,<6.0.0)", "mypy (==0.982)", "orjson (>=3.2.1,<4.0.0)", "passlib[bcrypt] (>=1.7.2,<2.0.0)", "peewee (>=3.13.3,<4.0.0)", "pytest (>=7.1.3,<8.0.0)", "python-jose[cryptography] (>=3.3.0,<4.0.0)", "python-multipart (>=0.0.5,<0.0.7)", "pyyaml (>=5.3.1,<7.0.0)", "ruff (==0.0.138)", "sqlalchemy (>=1.3.18,<1.4.43)", "types-orjson (==3.6.2)", "types-ujson (==5.7.0.1)", "ujson (>=4.0.1,!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0,<6.0.0)"]
[[package]]
name = "ghp-import"
version = "2.1.0"
description = "Copy your docs directly to the gh-pages branch."
category = "dev"
optional = false
python-versions = "*"
files = [
@ -416,7 +410,6 @@ dev = ["flake8", "markdown", "twine", "wheel"]
name = "griffe"
version = "0.27.3"
description = "Signatures for entire Python programs. Extract the structure, the frame, the skeleton of your project, to generate API documentation or find breaking changes in your API."
category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@ -431,7 +424,6 @@ colorama = ">=0.4"
name = "h11"
version = "0.14.0"
description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1"
category = "main"
optional = false
python-versions = ">=3.7"
files = [
@ -443,7 +435,6 @@ files = [
name = "httpcore"
version = "0.17.0"
description = "A minimal low-level HTTP client."
category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@ -455,17 +446,16 @@ files = [
anyio = ">=3.0,<5.0"
certifi = "*"
h11 = ">=0.13,<0.15"
sniffio = ">=1.0.0,<2.0.0"
sniffio = "==1.*"
[package.extras]
http2 = ["h2 (>=3,<5)"]
socks = ["socksio (>=1.0.0,<2.0.0)"]
socks = ["socksio (==1.*)"]
[[package]]
name = "httpx"
version = "0.24.1"
description = "The next generation HTTP client."
category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@ -481,15 +471,14 @@ sniffio = "*"
[package.extras]
brotli = ["brotli", "brotlicffi"]
cli = ["click (>=8.0.0,<9.0.0)", "pygments (>=2.0.0,<3.0.0)", "rich (>=10,<14)"]
cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"]
http2 = ["h2 (>=3,<5)"]
socks = ["socksio (>=1.0.0,<2.0.0)"]
socks = ["socksio (==1.*)"]
[[package]]
name = "idna"
version = "3.4"
description = "Internationalized Domain Names in Applications (IDNA)"
category = "main"
optional = false
python-versions = ">=3.5"
files = [
@ -501,7 +490,6 @@ files = [
name = "importlib-metadata"
version = "6.6.0"
description = "Read metadata from Python packages"
category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@ -521,7 +509,6 @@ testing = ["flake8 (<5)", "flufl.flake8", "importlib-resources (>=1.3)", "packag
name = "importlib-resources"
version = "5.12.0"
description = "Read resources from Python packages"
category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@ -540,7 +527,6 @@ testing = ["flake8 (<5)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-chec
name = "iniconfig"
version = "2.0.0"
description = "brain-dead simple config-ini parsing"
category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@ -552,7 +538,6 @@ files = [
name = "jaraco-classes"
version = "3.2.3"
description = "Utility functions for Python class constructs"
category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@ -571,7 +556,6 @@ testing = ["flake8 (<5)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-chec
name = "jeepney"
version = "0.8.0"
description = "Low-level, pure Python DBus protocol wrapper."
category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@ -587,7 +571,6 @@ trio = ["async_generator", "trio"]
name = "jinja2"
version = "3.1.2"
description = "A very fast and expressive template engine."
category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@ -605,7 +588,6 @@ i18n = ["Babel (>=2.7)"]
name = "keyring"
version = "23.13.1"
description = "Store and access your passwords safely."
category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@ -630,7 +612,6 @@ testing = ["flake8 (<5)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-chec
name = "markdown"
version = "3.3.7"
description = "Python implementation of Markdown."
category = "dev"
optional = false
python-versions = ">=3.6"
files = [
@ -648,7 +629,6 @@ testing = ["coverage", "pyyaml"]
name = "markdown-it-py"
version = "2.2.0"
description = "Python port of markdown-it. Markdown parsing, done right!"
category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@ -673,7 +653,6 @@ testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"]
name = "markupsafe"
version = "2.1.2"
description = "Safely add untrusted strings to HTML/XML markup."
category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@ -733,7 +712,6 @@ files = [
name = "mdurl"
version = "0.1.2"
description = "Markdown URL utilities"
category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@ -745,7 +723,6 @@ files = [
name = "mergedeep"
version = "1.3.4"
description = "A deep merge function for 🐍."
category = "dev"
optional = false
python-versions = ">=3.6"
files = [
@ -757,7 +734,6 @@ files = [
name = "mkdocs"
version = "1.4.3"
description = "Project documentation with Markdown."
category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@ -786,7 +762,6 @@ min-versions = ["babel (==2.9.0)", "click (==7.0)", "colorama (==0.4)", "ghp-imp
name = "mkdocs-autorefs"
version = "0.4.1"
description = "Automatically link across pages in MkDocs."
category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@ -800,14 +775,13 @@ mkdocs = ">=1.1"
[[package]]
name = "mkdocs-material"
version = "9.1.15"
version = "9.1.16"
description = "Documentation that simply works"
category = "dev"
optional = false
python-versions = ">=3.7"
files = [
{file = "mkdocs_material-9.1.15-py3-none-any.whl", hash = "sha256:b49e12869ab464558e2dd3c5792da5b748a7e0c48ee83b4d05715f98125a7a39"},
{file = "mkdocs_material-9.1.15.tar.gz", hash = "sha256:8513ab847c9a541ed3d11a3a7eed556caf72991ee786c31c5aac6691a121088a"},
{file = "mkdocs_material-9.1.16-py3-none-any.whl", hash = "sha256:f9e62558a6b01ffac314423cbc223d970c25fbc78999860226245b64e64d6751"},
{file = "mkdocs_material-9.1.16.tar.gz", hash = "sha256:1021bfea20f00a9423530c8c2ae9be3c78b80f5a527b3f822e6de3d872e5ab79"},
]
[package.dependencies]
@ -825,7 +799,6 @@ requests = ">=2.26"
name = "mkdocs-material-extensions"
version = "1.1.1"
description = "Extension pack for Python Markdown and MkDocs Material."
category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@ -837,7 +810,6 @@ files = [
name = "mkdocstrings"
version = "0.22.0"
description = "Automatic documentation from sources, for MkDocs."
category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@ -865,7 +837,6 @@ python-legacy = ["mkdocstrings-python-legacy (>=0.2.1)"]
name = "mkdocstrings-python"
version = "0.10.1"
description = "A Python handler for mkdocstrings."
category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@ -881,7 +852,6 @@ mkdocstrings = ">=0.20"
name = "more-itertools"
version = "9.1.0"
description = "More routines for operating on iterables, beyond itertools"
category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@ -893,7 +863,6 @@ files = [
name = "mypy-extensions"
version = "1.0.0"
description = "Type system extensions for programs checked with the mypy type checker."
category = "dev"
optional = false
python-versions = ">=3.5"
files = [
@ -905,7 +874,6 @@ files = [
name = "numpy"
version = "1.24.3"
description = "Fundamental package for array computing in Python"
category = "main"
optional = false
python-versions = ">=3.8"
files = [
@ -943,7 +911,6 @@ files = [
name = "packaging"
version = "23.1"
description = "Core utilities for Python packages"
category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@ -955,7 +922,6 @@ files = [
name = "pathspec"
version = "0.11.1"
description = "Utility library for gitignore style pattern matching of file paths."
category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@ -967,7 +933,6 @@ files = [
name = "pkginfo"
version = "1.9.6"
description = "Query metadata from sdists / bdists / installed packages."
category = "dev"
optional = false
python-versions = ">=3.6"
files = [
@ -982,7 +947,6 @@ testing = ["pytest", "pytest-cov"]
name = "platformdirs"
version = "3.5.0"
description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"."
category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@ -998,7 +962,6 @@ test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.3.1)", "pytest-
name = "pluggy"
version = "1.0.0"
description = "plugin and hook calling mechanisms for python"
category = "dev"
optional = false
python-versions = ">=3.6"
files = [
@ -1014,7 +977,6 @@ testing = ["pytest", "pytest-benchmark"]
name = "pycparser"
version = "2.21"
description = "C parser in Python"
category = "dev"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
files = [
@ -1026,7 +988,6 @@ files = [
name = "pydantic"
version = "1.10.7"
description = "Data validation and settings management using python type hints"
category = "main"
optional = true
python-versions = ">=3.7"
files = [
@ -1079,7 +1040,6 @@ email = ["email-validator (>=1.0.3)"]
name = "pygments"
version = "2.15.1"
description = "Pygments is a syntax highlighting package written in Python."
category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@ -1094,7 +1054,6 @@ plugins = ["importlib-metadata"]
name = "pymdown-extensions"
version = "9.11"
description = "Extension pack for Python Markdown."
category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@ -1108,14 +1067,13 @@ pyyaml = "*"
[[package]]
name = "pytest"
version = "7.3.1"
version = "7.3.2"
description = "pytest: simple powerful testing with Python"
category = "dev"
optional = false
python-versions = ">=3.7"
files = [
{file = "pytest-7.3.1-py3-none-any.whl", hash = "sha256:3799fa815351fea3a5e96ac7e503a96fa51cc9942c3753cda7651b93c1cfa362"},
{file = "pytest-7.3.1.tar.gz", hash = "sha256:434afafd78b1d78ed0addf160ad2b77a30d35d4bdf8af234fe621919d9ed15e3"},
{file = "pytest-7.3.2-py3-none-any.whl", hash = "sha256:cdcbd012c9312258922f8cd3f1b62a6580fdced17db6014896053d47cddf9295"},
{file = "pytest-7.3.2.tar.gz", hash = "sha256:ee990a3cc55ba808b80795a79944756f315c67c12b56abd3ac993a7b8c17030b"},
]
[package.dependencies]
@ -1127,13 +1085,12 @@ pluggy = ">=0.12,<2.0"
tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""}
[package.extras]
testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "xmlschema"]
testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"]
[[package]]
name = "python-dateutil"
version = "2.8.2"
description = "Extensions to the standard Python datetime module"
category = "dev"
optional = false
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7"
files = [
@ -1148,7 +1105,6 @@ six = ">=1.5"
name = "pywin32-ctypes"
version = "0.2.0"
description = ""
category = "dev"
optional = false
python-versions = "*"
files = [
@ -1160,7 +1116,6 @@ files = [
name = "pyyaml"
version = "6.0"
description = "YAML parser and emitter for Python"
category = "dev"
optional = false
python-versions = ">=3.6"
files = [
@ -1210,7 +1165,6 @@ files = [
name = "pyyaml-env-tag"
version = "0.1"
description = "A custom YAML tag for referencing environment variables in YAML files. "
category = "dev"
optional = false
python-versions = ">=3.6"
files = [
@ -1225,7 +1179,6 @@ pyyaml = "*"
name = "readme-renderer"
version = "37.3"
description = "readme_renderer is a library for rendering \"readme\" descriptions for Warehouse"
category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@ -1245,7 +1198,6 @@ md = ["cmarkgfm (>=0.8.0)"]
name = "regex"
version = "2023.5.5"
description = "Alternative regular expression module, to replace re."
category = "dev"
optional = false
python-versions = ">=3.6"
files = [
@ -1343,7 +1295,6 @@ files = [
name = "requests"
version = "2.30.0"
description = "Python HTTP for Humans."
category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@ -1365,7 +1316,6 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"]
name = "requests-toolbelt"
version = "1.0.0"
description = "A utility belt for advanced users of python-requests"
category = "dev"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
files = [
@ -1380,7 +1330,6 @@ requests = ">=2.0.1,<3.0.0"
name = "rfc3986"
version = "2.0.0"
description = "Validating URI References per RFC 3986"
category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@ -1395,7 +1344,6 @@ idna2008 = ["idna"]
name = "rich"
version = "13.3.5"
description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal"
category = "dev"
optional = false
python-versions = ">=3.7.0"
files = [
@ -1415,7 +1363,6 @@ jupyter = ["ipywidgets (>=7.5.1,<9)"]
name = "scikit-build"
version = "0.17.6"
description = "Improved build system generator for Python C/C++/Fortran/Cython extensions"
category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@ -1440,7 +1387,6 @@ test = ["build (>=0.7)", "cython (>=0.25.1)", "importlib-metadata", "pytest (>=6
name = "secretstorage"
version = "3.3.3"
description = "Python bindings to FreeDesktop.org Secret Service API"
category = "dev"
optional = false
python-versions = ">=3.6"
files = [
@ -1456,7 +1402,6 @@ jeepney = ">=0.6"
name = "setuptools"
version = "67.7.2"
description = "Easily download, build, install, upgrade, and uninstall Python packages"
category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@ -1473,7 +1418,6 @@ testing-integration = ["build[virtualenv]", "filelock (>=3.4.0)", "jaraco.envs (
name = "six"
version = "1.16.0"
description = "Python 2 and 3 compatibility utilities"
category = "dev"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*"
files = [
@ -1485,7 +1429,6 @@ files = [
name = "sniffio"
version = "1.3.0"
description = "Sniff out which async library your code is running under"
category = "main"
optional = false
python-versions = ">=3.7"
files = [
@ -1497,7 +1440,6 @@ files = [
name = "sse-starlette"
version = "1.6.1"
description = "\"SSE plugin for Starlette\""
category = "main"
optional = true
python-versions = ">=3.8"
files = [
@ -1512,7 +1454,6 @@ starlette = "*"
name = "starlette"
version = "0.27.0"
description = "The little ASGI library that shines."
category = "main"
optional = true
python-versions = ">=3.7"
files = [
@ -1531,7 +1472,6 @@ full = ["httpx (>=0.22.0)", "itsdangerous", "jinja2", "python-multipart", "pyyam
name = "tomli"
version = "2.0.1"
description = "A lil' TOML parser"
category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@ -1543,7 +1483,6 @@ files = [
name = "twine"
version = "4.0.2"
description = "Collection of utilities for publishing packages on PyPI"
category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@ -1566,7 +1505,6 @@ urllib3 = ">=1.26.0"
name = "typing-extensions"
version = "4.6.3"
description = "Backported and Experimental Type Hints for Python 3.7+"
category = "main"
optional = false
python-versions = ">=3.7"
files = [
@ -1578,7 +1516,6 @@ files = [
name = "urllib3"
version = "2.0.2"
description = "HTTP library with thread-safe connection pooling, file post, and more."
category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@ -1596,7 +1533,6 @@ zstd = ["zstandard (>=0.18.0)"]
name = "uvicorn"
version = "0.22.0"
description = "The lightning-fast ASGI server."
category = "main"
optional = true
python-versions = ">=3.7"
files = [
@ -1615,7 +1551,6 @@ standard = ["colorama (>=0.4)", "httptools (>=0.5.0)", "python-dotenv (>=0.13)",
name = "watchdog"
version = "3.0.0"
description = "Filesystem events monitoring"
category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@ -1655,7 +1590,6 @@ watchmedo = ["PyYAML (>=3.10)"]
name = "webencodings"
version = "0.5.1"
description = "Character encoding aliases for legacy web content"
category = "dev"
optional = false
python-versions = "*"
files = [
@ -1667,7 +1601,6 @@ files = [
name = "wheel"
version = "0.40.0"
description = "A built-package format for Python"
category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@ -1682,7 +1615,6 @@ test = ["pytest (>=6.0.0)"]
name = "zipp"
version = "3.15.0"
description = "Backport of pathlib-compatible object wrapper for zip files"
category = "dev"
optional = false
python-versions = ">=3.7"
files = [
@ -1695,9 +1627,9 @@ docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker
testing = ["big-O", "flake8 (<5)", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)"]
[extras]
server = ["uvicorn", "fastapi", "sse-starlette"]
server = ["fastapi", "sse-starlette", "uvicorn"]
[metadata]
lock-version = "2.0"
python-versions = "^3.8.1"
content-hash = "5c3354c253bc7ab7c7577a9a3733c7a341e91176e1d0c13dc2e3f3dcc0971bbe"
content-hash = "fabdd2d7dba563fe7b01b4592dfb33e520b5f6e67317ce5f03205ecba396a577"

pyproject.toml

@ -1,6 +1,6 @@
[tool.poetry]
name = "llama_cpp_python"
version = "0.1.62"
version = "0.1.65"
description = "Python bindings for the llama.cpp library"
authors = ["Andrei Betlen <abetlen@gmail.com>"]
license = "MIT"
@ -18,7 +18,7 @@ typing-extensions = "^4.6.3"
numpy = "^1.20.0"
diskcache = "^5.6.1"
uvicorn = { version = "^0.22.0", optional = true }
fastapi = { version = "^0.96.0", optional = true }
fastapi = { version = "^0.97.0", optional = true }
sse-starlette = { version = "^1.6.1", optional = true }
[tool.poetry.group.dev.dependencies]
@ -26,8 +26,8 @@ black = "^23.3.0"
twine = "^4.0.2"
mkdocs = "^1.4.3"
mkdocstrings = {extras = ["python"], version = "^0.22.0"}
mkdocs-material = "^9.1.15"
pytest = "^7.3.1"
mkdocs-material = "^9.1.16"
pytest = "^7.3.2"
httpx = "^0.24.1"
scikit-build = "0.17.6"

setup.py

@ -10,7 +10,7 @@ setup(
description="A Python wrapper for llama.cpp",
long_description=long_description,
long_description_content_type="text/markdown",
version="0.1.62",
version="0.1.65",
author="Andrei Betlen",
author_email="abetlen@gmail.com",
license="MIT",

vendor/llama.cpp (submodule)

@ -1 +1 @@
Subproject commit 4de0334f5cabf4696eced2e5d6e279fdfaa6c0f2
Subproject commit 2322ec223a21625dfe9bd73ee677444a98a24ac9