diff --git a/CHANGELOG.md b/CHANGELOG.md
index bf6ed5d..d5925bc 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,28 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [0.1.65]
+
+### Fixed
+
+- (llama.cpp) Fix struct misalignment bug
+
+## [0.1.64]
+
+### Changed
+
+- (llama.cpp) Update llama.cpp
+- Fix docs for `seed`: set -1 for a random seed
+
+## [0.1.63]
+
+### Added
+
+- (llama.cpp) Add full GPU utilisation in CUDA
+- (llama.cpp) Add get_vocab
+- (llama.cpp) Add low_vram parameter
+- (server) Add logit_bias parameter
+
 ## [0.1.62]
 
 ### Fixed
diff --git a/README.md b/README.md
index ee6e540..0e62f3d 100644
--- a/README.md
+++ b/README.md
@@ -17,6 +17,7 @@ This package provides:
 
 Documentation is available at [https://abetlen.github.io/llama-cpp-python](https://abetlen.github.io/llama-cpp-python).
 
+
 ## Installation from PyPI (recommended)
 
 Install from PyPI (requires a C compiler):
@@ -25,7 +26,7 @@ Install from PyPI (requires a C compiler):
 pip install llama-cpp-python
 ```
 
-The above command will attempt to install the package and build build `llama.cpp` from source.
+The above command will attempt to install the package and build `llama.cpp` from source.
 This is the recommended installation method as it ensures that `llama.cpp` is built with the available optimizations for your system.
 
 If you have previously installed `llama-cpp-python` through pip and want to upgrade your version or rebuild the package with different compiler options, please add the following flags to ensure that the package is rebuilt correctly:
@@ -70,6 +71,8 @@ To install with Metal (MPS), set the `LLAMA_METAL=on` environment variable before installing:
 CMAKE_ARGS="-DLLAMA_METAL=on" FORCE_CMAKE=1 pip install llama-cpp-python
 ```
 
+Detailed macOS Metal GPU install documentation is available at [docs/macos_install.md](docs/macos_install.md)
+
 ## High-level API
 
 The high-level API provides a simple managed interface through the `Llama` class.
diff --git a/docs/macos_install.md b/docs/macos_install.md
new file mode 100644
index 0000000..33dcb5d
--- /dev/null
+++ b/docs/macos_install.md
@@ -0,0 +1,59 @@
+
+# llama-cpp-python - macOS Install with Metal GPU
+
+
+**(1) Make sure you have Xcode installed... at least the command-line tools**
+```
+# check the path of your Xcode install
+xcode-select -p
+
+# an existing install returns something like
+# /Applications/Xcode-beta.app/Contents/Developer
+
+# if Xcode is missing, install it... it takes ages
+xcode-select --install
+```
+
+**(2) Install a conda distribution for macOS that supports Metal GPU (Miniforge)**
+```
+wget https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-MacOSX-arm64.sh
+bash Miniforge3-MacOSX-arm64.sh
+```
+
+**(3) Make a conda environment**
+```
+conda create -n llama python=3.9.16
+conda activate llama
+```
+
+**(4) Install the LATEST llama-cpp-python, which now happily supports macOS Metal GPU**
+ *(Xcode must be installed for pip to build/compile the C++ code)*
+```
+pip uninstall llama-cpp-python -y
+CMAKE_ARGS="-DLLAMA_METAL=on" FORCE_CMAKE=1 pip install -U llama-cpp-python --no-cache-dir
+pip install 'llama-cpp-python[server]'
+
+# you should now have llama-cpp-python v0.1.65 installed
+llama-cpp-python         0.1.65
+```
+
+**(5) Download a v3 ggml model**
+ - **ggmlv3**
+ - file name ends with **q4_0.bin** - indicating it is 4-bit quantized, with quantization method 0
+
+https://huggingface.co/TheBloke/open-llama-7b-open-instruct-GGML
+
+
+**(6) Run the llama-cpp-python API server with macOS Metal GPU support**
+```
+# config your ggml model path
+# make sure it is ggml v3
+# make sure it is q4_0
+export MODEL=[path to your llama.cpp ggml models]/[ggml-model-name]q4_0.bin
+python3 -m llama_cpp.server --model $MODEL --n_gpu_layers 1
+```
+
+***Note:** If you omit `--n_gpu_layers 1`, the CPU will be used.*
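As a quick smoke test of the Metal-backed server from step (6), you can hit its OpenAI-compatible completions route. A minimal sketch, assuming the guide's defaults (server listening on `localhost:8000`, a model already loaded) and using only the standard library:

```python
# Smoke-test the llama-cpp-python server started in step (6).
# Assumes the default listen address localhost:8000 used throughout the guide.
import json
import urllib.request

payload = {
    "prompt": "Q: Name the planets in the solar system. A: ",
    "max_tokens": 64,
}
req = urllib.request.Request(
    "http://localhost:8000/v1/completions",
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req) as resp:
    print(json.loads(resp.read())["choices"][0]["text"])
```

Since the server is a FastAPI app, interactive API docs should also be available at `http://localhost:8000/docs`.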
diff --git a/llama_cpp/llama.py b/llama_cpp/llama.py
index 0c3d72b..816cf11 100644
--- a/llama_cpp/llama.py
+++ b/llama_cpp/llama.py
@@ -221,6 +221,7 @@ class Llama:
         last_n_tokens_size: int = 64,
         lora_base: Optional[str] = None,
         lora_path: Optional[str] = None,
+        low_vram: bool = False,
         verbose: bool = True,
     ):
         """Load a llama.cpp model from `model_path`.
@@ -229,7 +230,7 @@ class Llama:
             model_path: Path to the model.
             n_ctx: Maximum context size.
             n_parts: Number of parts to split the model into. If -1, the number of parts is automatically determined.
-            seed: Random seed. 0 for random.
+            seed: Random seed. -1 for random.
             f16_kv: Use half-precision for key/value cache.
             logits_all: Return logits for all tokens, not just the last token.
             vocab_only: Only load the vocabulary no weights.
@@ -262,6 +263,7 @@ class Llama:
         self.params.use_mmap = use_mmap if lora_path is None else False
         self.params.use_mlock = use_mlock
         self.params.embedding = embedding
+        self.params.low_vram = low_vram
 
         self.last_n_tokens_size = last_n_tokens_size
         self.n_batch = min(n_ctx, n_batch)
@@ -814,7 +816,7 @@ class Llama:
             llama_cpp.llama_reset_timings(self.ctx)
 
         if len(prompt_tokens) > self._n_ctx:
-            raise ValueError(f"Requested tokens exceed context window of {self._n_ctx}")
+            raise ValueError(f"Requested tokens ({len(prompt_tokens)}) exceed context window of {self._n_ctx}")
 
         # Truncate max_tokens if requested tokens would exceed the context window
         max_tokens = (
@@ -1380,6 +1382,7 @@ class Llama:
         mirostat_tau: float = 5.0,
         mirostat_eta: float = 0.1,
         model: Optional[str] = None,
+        logits_processor: Optional[LogitsProcessorList] = None,
     ) -> Union[ChatCompletion, Iterator[ChatCompletionChunk]]:
         """Generate a chat completion from a list of messages.
@@ -1421,6 +1424,7 @@ class Llama:
             mirostat_tau=mirostat_tau,
             mirostat_eta=mirostat_eta,
             model=model,
+            logits_processor=logits_processor,
         )
         if stream:
             chunks: Iterator[CompletionChunk] = completion_or_chunks  # type: ignore
@@ -1447,6 +1451,7 @@ class Llama:
             use_mmap=self.params.use_mmap,
             use_mlock=self.params.use_mlock,
             embedding=self.params.embedding,
+            low_vram=self.params.low_vram,
             last_n_tokens_size=self.last_n_tokens_size,
             n_batch=self.n_batch,
             n_threads=self.n_threads,
@@ -1470,6 +1475,7 @@ class Llama:
             use_mmap=state["use_mmap"],
             use_mlock=state["use_mlock"],
             embedding=state["embedding"],
+            low_vram=state["low_vram"],
             n_threads=state["n_threads"],
             n_batch=state["n_batch"],
             last_n_tokens_size=state["last_n_tokens_size"],
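Taken together, the new `low_vram` constructor flag and the `logits_processor` pass-through on `create_chat_completion` compose as below. This is a hypothetical sketch, not documented usage: the model path is illustrative, and the processor is deliberately a no-op so the call stays well-defined:

```python
import llama_cpp

# low_vram is the new constructor flag from this diff; seed=-1 matches the
# corrected docstring ("-1 for random").
llm = llama_cpp.Llama(
    model_path="./models/7B/ggml-model-q4_0.bin",  # illustrative path
    seed=-1,
    low_vram=True,
)

def passthrough(input_ids, scores):
    # A logits processor receives the token ids generated so far and the raw
    # next-token scores, and returns (possibly modified) scores. No-op here.
    return scores

out = llm.create_chat_completion(
    messages=[{"role": "user", "content": "Say hello in five words."}],
    logits_processor=llama_cpp.LogitsProcessorList([passthrough]),
)
print(out["choices"][0]["message"]["content"])
```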
diff --git a/llama_cpp/llama_cpp.py b/llama_cpp/llama_cpp.py
index 29136c7..a516829 100644
--- a/llama_cpp/llama_cpp.py
+++ b/llama_cpp/llama_cpp.py
@@ -150,45 +150,43 @@ llama_progress_callback = ctypes.CFUNCTYPE(None, c_float, c_void_p)
 
 # struct llama_context_params {
+#     int seed;         // RNG seed, -1 for random
 #     int n_ctx;        // text context
 #     int n_batch;      // prompt processing batch size
 #     int n_gpu_layers; // number of layers to store in VRAM
 #     int main_gpu;     // the GPU that is used for scratch and small tensors
 #     float tensor_split[LLAMA_MAX_DEVICES]; // how to split layers across multiple GPUs
-#     int seed;         // RNG seed, -1 for random
+#     // called with a progress value between 0 and 1, pass NULL to disable
+#     llama_progress_callback progress_callback;
+#     // context pointer passed to the progress callback
+#     void * progress_callback_user_data;
+#     // Keep the booleans together to avoid misalignment during copy-by-value.
+#     bool low_vram;   // if true, reduce VRAM usage at the cost of performance
 #     bool f16_kv;     // use fp16 for KV cache
 #     bool logits_all; // the llama_eval() call computes all logits, not just the last one
 #     bool vocab_only; // only load the vocabulary, no weights
 #     bool use_mmap;   // use mmap if possible
 #     bool use_mlock;  // force system to keep model in RAM
 #     bool embedding;  // embedding mode only
-
-
-#     // called with a progress value between 0 and 1, pass NULL to disable
-#     llama_progress_callback progress_callback;
-#     // context pointer passed to the progress callback
-#     void * progress_callback_user_data;
 # };
 class llama_context_params(Structure):
     _fields_ = [
+        ("seed", c_int),
         ("n_ctx", c_int),
         ("n_batch", c_int),
         ("n_gpu_layers", c_int),
         ("main_gpu", c_int),
         ("tensor_split", c_float * LLAMA_MAX_DEVICES.value),
-        ("seed", c_int),
+        ("progress_callback", llama_progress_callback),
+        ("progress_callback_user_data", c_void_p),
+        ("low_vram", c_bool),
         ("f16_kv", c_bool),
-        (
-            "logits_all",
-            c_bool,
-        ),
+        ("logits_all", c_bool),
         ("vocab_only", c_bool),
         ("use_mmap", c_bool),
         ("use_mlock", c_bool),
         ("embedding", c_bool),
-        ("progress_callback", llama_progress_callback),
-        ("progress_callback_user_data", c_void_p),
     ]
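For anyone reviewing why the field reorder above matters: ctypes computes offsets from `_fields_` order, so a Python-side declaration that drifts from the C header makes the copy-by-value struct land fields on the wrong bytes — the misalignment bug named in the changelog. A standalone illustration with reduced, hypothetical layouts (not the real `llama_context_params`):

```python
import ctypes

class HeaderLayout(ctypes.Structure):
    # order as in the updated header: callback pointer before the bool block
    _fields_ = [
        ("seed", ctypes.c_int),
        ("progress_callback_user_data", ctypes.c_void_p),
        ("low_vram", ctypes.c_bool),
        ("f16_kv", ctypes.c_bool),
    ]

class StaleLayout(ctypes.Structure):
    # pre-fix order: a bool wedged in before the pointer shifts the packing
    _fields_ = [
        ("seed", ctypes.c_int),
        ("f16_kv", ctypes.c_bool),
        ("progress_callback_user_data", ctypes.c_void_p),
        ("low_vram", ctypes.c_bool),
    ]

# On a typical 64-bit build the two layouts disagree on where f16_kv lives,
# so passing one layout where the other is expected scrambles the flags.
for name in ("seed", "f16_kv", "progress_callback_user_data", "low_vram"):
    print(name,
          getattr(HeaderLayout, name).offset,
          getattr(StaleLayout, name).offset)
```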
@@ -555,6 +553,26 @@ _lib.llama_n_embd.argtypes = [llama_context_p]
 _lib.llama_n_embd.restype = c_int
 
 
+# // Get the vocabulary as output parameters.
+# // Returns number of results.
+# LLAMA_API int llama_get_vocab(
+#     const struct llama_context * ctx,
+#     const char * * strings,
+#     float * scores,
+#     int capacity);
+def llama_get_vocab(
+    ctx: llama_context_p,
+    strings,  # type: Array[c_char_p] # type: ignore
+    scores,  # type: Array[c_float] # type: ignore
+    capacity: c_int,
+) -> int:
+    return _lib.llama_get_vocab(ctx, strings, scores, capacity)
+
+
+_lib.llama_get_vocab.argtypes = [llama_context_p, POINTER(c_char_p), POINTER(c_float), c_int]
+_lib.llama_get_vocab.restype = c_int
+
+
 # Token logits obtained from the last call to llama_eval()
 # The logits for the last token are stored in the last row
 # Can be mutated in order to change the probabilities of the next token
@@ -596,7 +614,7 @@ _lib.llama_token_to_str.restype = c_char_p
 
 # Special tokens
 
-# LLAMA_API llama_token llama_token_bos();
+# LLAMA_API llama_token llama_token_bos(); // beginning-of-sentence
 def llama_token_bos() -> int:
     return _lib.llama_token_bos()
 
@@ -605,7 +623,7 @@ _lib.llama_token_bos.argtypes = []
 _lib.llama_token_bos.restype = llama_token
 
 
-# LLAMA_API llama_token llama_token_eos();
+# LLAMA_API llama_token llama_token_eos(); // end-of-sentence
 def llama_token_eos() -> int:
     return _lib.llama_token_eos()
 
@@ -614,7 +632,7 @@ _lib.llama_token_eos.argtypes = []
 _lib.llama_token_eos.restype = llama_token
 
 
-# LLAMA_API llama_token llama_token_nl();
+# LLAMA_API llama_token llama_token_nl(); // next-line
 def llama_token_nl() -> int:
     return _lib.llama_token_nl()
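The `llama_get_vocab` binding above fills caller-allocated output arrays, which is also why its argtypes must be pointer types rather than plain `c_char_p`/`c_float`. Driving it from Python would look roughly like the sketch below; `ctx` and `n_vocab` must come from an already-initialized model, and `dump_vocab` is an illustrative helper, not part of the package:

```python
from ctypes import c_char_p, c_float

import llama_cpp

def dump_vocab(ctx, n_vocab):
    # Caller-allocated output arrays sized to the full vocabulary.
    strings = (c_char_p * n_vocab)()
    scores = (c_float * n_vocab)()
    n = llama_cpp.llama_get_vocab(ctx, strings, scores, n_vocab)
    # Each entry pairs the token text (raw bytes) with its score.
    return [(strings[i], scores[i]) for i in range(n)]
```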
diff --git a/llama_cpp/server/__main__.py b/llama_cpp/server/__main__.py
index 4fe1d94..748a2af 100644
--- a/llama_cpp/server/__main__.py
+++ b/llama_cpp/server/__main__.py
@@ -46,5 +46,5 @@ if __name__ == "__main__":
     app = create_app(settings=settings)
 
     uvicorn.run(
-        app, host=os.getenv("HOST", "localhost"), port=int(os.getenv("PORT", 8000))
+        app, host=os.getenv("HOST", settings.host), port=int(os.getenv("PORT", settings.port))
     )
diff --git a/llama_cpp/server/app.py b/llama_cpp/server/app.py
index f70d8f0..ef319c7 100644
--- a/llama_cpp/server/app.py
+++ b/llama_cpp/server/app.py
@@ -30,6 +30,9 @@ class Settings(BaseSettings):
         ge=0,
         description="The number of layers to put on the GPU. The rest will be on the CPU.",
     )
+    seed: int = Field(
+        default=1337, description="Random seed. -1 for random."
+    )
     n_batch: int = Field(
         default=512, ge=1, description="The batch size to use per eval."
     )
@@ -48,6 +51,10 @@ class Settings(BaseSettings):
         description="Use mmap.",
     )
     embedding: bool = Field(default=True, description="Whether to use embeddings.")
+    low_vram: bool = Field(
+        default=False,
+        description="Whether to use less VRAM. This will reduce performance.",
+    )
     last_n_tokens_size: int = Field(
         default=64,
         ge=0,
@@ -72,6 +79,12 @@ class Settings(BaseSettings):
     verbose: bool = Field(
         default=True, description="Whether to print debug information."
     )
+    host: str = Field(
+        default="localhost", description="Listen address"
+    )
+    port: int = Field(
+        default=8000, description="Listen port"
+    )
 
 
 router = APIRouter()
@@ -99,6 +112,7 @@ def create_app(settings: Optional[Settings] = None):
     llama = llama_cpp.Llama(
         model_path=settings.model,
         n_gpu_layers=settings.n_gpu_layers,
+        seed=settings.seed,
         f16_kv=settings.f16_kv,
         use_mlock=settings.use_mlock,
         use_mmap=settings.use_mmap,
@@ -113,8 +127,12 @@ def create_app(settings: Optional[Settings] = None):
     )
     if settings.cache:
         if settings.cache_type == "disk":
+            if settings.verbose:
+                print(f"Using disk cache with size {settings.cache_size}")
             cache = llama_cpp.LlamaDiskCache(capacity_bytes=settings.cache_size)
         else:
+            if settings.verbose:
+                print(f"Using ram cache with size {settings.cache_size}")
             cache = llama_cpp.LlamaRAMCache(capacity_bytes=settings.cache_size)
 
         cache = llama_cpp.LlamaCache(capacity_bytes=settings.cache_size)
@@ -249,18 +267,19 @@ class CreateCompletionRequest(BaseModel):
     )
     presence_penalty: Optional[float] = presence_penalty_field
     frequency_penalty: Optional[float] = frequency_penalty_field
+    logit_bias: Optional[Dict[str, float]] = Field(None)
+    logprobs: Optional[int] = Field(None)
 
     # ignored or currently unsupported
     model: Optional[str] = model_field
     n: Optional[int] = 1
-    logprobs: Optional[int] = Field(None)
     best_of: Optional[int] = 1
-    logit_bias: Optional[Dict[str, float]] = Field(None)
     user: Optional[str] = Field(None)
 
     # llama.cpp specific parameters
     top_k: int = top_k_field
     repeat_penalty: float = repeat_penalty_field
+    logit_bias_type: Optional[Literal["input_ids", "tokens"]] = Field(None)
 
     class Config:
         schema_extra = {
@@ -274,6 +293,39 @@ class CreateCompletionRequest(BaseModel):
 
 CreateCompletionResponse = create_model_from_typeddict(llama_cpp.Completion)
 
 
+def make_logit_bias_processor(
+    llama: llama_cpp.Llama,
+    logit_bias: Dict[str, float],
+    logit_bias_type: Optional[Literal["input_ids", "tokens"]],
+):
+    if logit_bias_type is None:
+        logit_bias_type = "input_ids"
+
+    to_bias: Dict[int, float] = {}
+    if logit_bias_type == "input_ids":
+        for input_id, score in logit_bias.items():
+            input_id = int(input_id)
+            to_bias[input_id] = score
+
+    elif logit_bias_type == "tokens":
+        for token, score in logit_bias.items():
+            token = token.encode('utf-8')
+            for input_id in llama.tokenize(token, add_bos=False):
+                to_bias[input_id] = score
+
+    def logit_bias_processor(
+        input_ids: List[int],
+        scores: List[float],
+    ) -> List[float]:
+        new_scores = [None] * len(scores)
+        for input_id, score in enumerate(scores):
+            new_scores[input_id] = score + to_bias.get(input_id, 0.0)
+
+        return new_scores
+
+    return logit_bias_processor
+
+
 @router.post(
     "/v1/completions",
     response_model=CreateCompletionResponse,
@@ -291,9 +343,16 @@ async def create_completion(
         "n",
         "best_of",
         "logit_bias",
+        "logit_bias_type",
         "user",
     }
     kwargs = body.dict(exclude=exclude)
+
+    if body.logit_bias is not None:
+        kwargs['logits_processor'] = llama_cpp.LogitsProcessorList([
+            make_logit_bias_processor(llama, body.logit_bias, body.logit_bias_type),
+        ])
+
     if body.stream:
         send_chan, recv_chan = anyio.create_memory_object_stream(10)
@@ -372,16 +431,17 @@ class CreateChatCompletionRequest(BaseModel):
     stream: bool = stream_field
     presence_penalty: Optional[float] = presence_penalty_field
     frequency_penalty: Optional[float] = frequency_penalty_field
+    logit_bias: Optional[Dict[str, float]] = Field(None)
 
     # ignored or currently unsupported
     model: Optional[str] = model_field
     n: Optional[int] = 1
-    logit_bias: Optional[Dict[str, float]] = Field(None)
     user: Optional[str] = Field(None)
 
     # llama.cpp specific parameters
     top_k: int = top_k_field
     repeat_penalty: float = repeat_penalty_field
+    logit_bias_type: Optional[Literal["input_ids", "tokens"]] = Field(None)
 
     class Config:
         schema_extra = {
@@ -413,9 +473,16 @@ async def create_chat_completion(
     exclude = {
         "n",
         "logit_bias",
+        "logit_bias_type",
         "user",
     }
     kwargs = body.dict(exclude=exclude)
+
+    if body.logit_bias is not None:
+        kwargs['logits_processor'] = llama_cpp.LogitsProcessorList([
+            make_logit_bias_processor(llama, body.logit_bias, body.logit_bias_type),
+        ])
+
     if body.stream:
         send_chan, recv_chan = anyio.create_memory_object_stream(10)
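End to end, the server's new knobs can be exercised over HTTP as follows. A sketch assuming the default `localhost:8000` listen address added above; the token id is arbitrary, and a large negative bias strongly suppresses that token (the processor adds the bias directly to the raw logits):

```python
import json
import urllib.request

payload = {
    "prompt": "The capital of France is",
    "max_tokens": 16,
    # With logit_bias_type == "input_ids" (the default), keys are token ids
    # as strings; with "tokens", keys are text the server tokenizes first.
    "logit_bias": {"1234": -100.0},  # arbitrary token id for illustration
    "logit_bias_type": "input_ids",
}
req = urllib.request.Request(
    "http://localhost:8000/v1/completions",
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
print(json.loads(urllib.request.urlopen(req).read())["choices"][0]["text"])
```

The same `logit_bias`/`logit_bias_type` pair is accepted by the chat completion route, which goes through the identical `make_logit_bias_processor` helper.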
diff --git a/poetry.lock b/poetry.lock
index 4a9c572..e006449 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,10 +1,9 @@
-# This file is automatically @generated by Poetry and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand.
 
 [[package]]
 name = "anyio"
 version = "3.6.2"
 description = "High level compatibility layer for multiple asynchronous event loop implementations"
-category = "main"
 optional = false
 python-versions = ">=3.6.2"
 files = [
@@ -25,7 +24,6 @@ trio = ["trio (>=0.16,<0.22)"]
 name = "black"
 version = "23.3.0"
 description = "The uncompromising code formatter."
-category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -75,7 +73,6 @@ uvloop = ["uvloop (>=0.15.2)"]
 name = "bleach"
 version = "6.0.0"
 description = "An easy safelist-based HTML-sanitizing tool."
-category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -94,7 +91,6 @@ css = ["tinycss2 (>=1.1.0,<1.2)"]
 name = "certifi"
 version = "2023.5.7"
 description = "Python package for providing Mozilla's CA Bundle."
-category = "dev"
 optional = false
 python-versions = ">=3.6"
 files = [
@@ -106,7 +102,6 @@ files = [
 name = "cffi"
 version = "1.15.1"
 description = "Foreign Function Interface for Python calling C code."
-category = "dev"
 optional = false
 python-versions = "*"
 files = [
@@ -183,7 +178,6 @@ pycparser = "*"
 name = "charset-normalizer"
 version = "3.1.0"
 description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet."
-category = "dev"
 optional = false
 python-versions = ">=3.7.0"
 files = [
@@ -268,7 +262,6 @@ files = [
 name = "click"
 version = "8.1.3"
 description = "Composable command line interface toolkit"
-category = "main"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -283,7 +276,6 @@ colorama = {version = "*", markers = "platform_system == \"Windows\""}
 name = "colorama"
 version = "0.4.6"
 description = "Cross-platform colored terminal text."
-category = "main"
 optional = false
 python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
 files = [
@@ -295,7 +287,6 @@ files = [
 name = "cryptography"
 version = "40.0.2"
 description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers."
-category = "dev"
 optional = false
 python-versions = ">=3.6"
 files = [
@@ -333,11 +324,21 @@ test = ["iso8601", "pretend", "pytest (>=6.2.0)", "pytest-benchmark", "pytest-co
 test-randomorder = ["pytest-randomly"]
 tox = ["tox"]
 
+[[package]]
+name = "diskcache"
+version = "5.6.1"
+description = "Disk Cache -- Disk and file backed persistent cache."
+optional = false
+python-versions = ">=3"
+files = [
+    {file = "diskcache-5.6.1-py3-none-any.whl", hash = "sha256:558c6a2d5d7c721bb00e40711803d6804850c9f76c426ed81ecc627fe9d2ce2d"},
+    {file = "diskcache-5.6.1.tar.gz", hash = "sha256:e4c978532feff5814c4cc00fe1e11e40501985946643d73220d41ee7737c72c3"},
+]
+
 [[package]]
 name = "distro"
 version = "1.8.0"
 description = "Distro - an OS platform information API"
-category = "dev"
 optional = false
 python-versions = ">=3.6"
 files = [
@@ -349,7 +350,6 @@ files = [
 name = "docutils"
 version = "0.20"
 description = "Docutils -- Python Documentation Utilities"
-category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -361,7 +361,6 @@ files = [
 name = "exceptiongroup"
 version = "1.1.1"
 description = "Backport of PEP 654 (exception groups)"
-category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -374,31 +373,26 @@ test = ["pytest (>=6)"]
 
 [[package]]
 name = "fastapi"
-version = "0.96.0"
+version = "0.97.0"
 description = "FastAPI framework, high performance, easy to learn, fast to code, ready for production"
-category = "main"
 optional = true
 python-versions = ">=3.7"
 files = [
-    {file = "fastapi-0.96.0-py3-none-any.whl", hash = "sha256:b8e11fe81e81eab4e1504209917338e0b80f783878a42c2b99467e5e1019a1e9"},
-    {file = "fastapi-0.96.0.tar.gz", hash = "sha256:71232d47c2787446991c81c41c249f8a16238d52d779c0e6b43927d3773dbe3c"},
+    {file = "fastapi-0.97.0-py3-none-any.whl", hash = "sha256:95d757511c596409930bd20673358d4a4d709004edb85c5d24d6ffc48fabcbf2"},
+    {file = "fastapi-0.97.0.tar.gz", hash = "sha256:b53248ee45f64f19bb7600953696e3edf94b0f7de94df1e5433fc5c6136fa986"},
 ]
 
 [package.dependencies]
-pydantic = ">=1.6.2,<1.7 || >1.7,<1.7.1 || >1.7.1,<1.7.2 || >1.7.2,<1.7.3 || >1.7.3,<1.8 || >1.8,<1.8.1 || >1.8.1,<2.0.0"
+pydantic = ">=1.7.4,<1.8 || >1.8,<1.8.1 || >1.8.1,<2.0.0"
 starlette = ">=0.27.0,<0.28.0"
 
 [package.extras]
 all = ["email-validator (>=1.1.1)", "httpx (>=0.23.0)", "itsdangerous (>=1.1.0)", "jinja2 (>=2.11.2)", "orjson (>=3.2.1)", "python-multipart (>=0.0.5)", "pyyaml (>=5.3.1)", "ujson (>=4.0.1,!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0)", "uvicorn[standard] (>=0.12.0)"]
-dev = ["pre-commit (>=2.17.0,<3.0.0)", "ruff (==0.0.138)", "uvicorn[standard] (>=0.12.0,<0.21.0)"]
-doc = ["mdx-include (>=1.4.1,<2.0.0)", "mkdocs (>=1.1.2,<2.0.0)", "mkdocs-markdownextradata-plugin (>=0.1.7,<0.3.0)", "mkdocs-material (>=8.1.4,<9.0.0)", "pyyaml (>=5.3.1,<7.0.0)", "typer-cli (>=0.0.13,<0.0.14)", "typer[all] (>=0.6.1,<0.8.0)"]
-test = ["anyio[trio] (>=3.2.1,<4.0.0)", "black (==23.1.0)", "coverage[toml] (>=6.5.0,<8.0)", "databases[sqlite] (>=0.3.2,<0.7.0)", "email-validator (>=1.1.1,<2.0.0)", "flask (>=1.1.2,<3.0.0)", "httpx (>=0.23.0,<0.24.0)", "isort (>=5.0.6,<6.0.0)", "mypy (==0.982)", "orjson (>=3.2.1,<4.0.0)", "passlib[bcrypt] (>=1.7.2,<2.0.0)", "peewee (>=3.13.3,<4.0.0)", "pytest (>=7.1.3,<8.0.0)", "python-jose[cryptography] (>=3.3.0,<4.0.0)", "python-multipart (>=0.0.5,<0.0.7)", "pyyaml (>=5.3.1,<7.0.0)", "ruff (==0.0.138)", "sqlalchemy (>=1.3.18,<1.4.43)", "types-orjson (==3.6.2)", "types-ujson (==5.7.0.1)", "ujson (>=4.0.1,!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0,<6.0.0)"]
 
 [[package]]
 name = "ghp-import"
 version = "2.1.0"
 description = "Copy your docs directly to the gh-pages branch."
-category = "dev"
 optional = false
 python-versions = "*"
 files = [
@@ -416,7 +410,6 @@ dev = ["flake8", "markdown", "twine", "wheel"]
 name = "griffe"
 version = "0.27.3"
 description = "Signatures for entire Python programs. Extract the structure, the frame, the skeleton of your project, to generate API documentation or find breaking changes in your API."
-category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -431,7 +424,6 @@ colorama = ">=0.4"
 name = "h11"
 version = "0.14.0"
 description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1"
-category = "main"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -443,7 +435,6 @@ files = [
 name = "httpcore"
 version = "0.17.0"
 description = "A minimal low-level HTTP client."
-category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -455,17 +446,16 @@ files = [
 anyio = ">=3.0,<5.0"
 certifi = "*"
 h11 = ">=0.13,<0.15"
-sniffio = ">=1.0.0,<2.0.0"
+sniffio = "==1.*"
 
 [package.extras]
 http2 = ["h2 (>=3,<5)"]
-socks = ["socksio (>=1.0.0,<2.0.0)"]
+socks = ["socksio (==1.*)"]
 
 [[package]]
 name = "httpx"
 version = "0.24.1"
 description = "The next generation HTTP client."
-category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -481,15 +471,14 @@ sniffio = "*"
 
 [package.extras]
 brotli = ["brotli", "brotlicffi"]
-cli = ["click (>=8.0.0,<9.0.0)", "pygments (>=2.0.0,<3.0.0)", "rich (>=10,<14)"]
+cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"]
 http2 = ["h2 (>=3,<5)"]
-socks = ["socksio (>=1.0.0,<2.0.0)"]
+socks = ["socksio (==1.*)"]
 
 [[package]]
 name = "idna"
 version = "3.4"
 description = "Internationalized Domain Names in Applications (IDNA)"
-category = "main"
 optional = false
 python-versions = ">=3.5"
 files = [
@@ -501,7 +490,6 @@ files = [
 name = "importlib-metadata"
 version = "6.6.0"
 description = "Read metadata from Python packages"
-category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -521,7 +509,6 @@ testing = ["flake8 (<5)", "flufl.flake8", "importlib-resources (>=1.3)", "packag
 name = "importlib-resources"
 version = "5.12.0"
 description = "Read resources from Python packages"
-category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -540,7 +527,6 @@ testing = ["flake8 (<5)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-chec
 name = "iniconfig"
 version = "2.0.0"
 description = "brain-dead simple config-ini parsing"
-category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -552,7 +538,6 @@ files = [
 name = "jaraco-classes"
 version = "3.2.3"
 description = "Utility functions for Python class constructs"
-category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -571,7 +556,6 @@ testing = ["flake8 (<5)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-chec
 name = "jeepney"
 version = "0.8.0"
 description = "Low-level, pure Python DBus protocol wrapper."
-category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -587,7 +571,6 @@ trio = ["async_generator", "trio"]
 name = "jinja2"
 version = "3.1.2"
 description = "A very fast and expressive template engine."
-category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -605,7 +588,6 @@ i18n = ["Babel (>=2.7)"]
 name = "keyring"
 version = "23.13.1"
 description = "Store and access your passwords safely."
-category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -630,7 +612,6 @@ testing = ["flake8 (<5)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-chec
 name = "markdown"
 version = "3.3.7"
 description = "Python implementation of Markdown."
-category = "dev"
 optional = false
 python-versions = ">=3.6"
 files = [
@@ -648,7 +629,6 @@ testing = ["coverage", "pyyaml"]
 name = "markdown-it-py"
 version = "2.2.0"
 description = "Python port of markdown-it. Markdown parsing, done right!"
-category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -673,7 +653,6 @@ testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"]
 name = "markupsafe"
 version = "2.1.2"
 description = "Safely add untrusted strings to HTML/XML markup."
-category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -733,7 +712,6 @@ files = [
 name = "mdurl"
 version = "0.1.2"
 description = "Markdown URL utilities"
-category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -745,7 +723,6 @@ files = [
 name = "mergedeep"
 version = "1.3.4"
 description = "A deep merge function for 🐍."
-category = "dev"
 optional = false
 python-versions = ">=3.6"
 files = [
@@ -757,7 +734,6 @@ files = [
 name = "mkdocs"
 version = "1.4.3"
 description = "Project documentation with Markdown."
-category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -786,7 +762,6 @@ min-versions = ["babel (==2.9.0)", "click (==7.0)", "colorama (==0.4)", "ghp-imp
 name = "mkdocs-autorefs"
 version = "0.4.1"
 description = "Automatically link across pages in MkDocs."
-category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -800,14 +775,13 @@ mkdocs = ">=1.1"
 
 [[package]]
 name = "mkdocs-material"
-version = "9.1.15"
+version = "9.1.16"
 description = "Documentation that simply works"
-category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "mkdocs_material-9.1.15-py3-none-any.whl", hash = "sha256:b49e12869ab464558e2dd3c5792da5b748a7e0c48ee83b4d05715f98125a7a39"},
-    {file = "mkdocs_material-9.1.15.tar.gz", hash = "sha256:8513ab847c9a541ed3d11a3a7eed556caf72991ee786c31c5aac6691a121088a"},
+    {file = "mkdocs_material-9.1.16-py3-none-any.whl", hash = "sha256:f9e62558a6b01ffac314423cbc223d970c25fbc78999860226245b64e64d6751"},
+    {file = "mkdocs_material-9.1.16.tar.gz", hash = "sha256:1021bfea20f00a9423530c8c2ae9be3c78b80f5a527b3f822e6de3d872e5ab79"},
 ]
 
 [package.dependencies]
@@ -825,7 +799,6 @@ requests = ">=2.26"
 name = "mkdocs-material-extensions"
 version = "1.1.1"
 description = "Extension pack for Python Markdown and MkDocs Material."
-category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -837,7 +810,6 @@ files = [
 name = "mkdocstrings"
 version = "0.22.0"
 description = "Automatic documentation from sources, for MkDocs."
-category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -865,7 +837,6 @@ python-legacy = ["mkdocstrings-python-legacy (>=0.2.1)"]
 name = "mkdocstrings-python"
 version = "0.10.1"
 description = "A Python handler for mkdocstrings."
-category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -881,7 +852,6 @@ mkdocstrings = ">=0.20"
 name = "more-itertools"
 version = "9.1.0"
 description = "More routines for operating on iterables, beyond itertools"
-category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -893,7 +863,6 @@ files = [
 name = "mypy-extensions"
 version = "1.0.0"
 description = "Type system extensions for programs checked with the mypy type checker."
-category = "dev"
 optional = false
 python-versions = ">=3.5"
 files = [
@@ -905,7 +874,6 @@ files = [
 name = "numpy"
 version = "1.24.3"
 description = "Fundamental package for array computing in Python"
-category = "main"
 optional = false
 python-versions = ">=3.8"
 files = [
@@ -943,7 +911,6 @@ files = [
 name = "packaging"
 version = "23.1"
 description = "Core utilities for Python packages"
-category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -955,7 +922,6 @@ files = [
 name = "pathspec"
 version = "0.11.1"
 description = "Utility library for gitignore style pattern matching of file paths."
-category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -967,7 +933,6 @@ files = [
 name = "pkginfo"
 version = "1.9.6"
 description = "Query metadata from sdists / bdists / installed packages."
-category = "dev"
 optional = false
 python-versions = ">=3.6"
 files = [
@@ -982,7 +947,6 @@ testing = ["pytest", "pytest-cov"]
 name = "platformdirs"
 version = "3.5.0"
 description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"."
-category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -998,7 +962,6 @@ test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=7.3.1)", "pytest-
 name = "pluggy"
 version = "1.0.0"
 description = "plugin and hook calling mechanisms for python"
-category = "dev"
 optional = false
 python-versions = ">=3.6"
 files = [
@@ -1014,7 +977,6 @@ testing = ["pytest", "pytest-benchmark"]
 name = "pycparser"
 version = "2.21"
 description = "C parser in Python"
-category = "dev"
 optional = false
 python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
 files = [
@@ -1026,7 +988,6 @@ files = [
 name = "pydantic"
 version = "1.10.7"
 description = "Data validation and settings management using python type hints"
-category = "main"
 optional = true
 python-versions = ">=3.7"
 files = [
@@ -1079,7 +1040,6 @@ email = ["email-validator (>=1.0.3)"]
 name = "pygments"
 version = "2.15.1"
 description = "Pygments is a syntax highlighting package written in Python."
-category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -1094,7 +1054,6 @@ plugins = ["importlib-metadata"]
 name = "pymdown-extensions"
 version = "9.11"
 description = "Extension pack for Python Markdown."
-category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -1108,14 +1067,13 @@ pyyaml = "*"
 
 [[package]]
 name = "pytest"
-version = "7.3.1"
+version = "7.3.2"
 description = "pytest: simple powerful testing with Python"
-category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
-    {file = "pytest-7.3.1-py3-none-any.whl", hash = "sha256:3799fa815351fea3a5e96ac7e503a96fa51cc9942c3753cda7651b93c1cfa362"},
-    {file = "pytest-7.3.1.tar.gz", hash = "sha256:434afafd78b1d78ed0addf160ad2b77a30d35d4bdf8af234fe621919d9ed15e3"},
+    {file = "pytest-7.3.2-py3-none-any.whl", hash = "sha256:cdcbd012c9312258922f8cd3f1b62a6580fdced17db6014896053d47cddf9295"},
+    {file = "pytest-7.3.2.tar.gz", hash = "sha256:ee990a3cc55ba808b80795a79944756f315c67c12b56abd3ac993a7b8c17030b"},
 ]
 
 [package.dependencies]
@@ -1127,13 +1085,12 @@ pluggy = ">=0.12,<2.0"
 tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""}
 
 [package.extras]
-testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "xmlschema"]
+testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "setuptools", "xmlschema"]
 
 [[package]]
 name = "python-dateutil"
 version = "2.8.2"
 description = "Extensions to the standard Python datetime module"
-category = "dev"
 optional = false
 python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7"
 files = [
@@ -1148,7 +1105,6 @@ six = ">=1.5"
 name = "pywin32-ctypes"
 version = "0.2.0"
 description = ""
-category = "dev"
 optional = false
 python-versions = "*"
 files = [
@@ -1160,7 +1116,6 @@ files = [
 name = "pyyaml"
 version = "6.0"
 description = "YAML parser and emitter for Python"
-category = "dev"
 optional = false
 python-versions = ">=3.6"
 files = [
@@ -1210,7 +1165,6 @@ files = [
 name = "pyyaml-env-tag"
 version = "0.1"
 description = "A custom YAML tag for referencing environment variables in YAML files. "
-category = "dev"
 optional = false
 python-versions = ">=3.6"
 files = [
@@ -1225,7 +1179,6 @@ pyyaml = "*"
 name = "readme-renderer"
 version = "37.3"
 description = "readme_renderer is a library for rendering \"readme\" descriptions for Warehouse"
-category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -1245,7 +1198,6 @@ md = ["cmarkgfm (>=0.8.0)"]
 name = "regex"
 version = "2023.5.5"
 description = "Alternative regular expression module, to replace re."
-category = "dev"
 optional = false
 python-versions = ">=3.6"
 files = [
@@ -1343,7 +1295,6 @@ files = [
 name = "requests"
 version = "2.30.0"
 description = "Python HTTP for Humans."
-category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -1365,7 +1316,6 @@ use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"]
 name = "requests-toolbelt"
 version = "1.0.0"
 description = "A utility belt for advanced users of python-requests"
-category = "dev"
 optional = false
 python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
 files = [
@@ -1380,7 +1330,6 @@ requests = ">=2.0.1,<3.0.0"
 name = "rfc3986"
 version = "2.0.0"
 description = "Validating URI References per RFC 3986"
-category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -1395,7 +1344,6 @@ idna2008 = ["idna"]
 name = "rich"
 version = "13.3.5"
 description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal"
-category = "dev"
 optional = false
 python-versions = ">=3.7.0"
 files = [
@@ -1415,7 +1363,6 @@ jupyter = ["ipywidgets (>=7.5.1,<9)"]
 name = "scikit-build"
 version = "0.17.6"
 description = "Improved build system generator for Python C/C++/Fortran/Cython extensions"
-category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -1440,7 +1387,6 @@ test = ["build (>=0.7)", "cython (>=0.25.1)", "importlib-metadata", "pytest (>=6
 name = "secretstorage"
 version = "3.3.3"
 description = "Python bindings to FreeDesktop.org Secret Service API"
-category = "dev"
 optional = false
 python-versions = ">=3.6"
 files = [
@@ -1456,7 +1402,6 @@ jeepney = ">=0.6"
 name = "setuptools"
 version = "67.7.2"
 description = "Easily download, build, install, upgrade, and uninstall Python packages"
-category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -1473,7 +1418,6 @@ testing-integration = ["build[virtualenv]", "filelock (>=3.4.0)", "jaraco.envs (
 name = "six"
 version = "1.16.0"
 description = "Python 2 and 3 compatibility utilities"
-category = "dev"
 optional = false
 python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*"
 files = [
@@ -1485,7 +1429,6 @@ files = [
 name = "sniffio"
 version = "1.3.0"
 description = "Sniff out which async library your code is running under"
-category = "main"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -1497,7 +1440,6 @@ files = [
 name = "sse-starlette"
 version = "1.6.1"
 description = "\"SSE plugin for Starlette\""
-category = "main"
 optional = true
 python-versions = ">=3.8"
 files = [
@@ -1512,7 +1454,6 @@ starlette = "*"
 name = "starlette"
 version = "0.27.0"
 description = "The little ASGI library that shines."
-category = "main"
 optional = true
 python-versions = ">=3.7"
 files = [
@@ -1531,7 +1472,6 @@ full = ["httpx (>=0.22.0)", "itsdangerous", "jinja2", "python-multipart", "pyyam
 name = "tomli"
 version = "2.0.1"
 description = "A lil' TOML parser"
-category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -1543,7 +1483,6 @@ files = [
 name = "twine"
 version = "4.0.2"
 description = "Collection of utilities for publishing packages on PyPI"
-category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -1566,7 +1505,6 @@ urllib3 = ">=1.26.0"
 name = "typing-extensions"
 version = "4.6.3"
 description = "Backported and Experimental Type Hints for Python 3.7+"
-category = "main"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -1578,7 +1516,6 @@ files = [
 name = "urllib3"
 version = "2.0.2"
 description = "HTTP library with thread-safe connection pooling, file post, and more."
-category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -1596,7 +1533,6 @@ zstd = ["zstandard (>=0.18.0)"]
 name = "uvicorn"
 version = "0.22.0"
 description = "The lightning-fast ASGI server."
-category = "main"
 optional = true
 python-versions = ">=3.7"
 files = [
@@ -1615,7 +1551,6 @@ standard = ["colorama (>=0.4)", "httptools (>=0.5.0)", "python-dotenv (>=0.13)",
 name = "watchdog"
 version = "3.0.0"
 description = "Filesystem events monitoring"
-category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -1655,7 +1590,6 @@ watchmedo = ["PyYAML (>=3.10)"]
 name = "webencodings"
 version = "0.5.1"
 description = "Character encoding aliases for legacy web content"
-category = "dev"
 optional = false
 python-versions = "*"
 files = [
@@ -1667,7 +1601,6 @@ files = [
 name = "wheel"
 version = "0.40.0"
 description = "A built-package format for Python"
-category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -1682,7 +1615,6 @@ test = ["pytest (>=6.0.0)"]
 name = "zipp"
 version = "3.15.0"
 description = "Backport of pathlib-compatible object wrapper for zip files"
-category = "dev"
 optional = false
 python-versions = ">=3.7"
 files = [
@@ -1695,9 +1627,9 @@ docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker
 testing = ["big-O", "flake8 (<5)", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)"]
 
 [extras]
-server = ["uvicorn", "fastapi", "sse-starlette"]
+server = ["fastapi", "sse-starlette", "uvicorn"]
 
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.8.1"
-content-hash = "5c3354c253bc7ab7c7577a9a3733c7a341e91176e1d0c13dc2e3f3dcc0971bbe"
+content-hash = "fabdd2d7dba563fe7b01b4592dfb33e520b5f6e67317ce5f03205ecba396a577"
diff --git a/pyproject.toml b/pyproject.toml
index 564059c..c6ffe38 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "llama_cpp_python"
-version = "0.1.62"
+version = "0.1.65"
 description = "Python bindings for the llama.cpp library"
 authors = ["Andrei Betlen <abetlen@gmail.com>"]
 license = "MIT"
@@ -18,7 +18,7 @@ typing-extensions = "^4.6.3"
 numpy = "^1.20.0"
 diskcache = "^5.6.1"
 uvicorn = { version = "^0.22.0", optional = true }
-fastapi = { version = "^0.96.0", optional = true }
+fastapi = { version = "^0.97.0", optional = true }
 sse-starlette = { version = "^1.6.1", optional = true }
 
 [tool.poetry.group.dev.dependencies]
@@ -26,8 +26,8 @@ black = "^23.3.0"
 twine = "^4.0.2"
 mkdocs = "^1.4.3"
 mkdocstrings = {extras = ["python"], version = "^0.22.0"}
-mkdocs-material = "^9.1.15"
-pytest = "^7.3.1"
+mkdocs-material = "^9.1.16"
+pytest = "^7.3.2"
 httpx = "^0.24.1"
 scikit-build = "0.17.6"
 
diff --git a/setup.py b/setup.py
index bb423d8..9f27648 100644
--- a/setup.py
+++ b/setup.py
@@ -10,7 +10,7 @@ setup(
     description="A Python wrapper for llama.cpp",
     long_description=long_description,
     long_description_content_type="text/markdown",
-    version="0.1.62",
+    version="0.1.65",
     author="Andrei Betlen",
     author_email="abetlen@gmail.com",
     license="MIT",
diff --git a/vendor/llama.cpp b/vendor/llama.cpp
index 4de0334..2322ec2 160000
--- a/vendor/llama.cpp
+++ b/vendor/llama.cpp
@@ -1 +1 @@
-Subproject commit 4de0334f5cabf4696eced2e5d6e279fdfaa6c0f2
+Subproject commit 2322ec223a21625dfe9bd73ee677444a98a24ac9