Merge branch 'main' of github.com:abetlen/llama_cpp_python into main

This commit is contained in:
Andrei Betlen 2023-05-17 02:00:48 -04:00
commit 626003c884
3 changed files with 12 additions and 6 deletions

View file

@ -26,6 +26,12 @@ pip install llama-cpp-python
The above command will attempt to install the package and build build `llama.cpp` from source. The above command will attempt to install the package and build build `llama.cpp` from source.
This is the recommended installation method as it ensures that `llama.cpp` is built with the available optimizations for your system. This is the recommended installation method as it ensures that `llama.cpp` is built with the available optimizations for your system.
Note: If you are using Apple Silicon (M1) Mac, make sure you have installed a version of Python that supports arm64 architecture. For example:
```
wget https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-MacOSX-arm64.sh
bash Miniforge3-MacOSX-arm64.sh
```
Otherwise, while installing it will build the llama.ccp x86 version which will be 10x slower on Apple Silicon (M1) Mac.
### Installation with OpenBLAS / cuBLAS / CLBlast ### Installation with OpenBLAS / cuBLAS / CLBlast
@ -120,7 +126,7 @@ Below is a short example demonstrating how to use the low-level API to tokenize
>>> ctx = llama_cpp.llama_init_from_file(b"./models/7b/ggml-model.bin", params) >>> ctx = llama_cpp.llama_init_from_file(b"./models/7b/ggml-model.bin", params)
>>> max_tokens = params.n_ctx >>> max_tokens = params.n_ctx
# use ctypes arrays for array params # use ctypes arrays for array params
>>> tokens = (llama_cppp.llama_token * int(max_tokens))() >>> tokens = (llama_cpp.llama_token * int(max_tokens))()
>>> n_tokens = llama_cpp.llama_tokenize(ctx, b"Q: Name the planets in the solar system? A: ", tokens, max_tokens, add_bos=llama_cpp.c_bool(True)) >>> n_tokens = llama_cpp.llama_tokenize(ctx, b"Q: Name the planets in the solar system? A: ", tokens, max_tokens, add_bos=llama_cpp.c_bool(True))
>>> llama_cpp.llama_free(ctx) >>> llama_cpp.llama_free(ctx)
``` ```

8
poetry.lock generated
View file

@ -773,14 +773,14 @@ mkdocs = ">=1.1"
[[package]] [[package]]
name = "mkdocs-material" name = "mkdocs-material"
version = "9.1.11" version = "9.1.12"
description = "Documentation that simply works" description = "Documentation that simply works"
category = "dev" category = "dev"
optional = false optional = false
python-versions = ">=3.7" python-versions = ">=3.7"
files = [ files = [
{file = "mkdocs_material-9.1.11-py3-none-any.whl", hash = "sha256:fbc86d50ec2cf34d40d5c4365780f290ceedde23f1a0704323b34e7f16b0c0dd"}, {file = "mkdocs_material-9.1.12-py3-none-any.whl", hash = "sha256:68c57d95d10104179c8c3ce9a88ee9d2322a5145b3d0f1f38ff686253fb5ec98"},
{file = "mkdocs_material-9.1.11.tar.gz", hash = "sha256:f5d473eb79d6640a5e668d4b2ab5b9de5e76ae0a0e2d864112df0cfe9016dc1d"}, {file = "mkdocs_material-9.1.12.tar.gz", hash = "sha256:d4ebe9b5031ce63a265c19fb5eab4d27ea4edadb05de206372e831b2b7570fb5"},
] ]
[package.dependencies] [package.dependencies]
@ -1439,4 +1439,4 @@ testing = ["big-O", "flake8 (<5)", "jaraco.functools", "jaraco.itertools", "more
[metadata] [metadata]
lock-version = "2.0" lock-version = "2.0"
python-versions = "^3.8.1" python-versions = "^3.8.1"
content-hash = "6bea74d847b958639276d4be527c2b65dafeb0a455b6e3d1f29fee5171ce73b2" content-hash = "d188fc14200f7ee348bef821265d676d584762983bcaf10f90c14221b4ed26a9"

View file

@ -22,7 +22,7 @@ black = "^23.3.0"
twine = "^4.0.2" twine = "^4.0.2"
mkdocs = "^1.4.3" mkdocs = "^1.4.3"
mkdocstrings = {extras = ["python"], version = "^0.21.2"} mkdocstrings = {extras = ["python"], version = "^0.21.2"}
mkdocs-material = "^9.1.11" mkdocs-material = "^9.1.12"
pytest = "^7.3.1" pytest = "^7.3.1"
httpx = "^0.24.0" httpx = "^0.24.0"