diff --git a/README.md b/README.md index ea1e07f..7c515d0 100644 --- a/README.md +++ b/README.md @@ -169,7 +169,7 @@ docker run --rm -it -p 8000:8000 -v /path/to/models:/models -e MODEL=/models/ggm ## Low-level API The low-level API is a direct [`ctypes`](https://docs.python.org/3/library/ctypes.html) binding to the C API provided by `llama.cpp`. -The entire lowe-level API can be found in [llama_cpp/llama_cpp.py](https://github.com/abetlen/llama-cpp-python/blob/master/llama_cpp/llama_cpp.py) and directly mirrors the C API in [llama.h](https://github.com/ggerganov/llama.cpp/blob/master/llama.h). +The entire low-level API can be found in [llama_cpp/llama_cpp.py](https://github.com/abetlen/llama-cpp-python/blob/master/llama_cpp/llama_cpp.py) and directly mirrors the C API in [llama.h](https://github.com/ggerganov/llama.cpp/blob/master/llama.h). Below is a short example demonstrating how to use the low-level API to tokenize a prompt: diff --git a/docker/openblas_simple/Dockerfile b/docker/openblas_simple/Dockerfile index 8231bdb..020c34d 100644 --- a/docker/openblas_simple/Dockerfile +++ b/docker/openblas_simple/Dockerfile @@ -9,7 +9,7 @@ COPY . . RUN apt update && apt install -y libopenblas-dev ninja-build build-essential RUN python -m pip install --upgrade pip pytest cmake scikit-build setuptools fastapi uvicorn sse-starlette pydantic-settings -RUN LLAMA_OPENBLAS=1 pip install llama_cpp_python --verbose +RUN CMAKE_ARGS="-DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS" pip install llama_cpp_python --verbose # Run the server CMD python3 -m llama_cpp.server diff --git a/poetry.lock b/poetry.lock index 8fad112..0157ab6 100644 --- a/poetry.lock +++ b/poetry.lock @@ -384,17 +384,17 @@ test = ["pytest (>=6)"] [[package]] name = "fastapi" -version = "0.100.0" +version = "0.101.0" description = "FastAPI framework, high performance, easy to learn, fast to code, ready for production" optional = true python-versions = ">=3.7" files = [ - {file = "fastapi-0.100.0-py3-none-any.whl", hash = "sha256:271662daf986da8fa98dc2b7c7f61c4abdfdccfb4786d79ed8b2878f172c6d5f"}, - {file = "fastapi-0.100.0.tar.gz", hash = "sha256:acb5f941ea8215663283c10018323ba7ea737c571b67fc7e88e9469c7eb1d12e"}, + {file = "fastapi-0.101.0-py3-none-any.whl", hash = "sha256:494eb3494d89e8079c20859d7ca695f66eaccc40f46fe8c75ab6186d15f05ffd"}, + {file = "fastapi-0.101.0.tar.gz", hash = "sha256:ca2ae65fe42f6a34b5cf6c994337149154b1b400c39809d7b2dccdceb5ae77af"}, ] [package.dependencies] -pydantic = ">=1.7.4,<1.8 || >1.8,<1.8.1 || >1.8.1,<2.0.0 || >2.0.0,<2.0.1 || >2.0.1,<3.0.0" +pydantic = ">=1.7.4,<1.8 || >1.8,<1.8.1 || >1.8.1,<2.0.0 || >2.0.0,<2.0.1 || >2.0.1,<2.1.0 || >2.1.0,<3.0.0" starlette = ">=0.27.0,<0.28.0" typing-extensions = ">=4.5.0" @@ -744,13 +744,13 @@ files = [ [[package]] name = "mkdocs" -version = "1.4.3" +version = "1.5.2" description = "Project documentation with Markdown." optional = false python-versions = ">=3.7" files = [ - {file = "mkdocs-1.4.3-py3-none-any.whl", hash = "sha256:6ee46d309bda331aac915cd24aab882c179a933bd9e77b80ce7d2eaaa3f689dd"}, - {file = "mkdocs-1.4.3.tar.gz", hash = "sha256:5955093bbd4dd2e9403c5afaf57324ad8b04f16886512a3ee6ef828956481c57"}, + {file = "mkdocs-1.5.2-py3-none-any.whl", hash = "sha256:60a62538519c2e96fe8426654a67ee177350451616118a41596ae7c876bb7eac"}, + {file = "mkdocs-1.5.2.tar.gz", hash = "sha256:70d0da09c26cff288852471be03c23f0f521fc15cf16ac89c7a3bfb9ae8d24f9"}, ] [package.dependencies] @@ -759,16 +759,19 @@ colorama = {version = ">=0.4", markers = "platform_system == \"Windows\""} ghp-import = ">=1.0" importlib-metadata = {version = ">=4.3", markers = "python_version < \"3.10\""} jinja2 = ">=2.11.1" -markdown = ">=3.2.1,<3.4" +markdown = ">=3.2.1" +markupsafe = ">=2.0.1" mergedeep = ">=1.3.4" packaging = ">=20.5" +pathspec = ">=0.11.1" +platformdirs = ">=2.2.0" pyyaml = ">=5.1" pyyaml-env-tag = ">=0.1" watchdog = ">=2.0" [package.extras] i18n = ["babel (>=2.9.0)"] -min-versions = ["babel (==2.9.0)", "click (==7.0)", "colorama (==0.4)", "ghp-import (==1.0)", "importlib-metadata (==4.3)", "jinja2 (==2.11.1)", "markdown (==3.2.1)", "markupsafe (==2.0.1)", "mergedeep (==1.3.4)", "packaging (==20.5)", "pyyaml (==5.1)", "pyyaml-env-tag (==0.1)", "typing-extensions (==3.10)", "watchdog (==2.0)"] +min-versions = ["babel (==2.9.0)", "click (==7.0)", "colorama (==0.4)", "ghp-import (==1.0)", "importlib-metadata (==4.3)", "jinja2 (==2.11.1)", "markdown (==3.2.1)", "markupsafe (==2.0.1)", "mergedeep (==1.3.4)", "packaging (==20.5)", "pathspec (==0.11.1)", "platformdirs (==2.2.0)", "pyyaml (==5.1)", "pyyaml-env-tag (==0.1)", "typing-extensions (==3.10)", "watchdog (==2.0)"] [[package]] name = "mkdocs-autorefs" @@ -787,20 +790,20 @@ mkdocs = ">=1.1" [[package]] name = "mkdocs-material" -version = "9.1.19" +version = "9.1.21" description = "Documentation that simply works" optional = false python-versions = ">=3.7" files = [ - {file = "mkdocs_material-9.1.19-py3-none-any.whl", hash = "sha256:fb0a149294b319aedf36983919d8c40c9e566db21ead16258e20ebd2e6c0961c"}, - {file = "mkdocs_material-9.1.19.tar.gz", hash = "sha256:73b94b08c765e92a80645aac58d6a741fc5f587deec2b715489c714827b15a6f"}, + {file = "mkdocs_material-9.1.21-py3-none-any.whl", hash = "sha256:58bb2f11ef240632e176d6f0f7d1cff06be1d11c696a5a1b553b808b4280ed47"}, + {file = "mkdocs_material-9.1.21.tar.gz", hash = "sha256:71940cdfca84ab296b6362889c25395b1621273fb16c93deda257adb7ff44ec8"}, ] [package.dependencies] colorama = ">=0.4" jinja2 = ">=3.0" markdown = ">=3.2" -mkdocs = ">=1.4.2" +mkdocs = ">=1.5.0" mkdocs-material-extensions = ">=1.1" pygments = ">=2.14" pymdown-extensions = ">=9.9.1" @@ -1652,13 +1655,13 @@ zstd = ["zstandard (>=0.18.0)"] [[package]] name = "uvicorn" -version = "0.23.1" +version = "0.23.2" description = "The lightning-fast ASGI server." optional = true python-versions = ">=3.8" files = [ - {file = "uvicorn-0.23.1-py3-none-any.whl", hash = "sha256:1d55d46b83ee4ce82b4e82f621f2050adb3eb7b5481c13f9af1744951cae2f1f"}, - {file = "uvicorn-0.23.1.tar.gz", hash = "sha256:da9b0c8443b2d7ee9db00a345f1eee6db7317432c9d4400f5049cc8d358383be"}, + {file = "uvicorn-0.23.2-py3-none-any.whl", hash = "sha256:1f9be6558f01239d4fdf22ef8126c39cb1ad0addf76c40e760549d2c2f43ab53"}, + {file = "uvicorn-0.23.2.tar.gz", hash = "sha256:4d3cc12d7727ba72b64d12d3cc7743124074c0a69f7b201512fc50c3e3f1569a"}, ] [package.dependencies] @@ -1754,4 +1757,4 @@ server = ["fastapi", "pydantic-settings", "sse-starlette", "uvicorn"] [metadata] lock-version = "2.0" python-versions = "^3.8.1" -content-hash = "95adf05a0934d122dd601835c2d6353cc1dda03e4e8a5c5af02bfd1369afa74a" +content-hash = "4bfb67dfb72b02c845376211f7f958b2ece8c985944fbd03d246c858e846ddf6" diff --git a/pyproject.toml b/pyproject.toml index 2ac020a..c636d5d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,7 +17,7 @@ python = "^3.8.1" typing-extensions = "^4.7.1" numpy = "^1.24.4" diskcache = "^5.6.1" -uvicorn = { version = "^0.23.1", optional = true } +uvicorn = { version = "^0.23.2", optional = true } fastapi = { version = ">=0.100.0", optional = true } sse-starlette = { version = ">=1.6.1", optional = true } pydantic-settings = { version = ">=2.0.1", optional = true } @@ -25,9 +25,9 @@ pydantic-settings = { version = ">=2.0.1", optional = true } [tool.poetry.group.dev.dependencies] black = "^23.7.0" twine = "^4.0.2" -mkdocs = "^1.4.3" +mkdocs = "^1.5.2" mkdocstrings = {extras = ["python"], version = "^0.22.0"} -mkdocs-material = "^9.1.19" +mkdocs-material = "^9.1.21" pytest = "^7.4.0" httpx = "^0.24.1" scikit-build = "0.17.6"