From 8eb9769f78465ae0926d5f7d28cc368b877be96d Mon Sep 17 00:00:00 2001 From: Andrei Betlen Date: Fri, 26 May 2023 16:12:45 -0400 Subject: [PATCH 01/16] Add support for numpy --- llama_cpp/llama.py | 57 ++++++++++++++++++++++++++++++---------------- setup.py | 4 +--- 2 files changed, 39 insertions(+), 22 deletions(-) diff --git a/llama_cpp/llama.py b/llama_cpp/llama.py index 012bb86..6babebd 100644 --- a/llama_cpp/llama.py +++ b/llama_cpp/llama.py @@ -20,6 +20,9 @@ from collections import deque, OrderedDict from . import llama_cpp from .llama_types import * +import numpy as np +import numpy.typing as npt + class LlamaCache: """Cache for a llama.cpp model.""" @@ -73,11 +76,15 @@ class LlamaState: self, eval_tokens: Deque[int], eval_logits: Deque[List[float]], + input_ids: npt.NDArray[np.intc], + scores: npt.NDArray[np.single], llama_state, # type: llama_cpp.Array[llama_cpp.c_uint8] llama_state_size: int, ): self.eval_tokens = eval_tokens self.eval_logits = eval_logits + self.input_ids = input_ids + self.scores = scores self.llama_state = llama_state self.llama_state_size = llama_state_size @@ -207,20 +214,14 @@ class Llama: self._n_vocab = self.n_vocab() self._n_ctx = self.n_ctx() - data = (llama_cpp.llama_token_data * self._n_vocab)( - *[ - llama_cpp.llama_token_data( - id=llama_cpp.llama_token(i), - logit=llama_cpp.c_float(0.0), - p=llama_cpp.c_float(0.0), - ) - for i in range(self._n_vocab) - ] - ) size = llama_cpp.c_size_t(self._n_vocab) - sorted = False + sorted = llama_cpp.c_bool(False) + self._candidates_data = np.array( + [], dtype=[("id", np.intc), ("logit", np.single), ("p", np.single)] + ) + self._candidates_data.resize(3, self._n_vocab) candidates = llama_cpp.llama_token_data_array( - data=data, + data=self._candidates_data.ctypes.data_as(llama_cpp.llama_token_data_p), size=size, sorted=sorted, ) @@ -228,6 +229,9 @@ class Llama: self._token_nl = Llama.token_nl() self._token_eos = Llama.token_eos() + self._input_ids = np.array([], dtype=np.intc) + self._scores = np.ndarray((0, self._n_vocab), dtype=np.single) + def tokenize(self, text: bytes, add_bos: bool = True) -> List[int]: """Tokenize a string. 
@@ -319,6 +323,9 @@ class Llama: raise RuntimeError(f"llama_eval returned {return_code}") # Save tokens self.eval_tokens.extend(batch) + self._input_ids: npt.NDArray[np.intc] = np.concatenate( + (self._input_ids, np.array(batch, dtype=np.intc)), axis=0 + ) # Save logits rows = n_tokens if self.params.logits_all else 1 n_vocab = self._n_vocab @@ -326,6 +333,9 @@ class Llama: logits_view = llama_cpp.llama_get_logits(self.ctx) logits = [logits_view[i * cols : (i + 1) * cols] for i in range(rows)] self.eval_logits.extend(logits) + self._scores: npt.NDArray[np.single] = np.concatenate( + (self._scores, np.array(logits, dtype=np.single)), axis=0 + ) def _sample( self, @@ -354,18 +364,23 @@ class Llama: if last_n_tokens_size.value < 0 else last_n_tokens_size ) - logits = self.eval_logits[-1] + logits: npt.NDArray[np.single] = self._scores[-1, :] if logits_processor is not None: - logits = logits_processor(list(self.eval_tokens), logits) - self.eval_logits[-1] = logits + logits = np.array( + logits_processor(list(self.eval_tokens), logits.tolist()), + dtype=np.single, + ) + self._scores[-1, :] = logits + self.eval_logits[-1] = logits.tolist() nl_logit = logits[self._token_nl] candidates = self._candidates - for i, logit in enumerate(logits): - candidates.data[i].id = llama_cpp.llama_token(i) - candidates.data[i].logit = llama_cpp.c_float(logit) - candidates.data[i].p = llama_cpp.c_float(0.0) + candidates_data = self._candidates_data + candidates_data["id"] = np.arange(n_vocab, dtype=np.intc) # type: ignore + candidates_data["logit"] = logits + candidates_data["p"] = np.zeros(n_vocab, dtype=np.single) + candidates.data = candidates_data.ctypes.data_as(llama_cpp.llama_token_data_p) candidates.sorted = llama_cpp.c_bool(False) candidates.size = llama_cpp.c_size_t(n_vocab) llama_cpp.llama_sample_repetition_penalty( @@ -1371,6 +1386,8 @@ class Llama: return LlamaState( eval_tokens=self.eval_tokens.copy(), eval_logits=self.eval_logits.copy(), + scores=self._scores.copy(), + input_ids=self._input_ids.copy(), llama_state=llama_state_compact, llama_state_size=n_bytes, ) @@ -1379,6 +1396,8 @@ class Llama: assert self.ctx is not None self.eval_tokens = state.eval_tokens.copy() self.eval_logits = state.eval_logits.copy() + self._scores = state.scores.copy() + self._input_ids = state.input_ids.copy() state_size = state.llama_state_size if llama_cpp.llama_set_state_data(self.ctx, state.llama_state) != state_size: raise RuntimeError("Failed to set llama state data") diff --git a/setup.py b/setup.py index bd7192f..198dd74 100644 --- a/setup.py +++ b/setup.py @@ -16,9 +16,7 @@ setup( license="MIT", package_dir={"llama_cpp": "llama_cpp", "llama_cpp.server": "llama_cpp/server"}, packages=["llama_cpp", "llama_cpp.server"], - install_requires=[ - "typing-extensions>=4.5.0", - ], + install_requires=["typing-extensions>=4.5.0", "numpy>=1.24.2"], extras_require={ "server": ["uvicorn>=0.21.1", "fastapi>=0.95.0", "sse-starlette>=1.3.3"], }, From bd4b95da45aa129277cdba0ccdab10a1af99c2e5 Mon Sep 17 00:00:00 2001 From: Andrei Betlen Date: Fri, 26 May 2023 16:38:21 -0400 Subject: [PATCH 02/16] Reduce numpy version dependency --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 198dd74..c51202e 100644 --- a/setup.py +++ b/setup.py @@ -16,7 +16,7 @@ setup( license="MIT", package_dir={"llama_cpp": "llama_cpp", "llama_cpp.server": "llama_cpp/server"}, packages=["llama_cpp", "llama_cpp.server"], - install_requires=["typing-extensions>=4.5.0", "numpy>=1.24.2"], + 
install_requires=["typing-extensions>=4.5.0", "numpy>=1.20.0"], extras_require={ "server": ["uvicorn>=0.21.1", "fastapi>=0.95.0", "sse-starlette>=1.3.3"], }, From fe331ec58914feaacfa3052957fef53bbd005997 Mon Sep 17 00:00:00 2001 From: Andrei Betlen Date: Fri, 26 May 2023 20:03:31 -0400 Subject: [PATCH 03/16] Replace eval_logits and eval_tokens with numpy arrays --- llama_cpp/llama.py | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/llama_cpp/llama.py b/llama_cpp/llama.py index 6babebd..4f10227 100644 --- a/llama_cpp/llama.py +++ b/llama_cpp/llama.py @@ -299,6 +299,8 @@ class Llama: """Reset the model state.""" self.eval_tokens.clear() self.eval_logits.clear() + self._input_ids = np.array([], dtype=np.intc) + self._scores = np.ndarray((0, self._n_vocab), dtype=np.single) def eval(self, tokens: Sequence[int]): """Evaluate a list of tokens. @@ -310,7 +312,7 @@ class Llama: n_ctx = self._n_ctx for i in range(0, len(tokens), self.n_batch): batch = tokens[i : min(len(tokens), i + self.n_batch)] - n_past = min(n_ctx - len(batch), len(self.eval_tokens)) + n_past = min(n_ctx - len(batch), len(self._input_ids)) n_tokens = len(batch) return_code = llama_cpp.llama_eval( ctx=self.ctx, @@ -356,6 +358,7 @@ class Llama: ): assert self.ctx is not None assert len(self.eval_logits) > 0 + assert self._scores.shape[0] > 0 n_vocab = self._n_vocab n_ctx = self._n_ctx top_k = llama_cpp.c_int(n_vocab) if top_k.value <= 0 else top_k @@ -368,7 +371,7 @@ class Llama: if logits_processor is not None: logits = np.array( - logits_processor(list(self.eval_tokens), logits.tolist()), + logits_processor(self._input_ids.tolist(), logits.tolist()), dtype=np.single, ) self._scores[-1, :] = logits @@ -498,8 +501,8 @@ class Llama: """ assert self.ctx is not None last_n_tokens_data = [llama_cpp.llama_token(0)] * max( - 0, self.last_n_tokens_size - len(self.eval_tokens) - ) + list(self.eval_tokens)[-self.last_n_tokens_size :] + 0, self.last_n_tokens_size - len(self._input_ids) + ) + self._input_ids[-self.last_n_tokens_size :].tolist() return self._sample( last_n_tokens_data=(llama_cpp.llama_token * self.last_n_tokens_size)( *last_n_tokens_data @@ -557,9 +560,9 @@ class Llama: """ assert self.ctx is not None - if reset and len(self.eval_tokens) > 0: + if reset and len(self._input_ids) > 0: longest_prefix = 0 - for a, b in zip(self.eval_tokens, tokens[:-1]): + for a, b in zip(self._input_ids, tokens[:-1]): if a == b: longest_prefix += 1 else: @@ -569,6 +572,8 @@ class Llama: print("Llama.generate: prefix-match hit", file=sys.stderr) reset = False tokens = tokens[longest_prefix:] + self._input_ids = self._input_ids[:longest_prefix] + self._scores = self._scores[:longest_prefix, :] for _ in range(len(self.eval_tokens) - longest_prefix): self.eval_tokens.pop() try: @@ -595,7 +600,7 @@ class Llama: logits_processor=logits_processor, ) if stopping_criteria is not None and stopping_criteria( - list(self.eval_tokens), self.eval_logits[-1] + self._input_ids.tolist(), self._scores[-1, :].tolist() ): return tokens_or_none = yield token @@ -820,7 +825,7 @@ class Llama: self.detokenize(completion_tokens[:returned_tokens]) ) token_offset = len(prompt_tokens) + returned_tokens - logits = self.eval_logits[token_offset - 1] + logits = self._scores[token_offset - 1, :].tolist() current_logprobs = Llama.logits_to_logprobs(logits) sorted_logprobs = list( sorted( @@ -869,7 +874,7 @@ class Llama: break if stopping_criteria is not None and stopping_criteria( - list(self.eval_tokens), self.eval_logits[-1] + 
self._input_ids.tolist(), self._scores[-1, :].tolist() ): text = self.detokenize(completion_tokens) finish_reason = "stop" @@ -899,7 +904,7 @@ class Llama: self.detokenize(completion_tokens[:returned_tokens]) ) token_offset = len(prompt_tokens) + returned_tokens - 1 - logits = self.eval_logits[token_offset] + logits = self._scores[token_offset, :].tolist() current_logprobs = Llama.logits_to_logprobs(logits) sorted_logprobs = list( sorted( @@ -1001,8 +1006,7 @@ class Llama: for token in all_tokens ] all_logprobs = [ - Llama.logits_to_logprobs(list(map(float, row))) - for row in self.eval_logits + Llama.logits_to_logprobs(row.tolist()) for row in self._scores ][token_offset:] for token, token_str, logprobs_token in zip( all_tokens, all_token_strs, all_logprobs From 7fc7bc30e712c10d633a7acf912134ae92c0fbe3 Mon Sep 17 00:00:00 2001 From: Andrei Betlen Date: Fri, 26 May 2023 20:12:05 -0400 Subject: [PATCH 04/16] Remove usage of eval_tokens for cache check --- llama_cpp/llama.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llama_cpp/llama.py b/llama_cpp/llama.py index 4f10227..064b982 100644 --- a/llama_cpp/llama.py +++ b/llama_cpp/llama.py @@ -735,10 +735,10 @@ class Llama: try: cache_item = self.cache[prompt_tokens] cache_prefix_len = Llama.longest_token_prefix( - cache_item.eval_tokens, prompt_tokens + cache_item.input_ids.tolist(), prompt_tokens ) eval_prefix_len = Llama.longest_token_prefix( - self.eval_tokens, prompt_tokens + self._input_ids.tolist(), prompt_tokens ) if cache_prefix_len > eval_prefix_len: self.load_state(cache_item) From b0b154cfa6d22d317ad26f974f9916f79bbc78c2 Mon Sep 17 00:00:00 2001 From: Andrei Betlen Date: Fri, 26 May 2023 20:26:08 -0400 Subject: [PATCH 05/16] Add changelog message for numpy --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8b5fbec..ccb1c7e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - Added first version of the changelog +- Use numpy for internal buffers to reduce memory usage and improve performance. 
### Fixed From 84e313bd6e18e341f35be6c87e7151e7ce8d926d Mon Sep 17 00:00:00 2001 From: Andrei Betlen Date: Fri, 26 May 2023 22:02:16 -0400 Subject: [PATCH 06/16] Align dtype to match c structs --- llama_cpp/llama.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llama_cpp/llama.py b/llama_cpp/llama.py index 3084b33..ac51ce5 100644 --- a/llama_cpp/llama.py +++ b/llama_cpp/llama.py @@ -217,7 +217,7 @@ class Llama: size = llama_cpp.c_size_t(self._n_vocab) sorted = llama_cpp.c_bool(False) self._candidates_data = np.array( - [], dtype=[("id", np.intc), ("logit", np.single), ("p", np.single)] + [], dtype=np.dtype([("id", np.intc), ("logit", np.single), ("p", np.single)], align=True) ) self._candidates_data.resize(3, self._n_vocab) candidates = llama_cpp.llama_token_data_array( From 8f2b4456ad5b7a80be9264fa94927e8a79ed16a9 Mon Sep 17 00:00:00 2001 From: Andrei Betlen Date: Fri, 26 May 2023 22:04:31 -0400 Subject: [PATCH 07/16] Format --- llama_cpp/llama.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/llama_cpp/llama.py b/llama_cpp/llama.py index ac51ce5..18372c8 100644 --- a/llama_cpp/llama.py +++ b/llama_cpp/llama.py @@ -217,7 +217,10 @@ class Llama: size = llama_cpp.c_size_t(self._n_vocab) sorted = llama_cpp.c_bool(False) self._candidates_data = np.array( - [], dtype=np.dtype([("id", np.intc), ("logit", np.single), ("p", np.single)], align=True) + [], + dtype=np.dtype( + [("id", np.intc), ("logit", np.single), ("p", np.single)], align=True + ), ) self._candidates_data.resize(3, self._n_vocab) candidates = llama_cpp.llama_token_data_array( From 62ac7c3761518d718343866e87abfb4f0ae6b9bd Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 29 May 2023 21:03:33 +0000 Subject: [PATCH 08/16] Bump mkdocstrings from 0.21.2 to 0.22.0 Bumps [mkdocstrings](https://github.com/mkdocstrings/mkdocstrings) from 0.21.2 to 0.22.0. - [Release notes](https://github.com/mkdocstrings/mkdocstrings/releases) - [Changelog](https://github.com/mkdocstrings/mkdocstrings/blob/master/CHANGELOG.md) - [Commits](https://github.com/mkdocstrings/mkdocstrings/compare/0.21.2...0.22.0) --- updated-dependencies: - dependency-name: mkdocstrings dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- poetry.lock | 13 +++++++------ pyproject.toml | 2 +- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/poetry.lock b/poetry.lock index 50ae0cb..733aa33 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.4.2 and should not be changed by hand. +# This file is automatically @generated by Poetry and should not be changed by hand. [[package]] name = "anyio" @@ -835,17 +835,18 @@ files = [ [[package]] name = "mkdocstrings" -version = "0.21.2" +version = "0.22.0" description = "Automatic documentation from sources, for MkDocs." 
category = "dev" optional = false python-versions = ">=3.7" files = [ - {file = "mkdocstrings-0.21.2-py3-none-any.whl", hash = "sha256:949ef8da92df9d692ca07be50616459a6b536083a25520fd54b00e8814ce019b"}, - {file = "mkdocstrings-0.21.2.tar.gz", hash = "sha256:304e56a2e90595708a38a13a278e538a67ad82052dd5c8b71f77a604a4f3d911"}, + {file = "mkdocstrings-0.22.0-py3-none-any.whl", hash = "sha256:2d4095d461554ff6a778fdabdca3c00c468c2f1459d469f7a7f622a2b23212ba"}, + {file = "mkdocstrings-0.22.0.tar.gz", hash = "sha256:82a33b94150ebb3d4b5c73bab4598c3e21468c79ec072eff6931c8f3bfc38256"}, ] [package.dependencies] +importlib-metadata = {version = ">=4.6", markers = "python_version < \"3.10\""} Jinja2 = ">=2.11.1" Markdown = ">=3.3" MarkupSafe = ">=1.1" @@ -1653,9 +1654,9 @@ docs = ["furo", "jaraco.packaging (>=9)", "jaraco.tidelift (>=1.4)", "rst.linker testing = ["big-O", "flake8 (<5)", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.3)", "pytest-flake8", "pytest-mypy (>=0.9.1)"] [extras] -server = ["fastapi", "sse-starlette", "uvicorn"] +server = ["uvicorn", "fastapi", "sse-starlette"] [metadata] lock-version = "2.0" python-versions = "^3.8.1" -content-hash = "b1b158e4c9640e4dc197fe43e22c9f87e6e90945ec9b8bcba6042f81249d251e" +content-hash = "d372864238c465628bc679cbeeedd2da04ea8e33382ba5a1cc8d76b3481fcb1a" diff --git a/pyproject.toml b/pyproject.toml index aacdac0..419a971 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,7 +23,7 @@ sse-starlette = { version = "^1.3.3", optional = true } black = "^23.3.0" twine = "^4.0.2" mkdocs = "^1.4.3" -mkdocstrings = {extras = ["python"], version = "^0.21.2"} +mkdocstrings = {extras = ["python"], version = "^0.22.0"} mkdocs-material = "^9.1.14" pytest = "^7.3.1" httpx = "^0.24.1" From fa79484a294c789c4fba107e6aa5210174235c9a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 30 May 2023 00:29:33 +0000 Subject: [PATCH 09/16] Bump scikit-build from 0.13.0 to 0.17.5 Bumps [scikit-build](https://github.com/scikit-build/scikit-build) from 0.13.0 to 0.17.5. - [Release notes](https://github.com/scikit-build/scikit-build/releases) - [Changelog](https://github.com/scikit-build/scikit-build/blob/main/CHANGES.rst) - [Commits](https://github.com/scikit-build/scikit-build/compare/0.13.0...0.17.5) --- updated-dependencies: - dependency-name: scikit-build dependency-type: direct:development update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] --- poetry.lock | 19 +++++++++++-------- pyproject.toml | 2 +- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/poetry.lock b/poetry.lock index 733aa33..16b92f7 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1375,25 +1375,28 @@ jupyter = ["ipywidgets (>=7.5.1,<9)"] [[package]] name = "scikit-build" -version = "0.13.0" +version = "0.17.5" description = "Improved build system generator for Python C/C++/Fortran/Cython extensions" category = "dev" optional = false -python-versions = "*" +python-versions = ">=3.7" files = [ - {file = "scikit-build-0.13.0.tar.gz", hash = "sha256:a6ca1b7f1cc8a718564c19f535014f3a71f34508f72e750d4221f987eed0f06d"}, - {file = "scikit_build-0.13.0-py2.py3-none-any.whl", hash = "sha256:f903fef5cd76aa81dee040fa9cf3daaeff5c71fccfe5fc0bf6a62e54b166d492"}, + {file = "scikit_build-0.17.5-py3-none-any.whl", hash = "sha256:18861286b34fd2d685327d3bec6ebf4d33303adfaef28a08dd856710d16cf20f"}, + {file = "scikit_build-0.17.5.tar.gz", hash = "sha256:76856e7631d9e8887a7aa71913d5f184a6177246225391af96ce4801d89fa254"}, ] [package.dependencies] distro = "*" packaging = "*" -setuptools = {version = ">=28.0.0", markers = "python_version >= \"3\""} -wheel = ">=0.29.0" +setuptools = ">=42.0.0" +tomli = {version = "*", markers = "python_version < \"3.11\""} +wheel = ">=0.32.0" [package.extras] +cov = ["coverage[toml] (>=4.2)", "pytest-cov (>=2.7.1)"] docs = ["pygments", "sphinx (>=4)", "sphinx-issues", "sphinx-rtd-theme (>=1.0)", "sphinxcontrib-moderncmakedomain (>=3.19)"] -test = ["build (>=0.5)", "codecov (>=2.0.5)", "coverage (>=4.2)", "cython (>=0.25.1)", "flake8 (>=3.0.4)", "path.py (>=11.5.0)", "pathlib2", "pytest (>=4.5.0)", "pytest-cov (>=2.7.1)", "pytest-mock (>=1.10.4)", "pytest-runner (>=5.1)", "pytest-virtualenv (>=1.2.5)", "requests", "six (>=1.10.0)", "ubelt (>=0.8.2)", "virtualenv", "xdoctest (>=0.10.0)"] +doctest = ["ubelt (>=0.8.2)", "xdoctest (>=0.10.0)"] +test = ["build (>=0.7)", "cython (>=0.25.1)", "importlib-metadata", "pytest (>=6.0.0)", "pytest-mock (>=1.10.4)", "pytest-virtualenv (>=1.2.5)", "requests", "virtualenv"] [[package]] name = "secretstorage" @@ -1659,4 +1662,4 @@ server = ["uvicorn", "fastapi", "sse-starlette"] [metadata] lock-version = "2.0" python-versions = "^3.8.1" -content-hash = "d372864238c465628bc679cbeeedd2da04ea8e33382ba5a1cc8d76b3481fcb1a" +content-hash = "af969208807cf8dd49c51acdb309ea14019a0cd967a21c45b92e8af9f922eb3c" diff --git a/pyproject.toml b/pyproject.toml index 419a971..a8a8139 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,7 +27,7 @@ mkdocstrings = {extras = ["python"], version = "^0.22.0"} mkdocs-material = "^9.1.14" pytest = "^7.3.1" httpx = "^0.24.1" -scikit-build = "0.13" +scikit-build = "0.17.5" [tool.poetry.extras] server = ["uvicorn", "fastapi", "sse-starlette"] From 38b918503f0ab53036d518a57f232581856a2d02 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 30 May 2023 00:40:52 +0000 Subject: [PATCH 10/16] Bump mkdocs-material from 9.1.14 to 9.1.15 Bumps [mkdocs-material](https://github.com/squidfunk/mkdocs-material) from 9.1.14 to 9.1.15. - [Release notes](https://github.com/squidfunk/mkdocs-material/releases) - [Changelog](https://github.com/squidfunk/mkdocs-material/blob/master/CHANGELOG) - [Commits](https://github.com/squidfunk/mkdocs-material/compare/9.1.14...9.1.15) --- updated-dependencies: - dependency-name: mkdocs-material dependency-type: direct:development update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- poetry.lock | 8 ++++---- pyproject.toml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/poetry.lock b/poetry.lock index 16b92f7..833935b 100644 --- a/poetry.lock +++ b/poetry.lock @@ -800,14 +800,14 @@ mkdocs = ">=1.1" [[package]] name = "mkdocs-material" -version = "9.1.14" +version = "9.1.15" description = "Documentation that simply works" category = "dev" optional = false python-versions = ">=3.7" files = [ - {file = "mkdocs_material-9.1.14-py3-none-any.whl", hash = "sha256:b56a9f955ed32d38333715cbbf68ce38f683bf38610c65094fa4ef2db9f08bcd"}, - {file = "mkdocs_material-9.1.14.tar.gz", hash = "sha256:1ae74cc5464ef2f64574d4884512efed7f4db386fb9bc6af20fd427d7a702f49"}, + {file = "mkdocs_material-9.1.15-py3-none-any.whl", hash = "sha256:b49e12869ab464558e2dd3c5792da5b748a7e0c48ee83b4d05715f98125a7a39"}, + {file = "mkdocs_material-9.1.15.tar.gz", hash = "sha256:8513ab847c9a541ed3d11a3a7eed556caf72991ee786c31c5aac6691a121088a"}, ] [package.dependencies] @@ -1662,4 +1662,4 @@ server = ["uvicorn", "fastapi", "sse-starlette"] [metadata] lock-version = "2.0" python-versions = "^3.8.1" -content-hash = "af969208807cf8dd49c51acdb309ea14019a0cd967a21c45b92e8af9f922eb3c" +content-hash = "3835d3727fcf88b9a9cbba2e376980cd32252d351f3dab279de1bf615ba28160" diff --git a/pyproject.toml b/pyproject.toml index a8a8139..52ad34b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,7 @@ black = "^23.3.0" twine = "^4.0.2" mkdocs = "^1.4.3" mkdocstrings = {extras = ["python"], version = "^0.22.0"} -mkdocs-material = "^9.1.14" +mkdocs-material = "^9.1.15" pytest = "^7.3.1" httpx = "^0.24.1" scikit-build = "0.17.5" From 8dfb0816dfa33df71d58e5e1749beef49f27de88 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 30 May 2023 01:04:25 +0000 Subject: [PATCH 11/16] Bump uvicorn from 0.21.1 to 0.22.0 Bumps [uvicorn](https://github.com/encode/uvicorn) from 0.21.1 to 0.22.0. - [Release notes](https://github.com/encode/uvicorn/releases) - [Changelog](https://github.com/encode/uvicorn/blob/master/CHANGELOG.md) - [Commits](https://github.com/encode/uvicorn/compare/0.21.1...0.22.0) --- updated-dependencies: - dependency-name: uvicorn dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- poetry.lock | 8 ++++---- pyproject.toml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/poetry.lock b/poetry.lock index 833935b..945ef5a 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1556,14 +1556,14 @@ zstd = ["zstandard (>=0.18.0)"] [[package]] name = "uvicorn" -version = "0.21.1" +version = "0.22.0" description = "The lightning-fast ASGI server." 
category = "main" optional = true python-versions = ">=3.7" files = [ - {file = "uvicorn-0.21.1-py3-none-any.whl", hash = "sha256:e47cac98a6da10cd41e6fd036d472c6f58ede6c5dbee3dbee3ef7a100ed97742"}, - {file = "uvicorn-0.21.1.tar.gz", hash = "sha256:0fac9cb342ba099e0d582966005f3fdba5b0290579fed4a6266dc702ca7bb032"}, + {file = "uvicorn-0.22.0-py3-none-any.whl", hash = "sha256:e9434d3bbf05f310e762147f769c9f21235ee118ba2d2bf1155a7196448bd996"}, + {file = "uvicorn-0.22.0.tar.gz", hash = "sha256:79277ae03db57ce7d9aa0567830bbb51d7a612f54d6e1e3e92da3ef24c2c8ed8"}, ] [package.dependencies] @@ -1662,4 +1662,4 @@ server = ["uvicorn", "fastapi", "sse-starlette"] [metadata] lock-version = "2.0" python-versions = "^3.8.1" -content-hash = "3835d3727fcf88b9a9cbba2e376980cd32252d351f3dab279de1bf615ba28160" +content-hash = "5a89d0ed28ac6e795e43b7b06f2b99d198ab56a6d0ab05d47768b84ea8a0337a" diff --git a/pyproject.toml b/pyproject.toml index 52ad34b..9ea9116 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,7 +15,7 @@ include = [ [tool.poetry.dependencies] python = "^3.8.1" typing-extensions = "^4.5.0" -uvicorn = { version = "^0.21.1", optional = true } +uvicorn = { version = "^0.22.0", optional = true } fastapi = { version = "^0.95.0", optional = true } sse-starlette = { version = "^1.3.3", optional = true } From f4fc126a00874c756c846ce6fbf13704b60fd0b2 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 30 May 2023 01:09:10 +0000 Subject: [PATCH 12/16] Bump typing-extensions from 4.5.0 to 4.6.2 Bumps [typing-extensions](https://github.com/python/typing_extensions) from 4.5.0 to 4.6.2. - [Changelog](https://github.com/python/typing_extensions/blob/main/CHANGELOG.md) - [Commits](https://github.com/python/typing_extensions/compare/4.5.0...4.6.2) --- updated-dependencies: - dependency-name: typing-extensions dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] --- poetry.lock | 8 ++++---- pyproject.toml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/poetry.lock b/poetry.lock index 945ef5a..70e4272 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1526,14 +1526,14 @@ urllib3 = ">=1.26.0" [[package]] name = "typing-extensions" -version = "4.5.0" +version = "4.6.2" description = "Backported and Experimental Type Hints for Python 3.7+" category = "main" optional = false python-versions = ">=3.7" files = [ - {file = "typing_extensions-4.5.0-py3-none-any.whl", hash = "sha256:fb33085c39dd998ac16d1431ebc293a8b3eedd00fd4a32de0ff79002c19511b4"}, - {file = "typing_extensions-4.5.0.tar.gz", hash = "sha256:5cb5f4a79139d699607b3ef622a1dedafa84e115ab0024e0d9c044a9479ca7cb"}, + {file = "typing_extensions-4.6.2-py3-none-any.whl", hash = "sha256:3a8b36f13dd5fdc5d1b16fe317f5668545de77fa0b8e02006381fd49d731ab98"}, + {file = "typing_extensions-4.6.2.tar.gz", hash = "sha256:06006244c70ac8ee83fa8282cb188f697b8db25bc8b4df07be1873c43897060c"}, ] [[package]] @@ -1662,4 +1662,4 @@ server = ["uvicorn", "fastapi", "sse-starlette"] [metadata] lock-version = "2.0" python-versions = "^3.8.1" -content-hash = "5a89d0ed28ac6e795e43b7b06f2b99d198ab56a6d0ab05d47768b84ea8a0337a" +content-hash = "f5aacb68729427e49bb796a598890fedd8ba1950af3fd577fb85edde2c27338f" diff --git a/pyproject.toml b/pyproject.toml index 9ea9116..39b731e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,7 +14,7 @@ include = [ [tool.poetry.dependencies] python = "^3.8.1" -typing-extensions = "^4.5.0" +typing-extensions = "^4.6.2" uvicorn = { version = "^0.22.0", optional = true } fastapi = { version = "^0.95.0", optional = true } sse-starlette = { version = "^1.3.3", optional = true } From b1daf568e390e11da6206737d4a1a8d92bb4568b Mon Sep 17 00:00:00 2001 From: Andrei Betlen Date: Mon, 29 May 2023 21:39:19 -0400 Subject: [PATCH 13/16] Update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8b5fbec..1f6dac7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - Added first version of the changelog +- Server: Use async routes ### Fixed From f4ff8a03c4bb43ca0e0ca51e6dcdbc24f0fb13dd Mon Sep 17 00:00:00 2001 From: Andrei Betlen Date: Tue, 30 May 2023 03:06:57 -0400 Subject: [PATCH 14/16] Add numpy dependency to pyproject --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 39b731e..f75b802 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,6 +15,7 @@ include = [ [tool.poetry.dependencies] python = "^3.8.1" typing-extensions = "^4.6.2" +numpy = "^1.20.0" uvicorn = { version = "^0.22.0", optional = true } fastapi = { version = "^0.95.0", optional = true } sse-starlette = { version = "^1.3.3", optional = true } From cb0bcdbbb7bbcb4182cbb8106b8e183c9da70481 Mon Sep 17 00:00:00 2001 From: Andrei Betlen Date: Tue, 30 May 2023 03:07:36 -0400 Subject: [PATCH 15/16] Bump version --- CHANGELOG.md | 2 ++ pyproject.toml | 2 +- setup.py | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cb5f443..d9f52da 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [v0.1.56] + ### Added - Added first version of the changelog diff --git a/pyproject.toml b/pyproject.toml index f75b802..9f83e19 100644 --- 
a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "llama_cpp_python" -version = "0.1.55" +version = "0.1.56" description = "Python bindings for the llama.cpp library" authors = ["Andrei Betlen "] license = "MIT" diff --git a/setup.py b/setup.py index a1a2c5b..39e1416 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ setup( description="A Python wrapper for llama.cpp", long_description=long_description, long_description_content_type="text/markdown", - version="0.1.55", + version="0.1.56", author="Andrei Betlen", author_email="abetlen@gmail.com", license="MIT", From 9dd8cf34726b839222af45684701ee63bb6cd535 Mon Sep 17 00:00:00 2001 From: Gary Mulder Date: Tue, 30 May 2023 08:20:34 +0100 Subject: [PATCH 16/16] Update bug_report.md - Added section on how to repro using llama.cpp in ./vendor/llama.cpp - Added a few more example environment commands to aid in debugging. --- .github/ISSUE_TEMPLATE/bug_report.md | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index b8e33e5..5df12aa 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -57,7 +57,17 @@ Please provide detailed steps for reproducing the issue. We are not sitting in f 3. step 3 4. etc. -**Note: Many issues seem to be regarding performance issues / differences with `llama.cpp`. In these cases we need to confirm that you're comparing against the version of `llama.cpp` that was built with your python package, and which parameters you're passing to the context.** +**Note: Many issues seem to be regarding functional or performance issues / differences with `llama.cpp`. In these cases we need to confirm that you're comparing against the version of `llama.cpp` that was built with your python package, and which parameters you're passing to the context.** + +Try the following: + +1. `git clone https://github.com/abetlen/llama-cpp-python` +2. `cd llama-cpp-python` +3. `rm -rf _skbuild/` # delete any old builds +4. `python setup.py develop` +5. `cd ./vendor/llama.cpp` +6. Follow [llama.cpp's instructions](https://github.com/ggerganov/llama.cpp#build) to `cmake` llama.cpp +7. Run llama.cpp's `./main` with the same arguments you previously passed to llama-cpp-python and see if you can reproduce the issue. If you can, [log an issue with llama.cpp](https://github.com/ggerganov/llama.cpp/issues) # Failure Logs @@ -73,8 +83,14 @@ commit 47b0aa6e957b93dbe2c29d53af16fbae2dd628f2 llama-cpp-python$ python3 --version Python 3.10.10 -llama-cpp-python$ pip list | egrep "uvicorn|fastapi|sse-starlette" -fastapi 0.95.0 -sse-starlette 1.3.3 -uvicorn 0.21.1 +llama-cpp-python$ pip list | egrep "uvicorn|fastapi|sse-starlette|numpy" +fastapi 0.95.0 +numpy 1.24.3 +sse-starlette 1.3.3 +uvicorn 0.21.1 + +llama-cpp-python/vendor/llama.cpp$ git log | head -3 +commit 66874d4fbcc7866377246efbcee938e8cc9c7d76 +Author: Kerfuffle <44031344+KerfuffleV2@users.noreply.github.com> +Date: Thu May 25 20:18:01 2023 -0600 ```
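
A note on the core technique in patches 01, 06, and 07: the candidate buffer handed to llama.cpp's sampler is a NumPy structured array whose memory layout matches the `llama_token_data` C struct, so refreshing the candidates becomes three vectorized field assignments and one pointer handoff instead of a Python loop over `n_vocab` ctypes objects. The sketch below reconstructs the idea outside the library; it uses plain `ctypes` rather than the real `llama_cpp` bindings, and `TokenData`, the tiny `n_vocab`, and the random logits are illustrative stand-ins, not code from this series.

```python
import ctypes
import numpy as np

# Illustrative mirror of llama.cpp's llama_token_data struct
# (a stand-in for demonstration, not the real llama_cpp binding).
class TokenData(ctypes.Structure):
    _fields_ = [
        ("id", ctypes.c_int),
        ("logit", ctypes.c_float),
        ("p", ctypes.c_float),
    ]

n_vocab = 8  # tiny vocabulary for demonstration

# Structured dtype matching the struct field-for-field; align=True makes
# NumPy apply C padding rules so field offsets agree with the struct's
# (the point of patch 06, "Align dtype to match c structs").
candidates_dtype = np.dtype(
    [("id", np.intc), ("logit", np.single), ("p", np.single)], align=True
)
candidates_data = np.empty(n_vocab, dtype=candidates_dtype)

# Per-sample refresh: three vectorized writes replace the per-token loop
# that patch 01 deletes from Llama._sample.
logits = np.random.rand(n_vocab).astype(np.single)
candidates_data["id"] = np.arange(n_vocab, dtype=np.intc)
candidates_data["logit"] = logits
candidates_data["p"] = 0.0

# Hand the buffer to C by pointer, as the patch does via
# candidates_data.ctypes.data_as(llama_cpp.llama_token_data_p).
data_ptr = candidates_data.ctypes.data_as(ctypes.POINTER(TokenData))
assert data_ptr[3].id == 3
assert abs(data_ptr[3].logit - float(logits[3])) < 1e-6
```

The `_input_ids` and `_scores` arrays introduced in patch 01 and adopted in patch 03 trade on the same idea: an `(n, n_vocab)` float32 matrix grown with `np.concatenate` replaces a deque of per-token Python float lists, which is where the changelog's memory and performance claims come from. One quirk worth noting: `resize(3, self._n_vocab)` in patch 01 leaves the candidates buffer with shape `(3, n_vocab)`, so it appears to allocate three times the records ever addressed through the pointer; the sketch above sizes the buffer to a flat `n_vocab` entries instead.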