From d9b38e3e3a28395203669ca99731b5f91aefad39 Mon Sep 17 00:00:00 2001
From: Andrei Betlen
Date: Fri, 10 Nov 2023 04:41:19 -0500
Subject: [PATCH 1/9] Potential bugfix for eval

---
 llama_cpp/llama.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/llama_cpp/llama.py b/llama_cpp/llama.py
index 1e78221..f2e1383 100644
--- a/llama_cpp/llama.py
+++ b/llama_cpp/llama.py
@@ -1019,12 +1019,11 @@ class Llama:
         """
         assert self._ctx.ctx is not None
         assert self._batch.batch is not None
-        n_ctx = self._n_ctx
+        self._ctx.kv_cache_seq_rm(-1, self.n_tokens, -1)
         for i in range(0, len(tokens), self.n_batch):
             batch = tokens[i : min(len(tokens), i + self.n_batch)]
-            n_past = min(n_ctx - len(batch), self.n_tokens)
+            n_past = self.n_tokens
             n_tokens = len(batch)
-            self._ctx.kv_cache_seq_rm(-1, n_past, -1)
             self._batch.set_batch(
                 batch=batch, n_past=n_past, logits_all=self.context_params.logits_all
             )

From 6f0b0b1b840af846938ed74d0e8170a91c40e617 Mon Sep 17 00:00:00 2001
From: Andrei Betlen
Date: Fri, 10 Nov 2023 05:15:41 -0500
Subject: [PATCH 2/9] Fix sampling bug when logits_all=False

---
 llama_cpp/llama.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llama_cpp/llama.py b/llama_cpp/llama.py
index f2e1383..2e18b47 100644
--- a/llama_cpp/llama.py
+++ b/llama_cpp/llama.py
@@ -1029,16 +1029,16 @@ class Llama:
             )
             self._ctx.decode(self._batch)
             # Save tokens
-            self.input_ids[self.n_tokens : self.n_tokens + n_tokens] = batch
+            self.input_ids[n_past : n_past + n_tokens] = batch
             # Save logits
-            rows = n_tokens if self.context_params.logits_all else 1
+            rows = n_tokens
             cols = self._n_vocab
             offset = (
                 0 if self.context_params.logits_all else n_tokens - 1
             )  # NOTE: Only save the last token logits if logits_all is False
-            self.scores[self.n_tokens + offset : self.n_tokens + n_tokens, :].reshape(
+            self.scores[n_past + offset : n_past + n_tokens, :].reshape(
                 -1
-            )[:] = self._ctx.get_logits()[: rows * cols]
+            )[:] = self._ctx.get_logits()[offset * cols: rows * cols]
         # Update n_tokens
         self.n_tokens += n_tokens

From e32ecb051687de4aa0257cb66d1f729f2337b115 Mon Sep 17 00:00:00 2001
From: Andrei Betlen
Date: Fri, 10 Nov 2023 05:39:42 -0500
Subject: [PATCH 3/9] Fix tests

---
 tests/test_llama.py | 28 ++++++++++++++++------------
 1 file changed, 16 insertions(+), 12 deletions(-)

diff --git a/tests/test_llama.py b/tests/test_llama.py
index 5448743..23c7e86 100644
--- a/tests/test_llama.py
+++ b/tests/test_llama.py
@@ -1,4 +1,7 @@
+import ctypes
+
 import pytest
+
 import llama_cpp
 
 MODEL = "./vendor/llama.cpp/models/ggml-vocab-llama.gguf"
@@ -36,19 +39,20 @@ def test_llama_cpp_tokenization():
 
 
 def test_llama_patch(monkeypatch):
-    llama = llama_cpp.Llama(model_path=MODEL, vocab_only=True)
+    n_ctx = 128
+    llama = llama_cpp.Llama(model_path=MODEL, vocab_only=True, n_ctx=n_ctx)
     n_vocab = llama_cpp.llama_n_vocab(llama._model.model)
+    assert n_vocab == 32000
 
     ## Set up mock function
-    def mock_eval(*args, **kwargs):
+    def mock_decode(*args, **kwargs):
         return 0
 
     def mock_get_logits(*args, **kwargs):
-        return (llama_cpp.c_float * n_vocab)(
-            *[llama_cpp.c_float(0) for _ in range(n_vocab)]
-        )
+        size = n_vocab * n_ctx
+        return (llama_cpp.c_float * size)()
 
-    monkeypatch.setattr("llama_cpp.llama_cpp.llama_decode", mock_eval)
+    monkeypatch.setattr("llama_cpp.llama_cpp.llama_decode", mock_decode)
     monkeypatch.setattr("llama_cpp.llama_cpp.llama_get_logits", mock_get_logits)
 
     output_text = " jumps over the lazy dog."
@@ -126,19 +130,19 @@ def test_llama_pickle():
 
 
 def test_utf8(monkeypatch):
-    llama = llama_cpp.Llama(model_path=MODEL, vocab_only=True)
+    n_ctx = 512
+    llama = llama_cpp.Llama(model_path=MODEL, vocab_only=True, n_ctx=n_ctx, logits_all=True)
     n_vocab = llama.n_vocab()
 
     ## Set up mock function
-    def mock_eval(*args, **kwargs):
+    def mock_decode(*args, **kwargs):
         return 0
 
     def mock_get_logits(*args, **kwargs):
-        return (llama_cpp.c_float * n_vocab)(
-            *[llama_cpp.c_float(0) for _ in range(n_vocab)]
-        )
+        size = n_vocab * n_ctx
+        return (llama_cpp.c_float * size)()
 
-    monkeypatch.setattr("llama_cpp.llama_cpp.llama_decode", mock_eval)
+    monkeypatch.setattr("llama_cpp.llama_cpp.llama_decode", mock_decode)
     monkeypatch.setattr("llama_cpp.llama_cpp.llama_get_logits", mock_get_logits)
 
     output_text = "😀"

From 2f070afd6115ab6733358c08538f01694b9aabf1 Mon Sep 17 00:00:00 2001
From: Andrei Betlen
Date: Fri, 10 Nov 2023 05:45:44 -0500
Subject: [PATCH 4/9] Don't install in editable mode for release

---
 .github/workflows/build-and-release.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/build-and-release.yaml b/.github/workflows/build-and-release.yaml
index 1356d37..1f80046 100644
--- a/.github/workflows/build-and-release.yaml
+++ b/.github/workflows/build-and-release.yaml
@@ -29,7 +29,7 @@ jobs:
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          python -m pip install -e .[all]
+          python -m pip install .[all]
 
       - name: Build wheels
         run: python -m cibuildwheel --output-dir wheelhouse

From ed5a9260f6e240df87c2b366d3aebdb49d3ba4ac Mon Sep 17 00:00:00 2001
From: Andrei Betlen
Date: Fri, 10 Nov 2023 05:54:23 -0500
Subject: [PATCH 5/9] Force LD_LIBRARY_PATH

---
 .github/workflows/build-and-release.yaml | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/build-and-release.yaml b/.github/workflows/build-and-release.yaml
index 1f80046..7a9644d 100644
--- a/.github/workflows/build-and-release.yaml
+++ b/.github/workflows/build-and-release.yaml
@@ -29,7 +29,12 @@ jobs:
       - name: Install dependencies
         run: |
           python -m pip install --upgrade pip
-          python -m pip install .[all]
+          python -m pip install -e .[all]
+
+      - name: Set LD_LIBRARY_PATH (Hack)
+        run: |
+          echo "LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/runner/work/llama-cpp-python/llama-cpp-python/llama_cpp/" >> $GITHUB_ENV
+
 
       - name: Build wheels
         run: python -m cibuildwheel --output-dir wheelhouse

From e02d52df298281fb4bf1a9c638491201639545ed Mon Sep 17 00:00:00 2001
From: Andrei Betlen
Date: Fri, 10 Nov 2023 06:01:58 -0500
Subject: [PATCH 6/9] Try to clean before calling cibuildwheel

---
 .github/workflows/build-and-release.yaml | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/.github/workflows/build-and-release.yaml b/.github/workflows/build-and-release.yaml
index 7a9644d..b4c2b7e 100644
--- a/.github/workflows/build-and-release.yaml
+++ b/.github/workflows/build-and-release.yaml
@@ -30,11 +30,7 @@ jobs:
         run: |
           python -m pip install --upgrade pip
           python -m pip install -e .[all]
-
-      - name: Set LD_LIBRARY_PATH (Hack)
-        run: |
-          echo "LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/runner/work/llama-cpp-python/llama-cpp-python/llama_cpp/" >> $GITHUB_ENV
-
+          make clean
 
       - name: Build wheels
         run: python -m cibuildwheel --output-dir wheelhouse

From 5f15a3d91c2ccdeef37d53a971525d33ca0854a1 Mon Sep 17 00:00:00 2001
From: Andrei Betlen
Date: Fri, 10 Nov 2023 06:05:42 -0500
Subject: [PATCH 7/9] Disable wheel repair command

---
 .github/workflows/build-and-release.yaml | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/build-and-release.yaml b/.github/workflows/build-and-release.yaml
index b4c2b7e..61027ef 100644
--- a/.github/workflows/build-and-release.yaml
+++ b/.github/workflows/build-and-release.yaml
@@ -30,10 +30,12 @@ jobs:
         run: |
           python -m pip install --upgrade pip
           python -m pip install -e .[all]
-          make clean
 
       - name: Build wheels
         run: python -m cibuildwheel --output-dir wheelhouse
+        env:
+          # disable repair
+          CIBW_REPAIR_WHEEL_COMMAND: ""
 
       - uses: actions/upload-artifact@v3
         with:

From fb743f6c870f4a051961228160644f8ceca66580 Mon Sep 17 00:00:00 2001
From: Andrei Betlen
Date: Fri, 10 Nov 2023 06:21:14 -0500
Subject: [PATCH 8/9] Update llama.cpp

---
 vendor/llama.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vendor/llama.cpp b/vendor/llama.cpp
index 875fb42..a75fa57 160000
--- a/vendor/llama.cpp
+++ b/vendor/llama.cpp
@@ -1 +1 @@
-Subproject commit 875fb42871a0f5a88fbe31a0b5edd697b84038e4
+Subproject commit a75fa576abba9d37f463580c379e4bbf1e1ad03c

From b7e60b66f47950e385980a1329af9dfb14da6906 Mon Sep 17 00:00:00 2001
From: Andrei Betlen
Date: Fri, 10 Nov 2023 06:21:24 -0500
Subject: [PATCH 9/9] Bump version

---
 CHANGELOG.md          | 13 +++++++++++++
 llama_cpp/__init__.py |  2 +-
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index de3a03f..e6f0241 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,19 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [0.2.16]
+
+- Update llama.cpp to ggerganov/llama.cpp@a75fa576abba9d37f463580c379e4bbf1e1ad03c
+- Add `set_seed` to `Llama` class by @abetlen in fd41ed3a908761d286102a019a34c2938a15118d
+- Fix server doc arguments by @kjunggithub in #892
+- Fix response_format handler in llava chat handler by @abetlen in b62c44983921197ed10a7d29dc4ba920e9979380
+- Fix default max_tokens, chat completion is now unlimited (to context length) and completion is 16 tokens to match OpenAI defaults by @abetlen in e7962d2c733cbbeec5a37392c81f64185a9a39e8
+- Fix json_schema_to_gbnf helper so that it takes a json schema string as input instead by @abetlen in faeae181b1e868643c0dc28fcf039f077baf0829
+- Add support for $ref and $def in json_schema_to_gbnf to handle more complex function schemas by @abetlen in 770df344369c0630df1be14be9f9e301e7c56d24
+- Update functionary chat handler for new OpenAI api by @abetlen in 1b376c62b775b401653facf25a519d116aafe99a
+- Fix add default stop sequence to chatml chat format by @abetlen in b84d76a844149216d511cfd8cdb9827148a1853c
+- Fix sampling bug when logits_all=False by @abetlen in 6f0b0b1b840af846938ed74d0e8170a91c40e617
+
 ## [0.2.15]
 
 - Update llama.cpp to ggerganov/llama.cpp@0a7c980b6f94a049cb804573df2d8092a34df8e4

diff --git a/llama_cpp/__init__.py b/llama_cpp/__init__.py
index 6e64afb..a24e550 100644
--- a/llama_cpp/__init__.py
+++ b/llama_cpp/__init__.py
@@ -1,4 +1,4 @@
 from .llama_cpp import *
 from .llama import *
 
-__version__ = "0.2.15"
\ No newline at end of file
+__version__ = "0.2.16"
\ No newline at end of file
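
Note on [PATCH 2/9] (Fix sampling bug when logits_all=False): llama.cpp lays out
the decoded logits as one row of n_vocab floats per batch token, and when
logits_all=False only the last token's row is requested, so eval must copy
exactly one row into self.scores at the position the sampler later reads
(n_past + n_tokens - 1). The old code kept the right destination but sliced the
first row of the source buffer (get_logits()[: rows * cols] with rows == 1);
the patch keeps rows == n_tokens and skips `offset` rows on both sides instead.
A minimal standalone sketch of that offset arithmetic, using NumPy arrays with
toy sizes in place of the real ctypes buffers (all names and sizes below are
illustrative, not part of the patch):

import numpy as np

n_vocab = 8         # toy vocabulary size
n_past = 3          # tokens already in the KV cache before this batch
n_tokens = 4        # tokens decoded in this batch
logits_all = False  # request logits for the last token only

# `scores` stands in for self.scores; `logits` for the buffer returned by
# self._ctx.get_logits(), laid out as n_tokens rows of n_vocab floats.
scores = np.zeros((16, n_vocab), dtype=np.single)
logits = np.arange(n_tokens * n_vocab, dtype=np.single)

rows = n_tokens
cols = n_vocab
offset = 0 if logits_all else n_tokens - 1  # rows to skip at the front
scores[n_past + offset : n_past + n_tokens, :].reshape(-1)[:] = logits[
    offset * cols : rows * cols
]

# Exactly one row was written, at index n_past + n_tokens - 1, i.e. the row
# the sampler reads for the next token; everything before it is untouched.
assert (scores[n_past + n_tokens - 1] == logits[-cols:]).all()
assert not scores[: n_past + n_tokens - 1].any()

With logits_all=True, offset is 0 and the same expression copies all n_tokens
rows, which is why the patch can use a single slice for both cases.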