diff --git a/Makefile b/Makefile
index 806b120..ff1484c 100644
--- a/Makefile
+++ b/Makefile
@@ -30,6 +30,12 @@ build.metal:
 build.vulkan:
 	CMAKE_ARGS="-DLLAMA_VULKAN=on" python3 -m pip install --verbose -e .
 
+build.kompute:
+	CMAKE_ARGS="-DLLAMA_KOMPUTE=on" python3 -m pip install --verbose -e .
+
+build.sycl:
+	CMAKE_ARGS="-DLLAMA_SYCL=on" python3 -m pip install --verbose -e .
+
 build.sdist:
 	python3 -m build --sdist
 
diff --git a/llama_cpp/llama_cpp.py b/llama_cpp/llama_cpp.py
index c4256dd..2168579 100644
--- a/llama_cpp/llama_cpp.py
+++ b/llama_cpp/llama_cpp.py
@@ -187,6 +187,7 @@ LLAMA_TOKEN_TYPE_BYTE = 6
 # LLAMA_FTYPE_MOSTLY_IQ2_XS = 20, // except 1d tensors
 # LLAMA_FTYPE_MOSTLY_Q2_K_S = 21, // except 1d tensors
 # LLAMA_FTYPE_MOSTLY_Q3_K_XS = 22, // except 1d tensors
+# LLAMA_FTYPE_MOSTLY_IQ3_XXS = 23, // except 1d tensors
 # LLAMA_FTYPE_GUESSED = 1024, // not specified in the model file
 # };
 
@@ -211,6 +212,7 @@ LLAMA_FTYPE_MOSTLY_IQ2_XXS = 19
 LLAMA_FTYPE_MOSTLY_IQ2_XS = 20
 LLAMA_FTYPE_MOSTLY_Q2_K_S = 21
 LLAMA_FTYPE_MOSTLY_Q3_K_XS = 22
+LLAMA_FTYPE_MOSTLY_IQ3_XXS = 23
 LLAMA_FTYPE_GUESSED = 1024
 
 # enum llama_rope_scaling_type {
diff --git a/vendor/llama.cpp b/vendor/llama.cpp
index 2aed77e..8f8ddfc 160000
--- a/vendor/llama.cpp
+++ b/vendor/llama.cpp
@@ -1 +1 @@
-Subproject commit 2aed77eb06a329f0d82bb1c467f4244904d4073f
+Subproject commit 8f8ddfcfadc830b936318c3ea9fe2e8e3365aa85
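
The new build.kompute and build.sycl targets follow the pattern of the existing backend targets: each sets the matching CMAKE_ARGS flag (-DLLAMA_KOMPUTE=on / -DLLAMA_SYCL=on) before the editable pip install, so `make build.sycl` is equivalent to exporting that flag and running the pip command by hand. As a minimal sketch of how the new quantization constant surfaces on the Python side (assuming a build from this revision is installed and imports as llama_cpp):

    # The new file-type constant is an ordinary module-level int,
    # importable alongside the existing LLAMA_FTYPE_* values.
    import llama_cpp

    assert llama_cpp.LLAMA_FTYPE_MOSTLY_IQ3_XXS == 23
    print(llama_cpp.LLAMA_FTYPE_MOSTLY_IQ3_XXS)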