diff --git a/llama_cpp/llama_cpp.py b/llama_cpp/llama_cpp.py
index b4717bf..e0424c6 100644
--- a/llama_cpp/llama_cpp.py
+++ b/llama_cpp/llama_cpp.py
@@ -136,9 +136,9 @@ LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16 = ctypes.c_int(
 )  # tok_embeddings.weight and output.weight are F16
 LLAMA_FTYPE_MOSTLY_Q4_2 = ctypes.c_int(5)  # except 1d tensors
 # LLAMA_FTYPE_MOSTYL_Q4_3 = ctypes.c_int(6)  # except 1d tensors
-LLAMA_FTYPE_MOSTYL_Q8_0 = ctypes.c_int(7)  # except 1d tensors
-LLAMA_FTYPE_MOSTYL_Q5_0 = ctypes.c_int(8)  # except 1d tensors
-LLAMA_FTYPE_MOSTYL_Q5_1 = ctypes.c_int(9)  # except 1d tensors
+LLAMA_FTYPE_MOSTLY_Q8_0 = ctypes.c_int(7)  # except 1d tensors
+LLAMA_FTYPE_MOSTLY_Q5_0 = ctypes.c_int(8)  # except 1d tensors
+LLAMA_FTYPE_MOSTLY_Q5_1 = ctypes.c_int(9)  # except 1d tensors
 
 # Functions