diff --git a/llm/ggml.go b/llm/ggml.go
index 3fb0539c..c4fc0a7c 100644
--- a/llm/ggml.go
+++ b/llm/ggml.go
@@ -31,6 +31,11 @@ const (
 	fileTypeQ5_K_S
 	fileTypeQ5_K_M
 	fileTypeQ6_K
+	fileTypeIQ2_XXS
+	fileTypeIQ2_XS
+	fileTypeQ2_K_S
+	fileTypeQ3_K_XS
+	fileTypeIQ3_XXS
 )
 
 func fileType(fileType uint32) string {
@@ -69,6 +74,16 @@ func fileType(fileType uint32) string {
 		return "Q5_K_M"
 	case fileTypeQ6_K:
 		return "Q6_K"
+	case fileTypeIQ2_XXS:
+		return "IQ2_XXS"
+	case fileTypeIQ2_XS:
+		return "IQ2_XS"
+	case fileTypeQ2_K_S:
+		return "Q2_K_S"
+	case fileTypeQ3_K_XS:
+		return "Q3_K_XS"
+	case fileTypeIQ3_XXS:
+		return "IQ3_XXS"
 	default:
 		return "unknown"
 	}
diff --git a/llm/gguf.go b/llm/gguf.go
index 436be42c..b630b7f4 100644
--- a/llm/gguf.go
+++ b/llm/gguf.go
@@ -115,6 +115,14 @@ func (t tensor) typeSize() uint64 {
 		return 2 + 2 + 12 + blockSize/8 + blockSize/2
 	case 14: // Q6_K
 		return blockSize/2 + blockSize/4 + blockSize/16 + 2
+	case 15: // Q8_K: 4-byte float32 scale (not fp16) + int8 quants + one int16 sum per 16 quants, per ggml's block_q8_K
+		return 4 + blockSize + 2*blockSize/16
+	case 16: // IQ2_XXS
+		return 2 + 2*blockSize/8
+	case 17: // IQ2_XS
+		return 2 + 2*blockSize/8 + blockSize/32
+	case 18: // IQ3_XXS
+		return 2 + 3*blockSize/8
 	default:
 		return 0
 	}