2024-10-17 12:59:52 -06:00
|
|
|
From 91d3f886f1645b38d9658c0e125603e8d5338146 Mon Sep 17 00:00:00 2001
|
|
|
|
From: nobody <>
|
|
|
|
Date: Tue, 1 Oct 2024 13:55:01 -0600
|
|
|
|
Subject: [PATCH] metal
|
2024-09-16 15:58:55 -07:00
|
|
|
|
|
|
|
---
|
|
|
|
ggml/src/ggml-metal.m | 30 +++++++++++++-----------------
|
|
|
|
1 file changed, 13 insertions(+), 17 deletions(-)
|
|
|
|
|
2024-07-05 13:25:58 -04:00
|
|
|
diff --git a/ggml/src/ggml-metal.m b/ggml/src/ggml-metal.m
|
2024-10-17 12:59:52 -06:00
|
|
|
index 9da08fe2..3a433703 100644
|
2024-07-05 13:25:58 -04:00
|
|
|
--- a/ggml/src/ggml-metal.m
|
|
|
|
+++ b/ggml/src/ggml-metal.m
|
2024-10-17 12:59:52 -06:00
|
|
|
@@ -1720,27 +1720,23 @@ static void ggml_metal_encode_node(
|
|
|
|
// to the matrix-vector kernel
|
|
|
|
int ne11_mm_min = 1;
|
2024-04-25 00:33:33 -04:00
|
|
|
|
|
|
|
-#if 0
|
2024-10-17 12:59:52 -06:00
|
|
|
// the numbers below are measured on M2 Ultra for 7B and 13B models
|
|
|
|
// these numbers do not translate to other devices or model sizes
|
|
|
|
// TODO: need to find a better approach
|
2024-04-25 00:33:33 -04:00
|
|
|
- if ([ctx->device.name isEqualToString:@"Apple M2 Ultra"]) {
|
|
|
|
- switch (src0t) {
|
|
|
|
- case GGML_TYPE_F16: ne11_mm_min = 2; break;
|
|
|
|
- case GGML_TYPE_Q8_0: ne11_mm_min = 7; break;
|
|
|
|
- case GGML_TYPE_Q2_K: ne11_mm_min = 15; break;
|
|
|
|
- case GGML_TYPE_Q3_K: ne11_mm_min = 7; break;
|
|
|
|
- case GGML_TYPE_Q4_0:
|
|
|
|
- case GGML_TYPE_Q4_1: ne11_mm_min = 15; break;
|
|
|
|
- case GGML_TYPE_Q4_K: ne11_mm_min = 11; break;
|
|
|
|
- case GGML_TYPE_Q5_0: // not tested yet
|
|
|
|
- case GGML_TYPE_Q5_1: ne11_mm_min = 13; break; // not tested yet
|
|
|
|
- case GGML_TYPE_Q5_K: ne11_mm_min = 7; break;
|
|
|
|
- case GGML_TYPE_Q6_K: ne11_mm_min = 7; break;
|
|
|
|
- default: ne11_mm_min = 1; break;
|
|
|
|
- }
|
|
|
|
+ switch (src0t) {
|
|
|
|
+ case GGML_TYPE_F16: ne11_mm_min = 2; break;
|
|
|
|
+ case GGML_TYPE_Q8_0: ne11_mm_min = 7; break;
|
|
|
|
+ case GGML_TYPE_Q2_K: ne11_mm_min = 15; break;
|
|
|
|
+ case GGML_TYPE_Q3_K: ne11_mm_min = 7; break;
|
|
|
|
+ case GGML_TYPE_Q4_0:
|
|
|
|
+ case GGML_TYPE_Q4_1: ne11_mm_min = 15; break;
|
|
|
|
+ case GGML_TYPE_Q4_K: ne11_mm_min = 11; break;
|
|
|
|
+ case GGML_TYPE_Q5_0: // not tested yet
|
|
|
|
+ case GGML_TYPE_Q5_1: ne11_mm_min = 13; break; // not tested yet
|
|
|
|
+ case GGML_TYPE_Q5_K: ne11_mm_min = 7; break;
|
|
|
|
+ case GGML_TYPE_Q6_K: ne11_mm_min = 7; break;
|
|
|
|
+ default: ne11_mm_min = 1; break;
|
|
|
|
}
|
|
|
|
-#endif
|
|
|
|
|
|
|
|
// for now the matrix-matrix multiplication kernel only works on A14+/M1+ SoCs
|
|
|
|
// AMD GPU and older A-chips will reuse matrix-vector multiplication kernel
|
2024-09-16 15:58:55 -07:00
|
|
|
--
|
2024-10-17 12:59:52 -06:00
|
|
|
2.39.3 (Apple Git-146)
|
2024-09-16 15:58:55 -07:00
|
|
|
|