From 571dc61955ced560a45e9d32b1cd2a52d9803c8c Mon Sep 17 00:00:00 2001
From: Jeffrey Morgan
Date: Sun, 7 Jul 2024 13:03:09 -0400
Subject: [PATCH] Update llama.cpp submodule to `a8db2a9c` (#5530)

---
 llm/llama.cpp                            |  2 +-
 llm/patches/05-default-pretokenizer.diff | 10 +++++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/llm/llama.cpp b/llm/llama.cpp
index d7fd29ff..a8db2a9c 160000
--- a/llm/llama.cpp
+++ b/llm/llama.cpp
@@ -1 +1 @@
-Subproject commit d7fd29fff16456ce9c3a23fd2d09a66256b05aff
+Subproject commit a8db2a9ce64cd4417f6a312ab61858f17f0f8584
diff --git a/llm/patches/05-default-pretokenizer.diff b/llm/patches/05-default-pretokenizer.diff
index f4eaced7..341a6f59 100644
--- a/llm/patches/05-default-pretokenizer.diff
+++ b/llm/patches/05-default-pretokenizer.diff
@@ -1,11 +1,11 @@
 diff --git a/src/llama.cpp b/src/llama.cpp
-index 73f52435..2b81b4bd 100644
+index 2b9ace28..172640e2 100644
 --- a/src/llama.cpp
 +++ b/src/llama.cpp
-@@ -5092,16 +5092,7 @@ static void llm_load_vocab(
- 
-     // for now, only BPE models have pre-tokenizers
+@@ -5357,16 +5357,7 @@ static void llm_load_vocab(
      if (vocab.type == LLAMA_VOCAB_TYPE_BPE) {
+         vocab.tokenizer_add_space_prefix = false;
+         vocab.tokenizer_clean_spaces = true;
 -        if (tokenizer_pre.empty()) {
 -            LLAMA_LOG_WARN("%s: missing pre-tokenizer type, using: 'default'\n", __func__);
 -            LLAMA_LOG_WARN("%s:                                             \n", __func__);
@@ -20,7 +20,7 @@ index 73f52435..2b81b4bd 100644
              vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
          } else if (
              tokenizer_pre == "llama3" ||
-@@ -5164,7 +5155,8 @@ static void llm_load_vocab(
+@@ -5439,7 +5430,8 @@ static void llm_load_vocab(
          tokenizer_pre == "jais") {
          vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_JAIS;
      } else {