diff --git a/llm/ext_server/server.cpp b/llm/ext_server/server.cpp
index e342d5f1..e0424a92 100644
--- a/llm/ext_server/server.cpp
+++ b/llm/ext_server/server.cpp
@@ -738,7 +738,7 @@ struct llama_server_context
                     sampler_names.emplace_back(sampler_name);
                 }
             }
-            slot->sparams.samplers_sequence = sampler_types_from_names(sampler_names, false);
+            slot->sparams.samplers_sequence = llama_sampling_types_from_names(sampler_names, false);
         }
         else
         {
@@ -1096,7 +1096,7 @@ struct llama_server_context
         std::vector<std::string> samplers_sequence;
         for (const auto &sampler_type : slot.sparams.samplers_sequence)
         {
-            samplers_sequence.emplace_back(sampler_type_to_name_string(sampler_type));
+            samplers_sequence.emplace_back(llama_sampling_type_to_str(sampler_type));
         }
 
         return json {
diff --git a/llm/llama.cpp b/llm/llama.cpp
index 614d3b91..74f33adf 160000
--- a/llm/llama.cpp
+++ b/llm/llama.cpp
@@ -1 +1 @@
-Subproject commit 614d3b914e1c3e02596f869649eb4f1d3b68614d
+Subproject commit 74f33adf5f8b20b08fc5a6aa17ce081abe86ef2f
diff --git a/llm/patches/03-load_exception.diff b/llm/patches/03-load_exception.diff
index 9e838fa9..eb245c2a 100644
--- a/llm/patches/03-load_exception.diff
+++ b/llm/patches/03-load_exception.diff
@@ -1,8 +1,17 @@
+From 544a2d2e646d39e878d87dfbb3398a356bc560ab Mon Sep 17 00:00:00 2001
+From: Michael Yang
+Date: Thu, 23 May 2024 11:18:45 -0700
+Subject: [PATCH] throw exception on load errors
+
+---
+ llama.cpp | 25 ++++++++++++++++---------
+ 1 file changed, 16 insertions(+), 9 deletions(-)
+
 diff --git a/llama.cpp b/llama.cpp
-index 4225f955..7b762f86 100644
+index 15c66077..8ba90b6a 100644
 --- a/llama.cpp
 +++ b/llama.cpp
-@@ -4756,7 +4756,7 @@ static int llama_model_load(const std::string & fname, llama_model & model, llam
+@@ -6346,7 +6346,7 @@ static int llama_model_load(const std::string & fname, llama_model & model, llam
           }
       } catch (const std::exception & err) {
           LLAMA_LOG_ERROR("%s: error loading model: %s\n", __func__, err.what());
@@ -11,10 +20,10 @@ index 4225f955..7b762f86 100644
      }
  
      return 0;
-@@ -12102,16 +12102,22 @@ struct llama_model * llama_load_model_from_file(
-         };
+@@ -15600,16 +15600,23 @@ struct llama_model * llama_load_model_from_file(
+         }
+         model->rpc_servers.push_back(servers);
      }
- 
 -    int status = llama_model_load(path_model, *model, params);
 -    GGML_ASSERT(status <= 0);
 -    if (status < 0) {
 -        if (status == -1) {
 -            LLAMA_LOG_ERROR("%s: failed to load model\n", __func__);
 -        } else if (status == -2) {
 -            LLAMA_LOG_INFO("%s: cancelled model load\n", __func__);
++
 +    try {
 +        int status = llama_model_load(path_model, *model, params);
 +        GGML_ASSERT(status <= 0);
 +        if (status < 0) {
 +            if (status == -1) {
 +                LLAMA_LOG_ERROR("%s: failed to load model\n", __func__);
 +            } else if (status == -2) {
 +                LLAMA_LOG_INFO("%s: cancelled model load\n", __func__);
 +            }
 +
 +            llama_free_model(model);
 +            return nullptr;
 +        }
 +    } catch (const std::exception & err) {
 +        LLAMA_LOG_ERROR("%s: failed to load model\n", __func__);
 +        llama_free_model(model);
 +        return nullptr;
      }
  
      return model;
+--
+2.45.1
+
diff --git a/llm/patches/05-default-pretokenizer.diff b/llm/patches/05-default-pretokenizer.diff
new file mode 100644
index 00000000..0d0bf05d
--- /dev/null
+++ b/llm/patches/05-default-pretokenizer.diff
@@ -0,0 +1,35 @@
+From d02a06f3f45a09255ace8684a66590e06ce44605 Mon Sep 17 00:00:00 2001
+From: Michael Yang
+Date: Thu, 23 May 2024 11:33:20 -0700
+Subject: [PATCH] default pretokenizer on unrecognized type
+
+---
+ llama.cpp | 5 +----
+ 1 file changed, 1 insertion(+), 4 deletions(-)
+
+diff --git a/llama.cpp b/llama.cpp
+index 15c66077..af1aede3 100644
+--- a/llama.cpp
++++ b/llama.cpp
+@@ -4504,9 +4504,6 @@ static void llm_load_vocab(
+                 LLAMA_LOG_WARN("%s: ************************************        \n", __func__);
+                 LLAMA_LOG_WARN("%s:                                             \n", __func__);
+                 vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
+-            } else if (
+-                tokenizer_pre == "default") {
+-                vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
+             } else if (
+                 tokenizer_pre == "llama3"   ||
+                 tokenizer_pre == "llama-v3" ||
+@@ -4553,7 +4550,7 @@ static void llm_load_vocab(
+                 tokenizer_pre == "dbrx") {
+                 vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DBRX;
+             } else {
+-                throw std::runtime_error(format("unknown pre-tokenizer type: '%s'", tokenizer_pre.c_str()));
++                vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
+             }
+         } else {
+             vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
+--
+2.45.1
+