diff --git a/llama_cpp/llama_cpp.py b/llama_cpp/llama_cpp.py
index 23643e2..52fc14e 100644
--- a/llama_cpp/llama_cpp.py
+++ b/llama_cpp/llama_cpp.py
@@ -290,13 +290,14 @@ _lib.llama_mlock_supported.restype = c_bool
 
 # // TODO: not great API - very likely to change
 # // Initialize the llama + ggml backend
+# // If numa is true, use NUMA optimizations
 # // Call once at the start of the program
-# LLAMA_API void llama_init_backend();
-def llama_init_backend():
-    return _lib.llama_init_backend()
+# LLAMA_API void llama_init_backend(bool numa);
+def llama_init_backend(numa: c_bool = c_bool(False)):  # default False keeps zero-argument callers working
+    return _lib.llama_init_backend(numa)  # numa=True asks the backend to use NUMA optimizations
 
 
-_lib.llama_init_backend.argtypes = []
+_lib.llama_init_backend.argtypes = [c_bool]
 _lib.llama_init_backend.restype = None
 
 
@@ -565,6 +566,27 @@ _lib.llama_eval.argtypes = [llama_context_p, llama_token_p, c_int, c_int, c_int]
 _lib.llama_eval.restype = c_int
 
 
+# // Same as llama_eval, but use float matrix input directly.
+# LLAMA_API int llama_eval_embd(
+#         struct llama_context * ctx,
+#                     const float * embd,
+#                             int   n_tokens,
+#                             int   n_past,
+#                             int   n_threads);
+def llama_eval_embd(  # thin wrapper: forwards every argument unchanged to _lib.llama_eval_embd
+    ctx: llama_context_p,
+    embd,  # type: Array[c_float]
+    n_tokens: c_int,  # presumably the number of token embeddings held in embd - TODO confirm against llama.h
+    n_past: c_int,
+    n_threads: c_int,
+) -> int:  # the int returned by the C call (restype is c_int)
+    return _lib.llama_eval_embd(ctx, embd, n_tokens, n_past, n_threads)
+
+
+_lib.llama_eval_embd.argtypes = [llama_context_p, c_float_p, c_int, c_int, c_int]
+_lib.llama_eval_embd.restype = c_int  # matches the `int` return in the C declaration above
+
+
 # Convert the provided text into tokens.
 # The tokens pointer must be large enough to hold the resulting tokens.
 # Returns the number of tokens on success, no more than n_max_tokens
@@ -998,5 +1020,5 @@ _lib.llama_print_system_info.restype = c_char_p
 _llama_initialized = False
 
 if not _llama_initialized:
-    llama_init_backend()
+    llama_init_backend(c_bool(False))  # False: initialize the backend without NUMA optimizations
     _llama_initialized = True
diff --git a/vendor/llama.cpp b/vendor/llama.cpp
index 447ccbe..96a712c 160000
--- a/vendor/llama.cpp
+++ b/vendor/llama.cpp
@@ -1 +1 @@
-Subproject commit 447ccbe8c39332fcdd0d98a041b6e2ff6f06219d
+Subproject commit 96a712ca1b7f427e3bd7ffc0c70b2105cfc7fbf1