From c3c2623e8b78a215a161b55264a03a7ea213c368 Mon Sep 17 00:00:00 2001
From: Andrei Betlen
Date: Sun, 9 Apr 2023 22:01:33 -0400
Subject: [PATCH] Update llama.cpp

---
 llama_cpp/llama_cpp.py | 12 ++++++++++++
 vendor/llama.cpp       |  2 +-
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/llama_cpp/llama_cpp.py b/llama_cpp/llama_cpp.py
index 069ae10..6030888 100644
--- a/llama_cpp/llama_cpp.py
+++ b/llama_cpp/llama_cpp.py
@@ -77,6 +77,7 @@ class llama_context_params(Structure):
             c_bool,
         ),  # the llama_eval() call computes all logits, not just the last one
         ("vocab_only", c_bool),  # only load the vocabulary, no weights
+        ("use_mmap", c_bool),  # use mmap if possible
         ("use_mlock", c_bool),  # force system to keep model in RAM
         ("embedding", c_bool),  # embedding mode only
         # called with a progress value between 0 and 1, pass NULL to disable
@@ -99,6 +100,17 @@ def llama_context_default_params() -> llama_context_params:
 _lib.llama_context_default_params.argtypes = []
 _lib.llama_context_default_params.restype = llama_context_params
 
+def llama_mmap_supported() -> c_bool:
+    return _lib.llama_mmap_supported()
+
+_lib.llama_mmap_supported.argtypes = []
+_lib.llama_mmap_supported.restype = c_bool
+
+def llama_mlock_supported() -> c_bool:
+    return _lib.llama_mlock_supported()
+
+_lib.llama_mlock_supported.argtypes = []
+_lib.llama_mlock_supported.restype = c_bool
 
 # Various functions for loading a ggml llama model.
 # Allocate (almost) all memory needed for the model.
diff --git a/vendor/llama.cpp b/vendor/llama.cpp
index 62cfc54..180b693 160000
--- a/vendor/llama.cpp
+++ b/vendor/llama.cpp
@@ -1 +1 @@
-Subproject commit 62cfc54f77e519057110265b52b0d614fa363e2a
+Subproject commit 180b693a47b6b825288ef9f2c39d24b6eea4eea6
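
Usage sketch for the new bindings (illustrative only, not part of the patch; the module import path and the llama_init_from_file call in the last comment are assumptions based on the surrounding llama_cpp.py, not confirmed by this diff):

    # Query mmap/mlock capability before enabling the corresponding context flags.
    from llama_cpp import llama_cpp

    params = llama_cpp.llama_context_default_params()
    params.use_mmap = bool(llama_cpp.llama_mmap_supported())    # map the model file instead of reading it fully into RAM
    params.use_mlock = bool(llama_cpp.llama_mlock_supported())  # pin model pages in RAM if the OS allows it
    # params could then be passed to e.g. llama_cpp.llama_init_from_file(b"model.bin", params)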