Update llama.cpp

Andrei Betlen 2023-07-06 17:57:56 -04:00
parent a1b2d5c09b
commit 98ae4e58a3
3 changed files with 43 additions and 1 deletion

Makefile

@@ -33,6 +33,9 @@ deploy.gh-docs:
 	mkdocs build
 	mkdocs gh-deploy
+test:
+	python3 -m pytest
 clean:
 	- cd vendor/llama.cpp && make clean
 	- cd vendor/llama.cpp && rm libllama.so

llama_cpp/llama_cpp.py

@@ -2,6 +2,7 @@ import sys
 import os
 import ctypes
 from ctypes import (
+    c_double,
     c_int,
     c_float,
     c_char_p,
@@ -169,6 +170,7 @@ llama_progress_callback = ctypes.CFUNCTYPE(None, c_float, c_void_p)
 # // context pointer passed to the progress callback
 # void * progress_callback_user_data;
 # // Keep the booleans together to avoid misalignment during copy-by-value.
 # bool low_vram; // if true, reduce VRAM usage at the cost of performance
 # bool f16_kv; // use fp16 for KV cache
@@ -256,6 +258,34 @@ class llama_model_quantize_params(Structure):
     ]
 
+# // performance timing information
+# struct llama_timings {
+#     double t_start_ms;
+#     double t_end_ms;
+#     double t_load_ms;
+#     double t_sample_ms;
+#     double t_p_eval_ms;
+#     double t_eval_ms;
+#     int32_t n_sample;
+#     int32_t n_p_eval;
+#     int32_t n_eval;
+# };
+class llama_timings(Structure):
+    _fields_ = [
+        ("t_start_ms", c_double),
+        ("t_end_ms", c_double),
+        ("t_load_ms", c_double),
+        ("t_sample_ms", c_double),
+        ("t_p_eval_ms", c_double),
+        ("t_eval_ms", c_double),
+        ("n_sample", c_int32),
+        ("n_p_eval", c_int32),
+        ("n_eval", c_int32),
+    ]
+
 # LLAMA_API struct llama_context_params llama_context_default_params();
 def llama_context_default_params() -> llama_context_params:
     return _lib.llama_context_default_params()
@@ -991,6 +1021,15 @@ _lib.llama_sample_token.restype = llama_token
 
 # Performance information
 
+# LLAMA_API struct llama_timings llama_get_timings(struct llama_context * ctx);
+def llama_get_timings(ctx: llama_context_p) -> llama_timings:
+    return _lib.llama_get_timings(ctx)
+
+_lib.llama_get_timings.argtypes = [llama_context_p]
+_lib.llama_get_timings.restype = llama_timings
+
 # LLAMA_API void llama_print_timings(struct llama_context * ctx);
 def llama_print_timings(ctx: llama_context_p):
     _lib.llama_print_timings(ctx)
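
The new llama_get_timings binding returns the llama_timings struct by value, so per-stage timings can be read as plain Python attributes instead of only being printed by llama_print_timings. A minimal usage sketch, assuming a model has already been loaded and evaluated through the existing low-level bindings in this module (the model path and token setup below are illustrative):

import llama_cpp.llama_cpp as llama_cpp

# Create a context with default parameters; the model path is a placeholder.
params = llama_cpp.llama_context_default_params()
ctx = llama_cpp.llama_init_from_file(b"./models/7B/ggml-model.bin", params)

# ... evaluate some tokens with llama_cpp.llama_eval(...) here ...

# Read the timings struct added in this commit.
timings = llama_cpp.llama_get_timings(ctx)
print("load time (ms):", timings.t_load_ms)
print("eval time (ms):", timings.t_eval_ms, "over", timings.n_eval, "tokens")

# The existing binding prints the same information via llama.cpp itself.
llama_cpp.llama_print_timings(ctx)
llama_cpp.llama_free(ctx)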

vendor/llama.cpp

@@ -1 +1 @@
-Subproject commit 7f0e9a775ecc4c6ade271c217f63d6dc93e79eaa
+Subproject commit dfd9fce6d65599bf33df43e616e85aa639bdae4c