Update llama.cpp
This commit is contained in:
parent a1b2d5c09b
commit 98ae4e58a3

3 changed files with 43 additions and 1 deletion
Makefile (3 changes)
@@ -33,6 +33,9 @@ deploy.gh-docs:
 	mkdocs build
 	mkdocs gh-deploy
 
+test:
+	python3 -m pytest
+
 clean:
 	- cd vendor/llama.cpp && make clean
 	- cd vendor/llama.cpp && rm libllama.so
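The new `test` target simply shells out to pytest, so `make test` collects whatever test files the repository provides; the leading `-` on the `clean` recipes is make's ignore-errors prefix and is unchanged here. A minimal smoke test that such a target would pick up might look like the sketch below (the file name and assertions are hypothetical, not part of this commit):

# tests/test_timings_bindings.py -- hypothetical smoke test, not in this diff
from llama_cpp import llama_cpp

def test_timings_bindings_exist():
    # The symbols added in this commit should be importable.
    assert hasattr(llama_cpp, "llama_timings")
    assert hasattr(llama_cpp, "llama_get_timings")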
llama_cpp/llama_cpp.py (39 changes)

@@ -2,6 +2,7 @@ import sys
 import os
 import ctypes
 from ctypes import (
+    c_double,
     c_int,
     c_float,
     c_char_p,
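The added `c_double` import backs the new `llama_timings` fields below; ctypes structures must declare the exact C types so the field layout matches the header. As a quick sanity check (true on mainstream ABIs):

import ctypes
assert ctypes.sizeof(ctypes.c_double) == 8  # C double, as in llama_timings
assert ctypes.sizeof(ctypes.c_int32) == 4   # int32_t counters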
@@ -169,6 +170,7 @@ llama_progress_callback = ctypes.CFUNCTYPE(None, c_float, c_void_p)
 # // context pointer passed to the progress callback
 # void * progress_callback_user_data;
 
+
 # // Keep the booleans together to avoid misalignment during copy-by-value.
 # bool low_vram; // if true, reduce VRAM usage at the cost of performance
 # bool f16_kv; // use fp16 for KV cache
@@ -256,6 +258,34 @@ class llama_model_quantize_params(Structure):
     ]
 
 
+
+# // performance timing information
+# struct llama_timings {
+#     double t_start_ms;
+#     double t_end_ms;
+#     double t_load_ms;
+#     double t_sample_ms;
+#     double t_p_eval_ms;
+#     double t_eval_ms;
+
+#     int32_t n_sample;
+#     int32_t n_p_eval;
+#     int32_t n_eval;
+# };
+class llama_timings(Structure):
+    _fields_ = [
+        ("t_start_ms", c_double),
+        ("t_end_ms", c_double),
+        ("t_load_ms", c_double),
+        ("t_sample_ms", c_double),
+        ("t_p_eval_ms", c_double),
+        ("t_eval_ms", c_double),
+        ("n_sample", c_int32),
+        ("n_p_eval", c_int32),
+        ("n_eval", c_int32),
+    ]
+
+
 # LLAMA_API struct llama_context_params llama_context_default_params();
 def llama_context_default_params() -> llama_context_params:
     return _lib.llama_context_default_params()
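Because `llama_timings` mirrors the C struct field-for-field and is returned by value, callers can read its fields as ordinary Python numbers. A small derived-metric sketch, assuming the field semantics stated in the header comments (the helper name is ours, not part of the diff):

def tokens_per_second(timings: "llama_timings") -> float:
    # Times are in milliseconds; n_eval counts tokens processed by eval.
    if timings.t_eval_ms <= 0:
        return 0.0
    return timings.n_eval / (timings.t_eval_ms / 1000.0)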
@@ -991,6 +1021,15 @@ _lib.llama_sample_token.restype = llama_token
 # Performance information
 
 
+# LLAMA_API struct llama_timings llama_get_timings(struct llama_context * ctx);
+def llama_get_timings(ctx: llama_context_p) -> llama_timings:
+    return _lib.llama_get_timings(ctx)
+
+
+_lib.llama_get_timings.argtypes = [llama_context_p]
+_lib.llama_get_timings.restype = llama_timings
+
+
 # LLAMA_API void llama_print_timings(struct llama_context * ctx);
 def llama_print_timings(ctx: llama_context_p):
     _lib.llama_print_timings(ctx)
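Together, `llama_get_timings` exposes the timing struct for programmatic use while `llama_print_timings` keeps the C-side formatted report. A hedged usage sketch against the low-level bindings of this era (the model path is a placeholder; tokenization, eval, and error handling are omitted):

import llama_cpp.llama_cpp as llama_cpp

params = llama_cpp.llama_context_default_params()
ctx = llama_cpp.llama_init_from_file(b"./models/7B/ggml-model.bin", params)

# ... tokenize, llama_eval, and sample here ...

timings = llama_cpp.llama_get_timings(ctx)  # struct returned by value
print(f"load: {timings.t_load_ms:.2f} ms, eval tokens: {timings.n_eval}")
llama_cpp.llama_print_timings(ctx)  # same numbers, printed by the C side
llama_cpp.llama_free(ctx)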
vendor/llama.cpp (vendored, 2 changes)

@@ -1 +1 @@
-Subproject commit 7f0e9a775ecc4c6ade271c217f63d6dc93e79eaa
+Subproject commit dfd9fce6d65599bf33df43e616e85aa639bdae4c