remove prompt cache
This commit is contained in:
parent
45bf83ff58
commit
da74384a3e
4 changed files with 12 additions and 41 deletions
llama
|
@ -24,7 +24,7 @@
|
|||
#include <windows.h>
|
||||
#endif
|
||||
|
||||
#if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__)) || \
|
||||
#if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__)) || \
|
||||
defined(_WIN32)
|
||||
void sigint_handler(int signo) {
|
||||
if (signo == SIGINT) {
|
||||
|
@ -573,15 +573,13 @@ void *llama_allocate_params(
|
|||
const char **antiprompt, int antiprompt_count, float tfs_z, float typical_p,
|
||||
float frequency_penalty, float presence_penalty, int mirostat,
|
||||
float mirostat_eta, float mirostat_tau, bool penalize_nl,
|
||||
const char *logit_bias, const char *session_file, bool prompt_cache_all,
|
||||
bool mlock, bool mmap, const char *maingpu, const char *tensorsplit,
|
||||
bool prompt_cache_ro) {
|
||||
const char *logit_bias, bool mlock, bool mmap, const char *maingpu,
|
||||
const char *tensorsplit) {
|
||||
gpt_params *params = new gpt_params;
|
||||
params->seed = seed;
|
||||
params->n_threads = threads;
|
||||
params->n_predict = tokens;
|
||||
params->repeat_last_n = repeat_last_n;
|
||||
params->prompt_cache_ro = prompt_cache_ro;
|
||||
params->top_k = top_k;
|
||||
params->top_p = top_p;
|
||||
params->memory_f16 = memory_f16;
|
||||
|
@ -612,9 +610,6 @@ void *llama_allocate_params(
|
|||
}
|
||||
}
|
||||
|
||||
params->prompt_cache_all = prompt_cache_all;
|
||||
params->path_prompt_cache = session_file;
|
||||
|
||||
if (ignore_eos) {
|
||||
params->logit_bias[llama_token_eos()] = -INFINITY;
|
||||
}
|
||||
|
|
|
@ -31,9 +31,8 @@ void *llama_allocate_params(
|
|||
const char **antiprompt, int antiprompt_count, float tfs_z, float typical_p,
|
||||
float frequency_penalty, float presence_penalty, int mirostat,
|
||||
float mirostat_eta, float mirostat_tau, bool penalize_nl,
|
||||
const char *logit_bias, const char *session_file, bool prompt_cache_all,
|
||||
bool mlock, bool mmap, const char *maingpu, const char *tensorsplit,
|
||||
bool prompt_cache_ro);
|
||||
const char *logit_bias, bool mlock, bool mmap, const char *maingpu,
|
||||
const char *tensorsplit);
|
||||
|
||||
void llama_free_params(void *params_ptr);
|
||||
|
||||
|
|
|
@ -28,6 +28,7 @@ package llama
|
|||
// #include "binding/binding.h"
|
||||
// #include <stdlib.h>
|
||||
import "C"
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
@ -86,9 +87,7 @@ func (l *LLama) Eval(text string, opts ...PredictOption) error {
|
|||
C.int(po.Batch), C.int(po.NKeep), pass, C.int(reverseCount),
|
||||
C.float(po.TailFreeSamplingZ), C.float(po.TypicalP), C.float(po.FrequencyPenalty), C.float(po.PresencePenalty),
|
||||
C.int(po.Mirostat), C.float(po.MirostatETA), C.float(po.MirostatTAU), C.bool(po.PenalizeNL), C.CString(po.LogitBias),
|
||||
C.CString(po.PathPromptCache), C.bool(po.PromptCacheAll), C.bool(po.MLock), C.bool(po.MMap),
|
||||
C.CString(po.MainGPU), C.CString(po.TensorSplit),
|
||||
C.bool(po.PromptCacheRO),
|
||||
C.bool(po.MLock), C.bool(po.MMap), C.CString(po.MainGPU), C.CString(po.TensorSplit),
|
||||
)
|
||||
defer C.llama_free_params(params)
|
||||
|
||||
|
@ -128,9 +127,6 @@ func (l *LLama) Predict(text string, opts ...PredictOption) (string, error) {
|
|||
cLogitBias := C.CString(po.LogitBias)
|
||||
defer C.free(unsafe.Pointer(cLogitBias))
|
||||
|
||||
cPathPromptCache := C.CString(po.PathPromptCache)
|
||||
defer C.free(unsafe.Pointer(cPathPromptCache))
|
||||
|
||||
cMainGPU := C.CString(po.MainGPU)
|
||||
defer C.free(unsafe.Pointer(cMainGPU))
|
||||
|
||||
|
@ -143,9 +139,7 @@ func (l *LLama) Predict(text string, opts ...PredictOption) (string, error) {
|
|||
C.int(po.Batch), C.int(po.NKeep), pass, C.int(reverseCount),
|
||||
C.float(po.TailFreeSamplingZ), C.float(po.TypicalP), C.float(po.FrequencyPenalty), C.float(po.PresencePenalty),
|
||||
C.int(po.Mirostat), C.float(po.MirostatETA), C.float(po.MirostatTAU), C.bool(po.PenalizeNL), cLogitBias,
|
||||
cPathPromptCache, C.bool(po.PromptCacheAll), C.bool(po.MLock), C.bool(po.MMap),
|
||||
cMainGPU, cTensorSplit,
|
||||
C.bool(po.PromptCacheRO),
|
||||
C.bool(po.MLock), C.bool(po.MMap), cMainGPU, cTensorSplit,
|
||||
)
|
||||
defer C.llama_free_params(params)
|
||||
|
||||
|
|
|
@ -57,11 +57,9 @@ type PredictOptions struct {
|
|||
LogitBias string
|
||||
TokenCallback func(string) bool
|
||||
|
||||
PathPromptCache string
|
||||
MLock, MMap, PromptCacheAll bool
|
||||
PromptCacheRO bool
|
||||
MainGPU string
|
||||
TensorSplit string
|
||||
MLock, MMap bool
|
||||
MainGPU string
|
||||
TensorSplit string
|
||||
}
|
||||
|
||||
type PredictOption func(p *PredictOptions)
|
||||
|
@ -182,14 +180,6 @@ var Debug PredictOption = func(p *PredictOptions) {
|
|||
p.DebugMode = true
|
||||
}
|
||||
|
||||
var EnablePromptCacheAll PredictOption = func(p *PredictOptions) {
|
||||
p.PromptCacheAll = true
|
||||
}
|
||||
|
||||
var EnablePromptCacheRO PredictOption = func(p *PredictOptions) {
|
||||
p.PromptCacheRO = true
|
||||
}
|
||||
|
||||
var EnableMLock ModelOption = func(p *ModelOptions) {
|
||||
p.MLock = true
|
||||
}
|
||||
|
@ -284,13 +274,6 @@ func SetTemperature(temp float64) PredictOption {
|
|||
}
|
||||
}
|
||||
|
||||
// SetPathPromptCache sets the session file to store the prompt cache.
|
||||
func SetPathPromptCache(f string) PredictOption {
|
||||
return func(p *PredictOptions) {
|
||||
p.PathPromptCache = f
|
||||
}
|
||||
}
|
||||
|
||||
// SetPenalty sets the repetition penalty for text generation.
|
||||
func SetPenalty(penalty float64) PredictOption {
|
||||
return func(p *PredictOptions) {
|
||||
|
|
Loading…
Reference in a new issue