remove prompt cache

This commit is contained in:
Bruce MacDonald 2023-07-06 17:49:05 -04:00
parent 45bf83ff58
commit da74384a3e
4 changed files with 12 additions and 41 deletions

View file

@ -573,15 +573,13 @@ void *llama_allocate_params(
const char **antiprompt, int antiprompt_count, float tfs_z, float typical_p, const char **antiprompt, int antiprompt_count, float tfs_z, float typical_p,
float frequency_penalty, float presence_penalty, int mirostat, float frequency_penalty, float presence_penalty, int mirostat,
float mirostat_eta, float mirostat_tau, bool penalize_nl, float mirostat_eta, float mirostat_tau, bool penalize_nl,
const char *logit_bias, const char *session_file, bool prompt_cache_all, const char *logit_bias, bool mlock, bool mmap, const char *maingpu,
bool mlock, bool mmap, const char *maingpu, const char *tensorsplit, const char *tensorsplit) {
bool prompt_cache_ro) {
gpt_params *params = new gpt_params; gpt_params *params = new gpt_params;
params->seed = seed; params->seed = seed;
params->n_threads = threads; params->n_threads = threads;
params->n_predict = tokens; params->n_predict = tokens;
params->repeat_last_n = repeat_last_n; params->repeat_last_n = repeat_last_n;
params->prompt_cache_ro = prompt_cache_ro;
params->top_k = top_k; params->top_k = top_k;
params->top_p = top_p; params->top_p = top_p;
params->memory_f16 = memory_f16; params->memory_f16 = memory_f16;
@ -612,9 +610,6 @@ void *llama_allocate_params(
} }
} }
params->prompt_cache_all = prompt_cache_all;
params->path_prompt_cache = session_file;
if (ignore_eos) { if (ignore_eos) {
params->logit_bias[llama_token_eos()] = -INFINITY; params->logit_bias[llama_token_eos()] = -INFINITY;
} }

View file

@ -31,9 +31,8 @@ void *llama_allocate_params(
const char **antiprompt, int antiprompt_count, float tfs_z, float typical_p, const char **antiprompt, int antiprompt_count, float tfs_z, float typical_p,
float frequency_penalty, float presence_penalty, int mirostat, float frequency_penalty, float presence_penalty, int mirostat,
float mirostat_eta, float mirostat_tau, bool penalize_nl, float mirostat_eta, float mirostat_tau, bool penalize_nl,
const char *logit_bias, const char *session_file, bool prompt_cache_all, const char *logit_bias, bool mlock, bool mmap, const char *maingpu,
bool mlock, bool mmap, const char *maingpu, const char *tensorsplit, const char *tensorsplit);
bool prompt_cache_ro);
void llama_free_params(void *params_ptr); void llama_free_params(void *params_ptr);

View file

@ -28,6 +28,7 @@ package llama
// #include "binding/binding.h" // #include "binding/binding.h"
// #include <stdlib.h> // #include <stdlib.h>
import "C" import "C"
import ( import (
"fmt" "fmt"
"strings" "strings"
@ -86,9 +87,7 @@ func (l *LLama) Eval(text string, opts ...PredictOption) error {
C.int(po.Batch), C.int(po.NKeep), pass, C.int(reverseCount), C.int(po.Batch), C.int(po.NKeep), pass, C.int(reverseCount),
C.float(po.TailFreeSamplingZ), C.float(po.TypicalP), C.float(po.FrequencyPenalty), C.float(po.PresencePenalty), C.float(po.TailFreeSamplingZ), C.float(po.TypicalP), C.float(po.FrequencyPenalty), C.float(po.PresencePenalty),
C.int(po.Mirostat), C.float(po.MirostatETA), C.float(po.MirostatTAU), C.bool(po.PenalizeNL), C.CString(po.LogitBias), C.int(po.Mirostat), C.float(po.MirostatETA), C.float(po.MirostatTAU), C.bool(po.PenalizeNL), C.CString(po.LogitBias),
C.CString(po.PathPromptCache), C.bool(po.PromptCacheAll), C.bool(po.MLock), C.bool(po.MMap), C.bool(po.MLock), C.bool(po.MMap), C.CString(po.MainGPU), C.CString(po.TensorSplit),
C.CString(po.MainGPU), C.CString(po.TensorSplit),
C.bool(po.PromptCacheRO),
) )
defer C.llama_free_params(params) defer C.llama_free_params(params)
@ -128,9 +127,6 @@ func (l *LLama) Predict(text string, opts ...PredictOption) (string, error) {
cLogitBias := C.CString(po.LogitBias) cLogitBias := C.CString(po.LogitBias)
defer C.free(unsafe.Pointer(cLogitBias)) defer C.free(unsafe.Pointer(cLogitBias))
cPathPromptCache := C.CString(po.PathPromptCache)
defer C.free(unsafe.Pointer(cPathPromptCache))
cMainGPU := C.CString(po.MainGPU) cMainGPU := C.CString(po.MainGPU)
defer C.free(unsafe.Pointer(cMainGPU)) defer C.free(unsafe.Pointer(cMainGPU))
@ -143,9 +139,7 @@ func (l *LLama) Predict(text string, opts ...PredictOption) (string, error) {
C.int(po.Batch), C.int(po.NKeep), pass, C.int(reverseCount), C.int(po.Batch), C.int(po.NKeep), pass, C.int(reverseCount),
C.float(po.TailFreeSamplingZ), C.float(po.TypicalP), C.float(po.FrequencyPenalty), C.float(po.PresencePenalty), C.float(po.TailFreeSamplingZ), C.float(po.TypicalP), C.float(po.FrequencyPenalty), C.float(po.PresencePenalty),
C.int(po.Mirostat), C.float(po.MirostatETA), C.float(po.MirostatTAU), C.bool(po.PenalizeNL), cLogitBias, C.int(po.Mirostat), C.float(po.MirostatETA), C.float(po.MirostatTAU), C.bool(po.PenalizeNL), cLogitBias,
cPathPromptCache, C.bool(po.PromptCacheAll), C.bool(po.MLock), C.bool(po.MMap), C.bool(po.MLock), C.bool(po.MMap), cMainGPU, cTensorSplit,
cMainGPU, cTensorSplit,
C.bool(po.PromptCacheRO),
) )
defer C.llama_free_params(params) defer C.llama_free_params(params)

View file

@ -57,9 +57,7 @@ type PredictOptions struct {
LogitBias string LogitBias string
TokenCallback func(string) bool TokenCallback func(string) bool
PathPromptCache string MLock, MMap bool
MLock, MMap, PromptCacheAll bool
PromptCacheRO bool
MainGPU string MainGPU string
TensorSplit string TensorSplit string
} }
@ -182,14 +180,6 @@ var Debug PredictOption = func(p *PredictOptions) {
p.DebugMode = true p.DebugMode = true
} }
var EnablePromptCacheAll PredictOption = func(p *PredictOptions) {
p.PromptCacheAll = true
}
var EnablePromptCacheRO PredictOption = func(p *PredictOptions) {
p.PromptCacheRO = true
}
var EnableMLock ModelOption = func(p *ModelOptions) { var EnableMLock ModelOption = func(p *ModelOptions) {
p.MLock = true p.MLock = true
} }
@ -284,13 +274,6 @@ func SetTemperature(temp float64) PredictOption {
} }
} }
// SetPathPromptCache sets the session file to store the prompt cache.
func SetPathPromptCache(f string) PredictOption {
return func(p *PredictOptions) {
p.PathPromptCache = f
}
}
// SetPenalty sets the repetition penalty for text generation. // SetPenalty sets the repetition penalty for text generation.
func SetPenalty(penalty float64) PredictOption { func SetPenalty(penalty float64) PredictOption {
return func(p *PredictOptions) { return func(p *PredictOptions) {