remove prompt cache

2023-07-06 17:49:05 -04:00 · 2023-07-06 17:49:05 -04:00 · da74384a3e
commit da74384a3e
parent 45bf83ff58
4 changed files with 12 additions and 41 deletions
--- a/llama/binding/binding.cpp
+++ b/llama/binding/binding.cpp
@@ -573,15 +573,13 @@ void *llama_allocate_params(
    const char **antiprompt, int antiprompt_count, float tfs_z, float typical_p,
    float frequency_penalty, float presence_penalty, int mirostat,
    float mirostat_eta, float mirostat_tau, bool penalize_nl,
-    const char *logit_bias, const char *session_file, bool prompt_cache_all,
+    const char *logit_bias, bool mlock, bool mmap, const char *maingpu,
-    bool mlock, bool mmap, const char *maingpu, const char *tensorsplit,
+    const char *tensorsplit) {
-    bool prompt_cache_ro) {
  gpt_params *params = new gpt_params;
  params->seed = seed;
  params->n_threads = threads;
  params->n_predict = tokens;
  params->repeat_last_n = repeat_last_n;
-  params->prompt_cache_ro = prompt_cache_ro;
  params->top_k = top_k;
  params->top_p = top_p;
  params->memory_f16 = memory_f16;
@@ -612,9 +610,6 @@ void *llama_allocate_params(
    }
  }
-  params->prompt_cache_all = prompt_cache_all;
-  params->path_prompt_cache = session_file;
  if (ignore_eos) {
    params->logit_bias[llama_token_eos()] = -INFINITY;
  }
--- a/llama/binding/binding.h
+++ b/llama/binding/binding.h
@@ -31,9 +31,8 @@ void *llama_allocate_params(
    const char **antiprompt, int antiprompt_count, float tfs_z, float typical_p,
    float frequency_penalty, float presence_penalty, int mirostat,
    float mirostat_eta, float mirostat_tau, bool penalize_nl,
-    const char *logit_bias, const char *session_file, bool prompt_cache_all,
+    const char *logit_bias, bool mlock, bool mmap, const char *maingpu,
-    bool mlock, bool mmap, const char *maingpu, const char *tensorsplit,
+    const char *tensorsplit);
-    bool prompt_cache_ro);
 void llama_free_params(void *params_ptr);
--- a/llama/llama.go
+++ b/llama/llama.go
@@ -28,6 +28,7 @@ package llama
 // #include "binding/binding.h"
 // #include <stdlib.h>
 import "C"
 import (
 	"fmt"
 	"strings"
@@ -86,9 +87,7 @@ func (l *LLama) Eval(text string, opts ...PredictOption) error {
 		C.int(po.Batch), C.int(po.NKeep), pass, C.int(reverseCount),
 		C.float(po.TailFreeSamplingZ), C.float(po.TypicalP), C.float(po.FrequencyPenalty), C.float(po.PresencePenalty),
 		C.int(po.Mirostat), C.float(po.MirostatETA), C.float(po.MirostatTAU), C.bool(po.PenalizeNL), C.CString(po.LogitBias),
-		C.CString(po.PathPromptCache), C.bool(po.PromptCacheAll), C.bool(po.MLock), C.bool(po.MMap),
+		C.bool(po.MLock), C.bool(po.MMap), C.CString(po.MainGPU), C.CString(po.TensorSplit),
-		C.CString(po.MainGPU), C.CString(po.TensorSplit),
-		C.bool(po.PromptCacheRO),
 	)
 	defer C.llama_free_params(params)
@@ -128,9 +127,6 @@ func (l *LLama) Predict(text string, opts ...PredictOption) (string, error) {
 	cLogitBias := C.CString(po.LogitBias)
 	defer C.free(unsafe.Pointer(cLogitBias))
-	cPathPromptCache := C.CString(po.PathPromptCache)
-	defer C.free(unsafe.Pointer(cPathPromptCache))
 	cMainGPU := C.CString(po.MainGPU)
 	defer C.free(unsafe.Pointer(cMainGPU))
@@ -143,9 +139,7 @@ func (l *LLama) Predict(text string, opts ...PredictOption) (string, error) {
 		C.int(po.Batch), C.int(po.NKeep), pass, C.int(reverseCount),
 		C.float(po.TailFreeSamplingZ), C.float(po.TypicalP), C.float(po.FrequencyPenalty), C.float(po.PresencePenalty),
 		C.int(po.Mirostat), C.float(po.MirostatETA), C.float(po.MirostatTAU), C.bool(po.PenalizeNL), cLogitBias,
-		cPathPromptCache, C.bool(po.PromptCacheAll), C.bool(po.MLock), C.bool(po.MMap),
+		C.bool(po.MLock), C.bool(po.MMap), cMainGPU, cTensorSplit,
-		cMainGPU, cTensorSplit,
-		C.bool(po.PromptCacheRO),
 	)
 	defer C.llama_free_params(params)
--- a/llama/options.go
+++ b/llama/options.go
@@ -57,9 +57,7 @@ type PredictOptions struct {
 	LogitBias         string
 	TokenCallback     func(string) bool
-	PathPromptCache             string
+	MLock, MMap bool
-	MLock, MMap, PromptCacheAll bool
-	PromptCacheRO               bool
 	MainGPU     string
 	TensorSplit string
 }
@@ -182,14 +180,6 @@ var Debug PredictOption = func(p *PredictOptions) {
 	p.DebugMode = true
 }
-var EnablePromptCacheAll PredictOption = func(p *PredictOptions) {
-	p.PromptCacheAll = true
-}
-var EnablePromptCacheRO PredictOption = func(p *PredictOptions) {
-	p.PromptCacheRO = true
-}
 var EnableMLock ModelOption = func(p *ModelOptions) {
 	p.MLock = true
 }
@@ -284,13 +274,6 @@ func SetTemperature(temp float64) PredictOption {
 	}
 }
-// SetPathPromptCache sets the session file to store the prompt cache.
-func SetPathPromptCache(f string) PredictOption {
-	return func(p *PredictOptions) {
-		p.PathPromptCache = f
-	}
-}
 // SetPenalty sets the repetition penalty for text generation.
 func SetPenalty(penalty float64) PredictOption {
 	return func(p *PredictOptions) {