some gocritic

commit c895a7d13f (parent dad7a987ae)
10 changed files with 21 additions and 20 deletions
@@ -14,4 +14,6 @@ linters:
   # - goimports
   - misspell
   - nilerr
+  - nolintlint
+  - nosprintfhostport
   - unused
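The two linters enabled here catch different things: nolintlint flags //nolint directives that are malformed or unnecessary, while nosprintfhostport flags host:port strings built with fmt.Sprintf. A minimal sketch of the pattern nosprintfhostport targets (the host and port values are illustrative, not from this codebase):

    package main

    import (
        "fmt"
        "net"
    )

    func main() {
        host, port := "::1", "11434"
        bad := fmt.Sprintf("%s:%s", host, port) // flagged: "::1:11434" is ambiguous for IPv6
        good := net.JoinHostPort(host, port)    // "[::1]:11434"
        fmt.Println(bad, good)
    }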
@@ -306,7 +306,7 @@ type GenerateResponse struct {
 	// Model is the model name that generated the response.
 	Model string `json:"model"`

-	//CreatedAt is the timestamp of the response.
+	// CreatedAt is the timestamp of the response.
 	CreatedAt time.Time `json:"created_at"`

 	// Response is the textual response itself.
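This is likely gocritic's commentFormatting check, which wants a space between // and the comment text.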
@@ -119,11 +119,12 @@ func llamaRepack(name string, params *Params, data []float32, shape []uint64) ([
 	}

 	var heads int
-	if strings.HasSuffix(name, "attn_q.weight") {
+	switch {
+	case strings.HasSuffix(name, "attn_q.weight"):
 		heads = params.AttentionHeads
-	} else if strings.HasSuffix(name, "attn_k.weight") {
+	case strings.HasSuffix(name, "attn_k.weight"):
 		heads = cmp.Or(params.KeyValHeads, params.AttentionHeads)
-	} else {
+	default:
 		return nil, fmt.Errorf("unknown tensor name: %s", name)
 	}

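Rewriting the if/else-if ladder as a tagless switch is gocritic's ifElseChain suggestion: each branch becomes a case and the trailing else becomes default. The cmp.Or call in the attn_k branch (unchanged here) returns its first non-zero argument, falling back to AttentionHeads when KeyValHeads is unset; cmp.Or was added in Go 1.22. A self-contained sketch of the same rewrite, with illustrative names:

    package main

    import (
        "fmt"
        "strings"
    )

    // classify mirrors the switch shape above: a multi-way branch on
    // string suffixes with an error-style default.
    func classify(name string) string {
        switch {
        case strings.HasSuffix(name, ".weight"):
            return "weight"
        case strings.HasSuffix(name, ".bias"):
            return "bias"
        default:
            return "unknown"
        }
    }

    func main() {
        fmt.Println(classify("attn_q.weight")) // weight
    }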
@@ -120,7 +120,7 @@ func (m *SafetensorFormat) readTensors(fn string, offset uint64, params *Params)
 			Name:   name,
 			Kind:   kind,
 			Offset: offset,
-			Shape:  shape[:],
+			Shape:  shape,
 		}

 		t.WriterTo = safetensorWriterTo{
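Dropping the [:] is gocritic's unslice fix: re-slicing a slice with s[:] is a no-op, so if shape is already a []uint64 the expression adds nothing. The full-slice form is only meaningful on arrays:

    package main

    import "fmt"

    func main() {
        arr := [3]uint64{1, 2, 3}
        s := arr[:] // needed: converts the array to a slice
        t := s[:]   // redundant: s is already a slice (what unslice flags)
        fmt.Println(s, t)
    }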
@@ -85,11 +85,8 @@ func parseTokens(dirpath string) (pre string, tokens []Token, merges []string, e

 	sha256sum := sha256.New()
 	for _, pt := range t.PreTokenizer.PreTokenizers {
-		switch pt.Type {
-		case "Split":
-			if pt.Pattern.Regex != "" {
-				sha256sum.Write([]byte(pt.Pattern.Regex))
-			}
-		}
+		if pt.Type == "Split" && pt.Pattern.Regex != "" {
+			sha256sum.Write([]byte(pt.Pattern.Regex))
+		}
 	}

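A switch with a single case reads better as a plain if (gocritic's singleCaseSwitch), and folding the nested regex check into the condition with && removes one more level of nesting without changing which inputs get hashed.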
@@ -88,7 +88,7 @@ func (tf *TorchFormat) GetTensors(dirpath string, params *Params) ([]llm.Tensor,
 			Name:   ggufName,
 			Kind:   kind,
 			Offset: offset, // calculate the offset
-			Shape:  shape[:],
+			Shape:  shape,
 		}

 		tensor.WriterTo = torchWriterTo{
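The same unslice fix as in readTensors above: shape is presumably already a slice here too, so shape[:] was a no-op.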
@@ -127,7 +127,7 @@ func LoadConfig() {
 	var paths []string
 	for _, root := range []string{filepath.Dir(appExe), cwd} {
 		paths = append(paths,
-			filepath.Join(root),
+			root,
 			filepath.Join(root, "windows-"+runtime.GOARCH),
 			filepath.Join(root, "dist", "windows-"+runtime.GOARCH),
 		)
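filepath.Join with a single element is equivalent to filepath.Clean on that element, so replacing filepath.Join(root) with root is behavior-preserving only when root is already clean; that should hold here, since filepath.Dir returns a cleaned path and os.Getwd yields an absolute path. The multi-element Join calls do real work and stay as-is.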
@@ -104,21 +104,22 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 		var layers int
 		layers, estimatedVRAM, estimatedTotal = EstimateGPULayers(gpus, ggml, projectors, opts)

-		if gpus[0].Library == "metal" && estimatedVRAM > systemMemory {
+		switch {
+		case gpus[0].Library == "metal" && estimatedVRAM > systemMemory:
 			// disable partial offloading when model is greater than total system memory as this
 			// can lead to locking up the system
 			opts.NumGPU = 0
-		} else if gpus[0].Library != "metal" && layers == 0 {
+		case gpus[0].Library != "metal" && layers == 0:
 			// Don't bother loading into the GPU if no layers can fit
 			cpuRunner = serverForCpu()
 			gpuCount = 0
-		} else if opts.NumGPU < 0 && layers > 0 && gpus[0].Library != "cpu" {
+		case opts.NumGPU < 0 && layers > 0 && gpus[0].Library != "cpu":
 			opts.NumGPU = layers
 		}
 	}

 	// Loop through potential servers
-	finalErr := fmt.Errorf("no suitable llama servers found")
+	finalErr := errors.New("no suitable llama servers found")

 	if len(adapters) > 1 {
 		return nil, errors.New("ollama supports only one lora adapter, but multiple were provided")
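Two cleanups in one hunk: the GPU-offload ladder becomes a tagless switch (the same ifElseChain rewrite as in llamaRepack above), and fmt.Errorf with no format verbs becomes errors.New, which skips a pointless formatting pass. A sketch of when each error constructor fits, assuming nothing beyond the standard library:

    package main

    import (
        "errors"
        "fmt"
    )

    func main() {
        errStatic := errors.New("no suitable llama servers found") // static message: errors.New
        errWrapped := fmt.Errorf("health resp: %w", errStatic)     // wrapping: fmt.Errorf with %w
        fmt.Println(errors.Is(errWrapped, errStatic))              // true
    }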
@@ -284,7 +285,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr

 		server := filepath.Join(dir, "ollama_llama_server")
 		if runtime.GOOS == "windows" {
-			server = server + ".exe"
+			server += ".exe"
 		}

 		// Detect tmp cleaners wiping out the file
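gocritic's assignOp check: server = server + ".exe" compresses to server += ".exe" with identical behavior.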
@@ -459,7 +460,7 @@ func (s *llmServer) getServerStatus(ctx context.Context) (ServerStatus, error) {
 	resp, err := http.DefaultClient.Do(req)
 	if err != nil {
 		if errors.Is(err, context.DeadlineExceeded) {
-			return ServerStatusNotResponding, fmt.Errorf("server not responding")
+			return ServerStatusNotResponding, errors.New("server not responding")
 		}
 		return ServerStatusError, fmt.Errorf("health resp: %w", err)
 	}

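The same fmt.Errorf to errors.New swap as above for the constant "server not responding" message; the wrapping fmt.Errorf("health resp: %w", err) below it is left alone, since errors.New cannot wrap an error.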
@@ -66,7 +66,7 @@ func (s *Scheduler) GetRunner(c context.Context, model *Model, opts api.Options,
 		opts.NumCtx = 4
 	}

-	opts.NumCtx = opts.NumCtx * envconfig.NumParallel
+	opts.NumCtx *= envconfig.NumParallel

 	req := &LlmRequest{
 		ctx: c,
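assignOp again: the multiply-and-assign form opts.NumCtx *= envconfig.NumParallel says the same thing more compactly.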
@@ -325,7 +325,7 @@ func TestParseNameFromFilepath(t *testing.T) {
 		filepath.Join("host:port", "namespace", "model", "tag"): {Host: "host:port", Namespace: "namespace", Model: "model", Tag: "tag"},
 		filepath.Join("namespace", "model", "tag"):              {},
 		filepath.Join("model", "tag"):                           {},
-		filepath.Join("model"):                                  {},
+		"model": {},
 		filepath.Join("..", "..", "model", "tag"):               {},
 		filepath.Join("", "namespace", ".", "tag"):              {},
 		filepath.Join(".", ".", ".", "."):                       {},
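filepath.Join("model") with one already-clean element just returns "model", so the plain string literal is clearer in the test table.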