Merge branch 'ollama:main' into arm64static

commit ea4c284a48

8 changed files with 125 additions and 188 deletions
.github/ISSUE_TEMPLATE/10_bug_report.yml (vendored, new file, 60 lines)
@@ -0,0 +1,60 @@
+name: Bug report
+labels: [bug]
+description: Something isn't working right.
+body:
+  - type: textarea
+    id: description
+    attributes:
+      label: What is the issue?
+      description: What happened? What did you expect to happen?
+    validations:
+      required: true
+  - type: dropdown
+    id: os
+    attributes:
+      label: OS
+      description: Which operating system are you using?
+      multiple: true
+      options:
+        - Linux
+        - macOS
+        - Windows
+        - Docker
+        - WSL2
+    validations:
+      required: false
+  - type: dropdown
+    id: gpu
+    attributes:
+      label: GPU
+      description: Which GPU are you using?
+      multiple: true
+      options:
+        - Nvidia
+        - AMD
+        - Intel
+        - Apple
+        - Other
+    validations:
+      required: false
+  - type: dropdown
+    id: cpu
+    attributes:
+      label: CPU
+      description: Which CPU are you using?
+      multiple: true
+      options:
+        - Intel
+        - AMD
+        - Apple
+        - Other
+    validations:
+      required: false
+  - type: input
+    id: version
+    attributes:
+      label: Ollama version
+      description: What version of Ollama are you using? (`ollama --version`)
+      placeholder: e.g., 0.1.32
+    validations:
+      required: false
.github/ISSUE_TEMPLATE/10_model_request.yml (vendored, deleted, 18 lines)
@@ -1,18 +0,0 @@
-name: Model request
-description: Request a new model for the library
-labels: [mr]
-body:
-  - type: markdown
-    attributes:
-      value: |
-        Please check if your Model request is [already available](https://ollama.com/search) or that you cannot [import it](https://github.com/ollama/ollama/blob/main/docs/import.md#import-a-model) yourself.
-        Tell us about which Model you'd like to see in the library!
-  - type: textarea
-    id: problem
-    attributes:
-      label: What model would you like?
-      description: Please provide a link to the model.
-  - type: markdown
-    attributes:
-      value: |
-        Thanks for filing a model request!
.github/ISSUE_TEMPLATE/20_feature_request.yml (vendored, 36 changes)
@@ -1,41 +1,11 @@
 name: Feature request
-description: Request a new feature.
-labels: [needs-triage, fr]
+description: Propose a new feature
+labels: ['feature request']
 body:
-  - type: markdown
-    attributes:
-      value: |
-        Please check if your feature request is [already filed](https://github.com/ollama/ollama/issues).
-        Tell us about your idea!
   - type: textarea
     id: problem
     attributes:
-      label: What are you trying to do?
+      label: What new feature would you like to see?
       description: Tell us about the problem you're trying to solve.
     validations:
       required: false
-  - type: textarea
-    id: solution
-    attributes:
-      label: How should we solve this?
-      description: If you have an idea of how you'd like to see this feature work, let us know.
-    validations:
-      required: false
-  - type: textarea
-    id: alternative
-    attributes:
-      label: What is the impact of not solving this?
-      description: (How) Are you currently working around the issue?
-    validations:
-      required: false
-  - type: textarea
-    id: context
-    attributes:
-      label: Anything else?
-      description: Any additional context to share, e.g., links
-    validations:
-      required: false
-  - type: markdown
-    attributes:
-      value: |
-        Thanks for filing a feature request!
.github/ISSUE_TEMPLATE/30_model_request.yml (vendored, new file, 9 lines)
@@ -0,0 +1,9 @@
+name: Model request
+labels: ['model request']
+description: Request a new model.
+body:
+  - type: textarea
+    id: problem
+    attributes:
+      label: What model would you like?
+      description: Please provide a link to the model.
.github/ISSUE_TEMPLATE/90_bug_report.yml (vendored, deleted, 125 lines)
@@ -1,125 +0,0 @@
-name: Bug report
-description: File a bug report. If you need help, please join our Discord server.
-labels: [needs-triage, bug]
-body:
-  - type: markdown
-    attributes:
-      value: |
-        Please check if your bug is [already filed](https://github.com/ollama/ollama/issues) before filing a new one.
-  - type: textarea
-    id: what-happened
-    attributes:
-      label: What is the issue?
-      description: What happened? What did you expect to happen?
-    validations:
-      required: true
-  - type: textarea
-    id: what-was-expected
-    attributes:
-      label: What did you expect to see?
-      description: What did you expect to see/happen instead?
-    validations:
-      required: false
-  - type: textarea
-    id: steps
-    attributes:
-      label: Steps to reproduce
-      description: What are the steps you took that hit this issue?
-    validations:
-      required: false
-  - type: textarea
-    id: changes
-    attributes:
-      label: Are there any recent changes that introduced the issue?
-      description: If so, what are those changes?
-    validations:
-      required: false
-  - type: dropdown
-    id: os
-    attributes:
-      label: OS
-      description: What OS are you using? You may select more than one.
-      multiple: true
-      options:
-        - Linux
-        - macOS
-        - Windows
-        - Other
-    validations:
-      required: false
-  - type: dropdown
-    id: architecture
-    attributes:
-      label: Architecture
-      description: What architecture are you using? You may select more than one.
-      multiple: true
-      options:
-        - arm64
-        - amd64
-        - x86
-        - Other
-  - type: dropdown
-    id: platform
-    attributes:
-      label: Platform
-      description: What platform are you using? You may select more than one.
-      multiple: true
-      options:
-        - Docker
-        - WSL
-        - WSL2
-    validations:
-      required: false
-  - type: input
-    id: ollama-version
-    attributes:
-      label: Ollama version
-      description: What Ollama version are you using? (`ollama --version`)
-      placeholder: e.g., 1.14.4
-    validations:
-      required: false
-  - type: dropdown
-    id: gpu
-    attributes:
-      label: GPU
-      description: What GPU, if any, are you using? You may select more than one.
-      multiple: true
-      options:
-        - Nvidia
-        - AMD
-        - Intel
-        - Apple
-        - Other
-    validations:
-      required: false
-  - type: textarea
-    id: gpu-info
-    attributes:
-      label: GPU info
-      description: What GPU info do you have? (`nvidia-smi`, `rocminfo`, `system_profiler SPDisplaysDataType`, etc.)
-    validations:
-      required: false
-  - type: dropdown
-    id: cpu
-    attributes:
-      label: CPU
-      description: What CPU are you using? You may select more than one.
-      multiple: true
-      options:
-        - Intel
-        - AMD
-        - Apple
-        - Other
-    validations:
-      required: false
-  - type: textarea
-    id: other-software
-    attributes:
-      label: Other software
-      description: What other software are you using that might be related to this issue?
-    validations:
-      required: false
-  - type: markdown
-    attributes:
-      value: |
-        Thanks for filing a bug report!
@@ -60,7 +60,6 @@ Here are some example models that can be downloaded:
 | Llama 2 13B | 13B | 7.3GB | `ollama run llama2:13b` |
 | Llama 2 70B | 70B | 39GB | `ollama run llama2:70b` |
 | Orca Mini | 3B | 1.9GB | `ollama run orca-mini` |
-| Vicuna | 7B | 3.8GB | `ollama run vicuna` |
 | LLaVA | 7B | 4.5GB | `ollama run llava` |
 | Gemma | 2B | 1.4GB | `ollama run gemma:2b` |
 | Gemma | 7B | 4.8GB | `ollama run gemma:7b` |
@@ -378,3 +377,6 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Page Assist](https://github.com/n4ze3m/page-assist) (Chrome Extension)
 - [AI Telegram Bot](https://github.com/tusharhero/aitelegrambot) (Telegram bot using Ollama in backend)
 - [AI ST Completion](https://github.com/yaroslavyaroslav/OpenAI-sublime-text) (Sublime Text 4 AI assistant plugin with Ollama support)
+
+### Supported backends
+- [llama.cpp](https://github.com/ggerganov/llama.cpp) project founded by Georgi Gerganov.
@@ -164,7 +164,8 @@ func (ts Tensors) Layers() map[string]Layer {
 	for _, t := range ts {
 		parts := strings.Split(t.Name, ".")
 		if parts[0] == "blk" {
-			parts = parts[1:]
+			// join first and second part, e.g. blk.%d
+			parts = append([]string{fmt.Sprintf("%s.%s", parts[0], parts[1])}, parts[2:]...)
 		}

 		if _, ok := layers[parts[0]]; !ok {
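The effect of the hunk above: repeating block tensors are now grouped per block under composite keys like `blk.0`, instead of all collapsing into a single `blk` bucket. A minimal sketch of the new grouping behaviour, using made-up tensor names and a plain string map standing in for the real Tensors/Layer types:

package main

import (
	"fmt"
	"strings"
)

func main() {
	// Hypothetical GGUF tensor names for illustration.
	names := []string{
		"blk.0.attn_norm.weight",
		"blk.0.ffn_down.weight",
		"blk.1.attn_norm.weight",
		"output.weight",
		"token_embd.weight",
	}

	layers := map[string][]string{}
	for _, name := range names {
		parts := strings.Split(name, ".")
		if parts[0] == "blk" {
			// join first and second part, e.g. blk.%d
			parts = append([]string{fmt.Sprintf("%s.%s", parts[0], parts[1])}, parts[2:]...)
		}
		layers[parts[0]] = append(layers[parts[0]], name)
	}

	// Expected grouping: blk.0 -> 2, blk.1 -> 1, output -> 1, token_embd -> 1
	for key, ts := range layers {
		fmt.Println(key, len(ts))
	}
}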
@@ -97,7 +97,7 @@ func NewLlamaServer(model string, adapters, projectors []string, opts api.Option
 	var layerCount int
 	layers := ggml.Tensors().Layers()
 	for i := 0; i < int(ggml.KV().BlockCount()); i++ {
-		memoryLayer := layers[fmt.Sprintf("%d", i)].size()
+		memoryLayer := layers[fmt.Sprintf("blk.%d", i)].size()

 		// KV is proportional to the number of layers
 		memoryLayer += kv / ggml.KV().BlockCount()
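Since `Layers()` now keys repeating blocks as `blk.%d`, the per-block lookup has to use the same format; the old bare `"%d"` key would miss, and a Go map lookup that misses returns the zero value, silently under-counting layer memory. A toy illustration (the sizes map is hypothetical):

package main

import "fmt"

func main() {
	// Hypothetical per-layer sizes keyed the new way.
	layers := map[string]uint64{"blk.0": 100, "blk.1": 100}

	// A missing key yields the zero value, not an error.
	fmt.Println(layers[fmt.Sprintf("%d", 0)])     // 0 (old key, misses)
	fmt.Println(layers[fmt.Sprintf("blk.%d", 0)]) // 100 (new key, matches)
}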
@@ -109,7 +109,14 @@ func NewLlamaServer(model string, adapters, projectors []string, opts api.Option
 		}
 	}

-	memoryLayerOutput := layers["output"].size()
+	var memoryLayerOutput uint64
+	for k, v := range layers {
+		if !strings.HasPrefix(k, "blk.") {
+			slog.Info("aaa", "name", k, "size", format.HumanBytes2(v.size()))
+			memoryLayerOutput += v.size()
+		}
+	}

 	memoryRequiredTotal += memoryLayerOutput

 	if info.Library == "metal" && memoryRequiredTotal > info.TotalMemory {
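Rather than assuming the only non-repeating weights live under `output`, the new loop sums every group that is not a `blk.` layer (e.g. `output`, `token_embd`); the `slog.Info("aaa", ...)` line appears to be leftover debug logging in the commit. A rough sketch of the same accounting over a hypothetical size map:

package main

import (
	"fmt"
	"strings"
)

func main() {
	// Hypothetical layer-group sizes in bytes.
	layers := map[string]uint64{
		"blk.0":      512,
		"blk.1":      512,
		"output":     256,
		"token_embd": 128,
	}

	// Sum everything that is not a repeating "blk." layer.
	var memoryLayerOutput uint64
	for k, v := range layers {
		if !strings.HasPrefix(k, "blk.") {
			memoryLayerOutput += v
		}
	}
	fmt.Println(memoryLayerOutput) // 384: output + token_embd
}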
@@ -124,16 +131,47 @@ func NewLlamaServer(model string, adapters, projectors []string, opts api.Option
 		opts.NumGPU = layerCount
 	}

+	memoryWeights := memoryRequiredTotal - memoryMinimum - graphFullOffload - kv
+
 	slog.Info(
 		"offload to gpu",
-		"reallayers", opts.NumGPU,
-		"layers", layerCount,
-		"required", format.HumanBytes2(memoryRequiredTotal),
-		"used", format.HumanBytes2(memoryRequiredPartial),
-		"available", format.HumanBytes2(memoryAvailable),
-		"kv", format.HumanBytes2(kv),
-		"fulloffload", format.HumanBytes2(graphFullOffload),
-		"partialoffload", format.HumanBytes2(graphPartialOffload),
+		slog.Group(
+			"layers",
+			// actual number of layers offloaded
+			"real", opts.NumGPU,
+			// estimated number of layers that can be offloaded
+			"estimate", layerCount,
+		),
+		slog.Group(
+			"memory",
+			// memory available for offloading
+			"available", format.HumanBytes2(memoryAvailable),
+			slog.Group(
+				"required",
+				// memory required for full offloading
+				"full", format.HumanBytes2(memoryRequiredTotal),
+				// memory required to offload layers.estimate layers
+				"partial", format.HumanBytes2(memoryRequiredPartial),
+				// memory of KV cache
+				"kv", format.HumanBytes2(kv),
+			),
+			slog.Group(
+				"weights",
+				// memory of the weights
+				"total", format.HumanBytes2(memoryWeights),
+				// memory of repeating layers
+				"repeating", format.HumanBytes2(memoryWeights-memoryLayerOutput),
+				// memory of non-repeating layers
+				"nonrepeating", format.HumanBytes2(memoryLayerOutput),
+			),
+			slog.Group(
+				"graph",
+				// memory of graph when fully offloaded
+				"full", format.HumanBytes2(graphFullOffload),
+				// memory of graph when not fully offloaded
+				"partial", format.HumanBytes2(graphPartialOffload),
+			),
+		),
 	)

 	if len(adapters) > 1 {
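The restructured log statement relies on `log/slog` groups: `slog.Group` nests key-value pairs (and other groups) under one key, so a structured handler renders them as nested objects rather than a flat attribute list. A standalone sketch with made-up values:

package main

import (
	"log/slog"
	"os"
)

func main() {
	logger := slog.New(slog.NewJSONHandler(os.Stderr, nil))

	// Grouped attributes render as nested JSON objects.
	logger.Info(
		"offload to gpu",
		slog.Group("layers",
			"real", 32,
			"estimate", 33,
		),
		slog.Group("memory",
			"available", "24.0 GiB",
			slog.Group("required",
				"full", "11.3 GiB",
				"partial", "11.3 GiB",
			),
		),
	)
}

With the JSON handler this prints something like {"msg":"offload to gpu","layers":{"real":32,"estimate":33},"memory":{"available":"24.0 GiB","required":{...}}}, which is easier to scan and parse than the old flat key list.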