2023-11-29 11:00:37 -08:00
|
|
|
package gpu
|
|
|
|
|
2024-05-07 14:54:26 -07:00
|
|
|
import (
|
|
|
|
"fmt"
|
|
|
|
"log/slog"
|
|
|
|
|
|
|
|
"github.com/ollama/ollama/format"
|
|
|
|
)
|
|
|
|
|
2023-12-22 15:43:31 -08:00
|
|
|
// memInfo groups the total, free and free-swap memory counters that are
// embedded into GpuInfo. Each counter is dropped from JSON output when it is
// zero (omitempty).
type memInfo struct {
	TotalMemory uint64 `json:"total_memory,omitempty"`
	FreeMemory  uint64 `json:"free_memory,omitempty"`
	FreeSwap    uint64 `json:"free_swap,omitempty"`
}
|
|
|
|
|
|
|
|
// Beginning of an `ollama info` command
|
|
|
|
type GpuInfo struct {
|
|
|
|
memInfo
|
|
|
|
Library string `json:"library,omitempty"`
|
2023-11-29 11:00:37 -08:00
|
|
|
|
2024-01-05 12:13:08 -08:00
|
|
|
// Optional variant to select (e.g. versions, cpu feature flags)
|
2024-05-30 21:54:07 -07:00
|
|
|
Variant string `json:"variant"`
|
2024-01-05 12:13:08 -08:00
|
|
|
|
2024-03-18 10:45:22 +01:00
|
|
|
// MinimumMemory represents the minimum memory required to use the GPU
|
2024-04-05 14:50:38 -07:00
|
|
|
MinimumMemory uint64 `json:"-"`
|
2024-03-18 10:45:22 +01:00
|
|
|
|
2024-03-30 09:50:05 -07:00
|
|
|
// Any extra PATH/LD_LIBRARY_PATH dependencies required for the Library to operate properly
|
|
|
|
DependencyPath string `json:"lib_path,omitempty"`
|
|
|
|
|
2024-05-31 16:15:21 -07:00
|
|
|
// Extra environment variables specific to the GPU as list of [key,value]
|
|
|
|
EnvWorkarounds [][2]string `json:"envs,omitempty"`
|
|
|
|
|
2024-06-19 13:35:38 -07:00
|
|
|
// Set to true if we can NOT reliably discover FreeMemory. A value of true indicates
|
|
|
|
// the FreeMemory is best effort, and may over or under report actual memory usage
|
|
|
|
// False indicates FreeMemory can generally be trusted on this GPU
|
|
|
|
UnreliableFreeMemory bool
|
|
|
|
|
2024-03-30 09:50:05 -07:00
|
|
|
// GPU information
|
2024-05-07 14:54:26 -07:00
|
|
|
ID string `json:"gpu_id"` // string to use for selection of this specific GPU
|
|
|
|
Name string `json:"name"` // user friendly name if available
|
|
|
|
Compute string `json:"compute"` // Compute Capability or gfx
|
|
|
|
|
|
|
|
// Driver Information - TODO no need to put this on each GPU
|
|
|
|
DriverMajor int `json:"driver_major,omitempty"`
|
|
|
|
DriverMinor int `json:"driver_minor,omitempty"`
|
2024-03-30 09:50:05 -07:00
|
|
|
|
|
|
|
// TODO other performance capability info to help in scheduling decisions
|
2023-11-29 11:00:37 -08:00
|
|
|
}
|
2024-02-11 14:50:06 -08:00
|
|
|
|
2024-05-15 15:13:16 -07:00
|
|
|
type CPUInfo struct {
|
|
|
|
GpuInfo
|
|
|
|
}
|
|
|
|
|
|
|
|
type CudaGPUInfo struct {
|
|
|
|
GpuInfo
|
2024-06-13 20:46:14 -07:00
|
|
|
OSOverhead uint64 // Memory overhead between the driver library and management library
|
|
|
|
index int //nolint:unused,nolintlint
|
|
|
|
computeMajor int //nolint:unused,nolintlint
|
|
|
|
computeMinor int //nolint:unused,nolintlint
|
2024-05-15 15:13:16 -07:00
|
|
|
}
|
|
|
|
type CudaGPUInfoList []CudaGPUInfo
|
|
|
|
|
|
|
|
type RocmGPUInfo struct {
|
|
|
|
GpuInfo
|
2024-06-05 12:07:20 -07:00
|
|
|
usedFilepath string //nolint:unused,nolintlint
|
|
|
|
index int //nolint:unused,nolintlint
|
2024-05-15 15:13:16 -07:00
|
|
|
}
|
|
|
|
type RocmGPUInfoList []RocmGPUInfo
|
|
|
|
|
|
|
|
type OneapiGPUInfo struct {
|
|
|
|
GpuInfo
|
2024-06-05 12:07:20 -07:00
|
|
|
driverIndex int //nolint:unused,nolintlint
|
|
|
|
gpuIndex int //nolint:unused,nolintlint
|
2024-05-15 15:13:16 -07:00
|
|
|
}
|
|
|
|
type OneapiGPUInfoList []OneapiGPUInfo
|
|
|
|
|
2024-03-30 09:50:05 -07:00
|
|
|
type GpuInfoList []GpuInfo
|
|
|
|
|
|
|
|
// Split up the set of gpu info's by Library and variant
|
|
|
|
func (l GpuInfoList) ByLibrary() []GpuInfoList {
|
|
|
|
resp := []GpuInfoList{}
|
|
|
|
libs := []string{}
|
|
|
|
for _, info := range l {
|
|
|
|
found := false
|
|
|
|
requested := info.Library
|
2024-05-30 21:54:07 -07:00
|
|
|
if info.Variant != CPUCapabilityNone.String() {
|
|
|
|
requested += "_" + info.Variant
|
2024-03-30 09:50:05 -07:00
|
|
|
}
|
|
|
|
for i, lib := range libs {
|
|
|
|
if lib == requested {
|
|
|
|
resp[i] = append(resp[i], info)
|
|
|
|
found = true
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if !found {
|
2024-08-23 15:11:56 -07:00
|
|
|
libs = append(libs, requested)
|
2024-03-30 09:50:05 -07:00
|
|
|
resp = append(resp, []GpuInfo{info})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return resp
|
2024-02-11 14:50:06 -08:00
|
|
|
}
|
2024-03-30 09:50:05 -07:00
|
|
|
|
2024-05-07 14:54:26 -07:00
|
|
|
// Report the GPU information into the log an Info level
|
|
|
|
func (l GpuInfoList) LogDetails() {
|
|
|
|
for _, g := range l {
|
|
|
|
slog.Info("inference compute",
|
|
|
|
"id", g.ID,
|
|
|
|
"library", g.Library,
|
2024-06-19 09:36:30 -07:00
|
|
|
"variant", g.Variant,
|
2024-05-07 14:54:26 -07:00
|
|
|
"compute", g.Compute,
|
|
|
|
"driver", fmt.Sprintf("%d.%d", g.DriverMajor, g.DriverMinor),
|
|
|
|
"name", g.Name,
|
|
|
|
"total", format.HumanBytes2(g.TotalMemory),
|
|
|
|
"available", format.HumanBytes2(g.FreeMemory),
|
|
|
|
)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-03-30 09:50:05 -07:00
|
|
|
// Sort by Free Space
|
|
|
|
type ByFreeMemory []GpuInfo
|
|
|
|
|
|
|
|
func (a ByFreeMemory) Len() int { return len(a) }
|
|
|
|
func (a ByFreeMemory) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
|
|
|
|
func (a ByFreeMemory) Less(i, j int) bool { return a[i].FreeMemory < a[j].FreeMemory }
|
2024-05-15 15:13:16 -07:00
|
|
|
|
|
|
|
// CPUCapability enumerates the CPU vector-extension levels known to this
// package.
type CPUCapability uint32

// Override at build time when building base GPU runners
var GPURunnerCPUCapability = CPUCapabilityAVX

// The capability levels, in increasing order starting from zero.
const (
	CPUCapabilityNone CPUCapability = iota
	CPUCapabilityAVX
	CPUCapabilityAVX2
	// TODO AVX512
)

// String returns the name of the capability: "avx" for CPUCapabilityAVX and
// "avx2" for CPUCapabilityAVX2. Every other value, including
// CPUCapabilityNone, yields "no vector extensions".
func (c CPUCapability) String() string {
	switch c {
	case CPUCapabilityAVX:
		return "avx"
	case CPUCapabilityAVX2:
		return "avx2"
	default:
		return "no vector extensions"
	}
}
|