gpu: add env var for detecting Intel oneapi gpus (#5076)

* gpu: add env var for detecting intel oneapi gpus
* fix build error

parent 4c2c8f93dd
commit 163cd3e77c

2 changed files with 35 additions and 26 deletions
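For reference, the new flag is read with Go's strconv.ParseBool (see the LoadConfig hunk below), so only the standard boolean spellings turn it on and any other value leaves the default of false. A minimal standalone sketch of that parsing behavior, not ollama's envconfig code:

package main

import (
	"fmt"
	"strconv"
)

func main() {
	// strconv.ParseBool accepts "1", "t", "T", "TRUE", "true", "True",
	// "0", "f", "F", "FALSE", "false", "False"; anything else errors.
	for _, v := range []string{"1", "true", "TRUE", "0", "yes", ""} {
		set, err := strconv.ParseBool(v)
		if err == nil {
			// Mirrors the guard in LoadConfig: only apply values that parse.
			fmt.Printf("OLLAMA_INTEL_GPU=%q -> IntelGpu=%v\n", v, set)
		} else {
			fmt.Printf("OLLAMA_INTEL_GPU=%q -> ignored, IntelGpu stays false\n", v)
		}
	}
}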
envconfig/config.go

@@ -57,6 +57,8 @@ var (
 	SchedSpread bool
 	// Set via OLLAMA_TMPDIR in the environment
 	TmpDir string
+	// Set via OLLAMA_INTEL_GPU in the environment
+	IntelGpu bool
 
 	// Set via CUDA_VISIBLE_DEVICES in the environment
 	CudaVisibleDevices string
@@ -101,6 +103,7 @@ func AsMap() map[string]EnvVar {
 		ret["ROCR_VISIBLE_DEVICES"] = EnvVar{"ROCR_VISIBLE_DEVICES", RocrVisibleDevices, "Set which AMD devices are visible"}
 		ret["GPU_DEVICE_ORDINAL"] = EnvVar{"GPU_DEVICE_ORDINAL", GpuDeviceOrdinal, "Set which AMD devices are visible"}
 		ret["HSA_OVERRIDE_GFX_VERSION"] = EnvVar{"HSA_OVERRIDE_GFX_VERSION", HsaOverrideGfxVersion, "Override the gfx used for all detected AMD GPUs"}
+		ret["OLLAMA_INTEL_GPU"] = EnvVar{"OLLAMA_INTEL_GPU", IntelGpu, "Enable experimental Intel GPU detection"}
 	}
 	return ret
 }
@@ -276,6 +279,10 @@ func LoadConfig() {
 		slog.Error("invalid setting", "OLLAMA_HOST", Host, "error", err, "using default port", Host.Port)
 	}
 
+	if set, err := strconv.ParseBool(clean("OLLAMA_INTEL_GPU")); err == nil {
+		IntelGpu = set
+	}
+
 	CudaVisibleDevices = clean("CUDA_VISIBLE_DEVICES")
 	HipVisibleDevices = clean("HIP_VISIBLE_DEVICES")
 	RocrVisibleDevices = clean("ROCR_VISIBLE_DEVICES")
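The config change follows envconfig's existing pattern: declare a package-level field, register it in AsMap next to the other GPU knobs, and parse it in LoadConfig. A condensed, self-contained sketch of the registration side; the EnvVar field names here are an approximation of the envconfig struct, not copied from it:

package main

import "fmt"

// EnvVar approximates envconfig.EnvVar: a name, its current value, and a
// human-readable description shown when settings are listed.
type EnvVar struct {
	Name        string
	Value       any
	Description string
}

// IntelGpu stands in for the new package-level flag; LoadConfig would set it
// from OLLAMA_INTEL_GPU before AsMap is consulted.
var IntelGpu bool

// AsMap sketches the GPU-related entries touched by this commit.
func AsMap() map[string]EnvVar {
	ret := make(map[string]EnvVar)
	ret["HSA_OVERRIDE_GFX_VERSION"] = EnvVar{"HSA_OVERRIDE_GFX_VERSION", "", "Override the gfx used for all detected AMD GPUs"}
	// New in this commit: the Intel opt-in sits alongside the AMD knobs.
	ret["OLLAMA_INTEL_GPU"] = EnvVar{"OLLAMA_INTEL_GPU", IntelGpu, "Enable experimental Intel GPU detection"}
	return ret
}

func main() {
	fmt.Println(AsMap()["OLLAMA_INTEL_GPU"].Description)
}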
gpu/gpu.go (54 lines changed)

@@ -280,33 +280,35 @@ func GetGPUInfo() GpuInfoList {
 	}
 
 	// Intel
-	oHandles = initOneAPIHandles()
-	for d := 0; oHandles.oneapi != nil && d < int(oHandles.oneapi.num_drivers); d++ {
-		if oHandles.oneapi == nil {
-			// shouldn't happen
-			slog.Warn("nil oneapi handle with driver count", "count", int(oHandles.oneapi.num_drivers))
-			continue
-		}
-		devCount := C.oneapi_get_device_count(*oHandles.oneapi, C.int(d))
-		for i := range devCount {
-			gpuInfo := OneapiGPUInfo{
-				GpuInfo: GpuInfo{
-					Library: "oneapi",
-				},
-				driverIndex: d,
-				gpuIndex:    int(i),
+	if envconfig.IntelGpu {
+		oHandles = initOneAPIHandles()
+		for d := range oHandles.oneapi.num_drivers {
+			if oHandles.oneapi == nil {
+				// shouldn't happen
+				slog.Warn("nil oneapi handle with driver count", "count", int(oHandles.oneapi.num_drivers))
+				continue
+			}
+			devCount := C.oneapi_get_device_count(*oHandles.oneapi, C.int(d))
+			for i := range devCount {
+				gpuInfo := OneapiGPUInfo{
+					GpuInfo: GpuInfo{
+						Library: "oneapi",
+					},
+					driverIndex: int(d),
+					gpuIndex:    int(i),
+				}
+				// TODO - split bootstrapping from updating free memory
+				C.oneapi_check_vram(*oHandles.oneapi, C.int(d), i, &memInfo)
+				// TODO - convert this to MinimumMemory based on testing...
+				var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
+				memInfo.free = C.uint64_t(totalFreeMem)
+				gpuInfo.TotalMemory = uint64(memInfo.total)
+				gpuInfo.FreeMemory = uint64(memInfo.free)
+				gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
+				gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
+				// TODO dependency path?
+				oneapiGPUs = append(oneapiGPUs, gpuInfo)
 			}
-			// TODO - split bootstrapping from updating free memory
-			C.oneapi_check_vram(*oHandles.oneapi, C.int(d), i, &memInfo)
-			// TODO - convert this to MinimumMemory based on testing...
-			var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
-			memInfo.free = C.uint64_t(totalFreeMem)
-			gpuInfo.TotalMemory = uint64(memInfo.total)
-			gpuInfo.FreeMemory = uint64(memInfo.free)
-			gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
-			gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
-			// TODO dependency path?
-			oneapiGPUs = append(oneapiGPUs, gpuInfo)
 		}
 	}
 
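Net effect in GetGPUInfo: the oneAPI probe is now opt-in instead of running unconditionally. A simplified, hypothetical sketch of that control flow with the cgo enumeration stubbed out (discoverOneapiGPUs and its arguments are invented for illustration; the real code calls C.oneapi_get_device_count and C.oneapi_check_vram per driver/device pair):

package main

import (
	"fmt"
	"log/slog"
	"os"
	"strconv"
)

// oneapiGPU is a stand-in for gpu.OneapiGPUInfo; the real struct also carries
// the library name, device ID, and memory figures filled in via cgo.
type oneapiGPU struct {
	DriverIndex int
	GPUIndex    int
}

// intelGpuEnabled mirrors the intent of envconfig.IntelGpu: off by default,
// enabled only by an explicit boolean value in OLLAMA_INTEL_GPU.
func intelGpuEnabled() bool {
	set, err := strconv.ParseBool(os.Getenv("OLLAMA_INTEL_GPU"))
	return err == nil && set
}

// discoverOneapiGPUs stands in for the driver/device loop in GetGPUInfo;
// the driver and device counts are placeholders for what the oneAPI runtime
// would report.
func discoverOneapiGPUs(numDrivers, devicesPerDriver int) []oneapiGPU {
	var gpus []oneapiGPU
	for d := 0; d < numDrivers; d++ {
		for i := 0; i < devicesPerDriver; i++ {
			gpus = append(gpus, oneapiGPU{DriverIndex: d, GPUIndex: i})
		}
	}
	return gpus
}

func main() {
	if !intelGpuEnabled() {
		slog.Info("oneAPI detection skipped; set OLLAMA_INTEL_GPU=1 to opt in")
		return
	}
	fmt.Println(discoverOneapiGPUs(1, 2))
}

With this guard in place, opting in would look like setting OLLAMA_INTEL_GPU=1 in the server's environment before startup; leaving it unset keeps the previous non-Intel detection paths unchanged.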