diff --git a/envconfig/config.go b/envconfig/config.go
index bcf2e18a..e86f72e6 100644
--- a/envconfig/config.go
+++ b/envconfig/config.go
@@ -57,6 +57,8 @@ var (
 	SchedSpread bool
 	// Set via OLLAMA_TMPDIR in the environment
 	TmpDir string
+	// Set via OLLAMA_INTEL_GPU in the environment
+	IntelGpu bool
 
 	// Set via CUDA_VISIBLE_DEVICES in the environment
 	CudaVisibleDevices string
@@ -101,6 +103,7 @@ func AsMap() map[string]EnvVar {
 		ret["ROCR_VISIBLE_DEVICES"] = EnvVar{"ROCR_VISIBLE_DEVICES", RocrVisibleDevices, "Set which AMD devices are visible"}
 		ret["GPU_DEVICE_ORDINAL"] = EnvVar{"GPU_DEVICE_ORDINAL", GpuDeviceOrdinal, "Set which AMD devices are visible"}
 		ret["HSA_OVERRIDE_GFX_VERSION"] = EnvVar{"HSA_OVERRIDE_GFX_VERSION", HsaOverrideGfxVersion, "Override the gfx used for all detected AMD GPUs"}
+		ret["OLLAMA_INTEL_GPU"] = EnvVar{"OLLAMA_INTEL_GPU", IntelGpu, "Enable experimental Intel GPU detection"}
 	}
 	return ret
 }
@@ -276,6 +279,10 @@ func LoadConfig() {
 		slog.Error("invalid setting", "OLLAMA_HOST", Host, "error", err, "using default port", Host.Port)
 	}
 
+	if set, err := strconv.ParseBool(clean("OLLAMA_INTEL_GPU")); err == nil {
+		IntelGpu = set
+	}
+
 	CudaVisibleDevices = clean("CUDA_VISIBLE_DEVICES")
 	HipVisibleDevices = clean("HIP_VISIBLE_DEVICES")
 	RocrVisibleDevices = clean("ROCR_VISIBLE_DEVICES")
diff --git a/gpu/gpu.go b/gpu/gpu.go
index 56a4dbfa..ce0a1049 100644
--- a/gpu/gpu.go
+++ b/gpu/gpu.go
@@ -280,33 +280,31 @@ func GetGPUInfo() GpuInfoList {
 		}
 
 		// Intel
-		oHandles = initOneAPIHandles()
-		for d := 0; oHandles.oneapi != nil && d < int(oHandles.oneapi.num_drivers); d++ {
-			if oHandles.oneapi == nil {
-				// shouldn't happen
-				slog.Warn("nil oneapi handle with driver count", "count", int(oHandles.oneapi.num_drivers))
-				continue
-			}
-			devCount := C.oneapi_get_device_count(*oHandles.oneapi, C.int(d))
-			for i := range devCount {
-				gpuInfo := OneapiGPUInfo{
-					GpuInfo: GpuInfo{
-						Library: "oneapi",
-					},
-					driverIndex: d,
-					gpuIndex:    int(i),
+		if envconfig.IntelGpu {
+			oHandles = initOneAPIHandles()
+			// Check oneapi for nil before reading num_drivers so a missing handle skips discovery instead of panicking.
+			for d := 0; oHandles.oneapi != nil && d < int(oHandles.oneapi.num_drivers); d++ {
+				devCount := C.oneapi_get_device_count(*oHandles.oneapi, C.int(d))
+				for i := range devCount {
+					gpuInfo := OneapiGPUInfo{
+						GpuInfo: GpuInfo{
+							Library: "oneapi",
+						},
+						driverIndex: d,
+						gpuIndex:    int(i),
+					}
+					// TODO - split bootstrapping from updating free memory
+					C.oneapi_check_vram(*oHandles.oneapi, C.int(d), i, &memInfo)
+					// TODO - convert this to MinimumMemory based on testing...
+					var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
+					memInfo.free = C.uint64_t(totalFreeMem)
+					gpuInfo.TotalMemory = uint64(memInfo.total)
+					gpuInfo.FreeMemory = uint64(memInfo.free)
+					gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
+					gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
+					// TODO dependency path?
+					oneapiGPUs = append(oneapiGPUs, gpuInfo)
 				}
-				// TODO - split bootstrapping from updating free memory
-				C.oneapi_check_vram(*oHandles.oneapi, C.int(d), i, &memInfo)
-				// TODO - convert this to MinimumMemory based on testing...
-				var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
-				memInfo.free = C.uint64_t(totalFreeMem)
-				gpuInfo.TotalMemory = uint64(memInfo.total)
-				gpuInfo.FreeMemory = uint64(memInfo.free)
-				gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
-				gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
-				// TODO dependency path?
-				oneapiGPUs = append(oneapiGPUs, gpuInfo)
 			}
 		}
 