Compare commits

...

6 commits

Author SHA1 Message Date
1134287d38
Merge https://github.com/ollama/ollama 2024-10-15 16:19:26 +05:30
frob
09035b71cd
Add missing BF16 tensor type. (#7193)
Co-authored-by: Richard Lyons <frob@cloudstaff.com>
2024-10-14 17:06:35 -07:00
Daniel Hiltgen
f3c8b898cd
Track GPU discovery failure information (#5820)
* Expose GPU discovery failure information

* Remove exposed API for now
2024-10-14 16:26:45 -07:00
Daniel Hiltgen
5dd0477fd4
Fix regression on older macos versions (#7192)
The new cgo compilation requires a flag to target older macos versions
2024-10-13 10:47:42 -07:00
Daniel Hiltgen
c3d321d405
llm: Remove GGML_CUDA_NO_PEER_COPY for ROCm (#7174)
This workaround logic in llama.cpp is causing crashes for users with less system memory than VRAM.
2024-10-12 09:56:49 -07:00
Jesse Gross
7fe3902552 cli: Send all images in conversation history
Currently the CLI only sends images from the most recent image-
containing message. This prevents doing things like sending
one message with an image and then a follow-up message with a
second image and asking for comparison based on additional
information not present in any text that was output.

It's possible that some models have a problem with this but the
CLI is not the right place to do this since any adjustments are
model-specific and should affect all clients.

Both llava:34b and minicpm-v do reasonable things with multiple
images in the history.
2024-10-10 11:21:51 -07:00
10 changed files with 251 additions and 107 deletions

View file

@ -442,13 +442,6 @@ func generateInteractive(cmd *cobra.Command, opts runOptions) error {
return err return err
} }
// clear all previous images for better responses
if len(images) > 0 {
for i := range opts.Messages {
opts.Messages[i].Images = nil
}
}
newMessage.Content = msg newMessage.Content = msg
newMessage.Images = images newMessage.Images = images
} }

View file

@ -47,10 +47,11 @@ var (
) )
// Gather GPU information from the amdgpu driver if any supported GPUs are detected // Gather GPU information from the amdgpu driver if any supported GPUs are detected
func AMDGetGPUInfo() []RocmGPUInfo { // Only called once during bootstrap
func AMDGetGPUInfo() ([]RocmGPUInfo, error) {
resp := []RocmGPUInfo{} resp := []RocmGPUInfo{}
if !AMDDetected() { if !AMDDetected() {
return resp return resp, fmt.Errorf("AMD GPUs not detected")
} }
// Opportunistic logging of driver version to aid in troubleshooting // Opportunistic logging of driver version to aid in troubleshooting
@ -194,13 +195,9 @@ func AMDGetGPUInfo() []RocmGPUInfo {
// Shouldn't happen, but just in case... // Shouldn't happen, but just in case...
if gpuID < 0 { if gpuID < 0 {
slog.Error("unexpected amdgpu sysfs data resulted in negative GPU ID, please set OLLAMA_DEBUG=1 and report an issue") err := fmt.Errorf("unexpected amdgpu sysfs data resulted in negative GPU ID, please set OLLAMA_DEBUG=1 and report an issue")
return nil slog.Error(err.Error())
} return nil, err
if int(major) < RocmComputeMin {
slog.Warn(fmt.Sprintf("amdgpu too old gfx%d%x%x", major, minor, patch), "gpu", gpuID)
continue
} }
// Look up the memory for the current node // Look up the memory for the current node
@ -270,19 +267,12 @@ func AMDGetGPUInfo() []RocmGPUInfo {
break break
} }
// iGPU detection, remove this check once we can support an iGPU variant of the rocm library
if totalMemory < IGPUMemLimit {
slog.Info("unsupported Radeon iGPU detected skipping", "id", gpuID, "total", format.HumanBytes2(totalMemory))
continue
}
var name string var name string
// TODO - PCI ID lookup // TODO - PCI ID lookup
if vendor > 0 && device > 0 { if vendor > 0 && device > 0 {
name = fmt.Sprintf("%04x:%04x", vendor, device) name = fmt.Sprintf("%04x:%04x", vendor, device)
} }
slog.Debug("amdgpu memory", "gpu", gpuID, "total", format.HumanBytes2(totalMemory))
slog.Debug("amdgpu memory", "gpu", gpuID, "available", format.HumanBytes2(totalMemory-usedMemory))
gpuInfo := RocmGPUInfo{ gpuInfo := RocmGPUInfo{
GpuInfo: GpuInfo{ GpuInfo: GpuInfo{
Library: "rocm", Library: "rocm",
@ -300,6 +290,31 @@ func AMDGetGPUInfo() []RocmGPUInfo {
usedFilepath: usedFile, usedFilepath: usedFile,
} }
// iGPU detection, remove this check once we can support an iGPU variant of the rocm library
if totalMemory < IGPUMemLimit {
reason := "unsupported Radeon iGPU detected skipping"
slog.Info(reason, "id", gpuID, "total", format.HumanBytes2(totalMemory))
unsupportedGPUs = append(unsupportedGPUs, UnsupportedGPUInfo{
GpuInfo: gpuInfo.GpuInfo,
Reason: reason,
})
continue
}
if int(major) < RocmComputeMin {
reason := fmt.Sprintf("amdgpu too old gfx%d%x%x", major, minor, patch)
slog.Warn(reason, "gpu", gpuID)
unsupportedGPUs = append(unsupportedGPUs, UnsupportedGPUInfo{
GpuInfo: gpuInfo.GpuInfo,
Reason: reason,
})
continue
}
slog.Debug("amdgpu memory", "gpu", gpuID, "total", format.HumanBytes2(totalMemory))
slog.Debug("amdgpu memory", "gpu", gpuID, "available", format.HumanBytes2(totalMemory-usedMemory))
// If the user wants to filter to a subset of devices, filter out if we aren't a match // If the user wants to filter to a subset of devices, filter out if we aren't a match
if len(visibleDevices) > 0 { if len(visibleDevices) > 0 {
include := false include := false
@ -310,7 +325,13 @@ func AMDGetGPUInfo() []RocmGPUInfo {
} }
} }
if !include { if !include {
slog.Info("filtering out device per user request", "id", gpuInfo.ID, "visible_devices", visibleDevices) reason := "filtering out device per user request"
slog.Info(reason, "id", gpuInfo.ID, "visible_devices", visibleDevices)
unsupportedGPUs = append(unsupportedGPUs, UnsupportedGPUInfo{
GpuInfo: gpuInfo.GpuInfo,
Reason: reason,
})
continue continue
} }
} }
@ -320,8 +341,13 @@ func AMDGetGPUInfo() []RocmGPUInfo {
if libDir == "" { if libDir == "" {
libDir, err = AMDValidateLibDir() libDir, err = AMDValidateLibDir()
if err != nil { if err != nil {
slog.Warn("unable to verify rocm library, will use cpu", "error", err) err = fmt.Errorf("unable to verify rocm library: %w", err)
return nil slog.Warn(err.Error())
unsupportedGPUs = append(unsupportedGPUs, UnsupportedGPUInfo{
GpuInfo: gpuInfo.GpuInfo,
Reason: err.Error(),
})
return nil, err
} }
} }
gpuInfo.DependencyPath = libDir gpuInfo.DependencyPath = libDir
@ -331,14 +357,25 @@ func AMDGetGPUInfo() []RocmGPUInfo {
if len(supported) == 0 { if len(supported) == 0 {
supported, err = GetSupportedGFX(libDir) supported, err = GetSupportedGFX(libDir)
if err != nil { if err != nil {
slog.Warn("failed to lookup supported GFX types, falling back to CPU mode", "error", err) err = fmt.Errorf("failed to lookup supported GFX types: %w", err)
return nil slog.Warn(err.Error())
unsupportedGPUs = append(unsupportedGPUs, UnsupportedGPUInfo{
GpuInfo: gpuInfo.GpuInfo,
Reason: err.Error(),
})
return nil, err
} }
slog.Debug("rocm supported GPUs", "types", supported) slog.Debug("rocm supported GPUs", "types", supported)
} }
gfx := gpuInfo.Compute gfx := gpuInfo.Compute
if !slices.Contains[[]string, string](supported, gfx) { if !slices.Contains[[]string, string](supported, gfx) {
slog.Warn("amdgpu is not supported", "gpu", gpuInfo.ID, "gpu_type", gfx, "library", libDir, "supported_types", supported) reason := fmt.Sprintf("amdgpu is not supported (supported types:%s)", supported)
slog.Warn(reason, "gpu_type", gfx, "gpu", gpuInfo.ID, "library", libDir)
unsupportedGPUs = append(unsupportedGPUs, UnsupportedGPUInfo{
GpuInfo: gpuInfo.GpuInfo,
Reason: reason,
})
// TODO - consider discrete markdown just for ROCM troubleshooting? // TODO - consider discrete markdown just for ROCM troubleshooting?
slog.Warn("See https://github.com/ollama/ollama/blob/main/docs/gpu.md#overrides for HSA_OVERRIDE_GFX_VERSION usage") slog.Warn("See https://github.com/ollama/ollama/blob/main/docs/gpu.md#overrides for HSA_OVERRIDE_GFX_VERSION usage")
continue continue
@ -358,13 +395,16 @@ func AMDGetGPUInfo() []RocmGPUInfo {
resp = append(resp, gpuInfo) resp = append(resp, gpuInfo)
} }
if len(resp) == 0 { if len(resp) == 0 {
slog.Info("no compatible amdgpu devices detected") err := fmt.Errorf("no compatible amdgpu devices detected")
slog.Info(err.Error())
return nil, err
} }
if err := verifyKFDDriverAccess(); err != nil { if err := verifyKFDDriverAccess(); err != nil {
slog.Error("amdgpu devices detected but permission problems block access", "error", err) err = fmt.Errorf("amdgpu devices detected but permission problems block access: %w", err)
return nil slog.Error(err.Error())
return nil, err
} }
return resp return resp, nil
} }
// Quick check for AMD driver so we can skip amdgpu discovery if not present // Quick check for AMD driver so we can skip amdgpu discovery if not present

View file

@ -3,6 +3,7 @@ package gpu
import ( import (
"bytes" "bytes"
"errors" "errors"
"fmt"
"log/slog" "log/slog"
"os" "os"
"path/filepath" "path/filepath"
@ -26,12 +27,13 @@ var (
RocmStandardLocations = []string{"C:\\Program Files\\AMD\\ROCm\\6.1\\bin"} // TODO glob? RocmStandardLocations = []string{"C:\\Program Files\\AMD\\ROCm\\6.1\\bin"} // TODO glob?
) )
func AMDGetGPUInfo() []RocmGPUInfo { // Only called once during bootstrap
func AMDGetGPUInfo() ([]RocmGPUInfo, error) {
resp := []RocmGPUInfo{} resp := []RocmGPUInfo{}
hl, err := NewHipLib() hl, err := NewHipLib()
if err != nil { if err != nil {
slog.Debug(err.Error()) slog.Debug(err.Error())
return nil return nil, err
} }
defer hl.Release() defer hl.Release()
@ -44,12 +46,15 @@ func AMDGetGPUInfo() []RocmGPUInfo {
// Note: the HIP library automatically handles subsetting to any HIP_VISIBLE_DEVICES the user specified // Note: the HIP library automatically handles subsetting to any HIP_VISIBLE_DEVICES the user specified
count := hl.HipGetDeviceCount() count := hl.HipGetDeviceCount()
if count == 0 { if count == 0 {
return nil err := fmt.Errorf("no compatible amdgpu devices detected")
slog.Info(err.Error())
return nil, err
} }
libDir, err := AMDValidateLibDir() libDir, err := AMDValidateLibDir()
if err != nil { if err != nil {
slog.Warn("unable to verify rocm library, will use cpu", "error", err) err = fmt.Errorf("unable to verify rocm library: %w", err)
return nil slog.Warn(err.Error())
return nil, err
} }
var supported []string var supported []string
@ -57,8 +62,9 @@ func AMDGetGPUInfo() []RocmGPUInfo {
if gfxOverride == "" { if gfxOverride == "" {
supported, err = GetSupportedGFX(libDir) supported, err = GetSupportedGFX(libDir)
if err != nil { if err != nil {
slog.Warn("failed to lookup supported GFX types, falling back to CPU mode", "error", err) err = fmt.Errorf("failed to lookup supported GFX types: %w", err)
return nil slog.Warn(err.Error())
return nil, err
} }
} else { } else {
slog.Info("skipping rocm gfx compatibility check", "HSA_OVERRIDE_GFX_VERSION", gfxOverride) slog.Info("skipping rocm gfx compatibility check", "HSA_OVERRIDE_GFX_VERSION", gfxOverride)
@ -87,21 +93,6 @@ func AMDGetGPUInfo() []RocmGPUInfo {
slog.Debug("hip device", "id", i, "name", name, "gfx", gfx) slog.Debug("hip device", "id", i, "name", name, "gfx", gfx)
// slog.Info(fmt.Sprintf("[%d] Integrated: %d", i, props.iGPU)) // DOESN'T REPORT CORRECTLY! Always 0 // slog.Info(fmt.Sprintf("[%d] Integrated: %d", i, props.iGPU)) // DOESN'T REPORT CORRECTLY! Always 0
// TODO Why isn't props.iGPU accurate!? // TODO Why isn't props.iGPU accurate!?
if strings.EqualFold(name, iGPUName) {
slog.Info("unsupported Radeon iGPU detected skipping", "id", i, "name", name, "gfx", gfx)
continue
}
if gfxOverride == "" {
// Strip off Target Features when comparing
if !slices.Contains[[]string, string](supported, strings.Split(gfx, ":")[0]) {
slog.Warn("amdgpu is not supported", "gpu", i, "gpu_type", gfx, "library", libDir, "supported_types", supported)
// TODO - consider discrete markdown just for ROCM troubleshooting?
slog.Warn("See https://github.com/ollama/ollama/blob/main/docs/troubleshooting.md for HSA_OVERRIDE_GFX_VERSION usage")
continue
} else {
slog.Debug("amdgpu is supported", "gpu", i, "gpu_type", gfx)
}
}
freeMemory, totalMemory, err := hl.HipMemGetInfo() freeMemory, totalMemory, err := hl.HipMemGetInfo()
if err != nil { if err != nil {
@ -109,14 +100,6 @@ func AMDGetGPUInfo() []RocmGPUInfo {
continue continue
} }
// iGPU detection, remove this check once we can support an iGPU variant of the rocm library
if totalMemory < IGPUMemLimit {
slog.Info("amdgpu appears to be an iGPU, skipping", "gpu", i, "total", format.HumanBytes2(totalMemory))
continue
}
slog.Debug("amdgpu memory", "gpu", i, "total", format.HumanBytes2(totalMemory))
slog.Debug("amdgpu memory", "gpu", i, "available", format.HumanBytes2(freeMemory))
gpuInfo := RocmGPUInfo{ gpuInfo := RocmGPUInfo{
GpuInfo: GpuInfo{ GpuInfo: GpuInfo{
Library: "rocm", Library: "rocm",
@ -138,10 +121,38 @@ func AMDGetGPUInfo() []RocmGPUInfo {
index: i, index: i,
} }
// iGPU detection, remove this check once we can support an iGPU variant of the rocm library
if strings.EqualFold(name, iGPUName) || totalMemory < IGPUMemLimit {
reason := "unsupported Radeon iGPU detected skipping"
slog.Info(reason, "id", gpuInfo.ID, "total", format.HumanBytes2(totalMemory))
unsupportedGPUs = append(unsupportedGPUs, UnsupportedGPUInfo{
GpuInfo: gpuInfo.GpuInfo,
Reason: reason,
})
continue
}
// Strip off Target Features when comparing
if !slices.Contains[[]string, string](supported, strings.Split(gfx, ":")[0]) {
reason := fmt.Sprintf("amdgpu is not supported (supported types:%s)", supported)
slog.Warn(reason, "gpu_type", gfx, "gpu", gpuInfo.ID, "library", libDir)
unsupportedGPUs = append(unsupportedGPUs, UnsupportedGPUInfo{
GpuInfo: gpuInfo.GpuInfo,
Reason: reason,
})
// HSA_OVERRIDE_GFX_VERSION not supported on windows
continue
} else {
slog.Debug("amdgpu is supported", "gpu", i, "gpu_type", gfx)
}
slog.Debug("amdgpu memory", "gpu", i, "total", format.HumanBytes2(totalMemory))
slog.Debug("amdgpu memory", "gpu", i, "available", format.HumanBytes2(freeMemory))
resp = append(resp, gpuInfo) resp = append(resp, gpuInfo)
} }
return resp return resp, nil
} }
func AMDValidateLibDir() (string, error) { func AMDValidateLibDir() (string, error) {

View file

@ -54,6 +54,13 @@ var (
nvmlLibPath string nvmlLibPath string
rocmGPUs []RocmGPUInfo rocmGPUs []RocmGPUInfo
oneapiGPUs []OneapiGPUInfo oneapiGPUs []OneapiGPUInfo
// If any discovered GPUs are incompatible, report why
unsupportedGPUs []UnsupportedGPUInfo
// Keep track of errors during bootstrapping so that if GPUs that were
// expected to be present are missing, this may explain why
bootstrapErrors []error
) )
// With our current CUDA compile flags, older than 5.0 will not work properly // With our current CUDA compile flags, older than 5.0 will not work properly
@ -70,16 +77,17 @@ func initCudaHandles() *cudaHandles {
cHandles := &cudaHandles{} cHandles := &cudaHandles{}
// Short Circuit if we already know which library to use // Short Circuit if we already know which library to use
// ignore bootstrap errors in this case since we already recorded them
if nvmlLibPath != "" { if nvmlLibPath != "" {
cHandles.nvml, _ = LoadNVMLMgmt([]string{nvmlLibPath}) cHandles.nvml, _, _ = loadNVMLMgmt([]string{nvmlLibPath})
return cHandles return cHandles
} }
if nvcudaLibPath != "" { if nvcudaLibPath != "" {
cHandles.deviceCount, cHandles.nvcuda, _ = LoadNVCUDAMgmt([]string{nvcudaLibPath}) cHandles.deviceCount, cHandles.nvcuda, _, _ = loadNVCUDAMgmt([]string{nvcudaLibPath})
return cHandles return cHandles
} }
if cudartLibPath != "" { if cudartLibPath != "" {
cHandles.deviceCount, cHandles.cudart, _ = LoadCUDARTMgmt([]string{cudartLibPath}) cHandles.deviceCount, cHandles.cudart, _, _ = loadCUDARTMgmt([]string{cudartLibPath})
return cHandles return cHandles
} }
@ -102,18 +110,21 @@ func initCudaHandles() *cudaHandles {
if len(NvmlGlobs) > 0 { if len(NvmlGlobs) > 0 {
nvmlLibPaths := FindGPULibs(NvmlMgmtName, NvmlGlobs) nvmlLibPaths := FindGPULibs(NvmlMgmtName, NvmlGlobs)
if len(nvmlLibPaths) > 0 { if len(nvmlLibPaths) > 0 {
nvml, libPath := LoadNVMLMgmt(nvmlLibPaths) nvml, libPath, err := loadNVMLMgmt(nvmlLibPaths)
if nvml != nil { if nvml != nil {
slog.Debug("nvidia-ml loaded", "library", libPath) slog.Debug("nvidia-ml loaded", "library", libPath)
cHandles.nvml = nvml cHandles.nvml = nvml
nvmlLibPath = libPath nvmlLibPath = libPath
} }
if err != nil {
bootstrapErrors = append(bootstrapErrors, err)
}
} }
} }
nvcudaLibPaths := FindGPULibs(NvcudaMgmtName, nvcudaMgmtPatterns) nvcudaLibPaths := FindGPULibs(NvcudaMgmtName, nvcudaMgmtPatterns)
if len(nvcudaLibPaths) > 0 { if len(nvcudaLibPaths) > 0 {
deviceCount, nvcuda, libPath := LoadNVCUDAMgmt(nvcudaLibPaths) deviceCount, nvcuda, libPath, err := loadNVCUDAMgmt(nvcudaLibPaths)
if nvcuda != nil { if nvcuda != nil {
slog.Debug("detected GPUs", "count", deviceCount, "library", libPath) slog.Debug("detected GPUs", "count", deviceCount, "library", libPath)
cHandles.nvcuda = nvcuda cHandles.nvcuda = nvcuda
@ -121,11 +132,14 @@ func initCudaHandles() *cudaHandles {
nvcudaLibPath = libPath nvcudaLibPath = libPath
return cHandles return cHandles
} }
if err != nil {
bootstrapErrors = append(bootstrapErrors, err)
}
} }
cudartLibPaths := FindGPULibs(CudartMgmtName, cudartMgmtPatterns) cudartLibPaths := FindGPULibs(CudartMgmtName, cudartMgmtPatterns)
if len(cudartLibPaths) > 0 { if len(cudartLibPaths) > 0 {
deviceCount, cudart, libPath := LoadCUDARTMgmt(cudartLibPaths) deviceCount, cudart, libPath, err := loadCUDARTMgmt(cudartLibPaths)
if cudart != nil { if cudart != nil {
slog.Debug("detected GPUs", "library", libPath, "count", deviceCount) slog.Debug("detected GPUs", "library", libPath, "count", deviceCount)
cHandles.cudart = cudart cHandles.cudart = cudart
@ -133,6 +147,9 @@ func initCudaHandles() *cudaHandles {
cudartLibPath = libPath cudartLibPath = libPath
return cHandles return cHandles
} }
if err != nil {
bootstrapErrors = append(bootstrapErrors, err)
}
} }
return cHandles return cHandles
@ -143,14 +160,19 @@ func initOneAPIHandles() *oneapiHandles {
oHandles := &oneapiHandles{} oHandles := &oneapiHandles{}
// Short Circuit if we already know which library to use // Short Circuit if we already know which library to use
// ignore bootstrap errors in this case since we already recorded them
if oneapiLibPath != "" { if oneapiLibPath != "" {
oHandles.deviceCount, oHandles.oneapi, _ = LoadOneapiMgmt([]string{oneapiLibPath}) oHandles.deviceCount, oHandles.oneapi, _, _ = loadOneapiMgmt([]string{oneapiLibPath})
return oHandles return oHandles
} }
oneapiLibPaths := FindGPULibs(OneapiMgmtName, OneapiGlobs) oneapiLibPaths := FindGPULibs(OneapiMgmtName, OneapiGlobs)
if len(oneapiLibPaths) > 0 { if len(oneapiLibPaths) > 0 {
oHandles.deviceCount, oHandles.oneapi, oneapiLibPath = LoadOneapiMgmt(oneapiLibPaths) var err error
oHandles.deviceCount, oHandles.oneapi, oneapiLibPath, err = loadOneapiMgmt(oneapiLibPaths)
if err != nil {
bootstrapErrors = append(bootstrapErrors, err)
}
} }
return oHandles return oHandles
@ -197,6 +219,7 @@ func GetGPUInfo() GpuInfoList {
if !bootstrapped { if !bootstrapped {
slog.Info("looking for compatible GPUs") slog.Info("looking for compatible GPUs")
bootstrapErrors = []error{}
needRefresh = false needRefresh = false
cpuCapability = GetCPUCapability() cpuCapability = GetCPUCapability()
var memInfo C.mem_info_t var memInfo C.mem_info_t
@ -221,7 +244,9 @@ func GetGPUInfo() GpuInfoList {
// Fallback to CPU mode if we're lacking required vector extensions on x86 // Fallback to CPU mode if we're lacking required vector extensions on x86
if cpuCapability < GPURunnerCPUCapability && runtime.GOARCH == "amd64" { if cpuCapability < GPURunnerCPUCapability && runtime.GOARCH == "amd64" {
slog.Warn("CPU does not have minimum vector extensions, GPU inference disabled", "required", GPURunnerCPUCapability, "detected", cpuCapability) err := fmt.Errorf("CPU does not have minimum vector extensions, GPU inference disabled. Required:%s Detected:%s", GPURunnerCPUCapability, cpuCapability)
slog.Warn(err.Error())
bootstrapErrors = append(bootstrapErrors, err)
bootstrapped = true bootstrapped = true
// No need to do any GPU discovery, since we can't run on them // No need to do any GPU discovery, since we can't run on them
return GpuInfoList{cpus[0].GpuInfo} return GpuInfoList{cpus[0].GpuInfo}
@ -253,10 +278,6 @@ func GetGPUInfo() GpuInfoList {
C.free(unsafe.Pointer(memInfo.err)) C.free(unsafe.Pointer(memInfo.err))
continue continue
} }
if memInfo.major < CudaComputeMin[0] || (memInfo.major == CudaComputeMin[0] && memInfo.minor < CudaComputeMin[1]) {
slog.Info(fmt.Sprintf("[%d] CUDA GPU is too old. Compute Capability detected: %d.%d", i, memInfo.major, memInfo.minor))
continue
}
gpuInfo.TotalMemory = uint64(memInfo.total) gpuInfo.TotalMemory = uint64(memInfo.total)
gpuInfo.FreeMemory = uint64(memInfo.free) gpuInfo.FreeMemory = uint64(memInfo.free)
gpuInfo.ID = C.GoString(&memInfo.gpu_id[0]) gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
@ -279,6 +300,15 @@ func GetGPUInfo() GpuInfoList {
gpuInfo.Name = C.GoString(&memInfo.gpu_name[0]) gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
gpuInfo.Variant = variant gpuInfo.Variant = variant
if memInfo.major < CudaComputeMin[0] || (memInfo.major == CudaComputeMin[0] && memInfo.minor < CudaComputeMin[1]) {
unsupportedGPUs = append(unsupportedGPUs,
UnsupportedGPUInfo{
GpuInfo: gpuInfo.GpuInfo,
})
slog.Info(fmt.Sprintf("[%d] CUDA GPU is too old. Compute Capability detected: %d.%d", i, memInfo.major, memInfo.minor))
continue
}
// query the management library as well so we can record any skew between the two // query the management library as well so we can record any skew between the two
// which represents overhead on the GPU we must set aside on subsequent updates // which represents overhead on the GPU we must set aside on subsequent updates
if cHandles.nvml != nil { if cHandles.nvml != nil {
@ -341,7 +371,10 @@ func GetGPUInfo() GpuInfoList {
} }
} }
rocmGPUs = AMDGetGPUInfo() rocmGPUs, err = AMDGetGPUInfo()
if err != nil {
bootstrapErrors = append(bootstrapErrors, err)
}
bootstrapped = true bootstrapped = true
if len(cudaGPUs) == 0 && len(rocmGPUs) == 0 && len(oneapiGPUs) == 0 { if len(cudaGPUs) == 0 && len(rocmGPUs) == 0 && len(oneapiGPUs) == 0 {
slog.Info("no compatible GPUs were discovered") slog.Info("no compatible GPUs were discovered")
@ -526,92 +559,114 @@ func FindGPULibs(baseLibName string, defaultPatterns []string) []string {
return gpuLibPaths return gpuLibPaths
} }
func LoadCUDARTMgmt(cudartLibPaths []string) (int, *C.cudart_handle_t, string) { // Bootstrap the runtime library
// Returns: num devices, handle, libPath, error
func loadCUDARTMgmt(cudartLibPaths []string) (int, *C.cudart_handle_t, string, error) {
var resp C.cudart_init_resp_t var resp C.cudart_init_resp_t
resp.ch.verbose = getVerboseState() resp.ch.verbose = getVerboseState()
var err error
for _, libPath := range cudartLibPaths { for _, libPath := range cudartLibPaths {
lib := C.CString(libPath) lib := C.CString(libPath)
defer C.free(unsafe.Pointer(lib)) defer C.free(unsafe.Pointer(lib))
C.cudart_init(lib, &resp) C.cudart_init(lib, &resp)
if resp.err != nil { if resp.err != nil {
slog.Debug("Unable to load cudart", "library", libPath, "error", C.GoString(resp.err)) err = fmt.Errorf("Unable to load cudart library %s: %s", libPath, C.GoString(resp.err))
slog.Debug(err.Error())
C.free(unsafe.Pointer(resp.err)) C.free(unsafe.Pointer(resp.err))
} else { } else {
return int(resp.num_devices), &resp.ch, libPath err = nil
return int(resp.num_devices), &resp.ch, libPath, err
} }
} }
return 0, nil, "" return 0, nil, "", err
} }
func LoadNVCUDAMgmt(nvcudaLibPaths []string) (int, *C.nvcuda_handle_t, string) { // Bootstrap the driver library
// Returns: num devices, handle, libPath, error
func loadNVCUDAMgmt(nvcudaLibPaths []string) (int, *C.nvcuda_handle_t, string, error) {
var resp C.nvcuda_init_resp_t var resp C.nvcuda_init_resp_t
resp.ch.verbose = getVerboseState() resp.ch.verbose = getVerboseState()
var err error
for _, libPath := range nvcudaLibPaths { for _, libPath := range nvcudaLibPaths {
lib := C.CString(libPath) lib := C.CString(libPath)
defer C.free(unsafe.Pointer(lib)) defer C.free(unsafe.Pointer(lib))
C.nvcuda_init(lib, &resp) C.nvcuda_init(lib, &resp)
if resp.err != nil { if resp.err != nil {
// Decide what log level based on the type of error message to help users understand why // Decide what log level based on the type of error message to help users understand why
msg := C.GoString(resp.err)
switch resp.cudaErr { switch resp.cudaErr {
case C.CUDA_ERROR_INSUFFICIENT_DRIVER, C.CUDA_ERROR_SYSTEM_DRIVER_MISMATCH: case C.CUDA_ERROR_INSUFFICIENT_DRIVER, C.CUDA_ERROR_SYSTEM_DRIVER_MISMATCH:
slog.Warn("version mismatch between driver and cuda driver library - reboot or upgrade may be required", "library", libPath, "error", msg) err = fmt.Errorf("version mismatch between driver and cuda driver library - reboot or upgrade may be required: library %s", libPath)
slog.Warn(err.Error())
case C.CUDA_ERROR_NO_DEVICE: case C.CUDA_ERROR_NO_DEVICE:
slog.Info("no nvidia devices detected", "library", libPath) err = fmt.Errorf("no nvidia devices detected by library %s", libPath)
slog.Info(err.Error())
case C.CUDA_ERROR_UNKNOWN: case C.CUDA_ERROR_UNKNOWN:
slog.Warn("unknown error initializing cuda driver library", "library", libPath, "error", msg) err = fmt.Errorf("unknown error initializing cuda driver library %s: %s. see https://github.com/ollama/ollama/blob/main/docs/troubleshooting.md for more information", libPath, C.GoString(resp.err))
slog.Warn("see https://github.com/ollama/ollama/blob/main/docs/troubleshooting.md for more information") slog.Warn(err.Error())
default: default:
msg := C.GoString(resp.err)
if strings.Contains(msg, "wrong ELF class") { if strings.Contains(msg, "wrong ELF class") {
slog.Debug("skipping 32bit library", "library", libPath) slog.Debug("skipping 32bit library", "library", libPath)
} else { } else {
slog.Info("unable to load cuda driver library", "library", libPath, "error", msg) err = fmt.Errorf("Unable to load cudart library %s: %s", libPath, C.GoString(resp.err))
slog.Info(err.Error())
} }
} }
C.free(unsafe.Pointer(resp.err)) C.free(unsafe.Pointer(resp.err))
} else { } else {
return int(resp.num_devices), &resp.ch, libPath err = nil
return int(resp.num_devices), &resp.ch, libPath, err
} }
} }
return 0, nil, "" return 0, nil, "", err
} }
func LoadNVMLMgmt(nvmlLibPaths []string) (*C.nvml_handle_t, string) { // Bootstrap the management library
// Returns: handle, libPath, error
func loadNVMLMgmt(nvmlLibPaths []string) (*C.nvml_handle_t, string, error) {
var resp C.nvml_init_resp_t var resp C.nvml_init_resp_t
resp.ch.verbose = getVerboseState() resp.ch.verbose = getVerboseState()
var err error
for _, libPath := range nvmlLibPaths { for _, libPath := range nvmlLibPaths {
lib := C.CString(libPath) lib := C.CString(libPath)
defer C.free(unsafe.Pointer(lib)) defer C.free(unsafe.Pointer(lib))
C.nvml_init(lib, &resp) C.nvml_init(lib, &resp)
if resp.err != nil { if resp.err != nil {
slog.Info(fmt.Sprintf("Unable to load NVML management library %s: %s", libPath, C.GoString(resp.err))) err = fmt.Errorf("Unable to load NVML management library %s: %s", libPath, C.GoString(resp.err))
slog.Info(err.Error())
C.free(unsafe.Pointer(resp.err)) C.free(unsafe.Pointer(resp.err))
} else { } else {
return &resp.ch, libPath err = nil
return &resp.ch, libPath, err
} }
} }
return nil, "" return nil, "", err
} }
func LoadOneapiMgmt(oneapiLibPaths []string) (int, *C.oneapi_handle_t, string) { // bootstrap the Intel GPU library
// Returns: num devices, handle, libPath, error
func loadOneapiMgmt(oneapiLibPaths []string) (int, *C.oneapi_handle_t, string, error) {
var resp C.oneapi_init_resp_t var resp C.oneapi_init_resp_t
num_devices := 0 num_devices := 0
resp.oh.verbose = getVerboseState() resp.oh.verbose = getVerboseState()
var err error
for _, libPath := range oneapiLibPaths { for _, libPath := range oneapiLibPaths {
lib := C.CString(libPath) lib := C.CString(libPath)
defer C.free(unsafe.Pointer(lib)) defer C.free(unsafe.Pointer(lib))
C.oneapi_init(lib, &resp) C.oneapi_init(lib, &resp)
if resp.err != nil { if resp.err != nil {
slog.Debug("Unable to load oneAPI management library", "library", libPath, "error", C.GoString(resp.err)) err = fmt.Errorf("Unable to load oneAPI management library %s: %s", libPath, C.GoString(resp.err))
slog.Debug(err.Error())
C.free(unsafe.Pointer(resp.err)) C.free(unsafe.Pointer(resp.err))
} else { } else {
err = nil
for i := range resp.oh.num_drivers { for i := range resp.oh.num_drivers {
num_devices += int(C.oneapi_get_device_count(resp.oh, C.int(i))) num_devices += int(C.oneapi_get_device_count(resp.oh, C.int(i)))
} }
return num_devices, &resp.oh, libPath return num_devices, &resp.oh, libPath, err
} }
} }
return 0, nil, "" return 0, nil, "", err
} }
func getVerboseState() C.uint16_t { func getVerboseState() C.uint16_t {
@ -669,3 +724,23 @@ func LibraryDir() string {
slog.Warn("unable to locate gpu dependency libraries") slog.Warn("unable to locate gpu dependency libraries")
return "" return ""
} }
func GetSystemInfo() SystemInfo {
gpus := GetGPUInfo()
gpuMutex.Lock()
defer gpuMutex.Unlock()
discoveryErrors := []string{}
for _, err := range bootstrapErrors {
discoveryErrors = append(discoveryErrors, err.Error())
}
if len(gpus) == 1 && gpus[0].Library == "cpu" {
gpus = []GpuInfo{}
}
return SystemInfo{
System: cpus[0],
GPUs: gpus,
UnsupportedGPUs: unsupportedGPUs,
DiscoveryErrors: discoveryErrors,
}
}

View file

@ -66,3 +66,15 @@ func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) {
// No-op on darwin // No-op on darwin
return "", "" return "", ""
} }
func GetSystemInfo() SystemInfo {
mem, _ := GetCPUMem()
return SystemInfo{
System: CPUInfo{
GpuInfo: GpuInfo{
memInfo: mem,
},
},
GPUs: GetGPUInfo(),
}
}

View file

@ -76,6 +76,11 @@ type OneapiGPUInfoList []OneapiGPUInfo
type GpuInfoList []GpuInfo type GpuInfoList []GpuInfo
type UnsupportedGPUInfo struct {
GpuInfo
Reason string `json:"reason"`
}
// Split up the set of gpu info's by Library and variant // Split up the set of gpu info's by Library and variant
func (l GpuInfoList) ByLibrary() []GpuInfoList { func (l GpuInfoList) ByLibrary() []GpuInfoList {
resp := []GpuInfoList{} resp := []GpuInfoList{}
@ -146,3 +151,10 @@ func (c CPUCapability) String() string {
return "no vector extensions" return "no vector extensions"
} }
} }
type SystemInfo struct {
System CPUInfo `json:"system"`
GPUs []GpuInfo `json:"gpus"`
UnsupportedGPUs []UnsupportedGPUInfo `json:"unsupported_gpus"`
DiscoveryErrors []string `json:"discovery_errors"`
}

View file

@ -3,12 +3,12 @@ package llama
/* /*
#cgo CFLAGS: -O2 -std=c11 -DGGML_BUILD=1 -DNDEBUG -DLOG_DISABLE_LOGS -DGGML_USE_LLAMAFILE #cgo CFLAGS: -O2 -std=c11 -DGGML_BUILD=1 -DNDEBUG -DLOG_DISABLE_LOGS -DGGML_USE_LLAMAFILE
#cgo CXXFLAGS: -O2 -std=c++11 -DGGML_BUILD=1 -DNDEBUG -DLOG_DISABLE_LOGS -DGGML_USE_LLAMAFILE #cgo CXXFLAGS: -O2 -std=c++11 -DGGML_BUILD=1 -DNDEBUG -DLOG_DISABLE_LOGS -DGGML_USE_LLAMAFILE
#cgo darwin,arm64 CFLAGS: -DGGML_USE_METAL -DGGML_USE_ACCELERATE -DGGML_METAL_EMBED_LIBRARY -DACCELERATE_NEW_LAPACK -DACCELERATE_LAPACK_ILP64 -DGGML_USE_BLAS #cgo darwin,arm64 CFLAGS: -DGGML_USE_METAL -DGGML_USE_ACCELERATE -DGGML_METAL_EMBED_LIBRARY -DACCELERATE_NEW_LAPACK -DACCELERATE_LAPACK_ILP64 -DGGML_USE_BLAS -mmacosx-version-min=11.3
#cgo darwin,arm64 CXXFLAGS: -DGGML_USE_METAL -DGGML_USE_ACCELERATE -DGGML_METAL_EMBED_LIBRARY -DACCELERATE_NEW_LAPACK -DACCELERATE_LAPACK_ILP64 -DGGML_USE_BLAS #cgo darwin,arm64 CXXFLAGS: -DGGML_USE_METAL -DGGML_USE_ACCELERATE -DGGML_METAL_EMBED_LIBRARY -DACCELERATE_NEW_LAPACK -DACCELERATE_LAPACK_ILP64 -DGGML_USE_BLAS -mmacosx-version-min=11.3
#cgo darwin,arm64 LDFLAGS: -framework Foundation -framework Metal -framework MetalKit -framework Accelerate #cgo darwin,arm64 LDFLAGS: -framework Foundation -framework Metal -framework MetalKit -framework Accelerate -mmacosx-version-min=11.3
#cgo darwin,amd64 CFLAGS: -Wno-incompatible-pointer-types-discards-qualifiers #cgo darwin,amd64 CFLAGS: -Wno-incompatible-pointer-types-discards-qualifiers -mmacosx-version-min=11.3
#cgo darwin,amd64 CXXFLAGS: -Wno-incompatible-pointer-types-discards-qualifiers #cgo darwin,amd64 CXXFLAGS: -Wno-incompatible-pointer-types-discards-qualifiers -mmacosx-version-min=11.3
#cgo darwin,amd64 LDFLAGS: -framework Foundation #cgo darwin,amd64 LDFLAGS: -framework Foundation -mmacosx-version-min=11.3
#cgo darwin,amd64,avx2 CFLAGS: -DGGML_USE_ACCELERATE -DACCELERATE_NEW_LAPACK -DACCELERATE_LAPACK_ILP64 #cgo darwin,amd64,avx2 CFLAGS: -DGGML_USE_ACCELERATE -DACCELERATE_NEW_LAPACK -DACCELERATE_LAPACK_ILP64
#cgo darwin,amd64,avx2 CXXFLAGS: -DGGML_USE_ACCELERATE -DACCELERATE_NEW_LAPACK -DACCELERATE_LAPACK_ILP64 #cgo darwin,amd64,avx2 CXXFLAGS: -DGGML_USE_ACCELERATE -DACCELERATE_NEW_LAPACK -DACCELERATE_LAPACK_ILP64
#cgo darwin,amd64,avx2 LDFLAGS: -framework Accelerate #cgo darwin,amd64,avx2 LDFLAGS: -framework Accelerate

View file

@ -251,7 +251,7 @@ if [ -z "${OLLAMA_SKIP_ROCM_GENERATE}" -a -d "${ROCM_PATH}" ]; then
ROCM_VARIANT=_v$(ls ${ROCM_PATH}/lib/librocblas.so.*.*.????? | cut -f5 -d. || true) ROCM_VARIANT=_v$(ls ${ROCM_PATH}/lib/librocblas.so.*.*.????? | cut -f5 -d. || true)
fi fi
init_vars init_vars
CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DGGML_HIPBLAS=on -DGGML_CUDA_NO_PEER_COPY=on -DCMAKE_C_COMPILER=$ROCM_PATH/llvm/bin/clang -DCMAKE_CXX_COMPILER=$ROCM_PATH/llvm/bin/clang++ -DAMDGPU_TARGETS=$(amdGPUs) -DGPU_TARGETS=$(amdGPUs)" CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DGGML_HIPBLAS=on -DCMAKE_C_COMPILER=$ROCM_PATH/llvm/bin/clang -DCMAKE_CXX_COMPILER=$ROCM_PATH/llvm/bin/clang++ -DAMDGPU_TARGETS=$(amdGPUs) -DGPU_TARGETS=$(amdGPUs)"
# Users building from source can tune the exact flags we pass to cmake for configuring llama.cpp # Users building from source can tune the exact flags we pass to cmake for configuring llama.cpp
if [ -n "${OLLAMA_CUSTOM_ROCM_DEFS}" ]; then if [ -n "${OLLAMA_CUSTOM_ROCM_DEFS}" ]; then
echo "OLLAMA_CUSTOM_ROCM_DEFS=\"${OLLAMA_CUSTOM_ROCM_DEFS}\"" echo "OLLAMA_CUSTOM_ROCM_DEFS=\"${OLLAMA_CUSTOM_ROCM_DEFS}\""

View file

@ -340,7 +340,6 @@ function build_rocm() {
"-DCMAKE_C_COMPILER=clang.exe", "-DCMAKE_C_COMPILER=clang.exe",
"-DCMAKE_CXX_COMPILER=clang++.exe", "-DCMAKE_CXX_COMPILER=clang++.exe",
"-DGGML_HIPBLAS=on", "-DGGML_HIPBLAS=on",
"-DGGML_CUDA_NO_PEER_COPY=on",
"-DHIP_PLATFORM=amd", "-DHIP_PLATFORM=amd",
"-DGGML_AVX=on", "-DGGML_AVX=on",
"-DGGML_AVX2=off", "-DGGML_AVX2=off",

View file

@ -244,6 +244,8 @@ func (t Tensor) typeSize() uint64 {
return 8 return 8
case 29: // IQ1_M case 29: // IQ1_M
return blockSize/8 + blockSize/16 + blockSize/32 return blockSize/8 + blockSize/16 + blockSize/32
case 30: // BF16
return 2
default: default:
return 0 return 0
} }