Merge pull request #4188 from dhiltgen/use_our_lib

Use our bundled libraries (cuda) instead of the host library

commit b08870aff3
2 changed files with 31 additions and 15 deletions
gpu/gpu.go

@@ -166,6 +166,12 @@ func GetGPUInfo() GpuInfoList {
 		slog.Warn("CPU does not have AVX or AVX2, disabling GPU support.")
 	}
 
+	// On windows we bundle the nvidia library one level above the runner dir
+	depPath := ""
+	if runtime.GOOS == "windows" && envconfig.RunnersDir != "" {
+		depPath = filepath.Dir(envconfig.RunnersDir)
+	}
+
 	var memInfo C.mem_info_t
 	resp := []GpuInfo{}
 
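The block added above computes the bundled-dependency directory once during GPU discovery: on Windows the nvidia libraries ship one level above the runner dir, so depPath is just the parent of envconfig.RunnersDir. A minimal standalone sketch of that derivation, using a purely illustrative install path (not taken from the diff):

package main

import (
	"fmt"
	"path/filepath"
	"runtime"
)

func main() {
	// Illustrative stand-in for envconfig.RunnersDir on a Windows install.
	runnersDir := `C:\Program Files\Ollama\ollama_runners`

	depPath := ""
	if runtime.GOOS == "windows" && runnersDir != "" {
		// Bundled nvidia libraries live one level above the runner dir.
		depPath = filepath.Dir(runnersDir)
	}
	// On windows this prints: C:\Program Files\Ollama
	fmt.Println(depPath)
}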
@@ -198,6 +204,7 @@ func GetGPUInfo() GpuInfoList {
 			gpuInfo.Major = int(memInfo.major)
 			gpuInfo.Minor = int(memInfo.minor)
 			gpuInfo.MinimumMemory = cudaMinimumMemory
+			gpuInfo.DependencyPath = depPath
 
 			// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
 			resp = append(resp, gpuInfo)
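The single added line threads depPath onto every discovered GPU, which is how the server layer later learns where the bundled CUDA libraries live. A sketch of the shape implied by this hunk; only the four field names visible above are grounded in the diff, the types are assumptions, and the real gpu.GpuInfo type has more fields:

type GpuInfo struct {
	Major          int    // CUDA compute capability, major version
	Minor          int    // CUDA compute capability, minor version
	MinimumMemory  uint64 // assumption: minimum usable VRAM, in bytes
	DependencyPath string // dir holding bundled CUDA libraries ("" if none)
}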
llm/server.go

@@ -233,13 +233,13 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 	if runtime.GOOS == "windows" {
 		pathEnv = "PATH"
 	}
-	// append the server directory to LD_LIBRARY_PATH/PATH
+	// prepend the server directory to LD_LIBRARY_PATH/PATH
 	libraryPaths := []string{dir}
 
 	if libraryPath, ok := os.LookupEnv(pathEnv); ok {
 		// Append our runner directory to the path
 		// This will favor system libraries over our bundled library dependencies
-		libraryPaths = append(filepath.SplitList(libraryPath), libraryPaths...)
+		libraryPaths = append(libraryPaths, filepath.SplitList(libraryPath)...)
 	}
 
 	// Note: we always put the dependency path first
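The flipped append is the core of the change: libraryPaths starts with the server dir, and the pre-existing LD_LIBRARY_PATH/PATH entries now go after it, so the dynamic loader resolves the bundled CUDA libraries before any host copy. (Note that the two unchanged context comments above the append still describe the old, system-first ordering and read stale after this hunk.) A standalone sketch of the ordering difference, with illustrative paths:

package main

import (
	"fmt"
	"path/filepath"
	"strings"
)

func main() {
	dir := "/tmp/ollama/runners/cuda_v11"                   // illustrative server dir
	hostPath := "/usr/lib/x86_64-linux-gnu:/opt/cuda/lib64" // illustrative LD_LIBRARY_PATH

	// Old order: host entries first, so a host libcudart would win.
	oldOrder := append(filepath.SplitList(hostPath), dir)
	// New order: server dir first, so the bundled libcudart wins.
	newOrder := append([]string{dir}, filepath.SplitList(hostPath)...)

	fmt.Println(strings.Join(oldOrder, string(filepath.ListSeparator)))
	fmt.Println(strings.Join(newOrder, string(filepath.ListSeparator)))
}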
@@ -275,15 +275,31 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 		sem: semaphore.NewWeighted(int64(numParallel)),
 	}
 
-	libEnv := fmt.Sprintf("%s=%s", pathEnv, strings.Join(libraryPaths, string(filepath.ListSeparator)))
-	s.cmd.Env = append(os.Environ(), libEnv)
+	s.cmd.Env = os.Environ()
 	s.cmd.Stdout = os.Stdout
 	s.cmd.Stderr = s.status
 
-	// TODO - multiple GPU selection logic...
-	key, val := gpu.GpuInfoList(gpus).GetVisibleDevicesEnv()
-	if key != "" {
-		s.cmd.Env = append(s.cmd.Env, key+"="+val)
-	}
+	visibleDevicesEnv, visibleDevicesEnvVal := gpu.GpuInfoList(gpus).GetVisibleDevicesEnv()
+	pathEnvVal := strings.Join(libraryPaths, string(filepath.ListSeparator))
+
+	// Update or add the path and visible devices variable with our adjusted version
+	pathNeeded := true
+	devicesNeeded := visibleDevicesEnv != ""
+	for i := range s.cmd.Env {
+		cmp := strings.SplitN(s.cmd.Env[i], "=", 2)
+		if strings.EqualFold(cmp[0], pathEnv) {
+			s.cmd.Env[i] = pathEnv + "=" + pathEnvVal
+			pathNeeded = false
+		} else if devicesNeeded && strings.EqualFold(cmp[0], visibleDevicesEnv) {
+			s.cmd.Env[i] = visibleDevicesEnv + "=" + visibleDevicesEnvVal
+			devicesNeeded = false
+		}
+	}
+	if pathNeeded {
+		s.cmd.Env = append(s.cmd.Env, pathEnv+"="+pathEnvVal)
+	}
+	if devicesNeeded {
+		s.cmd.Env = append(s.cmd.Env, visibleDevicesEnv+"="+visibleDevicesEnvVal)
+	}
 
 	slog.Info("starting llama server", "cmd", s.cmd.String())
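Rather than blindly appending one more PATH/LD_LIBRARY_PATH entry to os.Environ() (the old libEnv approach), the loop rewrites an existing entry in place and only appends when none was found. strings.EqualFold is what makes this safe on Windows, where variable names are case-insensitive ("Path" vs "PATH") and a duplicate entry may not be the one the child process sees. A self-contained sketch of the same update-or-append pattern; setEnvVar is an illustrative helper, not a function from the diff:

package main

import (
	"fmt"
	"strings"
)

// setEnvVar mirrors the loop in the hunk above: replace a matching
// KEY=value entry case-insensitively, else append a new one.
func setEnvVar(env []string, key, val string) []string {
	needed := true
	for i := range env {
		kv := strings.SplitN(env[i], "=", 2)
		if strings.EqualFold(kv[0], key) {
			env[i] = key + "=" + val
			needed = false
		}
	}
	if needed {
		env = append(env, key+"="+val)
	}
	return env
}

func main() {
	env := []string{`Path=C:\Windows\system32`, `HOME=C:\Users\me`}
	// Updates the existing "Path" entry despite the different case.
	env = setEnvVar(env, "PATH", `C:\Program Files\Ollama;C:\Windows\system32`)
	// No CUDA_VISIBLE_DEVICES entry exists yet, so this appends one.
	env = setEnvVar(env, "CUDA_VISIBLE_DEVICES", "0")
	fmt.Println(strings.Join(env, "\n"))
}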
@@ -300,13 +316,6 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 			continue
 		}
 
-		// TODO - make sure this is all wired up correctly
-		// if err = s.WaitUntilRunning(); err != nil {
-		// 	slog.Error("error starting llama server", "server", servers[i], "error", err)
-		// 	s.Close()
-		// 	finalErr = err
-		// 	continue
-		// }
 		return s, nil
 	}
 