Merge pull request #4188 from dhiltgen/use_our_lib

Use our bundled libraries (cuda) instead of the host library

commit b08870aff3
2 changed files with 31 additions and 15 deletions
gpu/gpu.go

@@ -166,6 +166,12 @@ func GetGPUInfo() GpuInfoList {
 		slog.Warn("CPU does not have AVX or AVX2, disabling GPU support.")
 	}
 
+	// On windows we bundle the nvidia library one level above the runner dir
+	depPath := ""
+	if runtime.GOOS == "windows" && envconfig.RunnersDir != "" {
+		depPath = filepath.Dir(envconfig.RunnersDir)
+	}
+
 	var memInfo C.mem_info_t
 	resp := []GpuInfo{}
 
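The block added above computes the bundled-dependency directory once during GPU discovery: on Windows the nvidia libraries ship one level above the runner dir, so depPath is just the parent of envconfig.RunnersDir. A minimal standalone sketch of that derivation, using a purely illustrative install path (not taken from the diff):

package main

import (
	"fmt"
	"path/filepath"
	"runtime"
)

func main() {
	// Illustrative stand-in for envconfig.RunnersDir on a Windows install.
	runnersDir := `C:\Program Files\Ollama\ollama_runners`

	depPath := ""
	if runtime.GOOS == "windows" && runnersDir != "" {
		// Bundled nvidia libraries live one level above the runner dir.
		depPath = filepath.Dir(runnersDir)
	}
	// On windows this prints: C:\Program Files\Ollama
	fmt.Println(depPath)
}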
@@ -198,6 +204,7 @@ func GetGPUInfo() GpuInfoList {
 			gpuInfo.Major = int(memInfo.major)
 			gpuInfo.Minor = int(memInfo.minor)
 			gpuInfo.MinimumMemory = cudaMinimumMemory
+			gpuInfo.DependencyPath = depPath
 
 			// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
 			resp = append(resp, gpuInfo)
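The single added line threads depPath onto every discovered GPU, which is how the server layer later learns where the bundled CUDA libraries live. A sketch of the shape implied by this hunk; only the four field names visible above are grounded in the diff, the types are assumptions, and the real gpu.GpuInfo type has more fields:

type GpuInfo struct {
	Major          int    // CUDA compute capability, major version
	Minor          int    // CUDA compute capability, minor version
	MinimumMemory  uint64 // assumption: minimum usable VRAM, in bytes
	DependencyPath string // dir holding bundled CUDA libraries ("" if none)
}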
llm/server.go

@@ -233,13 +233,13 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 	if runtime.GOOS == "windows" {
 		pathEnv = "PATH"
 	}
-	// append the server directory to LD_LIBRARY_PATH/PATH
+	// prepend the server directory to LD_LIBRARY_PATH/PATH
 	libraryPaths := []string{dir}
 
 	if libraryPath, ok := os.LookupEnv(pathEnv); ok {
 		// Append our runner directory to the path
 		// This will favor system libraries over our bundled library dependencies
-		libraryPaths = append(filepath.SplitList(libraryPath), libraryPaths...)
+		libraryPaths = append(libraryPaths, filepath.SplitList(libraryPath)...)
 	}
 
 	// Note: we always put the dependency path first
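The flipped append is the core of the change: libraryPaths starts with the server dir, and the pre-existing LD_LIBRARY_PATH/PATH entries now go after it, so the dynamic loader resolves the bundled CUDA libraries before any host copy. (Note that the two unchanged context comments above the append still describe the old, system-first ordering and read stale after this hunk.) A standalone sketch of the ordering difference, with illustrative paths:

package main

import (
	"fmt"
	"path/filepath"
	"strings"
)

func main() {
	dir := "/tmp/ollama/runners/cuda_v11"                   // illustrative server dir
	hostPath := "/usr/lib/x86_64-linux-gnu:/opt/cuda/lib64" // illustrative LD_LIBRARY_PATH

	// Old order: host entries first, so a host libcudart would win.
	oldOrder := append(filepath.SplitList(hostPath), dir)
	// New order: server dir first, so the bundled libcudart wins.
	newOrder := append([]string{dir}, filepath.SplitList(hostPath)...)

	fmt.Println(strings.Join(oldOrder, string(filepath.ListSeparator)))
	fmt.Println(strings.Join(newOrder, string(filepath.ListSeparator)))
}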
@@ -275,15 +275,31 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 		sem: semaphore.NewWeighted(int64(numParallel)),
 	}
 
-	libEnv := fmt.Sprintf("%s=%s", pathEnv, strings.Join(libraryPaths, string(filepath.ListSeparator)))
-	s.cmd.Env = append(os.Environ(), libEnv)
+	s.cmd.Env = os.Environ()
 	s.cmd.Stdout = os.Stdout
 	s.cmd.Stderr = s.status
 
-	// TODO - multiple GPU selection logic...
-	key, val := gpu.GpuInfoList(gpus).GetVisibleDevicesEnv()
-	if key != "" {
-		s.cmd.Env = append(s.cmd.Env, key+"="+val)
-	}
+	visibleDevicesEnv, visibleDevicesEnvVal := gpu.GpuInfoList(gpus).GetVisibleDevicesEnv()
+	pathEnvVal := strings.Join(libraryPaths, string(filepath.ListSeparator))
+
+	// Update or add the path and visible devices variable with our adjusted version
+	pathNeeded := true
+	devicesNeeded := visibleDevicesEnv != ""
+	for i := range s.cmd.Env {
+		cmp := strings.SplitN(s.cmd.Env[i], "=", 2)
+		if strings.EqualFold(cmp[0], pathEnv) {
+			s.cmd.Env[i] = pathEnv + "=" + pathEnvVal
+			pathNeeded = false
+		} else if devicesNeeded && strings.EqualFold(cmp[0], visibleDevicesEnv) {
+			s.cmd.Env[i] = visibleDevicesEnv + "=" + visibleDevicesEnvVal
+			devicesNeeded = false
+		}
+	}
+	if pathNeeded {
+		s.cmd.Env = append(s.cmd.Env, pathEnv+"="+pathEnvVal)
+	}
+	if devicesNeeded {
+		s.cmd.Env = append(s.cmd.Env, visibleDevicesEnv+"="+visibleDevicesEnvVal)
+	}
 
 	slog.Info("starting llama server", "cmd", s.cmd.String())
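Rather than blindly appending one more PATH/LD_LIBRARY_PATH entry to os.Environ() (the old libEnv approach), the loop rewrites an existing entry in place and only appends when none was found. strings.EqualFold is what makes this safe on Windows, where variable names are case-insensitive ("Path" vs "PATH") and a duplicate entry may not be the one the child process sees. A self-contained sketch of the same update-or-append pattern; setEnvVar is an illustrative helper, not a function from the diff:

package main

import (
	"fmt"
	"strings"
)

// setEnvVar mirrors the loop in the hunk above: replace a matching
// KEY=value entry case-insensitively, else append a new one.
func setEnvVar(env []string, key, val string) []string {
	needed := true
	for i := range env {
		kv := strings.SplitN(env[i], "=", 2)
		if strings.EqualFold(kv[0], key) {
			env[i] = key + "=" + val
			needed = false
		}
	}
	if needed {
		env = append(env, key+"="+val)
	}
	return env
}

func main() {
	env := []string{`Path=C:\Windows\system32`, `HOME=C:\Users\me`}
	// Updates the existing "Path" entry despite the different case.
	env = setEnvVar(env, "PATH", `C:\Program Files\Ollama;C:\Windows\system32`)
	// No CUDA_VISIBLE_DEVICES entry exists yet, so this appends one.
	env = setEnvVar(env, "CUDA_VISIBLE_DEVICES", "0")
	fmt.Println(strings.Join(env, "\n"))
}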
@@ -300,13 +316,6 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 			continue
 		}
 
-		// TODO - make sure this is all wired up correctly
-		// if err = s.WaitUntilRunning(); err != nil {
-		// 	slog.Error("error starting llama server", "server", servers[i], "error", err)
-		// 	s.Close()
-		// 	finalErr = err
-		// 	continue
-		// }
 		return s, nil
 	}
 