only package 11.8 runner
This commit is contained in:
parent
b9bb5ca288
commit
1255bc9b45
4 changed files with 17 additions and 69 deletions
|
@ -35,5 +35,5 @@ Now you can run `ollama`:
|
||||||
## Building on Linux with GPU support
|
## Building on Linux with GPU support
|
||||||
|
|
||||||
- Install cmake and nvidia-cuda-toolkit
|
- Install cmake and nvidia-cuda-toolkit
|
||||||
- run `CUDA_VERSION=11 CUDA_PATH=/path/to/libcuda.so CUBLAS_PATH=/path/to/libcublas.so CUDART_PATH=/path/to/libcudart.so CUBLASLT_PATH=/path/to/libcublasLt.so go generate ./...`
|
- run `CUDA_PATH=/path/to/libcuda.so CUBLAS_PATH=/path/to/libcublas.so CUDART_PATH=/path/to/libcudart.so CUBLASLT_PATH=/path/to/libcublasLt.so go generate ./...`
|
||||||
- run `go build .`
|
- run `go build .`
|
||||||
|
|
|
@ -15,15 +15,15 @@ package llm
|
||||||
//go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_K_QUANTS=on
|
//go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_K_QUANTS=on
|
||||||
//go:generate cmake --build gguf/build/cpu --target server --config Release
|
//go:generate cmake --build gguf/build/cpu --target server --config Release
|
||||||
|
|
||||||
//go:generate cmake -S ggml -B ggml/build/cuda-${CUDA_VERSION} -DLLAMA_CUBLAS=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on
|
//go:generate cmake -S ggml -B ggml/build/cuda -DLLAMA_CUBLAS=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on
|
||||||
//go:generate cmake --build ggml/build/cuda-${CUDA_VERSION} --target server --config Release
|
//go:generate cmake --build ggml/build/cuda --target server --config Release
|
||||||
//go:generate cmake -S gguf -B gguf/build/cuda-${CUDA_VERSION} -DLLAMA_CUBLAS=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on
|
//go:generate cmake -S gguf -B gguf/build/cuda -DLLAMA_CUBLAS=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on
|
||||||
//go:generate cmake --build gguf/build/cuda-${CUDA_VERSION} --target server --config Release
|
//go:generate cmake --build gguf/build/cuda --target server --config Release
|
||||||
//go:generate cp --dereference ${CUDA_PATH} ggml/build/cuda-${CUDA_VERSION}/bin/libcuda.so
|
//go:generate cp --dereference ${CUDA_PATH} ggml/build/cuda/bin/libcuda.so
|
||||||
//go:generate cp --dereference ${CUDA_PATH} gguf/build/cuda-${CUDA_VERSION}/bin/libcuda.so
|
//go:generate cp --dereference ${CUDA_PATH} gguf/build/cuda/bin/libcuda.so
|
||||||
//go:generate cp --dereference ${CUBLAS_PATH} ggml/build/cuda-${CUDA_VERSION}/bin/libcublas.so.${CUDA_VERSION}
|
//go:generate cp --dereference ${CUBLAS_PATH} ggml/build/cuda/bin/libcublas.so.11
|
||||||
//go:generate cp --dereference ${CUBLAS_PATH} gguf/build/cuda-${CUDA_VERSION}/bin/libcublas.so.${CUDA_VERSION}
|
//go:generate cp --dereference ${CUBLAS_PATH} gguf/build/cuda/bin/libcublas.so.11
|
||||||
//go:generate cp --dereference ${CUDART_PATH} ggml/build/cuda-${CUDA_VERSION}/bin/libcudart.so.${CUDA_VERSION}.0
|
//go:generate cp --dereference ${CUDART_PATH} ggml/build/cuda/bin/libcudart.so.11.0
|
||||||
//go:generate cp --dereference ${CUDART_PATH} gguf/build/cuda-${CUDA_VERSION}/bin/libcudart.so.${CUDA_VERSION}.0
|
//go:generate cp --dereference ${CUDART_PATH} gguf/build/cuda/bin/libcudart.so.11.0
|
||||||
//go:generate cp --dereference ${CUBLASLT_PATH} ggml/build/cuda-${CUDA_VERSION}/bin/libcublasLt.so.${CUDA_VERSION}
|
//go:generate cp --dereference ${CUBLASLT_PATH} ggml/build/cuda/bin/libcublasLt.so.11
|
||||||
//go:generate cp --dereference ${CUBLASLT_PATH} gguf/build/cuda-${CUDA_VERSION}/bin/libcublasLt.so.${CUDA_VERSION}
|
//go:generate cp --dereference ${CUBLASLT_PATH} gguf/build/cuda/bin/libcublasLt.so.11
|
||||||
|
|
58
llm/llama.go
58
llm/llama.go
|
@ -17,7 +17,6 @@ import (
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"path"
|
"path"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"regexp"
|
|
||||||
"runtime"
|
"runtime"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
@ -29,46 +28,6 @@ import (
|
||||||
//go:embed llama.cpp/*/build/*/bin/*
|
//go:embed llama.cpp/*/build/*/bin/*
|
||||||
var llamaCppEmbed embed.FS
|
var llamaCppEmbed embed.FS
|
||||||
|
|
||||||
func cudaVersion() int {
|
|
||||||
// first try nvcc, it gives the most accurate version if available
|
|
||||||
cmd := exec.Command("nvcc", "--version")
|
|
||||||
output, err := cmd.CombinedOutput()
|
|
||||||
if err == nil {
|
|
||||||
// regex to match the CUDA version line in nvcc --version output
|
|
||||||
re := regexp.MustCompile(`release (\d+\.\d+),`)
|
|
||||||
matches := re.FindStringSubmatch(string(output))
|
|
||||||
if len(matches) >= 2 {
|
|
||||||
cudaVersion := matches[1]
|
|
||||||
cudaVersionParts := strings.Split(cudaVersion, ".")
|
|
||||||
cudaMajorVersion, err := strconv.Atoi(cudaVersionParts[0])
|
|
||||||
if err == nil {
|
|
||||||
return cudaMajorVersion
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// fallback to nvidia-smi
|
|
||||||
cmd = exec.Command("nvidia-smi")
|
|
||||||
output, err = cmd.CombinedOutput()
|
|
||||||
if err != nil {
|
|
||||||
return -1
|
|
||||||
}
|
|
||||||
|
|
||||||
re := regexp.MustCompile(`CUDA Version: (\d+\.\d+)`)
|
|
||||||
matches := re.FindStringSubmatch(string(output))
|
|
||||||
if len(matches) < 2 {
|
|
||||||
return -1
|
|
||||||
}
|
|
||||||
|
|
||||||
cudaVersion := matches[1]
|
|
||||||
cudaVersionParts := strings.Split(cudaVersion, ".")
|
|
||||||
cudaMajorVersion, err := strconv.Atoi(cudaVersionParts[0])
|
|
||||||
if err != nil {
|
|
||||||
return -1
|
|
||||||
}
|
|
||||||
return cudaMajorVersion
|
|
||||||
}
|
|
||||||
|
|
||||||
type ModelRunner struct {
|
type ModelRunner struct {
|
||||||
Path string // path to the model runner executable
|
Path string // path to the model runner executable
|
||||||
}
|
}
|
||||||
|
@ -86,20 +45,9 @@ func chooseRunners(runnerType string) []ModelRunner {
|
||||||
path.Join(buildPath, "cpu", "bin", "server"),
|
path.Join(buildPath, "cpu", "bin", "server"),
|
||||||
}
|
}
|
||||||
case "linux":
|
case "linux":
|
||||||
cuda := cudaVersion()
|
runners = []string{
|
||||||
if cuda == 11 {
|
path.Join(buildPath, "cuda", "bin", "server"),
|
||||||
// prioritize CUDA 11 runner
|
path.Join(buildPath, "cpu", "bin", "server"),
|
||||||
runners = []string{
|
|
||||||
path.Join(buildPath, "cuda-11", "bin", "server"),
|
|
||||||
path.Join(buildPath, "cuda-12", "bin", "server"),
|
|
||||||
path.Join(buildPath, "cpu", "bin", "server"),
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
runners = []string{
|
|
||||||
path.Join(buildPath, "cuda-12", "bin", "server"),
|
|
||||||
path.Join(buildPath, "cuda-11", "bin", "server"),
|
|
||||||
path.Join(buildPath, "cpu", "bin", "server"),
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
case "windows":
|
case "windows":
|
||||||
// TODO: select windows GPU runner here when available
|
// TODO: select windows GPU runner here when available
|
||||||
|
|
|
@ -556,7 +556,7 @@ func Serve(ln net.Listener, origins []string) error {
|
||||||
if runtime.GOOS == "linux" {
|
if runtime.GOOS == "linux" {
|
||||||
// check compatibility to log warnings
|
// check compatibility to log warnings
|
||||||
if _, err := llm.CheckVRAM(); err != nil {
|
if _, err := llm.CheckVRAM(); err != nil {
|
||||||
log.Printf("Warning: GPU support not enabled, you may need to install GPU drivers: %v", err)
|
log.Printf("Warning: GPU support may not be enabled, check that you have installed GPU drivers: %v", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue