Merge branch 'main' into archlinux
This commit is contained in:
commit
cbe2adc78a
11 changed files with 151 additions and 372 deletions
31
.github/workflows/test.yaml
vendored
31
.github/workflows/test.yaml
vendored
|
@ -8,7 +8,15 @@ jobs:
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
os: [ubuntu-latest, macos-latest, windows-latest]
|
os: [ubuntu-latest, macos-latest, windows-latest]
|
||||||
|
arch: [amd64, arm64]
|
||||||
|
exclude:
|
||||||
|
- os: ubuntu-latest
|
||||||
|
arch: arm64
|
||||||
|
- os: windows-latest
|
||||||
|
arch: arm64
|
||||||
runs-on: ${{ matrix.os }}
|
runs-on: ${{ matrix.os }}
|
||||||
|
env:
|
||||||
|
GOARCH: ${{ matrix.arch }}
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
- uses: actions/setup-go@v4
|
- uses: actions/setup-go@v4
|
||||||
|
@ -33,7 +41,7 @@ jobs:
|
||||||
- run: go generate -x ./...
|
- run: go generate -x ./...
|
||||||
- uses: actions/upload-artifact@v4
|
- uses: actions/upload-artifact@v4
|
||||||
with:
|
with:
|
||||||
name: ${{ matrix.os }}-libraries
|
name: ${{ matrix.os }}-${{ matrix.arch }}-libraries
|
||||||
path: |
|
path: |
|
||||||
llm/llama.cpp/build/**/lib/*
|
llm/llama.cpp/build/**/lib/*
|
||||||
lint:
|
lint:
|
||||||
|
@ -41,7 +49,18 @@ jobs:
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
os: [ubuntu-latest, macos-latest, windows-latest]
|
os: [ubuntu-latest, macos-latest, windows-latest]
|
||||||
|
arch: [amd64, arm64]
|
||||||
|
exclude:
|
||||||
|
- os: ubuntu-latest
|
||||||
|
arch: arm64
|
||||||
|
- os: windows-latest
|
||||||
|
arch: arm64
|
||||||
|
- os: macos-latest
|
||||||
|
arch: amd64
|
||||||
runs-on: ${{ matrix.os }}
|
runs-on: ${{ matrix.os }}
|
||||||
|
env:
|
||||||
|
GOARCH: ${{ matrix.arch }}
|
||||||
|
CGO_ENABLED: "1"
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
with:
|
with:
|
||||||
|
@ -52,7 +71,7 @@ jobs:
|
||||||
cache: false
|
cache: false
|
||||||
- uses: actions/download-artifact@v4
|
- uses: actions/download-artifact@v4
|
||||||
with:
|
with:
|
||||||
name: ${{ matrix.os }}-libraries
|
name: ${{ matrix.os }}-${{ matrix.arch }}-libraries
|
||||||
path: llm/llama.cpp/build
|
path: llm/llama.cpp/build
|
||||||
- uses: golangci/golangci-lint-action@v3
|
- uses: golangci/golangci-lint-action@v3
|
||||||
test:
|
test:
|
||||||
|
@ -60,6 +79,12 @@ jobs:
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
os: [ubuntu-latest, macos-latest, windows-latest]
|
os: [ubuntu-latest, macos-latest, windows-latest]
|
||||||
|
arch: [amd64, arm64]
|
||||||
|
exclude:
|
||||||
|
- os: ubuntu-latest
|
||||||
|
arch: arm64
|
||||||
|
- os: windows-latest
|
||||||
|
arch: arm64
|
||||||
runs-on: ${{ matrix.os }}
|
runs-on: ${{ matrix.os }}
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
|
@ -72,7 +97,7 @@ jobs:
|
||||||
- run: go get
|
- run: go get
|
||||||
- uses: actions/download-artifact@v4
|
- uses: actions/download-artifact@v4
|
||||||
with:
|
with:
|
||||||
name: ${{ matrix.os }}-libraries
|
name: ${{ matrix.os }}-${{ matrix.arch }}-libraries
|
||||||
path: llm/llama.cpp/build
|
path: llm/llama.cpp/build
|
||||||
- run: go build
|
- run: go build
|
||||||
- run: go test -v ./...
|
- run: go test -v ./...
|
||||||
|
|
|
@ -248,6 +248,10 @@ curl http://localhost:11434/api/chat -d '{
|
||||||
|
|
||||||
See the [API documentation](./docs/api.md) for all endpoints.
|
See the [API documentation](./docs/api.md) for all endpoints.
|
||||||
|
|
||||||
|
## Integrations
|
||||||
|
|
||||||
|
- [ollama-python](https://github.com/jmorganca/ollama-python)
|
||||||
|
|
||||||
## Community Integrations
|
## Community Integrations
|
||||||
|
|
||||||
### Web & Desktop
|
### Web & Desktop
|
||||||
|
|
284
api/client.py
284
api/client.py
|
@ -1,284 +0,0 @@
|
||||||
import os
|
|
||||||
import json
|
|
||||||
import requests
|
|
||||||
import os
|
|
||||||
import hashlib
|
|
||||||
import json
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
BASE_URL = os.environ.get('OLLAMA_HOST', 'http://localhost:11434')
|
|
||||||
|
|
||||||
# Generate a response for a given prompt with a provided model. This is a streaming endpoint, so the result will be a series of responses.
|
|
||||||
# The final response object will include statistics and additional data from the request. Use the callback function to override
|
|
||||||
# the default handler.
|
|
||||||
def generate(model_name, prompt, system=None, template=None, format="", context=None, options=None, callback=None):
|
|
||||||
try:
|
|
||||||
url = f"{BASE_URL}/api/generate"
|
|
||||||
payload = {
|
|
||||||
"model": model_name,
|
|
||||||
"prompt": prompt,
|
|
||||||
"system": system,
|
|
||||||
"template": template,
|
|
||||||
"context": context,
|
|
||||||
"options": options,
|
|
||||||
"format": format,
|
|
||||||
}
|
|
||||||
|
|
||||||
# Remove keys with None values
|
|
||||||
payload = {k: v for k, v in payload.items() if v is not None}
|
|
||||||
|
|
||||||
with requests.post(url, json=payload, stream=True) as response:
|
|
||||||
response.raise_for_status()
|
|
||||||
|
|
||||||
# Creating a variable to hold the context history of the final chunk
|
|
||||||
final_context = None
|
|
||||||
|
|
||||||
# Variable to hold concatenated response strings if no callback is provided
|
|
||||||
full_response = ""
|
|
||||||
|
|
||||||
# Iterating over the response line by line and displaying the details
|
|
||||||
for line in response.iter_lines():
|
|
||||||
if line:
|
|
||||||
# Parsing each line (JSON chunk) and extracting the details
|
|
||||||
chunk = json.loads(line)
|
|
||||||
|
|
||||||
# If a callback function is provided, call it with the chunk
|
|
||||||
if callback:
|
|
||||||
callback(chunk)
|
|
||||||
else:
|
|
||||||
# If this is not the last chunk, add the "response" field value to full_response and print it
|
|
||||||
if not chunk.get("done"):
|
|
||||||
response_piece = chunk.get("response", "")
|
|
||||||
full_response += response_piece
|
|
||||||
print(response_piece, end="", flush=True)
|
|
||||||
|
|
||||||
# Check if it's the last chunk (done is true)
|
|
||||||
if chunk.get("done"):
|
|
||||||
final_context = chunk.get("context")
|
|
||||||
|
|
||||||
# Return the full response and the final context
|
|
||||||
return full_response, final_context
|
|
||||||
except requests.exceptions.RequestException as e:
|
|
||||||
print(f"An error occurred: {e}")
|
|
||||||
return None, None
|
|
||||||
|
|
||||||
|
|
||||||
# Create a blob file on the server if it doesn't exist.
|
|
||||||
def create_blob(digest, file_path):
|
|
||||||
url = f"{BASE_URL}/api/blobs/{digest}"
|
|
||||||
|
|
||||||
# Check if the blob exists
|
|
||||||
response = requests.head(url)
|
|
||||||
if response.status_code != 404:
|
|
||||||
return # Blob already exists, no need to upload
|
|
||||||
response.raise_for_status()
|
|
||||||
|
|
||||||
# Upload the blob
|
|
||||||
with open(file_path, 'rb') as file_data:
|
|
||||||
requests.post(url, data=file_data)
|
|
||||||
|
|
||||||
|
|
||||||
# Create a model from a Modelfile. Use the callback function to override the default handler.
|
|
||||||
def create(model_name, filename, callback=None):
|
|
||||||
try:
|
|
||||||
file_path = Path(filename).expanduser().resolve()
|
|
||||||
processed_lines = []
|
|
||||||
|
|
||||||
# Read and process the modelfile
|
|
||||||
with open(file_path, 'r') as f:
|
|
||||||
for line in f:
|
|
||||||
# Skip empty or whitespace-only lines
|
|
||||||
if not line.strip():
|
|
||||||
continue
|
|
||||||
|
|
||||||
command, args = line.split(maxsplit=1)
|
|
||||||
|
|
||||||
if command.upper() in ["FROM", "ADAPTER"]:
|
|
||||||
path = Path(args.strip()).expanduser()
|
|
||||||
|
|
||||||
# Check if path is relative and resolve it
|
|
||||||
if not path.is_absolute():
|
|
||||||
path = (file_path.parent / path)
|
|
||||||
|
|
||||||
# Skip if file does not exist for "model", this is handled by the server
|
|
||||||
if not path.exists():
|
|
||||||
processed_lines.append(line)
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Calculate SHA-256 hash
|
|
||||||
with open(path, 'rb') as bin_file:
|
|
||||||
hash = hashlib.sha256()
|
|
||||||
hash.update(bin_file.read())
|
|
||||||
blob = f"sha256:{hash.hexdigest()}"
|
|
||||||
|
|
||||||
# Add the file to the remote server
|
|
||||||
create_blob(blob, path)
|
|
||||||
|
|
||||||
# Replace path with digest in the line
|
|
||||||
line = f"{command} @{blob}\n"
|
|
||||||
|
|
||||||
processed_lines.append(line)
|
|
||||||
|
|
||||||
# Combine processed lines back into a single string
|
|
||||||
modelfile_content = '\n'.join(processed_lines)
|
|
||||||
|
|
||||||
url = f"{BASE_URL}/api/create"
|
|
||||||
payload = {"name": model_name, "modelfile": modelfile_content}
|
|
||||||
|
|
||||||
# Making a POST request with the stream parameter set to True to handle streaming responses
|
|
||||||
with requests.post(url, json=payload, stream=True) as response:
|
|
||||||
response.raise_for_status()
|
|
||||||
# Iterating over the response line by line and displaying the status
|
|
||||||
for line in response.iter_lines():
|
|
||||||
if line:
|
|
||||||
chunk = json.loads(line)
|
|
||||||
if callback:
|
|
||||||
callback(chunk)
|
|
||||||
else:
|
|
||||||
print(f"Status: {chunk.get('status')}")
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
print(f"An error occurred: {e}")
|
|
||||||
|
|
||||||
|
|
||||||
# Pull a model from the model registry. Cancelled pulls are resumed from where they left off, and multiple
|
|
||||||
# calls will share the same download progress. Use the callback function to override the default handler.
|
|
||||||
def pull(model_name, insecure=False, callback=None):
|
|
||||||
try:
|
|
||||||
url = f"{BASE_URL}/api/pull"
|
|
||||||
payload = {
|
|
||||||
"name": model_name,
|
|
||||||
"insecure": insecure
|
|
||||||
}
|
|
||||||
|
|
||||||
# Making a POST request with the stream parameter set to True to handle streaming responses
|
|
||||||
with requests.post(url, json=payload, stream=True) as response:
|
|
||||||
response.raise_for_status()
|
|
||||||
|
|
||||||
# Iterating over the response line by line and displaying the details
|
|
||||||
for line in response.iter_lines():
|
|
||||||
if line:
|
|
||||||
# Parsing each line (JSON chunk) and extracting the details
|
|
||||||
chunk = json.loads(line)
|
|
||||||
|
|
||||||
# If a callback function is provided, call it with the chunk
|
|
||||||
if callback:
|
|
||||||
callback(chunk)
|
|
||||||
else:
|
|
||||||
# Print the status message directly to the console
|
|
||||||
print(chunk.get('status', ''), end='', flush=True)
|
|
||||||
|
|
||||||
# If there's layer data, you might also want to print that (adjust as necessary)
|
|
||||||
if 'digest' in chunk:
|
|
||||||
print(f" - Digest: {chunk['digest']}", end='', flush=True)
|
|
||||||
print(f" - Total: {chunk['total']}", end='', flush=True)
|
|
||||||
print(f" - Completed: {chunk['completed']}", end='\n', flush=True)
|
|
||||||
else:
|
|
||||||
print()
|
|
||||||
except requests.exceptions.RequestException as e:
|
|
||||||
print(f"An error occurred: {e}")
|
|
||||||
|
|
||||||
# Push a model to the model registry. Use the callback function to override the default handler.
|
|
||||||
def push(model_name, insecure=False, callback=None):
|
|
||||||
try:
|
|
||||||
url = f"{BASE_URL}/api/push"
|
|
||||||
payload = {
|
|
||||||
"name": model_name,
|
|
||||||
"insecure": insecure
|
|
||||||
}
|
|
||||||
|
|
||||||
# Making a POST request with the stream parameter set to True to handle streaming responses
|
|
||||||
with requests.post(url, json=payload, stream=True) as response:
|
|
||||||
response.raise_for_status()
|
|
||||||
|
|
||||||
# Iterating over the response line by line and displaying the details
|
|
||||||
for line in response.iter_lines():
|
|
||||||
if line:
|
|
||||||
# Parsing each line (JSON chunk) and extracting the details
|
|
||||||
chunk = json.loads(line)
|
|
||||||
|
|
||||||
# If a callback function is provided, call it with the chunk
|
|
||||||
if callback:
|
|
||||||
callback(chunk)
|
|
||||||
else:
|
|
||||||
# Print the status message directly to the console
|
|
||||||
print(chunk.get('status', ''), end='', flush=True)
|
|
||||||
|
|
||||||
# If there's layer data, you might also want to print that (adjust as necessary)
|
|
||||||
if 'digest' in chunk:
|
|
||||||
print(f" - Digest: {chunk['digest']}", end='', flush=True)
|
|
||||||
print(f" - Total: {chunk['total']}", end='', flush=True)
|
|
||||||
print(f" - Completed: {chunk['completed']}", end='\n', flush=True)
|
|
||||||
else:
|
|
||||||
print()
|
|
||||||
except requests.exceptions.RequestException as e:
|
|
||||||
print(f"An error occurred: {e}")
|
|
||||||
|
|
||||||
# List models that are available locally.
|
|
||||||
def list():
|
|
||||||
try:
|
|
||||||
response = requests.get(f"{BASE_URL}/api/tags")
|
|
||||||
response.raise_for_status()
|
|
||||||
data = response.json()
|
|
||||||
models = data.get('models', [])
|
|
||||||
return models
|
|
||||||
|
|
||||||
except requests.exceptions.RequestException as e:
|
|
||||||
print(f"An error occurred: {e}")
|
|
||||||
return None
|
|
||||||
|
|
||||||
# Copy a model. Creates a model with another name from an existing model.
|
|
||||||
def copy(source, destination):
|
|
||||||
try:
|
|
||||||
# Create the JSON payload
|
|
||||||
payload = {
|
|
||||||
"source": source,
|
|
||||||
"destination": destination
|
|
||||||
}
|
|
||||||
|
|
||||||
response = requests.post(f"{BASE_URL}/api/copy", json=payload)
|
|
||||||
response.raise_for_status()
|
|
||||||
|
|
||||||
# If the request was successful, return a message indicating that the copy was successful
|
|
||||||
return "Copy successful"
|
|
||||||
|
|
||||||
except requests.exceptions.RequestException as e:
|
|
||||||
print(f"An error occurred: {e}")
|
|
||||||
return None
|
|
||||||
|
|
||||||
# Delete a model and its data.
|
|
||||||
def delete(model_name):
|
|
||||||
try:
|
|
||||||
url = f"{BASE_URL}/api/delete"
|
|
||||||
payload = {"name": model_name}
|
|
||||||
response = requests.delete(url, json=payload)
|
|
||||||
response.raise_for_status()
|
|
||||||
return "Delete successful"
|
|
||||||
except requests.exceptions.RequestException as e:
|
|
||||||
print(f"An error occurred: {e}")
|
|
||||||
return None
|
|
||||||
|
|
||||||
# Show info about a model.
|
|
||||||
def show(model_name):
|
|
||||||
try:
|
|
||||||
url = f"{BASE_URL}/api/show"
|
|
||||||
payload = {"name": model_name}
|
|
||||||
response = requests.post(url, json=payload)
|
|
||||||
response.raise_for_status()
|
|
||||||
|
|
||||||
# Parse the JSON response and return it
|
|
||||||
data = response.json()
|
|
||||||
return data
|
|
||||||
except requests.exceptions.RequestException as e:
|
|
||||||
print(f"An error occurred: {e}")
|
|
||||||
return None
|
|
||||||
|
|
||||||
def heartbeat():
|
|
||||||
try:
|
|
||||||
url = f"{BASE_URL}/"
|
|
||||||
response = requests.head(url)
|
|
||||||
response.raise_for_status()
|
|
||||||
return "Ollama is running"
|
|
||||||
except requests.exceptions.RequestException as e:
|
|
||||||
print(f"An error occurred: {e}")
|
|
||||||
return "Ollama is not running"
|
|
|
@ -75,7 +75,7 @@ func newDynExtServer(library, model string, adapters, projectors []string, opts
|
||||||
updatePath(filepath.Dir(library))
|
updatePath(filepath.Dir(library))
|
||||||
libPath := C.CString(library)
|
libPath := C.CString(library)
|
||||||
defer C.free(unsafe.Pointer(libPath))
|
defer C.free(unsafe.Pointer(libPath))
|
||||||
resp := newExtServerResp(128)
|
resp := newExtServerResp(512)
|
||||||
defer freeExtServerResp(resp)
|
defer freeExtServerResp(resp)
|
||||||
var srv C.struct_dynamic_llama_server
|
var srv C.struct_dynamic_llama_server
|
||||||
C.dyn_init(libPath, &srv, &resp)
|
C.dyn_init(libPath, &srv, &resp)
|
||||||
|
@ -181,7 +181,6 @@ func (llm *dynExtServer) Predict(ctx context.Context, predict PredictOpts, fn fu
|
||||||
"seed": predict.Options.Seed,
|
"seed": predict.Options.Seed,
|
||||||
"stop": predict.Options.Stop,
|
"stop": predict.Options.Stop,
|
||||||
"image_data": imageData,
|
"image_data": imageData,
|
||||||
"cache_prompt": true,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if predict.Format == "json" {
|
if predict.Format == "json" {
|
||||||
|
|
|
@ -14,9 +14,11 @@ BUILD_DIR="${LLAMACPP_DIR}/build/darwin/metal"
|
||||||
case "${GOARCH}" in
|
case "${GOARCH}" in
|
||||||
"amd64")
|
"amd64")
|
||||||
CMAKE_DEFS="-DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DLLAMA_METAL=off -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"
|
CMAKE_DEFS="-DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DLLAMA_METAL=off -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"
|
||||||
|
ARCH="x86_64"
|
||||||
;;
|
;;
|
||||||
"arm64")
|
"arm64")
|
||||||
CMAKE_DEFS="-DCMAKE_SYSTEM_PROCESSOR=arm64 -DCMAKE_OSX_ARCHITECTURES=arm64 -DLLAMA_METAL=on ${CMAKE_DEFS}"
|
CMAKE_DEFS="-DCMAKE_SYSTEM_PROCESSOR=arm64 -DCMAKE_OSX_ARCHITECTURES=arm64 -DLLAMA_METAL=on ${CMAKE_DEFS}"
|
||||||
|
ARCH="arm64"
|
||||||
;;
|
;;
|
||||||
*)
|
*)
|
||||||
echo "GOARCH must be set"
|
echo "GOARCH must be set"
|
||||||
|
@ -30,6 +32,7 @@ apply_patches
|
||||||
build
|
build
|
||||||
install
|
install
|
||||||
gcc -fPIC -g -shared -o ${BUILD_DIR}/lib/libext_server.so \
|
gcc -fPIC -g -shared -o ${BUILD_DIR}/lib/libext_server.so \
|
||||||
|
-arch ${ARCH} \
|
||||||
-Wl,-force_load ${BUILD_DIR}/lib/libext_server.a \
|
-Wl,-force_load ${BUILD_DIR}/lib/libext_server.a \
|
||||||
${BUILD_DIR}/lib/libcommon.a \
|
${BUILD_DIR}/lib/libcommon.a \
|
||||||
${BUILD_DIR}/lib/libllama.a \
|
${BUILD_DIR}/lib/libllama.a \
|
||||||
|
|
|
@ -39,8 +39,13 @@ amdGPUs() {
|
||||||
}
|
}
|
||||||
|
|
||||||
echo "Starting linux generate script"
|
echo "Starting linux generate script"
|
||||||
if [ -z "${CUDACXX}" -a -x /usr/local/cuda/bin/nvcc ]; then
|
if [ -z "${CUDACXX}" ]; then
|
||||||
|
if [ -x /usr/local/cuda/bin/nvcc ]; then
|
||||||
export CUDACXX=/usr/local/cuda/bin/nvcc
|
export CUDACXX=/usr/local/cuda/bin/nvcc
|
||||||
|
else
|
||||||
|
# Try the default location in case it exists
|
||||||
|
export CUDACXX=$(command -v nvcc)
|
||||||
|
fi
|
||||||
fi
|
fi
|
||||||
COMMON_CMAKE_DEFS="-DCMAKE_POSITION_INDEPENDENT_CODE=on -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off"
|
COMMON_CMAKE_DEFS="-DCMAKE_POSITION_INDEPENDENT_CODE=on -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off"
|
||||||
source $(dirname $0)/gen_common.sh
|
source $(dirname $0)/gen_common.sh
|
||||||
|
@ -109,33 +114,41 @@ else
|
||||||
echo "Skipping CPU generation step as requested"
|
echo "Skipping CPU generation step as requested"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
for cudalibpath in "/usr/local/cuda/lib64" "/opt/cuda/targets/x86_64-linux/lib"; do
|
# If needed, look for the default CUDA toolkit location
|
||||||
if [ -d "$cudalibpath" ]; then
|
if [ -z "${CUDA_LIB_DIR}" ] && [ -d /usr/local/cuda/lib64 ]; then
|
||||||
|
CUDA_LIB_DIR=/usr/local/cuda/lib64
|
||||||
|
fi
|
||||||
|
|
||||||
|
# If needed, look for CUDA on Arch Linux
|
||||||
|
if [ -z "${CUDA_LIB_DIR}" ] && [ -d /opt/cuda/targets/x86_64-linux/lib ]; then
|
||||||
|
CUDA_LIB_DIR=/opt/cuda/targets/x86_64-linux/lib
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ -d "${CUDA_LIB_DIR}" ]; then
|
||||||
echo "CUDA libraries detected - building dynamic CUDA library"
|
echo "CUDA libraries detected - building dynamic CUDA library"
|
||||||
init_vars
|
init_vars
|
||||||
CUDA_MAJOR=$(find "$cudalibpath" -name 'libcudart.so.*' -print | head -1 | cut -f3 -d. || true)
|
CUDA_MAJOR=$(ls "${CUDA_LIB_DIR}"/libcudart.so.* | head -1 | cut -f3 -d. || true)
|
||||||
if [ -n "${CUDA_MAJOR}" ]; then
|
if [ -n "${CUDA_MAJOR}" ]; then
|
||||||
CUDA_VARIANT="_v${CUDA_MAJOR}"
|
CUDA_VARIANT=_v${CUDA_MAJOR}
|
||||||
fi
|
fi
|
||||||
CMAKE_DEFS="-DLLAMA_CUBLAS=on ${COMMON_CMAKE_DEFS} ${CMAKE_DEFS}"
|
CMAKE_DEFS="-DLLAMA_CUBLAS=on ${COMMON_CMAKE_DEFS} ${CMAKE_DEFS}"
|
||||||
BUILD_DIR="${LLAMACPP_DIR}/build/linux/cuda${CUDA_VARIANT}"
|
BUILD_DIR="${LLAMACPP_DIR}/build/linux/cuda${CUDA_VARIANT}"
|
||||||
CUDA_LIB_DIR="$cudalibpath"
|
|
||||||
build
|
build
|
||||||
install
|
install
|
||||||
gcc -fPIC -g -shared -o "${BUILD_DIR}/lib/libext_server.so" \
|
gcc -fPIC -g -shared -o ${BUILD_DIR}/lib/libext_server.so \
|
||||||
-Wl,--whole-archive \
|
-Wl,--whole-archive \
|
||||||
"${BUILD_DIR}/lib/libext_server.a" \
|
${BUILD_DIR}/lib/libext_server.a \
|
||||||
"${BUILD_DIR}/lib/libcommon.a" \
|
${BUILD_DIR}/lib/libcommon.a \
|
||||||
"${BUILD_DIR}/lib/libllama.a" \
|
${BUILD_DIR}/lib/libllama.a \
|
||||||
-Wl,--no-whole-archive \
|
-Wl,--no-whole-archive \
|
||||||
"${CUDA_LIB_DIR}/libcudart_static.a" \
|
${CUDA_LIB_DIR}/libcudart_static.a \
|
||||||
"${CUDA_LIB_DIR}/libcublas_static.a" \
|
${CUDA_LIB_DIR}/libcublas_static.a \
|
||||||
"${CUDA_LIB_DIR}/libcublasLt_static.a" \
|
${CUDA_LIB_DIR}/libcublasLt_static.a \
|
||||||
"${CUDA_LIB_DIR}/libcudadevrt.a" \
|
${CUDA_LIB_DIR}/libcudadevrt.a \
|
||||||
"${CUDA_LIB_DIR}/libculibos.a" \
|
${CUDA_LIB_DIR}/libculibos.a \
|
||||||
|
-lcuda \
|
||||||
-lrt -lpthread -ldl -lstdc++ -lm
|
-lrt -lpthread -ldl -lstdc++ -lm
|
||||||
fi
|
fi
|
||||||
done
|
|
||||||
|
|
||||||
if [ -z "${ROCM_PATH}" ]; then
|
if [ -z "${ROCM_PATH}" ]; then
|
||||||
# Try the default location in case it exists
|
# Try the default location in case it exists
|
||||||
|
|
|
@ -1 +1 @@
|
||||||
Subproject commit 328b83de23b33240e28f4e74900d1d06726f5eb1
|
Subproject commit 584d674be622fbf1578694ada6e62eebedbfd377
|
|
@ -1132,6 +1132,7 @@ func GetSHA256Digest(r io.Reader) (string, int64) {
|
||||||
var errUnauthorized = fmt.Errorf("unauthorized")
|
var errUnauthorized = fmt.Errorf("unauthorized")
|
||||||
|
|
||||||
func makeRequestWithRetry(ctx context.Context, method string, requestURL *url.URL, headers http.Header, body io.ReadSeeker, regOpts *RegistryOptions) (*http.Response, error) {
|
func makeRequestWithRetry(ctx context.Context, method string, requestURL *url.URL, headers http.Header, body io.ReadSeeker, regOpts *RegistryOptions) (*http.Response, error) {
|
||||||
|
for i := 0; i < 2; i++ {
|
||||||
resp, err := makeRequest(ctx, method, requestURL, headers, body, regOpts)
|
resp, err := makeRequest(ctx, method, requestURL, headers, body, regOpts)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if !errors.Is(err, context.Canceled) {
|
if !errors.Is(err, context.Canceled) {
|
||||||
|
@ -1157,13 +1158,6 @@ func makeRequestWithRetry(ctx context.Context, method string, requestURL *url.UR
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
resp, err := makeRequest(ctx, method, requestURL, headers, body, regOpts)
|
|
||||||
if resp.StatusCode == http.StatusUnauthorized {
|
|
||||||
return nil, errUnauthorized
|
|
||||||
}
|
|
||||||
|
|
||||||
return resp, err
|
|
||||||
case resp.StatusCode == http.StatusNotFound:
|
case resp.StatusCode == http.StatusNotFound:
|
||||||
return nil, os.ErrNotExist
|
return nil, os.ErrNotExist
|
||||||
case resp.StatusCode >= http.StatusBadRequest:
|
case resp.StatusCode >= http.StatusBadRequest:
|
||||||
|
@ -1172,9 +1166,12 @@ func makeRequestWithRetry(ctx context.Context, method string, requestURL *url.UR
|
||||||
return nil, fmt.Errorf("%d: %s", resp.StatusCode, err)
|
return nil, fmt.Errorf("%d: %s", resp.StatusCode, err)
|
||||||
}
|
}
|
||||||
return nil, fmt.Errorf("%d: %s", resp.StatusCode, responseBody)
|
return nil, fmt.Errorf("%d: %s", resp.StatusCode, responseBody)
|
||||||
|
default:
|
||||||
|
return resp, nil
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return resp, nil
|
return nil, errUnauthorized
|
||||||
}
|
}
|
||||||
|
|
||||||
func makeRequest(ctx context.Context, method string, requestURL *url.URL, headers http.Header, body io.Reader, regOpts *RegistryOptions) (*http.Response, error) {
|
func makeRequest(ctx context.Context, method string, requestURL *url.URL, headers http.Header, body io.Reader, regOpts *RegistryOptions) (*http.Response, error) {
|
||||||
|
|
|
@ -46,6 +46,7 @@ func ParseModelPath(name string) ModelPath {
|
||||||
name = after
|
name = after
|
||||||
}
|
}
|
||||||
|
|
||||||
|
name = strings.ReplaceAll(name, string(os.PathSeparator), "/")
|
||||||
parts := strings.Split(name, "/")
|
parts := strings.Split(name, "/")
|
||||||
switch len(parts) {
|
switch len(parts) {
|
||||||
case 3:
|
case 3:
|
||||||
|
|
|
@ -15,7 +15,6 @@ import (
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"reflect"
|
"reflect"
|
||||||
"runtime"
|
"runtime"
|
||||||
"strconv"
|
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"syscall"
|
"syscall"
|
||||||
|
@ -668,27 +667,12 @@ func GetModelInfo(req api.ShowRequest) (*api.ShowResponse, error) {
|
||||||
cs := 30
|
cs := 30
|
||||||
for k, v := range model.Options {
|
for k, v := range model.Options {
|
||||||
switch val := v.(type) {
|
switch val := v.(type) {
|
||||||
case string:
|
|
||||||
params = append(params, fmt.Sprintf("%-*s %s", cs, k, val))
|
|
||||||
case int:
|
|
||||||
params = append(params, fmt.Sprintf("%-*s %s", cs, k, strconv.Itoa(val)))
|
|
||||||
case float64:
|
|
||||||
params = append(params, fmt.Sprintf("%-*s %s", cs, k, strconv.FormatFloat(val, 'f', 0, 64)))
|
|
||||||
case bool:
|
|
||||||
params = append(params, fmt.Sprintf("%-*s %s", cs, k, strconv.FormatBool(val)))
|
|
||||||
case []interface{}:
|
case []interface{}:
|
||||||
for _, nv := range val {
|
for _, nv := range val {
|
||||||
switch nval := nv.(type) {
|
params = append(params, fmt.Sprintf("%-*s %#v", cs, k, nv))
|
||||||
case string:
|
|
||||||
params = append(params, fmt.Sprintf("%-*s %s", cs, k, nval))
|
|
||||||
case int:
|
|
||||||
params = append(params, fmt.Sprintf("%-*s %s", cs, k, strconv.Itoa(nval)))
|
|
||||||
case float64:
|
|
||||||
params = append(params, fmt.Sprintf("%-*s %s", cs, k, strconv.FormatFloat(nval, 'f', 0, 64)))
|
|
||||||
case bool:
|
|
||||||
params = append(params, fmt.Sprintf("%-*s %s", cs, k, strconv.FormatBool(nval)))
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
default:
|
||||||
|
params = append(params, fmt.Sprintf("%-*s %#v", cs, k, v))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
resp.Parameters = strings.Join(params, "\n")
|
resp.Parameters = strings.Join(params, "\n")
|
||||||
|
|
|
@ -9,6 +9,7 @@ import (
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/http/httptest"
|
"net/http/httptest"
|
||||||
"os"
|
"os"
|
||||||
|
"sort"
|
||||||
"strings"
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
|
@ -50,7 +51,7 @@ func Test_Routes(t *testing.T) {
|
||||||
createTestModel := func(t *testing.T, name string) {
|
createTestModel := func(t *testing.T, name string) {
|
||||||
fname := createTestFile(t, "ollama-model")
|
fname := createTestFile(t, "ollama-model")
|
||||||
|
|
||||||
modelfile := strings.NewReader(fmt.Sprintf("FROM %s", fname))
|
modelfile := strings.NewReader(fmt.Sprintf("FROM %s\nPARAMETER seed 42\nPARAMETER top_p 0.9\nPARAMETER stop foo\nPARAMETER stop bar", fname))
|
||||||
commands, err := parser.Parse(modelfile)
|
commands, err := parser.Parse(modelfile)
|
||||||
assert.Nil(t, err)
|
assert.Nil(t, err)
|
||||||
fn := func(resp api.ProgressResponse) {
|
fn := func(resp api.ProgressResponse) {
|
||||||
|
@ -167,6 +168,42 @@ func Test_Routes(t *testing.T) {
|
||||||
assert.Equal(t, "beefsteak:latest", model.ShortName)
|
assert.Equal(t, "beefsteak:latest", model.ShortName)
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
Name: "Show Model Handler",
|
||||||
|
Method: http.MethodPost,
|
||||||
|
Path: "/api/show",
|
||||||
|
Setup: func(t *testing.T, req *http.Request) {
|
||||||
|
createTestModel(t, "show-model")
|
||||||
|
showReq := api.ShowRequest{Model: "show-model"}
|
||||||
|
jsonData, err := json.Marshal(showReq)
|
||||||
|
assert.Nil(t, err)
|
||||||
|
req.Body = io.NopCloser(bytes.NewReader(jsonData))
|
||||||
|
},
|
||||||
|
Expected: func(t *testing.T, resp *http.Response) {
|
||||||
|
contentType := resp.Header.Get("Content-Type")
|
||||||
|
assert.Equal(t, contentType, "application/json; charset=utf-8")
|
||||||
|
body, err := io.ReadAll(resp.Body)
|
||||||
|
assert.Nil(t, err)
|
||||||
|
|
||||||
|
var showResp api.ShowResponse
|
||||||
|
err = json.Unmarshal(body, &showResp)
|
||||||
|
assert.Nil(t, err)
|
||||||
|
|
||||||
|
var params []string
|
||||||
|
paramsSplit := strings.Split(showResp.Parameters, "\n")
|
||||||
|
for _, p := range paramsSplit {
|
||||||
|
params = append(params, strings.Join(strings.Fields(p), " "))
|
||||||
|
}
|
||||||
|
sort.Strings(params)
|
||||||
|
expectedParams := []string{
|
||||||
|
"seed 42",
|
||||||
|
"stop \"bar\"",
|
||||||
|
"stop \"foo\"",
|
||||||
|
"top_p 0.9",
|
||||||
|
}
|
||||||
|
assert.Equal(t, expectedParams, params)
|
||||||
|
},
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
s, err := setupServer(t)
|
s, err := setupServer(t)
|
||||||
|
|
Loading…
Reference in a new issue