Merge pull request #1146 from dhiltgen/ext_server_cgo

Add cgo implementation for llama.cpp
This commit is contained in:
Daniel Hiltgen 2023-12-22 08:16:31 -08:00 committed by GitHub
commit 96fb441abd
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
55 changed files with 3207 additions and 1181 deletions

View file

@ -2,8 +2,7 @@
ollama ollama
app app
dist dist
scripts
llm/llama.cpp/ggml
llm/llama.cpp/gguf llm/llama.cpp/gguf
.env .env
.cache .cache
test_data

3
.gitignore vendored
View file

@ -8,4 +8,5 @@ ollama
ggml-metal.metal ggml-metal.metal
.cache .cache
*.exe *.exe
.idea .idea
test_data

5
.gitmodules vendored
View file

@ -1,8 +1,3 @@
[submodule "llm/llama.cpp/ggml"]
path = llm/llama.cpp/ggml
url = https://github.com/ggerganov/llama.cpp.git
ignore = dirty
shallow = true
[submodule "llm/llama.cpp/gguf"] [submodule "llm/llama.cpp/gguf"]
path = llm/llama.cpp/gguf path = llm/llama.cpp/gguf
url = https://github.com/ggerganov/llama.cpp.git url = https://github.com/ggerganov/llama.cpp.git

View file

@ -1,23 +1,65 @@
# centos7 amd64 dependencies # Ubuntu 20.04 amd64 dependencies
FROM --platform=linux/amd64 nvidia/cuda:11.3.1-devel-centos7 AS base-amd64 FROM --platform=linux/amd64 ubuntu:20.04 AS base-amd64
RUN yum install -y https://repo.ius.io/ius-release-el7.rpm centos-release-scl && \ ARG CUDA_VERSION=11.3.1-1
yum update -y && \ ARG CMAKE_VERSION=3.22.1
yum install -y devtoolset-10-gcc devtoolset-10-gcc-c++ git236 wget # ROCm only supports amd64
RUN wget "https://github.com/Kitware/CMake/releases/download/v3.27.6/cmake-3.27.6-linux-x86_64.sh" -O cmake-installer.sh && chmod +x cmake-installer.sh && ./cmake-installer.sh --skip-license --prefix=/usr/local ARG ROCM_VERSION=6.0
ENV PATH /opt/rh/devtoolset-10/root/usr/bin:$PATH ARG CLBLAST_VER=1.6.1
# centos8 arm64 dependencies # Note: https://rocm.docs.amd.com/en/latest/release/user_kernel_space_compat_matrix.html
FROM --platform=linux/arm64 nvidia/cuda-arm64:11.3.1-devel-centos8 AS base-arm64 RUN apt-get update && \
RUN sed -i -e 's/mirrorlist/#mirrorlist/g' -e 's|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g' /etc/yum.repos.d/CentOS-* apt-get install -y wget gnupg && \
RUN yum install -y git cmake wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin && \
mv cuda-ubuntu2004.pin /etc/apt/preferences.d/cuda-repository-pin-600 && \
apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/3bf863cc.pub && \
echo "deb [by-hash=no] https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/ /" > /etc/apt/sources.list.d/cuda.list && \
wget "https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-x86_64.sh" -O /tmp/cmake-installer.sh && \
chmod +x /tmp/cmake-installer.sh && /tmp/cmake-installer.sh --skip-license --prefix=/usr && \
mkdir --parents --mode=0755 /etc/apt/keyrings && \
wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | gpg --dearmor > /etc/apt/keyrings/rocm.gpg && \
echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/${ROCM_VERSION} focal main" > /etc/apt/sources.list.d/rocm.list && \
echo "Package: *" > /etc/apt/preferences.d/rocm-pin-600 && \
echo "Pin: release o=repo.radeon.com" >> /etc/apt/preferences.d/rocm-pin-600 && \
echo "Pin-Priority: 600" >> /etc/apt/preferences.d/rocm-pin-600 && \
apt-get update && \
DEBIAN_FRONTEND=noninteractive apt-get -y install cuda=${CUDA_VERSION} rocm-hip-libraries rocm-device-libs rocm-libs rocm-ocl-icd rocm-hip-sdk rocm-hip-libraries rocm-cmake rocm-clang-ocl rocm-dev
# CLBlast
RUN wget -qO- https://github.com/CNugteren/CLBlast/archive/refs/tags/${CLBLAST_VER}.tar.gz | tar zxv -C /tmp/ && \
cd /tmp/CLBlast-${CLBLAST_VER} && mkdir build && cd build && cmake .. && make && make install
ENV ROCM_PATH=/opt/rocm
# Ubuntu 22.04 arm64 dependencies
FROM --platform=linux/arm64 ubuntu:20.04 AS base-arm64
ARG CUDA_VERSION=11.3.1-1
ARG CMAKE_VERSION=3.27.6
RUN apt-get update && \
apt-get install -y wget gnupg && \
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/sbsa/cuda-ubuntu2004.pin && \
mv cuda-ubuntu2004.pin /etc/apt/preferences.d/cuda-repository-pin-600 && \
apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/sbsa//3bf863cc.pub && \
echo "deb [by-hash=no] https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/sbsa/ /" > /etc/apt/sources.list.d/cuda.list && \
wget "https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-aarch64.sh" -O /tmp/cmake-installer.sh && \
chmod +x /tmp/cmake-installer.sh && /tmp/cmake-installer.sh --skip-license --prefix=/usr && \
apt-get update && \
apt-cache madison cuda && \
DEBIAN_FRONTEND=noninteractive apt-get -y install cuda=${CUDA_VERSION}
FROM base-${TARGETARCH} FROM base-${TARGETARCH}
ARG TARGETARCH ARG TARGETARCH
ARG GOFLAGS="'-ldflags -w -s'" ARG GOFLAGS="'-ldflags -w -s'"
ARG CGO_CFLAGS
ARG GOLANG_VERSION=1.21.3
# Common toolchain
RUN apt-get update && \
DEBIAN_FRONTEND=noninteractive apt-get install -y gcc-10 g++-10 cpp-10 git ocl-icd-opencl-dev && \
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-10 100 --slave /usr/bin/g++ g++ /usr/bin/g++-10 --slave /usr/bin/gcov gcov /usr/bin/gcov-10
# install go # install go
ADD https://dl.google.com/go/go1.21.3.linux-$TARGETARCH.tar.gz /tmp/go1.21.3.tar.gz ADD https://dl.google.com/go/go${GOLANG_VERSION}.linux-$TARGETARCH.tar.gz /tmp/go${GOLANG_VERSION}.tar.gz
RUN mkdir -p /usr/local && tar xz -C /usr/local </tmp/go1.21.3.tar.gz RUN mkdir -p /usr/local && tar xz -C /usr/local </tmp/go${GOLANG_VERSION}.tar.gz
# build the final binary # build the final binary
WORKDIR /go/src/github.com/jmorganca/ollama WORKDIR /go/src/github.com/jmorganca/ollama
@ -26,6 +68,7 @@ COPY . .
ENV GOOS=linux ENV GOOS=linux
ENV GOARCH=$TARGETARCH ENV GOARCH=$TARGETARCH
ENV GOFLAGS=$GOFLAGS ENV GOFLAGS=$GOFLAGS
ENV CGO_CFLAGS=${CGO_CFLAGS}
RUN /usr/local/go/bin/go generate ./... && \ RUN /usr/local/go/bin/go generate ./... && \
/usr/local/go/bin/go build . /usr/local/go/bin/go build .

View file

@ -192,13 +192,19 @@ Install `cmake` and `go`:
brew install cmake go brew install cmake go
``` ```
Then generate dependencies and build: Then generate dependencies:
``` ```
go generate ./... go generate ./...
```
Then build the binary:
```
go build . go build .
``` ```
More detailed instructions can be found in the [developer guide](https://github.com/jmorganca/ollama/blob/main/docs/development.md)
### Running local builds
Next, start the server: Next, start the server:
``` ```

View file

@ -572,10 +572,30 @@ func generate(cmd *cobra.Command, opts generateOptions) error {
} }
if err := client.Generate(ctx, &request, fn); err != nil { if err := client.Generate(ctx, &request, fn); err != nil {
if errors.Is(err, context.Canceled) { switch {
case errors.Is(err, context.Canceled):
return nil return nil
case strings.Contains(err.Error(), "unsupported model format"):
// pull and retry to see if the model has been updated
parts := strings.Split(opts.Model, string(os.PathSeparator))
if len(parts) == 1 {
// this is a library model, log some info
fmt.Fprintln(os.Stderr, "This model is no longer compatible with Ollama. Pulling a new version...")
}
if err := PullHandler(cmd, []string{opts.Model}); err != nil {
fmt.Printf("Error: %s\n", err)
return fmt.Errorf("unsupported model, please update this model to gguf format") // relay the original error
}
// retry
if err := client.Generate(ctx, &request, fn); err != nil {
if errors.Is(err, context.Canceled) {
return nil
}
return err
}
default:
return err
} }
return err
} }
if opts.Prompt != "" { if opts.Prompt != "" {
fmt.Println() fmt.Println()

View file

@ -34,6 +34,35 @@ Now you can run `ollama`:
## Building on Linux with GPU support ## Building on Linux with GPU support
- Install cmake and nvidia-cuda-toolkit
- run `go generate ./...` ### Linux/Windows CUDA (NVIDIA)
- run `go build .` *Your operating system distribution may already have packages for NVIDIA CUDA. Distro packages are often preferable, but instructions are distro-specific. Please consult distro-specific docs for dependencies if available!*
Install `cmake` and `golang` as well as [NVIDIA CUDA](https://developer.nvidia.com/cuda-downloads) development and runtime packages.
Then generate dependencies:
```
go generate ./...
```
Then build the binary:
```
go build .
```
### Linux ROCm (AMD)
*Your operating system distribution may already have packages for AMD ROCm and CLBlast. Distro packages are often preferable, but instructions are distro-specific. Please consult distro-specific docs for dependencies if available!*
Install [CLBlast](https://github.com/CNugteren/CLBlast/blob/master/doc/installation.md) and [ROCm](https://rocm.docs.amd.com/en/latest/deploy/linux/quick_start.html) developement packages first, as well as `cmake` and `golang`.
Adjust the paths below (correct for Arch) as appropriate for your distributions install locations and generate dependencies:
```
CLBlast_DIR=/usr/lib/cmake/CLBlast ROCM_PATH=/opt/rocm go generate ./...
```
Then build the binary:
```
go build .
```
ROCm requires elevated privileges to access the GPU at runtime. On most distros you can add your user account to the `render` group, or run as root.
## Containerized Build
If you have Docker available, you can build linux binaries with `./scripts/build_linux.sh` which has the CUDA and ROCm dependencies included.

2
go.mod
View file

@ -7,7 +7,7 @@ require (
github.com/gin-gonic/gin v1.9.1 github.com/gin-gonic/gin v1.9.1
github.com/olekukonko/tablewriter v0.0.5 github.com/olekukonko/tablewriter v0.0.5
github.com/spf13/cobra v1.7.0 github.com/spf13/cobra v1.7.0
github.com/stretchr/testify v1.8.3 github.com/stretchr/testify v1.8.4
golang.org/x/sync v0.3.0 golang.org/x/sync v0.3.0
) )

3
go.sum
View file

@ -98,8 +98,9 @@ github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/
github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.8.2/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
github.com/stretchr/testify v1.8.3 h1:RP3t2pwF7cMEbC1dqtB6poj3niw/9gnV4Cjg5oW5gtY=
github.com/stretchr/testify v1.8.3/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/stretchr/testify v1.8.3/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk=
github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo=
github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI= github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI=
github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08= github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08=
github.com/ugorji/go v1.2.7/go.mod h1:nF9osbDWLy6bDVv/Rtoh6QgnvNDpmCalQV5urGCCS6M= github.com/ugorji/go v1.2.7/go.mod h1:nF9osbDWLy6bDVv/Rtoh6QgnvNDpmCalQV5urGCCS6M=

134
gpu/gpu.go Normal file
View file

@ -0,0 +1,134 @@
//go:build linux || windows
package gpu
/*
#cgo linux LDFLAGS: -lrt -lpthread -ldl -lstdc++ -lm
#cgo windows LDFLAGS: -lpthread
#include "gpu_info.h"
*/
import "C"
import (
"fmt"
"log"
"sync"
"unsafe"
"github.com/jmorganca/ollama/api"
)
type handles struct {
cuda *C.cuda_handle_t
rocm *C.rocm_handle_t
}
var gpuMutex sync.Mutex
var gpuHandles *handles = nil
// Note: gpuMutex must already be held
func initGPUHandles() {
// TODO - if the ollama build is CPU only, don't do these checks as they're irrelevant and confusing
log.Printf("Detecting GPU type")
gpuHandles = &handles{nil, nil}
var resp C.cuda_init_resp_t
C.cuda_init(&resp)
if resp.err != nil {
log.Printf("CUDA not detected: %s", C.GoString(resp.err))
C.free(unsafe.Pointer(resp.err))
var resp C.rocm_init_resp_t
C.rocm_init(&resp)
if resp.err != nil {
log.Printf("ROCm not detected: %s", C.GoString(resp.err))
C.free(unsafe.Pointer(resp.err))
} else {
log.Printf("Radeon GPU detected")
rocm := resp.rh
gpuHandles.rocm = &rocm
}
} else {
log.Printf("Nvidia GPU detected")
cuda := resp.ch
gpuHandles.cuda = &cuda
}
}
func GetGPUInfo() GpuInfo {
// TODO - consider exploring lspci (and equivalent on windows) to check for
// GPUs so we can report warnings if we see Nvidia/AMD but fail to load the libraries
gpuMutex.Lock()
defer gpuMutex.Unlock()
if gpuHandles == nil {
initGPUHandles()
}
var memInfo C.mem_info_t
resp := GpuInfo{"", "", 0, 0}
if gpuHandles.cuda != nil {
C.cuda_check_vram(*gpuHandles.cuda, &memInfo)
if memInfo.err != nil {
log.Printf("error looking up CUDA GPU memory: %s", C.GoString(memInfo.err))
C.free(unsafe.Pointer(memInfo.err))
} else {
resp.Driver = "CUDA"
resp.Library = "cuda_server"
}
} else if gpuHandles.rocm != nil {
C.rocm_check_vram(*gpuHandles.rocm, &memInfo)
if memInfo.err != nil {
log.Printf("error looking up ROCm GPU memory: %s", C.GoString(memInfo.err))
C.free(unsafe.Pointer(memInfo.err))
} else {
resp.Driver = "ROCM"
resp.Library = "rocm_server"
}
}
if resp.Driver == "" {
C.cpu_check_ram(&memInfo)
resp.Driver = "CPU"
// In the future we may offer multiple CPU variants to tune CPU features
resp.Library = "default"
}
if memInfo.err != nil {
log.Printf("error looking up CPU memory: %s", C.GoString(memInfo.err))
C.free(unsafe.Pointer(memInfo.err))
return resp
}
resp.FreeMemory = uint64(memInfo.free)
resp.TotalMemory = uint64(memInfo.total)
return resp
}
func CheckVRAM() (int64, error) {
gpuInfo := GetGPUInfo()
if gpuInfo.FreeMemory > 0 && gpuInfo.Driver != "CPU" {
return int64(gpuInfo.FreeMemory), nil
}
return 0, fmt.Errorf("no GPU detected") // TODO - better handling of CPU based memory determiniation
}
func NumGPU(numLayer, fileSizeBytes int64, opts api.Options) int {
if opts.NumGPU != -1 {
return opts.NumGPU
}
info := GetGPUInfo()
if info.Driver == "CPU" {
return 0
}
/*
Calculate bytes per layer, this will roughly be the size of the model file divided by the number of layers.
We can store the model weights and the kv cache in vram,
to enable kv chache vram storage add two additional layers to the number of layers retrieved from the model file.
*/
bytesPerLayer := uint64(fileSizeBytes / numLayer)
// 75% of the absolute max number of layers we can fit in available VRAM, off-loading too many layers to the GPU can cause OOM errors
layers := int(info.FreeMemory/bytesPerLayer) * 3 / 4
log.Printf("%d MB VRAM available, loading up to %d %s GPU layers out of %d", info.FreeMemory/(1024*1024), layers, info.Driver, numLayer)
return layers
}

41
gpu/gpu_darwin.go Normal file
View file

@ -0,0 +1,41 @@
//go:build darwin
package gpu
import "C"
import (
"runtime"
"github.com/jmorganca/ollama/api"
)
// CheckVRAM returns the free VRAM in bytes on Linux machines with NVIDIA GPUs
func CheckVRAM() (int64, error) {
// TODO - assume metal, and return free memory?
return 0, nil
}
func GetGPUInfo() GpuInfo {
// TODO - Metal vs. x86 macs...
return GpuInfo{
Driver: "METAL",
Library: "default",
TotalMemory: 0,
FreeMemory: 0,
}
}
func NumGPU(numLayer, fileSizeBytes int64, opts api.Options) int {
if runtime.GOARCH == "arm64" {
return 1
}
// metal only supported on arm64
return 0
}
func nativeInit() error {
return nil
}

49
gpu/gpu_info.h Normal file
View file

@ -0,0 +1,49 @@
#ifndef __APPLE__
#ifndef __GPU_INFO_H__
#define __GPU_INFO_H__
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#ifndef _WIN32
#include <dlfcn.h>
#define LOAD_LIBRARY(lib, flags) dlopen(lib, flags)
#define LOAD_SYMBOL(handle, sym) dlsym(handle, sym)
#define LOAD_ERR() dlerror()
#define UNLOAD_LIBRARY(handle) dlclose(handle)
#else
#include <windows.h>
#define LOAD_LIBRARY(lib, flags) LoadLibrary(lib)
#define LOAD_SYMBOL(handle, sym) GetProcAddress(handle, sym)
#define UNLOAD_LIBRARY(handle) FreeLibrary(handle)
// TODO - refactor this with proper error message handling on windows
inline static char *LOAD_ERR() {
static char errbuf[8];
snprintf(errbuf, 8, "0x%lx", GetLastError());
return errbuf;
}
#endif
#ifdef __cplusplus
extern "C" {
#endif
typedef struct mem_info {
uint64_t total;
uint64_t free;
char *err; // If non-nill, caller responsible for freeing
} mem_info_t;
void cpu_check_ram(mem_info_t *resp);
#ifdef __cplusplus
}
#endif
#include "gpu_info_cuda.h"
#include "gpu_info_rocm.h"
#endif // __GPU_INFO_H__
#endif // __APPLE__

42
gpu/gpu_info_cpu.c Normal file
View file

@ -0,0 +1,42 @@
#include "gpu_info.h"
// Fallbacks for CPU mode
#ifdef _WIN32
#include <sysinfoapi.h>
void cpu_check_ram(mem_info_t *resp) {
resp->err = NULL;
MEMORYSTATUSEX info;
if (GlobalMemoryStatusEx(&info) != 0) {
resp->total = info.ullTotalPhys;
resp->free = info.ullAvailPhys;
} else {
resp->err = strdup(LOAD_ERR());
}
return;
}
#elif __linux__
#include <errno.h>
#include <string.h>
#include <sys/sysinfo.h>
void cpu_check_ram(mem_info_t *resp) {
struct sysinfo info;
resp->err = NULL;
if (sysinfo(&info) != 0) {
resp->err = strdup(strerror(errno));
} else {
resp->total = info.totalram * info.mem_unit;
resp->free = info.freeram * info.mem_unit;
}
return;
}
#elif __APPLE__
// TODO consider an Apple implementation that does something useful
// mem_info_t cpu_check_ram() {
// mem_info_t resp = {0, 0, NULL};
// return resp;
// }
#else
#error "Unsupported platform"
#endif

106
gpu/gpu_info_cuda.c Normal file
View file

@ -0,0 +1,106 @@
#ifndef __APPLE__ // TODO - maybe consider nvidia support on intel macs?
#include "gpu_info_cuda.h"
#include <string.h>
#ifndef _WIN32
const char *cuda_lib_paths[] = {
"libnvidia-ml.so",
"/usr/local/cuda/lib64/libnvidia-ml.so",
"/usr/lib/x86_64-linux-gnu/nvidia/current/libnvidia-ml.so",
"/usr/lib/wsl/lib/libnvidia-ml.so.1", // TODO Maybe glob?
NULL,
};
#else
const char *cuda_lib_paths[] = {
"nvml.dll",
"",
NULL,
};
#endif
void cuda_init(cuda_init_resp_t *resp) {
nvmlReturn_t ret;
resp->err = NULL;
const int buflen = 256;
char buf[buflen + 1];
int i;
struct lookup {
char *s;
void **p;
} l[4] = {
{"nvmlInit_v2", (void *)&resp->ch.initFn},
{"nvmlShutdown", (void *)&resp->ch.shutdownFn},
{"nvmlDeviceGetHandleByIndex", (void *)&resp->ch.getHandle},
{"nvmlDeviceGetMemoryInfo", (void *)&resp->ch.getMemInfo},
};
for (i = 0; cuda_lib_paths[i] != NULL && resp->ch.handle == NULL; i++) {
resp->ch.handle = LOAD_LIBRARY(cuda_lib_paths[i], RTLD_LAZY);
}
if (!resp->ch.handle) {
// TODO improve error message, as the LOAD_ERR will have typically have the
// final path that was checked which might be confusing.
snprintf(buf, buflen,
"Unable to load %s library to query for Nvidia GPUs: %s",
cuda_lib_paths[0], LOAD_ERR());
resp->err = strdup(buf);
return;
}
for (i = 0; i < 4; i++) { // TODO - fix this to use a null terminated list
*l[i].p = LOAD_SYMBOL(resp->ch.handle, l[i].s);
if (!l[i].p) {
UNLOAD_LIBRARY(resp->ch.handle);
resp->ch.handle = NULL;
snprintf(buf, buflen, "symbol lookup for %s failed: %s", l[i].s,
LOAD_ERR());
resp->err = strdup(buf);
return;
}
}
ret = (*resp->ch.initFn)();
if (ret != NVML_SUCCESS) {
snprintf(buf, buflen, "nvml vram init failure: %d", ret);
resp->err = strdup(buf);
}
return;
}
void cuda_check_vram(cuda_handle_t h, mem_info_t *resp) {
resp->err = NULL;
nvmlDevice_t device;
nvmlMemory_t memInfo = {0};
nvmlReturn_t ret;
const int buflen = 256;
char buf[buflen + 1];
int i;
if (h.handle == NULL) {
resp->err = strdup("nvml handle sn't initialized");
return;
}
// TODO - handle multiple GPUs
ret = (*h.getHandle)(0, &device);
if (ret != NVML_SUCCESS) {
snprintf(buf, buflen, "unable to get device handle: %d", ret);
resp->err = strdup(buf);
return;
}
ret = (*h.getMemInfo)(device, &memInfo);
if (ret != NVML_SUCCESS) {
snprintf(buf, buflen, "device memory info lookup failure: %d", ret);
resp->err = strdup(buf);
return;
}
resp->total = memInfo.total;
resp->free = memInfo.free;
return;
}
#endif // __APPLE__

35
gpu/gpu_info_cuda.h Normal file
View file

@ -0,0 +1,35 @@
#ifndef __APPLE__
#ifndef __GPU_INFO_CUDA_H__
#define __GPU_INFO_CUDA_H__
#include "gpu_info.h"
// Just enough typedef's to dlopen/dlsym for memory information
typedef enum nvmlReturn_enum {
NVML_SUCCESS = 0,
// Other values omitted for now...
} nvmlReturn_t;
typedef void *nvmlDevice_t; // Opaque is sufficient
typedef struct nvmlMemory_st {
unsigned long long total;
unsigned long long free;
unsigned long long used;
} nvmlMemory_t;
typedef struct cuda_handle {
void *handle;
nvmlReturn_t (*initFn)(void);
nvmlReturn_t (*shutdownFn)(void);
nvmlReturn_t (*getHandle)(unsigned int, nvmlDevice_t *);
nvmlReturn_t (*getMemInfo)(nvmlDevice_t, nvmlMemory_t *);
} cuda_handle_t;
typedef struct cuda_init_resp {
char *err; // If err is non-null handle is invalid
cuda_handle_t ch;
} cuda_init_resp_t;
void cuda_init(cuda_init_resp_t *resp);
void cuda_check_vram(cuda_handle_t ch, mem_info_t *resp);
#endif // __GPU_INFO_CUDA_H__
#endif // __APPLE__

114
gpu/gpu_info_rocm.c Normal file
View file

@ -0,0 +1,114 @@
#ifndef __APPLE__
#include "gpu_info_rocm.h"
#include <string.h>
#ifndef _WIN32
const char *rocm_lib_paths[] = {
"librocm_smi64.so",
"/opt/rocm/lib/librocm_smi64.so",
NULL,
};
#else
// TODO untested
const char *rocm_lib_paths[] = {
"rocm_smi64.dll",
"/opt/rocm/lib/rocm_smi64.dll",
NULL,
};
#endif
void rocm_init(rocm_init_resp_t *resp) {
rsmi_status_t ret;
resp->err = NULL;
const int buflen = 256;
char buf[buflen + 1];
int i;
struct lookup {
char *s;
void **p;
} l[4] = {
{"rsmi_init", (void *)&resp->rh.initFn},
{"rsmi_shut_down", (void *)&resp->rh.shutdownFn},
{"rsmi_dev_memory_total_get", (void *)&resp->rh.totalMemFn},
{"rsmi_dev_memory_usage_get", (void *)&resp->rh.usageMemFn},
// { "rsmi_dev_id_get", (void*)&resp->rh.getHandle },
};
for (i = 0; rocm_lib_paths[i] != NULL && resp->rh.handle == NULL; i++) {
resp->rh.handle = LOAD_LIBRARY(rocm_lib_paths[i], RTLD_LAZY);
}
if (!resp->rh.handle) {
snprintf(buf, buflen,
"Unable to load %s library to query for Radeon GPUs: %s\n",
rocm_lib_paths[0], LOAD_ERR());
resp->err = strdup(buf);
return;
}
for (i = 0; i < 4; i++) {
*l[i].p = LOAD_SYMBOL(resp->rh.handle, l[i].s);
if (!l[i].p) {
UNLOAD_LIBRARY(resp->rh.handle);
snprintf(buf, buflen, "symbol lookup for %s failed: %s", l[i].s,
LOAD_ERR());
resp->err = strdup(buf);
return;
}
}
ret = (*resp->rh.initFn)(0);
if (ret != RSMI_STATUS_SUCCESS) {
snprintf(buf, buflen, "rocm vram init failure: %d", ret);
resp->err = strdup(buf);
}
return;
}
void rocm_check_vram(rocm_handle_t h, mem_info_t *resp) {
resp->err = NULL;
// uint32_t num_devices;
// uint16_t device;
uint64_t totalMem = 0;
uint64_t usedMem = 0;
rsmi_status_t ret;
const int buflen = 256;
char buf[buflen + 1];
int i;
if (h.handle == NULL) {
resp->err = strdup("nvml handle sn't initialized");
return;
}
// TODO - iterate through devices... ret =
// rsmi_num_monitor_devices(&num_devices);
// ret = (*h.getHandle)(0, &device);
// if (ret != RSMI_STATUS_SUCCESS) {
// printf("rocm vram device lookup failure: %d\n", ret);
// return -1;
// }
// Get total memory - used memory for available memory
ret = (*h.totalMemFn)(0, RSMI_MEM_TYPE_VRAM, &totalMem);
if (ret != RSMI_STATUS_SUCCESS) {
snprintf(buf, buflen, "rocm total mem lookup failure: %d", ret);
resp->err = strdup(buf);
return;
}
ret = (*h.usageMemFn)(0, RSMI_MEM_TYPE_VRAM, &usedMem);
if (ret != RSMI_STATUS_SUCCESS) {
snprintf(buf, buflen, "rocm usage mem lookup failure: %d", ret);
resp->err = strdup(buf);
return;
}
resp->total = totalMem;
resp->free = totalMem - usedMem;
return;
}
#endif // __APPLE__

36
gpu/gpu_info_rocm.h Normal file
View file

@ -0,0 +1,36 @@
#ifndef __APPLE__
#ifndef __GPU_INFO_ROCM_H__
#define __GPU_INFO_ROCM_H__
#include "gpu_info.h"
// Just enough typedef's to dlopen/dlsym for memory information
typedef enum rsmi_status_return {
RSMI_STATUS_SUCCESS = 0,
// Other values omitted for now...
} rsmi_status_t;
typedef enum rsmi_memory_type {
RSMI_MEM_TYPE_VRAM = 0,
RSMI_MEM_TYPE_VIS_VRAM,
RSMI_MEM_TYPE_GTT,
} rsmi_memory_type_t;
typedef struct rocm_handle {
void *handle;
rsmi_status_t (*initFn)(uint64_t);
rsmi_status_t (*shutdownFn)(void);
rsmi_status_t (*totalMemFn)(uint32_t, rsmi_memory_type_t, uint64_t *);
rsmi_status_t (*usageMemFn)(uint32_t, rsmi_memory_type_t, uint64_t *);
// rsmi_status_t (*getHandle)(uint32_t, uint16_t *);
} rocm_handle_t;
typedef struct rocm_init_resp {
char *err; // If err is non-null handle is invalid
rocm_handle_t rh;
} rocm_init_resp_t;
void rocm_init(rocm_init_resp_t *resp);
void rocm_check_vram(rocm_handle_t rh, mem_info_t *resp);
#endif // __GPU_INFO_ROCM_H__
#endif // __APPLE__

26
gpu/gpu_test.go Normal file
View file

@ -0,0 +1,26 @@
package gpu
import (
"runtime"
"testing"
"github.com/stretchr/testify/assert"
)
func TestBasicGetGPUInfo(t *testing.T) {
info := GetGPUInfo()
assert.Contains(t, "CUDA ROCM CPU METAL", info.Driver)
switch runtime.GOOS {
case "darwin":
// TODO - remove this once MacOS returns some size for CPU
return
case "linux", "windows":
assert.Greater(t, info.TotalMemory, uint64(0))
assert.Greater(t, info.FreeMemory, uint64(0))
default:
return
}
}
// TODO - add some logic to figure out card type through other means and actually verify we got back what we expected

11
gpu/types.go Normal file
View file

@ -0,0 +1,11 @@
package gpu
// Beginning of an `ollama info` command
type GpuInfo struct {
Driver string `json:"driver,omitempty"`
Library string `json:"library,omitempty"`
TotalMemory uint64 `json:"total_memory,omitempty"`
FreeMemory uint64 `json:"free_memory,omitempty"`
// TODO add other useful attributes about the card here for discovery information
}

136
llm/dynamic_shim.c Normal file
View file

@ -0,0 +1,136 @@
#include "dynamic_shim.h"
#include <stdio.h>
#include <string.h>
#ifdef __linux__
#include <dlfcn.h>
#define LOAD_LIBRARY(lib, flags) dlopen(lib, flags | RTLD_DEEPBIND)
#define LOAD_SYMBOL(handle, sym) dlsym(handle, sym)
#define LOAD_ERR() dlerror()
#define UNLOAD_LIBRARY(handle) dlclose(handle)
#elif _WIN32
#include <windows.h>
#define LOAD_LIBRARY(lib, flags) LoadLibrary(lib)
#define LOAD_SYMBOL(handle, sym) GetProcAddress(handle, sym)
#define UNLOAD_LIBRARY(handle) FreeLibrary(handle)
// TODO - refactor this with proper error message handling on windows
inline static char *LOAD_ERR() {
static char errbuf[8];
snprintf(errbuf, 8, "0x%lx", GetLastError());
return errbuf;
}
#else
#include <dlfcn.h>
#define LOAD_LIBRARY(lib, flags) dlopen(lib, flags)
#define LOAD_SYMBOL(handle, sym) dlsym(handle, sym)
#define LOAD_ERR() dlerror()
#define UNLOAD_LIBRARY(handle) dlclose(handle)
#endif
void dynamic_shim_init(const char *libPath, struct dynamic_llama_server *s,
ext_server_resp_t *err) {
int i = 0;
struct lookup {
char *s;
void **p;
} l[] = {
{"llama_server_init", (void *)&s->llama_server_init},
{"llama_server_start", (void *)&s->llama_server_start},
{"llama_server_stop", (void *)&s->llama_server_stop},
{"llama_server_completion", (void *)&s->llama_server_completion},
{"llama_server_completion_next_result",
(void *)&s->llama_server_completion_next_result},
{"llama_server_completion_cancel",
(void *)&s->llama_server_completion_cancel},
{"llama_server_release_task_result",
(void *)&s->llama_server_release_task_result},
{"llama_server_tokenize", (void *)&s->llama_server_tokenize},
{"llama_server_detokenize", (void *)&s->llama_server_detokenize},
{"llama_server_embedding", (void *)&s->llama_server_embedding},
{"llama_server_release_json_resp",
(void *)&s->llama_server_release_json_resp},
{"", NULL},
};
printf("Lazy loading %s library\n", libPath);
s->handle = LOAD_LIBRARY(libPath, RTLD_NOW);
if (!s->handle) {
err->id = -1;
snprintf(err->msg, err->msg_len,
"Unable to load dynamic server library: %s", LOAD_ERR());
return;
}
for (i = 0; l[i].p != NULL; i++) {
*l[i].p = LOAD_SYMBOL(s->handle, l[i].s);
if (!l[i].p) {
UNLOAD_LIBRARY(s->handle);
err->id = -1;
snprintf(err->msg, err->msg_len, "symbol lookup for %s failed: %s",
l[i].s, LOAD_ERR());
return;
}
}
}
inline void dynamic_shim_llama_server_init(struct dynamic_llama_server s,
ext_server_params_t *sparams,
ext_server_resp_t *err) {
s.llama_server_init(sparams, err);
}
inline void dynamic_shim_llama_server_start(struct dynamic_llama_server s) {
s.llama_server_start();
}
inline void dynamic_shim_llama_server_stop(struct dynamic_llama_server s) {
s.llama_server_stop();
}
inline void dynamic_shim_llama_server_completion(struct dynamic_llama_server s,
const char *json_req,
ext_server_resp_t *resp) {
s.llama_server_completion(json_req, resp);
}
inline void dynamic_shim_llama_server_completion_next_result(
struct dynamic_llama_server s, const int task_id,
ext_server_task_result_t *result) {
s.llama_server_completion_next_result(task_id, result);
}
inline void dynamic_shim_llama_server_completion_cancel(
struct dynamic_llama_server s, const int task_id, ext_server_resp_t *err) {
s.llama_server_completion_cancel(task_id, err);
}
inline void dynamic_shim_llama_server_release_task_result(
struct dynamic_llama_server s, ext_server_task_result_t *result) {
s.llama_server_release_task_result(result);
}
inline void dynamic_shim_llama_server_tokenize(struct dynamic_llama_server s,
const char *json_req,
char **json_resp,
ext_server_resp_t *err) {
s.llama_server_tokenize(json_req, json_resp, err);
}
inline void dynamic_shim_llama_server_detokenize(struct dynamic_llama_server s,
const char *json_req,
char **json_resp,
ext_server_resp_t *err) {
s.llama_server_detokenize(json_req, json_resp, err);
}
inline void dynamic_shim_llama_server_embedding(struct dynamic_llama_server s,
const char *json_req,
char **json_resp,
ext_server_resp_t *err) {
s.llama_server_embedding(json_req, json_resp, err);
}
inline void dynamic_shim_llama_server_release_json_resp(
struct dynamic_llama_server s, char **json_resp) {
s.llama_server_release_json_resp(json_resp);
}

74
llm/dynamic_shim.h Normal file
View file

@ -0,0 +1,74 @@
#include <stdlib.h>
#include "server.h"
#ifdef __cplusplus
extern "C" {
#endif
struct dynamic_llama_server {
void *handle;
void (*llama_server_init)(ext_server_params_t *sparams,
ext_server_resp_t *err);
void (*llama_server_start)();
void (*llama_server_stop)();
void (*llama_server_completion)(const char *json_req,
ext_server_resp_t *resp);
void (*llama_server_completion_next_result)(const int task_id,
ext_server_task_result_t *result);
void (*llama_server_completion_cancel)(const int task_id,
ext_server_resp_t *err);
void (*llama_server_release_task_result)(ext_server_task_result_t *result);
void (*llama_server_tokenize)(const char *json_req, char **json_resp,
ext_server_resp_t *err);
void (*llama_server_detokenize)(const char *json_req, char **json_resp,
ext_server_resp_t *err);
void (*llama_server_embedding)(const char *json_req, char **json_resp,
ext_server_resp_t *err);
void (*llama_server_release_json_resp)(char **json_resp);
};
void dynamic_shim_init(const char *libPath, struct dynamic_llama_server *s,
ext_server_resp_t *err);
// No good way to call C function pointers from Go so inline the indirection
void dynamic_shim_llama_server_init(struct dynamic_llama_server s,
ext_server_params_t *sparams,
ext_server_resp_t *err);
void dynamic_shim_llama_server_start(struct dynamic_llama_server s);
void dynamic_shim_llama_server_stop(struct dynamic_llama_server s);
void dynamic_shim_llama_server_completion(struct dynamic_llama_server s,
const char *json_req,
ext_server_resp_t *resp);
void dynamic_shim_llama_server_completion_next_result(
struct dynamic_llama_server s, const int task_id,
ext_server_task_result_t *result);
void dynamic_shim_llama_server_completion_cancel(struct dynamic_llama_server s,
const int task_id,
ext_server_resp_t *err);
void dynamic_shim_llama_server_release_task_result(
struct dynamic_llama_server s, ext_server_task_result_t *result);
void dynamic_shim_llama_server_tokenize(struct dynamic_llama_server s,
const char *json_req, char **json_resp,
ext_server_resp_t *err);
void dynamic_shim_llama_server_detokenize(struct dynamic_llama_server s,
const char *json_req,
char **json_resp,
ext_server_resp_t *err);
void dynamic_shim_llama_server_embedding(struct dynamic_llama_server s,
const char *json_req, char **json_resp,
ext_server_resp_t *err);
void dynamic_shim_llama_server_release_json_resp(struct dynamic_llama_server s,
char **json_resp);
#ifdef __cplusplus
}
#endif

423
llm/ext_server.go Normal file
View file

@ -0,0 +1,423 @@
package llm
/*
#cgo CFLAGS: -I${SRCDIR}/llama.cpp/gguf -I${SRCDIR}/llama.cpp/gguf/common -I${SRCDIR}/llama.cpp/gguf/examples/server
#cgo CFLAGS: -DNDEBUG -DLLAMA_SERVER_LIBRARY=1 -D_XOPEN_SOURCE=600 -DACCELERATE_NEW_LAPACK -DACCELERATE_LAPACK_ILP64
#cgo CFLAGS: -Wmissing-noreturn -Wall -Wextra -Wcast-qual -Wno-unused-function -Wno-array-bounds
#cgo CPPFLAGS: -Ofast -Wall -Wextra -Wno-unused-function -Wno-unused-variable -Wno-deprecated-declarations -Wno-unused-but-set-variable
#cgo darwin CFLAGS: -D_DARWIN_C_SOURCE
#cgo darwin CPPFLAGS: -DGGML_USE_ACCELERATE
#cgo darwin CPPFLAGS: -DGGML_USE_METAL -DGGML_METAL_NDEBUG
#cgo darwin LDFLAGS: -lc++ -framework Accelerate
#cgo darwin LDFLAGS: -framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders
#cgo darwin LDFLAGS: ${SRCDIR}/llama.cpp/gguf/build/metal/common/libcommon.a
#cgo darwin LDFLAGS: ${SRCDIR}/llama.cpp/gguf/build/metal/examples/server/libext_server.a
#cgo darwin LDFLAGS: ${SRCDIR}/llama.cpp/gguf/build/metal/libllama.a
#cgo darwin LDFLAGS: ${SRCDIR}/llama.cpp/gguf/build/metal/libggml_static.a
#cgo linux CFLAGS: -D_GNU_SOURCE
#cgo linux windows CFLAGS: -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_USE_CUBLAS
#cgo linux LDFLAGS: -L/usr/local/cuda/targets/x86_64-linux/lib -L/usr/local/cuda/lib64 -L/usr/local/cuda/targets/x86_64-linux/lib/stubs
#cgo linux LDFLAGS: ${SRCDIR}/llama.cpp/gguf/build/cpu/examples/server/libext_server.a
#cgo linux LDFLAGS: ${SRCDIR}/llama.cpp/gguf/build/cpu/common/libcommon.a
#cgo linux LDFLAGS: ${SRCDIR}/llama.cpp/gguf/build/cpu/libllama.a
#cgo linux LDFLAGS: ${SRCDIR}/llama.cpp/gguf/build/cpu/libggml_static.a
#cgo linux LDFLAGS: -lrt -lpthread -ldl -lstdc++ -lm
#cgo windows LDFLAGS: -L${SRCDIR}/llama.cpp/gguf/build/wincpu/dist/lib
#cgo windows LDFLAGS: -lcpu_server -lpthread
#include <stdlib.h>
#include "server.h"
*/
import "C"
import (
"bytes"
"context"
"encoding/json"
"fmt"
"log"
"os"
"strings"
"sync"
"time"
"unsafe"
"github.com/jmorganca/ollama/api"
"github.com/jmorganca/ollama/gpu"
)
func newExtServerResp(len C.size_t) C.ext_server_resp_t {
var resp C.ext_server_resp_t
resp.msg_len = len
bytes := make([]byte, len)
resp.msg = (*C.char)(C.CBytes(bytes))
return resp
}
func freeExtServerResp(resp C.ext_server_resp_t) {
if resp.msg_len == 0 {
return
}
C.free(unsafe.Pointer(resp.msg))
}
func extServerResponseToErr(resp C.ext_server_resp_t) error {
return fmt.Errorf(C.GoString(resp.msg))
}
type extServer interface {
LLM
llama_server_init(sparams *C.ext_server_params_t, err *C.ext_server_resp_t)
llama_server_start()
llama_server_stop()
llama_server_completion(json_req *C.char, resp *C.ext_server_resp_t)
llama_server_completion_next_result(task_id C.int, resp *C.ext_server_task_result_t)
llama_server_completion_cancel(task_id C.int, err *C.ext_server_resp_t)
llama_server_release_task_result(result *C.ext_server_task_result_t)
llama_server_tokenize(json_req *C.char, json_resp **C.char, err *C.ext_server_resp_t)
llama_server_detokenize(json_req *C.char, json_resp **C.char, err *C.ext_server_resp_t)
llama_server_embedding(json_req *C.char, json_resp **C.char, err *C.ext_server_resp_t)
llama_server_release_json_resp(json_resp **C.char)
}
type llamaExtServer struct {
api.Options
}
// Note: current implementation does not support concurrent instantiations
var mutex sync.Mutex
func (llm *llamaExtServer) llama_server_init(sparams *C.ext_server_params_t, err *C.ext_server_resp_t) {
C.llama_server_init(sparams, err)
}
func (llm *llamaExtServer) llama_server_start() {
C.llama_server_start()
}
func (llm *llamaExtServer) llama_server_stop() {
C.llama_server_stop()
}
func (llm *llamaExtServer) llama_server_completion(json_req *C.char, resp *C.ext_server_resp_t) {
C.llama_server_completion(json_req, resp)
}
func (llm *llamaExtServer) llama_server_completion_next_result(task_id C.int, resp *C.ext_server_task_result_t) {
C.llama_server_completion_next_result(task_id, resp)
}
func (llm *llamaExtServer) llama_server_completion_cancel(task_id C.int, err *C.ext_server_resp_t) {
C.llama_server_completion_cancel(task_id, err)
}
func (llm *llamaExtServer) llama_server_release_task_result(result *C.ext_server_task_result_t) {
C.llama_server_release_task_result(result)
}
func (llm *llamaExtServer) llama_server_tokenize(json_req *C.char, json_resp **C.char, err *C.ext_server_resp_t) {
C.llama_server_tokenize(json_req, json_resp, err)
}
func (llm *llamaExtServer) llama_server_detokenize(json_req *C.char, json_resp **C.char, err *C.ext_server_resp_t) {
C.llama_server_detokenize(json_req, json_resp, err)
}
func (llm *llamaExtServer) llama_server_embedding(json_req *C.char, json_resp **C.char, err *C.ext_server_resp_t) {
C.llama_server_embedding(json_req, json_resp, err)
}
func (llm *llamaExtServer) llama_server_release_json_resp(json_resp **C.char) {
C.llama_server_release_json_resp(json_resp)
}
func newDefaultExtServer(model string, adapters, projectors []string, numLayers int64, opts api.Options) (extServer, error) {
server := &llamaExtServer{opts}
return newExtServer(server, model, adapters, projectors, numLayers, opts)
}
func newExtServer(server extServer, model string, adapters, projectors []string, numLayers int64, opts api.Options) (extServer, error) {
if !mutex.TryLock() {
log.Printf("concurrent llm servers not yet supported, waiting for prior server to complete")
mutex.Lock()
}
fileInfo, err := os.Stat(model)
if err != nil {
return nil, err
}
var sparams C.ext_server_params_t
sparams.model = C.CString(model)
defer C.free(unsafe.Pointer(sparams.model))
numGPU := gpu.NumGPU(numLayers, fileInfo.Size(), opts)
sparams.embedding = true
sparams.n_ctx = C.uint(opts.NumCtx)
sparams.n_batch = C.uint(opts.NumBatch)
sparams.n_gpu_layers = C.int(numGPU)
sparams.main_gpu = C.int(opts.MainGPU)
sparams.n_parallel = 1 // TODO - wire up concurrency
// Always use the value encoded in the model
sparams.rope_freq_base = 0.0
sparams.rope_freq_scale = 0.0
sparams.memory_f16 = C.bool(opts.F16KV)
sparams.use_mlock = C.bool(opts.UseMLock)
sparams.use_mmap = C.bool(opts.UseMMap)
sparams.numa = C.bool(opts.UseNUMA)
sparams.lora_adapters = nil
for i := 0; i < len(adapters); i++ {
la := (*C.ext_server_lora_adapter_t)(C.malloc(C.sizeof_ext_server_lora_adapter_t))
defer C.free(unsafe.Pointer(la))
la.adapter = C.CString(adapters[i])
defer C.free(unsafe.Pointer(la.adapter))
la.scale = C.float(1.0) // TODO expose scale/weights up through ollama UX
la.next = nil
if i == 0 {
sparams.lora_adapters = la
} else {
tmp := sparams.lora_adapters
for ; tmp.next != nil; tmp = tmp.next {
}
tmp.next = la
}
}
if len(projectors) > 0 {
// TODO: applying multiple projectors is not supported by the llama.cpp server yet
sparams.mmproj = C.CString(projectors[0])
defer C.free(unsafe.Pointer(sparams.mmproj))
} else {
sparams.mmproj = nil
}
sparams.n_threads = C.uint(opts.NumThread)
log.Printf("Initializing internal llama server")
resp := newExtServerResp(128)
defer freeExtServerResp(resp)
server.llama_server_init(&sparams, &resp)
if resp.id < 0 {
return nil, extServerResponseToErr(resp)
}
log.Printf("Starting internal llama main loop")
server.llama_server_start()
return server, nil
}
func (llm *llamaExtServer) Predict(ctx context.Context, pred PredictOpts, fn func(PredictResult)) error {
return predict(llm, llm.Options, ctx, pred, fn)
}
func predict(llm extServer, opts api.Options, ctx context.Context, predict PredictOpts, fn func(PredictResult)) error {
resp := newExtServerResp(128)
defer freeExtServerResp(resp)
var imageData []ImageData
if len(predict.Images) > 0 {
for cnt, i := range predict.Images {
imageData = append(imageData, ImageData{Data: i, ID: cnt})
}
}
log.Printf("loaded %d images", len(imageData))
request := map[string]any{
"prompt": predict.Prompt,
"stream": true,
"n_predict": opts.NumPredict,
"n_keep": opts.NumKeep,
"temperature": opts.Temperature,
"top_k": opts.TopK,
"top_p": opts.TopP,
"tfs_z": opts.TFSZ,
"typical_p": opts.TypicalP,
"repeat_last_n": opts.RepeatLastN,
"repeat_penalty": opts.RepeatPenalty,
"presence_penalty": opts.PresencePenalty,
"frequency_penalty": opts.FrequencyPenalty,
"mirostat": opts.Mirostat,
"mirostat_tau": opts.MirostatTau,
"mirostat_eta": opts.MirostatEta,
"penalize_nl": opts.PenalizeNewline,
"seed": opts.Seed,
"stop": opts.Stop,
"image_data": imageData,
}
if predict.Format == "json" {
request["grammar"] = jsonGrammar
}
retryDelay := 100 * time.Microsecond
for retries := 0; retries < maxRetries; retries++ {
if retries > 0 {
time.Sleep(retryDelay) // wait before retrying
retryDelay *= 2 // exponential backoff
}
// Handling JSON marshaling with special characters unescaped.
buffer := &bytes.Buffer{}
enc := json.NewEncoder(buffer)
enc.SetEscapeHTML(false)
if err := enc.Encode(request); err != nil {
return fmt.Errorf("failed to marshal data: %w", err)
}
req := C.CString(buffer.String())
defer C.free(unsafe.Pointer(req))
llm.llama_server_completion(req, &resp)
if resp.id < 0 {
return extServerResponseToErr(resp)
}
retryNeeded := false
out:
for {
select {
case <-ctx.Done():
// This handles the request cancellation
llm.llama_server_completion_cancel(resp.id, &resp)
if resp.id < 0 {
return extServerResponseToErr(resp)
} else {
return nil
}
default:
var result C.ext_server_task_result_t
llm.llama_server_completion_next_result(resp.id, &result)
json_resp := C.GoString(result.json_resp)
llm.llama_server_release_task_result(&result)
var p prediction
if err := json.Unmarshal([]byte(json_resp), &p); err != nil {
llm.llama_server_completion_cancel(resp.id, &resp)
if resp.id < 0 {
return fmt.Errorf("error unmarshaling llm prediction response: %w and cancel %s", err, C.GoString(resp.msg))
} else {
return fmt.Errorf("error unmarshaling llm prediction response: %w", err)
}
}
if bool(result.error) && strings.Contains(json_resp, "slot unavailable") {
retryNeeded = true
// task will already be canceled
break out
}
if p.Content != "" {
fn(PredictResult{
Content: p.Content,
})
}
if p.Stop {
fn(PredictResult{
Done: true,
PromptEvalCount: p.Timings.PromptN,
PromptEvalDuration: parseDurationMs(p.Timings.PromptMS),
EvalCount: p.Timings.PredictedN,
EvalDuration: parseDurationMs(p.Timings.PredictedMS),
})
return nil
}
}
}
if !retryNeeded {
return nil // success
}
}
// should never reach here ideally
return fmt.Errorf("max retries exceeded")
}
func (llm *llamaExtServer) Encode(ctx context.Context, prompt string) ([]int, error) {
return encode(llm, ctx, prompt)
}
func encode(llm extServer, ctx context.Context, prompt string) ([]int, error) {
data, err := json.Marshal(TokenizeRequest{Content: prompt})
if err != nil {
return nil, fmt.Errorf("marshaling encode data: %w", err)
}
req := C.CString(string(data))
defer C.free(unsafe.Pointer(req))
var json_resp *C.char
resp := newExtServerResp(128)
defer freeExtServerResp(resp)
llm.llama_server_tokenize(req, &json_resp, &resp)
if resp.id < 0 {
return nil, extServerResponseToErr(resp)
}
defer llm.llama_server_release_json_resp(&json_resp)
var encoded TokenizeResponse
if err2 := json.Unmarshal([]byte(C.GoString(json_resp)), &encoded); err2 != nil {
return nil, fmt.Errorf("unmarshal encode response: %w", err2)
}
return encoded.Tokens, err
}
func (llm *llamaExtServer) Decode(ctx context.Context, tokens []int) (string, error) {
return decode(llm, ctx, tokens)
}
func decode(llm extServer, ctx context.Context, tokens []int) (string, error) {
if len(tokens) == 0 {
return "", nil
}
data, err := json.Marshal(DetokenizeRequest{Tokens: tokens})
if err != nil {
return "", fmt.Errorf("marshaling decode data: %w", err)
}
req := C.CString(string(data))
defer C.free(unsafe.Pointer(req))
var json_resp *C.char
resp := newExtServerResp(128)
defer freeExtServerResp(resp)
llm.llama_server_detokenize(req, &json_resp, &resp)
if resp.id < 0 {
return "", extServerResponseToErr(resp)
}
defer llm.llama_server_release_json_resp(&json_resp)
var decoded DetokenizeResponse
if err2 := json.Unmarshal([]byte(C.GoString(json_resp)), &decoded); err2 != nil {
return "", fmt.Errorf("unmarshal encode response: %w", err2)
}
return decoded.Content, err
}
func (llm *llamaExtServer) Embedding(ctx context.Context, input string) ([]float64, error) {
return embedding(llm, ctx, input)
}
func embedding(llm extServer, ctx context.Context, input string) ([]float64, error) {
data, err := json.Marshal(TokenizeRequest{Content: input})
if err != nil {
return nil, fmt.Errorf("error marshaling embed data: %w", err)
}
req := C.CString(string(data))
defer C.free(unsafe.Pointer(req))
var json_resp *C.char
resp := newExtServerResp(128)
defer freeExtServerResp(resp)
llm.llama_server_embedding(req, &json_resp, &resp)
if resp.id < 0 {
return nil, extServerResponseToErr(resp)
}
defer llm.llama_server_release_json_resp(&json_resp)
var embedding EmbeddingResponse
if err := json.Unmarshal([]byte(C.GoString(json_resp)), &embedding); err != nil {
return nil, fmt.Errorf("unmarshal tokenize response: %w", err)
}
return embedding.Embedding, nil
}
func (llm *llamaExtServer) Close() {
close(llm)
}
func close(llm extServer) {
llm.llama_server_stop()
mutex.Unlock()
}

View file

@ -86,74 +86,6 @@ type container interface {
Decode(*readSeekOffset) (model, error) Decode(*readSeekOffset) (model, error)
} }
type containerGGML struct{}
func (c *containerGGML) Name() string {
return "ggml"
}
func (c *containerGGML) Decode(ro *readSeekOffset) (model, error) {
// file contents aren't decoded
ro.Seek(0, io.SeekEnd)
return nil, nil
}
type containerGGMF struct {
version uint32
}
func (c *containerGGMF) Name() string {
return "ggmf"
}
func (c *containerGGMF) Decode(ro *readSeekOffset) (model, error) {
var version uint32
binary.Read(ro, binary.LittleEndian, &version)
switch version {
case 1:
default:
return nil, errors.New("invalid version")
}
c.version = version
// remaining file contents aren't decoded
ro.Seek(0, io.SeekEnd)
return nil, nil
}
type containerGGJT struct {
version uint32
}
func (c *containerGGJT) Name() string {
return "ggjt"
}
func (c *containerGGJT) Decode(ro *readSeekOffset) (model, error) {
var version uint32
binary.Read(ro, binary.LittleEndian, &version)
switch version {
case 1, 2, 3:
default:
return nil, errors.New("invalid version")
}
c.version = version
// different model types may have different layouts for hyperparameters
var llama llamaModel
binary.Read(ro, binary.LittleEndian, &llama.hyperparameters)
// remaining file contents aren't decoded
ro.Seek(0, io.SeekEnd)
return &llama, nil
}
type containerLORA struct { type containerLORA struct {
version uint32 version uint32
} }
@ -194,6 +126,8 @@ const (
FILE_MAGIC_GGUF_BE = 0x47475546 FILE_MAGIC_GGUF_BE = 0x47475546
) )
var ErrUnsupportedFormat = errors.New("unsupported model format")
func DecodeGGML(r io.ReadSeeker) (*GGML, error) { func DecodeGGML(r io.ReadSeeker) (*GGML, error) {
ro := readSeekOffset{ReadSeeker: r} ro := readSeekOffset{ReadSeeker: r}
@ -204,12 +138,8 @@ func DecodeGGML(r io.ReadSeeker) (*GGML, error) {
var c container var c container
switch magic { switch magic {
case FILE_MAGIC_GGML: case FILE_MAGIC_GGML, FILE_MAGIC_GGMF, FILE_MAGIC_GGJT:
c = &containerGGML{} return nil, ErrUnsupportedFormat
case FILE_MAGIC_GGMF:
c = &containerGGMF{}
case FILE_MAGIC_GGJT:
c = &containerGGJT{}
case FILE_MAGIC_GGLA: case FILE_MAGIC_GGLA:
c = &containerLORA{} c = &containerLORA{}
case FILE_MAGIC_GGUF_LE: case FILE_MAGIC_GGUF_LE:

View file

@ -0,0 +1,42 @@
# common logic accross linux and darwin
init_vars() {
LLAMACPP_DIR=gguf
PATCHES="0001-Expose-callable-API-for-server.patch"
CMAKE_DEFS="-DLLAMA_ACCELERATE=on -DLLAMA_SERVER_VERBOSE=off"
# TODO - LLAMA_K_QUANTS is stale and needs to be mapped to newer cmake settings
CMAKE_TARGETS="--target ggml --target ggml_static --target llama --target build_info --target common --target ext_server --target llava_static"
if echo "${CGO_CFLAGS}" | grep -- '-g' >/dev/null; then
CMAKE_DEFS="-DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_VERBOSE_MAKEFILE=on -DLLAMA_GPROF=on ${CMAKE_DEFS}"
else
# TODO - add additional optimization flags...
CMAKE_DEFS="-DCMAKE_BUILD_TYPE=Release ${CMAKE_DEFS}"
fi
}
git_module_setup() {
if [ -n "${OLLAMA_SKIP_PATCHING}" ]; then
echo "Skipping submodule initialization"
return
fi
git submodule init
git submodule update --force gguf
}
apply_patches() {
if [ -n "${OLLAMA_SKIP_PATCHING}" ]; then
echo "Skipping submodule patching"
return
fi
# Workaround git apply not handling creation well for iteration
rm -f gguf/examples/server/server.h
for patch in ${PATCHES}; do
git -C gguf apply ../patches/${patch}
done
}
build() {
cmake -S ${LLAMACPP_DIR} -B ${BUILD_DIR} ${CMAKE_DEFS}
cmake --build ${BUILD_DIR} ${CMAKE_TARGETS} -j8
}

30
llm/llama.cpp/gen_darwin.sh Executable file
View file

@ -0,0 +1,30 @@
#!/bin/bash
# This script is intended to run inside the go generate
# working directory must be ../llm/llama.cpp
# TODO - add hardening to detect missing tools (cmake, etc.)
set -ex
set -o pipefail
echo "Starting darwin generate script"
source $(dirname $0)/gen_common.sh
init_vars
CMAKE_DEFS="-DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 -DLLAMA_METAL=on ${CMAKE_DEFS}"
BUILD_DIR="gguf/build/metal"
case "${GOARCH}" in
"amd64")
CMAKE_DEFS="-DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 ${CMAKE_DEFS}"
;;
"arm64")
CMAKE_DEFS="-DCMAKE_SYSTEM_PROCESSOR=arm64 -DCMAKE_OSX_ARCHITECTURES=arm64 ${CMAKE_DEFS}"
;;
*)
echo "GOARCH must be set"
echo "this script is meant to be run from within go generate"
exit 1
;;
esac
git_module_setup
apply_patches
build

89
llm/llama.cpp/gen_linux.sh Executable file
View file

@ -0,0 +1,89 @@
#!/bin/bash
# This script is intended to run inside the go generate
# working directory must be llm/llama.cpp
# First we build our default built-in library which will be linked into the CGO
# binary as a normal dependency. This default build is CPU based.
#
# Then we build a CUDA dynamic library (although statically linked with the CUDA
# library dependencies for maximum portability)
#
# Then if we detect ROCm, we build a dynamically loaded ROCm lib. ROCm is particularly
# important to be a dynamic lib even if it's the only GPU library detected because
# we can't redistribute the objectfiles but must rely on dynamic libraries at
# runtime, which could lead the server not to start if not present.
set -ex
set -o pipefail
echo "Starting linux generate script"
if [ -z "${CUDACXX}" -a -x /usr/local/cuda/bin/nvcc ]; then
export CUDACXX=/usr/local/cuda/bin/nvcc
fi
COMMON_CMAKE_DEFS="-DCMAKE_POSITION_INDEPENDENT_CODE=on -DLLAMA_ACCELERATE=on -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off"
OLLAMA_DYN_LIB_DIR="gguf/build/lib"
source $(dirname $0)/gen_common.sh
init_vars
git_module_setup
apply_patches
mkdir -p ${OLLAMA_DYN_LIB_DIR}
touch ${OLLAMA_DYN_LIB_DIR}/.generated
#
# CPU first for the default library
#
CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS}"
BUILD_DIR="gguf/build/cpu"
build
if [ -d /usr/local/cuda/lib64/ ]; then
echo "CUDA libraries detected - building dynamic CUDA library"
init_vars
CMAKE_DEFS="-DLLAMA_CUBLAS=on ${COMMON_CMAKE_DEFS} ${CMAKE_DEFS}"
BUILD_DIR="gguf/build/cuda"
CUDA_LIB_DIR=/usr/local/cuda/lib64
build
gcc -fPIC -g -shared -o ${OLLAMA_DYN_LIB_DIR}/libcuda_server.so \
-Wl,--whole-archive \
${BUILD_DIR}/examples/server/libext_server.a \
${BUILD_DIR}/common/libcommon.a \
${BUILD_DIR}/libllama.a \
-Wl,--no-whole-archive \
${CUDA_LIB_DIR}/libcudart_static.a \
${CUDA_LIB_DIR}/libcublas_static.a \
${CUDA_LIB_DIR}/libcublasLt_static.a \
${CUDA_LIB_DIR}/libcudadevrt.a \
${CUDA_LIB_DIR}/libculibos.a \
-lrt -lpthread -ldl -lstdc++ -lm
fi
if [ -z "${ROCM_PATH}" ]; then
# Try the default location in case it exists
ROCM_PATH=/opt/rocm
fi
if [ -z "${CLBlast_DIR}" ]; then
# Try the default location in case it exists
if [ -d /usr/lib/cmake/CLBlast ]; then
export CLBlast_DIR=/usr/lib/cmake/CLBlast
fi
fi
if [ -d "${ROCM_PATH}" ]; then
echo "ROCm libraries detected - building dynamic ROCm library"
init_vars
CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DLLAMA_HIPBLAS=on -DCMAKE_C_COMPILER=$ROCM_PATH/llvm/bin/clang -DCMAKE_CXX_COMPILER=$ROCM_PATH/llvm/bin/clang++ -DAMDGPU_TARGETS='gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102' -DGPU_TARGETS='gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102'"
BUILD_DIR="gguf/build/rocm"
build
gcc -fPIC -g -shared -o ${OLLAMA_DYN_LIB_DIR}/librocm_server.so \
-Wl,--whole-archive \
${BUILD_DIR}/examples/server/libext_server.a \
${BUILD_DIR}/common/libcommon.a \
${BUILD_DIR}/libllama.a \
-Wl,--no-whole-archive \
-lrt -lpthread -ldl -lstdc++ -lm \
-L/opt/rocm/lib -L/opt/amdgpu/lib/x86_64-linux-gnu/ \
-Wl,-rpath,/opt/rocm/lib,-rpath,/opt/amdgpu/lib/x86_64-linux-gnu/ \
-lhipblas -lrocblas -lamdhip64 -lrocsolver -lamd_comgr -lhsa-runtime64 -lrocsparse -ldrm -ldrm_amdgpu
fi

View file

@ -0,0 +1,92 @@
#!powershell
$ErrorActionPreference = "Stop"
function init_vars {
$script:patches = @("0001-Expose-callable-API-for-server.patch")
$script:cmakeDefs = @("-DBUILD_SHARED_LIBS=on", "-DLLAMA_NATIVE=off", "-DLLAMA_F16C=off", "-DLLAMA_FMA=off", "-DLLAMA_AVX512=off", "-DLLAMA_AVX2=off", "-DLLAMA_AVX=on", "-DLLAMA_K_QUANTS=on", "-DLLAMA_ACCELERATE=on", "-A","x64")
if ($env:CGO_CFLAGS -contains "-g") {
$script:cmakeDefs += @("-DCMAKE_VERBOSE_MAKEFILE=on")
$script:config = "RelWithDebInfo"
} else {
$script:config = "Release"
}
}
function git_module_setup {
# TODO add flags to skip the init/patch logic to make it easier to mod llama.cpp code in-repo
& git submodule init
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
& git submodule update --force gguf
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
}
function apply_patches {
rm -erroraction ignore -path "gguf/examples/server/server.h"
foreach ($patch in $script:patches) {
write-host "Applying patch $patch"
& git -C gguf apply ../patches/$patch
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
}
}
function build {
write-host "generating config with: cmake -S gguf -B $script:buildDir $script:cmakeDefs"
& cmake --version
& cmake -S gguf -B $script:buildDir $script:cmakeDefs
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
write-host "building with: cmake --build $script:buildDir --config $script:config"
& cmake --build $script:buildDir --config $script:config
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
}
function install {
rm -erroraction ignore -recurse -force -path $script:installDir
& cmake --install $script:buildDir --prefix $script:installDir --config $script:config
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
}
init_vars
git_module_setup
apply_patches
# first build CPU based
$script:buildDir="gguf/build/wincpu"
$script:installDir="gguf/build/wincpu/dist"
build
# install
md gguf/build/lib -ea 0
md gguf/build/wincpu/dist/lib -ea 0
mv gguf/build/wincpu/bin/$script:config/ext_server_shared.dll gguf/build/wincpu/dist/lib/cpu_server.dll
# Nope, this barfs on lots of symbol problems
#mv gguf/build/wincpu/examples/server/$script:config/ext_server_shared.dll gguf/build/wincpu/dist/lib/cpu_server.lib
# Nope: this needs lots of include paths to pull in things like msvcprt.lib and other deps
# & cl.exe `
# gguf/build/wincpu/examples/server/$script:config/ext_server.lib `
# gguf/build/wincpu/common/$script:config/common.lib `
# gguf/build/wincpu/$script:config/llama.lib `
# gguf/build/wincpu/$script:config/ggml_static.lib `
# /link /DLL /DEF:cpu_server.def /NOENTRY /MACHINE:X64 /OUT:gguf/build/wincpu/dist/lib/cpu_server.dll
# if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
# Then build cuda as a dynamically loaded library
init_vars
$script:buildDir="gguf/build/wincuda"
$script:installDir="gguf/build/wincuda/dist"
$script:cmakeDefs += @("-DLLAMA_CUBLAS=ON", "-DBUILD_SHARED_LIBS=on")
build
install
cp gguf/build/wincuda/dist/bin/ext_server_shared.dll gguf/build/lib/cuda_server.dll
# TODO - more to do here to create a usable dll
# TODO - implement ROCm support on windows
md gguf/build/winrocm/lib -ea 0
echo $null >> gguf/build/winrocm/lib/.generated

View file

@ -0,0 +1,3 @@
package llm
//go:generate sh ./gen_darwin.sh

View file

@ -1,18 +0,0 @@
package llm
//go:generate git submodule init
//go:generate git submodule update --force ggml
//go:generate git -C ggml apply ../patches/0001-add-detokenize-endpoint.patch
//go:generate git -C ggml apply ../patches/0002-34B-model-support.patch
//go:generate git -C ggml apply ../patches/0003-metal-fix-synchronization-in-new-matrix-multiplicati.patch
//go:generate git -C ggml apply ../patches/0004-metal-add-missing-barriers-for-mul-mat-2699.patch
//go:generate cmake -S ggml -B ggml/build/cpu -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_NAME=Darwin -DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0
//go:generate cmake --build ggml/build/cpu --target server --config Release
//go:generate mv ggml/build/cpu/bin/server ggml/build/cpu/bin/ollama-runner
//go:generate git submodule update --force gguf
//go:generate git -C gguf apply ../patches/0001-update-default-log-target.patch
//go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_METAL=off -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_NAME=Darwin -DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=on
//go:generate cmake --build gguf/build/cpu --target server --config Release
//go:generate mv gguf/build/cpu/bin/server gguf/build/cpu/bin/ollama-runner

View file

@ -1,18 +0,0 @@
package llm
//go:generate git submodule init
//go:generate git submodule update --force ggml
//go:generate git -C ggml apply ../patches/0001-add-detokenize-endpoint.patch
//go:generate git -C ggml apply ../patches/0002-34B-model-support.patch
//go:generate git -C ggml apply ../patches/0003-metal-fix-synchronization-in-new-matrix-multiplicati.patch
//go:generate git -C ggml apply ../patches/0004-metal-add-missing-barriers-for-mul-mat-2699.patch
//go:generate cmake -S ggml -B ggml/build/metal -DLLAMA_METAL=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_PROCESSOR=arm64 -DCMAKE_OSX_ARCHITECTURES=arm64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0
//go:generate cmake --build ggml/build/metal --target server --config Release
//go:generate mv ggml/build/metal/bin/server ggml/build/metal/bin/ollama-runner
//go:generate git submodule update --force gguf
//go:generate git -C gguf apply ../patches/0001-update-default-log-target.patch
//go:generate cmake -S gguf -B gguf/build/metal -DLLAMA_METAL=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DCMAKE_SYSTEM_PROCESSOR=arm64 -DCMAKE_OSX_ARCHITECTURES=arm64 -DCMAKE_OSX_DEPLOYMENT_TARGET=11.0
//go:generate cmake --build gguf/build/metal --target server --config Release
//go:generate mv gguf/build/metal/bin/server gguf/build/metal/bin/ollama-runner

View file

@ -1,26 +1,3 @@
package llm package llm
//go:generate git submodule init //go:generate bash ./gen_linux.sh
//go:generate git submodule update --force ggml
//go:generate git -C ggml apply ../patches/0001-add-detokenize-endpoint.patch
//go:generate git -C ggml apply ../patches/0002-34B-model-support.patch
//go:generate git -C ggml apply ../patches/0005-ggml-support-CUDA-s-half-type-for-aarch64-1455-2670.patch
//go:generate git -C ggml apply ../patches/0001-copy-cuda-runtime-libraries.patch
//go:generate cmake -S ggml -B ggml/build/cpu -DLLAMA_K_QUANTS=on
//go:generate cmake --build ggml/build/cpu --target server --config Release
//go:generate mv ggml/build/cpu/bin/server ggml/build/cpu/bin/ollama-runner
//go:generate git submodule update --force gguf
//go:generate git -C gguf apply ../patches/0001-copy-cuda-runtime-libraries.patch
//go:generate git -C gguf apply ../patches/0001-update-default-log-target.patch
//go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_K_QUANTS=on -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off
//go:generate cmake --build gguf/build/cpu --target server --config Release
//go:generate mv gguf/build/cpu/bin/server gguf/build/cpu/bin/ollama-runner
//go:generate cmake -S ggml -B ggml/build/cuda -DLLAMA_CUBLAS=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on
//go:generate cmake --build ggml/build/cuda --target server --config Release
//go:generate mv ggml/build/cuda/bin/server ggml/build/cuda/bin/ollama-runner
//go:generate cmake -S gguf -B gguf/build/cuda -DLLAMA_CUBLAS=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off -DLLAMA_CUDA_PEER_MAX_BATCH_SIZE=0
//go:generate cmake --build gguf/build/cuda --target server --config Release
//go:generate mv gguf/build/cuda/bin/server gguf/build/cuda/bin/ollama-runner

View file

@ -1,24 +1,3 @@
package llm package llm
//go:generate git submodule init //go:generate powershell -ExecutionPolicy Bypass -File ./gen_windows.ps1
//go:generate git submodule update --force ggml
//go:generate git -C ggml apply ../patches/0001-add-detokenize-endpoint.patch
//go:generate git -C ggml apply ../patches/0002-34B-model-support.patch
//go:generate cmake -S ggml -B ggml/build/cpu -DLLAMA_K_QUANTS=on
//go:generate cmake --build ggml/build/cpu --target server --config Release
//go:generate cmd /c move ggml\build\cpu\bin\Release\server.exe ggml\build\cpu\bin\Release\ollama-runner.exe
//go:generate git submodule update --force gguf
//go:generate git -C gguf apply ../patches/0001-update-default-log-target.patch
//go:generate cmake -S gguf -B gguf/build/cpu -DLLAMA_K_QUANTS=on -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off
//go:generate cmake --build gguf/build/cpu --target server --config Release
//go:generate cmd /c move gguf\build\cpu\bin\Release\server.exe gguf\build\cpu\bin\Release\ollama-runner.exe
//go:generate cmake -S ggml -B ggml/build/cuda -DLLAMA_CUBLAS=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on
//go:generate cmake --build ggml/build/cuda --target server --config Release
//go:generate cmd /c move ggml\build\cuda\bin\Release\server.exe ggml\build\cuda\bin\Release\ollama-runner.exe
//go:generate cmake -S gguf -B gguf/build/cuda -DLLAMA_CUBLAS=on -DLLAMA_ACCELERATE=on -DLLAMA_K_QUANTS=on -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off
//go:generate cmake --build gguf/build/cuda --target server --config Release
//go:generate cmd /c move gguf\build\cuda\bin\Release\server.exe gguf\build\cuda\bin\Release\ollama-runner.exe

@ -1 +0,0 @@
Subproject commit 9e232f0234073358e7031c1b8d7aa45020469a3b

@ -1 +1 @@
Subproject commit a7aee47b98e45539d491071b25778b833b77e387 Subproject commit 328b83de23b33240e28f4e74900d1d06726f5eb1

View file

@ -0,0 +1,460 @@
From 4c72576c5f6c2217b1ecf7fd8523616acc5526ae Mon Sep 17 00:00:00 2001
From: Daniel Hiltgen <daniel@ollama.com>
Date: Mon, 13 Nov 2023 12:25:58 -0800
Subject: [PATCH] Expose callable API for server
This adds an extern "C" interface within the example server
---
examples/server/CMakeLists.txt | 24 +++
examples/server/server.cpp | 279 +++++++++++++++++++++++++++++++++
examples/server/server.h | 89 +++++++++++
ggml-cuda.cu | 1 +
4 files changed, 393 insertions(+)
create mode 100644 examples/server/server.h
diff --git a/examples/server/CMakeLists.txt b/examples/server/CMakeLists.txt
index 859cd12..4ea47a7 100644
--- a/examples/server/CMakeLists.txt
+++ b/examples/server/CMakeLists.txt
@@ -11,3 +11,27 @@ if (WIN32)
TARGET_LINK_LIBRARIES(${TARGET} PRIVATE ws2_32)
endif()
target_compile_features(${TARGET} PRIVATE cxx_std_11)
+
+set(TARGET ext_server)
+option(LLAMA_SERVER_VERBOSE "Build verbose logging option for Server" ON)
+add_library(${TARGET} STATIC server.cpp)
+target_include_directories(${TARGET} PRIVATE ../../common)
+target_include_directories(${TARGET} PRIVATE ../..)
+target_compile_features(${TARGET} PRIVATE cxx_std_11)
+target_compile_definitions(${TARGET} PUBLIC LLAMA_SERVER_LIBRARY=1)
+target_link_libraries(${TARGET} PRIVATE common llama llava ${CMAKE_THREAD_LIBS_INIT})
+
+if (BUILD_SHARED_LIBS)
+ set_target_properties(ext_server PROPERTIES POSITION_INDEPENDENT_CODE ON)
+ target_compile_definitions(ext_server PRIVATE LLAMA_SHARED LLAMA_BUILD)
+ add_library(ext_server_shared SHARED $<TARGET_OBJECTS:ext_server>)
+ target_link_libraries(ext_server_shared PRIVATE ggml llama llava common ${CMAKE_THREAD_LIBS_INIT})
+ install(TARGETS ext_server_shared LIBRARY)
+endif()
+
+if (CUDAToolkit_FOUND)
+ target_include_directories(${TARGET} PRIVATE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
+ if (WIN32)
+ target_link_libraries(ext_server_shared PRIVATE nvml)
+ endif()
+endif()
\ No newline at end of file
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 0403853..5e78e4d 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -5,6 +5,9 @@
#include "../llava/clip.h"
#include "stb_image.h"
+#if defined(LLAMA_SERVER_LIBRARY)
+#include "server.h"
+#endif
#ifndef NDEBUG
// crash the server in debug mode, otherwise send an http 500 error
@@ -2643,6 +2646,7 @@ static void append_to_generated_text_from_generated_token_probs(llama_server_con
}
}
+#ifndef LLAMA_SERVER_LIBRARY
int main(int argc, char **argv)
{
#if SERVER_VERBOSE != 1
@@ -3123,3 +3127,278 @@ int main(int argc, char **argv)
llama_backend_free();
return 0;
}
+
+#else // LLAMA_SERVER_LIBRARY
+// Expose the llama server as a callable extern "C" API
+llama_server_context *llama = NULL;
+std::atomic<bool> ext_server_running(false);
+std::thread ext_server_thread;
+
+void llama_server_init(ext_server_params *sparams, ext_server_resp_t *err)
+{
+#if SERVER_VERBOSE != 1
+ log_disable();
+#endif
+ assert(err != NULL && sparams != NULL);
+ err->id = 0;
+ err->msg[0] = '\0';
+ try {
+ llama = new llama_server_context;
+ log_set_target(stdout);
+ gpt_params params;
+ params.n_ctx = sparams->n_ctx;
+ params.n_batch = sparams->n_batch;
+ if (sparams->n_threads > 0) {
+ params.n_threads = sparams->n_threads;
+ }
+ params.n_parallel = sparams->n_parallel;
+ params.rope_freq_base = sparams->rope_freq_base;
+ params.rope_freq_scale = sparams->rope_freq_scale;
+
+ if (sparams->memory_f16) {
+ params.cache_type_k = "f16";
+ params.cache_type_v = "f16";
+ } else {
+ params.cache_type_k = "f32";
+ params.cache_type_v = "f32";
+ }
+
+ params.n_gpu_layers = sparams->n_gpu_layers;
+ params.main_gpu = sparams->main_gpu;
+ params.use_mlock = sparams->use_mlock;
+ params.use_mmap = sparams->use_mmap;
+ params.numa = sparams->numa;
+ params.embedding = sparams->embedding;
+ if (sparams->model != NULL) {
+ params.model = sparams->model;
+ }
+
+ for (ext_server_lora_adapter *la = sparams->lora_adapters; la != NULL; la = la->next) {
+ params.lora_adapter.push_back(std::make_tuple(la->adapter, la->scale));
+ }
+
+ if (sparams->mmproj != NULL) {
+ params.mmproj = std::string(sparams->mmproj);
+ }
+
+ llama_backend_init(params.numa);
+
+ // load the model
+ if (!llama->load_model(params))
+ {
+ // TODO - consider modifying the logging logic or patching load_model so we can capture more detailed error messages
+ // and pass them back to the caller for better UX
+ err->id = -1;
+ snprintf(err->msg, err->msg_len, "error loading model %s", params.model.c_str());
+ return;
+ }
+
+ llama->initialize();
+ } catch (std::exception &e) {
+ err->id = -1;
+ snprintf(err->msg, err->msg_len, "exception %s", e.what());
+ } catch (...) {
+ err->id = -1;
+ snprintf(err->msg, err->msg_len, "Unknown exception initializing llama server");
+ }
+}
+
+void llama_server_start()
+{
+ assert(llama != NULL);
+ // TODO mutex to protect thread creation
+ ext_server_thread = std::thread([&]()
+ {
+ ext_server_running = true;
+ try {
+ LOG_TEE("llama server main loop starting\n");
+ ggml_time_init();
+ while (ext_server_running.load())
+ {
+ if (!llama->update_slots()) {
+ LOG_TEE("unexpected error in llama server update_slots - exiting main loop\n");
+ break;
+ }
+ }
+ } catch (std::exception &e) {
+ LOG_TEE("caught exception in llama server main loop: %s\n", e.what());
+ } catch (...) {
+ LOG_TEE("caught unknown exception in llama server main loop\n");
+ }
+ LOG_TEE("\nllama server shutting down\n");
+ llama_backend_free();
+ });
+}
+
+void llama_server_stop() {
+ assert(llama != NULL);
+ // TODO - too verbose, remove once things are solid
+ LOG_TEE("requesting llama server shutdown\n");
+ ext_server_running = false;
+ ext_server_thread.join();
+ delete llama;
+ llama = NULL;
+ LOG_TEE("llama server shutdown complete\n");
+}
+
+void llama_server_completion(const char *json_req, ext_server_resp_t *resp) {
+ assert(llama != NULL && json_req != NULL && resp != NULL);
+ resp->id = -1;
+ resp->msg[0] = '\0';
+ try {
+ json data = json::parse(json_req);
+ resp->id = llama->request_completion(data, false, false, -1);
+ } catch (std::exception &e) {
+ snprintf(resp->msg, resp->msg_len, "exception %s", e.what());
+ } catch (...) {
+ snprintf(resp->msg, resp->msg_len, "Unknown exception during completion");
+ }
+}
+
+void llama_server_completion_next_result(const int task_id, ext_server_task_result_t *resp) {
+ assert(llama != NULL && resp != NULL);
+ std::string msg;
+ resp->id = -1;
+ resp->stop = false;
+ resp->error = false;
+ resp->json_resp = NULL;
+ std::string result_json;
+ try {
+ task_result result = llama->next_result(task_id);
+ result_json = result.result_json.dump(-1, ' ', false, json::error_handler_t::replace);
+ resp->id = result.id;
+ resp->stop = result.stop;
+ resp->error = result.error;
+ if (result.error) {
+ llama->request_cancel(task_id);
+ } else if (result.stop) {
+ llama->request_cancel(task_id);
+ }
+ } catch (std::exception &e) {
+ resp->error = true;
+ resp->id = -1;
+ result_json = "{\"error\":\"exception " + std::string(e.what()) + "\"}";
+ } catch (...) {
+ resp->error = true;
+ resp->id = -1;
+ result_json = "{\"error\":\"Unknown exception during completion\"}";
+ }
+ const std::string::size_type size = result_json.size() + 1;
+ resp->json_resp = new char[size];
+ snprintf(resp->json_resp, size, "%s", result_json.c_str());
+}
+
+void llama_server_release_task_result(ext_server_task_result_t *result) {
+ if (result == NULL || result->json_resp == NULL) {
+ return;
+ }
+ delete[] result->json_resp;
+}
+
+void llama_server_completion_cancel(const int task_id, ext_server_resp_t *err) {
+ assert(llama != NULL && err != NULL);
+ err->id = 0;
+ err->msg[0] = '\0';
+ try {
+ llama->request_cancel(task_id);
+ } catch (std::exception &e) {
+ err->id = -1;
+ snprintf(err->msg, err->msg_len, "exception %s", e.what());
+ } catch (...) {
+ err->id = -1;
+ snprintf(err->msg, err->msg_len, "Unknown exception completion cancel in llama server");
+ }
+}
+
+void llama_server_tokenize(const char *json_req, char **json_resp, ext_server_resp_t *err) {
+ assert(llama != NULL && json_req != NULL && json_resp != NULL && err != NULL);
+ *json_resp = NULL;
+ err->id = 0;
+ err->msg[0] = '\0';
+ try {
+ const json body = json::parse(json_req);
+ std::vector<llama_token> tokens;
+ if (body.count("content") != 0)
+ {
+ tokens = llama->tokenize(body["content"], false);
+ }
+ const json data = format_tokenizer_response(tokens);
+ std::string result_json = data.dump();
+ const std::string::size_type size = result_json.size() + 1;
+ *json_resp = new char[size];
+ snprintf(*json_resp, size, "%s", result_json.c_str());
+ } catch (std::exception &e) {
+ err->id = -1;
+ snprintf(err->msg, err->msg_len, "exception %s", e.what());
+ } catch (...) {
+ err->id = -1;
+ snprintf(err->msg, err->msg_len, "Unknown exception during tokenize");
+ }
+}
+
+void llama_server_release_json_resp(char **json_resp) {
+ if (json_resp == NULL || *json_resp == NULL) {
+ return;
+ }
+ delete[] *json_resp;
+}
+
+void llama_server_detokenize(const char *json_req, char **json_resp, ext_server_resp_t *err) {
+ assert(llama != NULL && json_req != NULL && json_resp != NULL && err != NULL);
+ *json_resp = NULL;
+ err->id = 0;
+ err->msg[0] = '\0';
+ try {
+ const json body = json::parse(json_req);
+ std::string content;
+ if (body.count("tokens") != 0)
+ {
+ const std::vector<llama_token> tokens = body["tokens"];
+ content = tokens_to_str(llama->ctx, tokens.cbegin(), tokens.cend());
+ }
+ const json data = format_detokenized_response(content);
+ std::string result_json = data.dump();
+ const std::string::size_type size = result_json.size() + 1;
+ *json_resp = new char[size];
+ snprintf(*json_resp, size, "%s", result_json.c_str());
+ } catch (std::exception &e) {
+ err->id = -1;
+ snprintf(err->msg, err->msg_len, "exception %s", e.what());
+ } catch (...) {
+ err->id = -1;
+ snprintf(err->msg, err->msg_len, "Unknown exception during detokenize");
+ }
+}
+
+void llama_server_embedding(const char *json_req, char** json_resp, ext_server_resp_t *err) {
+ assert(llama != NULL && json_req != NULL && json_resp != NULL && err != NULL);
+ *json_resp = NULL;
+ err->id = 0;
+ err->msg[0] = '\0';
+ try {
+ const json body = json::parse(json_req);
+ json prompt;
+ if (body.count("content") != 0)
+ {
+ prompt = body["content"];
+ }
+ else
+ {
+ prompt = "";
+ }
+ const int task_id = llama->request_completion({ {"prompt", prompt}, { "n_predict", 0} }, false, true, -1);
+ task_result result = llama->next_result(task_id);
+ std::string result_json = result.result_json.dump();
+ const std::string::size_type size = result_json.size() + 1;
+ *json_resp = new char[size];
+ snprintf(*json_resp, size, "%s", result_json.c_str());
+ } catch (std::exception &e) {
+ err->id = -1;
+ snprintf(err->msg, err->msg_len, "exception %s", e.what());
+ } catch (...) {
+ err->id = -1;
+ snprintf(err->msg, err->msg_len, "Unknown exception during embedding");
+ }
+}
+
+#endif // LLAMA_SERVER_LIBRARY
\ No newline at end of file
diff --git a/examples/server/server.h b/examples/server/server.h
new file mode 100644
index 0000000..d22f1b6
--- /dev/null
+++ b/examples/server/server.h
@@ -0,0 +1,89 @@
+#if defined(LLAMA_SERVER_LIBRARY)
+#ifndef LLAMA_SERVER_H
+#define LLAMA_SERVER_H
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdbool.h>
+
+// This exposes extern C entrypoints into the llama_server
+// To enable the server compile with LLAMA_SERVER_LIBRARY
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+ typedef struct ext_server_resp {
+ int id; // < 0 on error
+ size_t msg_len; // caller must allocate msg and set msg_len
+ char *msg;
+ } ext_server_resp_t;
+
+ // Allocated and freed by caller
+ typedef struct ext_server_lora_adapter {
+ char *adapter;
+ float scale;
+ struct ext_server_lora_adapter *next;
+ } ext_server_lora_adapter_t;
+
+ // Allocated and freed by caller
+ typedef struct ext_server_params
+ {
+ char *model;
+ uint32_t n_ctx; // text context, 0 = from model
+ uint32_t n_batch; // prompt processing maximum batch size
+ uint32_t n_threads; // number of threads to use for generation
+ int32_t n_parallel; // number of parallel sequences to decodewra
+ float rope_freq_base; // RoPE base frequency, 0 = from model
+ float rope_freq_scale; // RoPE frequency scaling factor, 0 = from model
+ bool memory_f16; // use f16 instead of f32 for memory kv
+ int32_t n_gpu_layers; // number of layers to store in VRAM (-1 - use default)
+ int32_t main_gpu; // the GPU that is used for scratch and small tensors
+ bool use_mlock; // force system to keep model in RAM
+ bool use_mmap; // use mmap if possible
+ bool numa; // attempt optimizations that help on some NUMA systems
+ bool embedding; // get only sentence embedding
+ ext_server_lora_adapter_t* lora_adapters;
+ char *mmproj;
+ } ext_server_params_t;
+
+ typedef struct ext_server_task_result
+ {
+ int id;
+ bool stop;
+ bool error;
+ char* json_resp; // null terminated, memory managed by ext_server
+ } ext_server_task_result_t;
+
+ // Initialize the server once per process
+ // err->id = 0 for success and err->msg[0] = NULL
+ // err->id != 0 for failure, and err->msg contains error message
+ void llama_server_init(ext_server_params_t *sparams, ext_server_resp_t *err);
+
+ // Run the main loop, called once per init
+ void llama_server_start();
+ // Stop the main loop and free up resources allocated in init and start. Init must be called again to reuse
+ void llama_server_stop();
+
+ // json_req null terminated string, memory managed by caller
+ // resp->id >= 0 on success (task ID)
+ // resp->id < 0 on error, and resp->msg contains error message
+ void llama_server_completion(const char *json_req, ext_server_resp_t *resp);
+
+ // Caller must call llama_server_release_task_result to free resp->json_resp
+ void llama_server_completion_next_result(const int task_id, ext_server_task_result_t *result);
+ void llama_server_completion_cancel(const int task_id, ext_server_resp_t *err);
+ void llama_server_release_task_result(ext_server_task_result_t *result);
+
+ // Caller must call llama_server_releaes_json_resp to free json_resp if err.id < 0
+ void llama_server_tokenize(const char *json_req, char **json_resp, ext_server_resp_t *err);
+ void llama_server_detokenize(const char *json_req, char **json_resp, ext_server_resp_t *err);
+ void llama_server_embedding(const char *json_req, char** json_resp, ext_server_resp_t *err);
+ void llama_server_release_json_resp(char **json_resp);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+#endif // LLAMA_SERVER_LIBRARY
\ No newline at end of file
diff --git a/ggml-cuda.cu b/ggml-cuda.cu
index f20846f..9640cf3 100644
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -6757,6 +6757,7 @@ static cudaError_t ggml_cuda_cpy_tensor_2d(
CUDA_CHECK(cudaGetDevice(&id));
src_ptr = (char *) extra->data_device[id];
} else {
+ fprintf(stderr, "ggml_cuda_cpy_tensor_2d assert: backend: %d\n", src->backend);
GGML_ASSERT(false);
}
char * dst_ptr = (char *) dst;
--
2.39.3 (Apple Git-145)

View file

@ -1,51 +0,0 @@
From 032ef7ff2423f5117bb59d42fb71be9cebf0a2de Mon Sep 17 00:00:00 2001
From: Bruce MacDonald <brucewmacdonald@gmail.com>
Date: Mon, 28 Aug 2023 18:08:12 -0400
Subject: [PATCH] add detokenize endpoint
---
examples/server/server.cpp | 21 +++++++++++++++++++++
1 file changed, 21 insertions(+)
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 9966045..5014691 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -1075,6 +1075,12 @@ static json format_tokenizer_response(const std::vector<llama_token> &tokens)
{"tokens", tokens}};
}
+static json format_detokenized_response(std::string content)
+{
+ return json{
+ {"content", content}};
+}
+
static void parse_options_completion(const json &body, llama_server_context &llama)
{
gpt_params default_params;
@@ -1361,6 +1367,21 @@ int main(int argc, char **argv)
const json data = format_tokenizer_response(tokens);
return res.set_content(data.dump(), "application/json"); });
+ svr.Post("/detokenize", [&llama](const Request &req, Response &res)
+ {
+ auto lock = llama.lock();
+
+ const json body = json::parse(req.body);
+ std::string content;
+ if (body.count("tokens") != 0)
+ {
+ const std::vector<llama_token> tokens = body["tokens"];
+ content = tokens_to_str(llama.ctx, tokens.cbegin(), tokens.cend());
+ }
+
+ const json data = format_detokenized_response(content);
+ return res.set_content(data.dump(), "application/json"); });
+
svr.Post("/embedding", [&llama](const Request &req, Response &res)
{
auto lock = llama.lock();
--
2.39.2 (Apple Git-143)

View file

@ -1,27 +0,0 @@
From 5dd02993e8cc2ce309157736b95bb572f274a3fd Mon Sep 17 00:00:00 2001
From: Michael Yang <mxyng@pm.me>
Date: Wed, 20 Sep 2023 14:19:52 -0700
Subject: [PATCH] copy cuda runtime libraries
---
CMakeLists.txt | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 824d9f2..dd24137 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -274,6 +274,10 @@ if (LLAMA_CUBLAS)
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} CUDA::cudart CUDA::cublas CUDA::cublasLt)
endif()
+ configure_file(${CUDAToolkit_LIBRARY_DIR}/libcudart.so ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/libcudart.so.${CUDAToolkit_VERSION_MAJOR}.0 COPYONLY)
+ configure_file(${CUDAToolkit_LIBRARY_DIR}/libcublas.so ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/libcublas.so.${CUDAToolkit_VERSION_MAJOR} COPYONLY)
+ configure_file(${CUDAToolkit_LIBRARY_DIR}/libcublasLt.so ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/libcublasLt.so.${CUDAToolkit_VERSION_MAJOR} COPYONLY)
+
if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
# 52 == lowest CUDA 12 standard
# 60 == f16 CUDA intrinsics
--
2.42.0

View file

@ -1,25 +0,0 @@
From 6465fec6290f0a7f5d4d0fbe6bcf634e4810dde6 Mon Sep 17 00:00:00 2001
From: Michael Yang <mxyng@pm.me>
Date: Mon, 23 Oct 2023 10:39:34 -0700
Subject: [PATCH] default log stderr
---
common/log.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/common/log.h b/common/log.h
index b8953fd..25522cd 100644
--- a/common/log.h
+++ b/common/log.h
@@ -90,7 +90,7 @@
// }
//
#ifndef LOG_TARGET
- #define LOG_TARGET log_handler()
+ #define LOG_TARGET nullptr
#endif
#ifndef LOG_TEE_TARGET
--
2.42.0

View file

@ -1,89 +0,0 @@
From 6145068a6613c37bb43a7408b5496524bdcfc402 Mon Sep 17 00:00:00 2001
From: Bruce MacDonald <brucewmacdonald@gmail.com>
Date: Mon, 28 Aug 2023 18:08:53 -0400
Subject: [PATCH] 34B model support
---
llama.cpp | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/llama.cpp b/llama.cpp
index f2cbe76..62c5cdf 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -79,6 +79,7 @@ enum e_model {
MODEL_7B,
MODEL_13B,
MODEL_30B,
+ MODEL_34B,
MODEL_65B,
MODEL_70B,
};
@@ -122,6 +123,7 @@ static std::map<e_model, size_t> MEM_REQ_SCRATCH0(int n_ctx)
{ MODEL_7B, ((size_t) n_ctx / 16ull + 100ull) * MB },
{ MODEL_13B, ((size_t) n_ctx / 12ull + 120ull) * MB },
{ MODEL_30B, ((size_t) n_ctx / 9ull + 160ull) * MB },
+ { MODEL_34B, ((size_t) n_ctx / 9ull + 160ull) * MB },
{ MODEL_65B, ((size_t) n_ctx / 6ull + 256ull) * MB }, // guess
{ MODEL_70B, ((size_t) n_ctx / 7ull + 164ull) * MB },
};
@@ -135,6 +137,7 @@ static const std::map<e_model, size_t> & MEM_REQ_SCRATCH1()
{ MODEL_7B, 160ull * MB },
{ MODEL_13B, 192ull * MB },
{ MODEL_30B, 256ull * MB },
+ { MODEL_34B, 256ull * MB },
{ MODEL_65B, 384ull * MB }, // guess
{ MODEL_70B, 304ull * MB },
};
@@ -149,6 +152,7 @@ static const std::map<e_model, size_t> & MEM_REQ_EVAL()
{ MODEL_7B, 10ull * MB },
{ MODEL_13B, 12ull * MB },
{ MODEL_30B, 16ull * MB },
+ { MODEL_34B, 16ull * MB },
{ MODEL_65B, 24ull * MB }, // guess
{ MODEL_70B, 24ull * MB },
};
@@ -164,6 +168,7 @@ static const std::map<e_model, size_t> & VRAM_REQ_SCRATCH_BASE()
{ MODEL_7B, 512ull * kB },
{ MODEL_13B, 640ull * kB },
{ MODEL_30B, 768ull * kB },
+ { MODEL_34B, 768ull * kB },
{ MODEL_65B, 1280ull * kB },
{ MODEL_70B, 1280ull * kB },
};
@@ -179,6 +184,7 @@ static const std::map<e_model, size_t> & VRAM_REQ_SCRATCH_PER_CONTEXT()
{ MODEL_7B, 128ull },
{ MODEL_13B, 160ull },
{ MODEL_30B, 208ull },
+ { MODEL_34B, 208ull },
{ MODEL_65B, 256ull },
{ MODEL_70B, 256ull },
};
@@ -1027,6 +1033,7 @@ static const char * llama_model_type_name(e_model type) {
case MODEL_7B: return "7B";
case MODEL_13B: return "13B";
case MODEL_30B: return "30B";
+ case MODEL_34B: return "34B";
case MODEL_65B: return "65B";
case MODEL_70B: return "70B";
default: LLAMA_ASSERT(false);
@@ -1074,6 +1081,7 @@ static void llama_model_load_internal(
case 26: model.type = e_model::MODEL_3B; break;
case 32: model.type = e_model::MODEL_7B; break;
case 40: model.type = e_model::MODEL_13B; break;
+ case 48: model.type = e_model::MODEL_34B; break;
case 60: model.type = e_model::MODEL_30B; break;
case 80: model.type = e_model::MODEL_65B; break;
default:
@@ -1094,6 +1102,8 @@ static void llama_model_load_internal(
LLAMA_LOG_WARN("%s: warning: assuming 70B model based on GQA == %d\n", __func__, n_gqa);
model.type = e_model::MODEL_70B;
hparams.f_ffn_mult = 1.3f; // from the params.json of the 70B model
+ } else if (model.type == e_model::MODEL_34B && n_gqa == 8) {
+ hparams.f_ffn_mult = 1.0f; // from the params.json of the 34B model
}
hparams.rope_freq_base = rope_freq_base;
--
2.39.2 (Apple Git-143)

View file

@ -1,30 +0,0 @@
From dadbed99e65252d79f81101a392d0d6497b86caa Mon Sep 17 00:00:00 2001
From: Shouzheng Liu <lshzh.hi@gmail.com>
Date: Mon, 21 Aug 2023 06:59:29 -0400
Subject: [PATCH] metal : fix synchronization in new matrix multiplication
kernel (#2686)
---
ggml-metal.metal | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/ggml-metal.metal b/ggml-metal.metal
index 3f31252..88d48f6 100644
--- a/ggml-metal.metal
+++ b/ggml-metal.metal
@@ -1898,10 +1898,11 @@ kernel void kernel_mul_mm(device const uchar * src0,
threadgroup float *temp_str = ((threadgroup float *)shared_memory) \
+ 32 * (sgitg&1) + (16 * (sgitg>>1)) * BLOCK_SIZE_M;
for (int i = 0; i < 8; i++) {
+ threadgroup_barrier(mem_flags::mem_device);
simdgroup_store(c_res[i], temp_str + 8 * (i%4) + 8 * BLOCK_SIZE_M * (i/4), BLOCK_SIZE_M);
}
- threadgroup_barrier(mem_flags::mem_threadgroup);
+ threadgroup_barrier(mem_flags::mem_device);
device float *C = dst + BLOCK_SIZE_M * r0 + (BLOCK_SIZE_N * r1) * ne0 + im*ne1*ne0;
if (sgitg==0) {
for (int i = 0; i < n_rows; i++) {
--
2.41.0

View file

@ -1,41 +0,0 @@
From 14b1d7e6f720dee41ce5a826376df738096d9033 Mon Sep 17 00:00:00 2001
From: Shouzheng Liu <lshzh.hi@gmail.com>
Date: Tue, 22 Aug 2023 02:18:40 -0400
Subject: [PATCH] metal : add missing barriers for mul-mat (#2699)
---
ggml-metal.metal | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/ggml-metal.metal b/ggml-metal.metal
index 88d48f6..ce3541f 100644
--- a/ggml-metal.metal
+++ b/ggml-metal.metal
@@ -1850,6 +1850,7 @@ kernel void kernel_mul_mm(device const uchar * src0,
//load data and store to threadgroup memory
half4x4 temp_a;
dequantize_func(x, il, temp_a);
+ threadgroup_barrier(mem_flags::mem_threadgroup);
#pragma unroll(16)
for (int i = 0; i < 16; i++) {
*(sa + SG_MAT_SIZE * ((tiitg / THREAD_PER_ROW / 8) \
@@ -1895,14 +1896,14 @@ kernel void kernel_mul_mm(device const uchar * src0,
}
} else {
// block is smaller than 64x32, we should avoid writing data outside of the matrix
+ threadgroup_barrier(mem_flags::mem_threadgroup);
threadgroup float *temp_str = ((threadgroup float *)shared_memory) \
+ 32 * (sgitg&1) + (16 * (sgitg>>1)) * BLOCK_SIZE_M;
for (int i = 0; i < 8; i++) {
- threadgroup_barrier(mem_flags::mem_device);
simdgroup_store(c_res[i], temp_str + 8 * (i%4) + 8 * BLOCK_SIZE_M * (i/4), BLOCK_SIZE_M);
}
- threadgroup_barrier(mem_flags::mem_device);
+ threadgroup_barrier(mem_flags::mem_threadgroup);
device float *C = dst + BLOCK_SIZE_M * r0 + (BLOCK_SIZE_N * r1) * ne0 + im*ne1*ne0;
if (sgitg==0) {
for (int i = 0; i < n_rows; i++) {
--
2.41.0

View file

@ -1,32 +0,0 @@
From 1e3bc523d8053a77df3ac7126a84d0297ee97ef6 Mon Sep 17 00:00:00 2001
From: Kylin <56434533+KyL0N@users.noreply.github.com>
Date: Tue, 22 Aug 2023 15:14:23 +0800
Subject: [PATCH] ggml : support CUDA's half type for aarch64(#1455) (#2670)
* ggml: support CUDA's half type for aarch64(#1455)
support CUDA's half type for aarch64 in ggml_fp16_t definition
* ggml: use __CUDACC__ to recognise nvcc compiler
---
ggml.h | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/ggml.h b/ggml.h
index 544ad2d..0ec7ec5 100644
--- a/ggml.h
+++ b/ggml.h
@@ -259,8 +259,9 @@
extern "C" {
#endif
-#ifdef __ARM_NEON
- // we use the built-in 16-bit float type
+#if defined(__ARM_NEON) && defined(__CUDACC__)
+ typedef half ggml_fp16_t;
+#elif defined(__ARM_NEON)
typedef __fp16 ggml_fp16_t;
#else
typedef uint16_t ggml_fp16_t;
--
2.39.2 (Apple Git-143)

View file

@ -1,25 +1,16 @@
package llm package llm
import ( import (
"bufio"
"bytes" "bytes"
"context" "context"
"embed" _ "embed"
"encoding/json"
"errors" "errors"
"fmt" "fmt"
"io" "io"
"io/fs" "io/fs"
"log"
"math/rand"
"net/http"
"os" "os"
"os/exec" "os/exec"
"path"
"path/filepath" "path/filepath"
"runtime"
"strconv"
"strings"
"sync" "sync"
"time" "time"
@ -55,109 +46,6 @@ number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws
ws ::= ([ \t\n] ws)? ws ::= ([ \t\n] ws)?
` `
//go:embed llama.cpp/*/build/*/bin/*
var llamaCppEmbed embed.FS
type ModelRunner struct {
Type string // "gguf" or "ggml"
Path string // path to the model runner executable
Accelerated bool
}
func chooseRunners(workDir, runnerType string) []ModelRunner {
buildPath := path.Join("llama.cpp", runnerType, "build")
var runners []ModelRunner
// set the runners based on the OS
// IMPORTANT: the order of the runners in the array is the priority order
switch runtime.GOOS {
case "darwin":
if runtime.GOARCH == "arm64" {
runners = []ModelRunner{{Type: runnerType, Path: path.Join(buildPath, "metal", "bin", "ollama-runner")}}
} else {
runners = []ModelRunner{{Type: runnerType, Path: path.Join(buildPath, "cpu", "bin", "ollama-runner")}}
}
case "linux":
runners = []ModelRunner{
{Type: runnerType, Path: path.Join(buildPath, "cuda", "bin", "ollama-runner"), Accelerated: true},
{Type: runnerType, Path: path.Join(buildPath, "cpu", "bin", "ollama-runner")},
}
case "windows":
// TODO: select windows GPU runner here when available
runners = []ModelRunner{
{Type: runnerType, Path: path.Join(buildPath, "cuda", "bin", "Release", "ollama-runner.exe"), Accelerated: true},
{Type: runnerType, Path: path.Join(buildPath, "cpu", "bin", "Release", "ollama-runner.exe")},
}
default:
log.Printf("unknown OS, running on CPU: %s", runtime.GOOS)
runners = []ModelRunner{
{Type: runnerType, Path: path.Join(buildPath, "cpu", "bin", "ollama-runner")},
}
}
runnerAvailable := false // if no runner files are found in the embed, this flag will cause a fast fail
for _, r := range runners {
// find all the files in the runner's bin directory
files, err := fs.Glob(llamaCppEmbed, path.Join(path.Dir(r.Path), "*"))
if err != nil {
// this is expected, ollama may be compiled without all runners packed in
log.Printf("%s runner not found: %v", r.Path, err)
continue
}
for _, f := range files {
runnerAvailable = true
srcFile, err := llamaCppEmbed.Open(f)
if err != nil {
log.Fatalf("read llama runner %s: %v", f, err)
}
defer srcFile.Close()
// create the directory in case it does not exist, filepath.Dir() converts the file path to the OS's format
destPath := filepath.Join(workDir, filepath.Dir(f))
if err := os.MkdirAll(destPath, 0o755); err != nil {
log.Fatalf("create runner temp dir %s: %v", filepath.Dir(f), err)
}
// create the path to the destination file, filepath.Base() converts the file path to the OS's format
destFile := filepath.Join(destPath, filepath.Base(f))
_, err = os.Stat(destFile)
switch {
case errors.Is(err, os.ErrNotExist):
destFile, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
if err != nil {
log.Fatalf("write llama runner %s: %v", f, err)
}
defer destFile.Close()
if _, err := io.Copy(destFile, srcFile); err != nil {
log.Fatalf("copy llama runner %s: %v", f, err)
}
case err != nil:
log.Fatalf("stat llama runner %s: %v", f, err)
}
}
}
if !runnerAvailable {
log.Fatalf("%s runner not found", runnerType)
}
// return the runners to try in priority order
localRunnersByPriority := []ModelRunner{}
for _, r := range runners {
// clean the ModelRunner paths so that they match the OS we are running on
localRunnersByPriority = append(localRunnersByPriority, ModelRunner{
Type: r.Type,
Path: filepath.Clean(path.Join(workDir, r.Path)),
Accelerated: r.Accelerated,
})
}
return localRunnersByPriority
}
type llamaModel struct { type llamaModel struct {
hyperparameters llamaHyperparameters hyperparameters llamaHyperparameters
} }
@ -228,83 +116,12 @@ type ImageData struct {
ID int `json:"id"` ID int `json:"id"`
} }
type llama struct {
api.Options
ImageData []ImageData
Running
}
var ( var (
errNvidiaSMI = errors.New("warning: gpu support may not be enabled, check that you have installed GPU drivers: nvidia-smi command failed") errNvidiaSMI = errors.New("warning: gpu support may not be enabled, check that you have installed GPU drivers: nvidia-smi command failed")
errAvailableVRAM = errors.New("not enough VRAM available, falling back to CPU only") errAvailableVRAM = errors.New("not enough VRAM available, falling back to CPU only")
payloadMissing = fmt.Errorf("expected dynamic library payloads not included in this build of ollama")
) )
// CheckVRAM returns the free VRAM in bytes on Linux machines with NVIDIA GPUs
func CheckVRAM() (int64, error) {
cmd := exec.Command("nvidia-smi", "--query-gpu=memory.free", "--format=csv,noheader,nounits")
var stdout bytes.Buffer
cmd.Stdout = &stdout
err := cmd.Run()
if err != nil {
return 0, errNvidiaSMI
}
var freeMiB int64
scanner := bufio.NewScanner(&stdout)
for scanner.Scan() {
line := scanner.Text()
if strings.Contains(line, "[Insufficient Permissions]") {
return 0, fmt.Errorf("GPU support may not enabled, check you have installed GPU drivers and have the necessary permissions to run nvidia-smi")
}
vram, err := strconv.ParseInt(strings.TrimSpace(line), 10, 64)
if err != nil {
return 0, fmt.Errorf("failed to parse available VRAM: %v", err)
}
freeMiB += vram
}
freeBytes := freeMiB * 1024 * 1024
if freeBytes < 2*format.GigaByte {
log.Printf("less than 2 GB VRAM available")
return 0, errAvailableVRAM
}
return freeBytes, nil
}
func NumGPU(numLayer, fileSizeBytes int64, opts api.Options) int {
if opts.NumGPU != -1 {
return opts.NumGPU
}
if runtime.GOOS == "linux" || runtime.GOOS == "windows" {
freeBytes, err := CheckVRAM()
if err != nil {
if !errors.Is(err, errNvidiaSMI) {
log.Print(err.Error())
}
// nvidia driver not installed or no nvidia GPU found
return 0
}
/*
Calculate bytes per layer, this will roughly be the size of the model file divided by the number of layers.
We can store the model weights and the kv cache in vram,
to enable kv chache vram storage add two additional layers to the number of layers retrieved from the model file.
*/
bytesPerLayer := fileSizeBytes / numLayer
// 75% of the absolute max number of layers we can fit in available VRAM, off-loading too many layers to the GPU can cause OOM errors
layers := int(freeBytes/bytesPerLayer) * 3 / 4
log.Printf("%d MB VRAM available, loading up to %d GPU layers", freeBytes/(1024*1024), layers)
return layers
}
// default to enable metal on macOS
return 1
}
// StatusWriter is a writer that captures error messages from the llama runner process // StatusWriter is a writer that captures error messages from the llama runner process
type StatusWriter struct { type StatusWriter struct {
ErrCh chan error ErrCh chan error
@ -333,203 +150,6 @@ func (w *StatusWriter) Write(b []byte) (int, error) {
return os.Stderr.Write(b) return os.Stderr.Write(b)
} }
func newLlama(model string, adapters, projectors []string, runners []ModelRunner, numLayers int64, opts api.Options) (*llama, error) {
fileInfo, err := os.Stat(model)
if err != nil {
return nil, err
}
if len(adapters) > 1 {
return nil, errors.New("ollama supports only one lora adapter, but multiple were provided")
}
numGPU := NumGPU(numLayers, fileInfo.Size(), opts)
params := []string{
"--model", model,
"--ctx-size", fmt.Sprintf("%d", opts.NumCtx),
"--batch-size", fmt.Sprintf("%d", opts.NumBatch),
"--n-gpu-layers", fmt.Sprintf("%d", numGPU),
"--embedding",
}
if opts.MainGPU > 0 {
params = append(params, "--main-gpu", fmt.Sprintf("%d", opts.MainGPU))
}
if opts.RopeFrequencyBase > 0 {
params = append(params, "--rope-freq-base", fmt.Sprintf("%f", opts.RopeFrequencyBase))
}
if opts.RopeFrequencyScale > 0 {
params = append(params, "--rope-freq-scale", fmt.Sprintf("%f", opts.RopeFrequencyScale))
}
if opts.NumGQA > 0 {
params = append(params, "--gqa", fmt.Sprintf("%d", opts.NumGQA))
}
if len(adapters) > 0 {
// TODO: applying multiple adapters is not supported by the llama.cpp server yet
params = append(params, "--lora", adapters[0])
}
if len(projectors) > 0 {
// TODO: applying multiple projectors is not supported by the llama.cpp server yet
params = append(params, "--mmproj", projectors[0])
}
if opts.NumThread > 0 {
params = append(params, "--threads", fmt.Sprintf("%d", opts.NumThread))
}
if !opts.F16KV {
params = append(params, "--memory-f32")
}
if opts.UseMLock {
params = append(params, "--mlock")
}
if !opts.UseMMap {
params = append(params, "--no-mmap")
}
if opts.UseNUMA {
params = append(params, "--numa")
}
var runnerErr error
// start the llama.cpp server with a retry in case the port is already in use
for _, runner := range runners {
if runner.Accelerated && numGPU == 0 {
log.Printf("skipping accelerated runner because num_gpu=0")
continue
}
if _, err := os.Stat(runner.Path); err != nil {
log.Printf("llama runner not found: %v", err)
continue
}
port := rand.Intn(65535-49152) + 49152 // get a random port in the ephemeral range
params := append(params, "--port", strconv.Itoa(port))
ctx, cancel := context.WithCancel(context.Background())
cmd := exec.CommandContext(
ctx,
runner.Path,
params...,
)
var libraryPaths []string
if libraryPath, ok := os.LookupEnv("LD_LIBRARY_PATH"); ok {
libraryPaths = append(libraryPaths, libraryPath)
}
libraryPaths = append(libraryPaths, filepath.Dir(runner.Path))
cmd.Env = append(os.Environ(), fmt.Sprintf("LD_LIBRARY_PATH=%s", strings.Join(libraryPaths, ":")))
cmd.Stdout = os.Stderr
statusWriter := NewStatusWriter()
cmd.Stderr = statusWriter
llm := &llama{Options: opts, Running: Running{Port: port, Cmd: cmd, Cancel: cancel, exitCh: make(chan error)}}
log.Print("starting llama runner")
if err := llm.Cmd.Start(); err != nil {
log.Printf("error starting the external llama runner: %v", err)
continue
}
// monitor the llama runner process and signal when it exits
go func() {
err := llm.Cmd.Wait()
// default to printing the exit message of the command process, it will probably just say 'exit staus 1'
errMsg := err.Error()
// try to set a better error message if llama runner logs captured an error
if statusWriter.LastErrMsg != "" {
errMsg = statusWriter.LastErrMsg
}
log.Println(errMsg)
// llm.Cmd.Wait() can only be called once, use this exit channel to signal that the process has exited
llm.exitOnce.Do(func() {
close(llm.exitCh)
})
}()
if err := waitForServer(llm); err != nil {
log.Printf("error starting llama runner: %v", err)
llm.Close()
// default the runnerErr to the error returned by the most recent llama runner process
runnerErr = err
// capture the error directly from the runner process, if any
select {
case runnerErr = <-statusWriter.ErrCh:
default:
// the runner process probably timed out
}
// try again
continue
}
// server started successfully
return llm, nil
}
if runnerErr != nil {
// this is the error returned from the llama runner process that failed most recently
return nil, runnerErr
}
return nil, fmt.Errorf("failed to start a llama runner")
}
func waitForServer(llm *llama) error {
start := time.Now()
expiresAt := time.Now().Add(3 * time.Minute) // be generous with timeout, large models can take a while to load
ticker := time.NewTicker(200 * time.Millisecond)
defer ticker.Stop()
log.Print("waiting for llama runner to start responding")
for {
select {
case <-llm.exitCh:
// failed to start subprocess
return fmt.Errorf("llama runner process has terminated")
case <-ticker.C:
if time.Now().After(expiresAt) {
// timeout
return fmt.Errorf("timed out waiting for llama runner to start")
}
if err := llm.Ping(context.Background()); err == nil {
// success
log.Printf("llama runner started in %f seconds", time.Since(start).Seconds())
return nil
}
}
}
}
func (llm *llama) Close() {
// signal the sub-process to terminate
llm.Cancel()
// wait for the command to exit to prevent race conditions with the next run
<-llm.exitCh
if llm.StatusWriter != nil && llm.StatusWriter.LastErrMsg != "" {
log.Printf("llama runner stopped with error: %v", llm.StatusWriter.LastErrMsg)
} else {
log.Print("llama runner stopped successfully")
}
}
func (llm *llama) SetOptions(opts api.Options) {
llm.Options = opts
}
type prediction struct { type prediction struct {
Content string `json:"content"` Content string `json:"content"`
Model string `json:"model"` Model string `json:"model"`
@ -545,7 +165,8 @@ type prediction struct {
} }
const maxBufferSize = 512 * format.KiloByte const maxBufferSize = 512 * format.KiloByte
const maxRetries = 6 const maxRetries = 3
const retryDelay = 1 * time.Second
type PredictOpts struct { type PredictOpts struct {
Prompt string Prompt string
@ -562,158 +183,6 @@ type PredictResult struct {
EvalDuration time.Duration EvalDuration time.Duration
} }
// IsRetryable checks if the line matches a condition that can be retried
func isRetryable(line []byte) bool {
return bytes.Contains(line, []byte("slot unavailable"))
}
func (llm *llama) Predict(ctx context.Context, predict PredictOpts, fn func(PredictResult)) error {
imageData := llm.ImageData
if len(predict.Images) > 0 {
for cnt, i := range predict.Images {
imageData = append(imageData, ImageData{Data: i, ID: cnt})
}
}
log.Printf("loaded %d images", len(imageData))
request := map[string]any{
"prompt": predict.Prompt,
"stream": true,
"n_predict": llm.NumPredict,
"n_keep": llm.NumKeep,
"main_gpu": llm.MainGPU,
"temperature": llm.Temperature,
"top_k": llm.TopK,
"top_p": llm.TopP,
"tfs_z": llm.TFSZ,
"typical_p": llm.TypicalP,
"repeat_last_n": llm.RepeatLastN,
"repeat_penalty": llm.RepeatPenalty,
"presence_penalty": llm.PresencePenalty,
"frequency_penalty": llm.FrequencyPenalty,
"mirostat": llm.Mirostat,
"mirostat_tau": llm.MirostatTau,
"mirostat_eta": llm.MirostatEta,
"penalize_nl": llm.PenalizeNewline,
"seed": llm.Seed,
"stop": llm.Stop,
"image_data": imageData,
}
if predict.Format == "json" {
request["grammar"] = jsonGrammar
}
retryDelay := 100 * time.Microsecond
for retries := 0; retries < maxRetries; retries++ {
if retries > 0 {
time.Sleep(retryDelay) // wait before retrying
retryDelay *= 2 // exponential backoff
}
// Handling JSON marshaling with special characters unescaped.
buffer := &bytes.Buffer{}
enc := json.NewEncoder(buffer)
enc.SetEscapeHTML(false)
if err := enc.Encode(request); err != nil {
return fmt.Errorf("failed to marshal data: %v", err)
}
endpoint := fmt.Sprintf("http://127.0.0.1:%d/completion", llm.Port)
req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, buffer)
if err != nil {
return fmt.Errorf("error creating POST request: %v", err)
}
req.Header.Set("Content-Type", "application/json")
resp, err := http.DefaultClient.Do(req)
if err != nil {
return fmt.Errorf("POST predict: %v", err)
}
defer resp.Body.Close()
if resp.StatusCode >= 400 {
bodyBytes, err := io.ReadAll(resp.Body)
if err != nil {
return fmt.Errorf("failed reading llm error response: %w", err)
}
log.Printf("llm predict error: %s", bodyBytes)
return fmt.Errorf("%s", bodyBytes)
}
scanner := bufio.NewScanner(resp.Body)
// increase the buffer size to avoid running out of space
buf := make([]byte, 0, maxBufferSize)
scanner.Buffer(buf, maxBufferSize)
retryNeeded := false
for scanner.Scan() {
select {
case <-ctx.Done():
// This handles the request cancellation
return ctx.Err()
default:
line := scanner.Bytes()
if len(line) == 0 {
continue
}
if isRetryable(line) {
retryNeeded = true
break
}
evt, ok := bytes.CutPrefix(line, []byte("data: "))
if !ok {
return fmt.Errorf("error parsing llm response stream: %s", line)
}
var p prediction
if err := json.Unmarshal(evt, &p); err != nil {
return fmt.Errorf("error unmarshaling llm prediction response: %v", err)
}
if p.Content != "" {
fn(PredictResult{
Content: p.Content,
})
}
if p.Stop {
fn(PredictResult{
Done: true,
PromptEvalCount: p.Timings.PromptN,
PromptEvalDuration: parseDurationMs(p.Timings.PromptMS),
EvalCount: p.Timings.PredictedN,
EvalDuration: parseDurationMs(p.Timings.PredictedMS),
})
return nil
}
}
}
if err := scanner.Err(); err != nil {
if strings.Contains(err.Error(), "unexpected EOF") {
// this means the llama runner subprocess crashed
llm.Close()
if llm.StatusWriter != nil && llm.StatusWriter.LastErrMsg != "" {
return fmt.Errorf("llama runner exited: %v", llm.StatusWriter.LastErrMsg)
}
return fmt.Errorf("llama runner exited, you may not have enough available memory to run this model")
}
return fmt.Errorf("error reading llm response: %v", err)
}
if !retryNeeded {
return nil // success
}
}
// should never reach here ideally
return fmt.Errorf("max retries exceeded")
}
type TokenizeRequest struct { type TokenizeRequest struct {
Content string `json:"content"` Content string `json:"content"`
} }
@ -722,43 +191,6 @@ type TokenizeResponse struct {
Tokens []int `json:"tokens"` Tokens []int `json:"tokens"`
} }
func (llm *llama) Encode(ctx context.Context, prompt string) ([]int, error) {
endpoint := fmt.Sprintf("http://127.0.0.1:%d/tokenize", llm.Port)
data, err := json.Marshal(TokenizeRequest{Content: prompt})
if err != nil {
return nil, fmt.Errorf("marshaling encode data: %w", err)
}
req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, bytes.NewBuffer(data))
if err != nil {
return nil, fmt.Errorf("encode request: %w", err)
}
req.Header.Set("Content-Type", "application/json")
resp, err := http.DefaultClient.Do(req)
if err != nil {
return nil, fmt.Errorf("do encode request: %w", err)
}
defer resp.Body.Close()
body, err := io.ReadAll(resp.Body)
if err != nil {
return nil, fmt.Errorf("read encode request: %w", err)
}
if resp.StatusCode >= 400 {
log.Printf("llm encode error: %s", body)
return nil, fmt.Errorf("%s", body)
}
var encoded TokenizeResponse
if err := json.Unmarshal(body, &encoded); err != nil {
return nil, fmt.Errorf("unmarshal encode response: %w", err)
}
return encoded.Tokens, nil
}
type DetokenizeRequest struct { type DetokenizeRequest struct {
Tokens []int `json:"tokens"` Tokens []int `json:"tokens"`
} }
@ -767,46 +199,6 @@ type DetokenizeResponse struct {
Content string `json:"content"` Content string `json:"content"`
} }
func (llm *llama) Decode(ctx context.Context, tokens []int) (string, error) {
if len(tokens) == 0 {
return "", nil
}
endpoint := fmt.Sprintf("http://127.0.0.1:%d/detokenize", llm.Port)
data, err := json.Marshal(DetokenizeRequest{Tokens: tokens})
if err != nil {
return "", fmt.Errorf("marshaling decode data: %w", err)
}
req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, bytes.NewBuffer(data))
if err != nil {
return "", fmt.Errorf("decode request: %w", err)
}
req.Header.Set("Content-Type", "application/json")
resp, err := http.DefaultClient.Do(req)
if err != nil {
return "", fmt.Errorf("do decode request: %w", err)
}
defer resp.Body.Close()
body, err := io.ReadAll(resp.Body)
if err != nil {
return "", fmt.Errorf("read decode request: %w", err)
}
if resp.StatusCode >= 400 {
log.Printf("llm decode error: %s", body)
return "", fmt.Errorf("%s", body)
}
var decoded DetokenizeResponse
if err := json.Unmarshal(body, &decoded); err != nil {
return "", fmt.Errorf("unmarshal encode response: %w", err)
}
return decoded.Content, nil
}
type EmbeddingRequest struct { type EmbeddingRequest struct {
Content string `json:"content"` Content string `json:"content"`
} }
@ -815,51 +207,40 @@ type EmbeddingResponse struct {
Embedding []float64 `json:"embedding"` Embedding []float64 `json:"embedding"`
} }
func (llm *llama) Embedding(ctx context.Context, input string) ([]float64, error) { func extractDynamicLibs(workDir, glob string) ([]string, error) {
endpoint := fmt.Sprintf("http://127.0.0.1:%d/embedding", llm.Port) files, err := fs.Glob(libEmbed, glob)
data, err := json.Marshal(TokenizeRequest{Content: input}) if err != nil || len(files) == 0 {
if err != nil { return nil, payloadMissing
return nil, fmt.Errorf("error marshaling embed data: %w", err)
} }
libs := make([]string, len(files))
req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, bytes.NewBuffer(data)) for i, file := range files {
if err != nil { srcFile, err := libEmbed.Open(file)
return nil, fmt.Errorf("error creating embed request: %w", err) if err != nil {
return nil, fmt.Errorf("read payload %s: %v", file, err)
}
defer srcFile.Close()
if err := os.MkdirAll(workDir, 0o755); err != nil {
return nil, fmt.Errorf("create payload temp dir %s: %v", workDir, err)
}
destFile := filepath.Join(workDir, filepath.Base(file))
libs[i] = destFile
_, err = os.Stat(destFile)
switch {
case errors.Is(err, os.ErrNotExist):
destFile, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
if err != nil {
return nil, fmt.Errorf("write payload %s: %v", file, err)
}
defer destFile.Close()
if _, err := io.Copy(destFile, srcFile); err != nil {
return nil, fmt.Errorf("copy payload %s: %v", file, err)
}
case err != nil:
return nil, fmt.Errorf("stat payload %s: %v", file, err)
}
} }
req.Header.Set("Content-Type", "application/json") return libs, nil
resp, err := http.DefaultClient.Do(req)
if err != nil {
return nil, fmt.Errorf("POST embedding: %w", err)
}
defer resp.Body.Close()
body, err := io.ReadAll(resp.Body)
if err != nil {
return nil, fmt.Errorf("error reading embed response: %w", err)
}
if resp.StatusCode >= 400 {
log.Printf("llm encode error: %s", body)
return nil, fmt.Errorf("%s", body)
}
var embedding EmbeddingResponse
if err := json.Unmarshal(body, &embedding); err != nil {
return nil, fmt.Errorf("unmarshal tokenize response: %w", err)
}
return embedding.Embedding, nil
}
// Ping checks that the server subprocess is still running and responding to requests
func (llm *llama) Ping(ctx context.Context) error {
resp, err := http.Head(fmt.Sprintf("http://127.0.0.1:%d", llm.Port))
if err != nil {
return fmt.Errorf("ping resp: %w", err)
}
if resp.StatusCode != http.StatusOK {
return fmt.Errorf("unexpected ping status: %s", resp.Status)
}
return nil
} }

View file

@ -11,6 +11,7 @@ import (
"github.com/jmorganca/ollama/api" "github.com/jmorganca/ollama/api"
"github.com/jmorganca/ollama/format" "github.com/jmorganca/ollama/format"
"github.com/jmorganca/ollama/gpu"
) )
type LLM interface { type LLM interface {
@ -18,11 +19,11 @@ type LLM interface {
Embedding(context.Context, string) ([]float64, error) Embedding(context.Context, string) ([]float64, error)
Encode(context.Context, string) ([]int, error) Encode(context.Context, string) ([]int, error)
Decode(context.Context, []int) (string, error) Decode(context.Context, []int) (string, error)
SetOptions(api.Options)
Close() Close()
Ping(context.Context) error
} }
var AvailableShims = map[string]string{}
func New(workDir, model string, adapters, projectors []string, opts api.Options) (LLM, error) { func New(workDir, model string, adapters, projectors []string, opts api.Options) (LLM, error) {
if _, err := os.Stat(model); err != nil { if _, err := os.Stat(model); err != nil {
return nil, err return nil, err
@ -76,16 +77,27 @@ func New(workDir, model string, adapters, projectors []string, opts api.Options)
} }
} }
switch ggml.Name() { opts.NumGQA = 0
case "gguf": opts.RopeFrequencyBase = 0.0
// TODO: gguf will load these options automatically from the model binary opts.RopeFrequencyScale = 0.0
opts.NumGQA = 0 gpuInfo := gpu.GetGPUInfo()
opts.RopeFrequencyBase = 0.0 return newLlmServer(gpuInfo.Library, model, adapters, projectors, ggml.NumLayers(), opts)
opts.RopeFrequencyScale = 0.0 }
return newLlama(model, adapters, projectors, chooseRunners(workDir, "gguf"), ggml.NumLayers(), opts)
case "ggml", "ggmf", "ggjt", "ggla": // Give any native cgo implementations an opportunity to initialize
return newLlama(model, adapters, projectors, chooseRunners(workDir, "ggml"), ggml.NumLayers(), opts) func Init(workdir string) error {
default: return nativeInit(workdir)
return nil, fmt.Errorf("unknown ggml type: %s", ggml.ModelFamily()) }
}
func newLlmServer(library, model string, adapters, projectors []string, numLayers int64, opts api.Options) (extServer, error) {
if _, libPresent := AvailableShims[library]; libPresent && library != "default" {
srv, err := newDynamicShimExtServer(AvailableShims[library], model, adapters, projectors, numLayers, opts)
if err == nil {
return srv, nil
}
log.Printf("Failed to load dynamic library - falling back to CPU mode %s", err)
}
return newDefaultExtServer(model, adapters, projectors, numLayers, opts)
} }

32
llm/shim_darwin.go Normal file
View file

@ -0,0 +1,32 @@
package llm
import (
"embed"
"fmt"
"log"
"os"
"github.com/jmorganca/ollama/api"
)
//go:embed llama.cpp/gguf/ggml-metal.metal
var libEmbed embed.FS
func newDynamicShimExtServer(library, model string, adapters, projectors []string, numLayers int64, opts api.Options) (extServer, error) {
// should never happen...
return nil, fmt.Errorf("Dynamic library loading not supported on Mac")
}
func nativeInit(workdir string) error {
_, err := extractDynamicLibs(workdir, "llama.cpp/gguf/ggml-metal.metal")
if err != nil {
if err == payloadMissing {
// TODO perhaps consider this a hard failure on arm macs?
log.Printf("ggml-meta.metal payload missing")
return nil
}
return err
}
os.Setenv("GGML_METAL_PATH_RESOURCES", workdir)
return nil
}

152
llm/shim_ext_server.go Normal file
View file

@ -0,0 +1,152 @@
//go:build !darwin
package llm
/*
#include <stdlib.h>
#include "dynamic_shim.h"
*/
import "C"
import (
"context"
"embed"
"errors"
"fmt"
"io/fs"
"log"
"os"
"path/filepath"
"strings"
"sync"
"unsafe"
"github.com/jmorganca/ollama/api"
)
//go:embed llama.cpp/gguf/build/lib/*
var libEmbed embed.FS
var RocmShimMissing = fmt.Errorf("ROCm shim library not included in this build of ollama. Radeon GPUs are not supported")
type shimExtServer struct {
s C.struct_dynamic_llama_server
options api.Options
}
// Note: current implementation does not support concurrent instantiations
var shimMutex sync.Mutex
var llm *shimExtServer
func (llm *shimExtServer) llama_server_init(sparams *C.ext_server_params_t, err *C.ext_server_resp_t) {
C.dynamic_shim_llama_server_init(llm.s, sparams, err)
}
func (llm *shimExtServer) llama_server_start() {
C.dynamic_shim_llama_server_start(llm.s)
}
func (llm *shimExtServer) llama_server_stop() {
C.dynamic_shim_llama_server_stop(llm.s)
}
func (llm *shimExtServer) llama_server_completion(json_req *C.char, resp *C.ext_server_resp_t) {
C.dynamic_shim_llama_server_completion(llm.s, json_req, resp)
}
func (llm *shimExtServer) llama_server_completion_next_result(task_id C.int, resp *C.ext_server_task_result_t) {
C.dynamic_shim_llama_server_completion_next_result(llm.s, task_id, resp)
}
func (llm *shimExtServer) llama_server_completion_cancel(task_id C.int, err *C.ext_server_resp_t) {
C.dynamic_shim_llama_server_completion_cancel(llm.s, task_id, err)
}
func (llm *shimExtServer) llama_server_release_task_result(result *C.ext_server_task_result_t) {
C.dynamic_shim_llama_server_release_task_result(llm.s, result)
}
func (llm *shimExtServer) llama_server_tokenize(json_req *C.char, json_resp **C.char, err *C.ext_server_resp_t) {
C.dynamic_shim_llama_server_tokenize(llm.s, json_req, json_resp, err)
}
func (llm *shimExtServer) llama_server_detokenize(json_req *C.char, json_resp **C.char, err *C.ext_server_resp_t) {
C.dynamic_shim_llama_server_detokenize(llm.s, json_req, json_resp, err)
}
func (llm *shimExtServer) llama_server_embedding(json_req *C.char, json_resp **C.char, err *C.ext_server_resp_t) {
C.dynamic_shim_llama_server_embedding(llm.s, json_req, json_resp, err)
}
func (llm *shimExtServer) llama_server_release_json_resp(json_resp **C.char) {
C.dynamic_shim_llama_server_release_json_resp(llm.s, json_resp)
}
func newDynamicShimExtServer(library, model string, adapters, projectors []string, numLayers int64, opts api.Options) (extServer, error) {
shimMutex.Lock()
defer shimMutex.Unlock()
libPath := C.CString(library)
defer C.free(unsafe.Pointer(libPath))
resp := newExtServerResp(128)
defer freeExtServerResp(resp)
var srv C.struct_dynamic_llama_server
C.dynamic_shim_init(libPath, &srv, &resp)
if resp.id < 0 {
return nil, fmt.Errorf("Unable to load dynamic library: %s", C.GoString(resp.msg))
}
llm = &shimExtServer{
s: srv,
options: opts,
}
log.Printf("Loading Dynamic Shim llm server: %s", library)
return newExtServer(llm, model, adapters, projectors, numLayers, opts)
}
func (llm *shimExtServer) Predict(ctx context.Context, pred PredictOpts, fn func(PredictResult)) error {
return predict(llm, llm.options, ctx, pred, fn)
}
func (llm *shimExtServer) Encode(ctx context.Context, prompt string) ([]int, error) {
return encode(llm, ctx, prompt)
}
func (llm *shimExtServer) Decode(ctx context.Context, tokens []int) (string, error) {
return decode(llm, ctx, tokens)
}
func (llm *shimExtServer) Embedding(ctx context.Context, input string) ([]float64, error) {
return embedding(llm, ctx, input)
}
func (llm *shimExtServer) Close() {
close(llm)
}
func nativeInit(workdir string) error {
libs, err := extractDynamicLibs(workdir, "llama.cpp/gguf/build/lib/*server*")
if err != nil {
if err == payloadMissing {
log.Printf("%s", payloadMissing)
return nil
}
return err
}
for _, lib := range libs {
libName := strings.Split(strings.TrimPrefix(filepath.Base(lib), "lib"), ".")[0]
AvailableShims[libName] = lib
}
// Only check ROCm access if we have the dynamic lib loaded
if _, rocmPresent := AvailableShims["rocm_server"]; rocmPresent {
// Verify we have permissions - either running as root, or we have group access to the driver
fd, err := os.OpenFile("/dev/kfd", os.O_RDWR, 0666)
if err != nil {
if errors.Is(err, fs.ErrPermission) {
log.Fatalf("Radeon card detected, but permissions not set up properly. Either run ollama as root, or add you user account to the render group.")
return err
} else if errors.Is(err, fs.ErrNotExist) {
// expected behavior without a radeon card
return nil
}
return fmt.Errorf("failed to check permission on /dev/kfd: %w", err)
}
fd.Close()
}
return nil
}

View file

@ -9,7 +9,7 @@ mkdir -p dist
for TARGETARCH in arm64 amd64; do for TARGETARCH in arm64 amd64; do
GOOS=darwin GOARCH=$TARGETARCH go generate ./... GOOS=darwin GOARCH=$TARGETARCH go generate ./...
GOOS=darwin GOARCH=$TARGETARCH go build -o dist/ollama-darwin-$TARGETARCH CGO_ENABLED=1 GOOS=darwin GOARCH=$TARGETARCH go build -o dist/ollama-darwin-$TARGETARCH
rm -rf llm/llama.cpp/*/build rm -rf llm/llama.cpp/*/build
done done

View file

@ -7,8 +7,8 @@ export GOFLAGS="'-ldflags=-w -s \"-X=github.com/jmorganca/ollama/version.Version
mkdir -p dist mkdir -p dist
for TARGETARCH in arm64 amd64; do for TARGETARCH in amd64 arm64; do
docker buildx build --load --platform=linux/$TARGETARCH --build-arg=VERSION --build-arg=GOFLAGS -f Dockerfile.build -t builder:$TARGETARCH . docker buildx build --load --progress=plain --platform=linux/$TARGETARCH --build-arg=VERSION --build-arg=GOFLAGS -f Dockerfile.build -t builder:$TARGETARCH .
docker create --platform linux/$TARGETARCH --name builder-$TARGETARCH builder:$TARGETARCH docker create --platform linux/$TARGETARCH --name builder-$TARGETARCH builder:$TARGETARCH
docker cp builder-$TARGETARCH:/go/src/github.com/jmorganca/ollama/ollama ./dist/ollama-linux-$TARGETARCH docker cp builder-$TARGETARCH:/go/src/github.com/jmorganca/ollama/ollama ./dist/ollama-linux-$TARGETARCH
docker rm builder-$TARGETARCH docker rm builder-$TARGETARCH

68
scripts/build_remote.py Executable file
View file

@ -0,0 +1,68 @@
#!/usr/bin/env python3
import subprocess
import sys
from urllib.parse import urlparse
from git import Repo
# Helper script to be able to build on remote repos using git to push local changes
# (e.g. particularly helpful to target a remote windows build system)
#
# Typical windows remote git config looks like this:
#
#[remote "windows-pa"]
# url = jdoe@desktop-foo:C:/Users/Jdoe/code/ollama
# fetch = +refs/heads/*:refs/remotes/windows-pa/*
# uploadpack = powershell git upload-pack
# receivepack = powershell git receive-pack
#
# TODO - add argpare and make this more configurable
# - force flag becomes optional
# - generate, build or test ...
# Note: remote repo will need this run once:
# git config --local receive.denyCurrentBranch updateInstead
repo = Repo(".")
# On linux, add links in /usr/local/bin to the go binaries to avoid needing this
# GoCmd = "/usr/local/go/bin/go"
GoCmd = "go"
if repo.is_dirty():
print("Tree is dirty. Commit your changes before running this script")
sys.exit(1)
if len(sys.argv) != 2:
print("Please specify the remote name: " + ', '.join([r.name for r in repo.remotes]))
sys.exit(1)
remote_name = sys.argv[1]
remote = {r.name: r for r in repo.remotes}[remote_name]
raw_url = list(remote.urls)[0]
url = urlparse(raw_url)
# Windows urls don't quite parse properly
if url.scheme == "" and url.netloc == "":
url = urlparse("ssh://" + raw_url)
print("URL: " + str(url))
netloc = url.netloc.split(":")[0]
path = url.path
branch_name = repo.active_branch.name
print("Force pushing content to remote...")
# Use with care given the force push
remote.push(force=True).raise_if_error()
print("Ensuring correct branch checked out on remote via ssh...")
subprocess.check_call(['ssh', netloc, 'cd', path, ';', 'git', 'checkout', branch_name])
# TODO - add some hardening to try to figure out how to set up the path properly
# subprocess.check_call(['ssh', netloc, 'cd', path, ';', 'env'])
# TODO - or consider paramiko maybe
print("Performing generate")
subprocess.check_call(['ssh', netloc, 'cd', path, ';', GoCmd, 'generate', './...'])
print("Building")
subprocess.check_call(['ssh', netloc, 'cd', path, ';', GoCmd, 'build', '.'])

View file

@ -0,0 +1,38 @@
#!/bin/bash
# This script sets up integration tests which run the full stack to verify
# inference locally
set -e
set -o pipefail
REPO=$(dirname $0)/../
export OLLAMA_MODELS=${REPO}/test_data/models
REGISTRY_SCHEME=https
REGISTRY=registry.ollama.ai
TEST_MODELS=("library/orca-mini:latest" "library/llava:7b")
ACCEPT_HEADER="Accept: application/vnd.docker.distribution.manifest.v2+json"
for model in ${TEST_MODELS[@]}; do
TEST_MODEL=$(echo ${model} | cut -f1 -d:)
TEST_MODEL_TAG=$(echo ${model} | cut -f2 -d:)
mkdir -p ${OLLAMA_MODELS}/manifests/${REGISTRY}/${TEST_MODEL}/
mkdir -p ${OLLAMA_MODELS}/blobs/
echo "Pulling manifest for ${TEST_MODEL}:${TEST_MODEL_TAG}"
curl -s --header "${ACCEPT_HEADER}" \
-o ${OLLAMA_MODELS}/manifests/${REGISTRY}/${TEST_MODEL}/${TEST_MODEL_TAG} \
${REGISTRY_SCHEME}://${REGISTRY}/v2/${TEST_MODEL}/manifests/${TEST_MODEL_TAG}
CFG_HASH=$(cat ${OLLAMA_MODELS}/manifests/${REGISTRY}/${TEST_MODEL}/${TEST_MODEL_TAG} | jq -r ".config.digest")
echo "Pulling config blob ${CFG_HASH}"
curl -L -C - --header "${ACCEPT_HEADER}" \
-o ${OLLAMA_MODELS}/blobs/${CFG_HASH} \
${REGISTRY_SCHEME}://${REGISTRY}/v2/${TEST_MODEL}/blobs/${CFG_HASH}
for LAYER in $(cat ${OLLAMA_MODELS}/manifests/${REGISTRY}/${TEST_MODEL}/${TEST_MODEL_TAG} | jq -r ".layers[].digest" ) ; do
echo "Pulling blob ${LAYER}"
curl -L -C - --header "${ACCEPT_HEADER}" \
-o ${OLLAMA_MODELS}/blobs/${LAYER} \
${REGISTRY_SCHEME}://${REGISTRY}/v2/${TEST_MODEL}/blobs/${LAYER}
done
done

View file

@ -418,6 +418,31 @@ func CreateModel(ctx context.Context, name, modelFileDir string, commands []pars
return err return err
} }
// if the model is not in gguf format, pull the base model to try and get it in gguf format
if fromConfig.ModelFormat != "gguf" {
fn(api.ProgressResponse{Status: "updating base model"})
parent, err := GetModel(c.Args)
if err != nil {
return err
}
if err := PullModel(ctx, parent.OriginalModel, &RegistryOptions{}, fn); err != nil {
log.Printf("error pulling model: %v", err)
}
// Reset the file pointer to the beginning of the file
_, err = fromConfigFile.Seek(0, 0)
if err != nil {
return fmt.Errorf("update from config after pull: %w", err)
}
if err := json.NewDecoder(fromConfigFile).Decode(&fromConfig); err != nil {
return err
}
}
// if the model is still not in gguf format, error out
if fromConfig.ModelFormat != "gguf" {
return fmt.Errorf("%s is not in gguf format, this base model is not compatible with this version of ollama", c.Args)
}
config.SetModelFormat(fromConfig.ModelFormat) config.SetModelFormat(fromConfig.ModelFormat)
config.SetModelFamily(append(fromConfig.ModelFamilies, fromConfig.ModelFamily)...) config.SetModelFamily(append(fromConfig.ModelFamilies, fromConfig.ModelFamily)...)
config.SetModelType(fromConfig.ModelType) config.SetModelType(fromConfig.ModelType)
@ -456,15 +481,21 @@ func CreateModel(ctx context.Context, name, modelFileDir string, commands []pars
defer bin.Close() defer bin.Close()
var offset int64 var offset int64
CREATE:
for { for {
fn(api.ProgressResponse{Status: "creating model layer"}) fn(api.ProgressResponse{Status: "creating model layer"})
bin.Seek(offset, io.SeekStart) bin.Seek(offset, io.SeekStart)
ggml, err := llm.DecodeGGML(bin) ggml, err := llm.DecodeGGML(bin)
if errors.Is(err, io.EOF) { if err != nil {
break switch {
} else if err != nil { case errors.Is(err, io.EOF):
return err break CREATE
case errors.Is(err, llm.ErrUnsupportedFormat):
return fmt.Errorf("model binary specified in FROM field is not a valid gguf format model, %w", err)
default:
return err
}
} }
config.SetModelFormat(ggml.Name()) config.SetModelFormat(ggml.Name())

542
server/llm_image_test.go Normal file
View file

@ -0,0 +1,542 @@
package server
import (
"context"
"encoding/base64"
"log"
"os"
"strings"
"testing"
"time"
"github.com/jmorganca/ollama/api"
"github.com/jmorganca/ollama/llm"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestIntegrationMultimodal(t *testing.T) {
SkipIFNoTestData(t)
image, err := base64.StdEncoding.DecodeString(imageEncoding)
require.NoError(t, err)
req := api.GenerateRequest{
Model: "llava:7b",
Prompt: "what does the text in this image say?",
Options: map[string]interface{}{},
Images: []api.ImageData{
image,
},
}
resp := "the ollamas"
workDir, err := os.MkdirTemp("", "ollama")
require.NoError(t, err)
defer os.RemoveAll(workDir)
require.NoError(t, llm.Init(workDir))
ctx, cancel := context.WithTimeout(context.Background(), time.Second*60)
defer cancel()
opts := api.DefaultOptions()
opts.Seed = 42
opts.Temperature = 0.0
model, llmRunner := PrepareModelForPrompts(t, req.Model, opts)
defer llmRunner.Close()
response := OneShotPromptResponse(t, ctx, req, model, llmRunner)
log.Print(response)
assert.Contains(t, strings.ToLower(response), resp)
}
const imageEncoding = `iVBORw0KGgoAAAANSUhEUgAAANIAAAB4CAYAAACHHqzKAAAAAXNSR0IArs4c6QAAAIRlWElmTU0AKgAAAAgABQESAAMAAAABAAEAAAEaAAUAAAABAAAASgEb
AAUAAAABAAAAUgEoAAMAAAABAAIAAIdpAAQAAAABAAAAWgAAAAAAAABIAAAAAQAAAEgAAAABAAOgAQADAAAAAQABAACgAgAEAAAAAQAAANKgAwAEAAAAAQAA
AHgAAAAAXdsepgAAAAlwSFlzAAALEwAACxMBAJqcGAAAAVlpVFh0WE1MOmNvbS5hZG9iZS54bXAAAAAAADx4OnhtcG1ldGEgeG1sbnM6eD0iYWRvYmU6bnM6
bWV0YS8iIHg6eG1wdGs9IlhNUCBDb3JlIDYuMC4wIj4KICAgPHJkZjpSREYgeG1sbnM6cmRmPSJodHRwOi8vd3d3LnczLm9yZy8xOTk5LzAyLzIyLXJkZi1z
eW50YXgtbnMjIj4KICAgICAgPHJkZjpEZXNjcmlwdGlvbiByZGY6YWJvdXQ9IiIKICAgICAgICAgICAgeG1sbnM6dGlmZj0iaHR0cDovL25zLmFkb2JlLmNv
bS90aWZmLzEuMC8iPgogICAgICAgICA8dGlmZjpPcmllbnRhdGlvbj4xPC90aWZmOk9yaWVudGF0aW9uPgogICAgICA8L3JkZjpEZXNjcmlwdGlvbj4KICAg
PC9yZGY6UkRGPgo8L3g6eG1wbWV0YT4KGV7hBwAAQABJREFUeAGE3QfgX9P5OP6TIRKRncgmS6aR2DNCKEKLqqpRW9FWq0q1dEQparZKF7VK7aq99yZGSCRB
BhErk0Qmyf95nZOTfOqrv/9J7ud977nnPPt5zrz3Ntp0s61XrLnmmql58+Zp6dKlqUWLFmnZsmXp888/Tx07dkwLFy5MX3zxRT4aNWqUmjVrlho3bpzatGmT
Pvnkk5y/YsWKXHfttdfOv/VauSZNmuRj0aJFSX15cIAPruS3adOmafny5Uld5dDkXP05c+akTp06pTXWWCN99tlnacmSJQGnUVp77VbpvffeS126dM4wli4t
dK8RsJoHDvUXL16cy7du3TrjXrBgQS675prNUsu1WgV/AW/ZktSxQ4dMC37BXbDgs7Q4aG7cpHFq2bJlpo984EY/3vELB94k+eqjU36V1fz580OmSyO/WZZt
8+Zr5jKu8YZv8pTgkCoMcnCgm17atm2bz+Gv8NWnvxUrlgd9S3P+4sWLQnZNc91PP/0ktWrVOst19uzZwc9akd98lczxN3fu3FwPLudrtwrelqcsM7LG95rN
Qv4LF2U6XLvfvMWaq2gi90ahX2mttdbK5ej2o48+ymXokv7Ri/ZPP/00LQ16O3bqmOuwCbiaNSv8Ngs5fhFl2QPe1fXLBtgLutHrVyJnciffZWELS0KWytEL
Odd66oDjHrnjpdoiGTbyL3DRAX3AT77xEzAW5nrwuY9m/DTp3bvf6Hbt2oWgW2WC3ARYZQdA8+bNW2UYiILU4T6FIsw1w0NAYaZ5RoT4KgRIwa8GgBgEEjC4
DFJdB9jynTNYDqF+pQdDyqw23ma5nGv1MIcnuAgMHPfQWuholtKKlNaEP2heujQMYyVuTrT8i+VpUeCsNFIEueAFDWBSXD1nOO7PmjU7nK9J+uLzkE/AnRnX
yi5atDgbcMsoN3/+Z2nK1PfS2i1bxL0mmQ+OXmlEO4fEX4eOHTJORiefPNdYoxiR8nTHwCR8f/EFY8T/iqyThjyjkdHBRdbkIMGFdrLiqIx5/vwFaY2ma+R7
1UA5M0OjM7Dw59x9sPANDn47dGgfZVOmPSOJP2RF/+5LfjmsX/ckcqp0gkfv+GQDZF9tjyyc+yUbNLjmGHPmzE0LQk6u8Yov5zUYu0YvPGRGFpmfkDd+QvAZ
F9jwg7F8+RfB29KcX+WMbvxKTfoPGDQ6HC2nShjBKuwXg126dMkKwBAiOA/CCRYBkAHaKhBSvnodIsKrywDBpVCplnWubFWSX+UZP1jKFYK/yPgqXLDQQyFw
Y1Id5THVPBxl5qxZWfBgEgZ6CLdJtC5oBrd5i+ZRNoQWPM1fMD8bIyNcGBEXn40bRUQKXhktOASMdzRSgoNTukbbhx/OjOtmqVevnql9GHe3bl1DZi2Cjpap
e/duaZ11OoXzvJsWhzI6d+6Yhg/fOk17590MFz7w8A0Pep2DvzgMC72Zt7in3DrrrBM8r53pgrsamJZEvWoUZAU2OLWMewyPQ+KHE+LBr7qff74sG7M6Ak1U
z62yenBXfJ9FsGkaLR5HoAt6qLjAw0MNouo64ENTTZwWTDaCR85SaCgtkxYV33SmnFTpJidlHXQPPidaFHjR4T6a3NNCCSBgKM9e8Fdhocu5+5wK7ehUFr8f
f/xxBL3S25LvkO+Qcrldd/v6imIcy+JG41WMtm/fPjMHISF/8P77YXALMnEAIFbkEvkqUADlI0pSFyMEDXltip0zTvkExckWMNaVzgaeesoQLmPW3arOUxlm
OIRVIzI+aotBMeoTrnx4wMQXfGhv0rhprvtFRBtOMC/gaYWaN2+R+dK1+DycS3k0zZz5cZQvRt0BnFAeJc+aPTftsvMO6eennJwVWmRTWgmGKJqhffr099LR
3/t+uvKKv6W+ffumu++5N+2z37Fpj123TLNmzkyd1umcHR9f8FG4rqdgwHnwQNG1C4vH6mRVT4xCGfjcw7trMip8N849DDDJrtZniM7xQz8McUG0SuS+NLq+
5Coo0Lcya0b3q0uXrmFEjdMnK1tLAbYaL9lrAeCuhkf2nBgs5dgJWeFVYh/oZch4rc7iGr01YMqvOleX3XFK+iU79kEOeFLPffck53A40AFmlQ/+lXeNVvfR
Cwd86tb6aNA6fx49D3LNbawKGMcI711rrZYZGCYh5JGQUI6EQIDdg7h6dEOi5akPsaQ8BolMs+saXr9gtwyHIVhEKYdQTGICHMpQlkDeD6emCHQU41oYDtM2
160wlCcMNOJLFwhNaJTAnzN7Tnacxk0apQ8+CIFFfoeOneKvrkTrTN/cuXMyfjQZ04DHOVvHQcFahsefHp+O+V7vaGk6A/0/U+9evdK222wVrVW3XGZA//VT
9y5tomWakV59+ZnUfO0eaY/dts+8MUo8zA4nHfvqi9Eh7x79pPfSVlvvkLp27Rz5c7KclCM/vEnkRYbyyBe/8hg/OZAhuc6KVptcyQ9PeHEfTvkSmS0LvgUz
9+NGLqMcvLPn6LYW54M/yyX0AoZruoIPbnYwM4KFfE5vuCDRAxrkf77SDhly5YHNKYMH+pTQxyblK8d58PTZZ9EdjfLKgk8GyqAHTOd+yQU+/KFNK5wDRshB
HQHAWJJ9tY8u6lotip2xAXXBwYNrrSacTQm6fft2uZIbCONUkGNeswspJhDIUAkVEgw5KAIw5xA5RyRBggGmOqIruBwVnEqMFkekd28ZZqKOuu6DRdBoqwZB
mNVp4Q7zyTQTJhjKoo/Q5FV60MYJCYLQFy1cnAezTVY0zhG2jkeaNFkjfRKKUL9ROJl6eKs8wl0VCd+2W/ZP199wSx5Xde68TuZ39913y3Jj8HfffXemY8xL
L6d33p2+ypnRPueTxenHxx8VrdkJacqUqenKq65PHdq3ztH//odfSDuP2DRdfPGf8phDj+C5515Izzz3Sho8sE+aMeP9rBfKZ7DgodU5eaOf/J37JdOqC2Xc
x0s98AhWNXaBY01jreVF9sZEJjEWL14SjhRjthhHduzUYZUDkgVc4Ah04DvneA734FcOrRy04qTTpStth5wrP3TuUKfaolYCjeq7x07c0+XnANVuODY7U7d/
//5RZvZK+2yWJ0DkC5r40c0nB3Q50EVmi6Krr4vLJ9hVjx49Mgw0uCZv+Brt8839c9eOsarsJgG46Rpws3cIQjxlOK9NX0NGCUOSRxgSj2e46kJeiC9llEOs
svKrUNFAobWsusqgi4O4B9aSJYuzMEUFjFa60WywbHaKQ+uOEOr8+TFLFJMKZoWUb8J5o2yZ4SoGBHaTiLJpRaPc314UhiOBAzchi3auK83odr502fL0wnOP
pf2+fWC65por8njt3XCc9dZbN3XtPjB9MGNKOurow9Mf/3BhhvX66+NiZmlJ2mzTTTMOfx599LH03UOOC8dpm/b/9l7puOOOybhqAfhv+8/t6fCjT047bjc0
ZtEEqIURzUv/f3l0N4xPi9HqfpQILmqThyCGVrJirGTRIsaL9MDQ/CpDBytCbmYttcqSmT7BsM4GNo3JCF1kxkTHuqfkSTYcRyKrqj92U4JYCaLkpuyCGKN+
+un8fF51TIdsEN3orLYCpm4cmLNnzwrcZbxKN2wEPvTArw6cyreLY8rUqbm1gZfjVRzV/ti2AMAG2K18ZeUL9mTJWefNm5umTXsn+4BGSCBv0q/fgNGEvmYQ
9nkIGIGYAQzTiKnRQblqyBDJZ6AShBAjrrYgZvGygYXy1VOe4MB1TlDV+8EDSz44tVvmPlrANIXMQQgLvqKg0q81roGLcpct/SK1DVjRXoZBLItAEN21EIKx
SnXmFs2j/7xC/zYmHYIegs+RJcaJxkaMjlHBj3a4yAKdzhkrXuGkODR2aN82JlzapoED1k+7fm2XXF/5F154LQ0Z3C+1DmV2jan6UaN2z/cooVvXrlneYEq9
e/eKaPl+8Ls0XXDB77Niyf2ll14K2TTJRrHBkCEpZp3T3fc9HBMbrbKC0fDZgtJ9IadyLItfA/fSvwe/ZQyaa9fOAJrDcIZPPpmX+cGHvLlz52V+Ca7qiuzw
TS7krx4jIxeHGVCtBHmSjXK1LJ3Kd78Etfmruk/oAkdZuMkUHjDlfxF5einqu4dhY1nd02qH9PRZyJoeq/3Jq/b0/gcfZD1VfcFJZuQOJ3rhq/erbvkCvsEB
b/r06VG+TJigV7lP5n2SGkOqn4tQwnGt+eXFy8IIeTRiJcAoAUMEXg0cMkAJqEYAMIx7uoahmMVCbG3uFy2K/nYkeZVRsCRlGLQmWpJPmHDoWoBLGcpwjI8+
mpnvK2sw3DrGLB07ts+O0CzWPXRPPo3+fBZ08AKe+nhep9M6Ofo2DgESCD7jNNOs5ZKnbBWuuvhfK2jQunFowkcTmVDu4sUxuI/fhmnhwiURyRdlWrUYWkjp
i+ganXHGWWmXXfZKb7/99qoq667bM+277zeyA8u8/vob09Zbb51+ceovV8ll1113SdMmvxN4W+RybVq3CZ21Cf60MsYrbbOMBC50043Wh34YBjrmBv0mFIx3
QvVZH/ihE7Dw7aAn+WDRBXj0LcDg28Fu/AqA5KGco8qQ3MAgszJWKt1/QYLc6VMib06kxVCfY5jUAb/aoVlZa1NsxX1OiiaOXINsDW5owUPRXZkVxB9aqk2Y
6ZOnDhx4c0gtAqZxMDs2BjZ+AqvaLR3SZlMZmNBciYIMliIgInSVEMLJKAPjFIFASCuzBFaZAINAwHFUBzWuUB9RYCqHeAqoc/yUprw858rVFkpdNHEQXQGt
RvtoBfDw5ptvp6nT56Z2rddOc2YtjO5U+9R/wHphEK1j0W9ZsFq6m1qoYC1wl1m8tQJGs+DfDMyKFWumiZMmp5dfnRRO1jr16NYl06sV1D1jDOPfeCONe218
GrbpJhEgtKAMrwQBvHaKxUXOiwfJDyMAv8xwWmcrEx4zZryXrrvuP+FEL6exY19P/fr1y3XQ16vXevmcXA866ID03e8elGVB7hJ5RRubloSxrR2LrYsbi+gW
CGOdK1okk0Z0R+aMgp7o1DoNZzMm0FWzcLl2q9LdW7rU5EBpkeCNdibLnnx1f8kQDPxUmGyHDuXrLtORBK+ZRvTW8YV6nJY+S8Ashk/XDjCUn/7uu3mSg6Oy
I/iVh6caOX7A40jyXYMBNtrpynKGQysMtrLsBw3KrHZCOomJpnBgSZliD9HafFp6SvLAltDEFyrPaG7KKx26AISCeATKQ0x1JERWJ6IkTiAPMcozIr+QMX7n
fhGgm0FpEkEQrHsIAQMhDtcEUnHDBy6m9ZUJQDkK7dmzR5o8eWoaP+7ltOHGm6cRI7ZLh0Ykx2AR7JIY2L+bXhzzahr7ynNp8ODNUt9+6wbesvsBHC0j/Mp/
GgJ74vGx6YfHH5jWW3fdcJgJ6aorb0t77Dk8RyKCffTxZ9NmwwanQ797YJ55/Nf1t0YLqEtYAoaIXVrIsosjMxzxau7c+alXr245AOCxJkbbrt3acVnWxGo+
pTIeCe8ffvhh/JoIaFxakzh/4YUXU5uI/vRD1mRjXOcaDtcmBJyDoYdBvyZD6GzKlCnZmTikWUs4tNKClPILY8HbTJZAoUVFEz7hokPw4BBMGTkHhs89MrV2
VoMXOdM3e1JfkGEnaEOva7Bck3ObgE0/bEEwdbADdgGf8nhRto6hXCsPHv4ki/bsAU26rmy24mTnxQbKfIAewxwzdVG30FS6w/yCDaMbz/jgSGChh87ByY6E
KYUQ7KaCEGIOQsS7lgijElwiVYl0kClXBaSM+5QCudaOA8lz3WZlF87qtTJaOQLSpDNszX+NGNUQCMU5g7rj9mfSYYfvkc79/Zlpww03yBGaAhsmszEGpK+P
G5/+c9sd6W9/uzTt881vZzrnxAAaLzNmzFjZ0i5JDz10Qxq50070n2Wx225fS78947w0ZFC/9MRTY9KJP/5e+v73j4t6jD+lb++3b/rBD08IesvYEg9zYmzR
Irpbq1MEizXLDoGWa7WI3QKly+A+Q6C0xo17hHxX16B4rbtkQuSyy65IZ511Rr7+xS9OS9/61jfT25Mnh6xjRi4rNGbqYmxXDDQWciPQ6faC0yKmbhk62hwG
y7qtdYZLK9Z0jTJV3ry58ULp/zcLp6GvttHq0gPZ0jGj0X2Diy7pSjl8WFvT/WZDtWXjoGyHPay1Vo8sc3aiDON0D4w8vgm7Y/xwyBNIlYHfNZ7YDpvUerDP
du3a5zzyEuDVz3Jb6VCl3vIsB7jAZEN4QTP4aHPPssziuMafa/6AFrzCyXlMvKknHy3KuN+0eLaoWebiOZFmnEBVwHStBHjDFgcRjBFAZY1RdGsQhlhJeUpw
1HP1ssOF0DlOxSNPPTQxAgnjEsGbGFi0aFma+s6M9O9//zntFlPM+rANE6YktBJs9+7d8zFypx3TPvt8I536y9NTq6BzrYA1PwyrWRj5gw88l84886Q0cmQ4
UST8wP/NffZO9933QJow4c3Us0fndNDBB2Yncp8RDR48OB1//PfTkd/7WfrayC2CtsUxydE68wq/JKK3a98mR7rPYrq9UJdvxR9dLVPQZdW+5goYN998W/r6
1/fMeE466cS0Taw/tQ7YW225ZZadRdBzf3922njDARGtSzeubZt20RX5JH0a4zfbe6o8yZjhiOrkWQykDPzJ2oIr3ZmN03rQIUdYKxxfeVEXz8rSN13oujHw
teNgfORhskonl2Mpo2xprcpY2EBdQhca1KEvcMkL37pinMF9ToDmsj6k1V8z4JWxW7VX8MCBR1l2qx6YbBnf7rM/tuy63hOIBJ08Oxll8INuvKJLWXToorJL
dg0vWsGQlAG3KaYRgGnIGLnCBqsAyqvNF68HkHEAZkqREAGyh8zOBEyAVR2pwlQmYnCup65rDmqMkreaBNPqYsCBDjDkgSEaGat89NGsdP21l6ehQzfOjKAR
PId6NdVz+eBQ3q67fi0ZyB9w0OF5wgDudu1iKjVw9+vXN1dVlmOrr86QwYPSVdfenw7af6fciipUDQDs9QJeixamYmOPXRifCD79vRmZbnLlXNOmfhzdyvXC
OdcIA4wp4qBZophlMWUeYSLv0cuZ8Uekve66q9JOOw1PBx98UKZxjz1G1dsxppqc/nzp39LQYVtmh9faM76msf4FJifS1VqwQCtQornAoKvml/Lfi/FZm1Ym
J5pmWkXc6mTGFC1bakVjEimMjTGxAXzTB+eXqozlg8sIya4amfuMk42BQV5+ydhvNXD0wA82GrRO8LIPZeALVWS4yrIH9euEFD3BoxzcYKlLFsoJJGwSjVri
alf2VFb7oSv0g48OTs0R4cKHxkV9B9tUDhz1+UMO+5orGbWw8QxCAFIYQSIFQh2aVdEBQkwoR4BgEBqB1Xx1IRUxCEn3ojKKeIQp656y8givGrKyYIP50IN3
xoLkc9mJqgCq0bvv+HICRxl8OB80aFC65qrL0l77HJw22rB/jnC9e9p9vLolU67Cyr/LpmcF1Tz3azJ2+WD6+LTxkN6xhUrXp3lMWLyd/vKXv6Utt9wit2i9
+nTM24+sybz99ox09dX/TBtvvFG66aZbQlHNwmGGpWuvuyFosR1nWbrkkr+H0++ZTj3t7DRmzMsxqzcyxg1t8lrRxEmTot5tQU+z2CHROesDb02DRw5ovOPa
NiLBUKvCyMjXWFV0V66l9aQoZzeBpHx1BK3SsmXR5QuYHcI2rNeRoW1cur261mRQ5UC/dOZgN+TENhgclTQPWuNPtiEBl4x0AU0YsSfGTp/qwtM07IFNsA3B
29JM7daBif6Kx84D25U+iOlt8kMgG1QOzxyITYHPKdCmvsaCbbFL58qWGcfSc0ITpwQLv1pp8gEXjWCBbfYw0yoDEkbMCwHgKBAi1lw9obgGQB6BAapp1Epw
JMoSIRAmAQ4uYajrQJh6jbRoMTCmoOVflIVaAmXMy5aVRzWMQ0TZ4gDL0yMPv5j+9vfLwji3zApEA5juO/f7xhsT8jy/fJFngw2GZPy1DMU732ijDdMfL/pd
+u2Z54WQ10iTJryUZ7oy4V/6Q6kpdcxw6i3wqxE5l2zYDcayUrp1XSedfPJPa/H/83v88S/nvH79hqY+fbpnFzYm3Guvb+T8HUbskneHR0OT/nLlneGUl6yC
0bZD79Sze8f0+muxbahB2mCDLVLnLmUvGx3SyaSJ74aBTVxZyjrV0jRw8LA0aOD6eVdEcBFT9aV1oRvGQm4c46VX3kgz3n0jdV93SN5ou07HNmnC+EkBY35a
f+AmqX+0sMZ4JhgkemYfDM+5NbKPP56VHnv69TS4/7pRYkV6d/qHYR9rpSlvjc11OnXpG3B65qBA14yULhkoudaWgR1J1TZ1K9HLds06OtiblpnDgWFJgwwk
sPAEnpYaLvrjPPTPpt1Du1/1wGf7tZHRg6o8wosWh/JsqqnoYDoaIsqEFFEQTY4BLa/lWPJU4M2coRhnWZMBWFK2JkTUaIDQ99//IE8hEzanUV+yaKpcZj4E
IAKpq1+KIUJD09Bh/dPeKw0NbdV5/Kpzzjnnpt/HmCGlWC9Zu11aGq3DKT//XvrpiT/OExGF3jITBu+IEcPTn6PVgEsyWP+qZMtSSrNW0ftVZeSRH8W0iXHM
A/c/lTbbfNt0wHf2TUNi8ZRxahl0/QhewHnzzTfTLbfcnu655z9p1932zDTusedeWe6vjp2UDjn4W2m/mMwwdpk6dVq6NLpyAtBLL4+P8dLm6bxzz8w7zPH1
0Ucfp3/968Zo6f4R48Y9s0E9+khMjpx4ZLRsF2dZhghz9LzvvgfSRRddkLbbbqcsQy0M+fbs2TPjfuyJ59O2W2+Sfn3aT3LrbT0J7crQ28yZs9Jrr72WHnzo
8fTiS+PTTiO2yjJkF1XfAuyDj76Yvr779unIIw9JA/r3j8ks61hlWUBZOwOee+75dN55v0/rD9g49e2zbgTzsimXExj4M9xqF87h5wjsCQ52SOZ01zLkxBGq
ntkkx9StY5d1AkH56mD0pqdlLU4DAB4erBMasqjHifWiJDJwuJbvXBk4I+A2y1EUQkoGCHAFDdQV4o1meiw+IRAxEHMuwlFPl8F99eRhDOOQYZwgOUD1eoTJ
V0ZrqJ/5+edlAyziXINlXeb+++7KXaU66CPc6kxwn3HG79KFF54fU9V7Rb1irMpc/c9b0+x4Hujiiy/MuNRBD57Qf9CB+6efnnJGTJ9vmfPR9OWkrLRgQXRr
ViZwakKjhGbdpclT3o2u2Vkxs7bv/3MT67bbbpO++c1v5n14Z//+j6ldGwuPAs5Hadddtk+//vVpmWaw+4ch0os6V199TTrwwAOykblX0447jojWerM8qzhs
k63Sww/fEl3GHevtVb+77LJLsg/wlJ+Pjoma6JZHy89ILTC/9vqkdM7vTk3GY3on/yuNGLFD0HBgdDFvTr86/YK07VYbZZmGaLMu581blq676o8JTXoqX5U2
2WSTmPzZOx1++KHpoj/8Kf3njgfToGi5LGbTnYDLvhgtWuiBvdA3mRtvLlpUNloXfazI9lqm8cu6mTqm9+lcAo/9sk+tCccJ98g26b7yDk4ER7UV5dm8Vqra
EPrITTn3m0Jcu3I8WFPHudhKbc4A5ySSMtVTEQMQ4PI0rZAr07J57DSOSGG7zfIoo6yoLVVBFQcqA1iGoq9K8GAhFF70pDV6ps022zTXrX8qQ08//Uw40QUx
rb1ftJbl0Qx4ML39NsPS5Zf/NX3jG3vGDNgeq4RQYWy++WYxy8bhS5ei5jf85dBSXQdzjseivDJekOd6bBjiWWeemp2o5i2P/BXBR4FT6lXFar04RY8ePdIO
O+yQ9t5n3zTu9TFp9G9+kY2GzCQK699//XTxny5Jhxzy3ZznHjlK5MRIjj32mHT/Aw/FTvQjsxPJd9RyaCTXXXbZOeuNQ339G9/MRjLmlQnpxuv+ljiJpKy6
fhvWr7R37NghnPbYvDv66ON+nvbda2RE/7nppVcnpvvvvjH0tVmGU2GoBw541UjRYsz6h4vOj8B2errkL9eGU24Y9YrMazm9EmW1JH7xzvENKwRm+eBb8xEg
2ZVxjW4nubAFjqDnInBXOPI4JpgcVjeOczlng+6h1wFOpQcfxpt4U4a9N4a4RKT5ubLBsxuI8atyNW6eV8tXoSBCs2naUXkM6S5qvh0ijGZaUhaBZoCUQ4hf
EQexGEYYZt2zcwGMoRt0D2OLxwgiKS9V5T4Smzx7rDsgO4N7hIxO50ui7zxqj31ifejhLKBKc4WhhevXu0d6+81XM74M+Et/tHASumpa3R7pmsZGxu6DQvjz
0/DtN89T1sqRm6SbQr7gwE8Gfh3VyIYP3z7WuP4Wi6TvpnW6rp/loK6yDgmMb+/3rXxer/EBjntVyWeecXo8NDg8l6v3ajmw4JRM9R951DFpXhjb9Pc+TMcf
d0iqTlTLKF9oL/S6rrRX/vb/9n7pR8cdHN3GmdHVG5cu+P2vsxOBIYgoX+uxKXoGs9JCX2zi2GOOjh3tvWPM2DfrTlm6B4d9sQ2tCVtUl9M45xTKyJOM2+St
HYHZw5V1fRQdJmjA0bJxGLSwRV3XPn1653tsmNOAiUfX1R7lo9ehOygfLOWa6tIpgBCzH/bXAYJIQER3h/squSfCfhqRQB6jnfHee2UNIaIAQqtDIZTXE1Ql
3nVD4SIKIeASHmFhTl35unsDBvTLXZssqfijLBgijMcK1u3ZJUemWh/tyjAgfeoxL72a+8rGKuBWR1Ju3XV7ZLBVERVH/dXNzSnqfVUiCzDfmDA5Jgv2yPwL
KoF6FZ6xY19LTz/9dHbGbbbZJmkJJTzgEe6dohv2u7MvSB9/8NYqNOAqIwk2hZ8yneuarCs/YDg3wSLh31gMj+Rfy4HnHp3vHM708CNPp2lTxsUs4hm5noCh
TMX71FNPxS6KMVkvI0fuGLoYkGHBV2nf8+uj0lXRjV4jHvLbbrvtMhx/BBGJjV151dUxGTQxdNs27b7brtmR4UCXtG7sJtlyi01yqybAqcMO2Au9sh8J7RyR
3RkueBhVC6KMnhX63VeuSV4GKPalrqn+CjMQx9ixU8b/2Wd24JRH+/W8WrUqXcjW4Yz272HD+A298FZ/0Kiw39y4FARlU6PFqRaNV284RJSKPFw3j2IogEIR
S0wYsK2Cd8qDRLIYatrSFCtGK3OUqg4lVGURZvVsCnK/RIamgXNuKHlIpgNcuKvw0fT+B/FkacfygJYogz6bHBm4+xxhwpvv5G5A+NF/JTNyHfOetdKV+a+b
Ky9W6nkVzpxdM+PC6YLYQrPFJgNi4XZSjnRkVtO9996Xd3vH5v/IIptlafz48Xkxl7LxIzGCDYYMSO9Om7QqL9+IP8qRlfdBnHvueemZZ1+MJ2x75XFU3z59
Vt2v5cn7wgv/kO684/60TucOUeesvPujOlMt16NH93Cit9KwTbaJKftJuTUlv5qMxw477NA0aMim6eOZn6Q/XXJ5evSRu1atxVT9dQ3BLoz1ss2GDcyOoj6+
qqPpfp/00xPTJptunZcILjj/3PSPK65Mhx16SA4iyr/zzjvp3tjNbuHb+IfBCxTg2CzKHtgQx9JT0dUG32K24Mv+2A37wT+90416DJ3dgcdG0A0+J1CHk4CN
d0MJ+I1Xl0Q+2y6blOfnyTg40eRg6/DC37huFjU7RdCcRQHEumaQtbAKiFEHITZ71oiHEMD9MiRM2FHOGdVxIBQs46GMPBgSWZXBkCQSORfVLSZab2gdmzKr
0nKhlX+qgZWdGGWhTzkLeroFWiNN8NyZFgRXd80qDC89MYaTCO+rUsWLjprwWRMZfTZvaizWrp0eefTpdMGFF0XrNCFmtl7PM4mjRu0eRdcJYayZNtq4LCJP
mTK1Vl/1ywBssfmvFHjAr/huvvnmPMvVuXOndM3Vd6Ybb7w5F680Kivdd9/96fTTf5P69F0vzu9Ml1z65zCs1U67sli8kwLP7WOQ3jn97OSz099jecFs4ph4
ZOPHJ5yYnWj9AUNj2nt63sQ7MZYJbKmSKi7nxtQ9unWIMa4F+0KD/EqXVurSoKFv396hy1Zpg422TEcecXh0LY9Nt97673TTzbekn//8lzFe9S6FsobJDozf
4WHwbNKajXytB3tyj56rjZEhm1A+XDk7D/uUz/Y4AftTT52pU6dmp6vDFffqzKEH+sC2gfbdOJxLyjrgAYO9G6fFmlsZt/DcShQjN3EAMU+uAtFXdY4QjKjD
aRBQnUFddTiI2Q+/ooCkPHzVIQmnNssijXpwKKffyvj9atUQrm7DxPgZsIF8EVaz3LKpXxRgIDg3NY+mumGkrTDwtzqtNoDVeavP4K/JlH1N1ciXxA7zvn16
pvMu/Fs69Rc/j9vrpF/+8rgwlNtC8PGUbhitPXhW7G2KldRFp+RX4Knn5ST+hs8qB78lhP4xVSzQ7DBiWPo4pr3JHW/qV1o8TNh/4LD8qMSIHb8Wi8ExVo1F
Vj2LUm4lzsxTaf1H7bFdOuaY72W022y3Y9orumuMnE7qgYfevXrlMtUmXORxSQTeWXNivBXbgMy0ki0jg0937vvfPy4dGi2QQP3BBx/mcq+88uqqiZl9v3Vg
7mazJbbFNuClyw8//CgHdg5SW3tLNXTOqMmA7VYbYV9wkxOZgGkii5xcgymRhzqu/brvV0+GHeolgAEnG2ar+OGc4MPrOuPjTSojXkuBAcgIAiDEA6LrVZG4
VpYwOUAVWGVUPiRaBoZg9g6j1Zit3RBSUWosd8ZsifEOHJgCB1wG2CwMz+Pa8qvBZClkQbSMfq6nd0WSMkFBYGgnJPTPi4euttt6w+zQtV79tZovikpw/r9S
VWAus9qPVgUZ6z1jX5+Qvj5qRBjNDdFNG5IF/r9gFt5XO5Jy9v5J7kmhhv9KWuk3J01PvXr1yDLxuETDVB1Jnn19dp9oIbSmDe+tBL+yanmf3d13PZJO+MlP
0xGHH5bWX79fNo6GsBueV9oz7JU0egFMp04d0t8vuzw/Acye6AFeduOXATryeCh2fVgi+MlPTkjPPvtcOve8P4RjCIjlvRycNkf6MNgaKI1xOQX7oWcTCeyC
rTBmNinpXTF69sTZ2Cv9cTo0uSZrAVp9tsi2HMqzZ+XYsXto51BwlABSghY5KMd2GlfvBUCmpADmEYNIRCjHyZwrC7Dr+gtYdQ7E1cOGQAS6V5GCjRlCYcxV
yGC5V2gyQ+SJ0DZp3LiJmZZMXPwpzJRmduONNkjPvTA2O5168BAYRYrCn8Rs2qbDNo4I0yFXh6sma1+T3iyD+2q89V79reV1EWuqefXa72uvvZF23mm7dPEf
L4pB8xarnAg/X5W+CoaxnfS/aGFkXWNXg0khL1VsSFNDHFb3Z3zoYb2y88AOkv9ORQYcqmuPgfGA3/x0/vmj8ybYDTfcIMsfDf+Ljq+i/dP50aOIx3b/9tfr
48nei3JgZCd0VX/JQoBlKxV2x44d8tLE3/92cejLU9Bla473ArIXemTYnEpLQX3y6Jhd0Xk9p3uHWWL5yknsCh3smd0pD5BALYhzOLDwVXpBpSVk31pPLT+a
0aHM/PkLsn3Xa3VjYqX0JSuw6lCVeQRgGgGmtUX59dZbL78Jx85fwBwY0ApgHuEEJg8BYIHh2m/ZxlEilToIzi1jKJ4AJNFUXbu7X3zhiRiMvpvzqwKqge66
69fSJ3PeybDhBs+qt6c/1Z0777NYYNw9aCizVfitMN6L2cZ773kqtV+n76q8jKTBHzAl9dX9cmKszZr3zpteTznlpNzyoTvTF7ySnxeuvBQvPrFP8O6778kv
OQGn0lFhMpSG+V++r52yN09LNHPmrKz0Wve/fwud5GrSp8p0dZnS4uHpg/cmpt7r9UhHH31UNqZKO14ddlWMGfNSTHA8m+6//4HczQIHbQ3F8fEH72Zed95l
q3Taab/IY+gbbrgxTZr0Zh7XgEsWjI69VBjk5Bg0aFA6+aQfpSefeDiMssywKaunorfBdqzdMGy2WAO6Fk6LBb58b5VVVhLM4ZT8KkMmbFM9j5iwVS2cfPVt
MoazOrBuarvY7qS169OnT66HLjjsRaz8NGUoGIEEUEwC6FwyRgEU4ZpSZfVPlbNOQElgaLk4jHxlJQS7T+gQKmOHRMuW+uqrHytGuHJg124fOMrn1LhLeuaZ
Z+Nx661WGXMV0JbRRTj3vPNjsHxS2uVro/LiL3p7xINlt95yQwxiT831wPmyIzz3/AupVbvWqWvnMvYryP77LyVJZILmLyd0LF08NW2//fdzlwWvaM9OEJb2
+ONPpJ+ceOrK54YWp6mT30h33nlXXrfIZRoArHJT/8tJWe8ucI8sjWG/XN/YLcw/Nq7GWHLp+5lmxuR9fV8uC36Vx6hRu+boTP50WPNNAhx02EmxITeeDo6u
62OP3hcPSo6JWbvOWVdVBwEp9V1/YI7UHqWwYdcevgMOODSwLImtTgdE8O2Zd2hYr9k4Jl20RGgCA15p6NChqVvPQdHy2ARbuoL4FQx0a3Uli0OVRVR1TD4o
oyHgDBKYYMsTnNShv2rHbL32mJTVZaxOZhOv2dzqhPKr3Qv21Ufq/Yqvqe6VGTjMcAjX5v89EiHitG3rmf+YCQsiEe2+iG9GDQMO/VX1EUUJDi2DX4x4k2mn
eAFJxYMxhKhj1dqEBII5nhZPQguDUX9k7I6+4sp/pW/H4p8nY92rrRwcPzr+hzli/P7ci2M6d2quv0FsTD3vvAtitf97mcZKX/3lsFdeeW3aYbth6a47b8v4
c8Uv/amzeRQRcfhLd+My8EsMo2FCl3TbbbenV15+On17/wPjYbwpCXWDBg3M95RpaOD4tVewpgrDtXO0oF90bBePazhnxKsS8uLSTGu89yg/K/TmvCmpd691
c/1arsKtvwzpy4nBXHnlP1O/Xp1ik2u/NG3a9LTTyN0DVq9clK5XJ8EjJpHCZv58yUW5dXHvoNi1cebvzkl/uviiNGSDzdKtt92bPl0Qzz6t1TQ9/+wjeVq7
8FAggRHE5zfc0jHdszt0MmbycXAIMmCL7tWD46CLbtHPRmpij+7Lh1P3TvAGg90J2vApBz59wyGPjcJb67rPXuEFC57GraIiQNWDFTI4LU7ROHe5bDBk+Jjj
BJ5r4Z36k1oTgAwgOQrHAhzSOmXoeSOEaXbBNXMDlrx6oAEs1+7V6Xjl7QSfv2BhvATk+iwX+ODACLrRfGzMOD3z1IPRhXoqvfjiE+meu25OHogjGGXBUVYd
yYzUC89PiPxyXfPzzQZ/0CKBUVND49cCSOhvmGqZI444NO37rf1DubPj2aaBuWXt27d0Jb+M03vi4mmk/3KuCpOPaHE6RDcDLhM4UTDn1jIVHvlsPHRg7jFk
Bw8SKz21bPkttDd8WQsYyjKyn5/y09Snd/fcNR06dEg4xAU50Llfy4HjfNKEV9Luu+6cnch98rKw+rszT49Nsn+M9ZgWcd0hnvhNaccR22Sd1bqFFq/u+iS9
/96kwF/sBwxLLb169cq/+GLQfvWK2BhZsE159RztumOV52prxkgcAFyOww7rPICyuntsiXNJ4MHHpjmV+9WBBH/8lb2KUUZTzCkMuhCgcm0xOAoAVoCtIBMY
QMo71xWRjxGeizhIa1LGPUgxqi5HAR+BiK+Og1kMutbimTp3Xz44nh065ZSfxcr9BnmBU5574FaB2cXuaJjc40RgC9cc58knn4qW6hdp91HbxfM4xVGU+aqE
ZqltPNtTE9wVp/Ge5ClavFIEXBXeRhttlK7951URWcubTHVT/lfy1qUYHWYH+D9lgg9p2rszQlfelxBTsf+nUMkgw7GvTojWW5cl1vFivPlVqe7asJt7j1jv
qnz5lXbYYXjadNNNsk58zYJeGqZazm/neG+fXegNdUIO7OeEE36U9t9/v+wo5OLhSq1DlWGF+Xx0tXccuUeWI1kyVDDANE6yxiTfNTtjc2gynjax5LHz7Bgh
ay/7XLoiglsc5MHp4KNPrREYtTUSbNkRmMpJrtl31b/fyg9+2S97bxZraDYrN+Y4WhKZiMwzGisBdevWPTsXHVZCOIGEKMoE3B4mTTDiIEAgxNVhOF6tr+vG
YdXHYHUw9zGCKQJBB6MEEw7RaqeRu8bEwSGxs/mRrPTqRGA5lG14qCffLwVyIltehg/fNxkUG1iWcUn7XAZfyjb89V5wqfa/nVd8zhs1snWqbzwO8WhsA3pG
Vk7oqLjx6+sMnEj+e/EELWW4vzrF66FC/p4ZslAsVfrzhT9RvG4ero9I577cygKVdg8QDonA431+dOBhwYaplrNlptna68VbYm+L3RZvZJrca3jQhzUVBstG
TJygXZlKP5rW67FOuuXf90SLW2RQAwk7UE6AGzRoYGwx6p+dqOIAS9lXx45Nl19xbXxep204Q3kuiN0YThg6mBggOy0Reerq1YP92YWgPFweaFwSr0WT593t
4OcAH/c4MOeGl+3jjw3o9SiHLmVy+RAae1ZOb0mq+WjS42LDjsYMGACAJb8Aa6E4Vm3mEO8asYycgtSTGKRyjG1evK2lejg4zqvAXSPMGz0JRB0EijZgYgI9
8MAnUWKFIyLtPmqbNGrvI9Jf//r3DKMqAi0cq+Ehrx4UYlvK9ttvn0bsODSEUx4rnhUvR+nUpV0o8rWMT31JPfw88cRTsYVmq/TAw0/llXX34KzlJk6cGHxM
jlm7HhF1j07PP/98rut+pU2dmm6++dZ4dqh0Ud13SB999GF6KHZGbDxsq5jpKlPyFYb7Iu2rY19PG26wfsbdqWP7NG78hMgvK+5kjGZKtaetS3zx4v33Z+T3
/D37/NgsK3Ckagx2YKy91hqpezxpe/pvz8ovVIGzysxvTYzYg5UmT6RaxrmV/xeefyJtMnRwPMZ/dAS6h7MulYGr8ljtoNavMrSOdPyPTso7Gzhq1TkDrkYt
n62wJ70A43YOIbEXAY+jsyF45ZGFPEmer4uA6YU47hmTsbeSik7BZKOVdrbZ0B61gnpYnJhMjMfYcpMBAwaP9hpajoFhrQBkKiMCUJ4HsHsQE4j8ipR3ugc4
4binP1mZ4Agiky0ejNOgGTz5HKY6KcIkMMCrzSl6lJUvWm22yZB0+VU3p6efejKYMegr06V1vIM+9DO+yZMnR2txX/r9uRfGw3BnR3dlZHQJtJ4l8kTRmOHr
HN2vu2LXwLrJ++TQpu4VV1wVW1suzrNNZsnmzJmVF1pzlA9FaIFOOOHUlY8M2MXeMva/XRr9+ZY5wlEUHsBitH/581/TBedfFlP50+IdDr3iratlkP9hyMWb
ghaGbDp1ap9uuuXuNGhAn/ywHXmQ7+WXX5H+ef3t8XTsOqGD8gj325Onh0yWx9hrcJY5ed8cM233P/BozLJ5+1Os+4UcvVPi3cC54YYbZD1bq7GJ9gc/PDlt
MLhPlvXEcN7b/n1n0B1rMRGR2QLaGe+rr45NF170h3TzLXflbTv9+/fNYyX6evvtyekPf7wkXsxSPuMD7+9+95fQ8+yVeinrgeA5GDojtsj++uuvp6tiP98J
J41OA9bvFW8sja+BRHBlP1pA+uZIJcB6cWl5Fx/+awuF5+pQZMXQa4smP1BmeOyJDbENdMMBdrVXDl9bHfaoDLw1waOMg51Vh2Ur4DTyWRcCg0CharSUJ0Hk
vkggcQQEcRT3lAdU3eo86iqj9SK06r1w1AEbxtRh8H6VI2jX6tb7GKr0MUj9ZjBEinfemZ5eG1seud5++M7RKvSOuX2tUJO8jvRhbO2fEI9bz4w9Wzvvsm1W
BAcSDKpiCcI14Tz26Csxhb5lDLDXi2nel2M88nHacvMN86Mg0RGLB9/eDNwt0sgdt43JjwXplpsfCcccFq1piYyUY/Fz8uQZIafF8Uh7v+irR8sxbnx6+aV4
J97m28XsZYf8vu4nHn8wfXPfb8fO9Z7RskzKhqOLgkcGMH7C22mLzTaKl1C2Tc8+Nya9NGZsGrnzdiHv8vgAXOT76CMPp+E77Bhjx0FZHi+MeT1ahoEhszJ+
pB+yfWf6B/lFmd/+1p4xCzUzHsr7VzxisnfWKX0xOOOg++/TNZsdLf9eEeDWjE3BH8X3pj6NcVajCDh2FixJTz35cDriyO/l2d3/3PlQ6hsTEt5w68sVbMGW
oGnvvJffJ9i8Vbe09x4jo5WJ97/FUooZRb2Wt9+eFu8RfDD16bdhtLIDsz51eWsLVO2L7iVwBXT32YbEnhbEjDB54TEui15TeSMQ+/xsYXn3vLIND/fIDzw2
QE5etmmTqnJwkDEHQgP8AqiJmfLUdPENtpMddqutt1+hhWCwjCt7V/zWaWlG7Z5fCCQINGkIAFxTCwYnANhMCwJqBFKPYBCjbiY6ytZ8zinBDZaBOdgcrEYo
9yodlTFwRCn19fc9W8PQzGwZv3lEwuZb99W1Z8vLMgwQ0SnBI4qByQDUmzPHZy1bRAvTOu7ZA1i21TM2OyTsSO/QocxQLoztSfgEhzIpCK/WyzzoVt7b93nA
8uRxbAD+bElqF3U5pJk8Y5wWsSPAm0+9ow6/ZO0wW2q2cr11GWqr/OgKmcAhaeHt/MD7jBkfhtxjCj5mvGhJ94g+6EFramHRxMPcufHSyAgaPXt0CxmU7g+c
ZNm9e/eAFdu5Yjy6NOTIqclA3XmxlcpYS5dfd9JDlJysezevBGiV68PFqNCu60zf6PXev48/nJ1mR+vWLt5V3nItL5DsHpzHv6DF4O/TkDu6G9pgXGaavc2o
2gOa7NDu3LlLyG+mItnolcEHB8gPlMYEBH1ZvKbbaismx9igPAGkOiw4bBme6kBg0Sm+5Bt+wK+OXzap9XLeaOiwLfI3ZDGsgISgamgQmiAAVN9QsrXGuEhL
Y4euRHgMFhzlEE7h1bkogDGDhzjnBE0oBoe6l5zAvdrVg7PWwRyaKkN+TV9K6Kj3LVqus/I7Re6LOvgCS9ONNzgkdShepCNENOrvahks/qGVExE+OpShaHxm
eGFcIr9WEkw0gQdv5cNgl1Gt0SwG9muEDOMl+3j+dH75/lHLlmtnRVQ60VcDDBrhhdOBHnKFp8oPzZ4WFeO0hgIaOpVTBp0O12CU7mZ59xuc8tBM5s7pS1mP
NdSuujy6RJdf9kDPegUCAfiMTT3Jm4IEEHKtEwHWHZ173g0Mzt8qvkEVb8HJsIrDl50I9KKMQOHpajO4cIJHl3CTQeG3aZYf3IIcGc38uLwvJOOJZ6T8MvYK
lx2Bgz+68tLOL0LfZFNtDxzl8Yw/+iRPsiAn+OWBAXeTvv3WH61J5wgKYkg3DnKRup771YzOn2+ae1EG4D6i1K1O4QV+ALuX36kdzgKWPq8yCMMUQgjEFnye
PTcmA+aFoKyVuIcJSXnn8BQcBRcclelaNtMTjBK4soSB2SoIBoaW6uzwMFyJcNBBOSI4fqshikTwo4VyJee+5mAcgrdKp19w0ZZDbJT1/rwu8apg+wrNPnIu
kx8SWSjI0NBc6VscdKDNATf6HHhFl1QjZNFb+YIIujmGg17Ac1+3ynoeeVenMTuo9xC+kN/EAyZ85COf3JyDhSew8KwX4Bw/6HEP7c7J2Lm8InebQst2HLOJ
kOklfBEPbGqh8Y5O+kRnhYcOsDgkeulSUoYxu1+cN7auBW8dwomyQUd5XyIBRxK0qm3jhQz9gknf4Nqho7w8NIOjZ0RWUTzbA37oFt/4Y2sCqrJgNBkyZKPR
biBORo3ezgGHjMJEL01hJVB5RHJCCJSnxAoYUXmHbTAOuboMHFPguTbdiwlEwUN5yoKBYU5QFVuZJsgqcH1aeNWnEC0bXmzzkK8OwTh3VKNAr0OCA0+Uhi78
5ygVecqg2S+4aKplfM0hKzobWDEytFZFWVT8PJThq9/V6Hz5gfH53I03vGZjj1YKbRlWtHxmEZUnP7JSBk0UCTY9uC+pJ48uRGwOJ8lTxsFR8QiWa3yQvetq
UAxCa12jq3zRmVzJjxyV8etVxrqtgim46K66qS2cPLiMsapd5S+ehxOpK6GDngVO+tS10/KAYWHaxlM8sykwJNfoVhd8PQFy1lX3Si5dUu8/52D01irGruRo
ls5LM8sXV8p70A0fvOu77rbJCLTqQR968Bek5USe5FDxuyYjtDnHf5Nu3XuOdoFAXTKCZxAKMSDCIlDEA+SwJiBff1pXQtdF9wcyCCjULwOtTLsHDyLBsIBm
IU0+7/dLWcqL8HnGKWhSp8JDsIOg5CtnYoGxEaxyjI7A8IB+SpWvy8dpwZdPiZwCPGXR6hcNhItOsMjFUY0ZnT68ZXcyGAzLh52XfV6cPkf36L7g8fOch9bo
hsS6RphClosuTlASgSSmYJtEfz26tO4brNvRrYsbJOYnjNGUDS4y0I0W4x8tGr7kVecmE/zi0T0y0fXzYkcwGCS9ue8az8pwIvzWpAz4xhpoW/BZeeGiCRm9
Et169VuuFWsvYQMcmr3gmfXpDsOl96IsuuDUEhuX6TaTO2edNdMrq2OhN/jQQuEdXVpPep41e2bWMdjGVAIR5xPE2R/9m8Ej28aNS+tDvqX1ixYs9KRlgYPu
ygfZyk5vfFpS4bjgsFnl0dE+1jKDpNy1RLsDH+pwdjInO3l+mwwesuHo6lkESXGSCgpVgWstquBFbIy5lwUaBleRUQplUzAYDJAywfJLwCYyasRVDgOEhg7n
CKNY+BwEoL4EnnvwOXffOWWqB75z+RJ+ssAiMlFepQ04tIPrPrwOBohX+aKde4wBTHjAI+hKD8dzyIMTD+Aaa8Enj3EIOBRuYoHToVUgoTB8VDrgcE9iaGHL
WdbkQw4OMhZELFy7pgt0w0U2tSwYzr0nHE1kg3+8OJTFn6N0UYu80cJQBAm7FSpfunRkZDcMw9OFUgZcLQCY8JhEoWO4BDnwtUj1wUWyp/fZs3w1r+jfs0Ho
gRsv4Hxm3BxByURMluPS0r3Duy54CRreoOqtwPbele4wmZCdXz0m5eCT2J1rMnbIR7dE9mjAI/xVx+Gj2YmVJWt6dQ/fVVdNBg4cMtqF9RgMKIAAzIt8zitA
nlgVpk5FiAhltFwE57y2LroLCKDQOi5w30qy+u75ZaDVoDhq25hBwzDGwGSgxWBLS0Cw6mGKcAgBk8rUVAUFrvOsnJWtq0E+5cuvDuk+XHiUCIpBoVc0NrGC
HjNOC6NFVVfrhTZ8kFWFV4OH1ghd+CVTkRJM9PtOrTUP9eBChwQWmHZQ60LqworkFM448YtOdegEfPKr8oSHvOmjBiy8qiupJ+lJSPJNT6OJvtwHS11fw/CO
QnxpBdDGkRivBU4GjS740USG7EbCpwSeaWN1Jc4DHrkKLuQNFzmoT8Zaa91fOLt27ZZp1Jpo/SufelCCEbvSqhT52zIULU7oi/0IYPCa8ofDwb7wDD9Z6RbD
yYbARpt89Tg4+sCGx33BQ88NrWBk++rRc73RIpwmk0AoYO0QHOVIgKngGvCaR3CIMWXMGCCATBdKOcgJVF3wwUYggh2E4FodDCCm4mKcmvbKGJwMxhqE8nBh
wH39ffAluBgDuGhzTrlRNMrNy/eVcR8MggZDWfxVntHiHJ3KK6ubgi/34AajCjsLPMr6rcYNprrkAA651qlYdY0BoniGozuIL2XIFfwi79IVg4eBmxxgiq6V
lZQlW7zUbrlr+OGp/Fae0eMc3+TsnRVwyxOEGD960aElAN/snICly1Z5zl36oENggAN+sOlJHQZLN2gxlilGzchLKwWPc9t4tDCu4SRfQQRNYHAKegSTA6Lb
UIBe1Zk5c1Z2DF8eN6mBjyiaFsdH0+ijLs7WCRI00hG6HFX/gg4eq65cV1jo09oJjmjxsCr8dCJASU3ad+g0mjMgHCCICAZDKlXEujmQYEg5QsColqxGMEL2
cWBlwEMkhYOjDMbBVpehYFRZ91zDpQ4G3KMUA1YCMltGGZQND1qUVwPigIcAAEAASURBVA8M5eGpExbyCKPcL04hj5I4BiGC4bziRjM4DsqKnxzJRF00GRui
3bl6eIOj8skQ5OMx447WSGQETz7c8HrosHngMgbBo7LKkANnyLIL2oKxLGvwlcG7X3ygvfKOb9dgUy6Dd0+9HIACNl3Br35WfPCAXvISoUVsdEv0K5Gzbike
wNOCqWNWk97JB81wOOAjH3qGDz0CJprQXA+81kCgJQETTnLFC1jZYYMGRuwcvFlBp0kF/FkTIytrbyYbjIFMENAbWuN/1I2JiZALvsgHfegFj4PByabkuY8/
sq+yxYvgLXgF+Vl+YHEeOExWuJdp32ijYaPzYDiAKYQZrz8qxlTGHioRAsCMl5O5X4RaFlYrsxh2DhaBSIgjUDM2lUhCAJeDilhVsFWJ4DhEPgqpNDBIjBMA
uK7BFiHkMRR01TrV8Ny3VqVvz3gITfcOnQSMP7+itPqacfhtl6nO41eUhV95+7vyYDZoMMCFXxm0OldGywqmrqEPjenzk4FZpEAXNKyVaYeTbHUbReG5MQMF
ppelKM849ftrt4hc4aBJ3T84GWFJxfDpiIzwAT4Zkxc8aJPQhW8BCzC8SVVPljDKNiyL9B9l56EjMFvFuMQEgq4Rm0ADOasrwQEvw6tyq3aDVvfYirU4Y0pJ
XbZBf+TrlW5Izbpp2z4HE/ySA7haB/fYCVhgcrI8vg3dLo6dGOwQvWwbneiHn4OTDzrRxfHYg+6jaf6s91iHAtNYi/zy/YBFTq45Oltp0rlzt9GIdwPwKnTE
QQwJhbkPCOB+ax0MI4YwRQXdgEq4rh3F1Zbo41go40xgY8KB+Sp4ZR1wKAMPuPIwI195+NCjDKEY2KKDAiqdlCoaedoRDxSsXw8W+tR1qINnNEpV0fLcJwM4
JTCr0Ctu9dBFkfhQFl/wmYo1CUEmCxbE91UX6T4Y9wlQJXpXeOjw6L7dA3AycF0+v1Xu8sFFB524xo9ruHVf6bDqSH1dbQ9n4oeMaiuuPl61LmgoMiifiZQv
uOl2kTuDVqYGLN/TXRYD/8JHacXIp+pRADBUYGgMV100osuvlrHK1s4QPNCjfGXJDz0Wdn1vChz1zLCRRR0nkYV75GCs9Hl8awqO2mLl1iry5SmDrxpY67DD
Dh10o6faGRmRI3kJdO7hPUjLeuY86ISHgynfZMDAwaOdAIYBBKsoVSIZrqlGjOheuI8hjBMUpcjznU4CwSDC5VEEoorSy769KiwGLaooBy9GEa88PH6r4TPQ
6iTGcMopz0gpzaJnpSMbcdDvAcUKD6w6loIfPPUpDH/oJgdl/LqPropX90FCp/uS33roqonolCbCUpD9eAxZPnnhZUnANS4hO2sW6qNHFPWaXWsfeX0tIm0N
FlXR1dnBQptukTzllMG3fOf4Fa3zOChoQk/lB69krwy9oMVWILRLZAAH2XEWDklOymrR6YxzuU//YKHB/WLIZRbXPbCrXvGZ6Qq5Oyd3s23ga4WVc1F161eA
QbeFbOtE6kTBLDvl60ttvOqMYWvZvDTSBAk+BQM6I3v2UZZVSutFt5zSfWXtwC/6W/2eRduh2GKVsbKckd4ERQ4Gd5M+fdcfjTkFJBUAA7jMbhQGRS6IAUUU
hjBIABhWD3DnfinVuToVnvIURIjZqKKOyMeB3UMHhanHiU2ZLojtSLPnzAuDKU2te9UhDL4xY+xScYmGBpZwcGL0ogfeHF3DyPDmcA2fbgfjkLSY6EWP7o4A
ousbf3I3zhQu4yiCLFFUqzFh4pT06ivPpS/iy+geYQC7DNJ1T8uWGNPV5Pzw4y+nt9/5MPXq2TlaoegCRn3y1B1Rh0FaK7FLpFMMcJ0X5ZUuK0fBb7OmMUER
eBrFNhvyzw/wRSuo22Nig0pNkhgoO9e1RRcZwUcGtUXlyN7f/VmMM9rF/j3l6BwtWlswc4sX52bPOJQAhl56Iy8HvdO5eoUnzlGCjvvkRgbFqWI8GWXlcyY4
TXnPmzcnf5LStSDkPj11Xqdzthuv6qJPupkfYx280znnZvie1WI/Agka6A+MEpzLliI0uGfBHA/4F8TQ7fOh6Cl6K/onczTUljzDD0fGa6ald59+oxVi2DIo
iIBdMxjnHMq9Qnx5KR/CMMCoASVA0Y9iIOSpYJQoU56BEbl01WqXj4ARhhgH5uADk5AWL16WZnzg0YUB6aPoFr4xYUp+CSM64LB9hKApEg8hufgY8iex1UjX
zTikTCqgFR5l0UlYxaDKY8XZKFc5TxkToY3BUQLajIU0495MRCHGDehgZG++PT0dfuj+8T2iE+NxgD7ptjseCAfwBtPumS7Gwzk5wyNPjku/OfW4dNTh34n6
S9M119yS+q2/XsaDLokc4Kxf4StjgqIjfMMbpYpzhZPofljgDrsJvksXq1mzJrG7+pF4bVf3CFYd42uHD8Qm3t5hjJ3y9HXtPnFe7wW8/74xabvth8W6Vpv0
wCMvBf2tY1+gKfYyY8vw4a700S06BRH5jNJ1Xb+hd2WKkZcF2hrQ5Bv7+ESoVpkTaCmyDgOB++Tl9QV0UBwKX6WHYOkBXPZENqyejsFnf3CTEcdQjsOjr8Kp
vLvWa1C34sYn3GyCvZQF8rIGB6Zy9K8c2GzfeaOdRu62AgEVgILWPERPUVolFQgJQkgk1+rwZgaWgcU1Y9Ac1zKlfF03KE1sNXwOox4H5KyVhshKTz4zNv3k
+MPjZYvH5BYL09fH651+PfqCtOnQAbmLBI46DM6ugIcefCU+tLV5fun+008/n8a8ODEeFdguK0BZ/FA6BfjFA6FrhbRsNuOaISwGYKaoTK7YTqIbhU/RSuuo
nunQKVOnx2dJzokvUWyXnx71IpGJEyelgw85KhtlMagmwV+8OCZ2L9x0w9X5cXldEHzffvud6ccn/jKesRqcFY4WrQi+TIbQhy4LA/00d6nCSCJ6Gn+ZyMh9
+DCm7EhB44rIbxX0PfjAY+mPF5+TDj7owBycpk+fHs8UXRyPh7wSj250z/Qw0sVBU48e3dNpp/4svxiS3t566+34ftJp0S3iRD6sRS5lX5pAhCc2oTv9xRda
hrL2Qp9sBC2Mnu7JmT0wcHLX3Yvq+VGM7YaPTOPfmJLmzpoaj5TsH4P2udkZ6AoOXXi6MFMn5S58BEQtD1zuCdjsl5wcAhCcDjCMwdCBLrgFM05C945Fi8sY
y1Q/3VabUJ8OwGTTcOEDTMHeAZbkvMk6nbuOdqKgyjyX51l8BChHxriHCNc8kEBEeIAZA+QG/PIrkYQBZiFCi7F6cA85x8AcmGY9GITIwmA+/nhO8tzMmWee
Hu8x+yAdecQRObofcMB3otFZkh57/NnUr2+vzBSnf2/GByGEBemmG/+eX3iy88iR6bsHH5h69e6Sv5LQvVunTBsjEBjwqLVFP9oNxku0Ed3KtK9Ibdqb4LQs
XnLhc50dO3bIgtOlfeixMemc+B7S1/fcIx5m+1286PDr+d3YRx55RLSig9Kvf3VqPNvUL2B/np579on4fOcD+WsRJ5xwQrxl9FupV69e6aAw9AUL5qX/3P5A
6hXvlyuRMF7c2LVL7nvrUul+0AN5iZKc2oDX9Dn5ruldd3FfC23C4sEHXo5XYp2dfvCD4/ITuxdeeGHabbfd8nsZ7r773pDpR2HYYVQRILTe1137j/x2Jjzc
fPNN8bamb8eTxNulS/78j9yC6R3UaEw+aEQL2fmlS91jBk22WnH6r60/mYPBZjigOjffdEP+muLxPzg6Xs81LF1w8ZVp/b49Qt7FNtgFGMY7z784Lo0f92I8
ibss3k0+KfVct1tQUZ5PEnTAU1Yij0pf1nMEI8n9Yr/lKQO2zkl06fQuyE89vw7OB5Zz5SoMPMAnH4zqpNFzXt26cIQ6TakA5LW7xoEIhEMNHDgwA2GM8kRX
ZQmuGqn6PLYQWyYGasQCQz7G1CnP1JR3kDOcZdGNOvmkn2QnGjBgQLxK9yfxlbnd09ixY/PrtSZNfCVeIH9dfo3WzfGAWs+e3dKD99+eX9Zx66235N9HH30k
HRUGfeSh30oz4it46KFEjiFVYaEb34RDSIJK7XqWBb4yQyNfN0JwIAt09u/bLX/E7LHHHs0vfwSbc/zlL3+Jz5tsm86PN44uDV6eiDHRtddel59QPe644yLy
r5+N78gjj4z3N7yXvvH1PdOH78fYJZyTUTJQ6xzkxZm1ZsYrzhk0nZj8+Cy6Q/LQgz+y/OKLFfElvOHpqKOOiKdgn4qv5u2YH6/XUqL9oovOjXHVkpDJzPTc
M4+n6/55WR5j0psXf2699TbpiAhcvvJ++GH7xxuZxkfUL7NoZEVOElhaeDpmM2iuwZVNoIeOycRyA/qdP//cy/EBgMvzS1XOP//8eOnk/fH+u/3T1ZdfEC/r
vCPKtg8dmEW1nNEiPiN6TzrmqAPi8fk3ws5ejXdLPBO09co8CGqMnW44cZ1QQRt+yEqwLC1HeeoATWijf3XYsAMcMtKd7Bw79dmmeupLeOJoYKpLFmA55DUa
vsPOKxp6MYFQJmJ4nAoQA6IcYSijxVLGPeUYqcF53M5lKyNmrWwlgVA98AhcF2bSpMn5ZYP9+/eLSP5eTHe2TBPfnBLjje/EeOOEMIajciQVvW+77bb8Yo3z
zjsvdz1ef31cGNiszLCWCh39+w/In1M89NBD88sGfTnBm0IZ9V57fyt3HQhdYhBoZATq4s2BJ/wTDoVymFIn+sQxA5QH1/FskY9zHbD/PvmrD5zjkEMOCSPc
Ot6iOiX98Ic/jMfb78l4yEWLpJvy+uuv53Ivv/xypvuMM87ITj9w4OD8oNrOu+wWsoO3vKHWoJ68m0QrTemMiwzp4sNoVXS/rTOtWFGmbyn/kYfvj3dC3JCN
0/Wrr76aX8j405/+NPnc5EEHHZTrMxB6owuOP378+Phk5yUZD8PXFXzxxTHpa1/bJb7wNypkV3aP0KNuGjnpYmkZyc8ak5bIOcfu0aNHhq+ngmYtuE+YXhbv
Bj/qqCPjW1J75Rb82XiDKxz77/+d+HTp3umeu28PucW2q7bxTrl570bA1ELuFwFnenxz99YIXN/Ige7Agw4NPIKONaDSvaIzTkHmNSCZQbXrHl14pXcyro/7
eFyFbtSpgYJdVAfiTK7xRF7w0Q85kEH1iaYQY1RBvwBSgIIOAlGBQHR3TEMqa3wAiKjBUDDB6KrnglGMaFlqG4QY4LnO8KN78v4HH4eCRuT3Fjz2+NNRb2Ee
yI977fn40sJfsoH/4x//iJc8npcNkhH4rAncXhLZo0f3zBg8hNO1S9c0+vTRiVFLBC5ymAb1VfIqSDxK4DBIi4paGvfBko9fCf94k/IqeeCx1R+ItyaNjUfJ
f51hTJgwIUdz5XQD0XPjjTdm/GD60PFmm22Wneu3v/1txqOs/j1jMFbxQWIOu3BhTJhEV5deyKzMyplNLVPt4Okmdw/+Q4zZkO19Wx7OZJtMfGkpbbXVlvld
FZtvvvmqlzVqZX7961/n3oSgZVzTp0+fbFyXXnppfP7lPiRl2nw5nh7teCl5ZZ3Kfj+9h2qQy5eWLhU7IB8Gh3eGxpDJn82suWb5hvBmWwxPe+65Z25d8ChQ
jho1Kj730i+6uvvFezOuzO/B8KKTt9+eHI41Ku0QXx8URH1v97LLLouyfXMP4vvHHZOOPvbENGRQ7wio5cMJZFPssPQc8qMTjaMFal82UZcJovLoCT2jMTrK
OajTPztYHJM3aBbAOFYNGuCWgNAs75Esj6SvXiJqWjyOELxjwZaaMjUt0jAiDgSYZpyBOa/jIsTYWoJAA3FCRBxBV0IogvNohmkeQbfcckN8gfz8tPfe38h5
3//+sfGV7IfiBR9X51fW9o3X2oqQP4kuHVySSO8FkYSsKwY+Jz788CPSgw8+mE6MD1lVJ9Jdui8+8NX1pq7RCoyL2rMy/QyQcBiJPWIU7yADvDinDJFHRCI4
60Nmdlq3LtHHGMTTrhKnUU6Xqlu3btnxrrnmmtxd8U0ghkomjPT444/PLdK0adNyXX/IigLxMnnaxzHuWzc7CfzoMKPFGLX0Fhy1PKI+fCYann9+fPps0bK0
9VYDwjHKTF///j1zK3/vvffGZ1QOzfDheeSRR9K///3vLE/GiK4bbrghupzXhgwPzwGzEkZfNdLKM91M723bNs7vi/PtIDDpffbssj+PnvBRHElAKOuNxp+S
1wD4cnmnTh0joNwd7xo/Oudr/YwTPZHbu3fv5POb7KcGPDTvu+++uZsr8Dz66KO5N9StW5f04YxJ4Ui9cosCmLdTsVEyZcfkZ1LGJFIILtteeQzdpgKbVdvH
DhKv+ipbheoEhplYs4jszD1l8YU/srENzjII3UjW/TwIkoWiAAWqEDhzJYBEDorDBEAcyhS2roZ7GHbfTI37xhciOSSYMong+f5/XXd16rne4HjjTb908HcP
i2/xHBUOdXNEyEFZEF4K8qPjj4sPdV2cI7E+cf/+/fOHrwyWn4n3pT3wwAM5j1LRaabv1ltvzUbws5/9LDPlz5NPPplO/tnJ2TEeffSx+IJD/5WOUbpvnBmt
nm/R5ZBcF94bZWflcORhVsszQ5yNsSoXSyg5aZXxyZlE9XPPPTdPIuirk1HDpGullSKTmtTTXSOvLh3LU50CGKdGF2fKkwohYwumxk2S+wvjsy1/uuSM3Ipf
dvlV2XAtEYzYceusfF0zNF999dXpsMMOi0mYk/J4jPM3TL/4xS/yPkY0cB6yMeYtvJftQtF5WrkT2xYpzw/FwmvszF68uAzCXbMZtmGmEf+MWauq1deaPvzI
K+lXpx2b677yyit5HIYOMhesN91003CiEVmW6P7BD36QW3JOBB77Y48mn9iYb0VJpadQ4HToUNbv6CTrKeRGhrqgElnLr8kaJb7xzJadC6b4wJND0sBYRFcX
PGNJdm94I4Co17QaJUeiZEqyQEjJjImxQASoe4CJ6BByIIJSFgLX1QABh2xqvDN66y03TX++9KIccTT7ZV9Xiu7O5nlB9rTTTosW6vd5ULrFFptl4uEyu/Wd
73wnxj37xKdCLsgTHfnmyj+MlSE0TN6x5tP1finghpvvSgMH9A6jspLfLDsfJWuBjXfwIgLiEc0ikIT2OTHI19Li2St3RWHjJt3bw484Kg/Ix417Pd5990Qe
+3Bgg1WJgVSlkQ05Mo6axyjOOuusmN7/fjbavn26ZwVyHMnYjazImUxFyACTeRgf36t98L5bcz0R9fLL/py+tute8aaiielf116R69OJMc8f//jH3LoPHjw4
5/uDNgld8hve0zJ4bEG3kjzW67NxuFHpWqpHTsU5ypIIGHRVZUd+7itLtn7JLC3/qEyvBzABEF+6xMccc0yeDDFeNIzAK95NQuy0007xCuqXVgUltNEdRxof
gbZzt4E5WOhp0I26DolO2azWCI30yUHZjHMTXpxHebaPTvavgXAukJABXuhM61vhus9Z20Q31z14YrJh5ArA3CQkBEFSjKZswlwNrGwjIgjEcBxEIBCxdbyF
WYS88cakNHKn7fPnGkUykYiCHeBryocOHZqVYIzEKPfbb798H8P/+c9/8qA0cxB/0NgwYQIdcFMkhnRVvvvd72YGp0U3auhmI+Pbpv0zPfWZntoqWI8x80Xo
BMgIqmBNwzdrFm/2iXUILZGy+s5W/XVNnn76yRiLbBGBoHPuw3vxZE3oRFfDJA+dUqX3lFNOydFNt+/iP12a/nTpP/JLFsm3OHt5GJGhkrEF57HjpqSzzjgl
ukZHposvvjh3F40d3nrrrcyj8djtt98e3ea9cyD6zW9+kw2CrCpd8Luu9Lz44ovxsa9nsyHpEt511105+qOjV69h8RqwLUNfZUcC+ahbf7U2Fi0ldoJvumBo
nCUH3GiZXhk7IT30wG2Z34r3gAMOSGYuR8ZyxVclk0V2vsAlCbg77LBDTE58LX33kMPT08+8EO9qsPBfnlsKsjIPNTiik17RxKbhJVfylNhulYt857UVxUMN
Gnbre4tTvac++1aHHUtNuvdYdzQD8RqqihBABY1BFK4RBDESQpWt4xeGyakIjTApfWkIeMyYZ2Jcc12+FnkHDRqUWx3di169euUB8R133JH7wE888XgWGuei
/MOiO2KGqTJKCF8+0CBPmXpvo402yq8E1tpsu+12+WPDb0x8KyJR2XVRadRa6Ue3bFkeuKuCBlMZXVJ7qFa1VCEPfeWZM2elDQPHz352UnZ0vBhj1GCDjoYJ
bRK4UuWH04+Irsy2226bW7MNN9wgnXXuX/JaCoMXdeu4R+vFYMF+I9ZTRG9T28YXe+yxR3z79e/ZcQQCs4e6SiZpwNBl2mqrrfKUe6WlOhSaR48enfkQ/W21
wg9H9LvLLrvEWtOcMNiXwgF6ZPrVqYYNTpGnKeEyGCdHePDHhsj48adeTeeefVpuefQ8evfuk+644/Ystz59+mS4YNVU6dRbqjpmg9bpTNawu5tuvDk+ED07
d3kFYbLS4+A4cKOxypxTS37J0L0arNBYg7BfMmTfunc1v120TGiRKj1otBGAf+SGZPjwnVYYOFXh8jqFeKsBrnyKBKAaIYKUcw0p4ZqD553u6br5Up7HBv59
643J7BuYFKXezjvvnIny50c/+lHuv9cukckEfVADv+q46jqqYGrlyqh7BIQOTj0tWiIOyQkeeODBMLZRMXbYJUcuMHXzjDe0ehLDq44kIGgxCcjA1IzTvOg+
6XLp3t19173pnN+fkU6JMdj++++fDVGA+P9LVb61XKWV0U6cODE+EHBKOvjgQ9Nr4+Ll9z26ZV4Yp3L4o2BG+cD9d+dpYDxdfvnlWea77rprzCBulE499dQ8
rqiTLnCR54YbbhhT2S/myY+K369pb1H/zDPPbJidP1LMwBmT5YOhQ0ekb+y1U+Av0/CMtU4I6B6xA/xxXPSK4PUjXHSGj7vv+ncE1hdjAunEvEjM2KUqFzxK
DQNRw3vydffOOOPMeH/7k/mD07vvvlvQtW/YahmnNmwltBroYY/st9ILD3rlV7o5Ar3DgV42jT68VLsAr9qfuuyMTvgJu29sQ6QCWiUVFWKAksoiN8NCgF95
1TvVgdB17a65hmT+pwvy6r5zcMeNG5cZ071DqASm6EJgNemiaL0wQACUVhnkOIxeNJHgAqMKHy6pV7R21p4mTZqUBg8pYwODVc7BOAwei0OVxUT14SF4ONwD
m5C0QnAQtuvoEOTntZyDpevhfd933nlnxu2Pvr+ZOq2tLhOZViUwXEeldYMNNsgLzXBsPHTj/HZXNBRFlxevkA9lzZw5K22z7Yh09jkXZplts802efKFE5kg
8FWJw6Ilr8nkBp7Nzt19991Zl+gylrOWpetcJ2ngkwzmBTJdZF08C7Mjdtwi5FL2VlY5oJGdMED6leiVfXiGyz37He9/6KW037f2zkb3m9+MzpMfZKtO1R3+
6MAhr9oDmTmv1wLGoEED87vFhw/fPg0cvEnoxZPG5U1EVaZg02e1W/nOBXEOVWgr9u4evLXVV1c5B7zFXspTCspq+SpcjkgOZNBY35cArAEROkGIKAA6VFaQ
swDulzDdUxcgCA0Qq3AxN/Wd97PwEAmx9QORV1eDoUruqUMQNRGA1k0khcu2FfAZqk2h//rXv2J1/qI8fWomDwyGqTvofdY1DRs2LC/itQkYPWK2MPMWxqil
M4tTjZswtEz9+vXLfMIlspmM8Gy+yMrZe8TalS3/bTv1zY9GULLujwCh62TdymBY6/vPf/4zy4jBCAym8QUAcjagdkydOjWTqptRFWsj54KFZXc8OqphwgWW
X4PeV157M97b/eOQW4qJgiGZF46NHnxKHJgjCyZ6Alp+snOfjK+44orUKwIO3dKBfEbzpz/9KeuSc7755ps5v2fP7und6e9lZxY4yJydFDmVBwfRK8+9zz8v
j3m4nz5/Pxu/lksZO1WkasDK44sM6KReKytV3uu1aXOtOIcYtdvINO6Nt7NM0K8sGsCiU7DYlpaDzYHPbum/3hOg6JkcjLXYQ4WFfsMbcHuEbMHS+tEZ+vEk
QIPbVCU3skOsNGpTfTzUmgXADBpxiMAQIA7XGOJY1SAoXEu1eEmUXekgHMm4wjafLycMNUy6ZBZetVwSHGeffXY2hoMPPjjPQFkVJygzPpnuoEnf2YFuDo/e
gr5R+mR+2Qtmvp9w8INeAsI/XjgYPpwLCvh66eW30tzZbwUVPeKLFL1iMbBnmjdzRsapvlkdj8DfdNNN2dkeirUw/P/qV7/K4z5KN2YZMmRIMoX/y1/+MrcA
psnRJ3EwvKjnUeouHa3Ml3cfMAD8oYmjOJd2HL5pzErOSKee9tvo6t0eL7e/NWY298rOAx7HIXMTNwIEw+NEtgtNi26vxW3jH0EN3wIF3k1akJ1WySGg0Y/d
AR2DRu9V54RkU42WcbEJB2PEB5tZvnzOKh7JXD3dd/rBP8MmD3i0nPhjkA6zrrrLYPqyBR3VyRw0mXZmF11jLenjD95OjeMTNp7zIid04cGvJ73pSVDGI7qq
zaIH7a7VE7ToEy76b5jUsyZVeyfuqUfG6Oa8sc+xNLNu5jejBnDAHJhFCGFCRrHKV4KVyR4dZXgmQTIQTPRerwuQOanjTThSNYZ8EX8I68vOJJoahDMKK9+E
qxtD6YfHwF6U171jnJxu/fXXz4ZiYZYQMfnkk09lIyoBIQa+sbsXfZyIYBy2GFm8VYZQ8YV+PE97Z0Z8veIX0U2bGNts7o4tN/vEZ1XeCIoXZ3zK2Qa0RXzB
nMGilwLMKnFoA2NOdvLJJ2dHYtCuDbbxZY1Muueee1cFmClTpobB2n7DyT3qEe+5iOl3yxFkZOxpO4t7nmPyDu3dR+0dC6135GdvLBFYryIb3SAzd7p7ZITO
p+LbULYxCVKWB8hSyySRm8Bkit65nRA77zwyO8DkydNCJuWhRIbPeOi+7pEkS3U4Ix2RZwlwxfF9+pTc8aDbe84552SafJwazZwGfVp1ux3Ib8yYMbkraoZu
REzKWFeU4GZfDm84ionurC+zajkvbI1NsQHOgl6/1eAtFJMPu1VOoku2jT48KK91rj0CeYKJbUWcja3jT4AwHMBbU0YKAEG4yaAAIhwGpW/s7Z/tw6OthRAU
xLxQOeVFVIQ7RCsP3OkfE6iESHhqUgdOv5ghgJr05XWFECzpy9vqAq6x07SIqNaO7GYW7SmUo5pKrcli5MMPP5T7+r6qHWjyk5CzZ7yfnQWdcBIOHsFwHSTF
+RoxGzQ3XX/dP2KP2kZZoW3atI5NtCfGdpXt05ZbbhFOsH5uGTlNr+geSWhAmxaV84PLcUR9AtfFki+Zbat844uSJTL5POTtVwSt0/XKRlbA9DhA0Q+e8hOy
bdZO1//r3/HpzyNzK3PN1ddkPZlSJ3/dadtrJDolT84F77HHHpvpdM2A0TV58uR4Ruqa3F22WdaakhnQTz8tH0Kga10auluxonRryE8QscWmTRgb+YrW68Qu
BsnDd+yLcZIvOeHxyiuvzAHHrhCBxeSJoGTGUcA5/fTTcxmblbWy0vjx4/PeQfqa9s67qWuP7vmNvmvExBiYjJ4uNAJsFS1SmaUtG2urPXIUwVN59mycqbxr
sjJzK2j9f63dCRzfc/048M8uzTGMOWabNnPfx4hcQ46fqBwpV78lqfyIXyLSsX45chP1JzkKueZIxJgZRY5KjhLDxgxDOSbn2P/1fH322r6tMdu8H4/P9/h8
3sfr/brfr/fxUS+/CT3xPt50jwDJbyd2ChJtY3kF5LoQERGMdTAEgkHMW2/FvEs0DhAAyEtAAABwgghRuc03mMeOSoRnCYpZEKASYKT69pu2NBFbyWShKJQx
EkRbImK222XQrCN1WIu6Idhg2ZgMU+hL/76LJaLAjKm1J28hG4EhzkuoLay85JLLUoiGxcCd9bDw0xjj8MMPjzofz7GIezWR2fZ7vgxwiJyB3/ow44m11lor
Vz3Ii2FaoWjb18fttts2tbKtC8sNGtj8+d6/Jny27YOPe90jLNIb0U9jj549rXSOxbOhUS3hendqLM/q3SeE44F4E+GmeQ62CWICDTcDQ9CturBgloWSjB8l
81AjRoxIC2uFCGaCSxO5lJQoGUux5pqr5T4mdMdIBIeC1ZdSqvAHr/7jGWv5HIwiLRtnPMDx+BCg5ZYbnFqcEmYV4IfVtoSJkJ955pnp/lbkFg3gF3OjGR4w
ka3sqFFjmpVXXC7ut0EvtNc2OPAlehNgQu4ZGC2i1k98rL6k+7RybZ42WotPKNopU2acJ4iPlYFXSSyB0dDnaMvaJLsb6+jXVvIApEEVAkZmwkBKNaBj8kAq
oAAhj7Ay4JwoOvqW30eZ9q0V/FSTr9qrJD+BrXsApFEIqASBJhi5K1Y3CDdzDcx5IDYX0OBY0kEwlKYXADD432CDjwW8xiOt0GoDg+qL/GmWAybl3w7CLzto
jah/aK5WIETcN2U2i8WTNLYJREn/4UcqwRwaLoi+0KgYWHmCxe0Df+WV3yUhTll2ms480ZJ9tk7c2joeYIV/LnLZLiju2dPKkNilG0oOM8P15NgVbD2g5N1D
rIaARqX99tsvxz8idDX2wLTGTBSSJLjQmbgvaAPORReNQFPgzBhBgKneKYyZ0Ugf4AOeWCv4deAkBg8bH+H8AemacTe5QvLpg7JwZDEtq2MMxSJRzOCzil5C
J/QBi/Gd59x5i2rhGw7woHoJUCdd9aG17u38En6tMvCOh/GClf14QRvVFzBql2UDq8tEvfzaoli0q57uMqvYhTEAoTJMjiB+KwQ5ChG6119/OTvozC+n3gBA
I5JGXosJsqWXXip2QY7OxaZmrnfZZdf0y2mHFqDuGVUSgCizDQZtArwzIbhojygdYTSuETZmqXS6hEcZ8EruCVWGiDXjJtiuvmIimaAWsvVJ/8FcLkG/vn0S
QcZhe+/9hawLXAQKQWhzxGRdaMcSYBn9tljV6mkDYv2k2SG8nhd88kqYgoC6/8wzzzY95g8XJroALvcwoykKXkP0Ku7pWyiN6Detry8rrfLRZsKTE7K+wcsN
zi0QLCC6wQ9m4RZzTYwZ3DM9QFgkcGoL7dFdfgKB6eFx0qTns51FF+kVfVkgztCIYEws+yFU2rewV1v4B07hPNemxa81YtOeyJd2Mb9UOGCJDjrooHR5uaDG
wWPGjMkJZwoSDoq2YAS3oINlQ0NDadkWf+99f21WSqs0I2yOTmgGx+ChmEvpo7NzHfCxe/rreWtlW1e/8EBR6Jc8YMYnknIusBFiqbsjj+oFUswvX7IsEKTp
AGJ2VsYFUqmOLhoWSkgQ87NW3BlaQIhxmf4rNmeedXa8Vv7UiMxcku4GRBEcgEOccQZhss1AMh7imrAohXAAY1zEnzkVout+leFL77zzTgFLDBRfeyn7oH+I
igEtYISksqDKQ/CLL7VWhnUw1pIwi//mi4x51EODUyzcR4GEggO+IJklreQZuAo29+u3iBpFQ6td9evrmlVX6h+4NtZsw++WKAmUaIsFQmRr1wy0tSUtsvCC
zWVXXBeW8EfBYJvnnBHtj5mkElpum6tSwYVBK4FdMvck+GDFi82Tm262ZYhHq6BMCVhviEn14x8hdHhGO3CKtq8ED4D1bw8/Fbwg0jsDXmW0jVdMFwgSDRzY
rnSBZ213jiMToGkfxcD+ep2LoIukHCGhtIxZ8KcLjvCj9sBGecCvE7EIgbE8Vxkvgx//K6Nv6C75DUf6IxWO0IxcSDEh257w8uKL/5zuygGGuZ6B6FbzaUgj
jtuVEOq119q3Pug4KVdG5YBbd53VmjNOP6u5++57AlEfTW3NohhYQhx3yViD8AhxA4z7ZpafWwZw9dW332Bw1e/qHHjck2heIVX7XNpoT7scHoFffTUGlXF4
+5133xeb854JmDFO+0aHAQOWbZ4Y99dEptUAwtraKm1lvGCiEjEQiQIAqwQOeSG84OuEsQRHXvflJ4RWtFvm88gjY5tHH3kgX4VJyNNtCPdN8rtPn8WD2CYA
23kuG/rAoK5evRZq/vHcuLT4W225Vbq7NafmubaLef2vCwwzw6U9qyAefvjh1P7myaR+/fon7VkqlgdM4NFfFgCOXOrjoXguVP3OGxNz/87AgYNyr5G6tOsC
B2tgOuPLX/5yjkNZ8FkJUcH56KOPhgs4YJoS/Fd4B2tHO30TNnUJiJmPq347KAVfYX7wgE3bhEQ/8CkBLGHTH0Il4W95i6bwrS6X3wTTc33t+npUplKamaYl
pRolDLSgZ9UoVwmT6JRKhBJ9a0hlnnmFiP/cAnVtGOHWm0ffEmB1ySUqfF9J/TprsKldv9UhaidZPCkVAgHs8t9VvzNTfGjbPcmErVXPhRD3Lrvy5niP6rMh
BL1i/86Q2IW7W7PVFpuGNn0l3IOxAUO8zS/6I91++x9S2L2zp+BVv2T9n60J6mY5uUz1rGAt+GaGUfmCU3+FmhETwRwh1X3BZXOALqiDYeGfBcWgxqVtWLyd
gPYqSsn4r93Q10b1bDyUaFapYPIbPJ2Xe5UKLvSkMMzlyNu62XYAtKeYsjzcb98UJ4VbPFN1gx1zermy5DAVrvi4cY/nJK97+i+/dv323fnbs0oFm3zGxrbD
m4S/509/S0EsPvUN3gp5+29zXylCAsBdgxMGAe59F5/4Xe36je/V4R6eVh6PeoZ/XWRFfV3WG7LhVA0BEhIJgAcIQcJdInbuISbfmCuoc8zppEnPpYb2W3xe
4xpr87cDsmcnvdD87tYbQ8v9fdpY6YnEkTalAp4rZaJQKNsAVFLPB0ngkVdnhw0bli6jMZSEqDQIhLkgRdK+BY9cif32OzTmXjaOgb0Jum6x5fnqsKR3Z4hY
eFjd8vt2CRETKBOGkvZdlapP9b/zWz5EoEBOPfXUZlBE2ISf99//wFhr97cMi6TrlvC126TVp12Eg2N0QVDW1DkNq6++SmxV+XFG2lg58zMFj3KzSp7P6tn4
aWF848TFFlu82WrrHfJglmIeZ9GZAEVnPAO3hAmeUylM+z1gQCyAvfqK5rqYK9s+1sUNHz48x7ngm5ME7/rPyzDpbeL4rrvuDvf/Y7HNZu/E4z9j7I2P9Qle
4KfGOGVZ4A3t9ZkiIHS+8YyLNZMEQPTHpW/6rX11VxvyKVP3uq244srDiyAa14gGVKKwTpBcALTLZtqDBUlq+ZbKq1CjgAV43ePPP/b4xGbgskvn3I+om1Cs
QaO21C/5JozKG9AbmBYCM8NsPtSlTR1l5cwzcfEQFyKYbstyTAJaOkN4zPaL/HAnl44Q+WmnnZwrMG64/jfBmGuE67ZdhtgpEjDpo6QtzO9ev2lLR9zrvOT1
f+ZU9+GT7w1WuBDdczbDccceHcuRls032AkoYNaiCyWgTsKkry7PR998d2zT/lkwfe+0pAbxFZ1Dw86k/YJBXfVbHnSWnzLlbgub77nnHoG7l5tfXHh1s8pK
gzKI8ny8/wgDwmtrmWYcCYwZ1TF/WNgH/jo2FOdOcVrQwbm6wLyVXc5ctw+awKc+NGAlWUOTtIMHLxc0WC5PmnrymVeb/su0x2Mbz+MBZcr6+I2X4K9w5ltd
eBUvw4VyosxB4enKlhzAcVke9XAP8SlrjIbq6hKrovNlzDpGgBBYZg81DBiA1OALY5aQ8deNF+TVYYjVYBEaYTABX/p3f3ig+fNdN8bAfHCEZgdExO2rub9E
u9rrRJjyynUSWb7ZJfWAlWWj9TCTsYIoH2tqYlVYWCCAX83yiMDdeuuteZaBgfjQoVtnn++66/asB3PqO+uEuRJpgXTtSNWm+yJ7hHfLCM9bPT0z/JX38ccf
T1xrWzI+MPfk1KLlYpv9xpsMjbvhhkXI21kMCIm4LJjxEPzzGPr379fcd//DMRm7d0wYH5KaWqgbs9oQWe1F4YTFf/SVSuFVP+DMNIN1guqneOBq/PhxMQn6
UAj7Js1ndvpsKk8TtOrCULSyqRP5JczoEE2n4zok89JLL0x+wEtHxSpze4qkmXGTN9/nA34xfAmjsS/6Pv74uPAOLshDaBTf4GObhoC0S57eeCM23wXNWxhn
DAvk0z4hKqulfjyHh93Hg5KgWR3UX4qxcAaXfut7dwUhpZL/HqpIY779JxR+139IJFTuA0Lj8vsvQSiCsFr8+FVW6Nd89WtfD815Ti4m5crQ6CYCMV0lCFdO
KsDr2ey+q4MEpeZ7RAbfL4kYjYlIovkmjMJVcXD7lltt0wz/wQ9j9+mZGcIHYxFPfYIhNVbw35isfHPLW0wswodUmr/gM84aHm5OJaF97sTaa6/TLLzYwLTM
5pCUc/QwQWbtu8bRxHBDKQiHOx1nwjMvNsPibAbKghAJ5tDeUuEPfRDdhamM+wibvhpzuG9LheCP+sFN21oRMWHCU6F42jkp8HuGpvjAm/nkX3BBVxvBozQf
ePDvee7gBRecm0qg5tDwiKSOgi1vzOZDfjDiOdFSwQn8wzJZyDx8+PdSMaPlhRddksd6rbTKWuEFDcgIHv4zZaAefQC79gkAfKAj/JIDFx7WnnwSuD3X14Ib
LISQcDMm2TMIxcAaMc8DKbQgraWA537XPb8JkcaEEpVDrGqkAFBOgML5AgsttEAifocdd2kuveSCnIMRUBg8eHBG7GgazOBsAa4XQZxThBc9OsuBC3yS+xDl
u2Dk4pmUZI2vvuryZtE+KzQvvTA2uDDmc6aODDh6xNjj9Jy7MDdjDkP4fuDAgTkHxLUB91lnnZWRLtsURPMEEkT+OtuFL+veCI7wdCUWyvwTN3OroRs0V115
WT2K7zhkPuaPYpPC9HsrrLROnn1x/ai7m1NPOCJcpT65Pk0GgkRQK+lrS6e3MxrHUgsGoLF2jfGsIhA1RVOp8MfVxiiLL96OHQjQ88/HOrVgPlbHyoXnnv9n
83gc2bz9J7cIXukVFvKhZq1YCXHeuT/L/gwcOCgW8X6nueiii5rxMfaSik/yzwf4qPzgssKE5bAuzxgVf1q7xz3fa689c3xtvvGkk09rfnP9H5pPbL62jcXJ
u/qNF+zr6tZtxsvqCImkr+ipPbxLgHzDAxxK8rpPCNVFiMAVbz1prYwHbihE0jBgFVQZSyUPYSqhI+UaxfS0oAYy5BllaVhzVFLN2Sy91BJBhDebldYc2oy4
6PRYsvLpJB6tSPtZ/oPQ6sPwJQBZyQf8KCbA3MKyGKbugbXqpAwserVVgGbjKo0ceWP2T/62jHHD67m5jRsmFMzSmU9yuk0NTo29bG+Xhg4dmgEEYVxEN1OP
OAhhoailQFZ5SGUpjJFYCHNnn/nMjrH+bPtmlZXb8wiMmwKYwHNrDWjFe+75Y/PLCy9p3pz8dJ7yqq8G4Cw8+PS56taOVQOioRiJ+0nQjRXBx7ISaszXWQ7+
uan4oV+/ZdIysYItzdXaJv267/4Hmv/9xreaB+//Yx5sc9aZP0k8itQddNDXp3sHpViq7Jx+o1+5/PiQ8jW3aCkT/LtHgZlOuGLERrEB8/jmO0d+u4njFJJP
8QNlMTksObiLl9Eaj0utpbGEyCGgMxa/4m04hQ9WrCLN8uOpLltsuc1UlcsIUJlVjJn9V0iFGE/CFPJq2D1nFjCFmS/K1jFFKi9G85ugKUOT6cSoUddH57+Y
g0XzAgiLCUTIPG8Z+T8H6wnE+3yUANp+LahRS4j0CwzGAgSBG8T62cekT7NLYNdvCsJYSvjVJLFkA5x6K0pI6bBMNKbxkhA5HHOXRJ1MwIJH0lfJxLS1b1xI
9zDMeyXhZAtnDznksObEE4/LoAAhAZNoJ7jUrx5jREJPwYHJvBfFAReCEvDNglmOA+cu9GKhMZ55G17D7NL48U80N4Zr5XRbvEAoKUiMjSYiuvgBT81Lgtvi
1aKJ+vRXIIvFt3qc+0qxHH74kc2vLr2q6buUl17He3ljXsnYvlw1tMTHLvdKUMvyOMjUCg4CU+1Sula+L7pouyY1cVYROlaGaQOc3wqpDBL8VhHEkmL35IMU
BCFIObYKwinr8kwez9XbCmac5RZIti7sU5/aOQa25zW77LpHEg/yuVjzIkQQWoRiEbguBMYgmsWzxX14jE30y5IUa+IIkf5Ahu+Zr2J4OKClWARuBJNeCeMp
B5c0PotuG4UQuUldC3H59Nw/QiSvMtVX9YjaGS8SNvibGQ7/4VPKU3nie6OPb5gMZKcrqwKHykrq1k/CySKByUJaAZNzzz03J4Pds1Kj1rSVEClH82JKygFt
JTDIM/Plvgn3/b68b7pvhEi/CZHEHSbgRRvl5zRpQ7JipU6OQhP0gRf9ZQGtGH/44YcziESx7LvvPs2E8Q9lf7TbMxb9ojnFgpZo9uqrkxPv5U3hV0m/ubMC
FhK8WNKlHzwENCRo3P+ugClGEJZESExPgwKyxhLuAVZ+gPhtQg7hlHG/XAMNkWz3CB+A5G+XZbRHxwqhbv/JT4dQvRbCFm+zC2EsJpgbRGdP40PbkK4uS1xY
H/NSxgDcRsyKcYxTtCOv/ivnu/NyD9zSxRdfktZEnwhoEdYzIXB1cpWUgQ99hgMCwi3k0xPqSvKxBvAsWe0hmIEpOmGY+be8ZUEHxthGgENfVw5XEAMRjkpg
YC0JA0ZmnVjgQRG6BxNLw2pINLK2wEDo4abW442JYIzkHrg7L/eVkygRdXITubSlhHgb1h9K8Kb8nKYqY5Pk+Bhr2RTIChdPVt36DJfG2SKQAiWfiCOXjdNz
+DKNNwgQwdBv43xLhcqzwrNWibM8+N9zdHohhKiU2cTYksO9Wyh2TXMVu8qosrIqMgIacXxDBmAhQGUk0iw3wXL5T4JpaD60e8qyXPJbOQFg9auD8NEEIiUY
7eGHJ4UgvZzLPixNkRBmXoQpK4kPDGPcYR2fQIE29QkcLqmYIP90fHhexBsx4sqYw9g93Lp2u4g6C1ZF1CsUa+en+S/P4FW/LSkS0DAHos4SFJvszCFVG/CF
Di5p1v1vGVDezTbfOpWUzYkUII/BNgnMXAmeBTHMqVkpr00WCoOwFoRK0ia6YUyMOmzYsOlwEX7uqPReuPKM9TI2YQVsHiwh8syY0HpEadb9ykfv+1G8CK9c
N0cQEE77lrjwFAj4PJcIDYGAg6Gbb5KvkTGEsJigpUP7DR7CZ2EB4SrFKVoKp3BNeeLv5YO21ge6Z9zIA0Bj1rs77QkIN1gUAJA0ANB8nYKmQv/5mACALELj
t44yd2+8McMsIpB61SWfRKC0A/gpAWzz7sQc1GJ2LooBPRNddWahufzQzmOPPTYdhhKO92MITdl/5Uhj+c8557ywJPs2q62xfjKb58qDvxI/2wp1cySelavE
pREF5QpKnrloaAEGsJV1gSc4xNDvleJxJnmWX76dpzL4RxcJE2AGCW3QiiCZA0NsS38kAQ/CJOmj+mrsR+hrDKc+4xrPJPDJrw9S/a4gijopFXRX1uWZqFqF
wLPgXH5Uu6KMvAwbQAmpEDhlzspSnnCJFhSbocUXvzgswuKXB9ytRwVOeFtwwZZ38T9lpH/oZVxIYCh6xgB+9XXy5FfS1QO+MngUX8N5V4wNkaURMZ9CtkgU
4BCDKASCFPrvmXsawQyEz4BSWURUp/y15EgZ2tgzAMn/cnRSQmSdsxyltlTo1LwmSDGnBA4JzLOrF3yEiG989NHHhhB9Jcdzf33gnrA6KyeibTIkCJI+wQPt
6Fv0jKWRuHyYicDAG+ttnERjixYKLCgvGUdhdPicnRLRhz59FkvtD1/3R9QM8c0F2T7embhn1h6ylARKhEsCk0Qh2CBJGRhXYiAJDBIcVtDIfzhE485E0AgK
IZLQXr+5rlzaLSPgUmsWZ4f/znpn/l1l0RPPYHRzSuaP0ESwyp6mmvS1/4pSWGaZvrFZ8KCI5F0c/VwiriXT5VOPOvEo/nOhv37ACwHx3G+XCK4kYCGvzZUE
teWZ6HBZHsRGWEi0atY3SYc4yFEZRCpYAiO/uSJmUMPyKNcyWEwahkSXdsIkJNi2CzsLWbCFFhmQwsP1OPDAA7IT6i6kzYzMOfnP36+I08zEn1U9+gVWQn7E
t49sjjr+Z83ue+yZb4SQn1BiWC4UIuqn/JJghvYwZSWaUxluDQ2KSZVXP8bWXmk7LhfXSFLvrBKc1LNBgwY2v7n2uhw077zzTlmfMRqmlwedfHM5CTRG5qax
kubvWEkwc1Ptq8J0olymIGh0cEkYVJAGw5x99jl5r4TJt2SSlBIxHiQ8LK5BP4VinCLc3imMWWgeP+z30iZcnnfe+c0jsS7QHJqxGWUi3M8a4jd0+WRu8+/S
jLj8t/nqmN9cc2Uspn08xrPtShEGBc+iiXrxrf/43pkTcMl788wcFIPjjJN4EP9jqznpIwBcOszBZBWBIU8BQgLgQm6UTqsUdWTFynbv3r6pG340OjVmwV57
rT0jDzBSmUPAWO388suTm6232jA6O3+ug+PSfZiJlfAyK8tvKAtwvZeAlhBNCu165JHfb24Zc0ezzRbr5bgIskMXh2VrT50lHJL6iqnUj2kszWFZDHgN7lkM
BO3fv38+R1j4bBVQG+ZXx9ChQ9PFzYrf56P6INDwta9+pdkn3BZCYDVFheMLruqTMLfoHMuB2SUMPz4G7WghUIGG4CI4ZcEtWrWolmvWRkD3jZJTY0nTvtP7
DZ/oSxjNhbF+eAbuKWGKhhIxPpPeC//5cDYflCFcsaZwCscjI4C0zz6tFd7vK/s35rBYfHnRiZttlb45p8mTX8lxoLHuXyKSedNNo+PE1ovifLy102o5GqEM
gjER2NFpoQXbsDlD496C8X/KlPbcEv+7GQoQFoIkuVlE8LsVgHYwpgOQ0F7t/ncIcz/4KRMmUY50c438RijAyMccq1M+QYc6EyKqTcDk/TBSEQuiaUtMI72X
VXIfrOPiVNE99/zvmBOa2Kw/ZM0w3+0Z3PrSNEuG8mgPROSidSZ9UwcLxD3ip2NcLoZV2NwmETXjv4Kh8Fn/wSkkP7uEPlKF388+++f539wQ91mqPPqkfi4k
AbKJTpKPdjVBSogkS5zkcdZFlUffhSIfgdT3DTYcGlbtu/EWjJ9M5xN5Xepj8RxoIhJogpRixvRC7mgvFW3yz1x+UOi8Gn2rLSS777F387OzfponTHElzzzz
rKy99rxRBKYBTDILOpjvuuTiC2I/3H3NCssPipc9TIy30bdvTS+XD40ID74kPPDp2TvvtAsUDIfQgTHq6oEO66CCJNJ/yPPMAK01e+2bxBO5oX0lMXT/dcgy
dmMeZWlndREYCZLtngUIf7RlHq+AWbQZH2+rQNhllumXVikLzOOHvrRtNOm6QOKsEgaQD6wPPvjXZvDKH4txx7PR7x4xnnkh+8ZVZbqb5tnUypjP4Rt8ZOWq
nWKQcePG5fhp4MBwvUK7SywTF8tVB5BU/vqGY7j8oAmd5luwf1iAkxIWruINN7TjH3guYaj6MbkwuFUQ5SFoi3ATYv0YPnx4Nl9lWbitw60z2azM3XeOiZD4
0Hj9zgGxKuSU/xCmwoVK4E3CR3jiw0jwLVFY66yzdk7ADgihkOzm7jb/siEsD6Qi6x4rQsaHxTX2NC797ne/m3QwKc31c0aiuS6LqC+95KLmE1tt0oyMVfQs
Db7lfRAgQgvXhAZeWqXSBlPkA5Ooc84jYXJRDAzN/VIBd4ZGpokQxpsZ+IYiduZ9aIQpU96Jb0f99o6XSA2KQzv6xPN2zKQj3AX+NqSqm2XyTYrVu1SMGe6+
63fR4SdydbEojMEt4IqYiaV5+Hivuqp+zzH6Gmus3qy31vIxnuibg0qIBDt8YJBVVx/SXHX1rxOptG6tmChGrfoQSQRyn332ybCssYgJYRbJs1IuylUZ3TO2
ImizS9Ue4m728TWbXosNziVDm222aczf3J6nAnXWIT/iw7mxEReH/4/JCI5xkgCJeSZ0B1MnzpRR7oocAAArvklEQVRFf1Mekn1Iu31uz1gVcki4zSdn3fLD
ke9KykhC0AIr6C519jlvzOFHCasIHQFfNN5v22epFXKO553X/9FMem5SwqFfPAN9FSq3wp4LTKCMnyhDJxdR+jyyE084Lk4k6hf0bjfvkQeuKfyZFtCuvATK
ZRcufOFtctC1zTQ1fUSFypSZPJXRPRlZJ89K0xAEzFbWiwA4jRJQAMCA5pAQvEV0K82Yk6/qea4ja5aKybPfBAP3T2KafZfmBeHKglsyIcu16UyQ4rmLFhci
3mbbHVIhlF9Pw7GkNLHx32K9eyWy+dciRZb/6Is6OpmIMJhLsarBQk2EY5m4mfK5MKfvcnm5dDQld1Aq2Dthnvk35aefhP/n55yfNBK9MtiG2846iqmtfnC2
nvGL5VFgNQfjf8GjTjRW3oDexGb//v3Ckk0MEBZOoTKWsFL+sMO+GRPdx2U/tKGOStW+sSG3zhhOmhe6Vt2+Z7QX5yxMag8MtbB3nQg4sO4UgyAL5jd8QQPC
TLmbpKYIudwmdo1rKZofn3p8c8/dtwe/thPxlADe9YyBADvawa/VOerVZ7B02XzoVlNFzwyKCYqZckKjUM+e7Wn7Ag2ExrjGoRdLLBH74qcJVqAmgdOABHCn
g77xpiXmr+d/9eZ529P8WgwEuNZcvtuMuun6tETmBL70pX1j8vCOrAsMRZAiQP3PDB0f9dy3zjHJxkYiRxBbptnz0pyXXz4iX/TrRc0UweSYoSY8EKOMOigK
i26vGHFJ84vY9/KFvffKVk1o2s1brloxUdUtk3v1v/Cj7s5E8SAYi0SgO8t05qvf4IcDxPzyl78ah3e+2Nx6x4PN5bEIeIcdPplzKYIeJlYJRLVXcKiH8tRP
ilKSr37njfgAL4Vhzsm474ADDw7Ld2/whBeBtRs/F4vV6jfe+NsYW30nJne/m0qnE/76TZnZi+SgF3BUH6qtOfmustxUK1eMT4S8d/zUTs1vrrkq+PfZcFcf
yDEf+jkKDW9z5QSzRHEFg4TlKTeK0sS0cZzx4e57fCECGU8FfzslqN2sivfxPdh5FBSo3wSzR4/WS+tK83pYx7giEsZvEd8SDdLlw2y5rmhahdw3HVNeeJNw
IIhwYR3VRGgwIxNMi0rKuBALsqUxt96Wqw8WWmjB6TPpYPHc5Xfn/6rDM7/rObghkNknRKJFhKjyeS7/z39+bgrRNttsn7PaloGUEEGUPmFCixMnTHi6Oexb
RzR7xNvTK4mEmTuh3dSt3qqbUBiX+V9JXS44pJ3Hh2tlTocQWf1AiMDVWabKdn7rp0SRsRRedym6eNVV1+R9S6EwGEWAFgWXh5buaN841TPtSfVbJAxcQuCE
iPtEiO644w/NT844Lcp5o8eMOZy3AmfexH7MMUcFM/5gOt+U0ihYBTSMKblZEnxVAoP/ddX9+q77vvGSOgUM7PdiUR9+ZOy0rMEfPZdNHKO/+TNuLAttUa7o
o+CP9g488MAUqlIy5p9YJumLw/Zu7rj9lmyLwtCXt8MjM35lWYuO+AMe4SN5ZdmPDhqugy4SpiDGx1TGQwAHmG/WxgywgrSasZJn/ivrQkCMqEGRDs8Q89Vp
wuW++nWImU1ado3BXY9u4XZslwJpScsmMeHJJdOui3BzuwhF3ev81rZ2WTXu4dChQ3PVs2iWtlzatkTklJNPjXVxB+REqxORaB7MZKCpHUhz2R9knLhAKIAT
jz82cVLumG8D2XHjxmXol+A6EcmCUCs0hIMLN+pk6e3E5ZubZyGA2lhppZVSM2I2zDI7QUJsfZGPxf3lL64IF2aJONnptXh9ys5ZhxNhhXzVD8djx47NBbq0
uPsYkCanjfWDK2pbuYW2xnKsJNfVIJ2CPOHEU4J5Xw7L3G7eU4YVU/8///lis3acXXf++efkeRebbrJx1tvZFzwgYobu5tQkzyX96KQj/it6zfxcXuV22+1z
uTRI4Oeoo44NX9g59fM3Yx+6O15APSzm1pbKCCVlB0Z0N1/GIuET80qEwrpL7jQDYDsJfGyyyabxYvBbkx9sUgS7dXfOrnfEGFgpMXD6DQ/61Z1AuKGxuhSG
rNJUhEMHCBKB859LB0gRCx3HLJhcWb8xkcZoD7/VVy4jhCgDcGXejmeeS7QiRrg0lqr8IAbD3sytXeMNzGfhKe2tnMTFsTkN4WlbDGveiIbhw0rgAAOmsuv1
5JN+Eq+e/0wcy/WPFI6uXd9KmPWX5a269eOW0SPjGOF7czJVPWCXCDUfW7uECSEIlm+W5vkYS10+4vLc8yRczr0AqzGjMDR8wjfGpSm5iP7DC3q8X6o8NGIc
ENBMfPqZmIxcI3FvYtsKB5OoYBNIMR4wZjPpS4i1R+kYjMMJnPpPoEwawxX3R1TLWMPA+onxTjcdGDC2tCxlW0y10867NSee/MvE9VE//EEyVzEbnHGFCbB1
iIIxnelPf/pTMjfrgX86E9wSQgIIj1bsb7HF0AySjBp1c4Tbz4kXCeyQ7Sp3XyiX3T//ucyrP8ZJNWanFOwwEB43XsT78GHoYi2kNyEKlx94wFdyk+AWW2yd
Lh1PyuSr8/n0F5+QAUIk2IZe3T3A0P5gWESCXJmNnTTCOnmGESSMJp/juFJig8EQAnJL6Ei+8qQdQ9IAGoZc7fmd9QRwk55+LOYzVsq6aW2CxC3TScxZE3qQ
iAlE92oLOcbFCAbPtCrhdhEo4VsMCgaTkYcedkRo8HNjEecOaWmKCUrwaWlMRCD79l06tsVfHIsxR+QAthgf/JL+YhDMj8guTMhFdn+p0IomA+HJ5R5lYfVC
waYeZWouD47knZ0gKScpK7351jvBAJvk71GhVA6O00vBgzHASVi1q2+UkGiVMDhGQldehqgWutekrgAEZQTfG21k5fbJyRMvv2y92Rvxf2owa7uO8l//olxf
bz61w8ebU864OI///dGPjk6+QWd9x0/GJMYjlAoLgXcI1vhwJ8HL/RKmF8qHZxbeIlwTrCKOA2NKgZKyhk665NLLYg3kutkv/LbGmuvHa25GpQsuakeJGAvx
UrjP+Ep/999//2ZMrGhngeFCWwRO/azVtttuk/UHJYKX2vPtwaovVvzMN6Vd/obXBeUEJ1KQEA5zQ3S5SCrn2vHh3cMECkLIWxGdo5nc99+FCST3aF2Mx2Uq
BlQeINFUEhfDeDZ1amvi11ln7bwvigahEmRDPiT6jWmZY24UZlQfAqgbUkTICJF6EctAGZFWXXXV5usHHRKW5cFGYEEEitCDVT8hlxlnbdXp9zXX3RLW68jQ
YDsnLDMzN4FhkSRl4IDmNPCtxI0aMeKqZoUVVwgNP6T5WGh5DCofhoUDuIJj9RUOq/zsvlmkjy63RvPaG2/HWX0bpiBcEqsLfhoKpRKBoQD1V9JfeDTrXys0
3Le8hmXgaoIL/sAjcude9DLw2u79IRjcen2maH3Du3Hmrp/erPntDbfEoS3fbo4LYdI/iTVhDQQdKEj04wJ7zhVXJxeURYU3990j5NzhSvjg/PPPi9D7SXmY
5jnn/LpZpu+SAccrAfOSYYV/G8I6Li2OSWZCjyaE0xhRG8Z/BNZmSm2wzISW0j3//PNTUI8/4aTmjJ+cG9M6/ZIvil/h791u7biccMElBdS9ZbzWzWoDBG8m
YQGw0ELt6lhjh1dead0fHRL6XnTRBZOBlSFUNJTLb+Fy8w4LL+x42PbtBQDGjL7d8xuB+/fvp8pg9hg4huY0H4JBi+DWiEEEgAmI8jMzgfI0jzcrmL+pfNwT
roGx1vjxTzWrrrx8amKTzISP4igF0qNHrKmKqJ1+v/zy5GazjdeNicf/STirPu2UQBHQCquDScIA5ookQnr55VfGLP9pTY84+PHtfzk7bUJaBAxr/Rni6BeX
gxBL7n3Q5PX0Tzz+TDNkg5WDUZZPQTDAJpiFP+Ft7gqiV3u+qx35CAIPAKNVgheRLP0U+frGIYeGS3xCTBN8Mt0d5QmafsJj0fell16OMdMqzZVXj0w6/PD/
hoflXCIFSX75yptgLassemB2V2fi2gsYsaSScY55Oe2ut966gbBJefY4RUEhNs0z4aHcFAGX/bJuXg+vhVtpPst/3okonkW81hdSzHixPBeBkU9uv11z2KHf
blZdZblsF93Jinz6gCfgwH/fsfr79bQ0OkKyCIKlO2+84dXprbWgaWSGXN+uYkQdqP9cIo04QDBYLvMkFPEhj8Z7BNK5VKJhiy++WHPltXc0X95v/2Qm+15o
LUleiem1grczeVYXOCT+NW1CSBFHovWZdQjo32+pZuxjjyeyMS94fOsTS4uR558/gh6x2/Ev997ZHH/c0dMZUp8kZUqQEAVRJfX4b6yGISVKgRB5WfBuO20Z
d14Pgj2ZjFJLlqrednrhI1nug3wUDKYZFuq9cDNk3bWyX6Jt9Q6mqtt4idBKhVPlPa/LM67NY489Nj2Pe1xdsEnf+N+D4h23G8R4I9yZwFkbEnameht9pYDg
Hk0FINZfb9VcsnPaaadnedvAW8vWrpgHiy3wdQ9M7hFsdRZdRQ15IZUoHYqVpRw4aGDexk9oh5d3/NQuzSmnnZkKkTurTyXsmJ71YfVYI5aQUqZsucnGVDwb
7iQvZu+994jyTyRuKRt8QvjxSgkQ/mc1k0M0REp1gL/uoXERawM5OgdRvmk2zOf+/JGPufebtmGOPXe/hE4nJXlIvAGbt6v9M849u/Y3VzUnHHVwvBnv2Gwb
I5a2AriOCefSqJ2pGMA3JpZoMm0IVlTSL+0jyiuhHHqFxppvvjgaOARNH8s0QyLYabTbbhsdk3fXhgZro2hVvzqLgf1WN/gkGg9DEGT1SmPG3BarDgblYfci
f1yjsWMfy8Wr9vwQcAmDmWcZGP651NlG3nifD23FuoVmyaWWTCZkvWleST1OJoUTOH00GGpWddc99ON+oXHdI/C2X0sE7cenndjc/vtbmt6xkqV79/aQSrhE
a/QiCCwSfFO+ywxYpRk46KNZ3pQA5pSUwbzczoK3aArf2i+8i6pxBbm+kmeWNI0PS7la1Pfzc84NT+SSqHO+fPUMfnz4oXtjfHRHWLGNA79/T7pqTx/ABk58
rm00JKzm3QgYF7c2Mh500AHN2Efuj/LtYTLKgRP8+gsWcuE7XsbcTjzS2hJm1xihcs9vhZhE//3u1QtDztd4J+u777aWxpnfGkEIkiv5TYKVVU5+luiaa64I
KV48GOjuCBwcFG22YypSzqpJQrDMucgRzVidyIcdHzrhGXj4uFYISOAeNmxYjEk2yHmim0ZeG+20Lwbg+hAil7KQgQAjLr843hh+XE5sqkPdMyd4kcw9OIVI
IIR2MwiuPUoTJz7dnBCRwXXXGBzvK3o5iPZys96QdeNk10sSR5aoIKrgCOay5KhggcMPmmjh7be1ca9dNcAVU6dzDQQazNgbh9De++9/UGpRdVcf/K4+0qoU
WS2ctRJAPRhMUobbfeJJJzfX/Pr2tDwEGX3LRfMN7xQSa/n0hIdijLFc0pTi0edKQus25KEbGhQc9bz+E3DjooLLc/yEZtKX9vlirs7o1WuBEKiL4/7rcUjk
Js2PT/9p4LpbRPd2SpfVlApvgZvoG04EMQi48aCxtvk3sJjQdnjO6quvFng7IM7pezjga3FQMoC3XfDCQsVW8/YoLZoLgBiqtIEMMpNAHXPJx/0jGBBXwmbB
qrySZxpgDSBK+JEr8GJoq6uvurU562dnR0TsvBiAr5/5lNOmLcTGNb4xu4gRl00qxOafmT6KMYxPRMpsnRDiFLpdffU1kpms3sak6nnxxfaNClxZ4z3If2Ts
483nd9+7OeB/9s/aZ0VcD9ShPeMjbufwCNEbh5ivUkYaM2ZMM+GJOHAj5qHgixW0hu/CCy8LAp2d7pdBLxeFRqQw1DknQqQduF1xheUbRyzfEdrU6u2BYdlE
tbi16BPVxkubv9fcdON1wYwPKzYdTr/hA9yEgiam7blE8CjIg4aeF473+eKwcBW5eG0ov+6rB/yUIXq/GpE8qe7Zyl3WWr3GsxSf9F60LXwOHTp0+hZ6giqs
b4WChHe42Gf/7P81P/1/ZzY3j3IW4Xwh7Ffkhj+BBThm9QkyHjHm4+WwtqZn4EkAyPRKeT+ESj8I3cQJjyQe9NU9NAWbC37ITZdNNt1yKiZmRWhFiRaAEIkm
8buEi+AAlAWKQ/Hyvufuq5glUQ/kyGeNkjTmlpsiYrZLM/z730nz6R4kQHQnIvm0td28kKUDnXmUrVRCqIO2iFuKxDpYIUyTMfGf2+PLzfrrrpbv8ZEfIriu
yvTo4R2i3Ztbx9wUrtejoZkGJ1zV32pn5u+ZYUJgCseYYs+9vjgdZ3DimUSZ3DjyugjDfjvcib3CQrY7g4X+4Wpu0umn/yTmb86MMeASzVE//F66M+oB31/+
cl8oo+/nZO1j4yY2hx/6P2EFDs5mZoa/2sZYXDy4xyCd+QrXf/7zvWFhdwiBGjI9QIOhJEqUAl44vJbRo2+MKN3RAcO3c+U5WgskCYFjbsIOP+7PKtUzPDUo
xjbnnHNOhrTxpwhtpYLL/3vu+WNz+BHfifPQfxfzS1vnYZsTJz6V/bG1Y2gIJV4V8WRxnTXBxdNPgk2QjJH69l06ghlDwqvZt3nmWYfq9wjPol4y3XpAKUDB
5wJrXT6+8dAUpLrJLBsv6URpEBKLGTAgADSK0SCPr0lwCBzGLOvlWffu3Zq/PTS2eWLcQ+G+/CKsyy7ZCeWlmYWjk2iev1c+z6RCICVw3HEnRlt/b755yMHN
wNDK4Ln55tHNId/6YQjRygFbG7oHn3L64x2giwTBf//7Mam9TGIW8doW3v8TfAWzbwxhHsfWA0EGS0zAUYoK/vr0WTz8/VeaiZNeal587pFYrnJQTJS24f73
b+3fn1a7F154UbP3sG81n91l83wh2Gd22i2UyIAYJI+LMeh1wfDrxUBaVLCdRL/qykvf01WuOqulWeGi8rCsImPbxpzc008/G3SNt6wHg8KtfsKFNYqjb/tL
M+amyyICOCQtuLpZ4BK8qq/anPm7nhuHWqXO1eSloGM9U8YQI3g6eYoyu/jiS0N4vxKrM86OUPe+uTmRJbTaAU2Mmcw9SkcffUxY8yPyNyXSJWD31slzzzu/
OfaEs5oN1l0xFF1rma1+dwwDmuovb0aUusu66204lcDoOK0oA4EgWC5zRiZWAQ2xJSzM4kvx/hshY4JUbiBB5LvzYVmhL/z3Ps23Djtk+kCzmD+hnsWHNgpB
76WpFKt6vHn7sG8d3px7zq/iZJ2NmttuvcXTrHmJvis2Q9ZZJdd5ERzjP51nQf1fIiJAI0ff3Rx/9GGJdIWq7azgA35UGQz0jUMOa24YOTpcrkEBY3uwpuct
g9kx7EyA3snMF//qlznhu+uuu8yRAAOrmPz6GBNuH1G5XXb9fMDergx5ZfJr0f5yqcgM+jEOl/3mUTfEMqXbcqxT5WfuovsSJTezonO/+ipCtt9Xvtb8/va7
m0EDB6RHoywcsPYuzK5txxof8a3/DZfq41nnnyIoYm3gkd/+VrjI8DRjOKGNmVO12Xl/Vvc876zrzjvvinHQhtHW1eGubp+wd1p+q+VFBM1TWTp2cMw1Xjri
5mbNNT7a3P+Xu5rBK6wZeByYARRyoW8srn4yLuSDQsDr3e0lctPs9FtvtZEIA0+ZNEpwjCUUnmGNnBH+z6ignQdQkbwQ171b7O+58754QfCTOfP8mU9/Khsr
AgHo/dL7CU+VK2RxD7556BHNtdff0uyw4zbR0SmxfGSrUAbds9PBbhntoa3UC04CxDXwcuFnJz3fDNvzU+E3D8uq32VVqLU5TEVUs+JnnH5qs3UshDX2nDKl
3bYP+RjKpCHEvxBvDKxWKio2h01Oz24xsEQLU2oLLBBvWwxllgPg6DM6UhwCRE2XpWJB6pgUpPfC83vdrwYJFwWhne8ceXizylpbx5sZe+f6Nq4PpVweDJ7g
sSwWIfrddtu1qmi/u/SJcyTuCyt6QQ708cd7ta1Nz30Xrmcl5CrGX1WX9yeZEzo5zgE/59wLmg0/tl7shH0y+Xn3z382LaOAgwXGX/nqgc3fxz7Z/Ne2GwS+
3shXfcKdiCu+phj0Cw+hJ+OBl/zmEXXZeJOh+aIxAOo4pNPcCrM4GMIzhMEELA5g+a0Y2mBdOZE3SPv9726JSNw3m/854GsZsdG56pjf85LA4YJwnT/gwG+E
//ps06/vUrFDt13z1yPCoJhWh0UInQ2BuGCFCGFUUUBWk9t5803XxlxC+1rH2Qn57GA/5tgfhZY9Ig++ZO4hGLxwCUcSHCEQmLzMTNBBxGhOcVT5Hww/f8NN
dmy23mJItBf7kATEw/poW57CGcJLzzz7fHPbmBtyoO3ZezFkZn6fj2r/yiuvanbd4+Bmh22HJANy555//oVkMnSCd20Yi8KDflNsr776SriEzzWvvzmlGTXy
6lwiVgryfZqdo0ed9d0cgaFP5Hzk/M06667Z3Pvnu3Kz5rID+kaw5tZYJD0khMPb0dshQDu2bwUYvfSFbBAceKu6/YbbFCSCIyNhIWU6DlEWk9LcGFNBmpx5
w5D+Cy9ikPnn9yqPsbEO6c3m7DNPyvAxpKlDXXNLrE6sAVhSl0Wk23/yc/EWuaVTWKwZA3/rx4YLGhqTUCzUa9r5edEPCazgtxXEoF8oVASnmCIzzeFHleXD
D91yh3xvLmIUDgm0PPAKJ3A9efIrqRkH9O8Xe2iuDEvfO4kzJ3iqdp98ckKz6257xQsLvFql3cQGF4iPTpSfeTIKhJt+U+wf+u1vr49AwXbz1O8SQvg8+OBv
NCOuur5ZafmP5pHUYNMuocIj+AcD4hk0oGSei52s9rXR+Pc+ML558C+jYxXBKvME06xIV3jyTCDlxxGcOeboH0aUdXvMFHOAT0U0b+kUIPRxoQO6+Ta/CHYW
3zcPDX4LxxQWXstwCcsjo4KplYNpIYgQIQgmAJDKVQSJ7XhjgXj2dnPdtb+ODW+fbf58zy0R+fh0AlMdmBPmmBUi3CuiqYuPv96668b7d/rnRFlLpHZzFW3n
Mg6CAALl1SPy6APGWjLc1htH/r457/xfpBCpG1LmNd16W2yZf/zBxJG+014ubfuuPmCmxRZbPP53i36s3bpc89C4ycLei8a6tHBnMS9GKKYFhz4/9VS7QNVY
d/14Edd11/12miJsx75z0zxaqF9bhx76zehPWJ0QEnhn8XkuXMBSIJQ0/nGZu+QqcaFEx7bcbJ3m45t+srn/gQeSFur9sBLawr06RQytTLee8977H4nJ+2ci
mvfRdLnxM0NB0PUNHsmCy9ymfqbARH2ekwu0lZKHIJ9LwOWQZPpXSFm5cCqRMTNHJRpSKW0zctQ9sYnv9Rhc39j8IF72ZLkFgCtvVjiPH+oDk0STDh26eb6m
A3zOjdAZK3AriGDvCGadEorAPqJecTZzjokiP7/+iQkRBj78gGbPPXafR8hmuKxwdPHFl8WLpzdPzUbZgKu0G2KCAVzw3C3GkU89+VIQcbnE5bwAgqBLLLFY
WBz7w15OwSE8FKF2CbF24dBYd7F4idrIm9pzMrSLVnOb1I8+DtC/4PyfRMj5hmCwduoDj8ABOLhJ+s8FAhshUpZQPf/8P9KlEhRaa82NYrX/Xz50YdL3gtXv
bbfdpvnjXTeHVdo83Otr0mBw68gB2hF+sIKdFdNHF9gpLr8l3+IG3NaukKxjfSKCRSpJnfVwCkEEQmCI7nHffw3SJE5e+b/vHdhce82IZttttp4OKICL8eeW
QFUOoOqThHlFXnbYcafUBjqIIQVGwIhJdNzbLiBigbBKtKBQPrjVxTdfJBTAwQd/PRnYvQ8DVis0TIraXAYWdbKMmAlBKCt4Nsnsvjk4pxJhQGlu4Ci4Kbb+
MdHYtYsNk+3GO7gIEBIOCgYOwZLaNsZQr7zqrX33Ztvz+lFw4IHvfOd7zR133R/80s5LWnNJ0N8JV45rKTgFB/gMTricGNM0QYh0M2T9tWNX7lZ5mGUx/rzC
11lenRQHfJsMtwjXGO/WW0eFNfx78MaMI7fk1bcWd22ovf0d+O7fP3EJn5QoVz6PLCY8GkAATGfXKN9PQYTABD0DITT6yBuuS0AsJ/nud4+MSbW++f/DtEI6
zy3Qvk6f8ZOfxgLCvTIaBukCIJZ1eEaDgA9smLT3ou3B6Z7xxXWaIqApbxz5x+boo74fJn7JLKP+uU3VX8xx1dXXNKutvl4qIQwCDrgEW63ho31pZficEvDG
+To5ITm37Vc5tMOQk56Pw2dikxn6ga1nz9gwGYoFHfUfXNo2R7L0kmGVYoU0mOBA/rlNmA2uSe6BsVp++UHLpKJVrwl7CtncC4UiVIn5WCj3wVdKDiwmPTfe
ZK3ctXpnrEQo+s8LfDP3q4QDzHC3006fyYn4Hbb/RKyAGREwcO1mWBt5uMQ8IHgEE0UNr2BGY7jNt1HokIfmGiRbdYtRPcOEQsuE6Jhjjo2JvitzCY68AFJ5
aSb35jURDNaGdv/hD49uvnnECXlqDUroDCIUExjbgXXJJZdKYpllrrV570Y94Lci2ZKRyy47PXZnrpeMA0Hzkoq4Qt6XXnFT7JsanAKuTiFXcMqjD8lU8f8f
cZ+VhMvV1mg3As4tDPBdMPTps3iua0NQwqr/2hVUca/wZdkOS77Qgj2bc35+Vq6M1n7VM7ew6B/FR6BPOeW43FUsigvH2gZLegmhjDGe3QXaLL7zDM3hxY6D
DTfaLPZX7RyLSO9M3Mk7rzDO3DcwqxP/Ws1y0knH5yTuqJv+EIbE8qdeCXO5eDXE4ZoLLFEAaOA5xdm1VywudIN/OGXKu0mIBeZnhdq5AhXeMvqm1Ph2HB5x
xOHpTgEAIAD6MFMrRG14/RvfOLTxot2ddtg0tYI2Id38EYZkbbil7tEQyaQR/Hj11dgO0j1WhgfzckUfeujR5vvD/y8OxGi3aHwY8Fa/r732uublF8blspra
ts99of1pX8yCebg4NDTEK7vuOmtktG5eYCnm4qdXwoyIjYFpUrQFh3a9nduqbcIl3XHHnfktz7wmig88G8dK8x//+IxYmnN9MmMxXAmUaZPaaeAe/BB+gQDf
Xbt2j+9Fmp123jRXrY+JHdOF6+rvvMJa5fVb3XgOjj7/+d1iPeKdzYrLL9dcecWl8czzNnKHpvqC15zlAZ+UpXLo2yVewjSVdqc1uCEemlhbMFylKcG4t8Ua
tFNOPa3Ze689p2t6HQKEK37Gd2k1BOEmeNZ5r83jmfydqc3X5icotJiVuQcf/M2Y2L232WLzDXMgTUick7d4jOW0zxLJiyloC1qhiOK3unRw8mQv2F0gXwBt
HAgRylUfCn4w/fvv6CNfRJektlv5E+LhyVyWNWmbbLrFNG06NQWmcMMizD//AsnYxijaxHCTnvtHjCu3aGx6AwtYlZnTVPgSxrfyfKOPbxp4aN9ACD51wwna
StwTzIDO3N5u3bo0F114fiqjuYWhE+aCx1aKPff67zzhSESRkoYzys74qPBDCYITTBLayAdPGFh9t8R6vdGjb8lzGtyXWr7rZKSWf/LhLD5m0HpGednqvt/a
kggWeH/1q4tj+/2Xmr79BsdGxdWThmCrPhYsBAqeu7oBscV8KkPTyWF+lx2wTKxsfjCWTnw9hagayoLRoA7pcPsttt5K+H/ea/PQkADtvNp77XP1WgP1+d2/
0Fx7w63NumuvEh1oTzei4ftEYAFj0Ljgtr4L8sGOUQ1cPUcw990D28/OOiODKeBHuIK3E/7//B39m9Y3zzp/q0O68aZR+e2/tlxSuTLojkn0iyYm2G9FpPHP
f7y9WWnFFfK+/PCRbWhnDi71SrR59Cz61vYXrrQLDxQOXGBgRKdB4a9Hj24xh3Xb9BXhcwtDJ7wFD4ty/HHHhFVsdxSAB4+ZLwODyz1MCSfgM7dGIVKMYGRZ
F4qI63b/Fa+F2XrvXDdZbelz/W6/3XnvJI9UZSpn3fe/eBJMxtv77rtPLvrdeKMhsWnz0YCnXUsqH34DvyGPvhibdvdAYZXqkP/CeQ89Mi5CxLvlDDifEONC
FOZopXHWs+KeFYDytr/zR5bL59M65TlV7x7kcdm+8tWDkmm329phiSyPhZ/tsnXtcweKCEK+mBiRlGdZaXzLbjDR3x8eGxZt4+xsbfjTpvZaYW/7rs8JSfjn
AfF0mFrYMfe0PscPCxa15cTVK664OqzRVlmGELVIjfBuaH4BBeUJELyKLib+ovyQDTaLflh9YTFwe0Z2iwtQwFl+TmvX7xan6AQ+9RpL+M9ltCN10T7LJh5M
cOofWAgTWPUPntponlUGcfZgBENWXX1gbstedbVVs19gqLa12rbLZWu1dd5JmlU+d1omLRhb3KLBUqEIV8+JZwN4cLb8046N0IqFBAd4LOvq3bt9GwqYaX8J
nbbfbr04FWmrnGYRHdTGv6eWZokXD6Y9b/E0c95/Lznzv5IHbay11pox5jux2fHTuwYePxIKa+kcPqgXH4Ibn+HTLltsue1UpniRMMFvB8NKiOCaMOGp5v77
7ok7sft1sV4R7VksGeCJcY83q8e+FZL5eCzPb6NQsRnvhfFNjwWWad6O17lEvJytjrLqDDesp8M+woS++XT8j9Q9dl5OsYkvTh9aeNlmicVjLdNbsbdkVYst
vfjYXEzspwkadg/BxjRgorGKEUrbG1zrOE0rj7wIZOKvR4+PNPf88dFmkd4R1Zvf8qEp4bb2DKKFGxuC+chjE4LxLT7s0Tz3QpyYFANx48OXXmnD2Iv3XjCW
Oi0b7uWLKRCYggA8Hau3N1p/1YQFrBhF2yyitsEE4c4DAG9p2lIEk2Kd36BBy+Z5A8o47ll+jGkuTBluISFk4VoF12OasmhdNsufMN3Eic9E/taVgwd4UV5b
ymFK4w/Mqy1wglcihP37LRPPrMYIJRr1AYPrRzgwkHWYxlfqRBv01i4tTSHQzKzhfPFcYXhXL2/CGFUf4I1ChCsury0kYLV6GnwisVIxst/6ASfgEYX860OP
Nbvtsn26osSDYJovNPa0RjJUcsKpDnW/i3lkjPIm58FeSVlrK7vFygvKDM+4VwLsW3557rjjrqQfhV2H7nguP7pqq8tmm38ix0i0PARLKtUJYcrS/pBJcN6I
OSTIRjgMh6ns4e8VQQllxNRLgyEa4YR0nVww5jlyYWhoGR1wVhizCEBEhmiIeSMYAAzFPJ5zUcxF2JylEwl8IE8bCCy/+2AwntJJ7kx1mNtXE7iiePUbIcs1
ND+mzxConP9LhxZSD+FQt2SlsN2+8wVzSODELBgXDsGpDv12X10sEg0sakYwwAx/AiLmW+RVBr5fiPLu8dXnC0UADu4GuOEJPNroGQy8aEywmpfStqQe8MAL
HMGFOhHc/RKgsvT+q/+pEMYll1g8GVobyYGBX4fGcPHVJ4w9YEC/ZFZCjLaUl76ZMnkppyZiB3XwgrMv4MJz/ReIIczGRaUY3CcotDp8gA8tuHhwpS/oLp+6
4Od3f3iwmfrW681Hwu1781V9fl63Q9EPjIUEMfn7xluxanuZ5rFxk0IbPBtP4tU0vfs0PQOe+YJn3wjBfzXf2/VOGIaFQ4H3bv4V9Hg7FOyT4/+adfUbsHKu
FiHB+GqZvkskLuHx+efb9uADfA6geXXyq02X2Jab2yhkSimOgjpKGPzXCd8m2WgpndFpCLCOqhgew7z2Wrt7kG/baqH2pEoMKB9iAgDCWBuqwn3I044ykjbk
ARMN4H+VlVd9rImDzFtGaDU25HOlfFd9OgtWdYNbX8AguY9h9IMLoUxpRvnd/0gwsrPMtFN1goFgUQJvhzYr+NSNCcEttRquPb/Ab33CNGA3YUw4qm3PWzwb
fLcnvvo/JbZdcFdbwTMvNiDD6zT7m3G+OqvgSF0wgFnfs1z8xrQErpjUfc8xN8HTpv8JU9BXBFef6pk+sFJwBGdWkNRUCNzBhzoIq/us1vzRJ/BTtMY5eMlz
OGOlKSqCCy7/JXCpp/Bb7buP7nhP2631ace44MFvEqXcNcqjtfxoUHymrp5xD77hATzacb+inS2/B49HHTMsXSwtC9yCCw7wVeEP/2nDf7SG926rrb7mcDc0
4iFgPERgGTA6pIl4FQC+VQZByvrvG4IQr5OYkMCHVFfVj+i0rTpoV/Vr338XhtNuLUb1G4Ehk2aCBB1Wj7Y9R0B5wOG+OTH9UC94qk7PJcykrRI0z13ul9DO
F+4G66MOhIFURGExivAVefJcfe5jTHWBwz3PME072dclrIjDQ1r3FTwUlIuVgEMwqadHuEsiqPJ+JFwsFvwfMQaCF220Fs0BMK31Y7nhB559gwHsLT3bvoEF
zjz3rC448Uy92tamRcuY1LyPe5Ln6IWe6nG/mHORsDivB13V88wz7Qm7FBTlpD7aHEP6TTirHThSl377DacEj6Jp2+2S9+XBN7ajwEsKV+CNd4EnDFHkoaBf
CqsGl6mUpj3Dr+AmGHDnm8UDr9+sqnqmxDOKcnJYGuxSSkM+sINRPWBjcCigbssNXmE4ZHoIuZLKJAhEBKb7zXDhuG18ZkuIIBTxdVo5k59WP2gIAUFQHcfo
6vdt8hRAGAjDuiefqBjg6h6YdEA+SPWMVdCuMmDEXHWaTb65LRZOQgjNGD8Crna+RNvylyBrH4zq0T/t05SW2NBS+qM9bdNKtL4VASwDl0Zd704N5WHcEC6D
PrsH3kJ6Ma9+qM99DKdNdeuHvrpX/cQY8AlOV/U13+QRFhwsU95pXwz8kZ7mhtr+6Y8+lNXEkOBx3zfrVX2XR9/NixBu6/MIIhj11wUeZSkSSd2EtPCGgQpm
bVGGnvEy4IK7rG/6AvfabsdGcdJPKDX9t1IdHOpmaco6KO+CF3BpiwDAo9/cWZFPv7WtbrB7rm7tqNOYkxDpp2dgkR/c6tdP+cDggmvf8IXO2oYrl7bafrWv
voQTefWxZ9CMIP9/0Wj1ClQSO/4AAAAASUVORK5CYII=`

121
server/llm_test.go Normal file
View file

@ -0,0 +1,121 @@
package server
import (
"context"
"os"
"strings"
"sync"
"testing"
"time"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/jmorganca/ollama/api"
"github.com/jmorganca/ollama/llm"
)
// TODO - this would ideally be in the llm package, but that would require some refactoring of interfaces in the server
// package to avoid circular dependencies
// WARNING - these tests will fail on mac if you don't manually copy ggml-metal.metal to this dir (./server)
//
// TODO - Fix this ^^
var (
req = [2]api.GenerateRequest{
{
Model: "orca-mini",
Prompt: "tell me a short story about agi?",
Options: map[string]interface{}{},
}, {
Model: "orca-mini",
Prompt: "what is the origin of the us thanksgiving holiday?",
Options: map[string]interface{}{},
},
}
resp = [2]string{
"once upon a time",
"united states thanksgiving",
}
)
func TestIntegrationSimpleOrcaMini(t *testing.T) {
SkipIFNoTestData(t)
workDir, err := os.MkdirTemp("", "ollama")
require.NoError(t, err)
defer os.RemoveAll(workDir)
require.NoError(t, llm.Init(workDir))
ctx, cancel := context.WithTimeout(context.Background(), time.Second*60)
defer cancel()
opts := api.DefaultOptions()
opts.Seed = 42
opts.Temperature = 0.0
model, llmRunner := PrepareModelForPrompts(t, req[0].Model, opts)
defer llmRunner.Close()
response := OneShotPromptResponse(t, ctx, req[0], model, llmRunner)
assert.Contains(t, strings.ToLower(response), resp[0])
}
// TODO
// The server always loads a new runner and closes the old one, which forces serial execution
// At present this test case fails with concurrency problems. Eventually we should try to
// get true concurrency working with n_parallel support in the backend
func TestIntegrationConcurrentPredictOrcaMini(t *testing.T) {
SkipIFNoTestData(t)
t.Skip("concurrent prediction on single runner not currently supported")
workDir, err := os.MkdirTemp("", "ollama")
require.NoError(t, err)
defer os.RemoveAll(workDir)
require.NoError(t, llm.Init(workDir))
ctx, cancel := context.WithTimeout(context.Background(), time.Second*60)
defer cancel()
opts := api.DefaultOptions()
opts.Seed = 42
opts.Temperature = 0.0
var wg sync.WaitGroup
wg.Add(len(req))
model, llmRunner := PrepareModelForPrompts(t, req[0].Model, opts)
defer llmRunner.Close()
for i := 0; i < len(req); i++ {
go func(i int) {
defer wg.Done()
response := OneShotPromptResponse(t, ctx, req[i], model, llmRunner)
t.Logf("Prompt: %s\nResponse: %s", req[0].Prompt, response)
assert.Contains(t, strings.ToLower(response), resp[i], "error in thread %d (%s)", i, req[i].Prompt)
}(i)
}
wg.Wait()
}
func TestIntegrationConcurrentRunnersOrcaMini(t *testing.T) {
SkipIFNoTestData(t)
workDir, err := os.MkdirTemp("", "ollama")
require.NoError(t, err)
defer os.RemoveAll(workDir)
require.NoError(t, llm.Init(workDir))
ctx, cancel := context.WithTimeout(context.Background(), time.Second*60)
defer cancel()
opts := api.DefaultOptions()
opts.Seed = 42
opts.Temperature = 0.0
var wg sync.WaitGroup
wg.Add(len(req))
t.Logf("Running %d concurrently", len(req))
for i := 0; i < len(req); i++ {
go func(i int) {
defer wg.Done()
model, llmRunner := PrepareModelForPrompts(t, req[0].Model, opts)
defer llmRunner.Close()
response := OneShotPromptResponse(t, ctx, req[i], model, llmRunner)
t.Logf("Prompt: %s\nResponse: %s", req[0].Prompt, response)
assert.Contains(t, strings.ToLower(response), resp[i], "error in thread %d (%s)", i, req[i].Prompt)
}(i)
}
wg.Wait()
}
// TODO - create a parallel test with 2 different models once we support concurrency

72
server/llm_utils_test.go Normal file
View file

@ -0,0 +1,72 @@
package server
import (
"context"
"errors"
"os"
"path"
"runtime"
"testing"
"github.com/jmorganca/ollama/api"
"github.com/jmorganca/ollama/llm"
"github.com/stretchr/testify/require"
)
func SkipIFNoTestData(t *testing.T) {
modelDir := getModelDir()
if _, err := os.Stat(modelDir); errors.Is(err, os.ErrNotExist) {
t.Skipf("%s does not exist - skipping integration tests", modelDir)
}
}
func getModelDir() string {
_, filename, _, _ := runtime.Caller(0)
return path.Dir(path.Dir(filename) + "/../test_data/models/.")
}
func PrepareModelForPrompts(t *testing.T, modelName string, opts api.Options) (*Model, llm.LLM) {
modelDir := getModelDir()
os.Setenv("OLLAMA_MODELS", modelDir)
model, err := GetModel(modelName)
require.NoError(t, err, "GetModel ")
err = opts.FromMap(model.Options)
require.NoError(t, err, "opts from model ")
runner, err := llm.New("unused", model.ModelPath, model.AdapterPaths, model.ProjectorPaths, opts)
require.NoError(t, err, "llm.New failed")
return model, runner
}
func OneShotPromptResponse(t *testing.T, ctx context.Context, req api.GenerateRequest, model *Model, runner llm.LLM) string {
prompt, err := model.Prompt(PromptVars{
System: req.System,
Prompt: req.Prompt,
First: len(req.Context) == 0,
})
require.NoError(t, err, "prompt generation failed")
success := make(chan bool, 1)
response := ""
cb := func(r llm.PredictResult) {
if !r.Done {
response += r.Content
} else {
success <- true
}
}
predictReq := llm.PredictOpts{
Prompt: prompt,
Format: req.Format,
Images: req.Images,
}
err = runner.Predict(ctx, predictReq, cb)
require.NoError(t, err, "predict call failed")
select {
case <-ctx.Done():
t.Errorf("failed to complete before timeout: \n%s", response)
return ""
case <-success:
return response
}
}

View file

@ -25,6 +25,7 @@ import (
"github.com/gin-gonic/gin" "github.com/gin-gonic/gin"
"github.com/jmorganca/ollama/api" "github.com/jmorganca/ollama/api"
"github.com/jmorganca/ollama/gpu"
"github.com/jmorganca/ollama/llm" "github.com/jmorganca/ollama/llm"
"github.com/jmorganca/ollama/parser" "github.com/jmorganca/ollama/parser"
"github.com/jmorganca/ollama/version" "github.com/jmorganca/ollama/version"
@ -81,20 +82,6 @@ func load(c *gin.Context, modelName string, reqOpts map[string]interface{}, sess
return nil, err return nil, err
} }
ctx := c.Request.Context()
// check if the loaded model is still running in a subprocess, in case something unexpected happened
if loaded.runner != nil {
if err := loaded.runner.Ping(ctx); err != nil {
log.Print("loaded llm process not responding, closing now")
// the subprocess is no longer running, so close it
loaded.runner.Close()
loaded.runner = nil
loaded.Model = nil
loaded.Options = nil
}
}
needLoad := loaded.runner == nil || // is there a model loaded? needLoad := loaded.runner == nil || // is there a model loaded?
loaded.ModelPath != model.ModelPath || // has the base model changed? loaded.ModelPath != model.ModelPath || // has the base model changed?
!reflect.DeepEqual(loaded.AdapterPaths, model.AdapterPaths) || // have the adapters changed? !reflect.DeepEqual(loaded.AdapterPaths, model.AdapterPaths) || // have the adapters changed?
@ -114,7 +101,7 @@ func load(c *gin.Context, modelName string, reqOpts map[string]interface{}, sess
// some older models are not compatible with newer versions of llama.cpp // some older models are not compatible with newer versions of llama.cpp
// show a generalized compatibility error until there is a better way to // show a generalized compatibility error until there is a better way to
// check for model compatibility // check for model compatibility
if strings.Contains(err.Error(), "failed to load model") { if errors.Is(llm.ErrUnsupportedFormat, err) || strings.Contains(err.Error(), "failed to load model") {
err = fmt.Errorf("%v: this model may be incompatible with your version of Ollama. If you previously pulled this model, try updating it by running `ollama pull %s`", err, model.ShortName) err = fmt.Errorf("%v: this model may be incompatible with your version of Ollama. If you previously pulled this model, try updating it by running `ollama pull %s`", err, model.ShortName)
} }
@ -126,10 +113,6 @@ func load(c *gin.Context, modelName string, reqOpts map[string]interface{}, sess
loaded.Options = &opts loaded.Options = &opts
} }
// update options for the loaded llm
// TODO(mxyng): this isn't thread safe, but it should be fine for now
loaded.runner.SetOptions(opts)
loaded.expireAt = time.Now().Add(sessionDuration) loaded.expireAt = time.Now().Add(sessionDuration)
if loaded.expireTimer == nil { if loaded.expireTimer == nil {
@ -909,9 +892,12 @@ func Serve(ln net.Listener) error {
os.Exit(0) os.Exit(0)
}() }()
if runtime.GOOS == "linux" { if err := llm.Init(s.WorkDir); err != nil {
return fmt.Errorf("unable to initialize llm library %w", err)
}
if runtime.GOOS == "linux" { // TODO - windows too
// check compatibility to log warnings // check compatibility to log warnings
if _, err := llm.CheckVRAM(); err != nil { if _, err := gpu.CheckVRAM(); err != nil {
log.Print(err.Error()) log.Print(err.Error())
} }
} }