diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index cd5678e0..26396b27 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -64,10 +64,10 @@ jobs: echo $env:PATH go generate -x ./... if: ${{ startsWith(matrix.os, 'windows-') }} - name: "Windows Go Generate" + name: 'Windows Go Generate' - run: go generate -x ./... if: ${{ ! startsWith(matrix.os, 'windows-') }} - name: "Unix Go Generate" + name: 'Unix Go Generate' - uses: actions/upload-artifact@v4 with: name: ${{ matrix.os }}-${{ matrix.arch }}-libraries @@ -148,7 +148,7 @@ jobs: with: go-version: '1.22' cache: true - - name: "Install ROCm" + - name: 'Install ROCm' run: | $ErrorActionPreference = "Stop" write-host "downloading AMD HIP Installer" @@ -156,7 +156,7 @@ jobs: write-host "Installing AMD HIP" Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait write-host "Completed AMD HIP" - - name: "Verify ROCm" + - name: 'Verify ROCm' run: | & 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version - run: go get ./... @@ -185,7 +185,7 @@ jobs: with: go-version: '1.22' cache: true - - name: "Install CUDA" + - name: 'Install CUDA' run: | $ErrorActionPreference = "Stop" write-host "downloading CUDA Installer" @@ -199,7 +199,7 @@ jobs: echo "CUDA_PATH=$cudaPath" >> $env:GITHUB_ENV echo "CUDA_PATH_V${cudaVer}=$cudaPath" >> $env:GITHUB_ENV echo "CUDA_PATH_VX_Y=CUDA_PATH_V${cudaVer}" >> $env:GITHUB_ENV - - name: "Verify CUDA" + - name: 'Verify CUDA' run: nvcc -V - run: go get ./... - name: go generate @@ -216,7 +216,6 @@ jobs: OLLAMA_SKIP_CPU_GENERATE: '1' # TODO - do we need any artifacts? - lint: strategy: matrix: @@ -248,18 +247,18 @@ jobs: esac >>$GITHUB_ENV shell: bash - run: | - mkdir -p llm/build/linux/$ARCH/stub/bin/ - touch llm/build/linux/$ARCH/stub/bin/stub.so + mkdir -p llm/build/linux/$ARCH/stub/bin + touch llm/build/linux/$ARCH/stub/bin/ollama_llama_server if: ${{ startsWith(matrix.os, 'ubuntu-') }} - run: | - mkdir -p llm/build/darwin/$ARCH/stub/bin/ - touch llm/build/darwin/$ARCH/stub/bin/stub.dylib - touch llm/ggml-metal.metal + mkdir -p llm/build/darwin/$ARCH/stub/bin + touch llm/build/darwin/$ARCH/stub/bin/ollama_llama_server if: ${{ startsWith(matrix.os, 'macos-') }} - run: | - mkdir -p llm/build/windows/$ARCH/stub/stub/bin/ - touch llm/build/windows/$ARCH/stub/stub/bin/stub.dll + mkdir -p llm/build/windows/$ARCH/stub/bin + touch llm/build/windows/$ARCH/stub/bin/ollama_llama_server if: ${{ startsWith(matrix.os, 'windows-') }} + shell: bash - uses: golangci/golangci-lint-action@v4 with: args: --timeout 8m0s @@ -277,7 +276,7 @@ jobs: env: GOARCH: ${{ matrix.arch }} CGO_ENABLED: '1' - OLLAMA_CPU_TARGET: "static" + OLLAMA_CPU_TARGET: 'static' steps: - uses: actions/checkout@v4 with: @@ -294,18 +293,18 @@ jobs: esac >>$GITHUB_ENV shell: bash - run: | - mkdir -p llm/build/linux/$ARCH/stub/bin/ - touch llm//build/linux/$ARCH/stub/bin/stub.so + mkdir -p llm/build/linux/$ARCH/stub/bin + touch llm/build/linux/$ARCH/stub/bin/ollama_llama_server if: ${{ startsWith(matrix.os, 'ubuntu-') }} - run: | - mkdir -p llm/build/darwin/$ARCH/stub/bin/ - touch llm/build/darwin/$ARCH/stub/bin/stub.dylib - touch llm/ggml-metal.metal + mkdir -p llm/build/darwin/$ARCH/stub/bin + touch llm/build/darwin/$ARCH/stub/bin/ollama_llama_server if: ${{ startsWith(matrix.os, 'macos-') }} - run: | - mkdir -p llm/build/windows/$ARCH/stub/stub/bin/ - touch llm/build/windows/$ARCH/stub/stub/bin/stub.dll + mkdir -p llm/build/windows/$ARCH/stub/bin + touch llm/build/windows/$ARCH/stub/bin/ollama_llama_server if: ${{ startsWith(matrix.os, 'windows-') }} + shell: bash - run: go generate ./... - run: go build - run: go test -v ./... diff --git a/integration/llm_test.go b/integration/llm_test.go index 5b060447..bcc169d6 100644 --- a/integration/llm_test.go +++ b/integration/llm_test.go @@ -15,10 +15,6 @@ import ( // TODO - this would ideally be in the llm package, but that would require some refactoring of interfaces in the server // package to avoid circular dependencies -// WARNING - these tests will fail on mac if you don't manually copy ggml-metal.metal to this dir (./server) -// -// TODO - Fix this ^^ - var ( stream = false req = [2]api.GenerateRequest{ diff --git a/llm/generate/gen_darwin.sh b/llm/generate/gen_darwin.sh index 1fb84181..d9c41980 100755 --- a/llm/generate/gen_darwin.sh +++ b/llm/generate/gen_darwin.sh @@ -18,7 +18,7 @@ sign() { fi } -COMMON_DARWIN_DEFS="-DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 -DCMAKE_SYSTEM_NAME=Darwin -DLLAMA_METAL_EMBED_LIBRARY=on" +COMMON_DARWIN_DEFS="-DCMAKE_OSX_DEPLOYMENT_TARGET=11.3 -DLLAMA_METAL_MACOSX_VERSION_MIN=11.3 -DCMAKE_SYSTEM_NAME=Darwin -DLLAMA_METAL_EMBED_LIBRARY=on" case "${GOARCH}" in "amd64") @@ -41,7 +41,7 @@ case "${GOARCH}" in BUILD_DIR="../build/darwin/${ARCH}/cpu" echo "Building LCD CPU" build - sign ${BUILD_DIR}/lib/libext_server.dylib + sign ${BUILD_DIR}/bin/ollama_llama_server compress # @@ -53,7 +53,7 @@ case "${GOARCH}" in BUILD_DIR="../build/darwin/${ARCH}/cpu_avx" echo "Building AVX CPU" build - sign ${BUILD_DIR}/lib/libext_server.dylib + sign ${BUILD_DIR}/bin/ollama_llama_server compress # @@ -66,7 +66,7 @@ case "${GOARCH}" in echo "Building AVX2 CPU" EXTRA_LIBS="${EXTRA_LIBS} -framework Accelerate -framework Foundation" build - sign ${BUILD_DIR}/lib/libext_server.dylib + sign ${BUILD_DIR}/bin/ollama_llama_server compress ;; "arm64") @@ -74,17 +74,17 @@ case "${GOARCH}" in # Static build for linking into the Go binary init_vars CMAKE_TARGETS="--target llama --target ggml" - CMAKE_DEFS="${COMMON_CPU_DEFS} -DBUILD_SHARED_LIBS=off -DLLAMA_ACCELERATE=off -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}" + CMAKE_DEFS="-DCMAKE_OSX_DEPLOYMENT_TARGET=11.3 -DCMAKE_SYSTEM_NAME=Darwin -DBUILD_SHARED_LIBS=off -DLLAMA_METAL=off -DLLAMA_ACCELERATE=off -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}" BUILD_DIR="../build/darwin/${ARCH}_static" echo "Building static library" build init_vars - CMAKE_DEFS="${COMMON_DARWIN_DEFS} -DLLAMA_METAL_EMBED_LIBRARY=on -DLLAMA_ACCELERATE=on -DCMAKE_SYSTEM_PROCESSOR=${ARCH} -DCMAKE_OSX_ARCHITECTURES=${ARCH} -DLLAMA_METAL=on ${CMAKE_DEFS}" + CMAKE_DEFS="${COMMON_DARWIN_DEFS} -DLLAMA_ACCELERATE=on -DCMAKE_SYSTEM_PROCESSOR=${ARCH} -DCMAKE_OSX_ARCHITECTURES=${ARCH} -DLLAMA_METAL=on ${CMAKE_DEFS}" BUILD_DIR="../build/darwin/${ARCH}/metal" EXTRA_LIBS="${EXTRA_LIBS} -framework Accelerate -framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders" build - sign ${BUILD_DIR}/lib/libext_server.dylib + sign ${BUILD_DIR}/bin/ollama_llama_server compress ;; *)