feat: Binary wheels for CPU, CUDA (12.1 - 12.3), Metal (#1247)
* Generate binary wheel index on release
* Add total release downloads badge
* Update download label
* Use official cibuildwheel action
* Add workflows to build CUDA and Metal wheels
* Update generate index workflow
* Update workflow name
This commit is contained in:
parent
8649d7671b
commit
5a930ee9a1
6 changed files with 330 additions and 5 deletions
10
.github/workflows/build-and-release.yaml
vendored
10
.github/workflows/build-and-release.yaml
vendored
|
@ -11,7 +11,7 @@ jobs:
|
||||||
runs-on: ${{ matrix.os }}
|
runs-on: ${{ matrix.os }}
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
os: [ubuntu-latest, windows-latest, macOS-latest]
|
os: [ubuntu-20.04, windows-2019, macos-11]
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v3
|
- uses: actions/checkout@v3
|
||||||
|
@ -23,19 +23,19 @@ jobs:
|
||||||
with:
|
with:
|
||||||
python-version: "3.8"
|
python-version: "3.8"
|
||||||
|
|
||||||
- name: Install cibuildwheel
|
|
||||||
run: python -m pip install cibuildwheel==2.12.1
|
|
||||||
|
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
run: |
|
run: |
|
||||||
python -m pip install --upgrade pip
|
python -m pip install --upgrade pip
|
||||||
python -m pip install -e .[all]
|
python -m pip install -e .[all]
|
||||||
|
|
||||||
- name: Build wheels
|
- name: Build wheels
|
||||||
run: python -m cibuildwheel --output-dir wheelhouse
|
uses: pypa/cibuildwheel@v2.16.5
|
||||||
env:
|
env:
|
||||||
# disable repair
|
# disable repair
|
||||||
CIBW_REPAIR_WHEEL_COMMAND: ""
|
CIBW_REPAIR_WHEEL_COMMAND: ""
|
||||||
|
with:
|
||||||
|
package-dir: .
|
||||||
|
output-dir: wheelhouse
|
||||||
|
|
||||||
- uses: actions/upload-artifact@v3
|
- uses: actions/upload-artifact@v3
|
||||||
with:
|
with:
|
||||||
|
|
131
.github/workflows/build-wheels-cuda.yaml
vendored
Normal file
131
.github/workflows/build-wheels-cuda.yaml
vendored
Normal file
|
@ -0,0 +1,131 @@
|
||||||
|
# Builds CUDA-enabled wheels for every (os, python, cuda) combination and
# attaches them to a GitHub release tagged <ref>-cu<major><minor>.
# Triggered manually only.
name: Build Wheels (CUDA)

on: workflow_dispatch

permissions:
  # Needed by softprops/action-gh-release to create/update the release.
  contents: write

jobs:
  # Emits the build matrix as JSON so build_wheels can consume it via fromJSON.
  define_matrix:
    name: Define Build Matrix
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    defaults:
      run:
        shell: pwsh

    steps:
      - name: Define Job Output
        id: set-matrix
        run: |
          $matrix = @{
              'os' = @('ubuntu-20.04', 'windows-latest')
              'pyver' = @("3.10", "3.11", "3.12")
              'cuda' = @("12.1.1", "12.2.2", "12.3.2")
              'releasetag' = @("basic")
          }

          $matrixOut = ConvertTo-Json $matrix -Compress
          Write-Output ('matrix=' + $matrixOut) >> $env:GITHUB_OUTPUT

  build_wheels:
    name: Build Wheel ${{ matrix.os }} ${{ matrix.pyver }} ${{ matrix.cuda }} ${{ matrix.releasetag == 'wheels' && 'AVX2' || matrix.releasetag }}
    needs: define_matrix
    runs-on: ${{ matrix.os }}
    strategy:
      matrix: ${{ fromJSON(needs.define_matrix.outputs.matrix) }}
    defaults:
      run:
        shell: pwsh
    env:
      # Full CUDA version (e.g. 12.1.1) and CPU feature profile for this job.
      CUDAVER: ${{ matrix.cuda }}
      AVXVER: ${{ matrix.releasetag }}

    steps:
      - uses: actions/checkout@v4
        with:
          submodules: "recursive"

      - uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.pyver }}

      - name: Setup Mamba
        uses: conda-incubator/setup-miniconda@v2.2.0
        with:
          activate-environment: "build"
          python-version: ${{ matrix.pyver }}
          miniforge-variant: Mambaforge
          miniforge-version: latest
          use-mamba: true
          add-pip-as-python-dependency: true
          auto-activate-base: false

      - name: VS Integration Cache
        id: vs-integration-cache
        if: runner.os == 'Windows'
        uses: actions/cache@v3.3.2
        with:
          path: ./MSBuildExtensions
          key: cuda-${{ matrix.cuda }}-vs-integration

      - name: Get Visual Studio Integration
        if: runner.os == 'Windows' && steps.vs-integration-cache.outputs.cache-hit != 'true'
        run: |
          # The link table is keyed by 12.1.0 rather than 12.1.1.
          if ($env:CUDAVER -eq '12.1.1') {$x = '12.1.0'} else {$x = $env:CUDAVER}
          $links = (Invoke-RestMethod 'https://github.com/Jimver/cuda-toolkit/raw/dc0ca7bb29c5a92f7a963d3d5c93f8d59765136a/src/links/windows-links.ts').Trim().split().where({$_ -ne ''})
          # Stop just past the second occurrence of the version token; the
          # element at $i is then used as the installer URL.
          for ($i=$q=0;$i -lt $links.count -and $q -lt 2;$i++) {if ($links[$i] -eq "'$x',") {$q++}}
          # Fail with a clear message instead of passing a null URL downstream.
          if ($q -lt 2 -or $i -ge $links.count) { throw "No Windows installer link found for CUDA $x" }
          Invoke-RestMethod $links[$i].Trim("'") -OutFile 'cudainstaller.zip'
          & 'C:\Program Files\7-Zip\7z.exe' e cudainstaller.zip -oMSBuildExtensions -r *\MSBuildExtensions\* > $null
          Remove-Item 'cudainstaller.zip'

      - name: Install Visual Studio Integration
        if: runner.os == 'Windows'
        run: |
          $y = (gi '.\MSBuildExtensions').fullname + '\*'
          # Copy the extracted MSBuild extensions into every BuildCustomizations directory.
          (gi 'C:\Program Files\Microsoft Visual Studio\2022\Enterprise\MSBuild\Microsoft\VC\*\BuildCustomizations').fullname.foreach({cp $y $_})
          # e.g. 12.1.1 -> CUDA_PATH_V12_1, pointing at the conda environment.
          $cupath = 'CUDA_PATH_V' + $env:CUDAVER.Remove($env:CUDAVER.LastIndexOf('.')).Replace('.','_')
          echo "$cupath=$env:CONDA_PREFIX" >> $env:GITHUB_ENV

      - name: Install Dependencies
        env:
          MAMBA_DOWNLOAD_FAILFAST: "0"
          MAMBA_NO_LOW_SPEED_LIMIT: "1"
        run: |
          $cudaVersion = $env:CUDAVER
          mamba install -y 'cuda' -c nvidia/label/cuda-$cudaVersion
          # The pwsh shell only reports the LAST native exit code, so a failed
          # toolkit install would otherwise be masked by a successful pip run.
          if ($LASTEXITCODE -ne 0) { exit $LASTEXITCODE }
          python -m pip install build wheel

      - name: Build Wheel
        run: |
          # e.g. 12.1.1 -> 121; used as the release tag suffix below.
          $cudaVersion = $env:CUDAVER.Remove($env:CUDAVER.LastIndexOf('.')).Replace('.','')
          $env:CUDA_PATH = $env:CONDA_PREFIX
          $env:CUDA_HOME = $env:CONDA_PREFIX
          $env:CUDA_TOOLKIT_ROOT_DIR = $env:CONDA_PREFIX
          if ($IsLinux) {
            $env:LD_LIBRARY_PATH = $env:CONDA_PREFIX + '/lib:' + $env:LD_LIBRARY_PATH
          }
          $env:VERBOSE = '1'
          $env:CMAKE_ARGS = '-DLLAMA_CUBLAS=on -DCMAKE_CUDA_ARCHITECTURES=all'
          $env:CMAKE_ARGS = "-DLLAMA_CUDA_FORCE_MMQ=ON $env:CMAKE_ARGS"
          # CPU feature flags per release profile.
          if ($env:AVXVER -eq 'AVX') {
            $env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DLLAMA_AVX2=off -DLLAMA_FMA=off -DLLAMA_F16C=off'
          }
          if ($env:AVXVER -eq 'AVX512') {
            $env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DLLAMA_AVX512=on'
          }
          if ($env:AVXVER -eq 'basic') {
            $env:CMAKE_ARGS = $env:CMAKE_ARGS + ' -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_FMA=off -DLLAMA_F16C=off'
          }
          python -m build --wheel
          if ($LASTEXITCODE -ne 0) { exit $LASTEXITCODE }
          # Export the compact CUDA version to later steps through GITHUB_ENV.
          Write-Output "CUDA_VERSION=$cudaVersion" >> $env:GITHUB_ENV

      - uses: softprops/action-gh-release@v1
        with:
          files: dist/*
          # Set tag_name to <tag>-cu<cuda_version>
          tag_name: ${{ github.ref_name }}-cu${{ env.CUDA_VERSION }}
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
87
.github/workflows/build-wheels-metal.yaml
vendored
Normal file
87
.github/workflows/build-wheels-metal.yaml
vendored
Normal file
|
@ -0,0 +1,87 @@
|
||||||
|
# Builds Metal-enabled macOS wheels (arm64 and x86_64) for every
# (os, python) combination and attaches them to the <ref>-metal release.
# Triggered manually only.
name: Build Wheels (Metal)

on: workflow_dispatch

permissions:
  # Needed by softprops/action-gh-release to create/update the release.
  contents: write

jobs:
  # Emits the build matrix as JSON so build_wheels can consume it via fromJSON.
  define_matrix:
    name: Define Build Matrix
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    defaults:
      run:
        shell: pwsh

    steps:
      - name: Define Job Output
        id: set-matrix
        run: |
          $matrix = @{
              'os' = @('macos-11', 'macos-12', 'macos-13')
              'pyver' = @('3.10', '3.11', '3.12')
          }

          $matrixOut = ConvertTo-Json $matrix -Compress
          Write-Output ('matrix=' + $matrixOut) >> $env:GITHUB_OUTPUT

  build_wheels:
    name: ${{ matrix.os }} Python ${{ matrix.pyver }}
    needs: define_matrix
    runs-on: ${{ matrix.os }}
    strategy:
      matrix: ${{ fromJSON(needs.define_matrix.outputs.matrix) }}
    env:
      # Runner label; the build script derives the deployment target from it.
      OSVER: ${{ matrix.os }}

    steps:
      - uses: actions/checkout@v4
        with:
          submodules: "recursive"

      - uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.pyver }}

      - name: Install Dependencies
        run: |
          python -m pip install build wheel cmake

      - name: Build Wheel
        run: |
          XCODE15PATH="/Applications/Xcode_15.0.app/Contents/Developer"
          XCODE15BINPATH="${XCODE15PATH}/Toolchains/XcodeDefault.xctoolchain/usr/bin"
          export CMAKE_ARGS="-DLLAMA_NATIVE=off -DLLAMA_METAL=on"

          # Deployment target follows the runner image; macos-13 also switches
          # to the Xcode 15 compilers.
          case "$OSVER" in
            macos-13)
              export CC="${XCODE15BINPATH}/cc"
              export CXX="${XCODE15BINPATH}/c++"
              export MACOSX_DEPLOYMENT_TARGET="13.0"
              ;;
            macos-12) export MACOSX_DEPLOYMENT_TARGET="12.0" ;;
            macos-11) export MACOSX_DEPLOYMENT_TARGET="11.0" ;;
          esac

          # --- arm64 ---
          export CMAKE_OSX_ARCHITECTURES="arm64" && export ARCHFLAGS="-arch arm64"
          VERBOSE=1 python -m build --wheel

          if [[ "$OSVER" == "macos-13" ]]; then
            # Extra build against the macOS 14 SDK. Run in a subshell so that
            # SDKROOT and the 14.0 target do not leak into the x86_64 build.
            (
              export SDKROOT="${XCODE15PATH}/Platforms/MacOSX.platform/Developer/SDKs/MacOSX14.0.sdk"
              export MACOSX_DEPLOYMENT_TARGET="14.0"
              VERBOSE=1 python -m build --wheel
            )
          fi

          # Publish each arm64 wheel under an aarch64-suffixed name as well.
          for file in ./dist/*.whl; do cp "$file" "${file/arm64.whl/aarch64.whl}"; done

          # --- x86_64 ---
          export CMAKE_OSX_ARCHITECTURES="x86_64" && export CMAKE_ARGS="-DLLAMA_NATIVE=off -DLLAMA_AVX=off -DLLAMA_AVX2=off -DLLAMA_FMA=off -DLLAMA_F16C=off -DLLAMA_METAL=on" && export ARCHFLAGS="-arch x86_64"
          VERBOSE=1 python -m build --wheel

          if [[ "$OSVER" == "macos-13" ]]; then
            (
              export SDKROOT="${XCODE15PATH}/Platforms/MacOSX.platform/Developer/SDKs/MacOSX14.0.sdk"
              export MACOSX_DEPLOYMENT_TARGET="14.0"
              VERBOSE=1 python -m build --wheel
            )
          fi

      - uses: softprops/action-gh-release@v1
        with:
          files: dist/*
          # set release name to <tag>-metal
          tag_name: ${{ github.ref_name }}-metal
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
48
.github/workflows/generate-index-from-release.yaml
vendored
Normal file
48
.github/workflows/generate-index-from-release.yaml
vendored
Normal file
|
@ -0,0 +1,48 @@
|
||||||
|
# Regenerates the PEP 503 wheel indexes (one per build flavour) from the
# repository's GitHub releases and deploys them to GitHub Pages.
name: Wheels Index

on:
  # Trigger on any new release
  release:
    types: [published]

  # Allows you to run this workflow manually from the Actions tab
  workflow_dispatch:

# Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
permissions:
  contents: read
  pages: write
  id-token: write

# Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued.
# However, do NOT cancel in-progress runs as we want to allow these production deployments to complete.
concurrency:
  group: "pages"
  cancel-in-progress: false

jobs:
  # Single deploy job since we're just deploying
  deploy:
    environment:
      name: github-pages
      url: ${{ steps.deployment.outputs.page_url }}
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Setup Pages
        uses: actions/configure-pages@v4
      - name: Build
        # One index per flavour; the second argument is the release-tag regex.
        # The script path must match the file added under scripts/.
        run: |
          ./scripts/release-to-pep-503.sh index/whl/cpu '^[v]?[0-9]+\.[0-9]+\.[0-9]+$'
          ./scripts/release-to-pep-503.sh index/whl/cu121 '^[v]?[0-9]+\.[0-9]+\.[0-9]+-cu121$'
          ./scripts/release-to-pep-503.sh index/whl/cu122 '^[v]?[0-9]+\.[0-9]+\.[0-9]+-cu122$'
          ./scripts/release-to-pep-503.sh index/whl/cu123 '^[v]?[0-9]+\.[0-9]+\.[0-9]+-cu123$'
          ./scripts/release-to-pep-503.sh index/whl/metal '^[v]?[0-9]+\.[0-9]+\.[0-9]+-metal$'
      - name: Upload artifact
        uses: actions/upload-pages-artifact@v3
        with:
          # Upload only the generated index directory
          path: 'index'
      - name: Deploy to GitHub Pages
        id: deployment
        uses: actions/deploy-pages@v4
|
|
@ -6,6 +6,7 @@
|
||||||
[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/llama-cpp-python)](https://pypi.org/project/llama-cpp-python/)
|
[![PyPI - Python Version](https://img.shields.io/pypi/pyversions/llama-cpp-python)](https://pypi.org/project/llama-cpp-python/)
|
||||||
[![PyPI - License](https://img.shields.io/pypi/l/llama-cpp-python)](https://pypi.org/project/llama-cpp-python/)
|
[![PyPI - License](https://img.shields.io/pypi/l/llama-cpp-python)](https://pypi.org/project/llama-cpp-python/)
|
||||||
[![PyPI - Downloads](https://img.shields.io/pypi/dm/llama-cpp-python)](https://pypi.org/project/llama-cpp-python/)
|
[![PyPI - Downloads](https://img.shields.io/pypi/dm/llama-cpp-python)](https://pypi.org/project/llama-cpp-python/)
|
||||||
|
[![Github All Releases](https://img.shields.io/github/downloads/abetlen/llama-cpp-python/total.svg?label=Github%20Downloads)](https://github.com/abetlen/llama-cpp-python/releases)
|
||||||
|
|
||||||
Simple Python bindings for **@ggerganov's** [`llama.cpp`](https://github.com/ggerganov/llama.cpp) library.
|
Simple Python bindings for **@ggerganov's** [`llama.cpp`](https://github.com/ggerganov/llama.cpp) library.
|
||||||
This package provides:
|
This package provides:
|
||||||
|
|
58
scripts/release-to-pep-503.sh
Executable file
58
scripts/release-to-pep-503.sh
Executable file
|
@ -0,0 +1,58 @@
|
||||||
|
#!/bin/bash
#
# Generate a PEP 503 "simple" index for llama-cpp-python from the wheel
# assets attached to the project's GitHub releases.
#
# Usage: release-to-pep-503.sh [output_dir] [tag_pattern]
#   output_dir   Directory to write the index into (default: index/whl/cpu).
#   tag_pattern  Extended regex (grep -E) selecting which release tags to
#                include (default: plain versions such as v1.2.3).
#
# Environment:
#   GITHUB_TOKEN  Optional. When set it is sent as a bearer token to the API.
#
# Requires: curl, jq.

set -euo pipefail

readonly REPO_API="https://api.github.com/repos/abetlen/llama-cpp-python"
readonly PER_PAGE=100

# Get output directory or default to index/whl/cpu
output_dir=${1:-"index/whl/cpu"}

# Get pattern from second arg or default to valid python package version pattern
pattern=${2:-"^[v]?[0-9]+\.[0-9]+\.[0-9]+$"}

die() {
  printf '%s\n' "$*" >&2
  exit 1
}

for tool in curl jq; do
  command -v "$tool" >/dev/null || die "required tool not found: $tool"
done

# -f makes HTTP errors (e.g. rate limiting) fail instead of yielding an
# error document that would silently turn into an empty index.
curl_args=(-fsSL -H "Accept: application/vnd.github+json")
if [[ -n "${GITHUB_TOKEN:-}" ]]; then
  curl_args+=(-H "Authorization: Bearer ${GITHUB_TOKEN}")
fi

#######################################
# Print every non-draft release as one compact JSON object per line,
# following pagination until a short or empty page is returned.
# Outputs: release objects on stdout, in API order.
#######################################
fetch_releases() {
  local page=1 body count
  while :; do
    body=$(curl "${curl_args[@]}" \
      "${REPO_API}/releases?per_page=${PER_PAGE}&page=${page}") \
      || die "failed to fetch release list (page ${page})"
    count=$(jq 'length' <<<"$body") \
      || die "unexpected API response (page ${page})"
    if (( count == 0 )); then
      break
    fi
    jq -c '.[] | select(.draft | not)' <<<"$body" \
      || die "could not parse release list (page ${page})"
    if (( count < PER_PAGE )); then
      break
    fi
    page=$((page + 1))
  done
}

# Fetch before touching the output so a failed download never leaves a
# half-written index behind. The list response already carries each
# release's assets, so no per-release request is needed.
releases_json=$(fetch_releases) || exit 1

# Filter releases by pattern. grep exits 1 when nothing matches; that is a
# valid (empty) result, not an error.
tags=$(jq -r '.tag_name' <<<"$releases_json" | grep -E -- "$pattern" || true)

# Create output directory and the llama-cpp-python project directory
project_dir="${output_dir}/llama-cpp-python"
mkdir -p -- "$project_dir"

# Root index: a single link to the project page.
cat > "${output_dir}/index.html" <<'EOF'
<!DOCTYPE html>
<html>
  <head></head>
  <body>
    <a href="llama-cpp-python/">llama-cpp-python</a>
    <br>
  </body>
</html>

EOF

# Project index: one heading per matching release followed by its wheels.
{
  printf '%s\n' \
    '<!DOCTYPE html>' \
    '<html>' \
    '  <body>' \
    '    <h1>Links for llama-cpp-python</h1>'

  while IFS= read -r tag; do
    [[ -n "$tag" ]] || continue
    printf '    <h2>%s</h2>\n' "$tag"
    # PEP 503: the anchor text must be the file name; href is the download URL.
    jq -r --arg tag "$tag" '
      select(.tag_name == $tag)
      | .assets[]
      | select(.browser_download_url | endswith(".whl"))
      | @html "    <a href=\"\(.browser_download_url)\">\(.name)</a>\n    <br>"
    ' <<<"$releases_json"
  done <<<"$tags"

  printf '%s\n' \
    '  </body>' \
    '</html>' \
    ''
} > "${project_dir}/index.html"
|
Loading…
Reference in a new issue