From e890be48149a739cd311f8ec60ca8c60a2abb4e4 Mon Sep 17 00:00:00 2001 From: Daniel Hiltgen Date: Mon, 17 Jun 2024 13:32:46 -0700 Subject: [PATCH 1/2] Revert "More parallelism on windows generate" This reverts commit 0577af98f4129fc6bf5cc47d6b4d82d394ee68a6. --- llm/generate/gen_windows.ps1 | 72 +++++++++++------------------------- 1 file changed, 21 insertions(+), 51 deletions(-) diff --git a/llm/generate/gen_windows.ps1 b/llm/generate/gen_windows.ps1 index 0eb48ffa..ed286c32 100644 --- a/llm/generate/gen_windows.ps1 +++ b/llm/generate/gen_windows.ps1 @@ -1,5 +1,7 @@ #!powershell +$ErrorActionPreference = "Stop" + function amdGPUs { if ($env:AMDGPU_TARGETS) { return $env:AMDGPU_TARGETS @@ -84,9 +86,9 @@ function init_vars { function git_module_setup { # TODO add flags to skip the init/patch logic to make it easier to mod llama.cpp code in-repo & git submodule init - if ($LASTEXITCODE -ne 0) { throw($LASTEXITCODE)} + if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)} & git submodule update --force "${script:llamacppDir}" - if ($LASTEXITCODE -ne 0) { throw($LASTEXITCODE)} + if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)} } function apply_patches { @@ -120,15 +122,10 @@ function build { write-host "generating config with: cmake -S ${script:llamacppDir} -B $script:buildDir $script:cmakeDefs" & cmake --version & cmake -S "${script:llamacppDir}" -B $script:buildDir $script:cmakeDefs - if ($LASTEXITCODE -ne 0) { throw($LASTEXITCODE)} - if ($cmakeDefs -contains "-G") { - $extra=@("-j8") - } else { - $extra= @("--", "/p:CL_MPcount=8") - } - write-host "building with: cmake --build $script:buildDir --config $script:config $($script:cmakeTargets | ForEach-Object { `"--target`", $_ }) $extra" - & cmake --build $script:buildDir --config $script:config ($script:cmakeTargets | ForEach-Object { "--target", $_ }) $extra - if ($LASTEXITCODE -ne 0) { write-host "cmake build exit status $LASTEXITCODE"; throw($LASTEXITCODE)} + if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)} + write-host "building with: cmake --build $script:buildDir --config $script:config $($script:cmakeTargets | ForEach-Object { `"--target`", $_ })" + & cmake --build $script:buildDir --config $script:config ($script:cmakeTargets | ForEach-Object { "--target", $_ }) + if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)} # Rearrange output to be consistent between different generators if ($null -ne ${script:config} -And (test-path -path "${script:buildDir}/bin/${script:config}" ) ) { mv -force "${script:buildDir}/bin/${script:config}/*" "${script:buildDir}/bin/" @@ -142,7 +139,7 @@ function sign { foreach ($file in @(get-childitem "${script:buildDir}/bin/*.exe") + @(get-childitem "${script:buildDir}/bin/*.dll")){ & "${script:SignTool}" sign /v /fd sha256 /t http://timestamp.digicert.com /f "${script:OLLAMA_CERT}" ` /csp "Google Cloud KMS Provider" /kc "${env:KEY_CONTAINER}" $file - if ($LASTEXITCODE -ne 0) { throw($LASTEXITCODE)} + if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)} } } } @@ -218,13 +215,7 @@ function build_static() { } } -function build_cpu() { - if ($script:ARCH -eq "arm64") { - $gen_arch = "ARM64" - } else { # amd64 - $gen_arch = "x64" - } - +function build_cpu($gen_arch) { if ((-not "${env:OLLAMA_SKIP_CPU_GENERATE}" ) -and ((-not "${env:OLLAMA_CPU_TARGET}") -or ("${env:OLLAMA_CPU_TARGET}" -eq "cpu"))) { # remaining llama.cpp builds use MSVC init_vars @@ -281,15 +272,7 @@ function build_cuda() { init_vars $script:buildDir="../build/windows/${script:ARCH}/cuda$script:CUDA_VARIANT" $script:distDir="$script:DIST_BASE\cuda$script:CUDA_VARIANT" - $script:cmakeDefs += @( - "-A", "x64", - "-DLLAMA_CUDA=ON", - "-DLLAMA_AVX=on", - "-DLLAMA_AVX2=off", - "-DCUDAToolkit_INCLUDE_DIR=$script:CUDA_INCLUDE_DIR", - "-DCMAKE_CUDA_FLAGS=-t8" - "-DCMAKE_CUDA_ARCHITECTURES=${script:CMAKE_CUDA_ARCHITECTURES}" - ) + $script:cmakeDefs += @("-A", "x64", "-DLLAMA_CUDA=ON", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DCUDAToolkit_INCLUDE_DIR=$script:CUDA_INCLUDE_DIR", "-DCMAKE_CUDA_ARCHITECTURES=${script:CMAKE_CUDA_ARCHITECTURES}") if ($null -ne $env:OLLAMA_CUSTOM_CUDA_DEFS) { write-host "OLLAMA_CUSTOM_CUDA_DEFS=`"${env:OLLAMA_CUSTOM_CUDA_DEFS}`"" $script:cmakeDefs +=@("${env:OLLAMA_CUSTOM_CUDA_DEFS}") @@ -410,29 +393,16 @@ init_vars if ($($args.count) -eq 0) { git_module_setup apply_patches - - $tasks = @("build_static", "build_cpu") - $jobs = @() - if ($script:ARCH -ne "arm64") { - $tasks += $("build_cpu_avx", "build_cpu_avx2", "build_cuda", "build_oneapi", "build_rocm") - } - foreach ($t in $tasks) { - $jobs += @(Start-ThreadJob -ThrottleLimit 12 -FilePath .\gen_windows.ps1 -ArgumentList $t -Name $t) - } - get-job - foreach ($job in $jobs) { - write-host "----" $job.Name output follows - receive-job -wait -job $job - write-host "----" $job.Name $job.State - write-host "" - if ($job.State -contains 'Failed') { - cleanup - write-host "Terminating remaining jobs (this takes a while, you can ^C)" - # TODO find some way to kill the spawned cmake processes faster - remove-job -force -job $jobs - exit(-1) - } - get-job + build_static + if ($script:ARCH -eq "arm64") { + build_cpu("ARM64") + } else { # amd64 + build_cpu("x64") + build_cpu_avx + build_cpu_avx2 + build_cuda + build_oneapi + build_rocm } cleanup From b0930626c5b5e64c819f985313b805be488a1f5d Mon Sep 17 00:00:00 2001 From: Daniel Hiltgen Date: Mon, 17 Jun 2024 13:44:46 -0700 Subject: [PATCH 2/2] Add back lower level parallel flags nvcc supports parallelism (threads) and cmake + make can use -j, while msbuild requires /p:CL_MPcount=8 --- llm/generate/gen_linux.sh | 2 +- llm/generate/gen_windows.ps1 | 19 ++++++++++++++++--- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/llm/generate/gen_linux.sh b/llm/generate/gen_linux.sh index f9100875..28ce1f21 100755 --- a/llm/generate/gen_linux.sh +++ b/llm/generate/gen_linux.sh @@ -178,7 +178,7 @@ if [ -z "${OLLAMA_SKIP_CUDA_GENERATE}" -a -d "${CUDA_LIB_DIR}" ]; then CMAKE_CUDA_DEFS="-DLLAMA_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES} ${OLLAMA_CUSTOM_CUDA_DEFS}" echo "Building custom CUDA GPU" else - CMAKE_CUDA_DEFS="-DLLAMA_CUDA=on -DLLAMA_CUDA_FORCE_MMQ=on -DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES}" + CMAKE_CUDA_DEFS="-DLLAMA_CUDA=on -DCMAKE_CUDA_FLAGS=-t8 -DLLAMA_CUDA_FORCE_MMQ=on -DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES}" fi CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} ${ARM64_DEFS} ${CMAKE_CUDA_DEFS}" BUILD_DIR="../build/linux/${ARCH}/cuda${CUDA_VARIANT}" diff --git a/llm/generate/gen_windows.ps1 b/llm/generate/gen_windows.ps1 index ed286c32..87d1207f 100644 --- a/llm/generate/gen_windows.ps1 +++ b/llm/generate/gen_windows.ps1 @@ -123,8 +123,13 @@ function build { & cmake --version & cmake -S "${script:llamacppDir}" -B $script:buildDir $script:cmakeDefs if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)} - write-host "building with: cmake --build $script:buildDir --config $script:config $($script:cmakeTargets | ForEach-Object { `"--target`", $_ })" - & cmake --build $script:buildDir --config $script:config ($script:cmakeTargets | ForEach-Object { "--target", $_ }) + if ($cmakeDefs -contains "-G") { + $extra=@("-j8") + } else { + $extra= @("--", "/p:CL_MPcount=8") + } + write-host "building with: cmake --build $script:buildDir --config $script:config $($script:cmakeTargets | ForEach-Object { `"--target`", $_ }) $extra" + & cmake --build $script:buildDir --config $script:config ($script:cmakeTargets | ForEach-Object { "--target", $_ }) $extra if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)} # Rearrange output to be consistent between different generators if ($null -ne ${script:config} -And (test-path -path "${script:buildDir}/bin/${script:config}" ) ) { @@ -272,7 +277,15 @@ function build_cuda() { init_vars $script:buildDir="../build/windows/${script:ARCH}/cuda$script:CUDA_VARIANT" $script:distDir="$script:DIST_BASE\cuda$script:CUDA_VARIANT" - $script:cmakeDefs += @("-A", "x64", "-DLLAMA_CUDA=ON", "-DLLAMA_AVX=on", "-DLLAMA_AVX2=off", "-DCUDAToolkit_INCLUDE_DIR=$script:CUDA_INCLUDE_DIR", "-DCMAKE_CUDA_ARCHITECTURES=${script:CMAKE_CUDA_ARCHITECTURES}") + $script:cmakeDefs += @( + "-A", "x64", + "-DLLAMA_CUDA=ON", + "-DLLAMA_AVX=on", + "-DLLAMA_AVX2=off", + "-DCUDAToolkit_INCLUDE_DIR=$script:CUDA_INCLUDE_DIR", + "-DCMAKE_CUDA_FLAGS=-t8", + "-DCMAKE_CUDA_ARCHITECTURES=${script:CMAKE_CUDA_ARCHITECTURES}" + ) if ($null -ne $env:OLLAMA_CUSTOM_CUDA_DEFS) { write-host "OLLAMA_CUSTOM_CUDA_DEFS=`"${env:OLLAMA_CUSTOM_CUDA_DEFS}`"" $script:cmakeDefs +=@("${env:OLLAMA_CUSTOM_CUDA_DEFS}")