Merge pull request #3278 from zhewang1-intc/rebase_ollama_main
Enabling ollama to run on Intel GPUs with SYCL backend
This commit is contained in:
commit
646371f56d
7 changed files with 615 additions and 32 deletions
126
gpu/gpu.go
126
gpu/gpu.go
|
@ -16,6 +16,7 @@ import (
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"runtime"
|
"runtime"
|
||||||
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"unsafe"
|
"unsafe"
|
||||||
|
@ -28,6 +29,7 @@ type handles struct {
|
||||||
deviceCount int
|
deviceCount int
|
||||||
cudart *C.cudart_handle_t
|
cudart *C.cudart_handle_t
|
||||||
nvcuda *C.nvcuda_handle_t
|
nvcuda *C.nvcuda_handle_t
|
||||||
|
oneapi *C.oneapi_handle_t
|
||||||
}
|
}
|
||||||
|
|
||||||
const (
|
const (
|
||||||
|
@ -80,6 +82,15 @@ var NvcudaWindowsGlobs = []string{
|
||||||
"c:\\windows\\system*\\nvcuda.dll",
|
"c:\\windows\\system*\\nvcuda.dll",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Glob patterns used to locate the Intel Level-Zero GPU driver library.
var (
	// OneapiWindowsGlobs holds the search patterns used on Windows.
	OneapiWindowsGlobs = []string{
		"c:\\Windows\\System32\\DriverStore\\FileRepository\\*\\ze_intel_gpu64.dll",
	}

	// OneapiLinuxGlobs holds the search patterns used on Linux.
	OneapiLinuxGlobs = []string{
		"/usr/lib/x86_64-linux-gnu/libze_intel_gpu.so*",
		"/usr/lib*/libze_intel_gpu.so*",
	}
)
|
||||||
|
|
||||||
// Jetson devices have JETSON_JETPACK="x.y.z" factory set to the Jetpack version installed.
|
// Jetson devices have JETSON_JETPACK="x.y.z" factory set to the Jetpack version installed.
|
||||||
// Included to drive logic for reducing Ollama-allocated overhead on L4T/Jetson devices.
|
// Included to drive logic for reducing Ollama-allocated overhead on L4T/Jetson devices.
|
||||||
var CudaTegra string = os.Getenv("JETSON_JETPACK")
|
var CudaTegra string = os.Getenv("JETSON_JETPACK")
|
||||||
|
@ -94,6 +105,8 @@ func initGPUHandles() *handles {
|
||||||
var cudartMgmtPatterns []string
|
var cudartMgmtPatterns []string
|
||||||
var nvcudaMgmtName string
|
var nvcudaMgmtName string
|
||||||
var nvcudaMgmtPatterns []string
|
var nvcudaMgmtPatterns []string
|
||||||
|
var oneapiMgmtName string
|
||||||
|
var oneapiMgmtPatterns []string
|
||||||
|
|
||||||
tmpDir, _ := PayloadsDir()
|
tmpDir, _ := PayloadsDir()
|
||||||
switch runtime.GOOS {
|
switch runtime.GOOS {
|
||||||
|
@ -105,6 +118,8 @@ func initGPUHandles() *handles {
|
||||||
// Aligned with driver, we can't carry as payloads
|
// Aligned with driver, we can't carry as payloads
|
||||||
nvcudaMgmtName = "nvcuda.dll"
|
nvcudaMgmtName = "nvcuda.dll"
|
||||||
nvcudaMgmtPatterns = NvcudaWindowsGlobs
|
nvcudaMgmtPatterns = NvcudaWindowsGlobs
|
||||||
|
oneapiMgmtName = "ze_intel_gpu64.dll"
|
||||||
|
oneapiMgmtPatterns = OneapiWindowsGlobs
|
||||||
case "linux":
|
case "linux":
|
||||||
cudartMgmtName = "libcudart.so*"
|
cudartMgmtName = "libcudart.so*"
|
||||||
if tmpDir != "" {
|
if tmpDir != "" {
|
||||||
|
@ -115,6 +130,8 @@ func initGPUHandles() *handles {
|
||||||
// Aligned with driver, we can't carry as payloads
|
// Aligned with driver, we can't carry as payloads
|
||||||
nvcudaMgmtName = "libcuda.so*"
|
nvcudaMgmtName = "libcuda.so*"
|
||||||
nvcudaMgmtPatterns = NvcudaLinuxGlobs
|
nvcudaMgmtPatterns = NvcudaLinuxGlobs
|
||||||
|
oneapiMgmtName = "libze_intel_gpu.so"
|
||||||
|
oneapiMgmtPatterns = OneapiLinuxGlobs
|
||||||
default:
|
default:
|
||||||
return gpuHandles
|
return gpuHandles
|
||||||
}
|
}
|
||||||
|
@ -141,6 +158,18 @@ func initGPUHandles() *handles {
|
||||||
return gpuHandles
|
return gpuHandles
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
oneapiLibPaths := FindGPULibs(oneapiMgmtName, oneapiMgmtPatterns)
|
||||||
|
if len(oneapiLibPaths) > 0 {
|
||||||
|
deviceCount, oneapi, libPath := LoadOneapiMgmt(oneapiLibPaths)
|
||||||
|
if oneapi != nil {
|
||||||
|
slog.Debug("detected Intel GPUs", "library", libPath, "count", deviceCount)
|
||||||
|
gpuHandles.oneapi = oneapi
|
||||||
|
gpuHandles.deviceCount = deviceCount
|
||||||
|
return gpuHandles
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return gpuHandles
|
return gpuHandles
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -181,39 +210,53 @@ func GetGPUInfo() GpuInfoList {
|
||||||
if cpuVariant == "" && runtime.GOARCH == "amd64" {
|
if cpuVariant == "" && runtime.GOARCH == "amd64" {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
gpuInfo := GpuInfo{
|
if gpuHandles.cudart != nil || gpuHandles.nvcuda != nil {
|
||||||
Library: "cuda",
|
gpuInfo := GpuInfo{
|
||||||
}
|
Library: "cuda",
|
||||||
var driverMajor int
|
}
|
||||||
var driverMinor int
|
var driverMajor int
|
||||||
if gpuHandles.cudart != nil {
|
var driverMinor int
|
||||||
C.cudart_check_vram(*gpuHandles.cudart, C.int(i), &memInfo)
|
if gpuHandles.cudart != nil {
|
||||||
} else {
|
C.cudart_check_vram(*gpuHandles.cudart, C.int(i), &memInfo)
|
||||||
C.nvcuda_check_vram(*gpuHandles.nvcuda, C.int(i), &memInfo)
|
} else {
|
||||||
driverMajor = int(gpuHandles.nvcuda.driver_major)
|
C.nvcuda_check_vram(*gpuHandles.nvcuda, C.int(i), &memInfo)
|
||||||
driverMinor = int(gpuHandles.nvcuda.driver_minor)
|
driverMajor = int(gpuHandles.nvcuda.driver_major)
|
||||||
}
|
driverMinor = int(gpuHandles.nvcuda.driver_minor)
|
||||||
if memInfo.err != nil {
|
}
|
||||||
slog.Info("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
|
if memInfo.err != nil {
|
||||||
C.free(unsafe.Pointer(memInfo.err))
|
slog.Info("error looking up nvidia GPU memory", "error", C.GoString(memInfo.err))
|
||||||
continue
|
C.free(unsafe.Pointer(memInfo.err))
|
||||||
}
|
continue
|
||||||
if memInfo.major < CudaComputeMin[0] || (memInfo.major == CudaComputeMin[0] && memInfo.minor < CudaComputeMin[1]) {
|
}
|
||||||
slog.Info(fmt.Sprintf("[%d] CUDA GPU is too old. Compute Capability detected: %d.%d", i, memInfo.major, memInfo.minor))
|
if memInfo.major < CudaComputeMin[0] || (memInfo.major == CudaComputeMin[0] && memInfo.minor < CudaComputeMin[1]) {
|
||||||
continue
|
slog.Info(fmt.Sprintf("[%d] CUDA GPU is too old. Compute Capability detected: %d.%d", i, memInfo.major, memInfo.minor))
|
||||||
}
|
continue
|
||||||
gpuInfo.TotalMemory = uint64(memInfo.total)
|
}
|
||||||
gpuInfo.FreeMemory = uint64(memInfo.free)
|
gpuInfo.TotalMemory = uint64(memInfo.total)
|
||||||
gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
|
gpuInfo.FreeMemory = uint64(memInfo.free)
|
||||||
gpuInfo.Compute = fmt.Sprintf("%d.%d", memInfo.major, memInfo.minor)
|
gpuInfo.ID = C.GoString(&memInfo.gpu_id[0])
|
||||||
gpuInfo.MinimumMemory = cudaMinimumMemory
|
gpuInfo.Compute = fmt.Sprintf("%d.%d", memInfo.major, memInfo.minor)
|
||||||
gpuInfo.DependencyPath = depPath
|
gpuInfo.MinimumMemory = cudaMinimumMemory
|
||||||
gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
|
gpuInfo.DependencyPath = depPath
|
||||||
gpuInfo.DriverMajor = int(driverMajor)
|
gpuInfo.Name = C.GoString(&memInfo.gpu_name[0])
|
||||||
gpuInfo.DriverMinor = int(driverMinor)
|
gpuInfo.DriverMajor = int(driverMajor)
|
||||||
|
gpuInfo.DriverMinor = int(driverMinor)
|
||||||
|
|
||||||
// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
|
// TODO potentially sort on our own algorithm instead of what the underlying GPU library does...
|
||||||
resp = append(resp, gpuInfo)
|
resp = append(resp, gpuInfo)
|
||||||
|
}
|
||||||
|
if gpuHandles.oneapi != nil {
|
||||||
|
gpuInfo := GpuInfo{
|
||||||
|
Library: "oneapi",
|
||||||
|
}
|
||||||
|
C.oneapi_check_vram(*gpuHandles.oneapi, &memInfo)
|
||||||
|
var totalFreeMem float64 = float64(memInfo.free) * 0.95 // work-around: leave some reserve vram for mkl lib used in ggml-sycl backend.
|
||||||
|
memInfo.free = C.uint64_t(totalFreeMem)
|
||||||
|
gpuInfo.TotalMemory = uint64(memInfo.total)
|
||||||
|
gpuInfo.FreeMemory = uint64(memInfo.free)
|
||||||
|
gpuInfo.ID = strconv.Itoa(i)
|
||||||
|
resp = append(resp, gpuInfo)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Then AMD
|
// Then AMD
|
||||||
|
@ -348,6 +391,23 @@ func LoadNVCUDAMgmt(nvcudaLibPaths []string) (int, *C.nvcuda_handle_t, string) {
|
||||||
return 0, nil, ""
|
return 0, nil, ""
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func LoadOneapiMgmt(oneapiLibPaths []string) (int, *C.oneapi_handle_t, string) {
|
||||||
|
var resp C.oneapi_init_resp_t
|
||||||
|
resp.oh.verbose = getVerboseState()
|
||||||
|
for _, libPath := range oneapiLibPaths {
|
||||||
|
lib := C.CString(libPath)
|
||||||
|
defer C.free(unsafe.Pointer(lib))
|
||||||
|
C.oneapi_init(lib, &resp)
|
||||||
|
if resp.err != nil {
|
||||||
|
slog.Debug("Unable to load oneAPI management library", "library", libPath, "error", C.GoString(resp.err))
|
||||||
|
C.free(unsafe.Pointer(resp.err))
|
||||||
|
} else {
|
||||||
|
return int(resp.num_devices), &resp.oh, libPath
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0, nil, ""
|
||||||
|
}
|
||||||
|
|
||||||
func getVerboseState() C.uint16_t {
|
func getVerboseState() C.uint16_t {
|
||||||
if envconfig.Debug {
|
if envconfig.Debug {
|
||||||
return C.uint16_t(1)
|
return C.uint16_t(1)
|
||||||
|
@ -368,6 +428,8 @@ func (l GpuInfoList) GetVisibleDevicesEnv() (string, string) {
|
||||||
return cudaGetVisibleDevicesEnv(l)
|
return cudaGetVisibleDevicesEnv(l)
|
||||||
case "rocm":
|
case "rocm":
|
||||||
return rocmGetVisibleDevicesEnv(l)
|
return rocmGetVisibleDevicesEnv(l)
|
||||||
|
case "oneapi":
|
||||||
|
return oneapiGetVisibleDevicesEnv(l)
|
||||||
default:
|
default:
|
||||||
slog.Debug("no filter required for library " + l[0].Library)
|
slog.Debug("no filter required for library " + l[0].Library)
|
||||||
return "", ""
|
return "", ""
|
||||||
|
|
|
@ -62,6 +62,7 @@ void cpu_check_ram(mem_info_t *resp);
|
||||||
|
|
||||||
#include "gpu_info_cudart.h"
|
#include "gpu_info_cudart.h"
|
||||||
#include "gpu_info_nvcuda.h"
|
#include "gpu_info_nvcuda.h"
|
||||||
|
#include "gpu_info_oneapi.h"
|
||||||
|
|
||||||
#endif // __GPU_INFO_H__
|
#endif // __GPU_INFO_H__
|
||||||
#endif // __APPLE__
|
#endif // __APPLE__
|
214
gpu/gpu_info_oneapi.c
Normal file
214
gpu/gpu_info_oneapi.c
Normal file
|
@ -0,0 +1,214 @@
|
||||||
|
#ifndef __APPLE__
|
||||||
|
|
||||||
|
#include "gpu_info_oneapi.h"
|
||||||
|
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
// oneapi_init loads the Level-Zero management library at oneapi_lib_path,
// resolves the zes* entry points into resp->oh, calls zesInit, and stores the
// count returned by zesDriverGet in resp->num_devices.
//
// On any failure resp->err is set to a heap-allocated message (the caller
// frees it), the library is unloaded and resp->oh.handle is left NULL.
//
// NOTE(review): num_devices is filled from zesDriverGet, i.e. it is the number
// of Level-Zero *drivers*, not of devices - confirm this is what callers want.
void oneapi_init(char *oneapi_lib_path, oneapi_init_resp_t *resp)
{
  ze_result_t ret;
  resp->err = NULL;
  resp->num_devices = 0;
  const int buflen = 256;
  char buf[buflen + 1];
  int i;
  // zesDriverGet takes a uint32_t *, while num_devices is an int; go through
  // a correctly typed local instead of passing a mismatched pointer.
  uint32_t driver_count = 0;
  struct lookup
  {
    char *s;
    void **p;
  } l[] = {
      {"zesInit", (void *)&resp->oh.zesInit},
      {"zesDriverGet", (void *)&resp->oh.zesDriverGet},
      {"zesDeviceGet", (void *)&resp->oh.zesDeviceGet},
      {"zesDeviceGetProperties", (void *)&resp->oh.zesDeviceGetProperties},
      {"zesDeviceEnumMemoryModules",
       (void *)&resp->oh.zesDeviceEnumMemoryModules},
      {"zesMemoryGetProperties", (void *)&resp->oh.zesMemoryGetProperties},
      {"zesMemoryGetState", (void *)&resp->oh.zesMemoryGetState},
      {NULL, NULL},
  };

  resp->oh.handle = LOAD_LIBRARY(oneapi_lib_path, RTLD_LAZY);
  if (!resp->oh.handle)
  {
    // The message returned by LOAD_ERR() is released with free(), as before.
    char *msg = LOAD_ERR();
    snprintf(buf, buflen,
             "Unable to load %s library to query for Intel GPUs: %s\n",
             oneapi_lib_path, msg);
    free(msg);
    resp->err = strdup(buf);
    return;
  }

  // TODO once we've squashed the remaining corner cases remove this log
  LOG(resp->oh.verbose,
      "wiring Level-Zero management library functions in %s\n",
      oneapi_lib_path);

  for (i = 0; l[i].s != NULL; i++)
  {
    // TODO once we've squashed the remaining corner cases remove this log
    LOG(resp->oh.verbose, "dlsym: %s\n", l[i].s);

    *l[i].p = LOAD_SYMBOL(resp->oh.handle, l[i].s);
    // Test the resolved symbol itself; l[i].p is the address of the slot and
    // is never NULL, so checking it would hide missing symbols.
    if (!*l[i].p)
    {
      char *msg = LOAD_ERR();
      LOG(resp->oh.verbose, "dlerr: %s\n", msg);
      // Unload first, then clear the handle; the reverse order would pass
      // NULL to UNLOAD_LIBRARY and leak the library.
      UNLOAD_LIBRARY(resp->oh.handle);
      resp->oh.handle = NULL;
      snprintf(buf, buflen, "symbol lookup for %s failed: %s", l[i].s, msg);
      free(msg);
      resp->err = strdup(buf);
      return;
    }
  }

  ret = (*resp->oh.zesInit)(0);
  if (ret != ZE_RESULT_SUCCESS)
  {
    LOG(resp->oh.verbose, "zesInit err: %d\n", ret);
    UNLOAD_LIBRARY(resp->oh.handle);
    resp->oh.handle = NULL;
    snprintf(buf, buflen, "oneapi vram init failure: %d", ret);
    resp->err = strdup(buf);
    // Do not fall through: the library has just been unloaded.
    return;
  }

  ret = (*resp->oh.zesDriverGet)(&driver_count, NULL);
  if (ret != ZE_RESULT_SUCCESS)
  {
    LOG(resp->oh.verbose, "zesDriverGet err: %d\n", ret);
    UNLOAD_LIBRARY(resp->oh.handle);
    resp->oh.handle = NULL;
    snprintf(buf, buflen, "unable to get driver count: %d", ret);
    resp->err = strdup(buf);
    return;
  }
  resp->num_devices = (int)driver_count;

  return;
}
|
||||||
|
|
||||||
|
// Stores "<what>: <code>" in resp->err as a heap-allocated string.
static void oneapi_vram_error(mem_info_t *resp, const char *what, int code)
{
  char buf[257];
  snprintf(buf, 256, "%s: %d", what, code);
  resp->err = strdup(buf);
}

// oneapi_check_vram walks every Level-Zero driver, every device of each
// driver and every memory module of each device, and accumulates the module
// sizes into resp->total and the free amounts into resp->free.
//
// The totals are therefore a sum over ALL discovered devices, not a
// per-device figure. On failure resp->err is set to a heap-allocated message
// (the caller frees it); resp->total/resp->free may then hold a partial sum.
void oneapi_check_vram(oneapi_handle_t h, mem_info_t *resp)
{
  ze_result_t ret;
  uint32_t i, d, m; // unsigned to match the uint32_t counts they are compared to
  zes_driver_handle_t *allDrivers = NULL;
  zes_device_handle_t *devices = NULL;
  zes_mem_handle_t *mems = NULL;

  resp->err = NULL;

  if (h.handle == NULL)
  {
    resp->err = strdup("Level-Zero handle not initialized");
    return;
  }

  uint32_t driversCount = 0;
  ret = (*h.zesDriverGet)(&driversCount, NULL);
  if (ret != ZE_RESULT_SUCCESS)
  {
    oneapi_vram_error(resp, "unable to get driver count", ret);
    return;
  }
  LOG(h.verbose, "discovered %d Level-Zero drivers\n", driversCount);

  resp->total = 0;
  resp->free = 0;

  if (driversCount == 0)
  {
    // Nothing to enumerate; avoid a zero-sized allocation.
    return;
  }

  allDrivers = calloc(driversCount, sizeof(zes_driver_handle_t));
  if (allDrivers == NULL)
  {
    resp->err = strdup("unable to allocate Level-Zero driver list");
    return;
  }
  ret = (*h.zesDriverGet)(&driversCount, allDrivers);
  if (ret != ZE_RESULT_SUCCESS)
  {
    oneapi_vram_error(resp, "unable to get drivers", ret);
    goto cleanup;
  }

  for (d = 0; d < driversCount; d++)
  {
    uint32_t deviceCount = 0;
    ret = (*h.zesDeviceGet)(allDrivers[d], &deviceCount, NULL);
    if (ret != ZE_RESULT_SUCCESS)
    {
      oneapi_vram_error(resp, "unable to get device count", ret);
      goto cleanup;
    }

    LOG(h.verbose, "discovered %d Level-Zero devices\n", deviceCount);

    if (deviceCount == 0)
    {
      continue;
    }

    devices = calloc(deviceCount, sizeof(zes_device_handle_t));
    if (devices == NULL)
    {
      resp->err = strdup("unable to allocate Level-Zero device list");
      goto cleanup;
    }
    ret = (*h.zesDeviceGet)(allDrivers[d], &deviceCount, devices);
    if (ret != ZE_RESULT_SUCCESS)
    {
      oneapi_vram_error(resp, "unable to get devices", ret);
      goto cleanup;
    }

    for (i = 0; i < deviceCount; i++)
    {
      // Chain the extended properties struct behind the base one via pNext.
      zes_device_ext_properties_t ext_props;
      ext_props.stype = ZES_STRUCTURE_TYPE_DEVICE_EXT_PROPERTIES;
      ext_props.pNext = NULL;

      zes_device_properties_t props;
      props.stype = ZES_STRUCTURE_TYPE_DEVICE_PROPERTIES;
      props.pNext = &ext_props;

      ret = (*h.zesDeviceGetProperties)(devices[i], &props);
      if (ret != ZE_RESULT_SUCCESS)
      {
        oneapi_vram_error(resp, "unable to get device properties", ret);
        goto cleanup;
      }

      if (h.verbose)
      {
        // When in verbose mode, report more information about
        // the card we discover.
        LOG(h.verbose, "[%d] oneAPI device name: %s\n", i,
            props.modelName);
        LOG(h.verbose, "[%d] oneAPI brand: %s\n", i,
            props.brandName);
        LOG(h.verbose, "[%d] oneAPI vendor: %s\n", i,
            props.vendorName);
        LOG(h.verbose, "[%d] oneAPI S/N: %s\n", i,
            props.serialNumber);
        LOG(h.verbose, "[%d] oneAPI board number: %s\n", i,
            props.boardNumber);
      }

      uint32_t memCount = 0;
      ret = (*h.zesDeviceEnumMemoryModules)(devices[i], &memCount, NULL);
      if (ret != ZE_RESULT_SUCCESS)
      {
        oneapi_vram_error(resp,
                          "unable to enumerate Level-Zero memory modules", ret);
        goto cleanup;
      }

      LOG(h.verbose, "discovered %d Level-Zero memory modules\n", memCount);

      if (memCount == 0)
      {
        continue;
      }

      mems = calloc(memCount, sizeof(zes_mem_handle_t));
      if (mems == NULL)
      {
        resp->err = strdup("unable to allocate Level-Zero memory module list");
        goto cleanup;
      }
      ret = (*h.zesDeviceEnumMemoryModules)(devices[i], &memCount, mems);
      if (ret != ZE_RESULT_SUCCESS)
      {
        oneapi_vram_error(resp,
                          "unable to enumerate Level-Zero memory modules", ret);
        goto cleanup;
      }

      for (m = 0; m < memCount; m++)
      {
        zes_mem_state_t state;
        state.stype = ZES_STRUCTURE_TYPE_MEM_STATE;
        state.pNext = NULL;
        ret = (*h.zesMemoryGetState)(mems[m], &state);
        if (ret != ZE_RESULT_SUCCESS)
        {
          oneapi_vram_error(resp, "unable to get memory state", ret);
          goto cleanup;
        }

        resp->total += state.size;
        resp->free += state.free;
      }

      free(mems);
      mems = NULL;
    }

    free(devices);
    devices = NULL;
  }

cleanup:
  // Single exit path: free(NULL) is a no-op, so whatever is still allocated
  // at this point is released exactly once.
  free(mems);
  free(devices);
  free(allDrivers);
}
|
||||||
|
|
||||||
|
#endif // __APPLE__
|
211
gpu/gpu_info_oneapi.h
Normal file
211
gpu/gpu_info_oneapi.h
Normal file
|
@ -0,0 +1,211 @@
|
||||||
|
#ifndef __APPLE__
|
||||||
|
#ifndef __GPU_INFO_ONEAPI_H__
|
||||||
|
#define __GPU_INFO_ONEAPI_H__
|
||||||
|
#include "gpu_info.h"
|
||||||
|
|
||||||
|
#define ZE_MAX_DEVICE_NAME 256
|
||||||
|
#define ZE_MAX_DEVICE_UUID_SIZE 16
|
||||||
|
#define ZES_STRING_PROPERTY_SIZE 64
|
||||||
|
// Bit-flag helper; the argument is parenthesized so that expressions with
// lower precedence than << (e.g. `a | b`) expand correctly.
#define ZE_BIT(_i) (1 << (_i))
|
||||||
|
|
||||||
|
// Just enough typedef's to dlopen/dlsym for memory information
|
||||||
|
typedef enum ze_result_t
|
||||||
|
{
|
||||||
|
ZE_RESULT_SUCCESS = 0,
|
||||||
|
// Other values omitted for now...
|
||||||
|
} ze_result_t;
|
||||||
|
|
||||||
|
typedef uint8_t ze_bool_t;
|
||||||
|
typedef struct _zes_driver_handle_t *zes_driver_handle_t;
|
||||||
|
typedef struct _zes_device_handle_t *zes_device_handle_t;
|
||||||
|
typedef struct _zes_mem_handle_t *zes_mem_handle_t;
|
||||||
|
|
||||||
|
typedef enum _ze_structure_type_t
|
||||||
|
{
|
||||||
|
ZE_STRUCTURE_TYPE_FORCE_UINT32 = 0x7fffffff
|
||||||
|
} ze_structure_type_t;
|
||||||
|
|
||||||
|
typedef enum _zes_structure_type_t
|
||||||
|
{
|
||||||
|
ZES_STRUCTURE_TYPE_DEVICE_PROPERTIES = 0x1,
|
||||||
|
ZES_STRUCTURE_TYPE_MEM_PROPERTIES = 0xb,
|
||||||
|
ZES_STRUCTURE_TYPE_MEM_STATE = 0x1e,
|
||||||
|
ZES_STRUCTURE_TYPE_DEVICE_EXT_PROPERTIES = 0x2d,
|
||||||
|
ZES_STRUCTURE_TYPE_FORCE_UINT32 = 0x7fffffff
|
||||||
|
} zes_structure_type_t;
|
||||||
|
|
||||||
|
typedef enum _zes_mem_type_t
|
||||||
|
{
|
||||||
|
ZES_MEM_TYPE_FORCE_UINT32 = 0x7fffffff
|
||||||
|
} zes_mem_type_t;
|
||||||
|
|
||||||
|
typedef enum _zes_mem_loc_t
|
||||||
|
{
|
||||||
|
ZES_MEM_LOC_SYSTEM = 0,
|
||||||
|
ZES_MEM_LOC_DEVICE = 1,
|
||||||
|
ZES_MEM_LOC_FORCE_UINT32 = 0x7fffffff
|
||||||
|
} zes_mem_loc_t;
|
||||||
|
|
||||||
|
typedef enum _zes_mem_health_t
|
||||||
|
{
|
||||||
|
ZES_MEM_HEALTH_FORCE_UINT32 = 0x7fffffff
|
||||||
|
} zes_mem_health_t;
|
||||||
|
|
||||||
|
typedef struct _ze_device_uuid_t
|
||||||
|
{
|
||||||
|
uint8_t id[ZE_MAX_DEVICE_UUID_SIZE];
|
||||||
|
} ze_device_uuid_t;
|
||||||
|
|
||||||
|
typedef struct _zes_uuid_t
|
||||||
|
{
|
||||||
|
uint8_t id[ZE_MAX_DEVICE_UUID_SIZE];
|
||||||
|
} zes_uuid_t;
|
||||||
|
|
||||||
|
typedef enum _ze_device_type_t
|
||||||
|
{
|
||||||
|
ZE_DEVICE_TYPE_GPU = 1,
|
||||||
|
ZE_DEVICE_TYPE_CPU = 2,
|
||||||
|
ZE_DEVICE_TYPE_FPGA = 3,
|
||||||
|
ZE_DEVICE_TYPE_MCA = 4,
|
||||||
|
ZE_DEVICE_TYPE_VPU = 5,
|
||||||
|
ZE_DEVICE_TYPE_FORCE_UINT32 = 0x7fffffff
|
||||||
|
} ze_device_type_t;
|
||||||
|
|
||||||
|
typedef enum _zes_device_type_t
|
||||||
|
{
|
||||||
|
ZES_DEVICE_TYPE_GPU = 1,
|
||||||
|
ZES_DEVICE_TYPE_CPU = 2,
|
||||||
|
ZES_DEVICE_TYPE_FPGA = 3,
|
||||||
|
ZES_DEVICE_TYPE_MCA = 4,
|
||||||
|
ZES_DEVICE_TYPE_VPU = 5,
|
||||||
|
ZES_DEVICE_TYPE_FORCE_UINT32 = 0x7fffffff
|
||||||
|
} zes_device_type_t;
|
||||||
|
|
||||||
|
typedef uint32_t ze_device_property_flags_t;
|
||||||
|
typedef enum _ze_device_property_flag_t
|
||||||
|
{
|
||||||
|
ZE_DEVICE_PROPERTY_FLAG_INTEGRATED = ZE_BIT(0),
|
||||||
|
ZE_DEVICE_PROPERTY_FLAG_SUBDEVICE = ZE_BIT(1),
|
||||||
|
ZE_DEVICE_PROPERTY_FLAG_ECC = ZE_BIT(2),
|
||||||
|
ZE_DEVICE_PROPERTY_FLAG_ONDEMANDPAGING = ZE_BIT(3),
|
||||||
|
ZE_DEVICE_PROPERTY_FLAG_FORCE_UINT32 = 0x7fffffff
|
||||||
|
} ze_device_property_flag_t;
|
||||||
|
|
||||||
|
typedef uint32_t zes_device_property_flags_t;
|
||||||
|
typedef enum _zes_device_property_flag_t
|
||||||
|
{
|
||||||
|
ZES_DEVICE_PROPERTY_FLAG_INTEGRATED = ZE_BIT(0),
|
||||||
|
ZES_DEVICE_PROPERTY_FLAG_SUBDEVICE = ZE_BIT(1),
|
||||||
|
ZES_DEVICE_PROPERTY_FLAG_ECC = ZE_BIT(2),
|
||||||
|
ZES_DEVICE_PROPERTY_FLAG_ONDEMANDPAGING = ZE_BIT(3),
|
||||||
|
ZES_DEVICE_PROPERTY_FLAG_FORCE_UINT32 = 0x7fffffff
|
||||||
|
} zes_device_property_flag_t;
|
||||||
|
|
||||||
|
typedef struct _ze_device_properties_t
|
||||||
|
{
|
||||||
|
ze_structure_type_t stype;
|
||||||
|
void *pNext;
|
||||||
|
ze_device_type_t type;
|
||||||
|
uint32_t vendorId;
|
||||||
|
uint32_t deviceId;
|
||||||
|
ze_device_property_flags_t flags;
|
||||||
|
uint32_t subdeviceId;
|
||||||
|
uint32_t coreClockRate;
|
||||||
|
uint64_t maxMemAllocSize;
|
||||||
|
uint32_t maxHardwareContexts;
|
||||||
|
uint32_t maxCommandQueuePriority;
|
||||||
|
uint32_t numThreadsPerEU;
|
||||||
|
uint32_t physicalEUSimdWidth;
|
||||||
|
uint32_t numEUsPerSubslice;
|
||||||
|
uint32_t numSubslicesPerSlice;
|
||||||
|
uint32_t numSlices;
|
||||||
|
uint64_t timerResolution;
|
||||||
|
uint32_t timestampValidBits;
|
||||||
|
uint32_t kernelTimestampValidBits;
|
||||||
|
ze_device_uuid_t uuid;
|
||||||
|
char name[ZE_MAX_DEVICE_NAME];
|
||||||
|
} ze_device_properties_t;
|
||||||
|
|
||||||
|
typedef struct _zes_device_properties_t
|
||||||
|
{
|
||||||
|
zes_structure_type_t stype;
|
||||||
|
void *pNext;
|
||||||
|
ze_device_properties_t core;
|
||||||
|
uint32_t numSubdevices;
|
||||||
|
char serialNumber[ZES_STRING_PROPERTY_SIZE];
|
||||||
|
char boardNumber[ZES_STRING_PROPERTY_SIZE];
|
||||||
|
char brandName[ZES_STRING_PROPERTY_SIZE];
|
||||||
|
char modelName[ZES_STRING_PROPERTY_SIZE];
|
||||||
|
char vendorName[ZES_STRING_PROPERTY_SIZE];
|
||||||
|
char driverVersion[ZES_STRING_PROPERTY_SIZE];
|
||||||
|
} zes_device_properties_t;
|
||||||
|
|
||||||
|
typedef struct _zes_device_ext_properties_t
|
||||||
|
{
|
||||||
|
zes_structure_type_t stype;
|
||||||
|
void *pNext;
|
||||||
|
zes_uuid_t uuid;
|
||||||
|
zes_device_type_t type;
|
||||||
|
zes_device_property_flags_t flags;
|
||||||
|
} zes_device_ext_properties_t;
|
||||||
|
|
||||||
|
typedef struct _zes_mem_properties_t
|
||||||
|
{
|
||||||
|
zes_structure_type_t stype;
|
||||||
|
void *pNext;
|
||||||
|
zes_mem_type_t type;
|
||||||
|
ze_bool_t onSubdevice;
|
||||||
|
uint32_t subdeviceId;
|
||||||
|
zes_mem_loc_t location;
|
||||||
|
uint64_t physicalSize;
|
||||||
|
int32_t busWidth;
|
||||||
|
int32_t numChannels;
|
||||||
|
} zes_mem_properties_t;
|
||||||
|
|
||||||
|
typedef struct _zes_mem_state_t
|
||||||
|
{
|
||||||
|
zes_structure_type_t stype;
|
||||||
|
const void *pNext;
|
||||||
|
zes_mem_health_t health;
|
||||||
|
uint64_t free;
|
||||||
|
uint64_t size;
|
||||||
|
} zes_mem_state_t;
|
||||||
|
|
||||||
|
typedef struct oneapi_handle
|
||||||
|
{
|
||||||
|
void *handle;
|
||||||
|
uint16_t verbose;
|
||||||
|
ze_result_t (*zesInit)(int);
|
||||||
|
ze_result_t (*zesDriverGet)(uint32_t *pCount, zes_driver_handle_t *phDrivers);
|
||||||
|
ze_result_t (*zesDeviceGet)(zes_driver_handle_t hDriver, uint32_t *pCount,
|
||||||
|
zes_device_handle_t *phDevices);
|
||||||
|
ze_result_t (*zesDeviceGetProperties)(zes_device_handle_t hDevice,
|
||||||
|
zes_device_properties_t *pProperties);
|
||||||
|
ze_result_t (*zesDeviceEnumMemoryModules)(zes_device_handle_t hDevice,
|
||||||
|
uint32_t *pCount,
|
||||||
|
zes_mem_handle_t *phMemory);
|
||||||
|
ze_result_t (*zesMemoryGetProperties)(zes_mem_handle_t hMemory,
|
||||||
|
zes_mem_properties_t *pProperties);
|
||||||
|
ze_result_t (*zesMemoryGetState)(zes_mem_handle_t hMemory,
|
||||||
|
zes_mem_state_t *pState);
|
||||||
|
|
||||||
|
} oneapi_handle_t;
|
||||||
|
|
||||||
|
typedef struct oneapi_init_resp
|
||||||
|
{
|
||||||
|
char *err; // If err is non-null handle is invalid
|
||||||
|
int num_devices;
|
||||||
|
oneapi_handle_t oh;
|
||||||
|
} oneapi_init_resp_t;
|
||||||
|
|
||||||
|
typedef struct oneapi_version_resp
|
||||||
|
{
|
||||||
|
ze_result_t status;
|
||||||
|
char *str; // Contains version or error string if status != 0
|
||||||
|
} oneapi_version_resp_t;
|
||||||
|
|
||||||
|
void oneapi_init(char *oneapi_lib_path, oneapi_init_resp_t *resp);
|
||||||
|
void oneapi_check_vram(oneapi_handle_t rh, mem_info_t *resp);
|
||||||
|
|
||||||
|
#endif // __GPU_INFO_ONEAPI_H__
|
||||||
|
#endif // __APPLE__
|
21
gpu/gpu_oneapi.go
Normal file
21
gpu/gpu_oneapi.go
Normal file
|
@ -0,0 +1,21 @@
|
||||||
|
//go:build linux || windows
|
||||||
|
|
||||||
|
package gpu
|
||||||
|
|
||||||
|
import (
|
||||||
|
"log/slog"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
func oneapiGetVisibleDevicesEnv(gpuInfo []GpuInfo) (string, string) {
|
||||||
|
ids := []string{}
|
||||||
|
for _, info := range gpuInfo {
|
||||||
|
if info.Library != "oneapi" {
|
||||||
|
// TODO shouldn't happen if things are wired correctly...
|
||||||
|
slog.Debug("oneapiGetVisibleDevicesEnv skipping over non-sycl device", "library", info.Library)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
ids = append(ids, info.ID)
|
||||||
|
}
|
||||||
|
return "ONEAPI_DEVICE_SELECTOR", "level_zero:" + strings.Join(ids, ",")
|
||||||
|
}
|
|
@ -206,6 +206,36 @@ if [ -z "${OLLAMA_SKIP_CUDA_GENERATE}" -a -d "${CUDA_LIB_DIR}" ]; then
|
||||||
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
if [ -z "${ONEAPI_ROOT}" ]; then
    # Try the default location in case it exists
    ONEAPI_ROOT=/opt/intel/oneapi
fi

# Build the oneAPI (SYCL) runner when the toolkit directory exists.
# Setting OLLAMA_SKIP_ONEAPI_GENERATE to any non-empty value skips this step,
# mirroring the OLLAMA_SKIP_CUDA_GENERATE gate used for the CUDA build.
if [ -z "${OLLAMA_SKIP_ONEAPI_GENERATE}" -a -d "${ONEAPI_ROOT}" ]; then
    echo "OneAPI libraries detected - building dynamic OneAPI library"
    init_vars
    # Quote the path so an ONEAPI_ROOT containing spaces still works.
    source "${ONEAPI_ROOT}/setvars.sh" --force # set up environment variables for oneAPI
    CC=icx
    CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL=ON -DLLAMA_SYCL_F16=OFF"
    BUILD_DIR="../build/linux/${ARCH}/oneapi"
    EXTRA_LIBS="-fsycl -Wl,-rpath,${ONEAPI_ROOT}/compiler/latest/lib,-rpath,${ONEAPI_ROOT}/mkl/latest/lib,-rpath,${ONEAPI_ROOT}/tbb/latest/lib,-rpath,${ONEAPI_ROOT}/compiler/latest/opt/oclfpga/linux64/lib -lOpenCL -lmkl_core -lmkl_sycl_blas -lmkl_intel_ilp64 -lmkl_tbb_thread -ltbb"
    DEBUG_FLAGS="" # icx compiles with -O0 if we pass -g, so we must remove it
    build

    # copy oneAPI dependencies
    # Take the resolved paths from ldd output and keep only the sycl/mkl/tbb libraries.
    for dep in $(ldd "${BUILD_DIR}/bin/ollama_llama_server" | grep "=>" | cut -f2 -d= | cut -f2 -d' ' | grep -e sycl -e mkl -e tbb); do
        cp "${dep}" "${BUILD_DIR}/bin/"
    done
    # Compiler runtime libraries copied explicitly from the toolkit.
    cp "${ONEAPI_ROOT}/compiler/latest/lib/libOpenCL.so" "${BUILD_DIR}/bin/"
    cp "${ONEAPI_ROOT}/compiler/latest/lib/libimf.so" "${BUILD_DIR}/bin/"
    cp "${ONEAPI_ROOT}/compiler/latest/lib/libintlc.so.5" "${BUILD_DIR}/bin/"
    cp "${ONEAPI_ROOT}/compiler/latest/lib/libirng.so" "${BUILD_DIR}/bin/"
    cp "${ONEAPI_ROOT}/compiler/latest/lib/libpi_level_zero.so" "${BUILD_DIR}/bin/"
    cp "${ONEAPI_ROOT}/compiler/latest/lib/libsvml.so" "${BUILD_DIR}/bin/"
    cp "${ONEAPI_ROOT}/compiler/latest/lib/libur_loader.so.0" "${BUILD_DIR}/bin/"
    compress
fi
|
||||||
|
|
||||||
if [ -z "${ROCM_PATH}" ]; then
|
if [ -z "${ROCM_PATH}" ]; then
|
||||||
# Try the default location in case it exists
|
# Try the default location in case it exists
|
||||||
ROCM_PATH=/opt/rocm
|
ROCM_PATH=/opt/rocm
|
||||||
|
|
|
@ -289,6 +289,49 @@ function build_cuda() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Builds the oneAPI (SYCL) runner on Windows and copies the oneAPI runtime
# DLLs next to it. Runs only when ONEAPI_ROOT is set and
# OLLAMA_SKIP_ONEAPI_GENERATE is not.
function build_oneapi() {
    # Gate on the oneAPI-specific flag; skipping the CUDA build must not
    # silently skip this one as well.
    if ((-not "${env:OLLAMA_SKIP_ONEAPI_GENERATE}") -and ("${env:ONEAPI_ROOT}")) {
        # Get oneAPI version
        $script:ONEAPI_VERSION = icpx --version
        $script:ONEAPI_VERSION = [regex]::Match($script:ONEAPI_VERSION, '(?<=oneAPI DPC\+\+/C\+\+ Compiler )(?<version>\d+\.\d+\.\d+)').Value
        # A failed match yields an empty string rather than $null, so test for
        # emptiness to avoid producing a bare "_v" suffix.
        $script:ONEAPI_VARIANT = ""
        if (-not [string]::IsNullOrEmpty($script:ONEAPI_VERSION)) {
            $script:ONEAPI_VARIANT = "_v" + $script:ONEAPI_VERSION
        }
        init_vars
        $script:buildDir = "../build/windows/${script:ARCH}/oneapi$script:ONEAPI_VARIANT"
        $script:distDir ="$script:DIST_BASE\oneapi$script:ONEAPI_VARIANT"
        $script:cmakeDefs += @(
            "-G", "MinGW Makefiles",
            "-DLLAMA_SYCL=ON",
            "-DCMAKE_C_COMPILER=icx",
            "-DCMAKE_CXX_COMPILER=icx",
            "-DCMAKE_BUILD_TYPE=Release"
        )

        Write-Host "Building oneAPI"
        build
        # The MinGW Makefiles generator is used above, so the binary is looked
        # up directly under bin/ without a per-config subdirectory.
        if ($null -ne $script:DUMPBIN) {
            & "$script:DUMPBIN" /dependents "${script:buildDir}/bin/ollama_llama_server.exe" | Select-String ".dll"
        }
        sign
        install

        # Ship the oneAPI compiler and MKL runtime DLLs alongside the runner.
        cp "${env:ONEAPI_ROOT}\compiler\latest\bin\libirngmd.dll" "${script:distDir}"
        cp "${env:ONEAPI_ROOT}\compiler\latest\bin\libmmd.dll" "${script:distDir}"
        cp "${env:ONEAPI_ROOT}\compiler\latest\bin\pi_level_zero.dll" "${script:distDir}"
        cp "${env:ONEAPI_ROOT}\compiler\latest\bin\pi_unified_runtime.dll" "${script:distDir}"
        cp "${env:ONEAPI_ROOT}\compiler\latest\bin\pi_win_proxy_loader.dll" "${script:distDir}"
        cp "${env:ONEAPI_ROOT}\compiler\latest\bin\svml_dispmd.dll" "${script:distDir}"
        cp "${env:ONEAPI_ROOT}\compiler\latest\bin\sycl7.dll" "${script:distDir}"
        cp "${env:ONEAPI_ROOT}\mkl\latest\bin\mkl_core.2.dll" "${script:distDir}"
        cp "${env:ONEAPI_ROOT}\mkl\latest\bin\mkl_sycl_blas.4.dll" "${script:distDir}"
        cp "${env:ONEAPI_ROOT}\mkl\latest\bin\mkl_tbb_thread.2.dll" "${script:distDir}"
    } else {
        Write-Host "Skipping oneAPI generation step"
    }
}
|
||||||
|
|
||||||
function build_rocm() {
|
function build_rocm() {
|
||||||
if ((-not "${env:OLLAMA_SKIP_ROCM_GENERATE}") -and ("${env:HIP_PATH}")) {
|
if ((-not "${env:OLLAMA_SKIP_ROCM_GENERATE}") -and ("${env:HIP_PATH}")) {
|
||||||
$script:ROCM_VERSION=(get-item $env:HIP_PATH).Basename
|
$script:ROCM_VERSION=(get-item $env:HIP_PATH).Basename
|
||||||
|
@ -356,6 +399,7 @@ if ($($args.count) -eq 0) {
|
||||||
build_cpu_avx
|
build_cpu_avx
|
||||||
build_cpu_avx2
|
build_cpu_avx2
|
||||||
build_cuda
|
build_cuda
|
||||||
|
build_oneapi
|
||||||
build_rocm
|
build_rocm
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue