Merge pull request #2131 from dhiltgen/probe_cards_at_init
Probe GPUs before backend init
commit 2738837786
1 changed file with 34 additions and 1 deletion
@@ -3,6 +3,27 @@
 // Necessary evil since the server types are not defined in a header
 #include "server.cpp"
 
+// Low level API access to verify GPU access
+#if defined(GGML_USE_CUBLAS)
+#if defined(GGML_USE_HIPBLAS)
+#include <hip/hip_runtime.h>
+#include <hipblas/hipblas.h>
+#include <hip/hip_fp16.h>
+#ifdef __HIP_PLATFORM_AMD__
+// for rocblas_initialize()
+#include "rocblas/rocblas.h"
+#endif // __HIP_PLATFORM_AMD__
+#define cudaGetDevice hipGetDevice
+#define cudaError_t hipError_t
+#define cudaSuccess hipSuccess
+#define cudaGetErrorString hipGetErrorString
+#else
+#include <cuda_runtime.h>
+#include <cublas_v2.h>
+#include <cuda_fp16.h>
+#endif // defined(GGML_USE_HIPBLAS)
+#endif // GGML_USE_CUBLAS
+
 // Expose the llama server as a callable extern "C" API
 llama_server_context *llama = NULL;
 std::atomic<bool> ext_server_running(false);
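The added block maps the handful of CUDA runtime symbols the probe needs onto their HIP equivalents, so the same probe code later in the file compiles against either CUDA or ROCm. Below is a minimal standalone sketch of that aliasing pattern; it is not part of this PR, and the build flags, main() wrapper, and messages are illustrative only.

// Standalone sketch of the alias trick used above: build against the CUDA
// toolkit, or against HIP/ROCm with -DGGML_USE_HIPBLAS. Only the macro names
// mirror the diff; everything else is illustrative.
#if defined(GGML_USE_HIPBLAS)
#include <hip/hip_runtime.h>
#define cudaGetDevice hipGetDevice
#define cudaError_t hipError_t
#define cudaSuccess hipSuccess
#define cudaGetErrorString hipGetErrorString
#else
#include <cuda_runtime.h>
#endif
#include <cstdio>

int main() {
  int id = -1;
  cudaError_t status = cudaGetDevice(&id);  // resolves to hipGetDevice under ROCm
  if (status != cudaSuccess) {
    std::fprintf(stderr, "no usable GPU: %s\n", cudaGetErrorString(status));
    return 1;
  }
  std::printf("GPU probe succeeded, active device %d\n", id);
  return 0;
}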
@@ -12,7 +33,7 @@ void llama_server_init(ext_server_params *sparams, ext_server_resp_t *err) {
 #if SERVER_VERBOSE != 1
   log_disable();
 #endif
-  LOG_TEE("system info: %s", llama_print_system_info());
+  LOG_TEE("system info: %s\n", llama_print_system_info());
   assert(err != NULL && sparams != NULL);
   err->id = 0;
   err->msg[0] = '\0';
@@ -60,6 +81,18 @@ void llama_server_init(ext_server_params *sparams, ext_server_resp_t *err) {
     params.mmproj = std::string(sparams->mmproj);
   }
 
+#if defined(GGML_USE_CUBLAS)
+  // Before attempting to init the backend which will assert on error, verify the CUDA/ROCM GPU is accessible
+  LOG_TEE("Performing pre-initialization of GPU\n");
+  int id;
+  cudaError_t cudaErr = cudaGetDevice(&id);
+  if (cudaErr != cudaSuccess) {
+    err->id = -1;
+    snprintf(err->msg, err->msg_len, "Unable to init GPU: %s", cudaGetErrorString(cudaErr));
+    return;
+  }
+#endif
+
   llama_backend_init(params.numa);
 
   // load the model
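Because the probe runs before llama_backend_init, a missing or inaccessible GPU now surfaces through the err out-parameter instead of a hard assert inside the backend. The following caller sketch is hypothetical and not from this PR: the ext_server_resp_t layout is assumed from the fields used above (id, msg_len, msg), and the real callers live elsewhere in the repository.

// Hypothetical caller sketch (assumed struct layout; the real declarations
// come from the ext server header, which is not part of this diff).
#include <cstddef>
#include <cstdio>

extern "C" {
typedef struct ext_server_resp {
  int id;          // 0 on success, negative on failure (assumed convention)
  size_t msg_len;  // capacity of the msg buffer
  char *msg;       // caller-provided buffer for the error text
} ext_server_resp_t;
struct ext_server_params;
void llama_server_init(struct ext_server_params *sparams, ext_server_resp_t *err);
}

// Returns true if the server (and the GPU, when built with GGML_USE_CUBLAS) came up.
bool try_init(struct ext_server_params *sparams) {
  char buf[256];
  ext_server_resp_t err;
  err.id = 0;
  err.msg_len = sizeof(buf);
  err.msg = buf;
  llama_server_init(sparams, &err);
  if (err.id != 0) {
    // e.g. the "Unable to init GPU: ..." message set by the probe above
    std::fprintf(stderr, "server init failed, caller may fall back to CPU: %s\n", err.msg);
    return false;
  }
  return true;
}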