Handle errors returned by llama.cpp

Andrei Betlen 2023-03-24 15:47:17 -04:00
parent bcde1f19b7
commit b93675608a


@@ -1,3 +1,4 @@
+import os
 import uuid
 import time
 import multiprocessing
@@ -35,6 +36,9 @@ class Llama:
         self.tokens = (llama_cpp.llama_token * self.params.n_ctx)()
+        if not os.path.exists(model_path):
+            raise ValueError(f"Model path does not exist: {model_path}")
         self.ctx = llama_cpp.llama_init_from_file(
             self.model_path.encode("utf-8"), self.params
         )
@@ -66,6 +70,8 @@ class Llama:
             llama_cpp.llama_n_ctx(self.ctx),
             True,
         )
+        if prompt_tokens < 0:
+            raise RuntimeError(f"Failed to tokenize prompt: {prompt_tokens}")
         if prompt_tokens + max_tokens > self.params.n_ctx:
             raise ValueError(
@@ -115,13 +121,15 @@ class Llama:
                 finish_reason = "stop"
                 break
-            llama_cpp.llama_eval(
+            rc = llama_cpp.llama_eval(
                 self.ctx,
                 (llama_cpp.llama_token * 1)(self.tokens[prompt_tokens + i]),
                 1,
                 prompt_tokens + completion_tokens,
                 self.n_threads,
             )
+            if rc != 0:
+                raise RuntimeError(f"Failed to evaluate next token: {rc}")
         text = text.decode("utf-8")
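
With these checks in place, a caller can trap the new exceptions instead of hitting an unexplained failure inside llama.cpp. A rough usage sketch follows; the model path and prompt are placeholders, and it assumes the package exposes the Llama class and its high-level __call__ API at the top level:

from llama_cpp import Llama

try:
    # Placeholder model path and prompt, for illustration only.
    llm = Llama(model_path="./models/ggml-model.bin")
    completion = llm("Q: What is the capital of France? A:", max_tokens=16)
    print(completion)
except ValueError as err:
    # Missing model file, or prompt + max_tokens exceeding the context window.
    print(f"Invalid request: {err}")
except RuntimeError as err:
    # llama_tokenize or llama_eval reported a failure code.
    print(f"llama.cpp error: {err}")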