server: Support none defaulting to infinity for completions (#111)
* Support defaulting to infinity or -1 for chat completions
* Check if completion_tokens is None in error handler
* fix: max_tokens in create completion should match OpenAI spec
* Fix __call__

---------

Co-authored-by: Andrei Betlen <abetlen@gmail.com>
parent 12b7f2f4e9
commit 4b01a873ef
3 changed files with 6 additions and 4 deletions
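For context, here is a minimal sketch (not the repository's code; the helper names are illustrative) of the two behaviors this commit introduces: an omitted max_tokens maps to -1, which the completion path treats as "no explicit cap", and the error formatter tolerates completion_tokens being None.

from typing import Optional

def resolve_max_tokens(max_tokens: Optional[int]) -> int:
    # None (client omitted the limit) becomes -1, i.e. generate without an explicit cap.
    return -1 if max_tokens is None else max_tokens

def tokens_requested(prompt_tokens: int, completion_tokens: Optional[int]) -> int:
    # completion_tokens may be None when max_tokens was omitted, so count it as 0
    # when reporting a context-window overflow.
    return (completion_tokens or 0) + prompt_tokens

assert resolve_max_tokens(None) == -1
assert resolve_max_tokens(16) == 16
assert tokens_requested(100, None) == 100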
@@ -1917,7 +1917,7 @@ class Llama:
         completion_or_chunks = self._create_completion(
             prompt=prompt,
             suffix=suffix,
-            max_tokens=max_tokens,
+            max_tokens=-1 if max_tokens is None else max_tokens,
             temperature=temperature,
             top_p=top_p,
             min_p=min_p,
@@ -1951,7 +1951,7 @@ class Llama:
         self,
         prompt: str,
         suffix: Optional[str] = None,
-        max_tokens: int = 128,
+        max_tokens: Optional[int] = 16,
         temperature: float = 0.8,
         top_p: float = 0.95,
         min_p: float = 0.05,
@@ -72,7 +72,7 @@ class ErrorResponseFormatters:
         return 400, ErrorResponse(
             message=message.format(
                 context_window,
-                completion_tokens + prompt_tokens,
+                (completion_tokens or 0) + prompt_tokens,
                 prompt_tokens,
                 completion_tokens,
             ), # type: ignore
@@ -110,7 +110,9 @@ class CreateCompletionRequest(BaseModel):
         default=None,
         description="A suffix to append to the generated text. If None, no suffix is appended. Useful for chatbots.",
     )
-    max_tokens: int = max_tokens_field
+    max_tokens: Optional[int] = Field(
+        default=16, ge=0, description="The maximum number of tokens to generate."
+    )
     temperature: float = temperature_field
     top_p: float = top_p_field
     min_p: float = min_p_field
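As a usage note, a rough sketch of what the Pydantic field change above allows (the model below is a stand-in, not the server's actual request class): max_tokens may be omitted, set to an integer >= 0, or sent as null, which the completion path then treats as unlimited (-1).

from typing import Optional
from pydantic import BaseModel, Field

class CompletionRequestSketch(BaseModel):
    # Mirrors the changed field: optional, defaulting to 16 per the OpenAI spec.
    max_tokens: Optional[int] = Field(
        default=16, ge=0, description="The maximum number of tokens to generate."
    )

print(CompletionRequestSketch().max_tokens)                 # 16 (spec default)
print(CompletionRequestSketch(max_tokens=None).max_tokens)  # None -> unlimited downstream
print(CompletionRequestSketch(max_tokens=0).max_tokens)     # 0 passes the ge=0 constraint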