Compare commits

..

No commits in common. "0078e0f1cf32bddde65698c65ea3e0c38c03010f" and "8b9cd38c0d7acde7b5e183f90da3c58298c0c299" have entirely different histories.

6 changed files with 24 additions and 41 deletions

View file

@ -7,13 +7,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased] ## [Unreleased]
## [0.2.60]
- feat: Update llama.cpp to ggerganov/llama.cpp@75cd4c77292034ecec587ecb401366f57338f7c0
- fix: Always embed metal library by @abetlen in b3bfea6dbfb6ed9ce18f9a2723e0a9e4bd1da7ad
- fix: missing logprobs in response, incorrect response type for functionary by @abetlen in 1ae3abbcc3af7f4a25a3ffc40b246f18039565e8
- fix(docs): incorrect tool_choice example by @CISC in #1330
## [0.2.59] ## [0.2.59]
- feat: Update llama.cpp to ggerganov/llama.cpp@ba0c7c70ab5b15f1f2be7fb0dfbe0366dda30d6c - feat: Update llama.cpp to ggerganov/llama.cpp@ba0c7c70ab5b15f1f2be7fb0dfbe0366dda30d6c

View file

@ -18,7 +18,7 @@ if (LLAMA_BUILD)
set(LLAMA_F16C "Off" CACHE BOOL "llama: enable F16C" FORCE) set(LLAMA_F16C "Off" CACHE BOOL "llama: enable F16C" FORCE)
endif() endif()
if (APPLE) if (APPLE AND CMAKE_SYSTEM_PROCESSOR MATCHES "arm64")
set(LLAMA_METAL_EMBED_LIBRARY "On" CACHE BOOL "llama: embed metal library" FORCE) set(LLAMA_METAL_EMBED_LIBRARY "On" CACHE BOOL "llama: embed metal library" FORCE)
endif() endif()

View file

@ -458,12 +458,12 @@ The high-level API supports OpenAI compatible function and tool calling. This is
} }
} }
}], }],
tool_choice={ tool_choice=[{
"type": "function", "type": "function",
"function": { "function": {
"name": "UserDetail" "name": "UserDetail"
} }
} }]
) )
``` ```

View file

@ -1,4 +1,4 @@
from .llama_cpp import * from .llama_cpp import *
from .llama import * from .llama import *
__version__ = "0.2.60" __version__ = "0.2.59"

View file

@ -6,7 +6,7 @@ import ctypes
import dataclasses import dataclasses
import random import random
import string import string
from typing import Any, Dict, Iterator, List, Literal, Optional, Tuple, Union, Protocol, cast from typing import Any, Dict, Iterator, List, Literal, Optional, Tuple, Union, Protocol
import jinja2 import jinja2
@ -338,7 +338,6 @@ def _convert_completion_to_chat_function(
} }
], ],
}, },
"logprobs": None,
"finish_reason": "tool_calls", "finish_reason": "tool_calls",
} }
], ],
@ -1192,6 +1191,7 @@ def format_mistral_instruct(
elif ( elif (
message["role"] == "assistant" message["role"] == "assistant"
and message["content"] is not None and message["content"] is not None
and isinstance(message["content"], str)
): ):
prompt += " [/INST]" + message["content"] + eos prompt += " [/INST]" + message["content"] + eos
prompt += " [/INST]" prompt += " [/INST]"
@ -1263,7 +1263,7 @@ def format_gemma(
**kwargs: Any, **kwargs: Any,
) -> ChatFormatterResponse: ) -> ChatFormatterResponse:
system_message = _get_system_message(messages) system_message = _get_system_message(messages)
if system_message != "": if system_message is not None and system_message != "":
logger.debug( logger.debug(
"`role='system'` messages are not allowed on Google's Gemma models." "`role='system'` messages are not allowed on Google's Gemma models."
) )
@ -1628,7 +1628,6 @@ def functionary_chat_handler(
} }
], ],
}, },
"logprobs": None,
"finish_reason": "tool_calls", "finish_reason": "tool_calls",
} }
], ],
@ -1910,14 +1909,14 @@ def functionary_v1_v2_chat_handler(
return grammar return grammar
def create_completion(stop): def create_completion(stop):
completion = cast(llama_types.Completion, llama.create_completion( completion: llama_types.Completion = llama.create_completion(
prompt=prompt, prompt=prompt,
temperature=temperature, temperature=temperature,
top_p=top_p, top_p=top_p,
top_k=top_k, top_k=top_k,
min_p=min_p, min_p=min_p,
typical_p=typical_p, typical_p=typical_p,
stream=False, stream=stream,
stop=stop, stop=stop,
max_tokens=max_tokens, max_tokens=max_tokens,
presence_penalty=presence_penalty, presence_penalty=presence_penalty,
@ -1930,7 +1929,7 @@ def functionary_v1_v2_chat_handler(
model=model, model=model,
logits_processor=logits_processor, logits_processor=logits_processor,
grammar=grammar, grammar=grammar,
)) )
return completion return completion
@ -2051,7 +2050,7 @@ def functionary_v1_v2_chat_handler(
assert "usage" in completion assert "usage" in completion
assert len(function_calls) == len(function_bodies) assert len(function_calls) == len(function_bodies)
tool_calls: List[llama_types.ChatCompletionMessageToolCall] = [] tool_calls = []
for function_call, function_body in zip(function_calls, function_bodies): for function_call, function_body in zip(function_calls, function_bodies):
tool_calls.append( tool_calls.append(
{ {
@ -2071,12 +2070,6 @@ def functionary_v1_v2_chat_handler(
) )
# TODO: support stream mode # TODO: support stream mode
function_call_dict: Union[Dict[str, str], Dict[Literal["function_call"], llama_types.ChatCompletionRequestAssistantMessageFunctionCall]] = {
"function_call": {
"name": tool_calls[0]["function"]["name"],
"arguments": tool_calls[0]["function"]["arguments"],
}
} if len(tool_calls) == 1 else {}
return llama_types.CreateChatCompletionResponse( return llama_types.CreateChatCompletionResponse(
id="chat" + completion["id"], id="chat" + completion["id"],
object="chat.completion", object="chat.completion",
@ -2085,12 +2078,14 @@ def functionary_v1_v2_chat_handler(
choices=[ choices=[
{ {
"index": 0, "index": 0,
"logprobs": None,
"message": { "message": {
"role": "assistant", "role": "assistant",
"content": None if content == "" else content, "content": None if content == "" else content,
"tool_calls": tool_calls, "function_call": {
**function_call_dict, "name": tool_calls[0]["function"]["name"],
"arguments": tool_calls[0]["function"]["arguments"],
} if len(tool_calls) > 0 else None,
"tool_calls": tool_calls if len(tool_calls) > 0 else None,
}, },
"finish_reason": "tool_calls" if len(tool_calls) > 0 else "stop", "finish_reason": "tool_calls" if len(tool_calls) > 0 else "stop",
} }
@ -2570,8 +2565,8 @@ def chatml_function_calling(
tool_name = text[len("functions.") :] tool_name = text[len("functions.") :]
tool = next((tool for tool in tools if tool["function"]["name"] == tool_name), None) tool = next((tool for tool in tools if tool["function"]["name"] == tool_name), None)
if not stream: if not stream:
completions: List[llama_types.CreateCompletionResponse] = [] completions = []
completions_tool_name: List[str] = [] completions_tool_name = []
while tool is not None: while tool is not None:
prompt += f"functions.{tool_name}:\n" prompt += f"functions.{tool_name}:\n"
try: try:
@ -2608,7 +2603,6 @@ def chatml_function_calling(
logits_processor=logits_processor, logits_processor=logits_processor,
grammar=grammar, grammar=grammar,
) )
completion_or_chunks = cast(llama_types.CreateCompletionResponse, completion_or_chunks)
completions.append(completion_or_chunks) completions.append(completion_or_chunks)
completions_tool_name.append(tool_name) completions_tool_name.append(tool_name)
prompt += completion_or_chunks["choices"][0]["text"] prompt += completion_or_chunks["choices"][0]["text"]
@ -2637,7 +2631,6 @@ def chatml_function_calling(
follow_up_gbnf_tool_grammar, verbose=llama.verbose follow_up_gbnf_tool_grammar, verbose=llama.verbose
), ),
) )
response = cast(llama_types.CreateCompletionResponse, response)
tool_name = response["choices"][0]["text"][len("functions.") :] tool_name = response["choices"][0]["text"][len("functions.") :]
tool = next( tool = next(
@ -2645,7 +2638,7 @@ def chatml_function_calling(
) )
# Merge completions # Merge completions
function_call_dict: Union[Dict[str, str], Dict[Literal["function_call"], llama_types.ChatCompletionRequestAssistantMessageFunctionCall]] = { function_call = {
"function_call": { "function_call": {
"name": tool_name, "name": tool_name,
"arguments": completions[0]["choices"][0]["text"], "arguments": completions[0]["choices"][0]["text"],
@ -2660,7 +2653,6 @@ def chatml_function_calling(
{ {
"finish_reason": "tool_calls", "finish_reason": "tool_calls",
"index": 0, "index": 0,
"logprobs": None,
"message": { "message": {
"role": "assistant", "role": "assistant",
"content": None, "content": None,
@ -2681,22 +2673,20 @@ def chatml_function_calling(
zip(completions_tool_name, completions) zip(completions_tool_name, completions)
) )
], ],
**function_call_dict **function_call
}, },
} }
], ],
"usage": { "usage": {
"completion_tokens": sum( "completion_tokens": sum(
completion["usage"]["completion_tokens"] if "usage" in completion else 0 completion["usage"]["completion_tokens"]
for completion in completions for completion in completions
), ),
"prompt_tokens": sum( "prompt_tokens": sum(
completion["usage"]["prompt_tokens"] if "usage" in completion else 0 completion["usage"]["prompt_tokens"] for completion in completions
for completion in completions
), ),
"total_tokens": sum( "total_tokens": sum(
completion["usage"]["total_tokens"] if "usage" in completion else 0 completion["usage"]["total_tokens"] for completion in completions
for completion in completions
), ),
}, },
} }

2
vendor/llama.cpp vendored

@ -1 +1 @@
Subproject commit 75cd4c77292034ecec587ecb401366f57338f7c0 Subproject commit 60cdf40cc32f0ad4cb11e0ca8fd38f3b93d8d640