This commit is contained in:
baalajimaestro 2024-04-06 16:34:43 +05:30
commit 0078e0f1cf
Signed by: baalajimaestro
GPG key ID: F93C394FE9BBAFD5
6 changed files with 41 additions and 24 deletions

View file

@ -7,6 +7,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased] ## [Unreleased]
## [0.2.60]
- feat: Update llama.cpp to ggerganov/llama.cpp@75cd4c77292034ecec587ecb401366f57338f7c0
- fix: Always embed metal library by @abetlen in b3bfea6dbfb6ed9ce18f9a2723e0a9e4bd1da7ad
- fix: missing logprobs in response, incorrect response type for functionary by @abetlen in 1ae3abbcc3af7f4a25a3ffc40b246f18039565e8
- fix(docs): incorrect tool_choice example by @CISC in #1330
## [0.2.59] ## [0.2.59]
- feat: Update llama.cpp to ggerganov/llama.cpp@ba0c7c70ab5b15f1f2be7fb0dfbe0366dda30d6c - feat: Update llama.cpp to ggerganov/llama.cpp@ba0c7c70ab5b15f1f2be7fb0dfbe0366dda30d6c

View file

@ -18,7 +18,7 @@ if (LLAMA_BUILD)
set(LLAMA_F16C "Off" CACHE BOOL "llama: enable F16C" FORCE) set(LLAMA_F16C "Off" CACHE BOOL "llama: enable F16C" FORCE)
endif() endif()
if (APPLE AND CMAKE_SYSTEM_PROCESSOR MATCHES "arm64") if (APPLE)
set(LLAMA_METAL_EMBED_LIBRARY "On" CACHE BOOL "llama: embed metal library" FORCE) set(LLAMA_METAL_EMBED_LIBRARY "On" CACHE BOOL "llama: embed metal library" FORCE)
endif() endif()

View file

@ -458,12 +458,12 @@ The high-level API supports OpenAI compatible function and tool calling. This is
} }
} }
}], }],
tool_choice=[{ tool_choice={
"type": "function", "type": "function",
"function": { "function": {
"name": "UserDetail" "name": "UserDetail"
} }
}] }
) )
``` ```

View file

@ -1,4 +1,4 @@
from .llama_cpp import * from .llama_cpp import *
from .llama import * from .llama import *
__version__ = "0.2.59" __version__ = "0.2.60"

View file

@ -6,7 +6,7 @@ import ctypes
import dataclasses import dataclasses
import random import random
import string import string
from typing import Any, Dict, Iterator, List, Literal, Optional, Tuple, Union, Protocol from typing import Any, Dict, Iterator, List, Literal, Optional, Tuple, Union, Protocol, cast
import jinja2 import jinja2
@ -338,6 +338,7 @@ def _convert_completion_to_chat_function(
} }
], ],
}, },
"logprobs": None,
"finish_reason": "tool_calls", "finish_reason": "tool_calls",
} }
], ],
@ -1191,7 +1192,6 @@ def format_mistral_instruct(
elif ( elif (
message["role"] == "assistant" message["role"] == "assistant"
and message["content"] is not None and message["content"] is not None
and isinstance(message["content"], str)
): ):
prompt += " [/INST]" + message["content"] + eos prompt += " [/INST]" + message["content"] + eos
prompt += " [/INST]" prompt += " [/INST]"
@ -1263,7 +1263,7 @@ def format_gemma(
**kwargs: Any, **kwargs: Any,
) -> ChatFormatterResponse: ) -> ChatFormatterResponse:
system_message = _get_system_message(messages) system_message = _get_system_message(messages)
if system_message is not None and system_message != "": if system_message != "":
logger.debug( logger.debug(
"`role='system'` messages are not allowed on Google's Gemma models." "`role='system'` messages are not allowed on Google's Gemma models."
) )
@ -1628,6 +1628,7 @@ def functionary_chat_handler(
} }
], ],
}, },
"logprobs": None,
"finish_reason": "tool_calls", "finish_reason": "tool_calls",
} }
], ],
@ -1909,14 +1910,14 @@ def functionary_v1_v2_chat_handler(
return grammar return grammar
def create_completion(stop): def create_completion(stop):
completion: llama_types.Completion = llama.create_completion( completion = cast(llama_types.Completion, llama.create_completion(
prompt=prompt, prompt=prompt,
temperature=temperature, temperature=temperature,
top_p=top_p, top_p=top_p,
top_k=top_k, top_k=top_k,
min_p=min_p, min_p=min_p,
typical_p=typical_p, typical_p=typical_p,
stream=stream, stream=False,
stop=stop, stop=stop,
max_tokens=max_tokens, max_tokens=max_tokens,
presence_penalty=presence_penalty, presence_penalty=presence_penalty,
@ -1929,7 +1930,7 @@ def functionary_v1_v2_chat_handler(
model=model, model=model,
logits_processor=logits_processor, logits_processor=logits_processor,
grammar=grammar, grammar=grammar,
) ))
return completion return completion
@ -2050,7 +2051,7 @@ def functionary_v1_v2_chat_handler(
assert "usage" in completion assert "usage" in completion
assert len(function_calls) == len(function_bodies) assert len(function_calls) == len(function_bodies)
tool_calls = [] tool_calls: List[llama_types.ChatCompletionMessageToolCall] = []
for function_call, function_body in zip(function_calls, function_bodies): for function_call, function_body in zip(function_calls, function_bodies):
tool_calls.append( tool_calls.append(
{ {
@ -2070,6 +2071,12 @@ def functionary_v1_v2_chat_handler(
) )
# TODO: support stream mode # TODO: support stream mode
function_call_dict: Union[Dict[str, str], Dict[Literal["function_call"], llama_types.ChatCompletionRequestAssistantMessageFunctionCall]] = {
"function_call": {
"name": tool_calls[0]["function"]["name"],
"arguments": tool_calls[0]["function"]["arguments"],
}
} if len(tool_calls) == 1 else {}
return llama_types.CreateChatCompletionResponse( return llama_types.CreateChatCompletionResponse(
id="chat" + completion["id"], id="chat" + completion["id"],
object="chat.completion", object="chat.completion",
@ -2078,14 +2085,12 @@ def functionary_v1_v2_chat_handler(
choices=[ choices=[
{ {
"index": 0, "index": 0,
"logprobs": None,
"message": { "message": {
"role": "assistant", "role": "assistant",
"content": None if content == "" else content, "content": None if content == "" else content,
"function_call": { "tool_calls": tool_calls,
"name": tool_calls[0]["function"]["name"], **function_call_dict,
"arguments": tool_calls[0]["function"]["arguments"],
} if len(tool_calls) > 0 else None,
"tool_calls": tool_calls if len(tool_calls) > 0 else None,
}, },
"finish_reason": "tool_calls" if len(tool_calls) > 0 else "stop", "finish_reason": "tool_calls" if len(tool_calls) > 0 else "stop",
} }
@ -2565,8 +2570,8 @@ def chatml_function_calling(
tool_name = text[len("functions.") :] tool_name = text[len("functions.") :]
tool = next((tool for tool in tools if tool["function"]["name"] == tool_name), None) tool = next((tool for tool in tools if tool["function"]["name"] == tool_name), None)
if not stream: if not stream:
completions = [] completions: List[llama_types.CreateCompletionResponse] = []
completions_tool_name = [] completions_tool_name: List[str] = []
while tool is not None: while tool is not None:
prompt += f"functions.{tool_name}:\n" prompt += f"functions.{tool_name}:\n"
try: try:
@ -2603,6 +2608,7 @@ def chatml_function_calling(
logits_processor=logits_processor, logits_processor=logits_processor,
grammar=grammar, grammar=grammar,
) )
completion_or_chunks = cast(llama_types.CreateCompletionResponse, completion_or_chunks)
completions.append(completion_or_chunks) completions.append(completion_or_chunks)
completions_tool_name.append(tool_name) completions_tool_name.append(tool_name)
prompt += completion_or_chunks["choices"][0]["text"] prompt += completion_or_chunks["choices"][0]["text"]
@ -2631,6 +2637,7 @@ def chatml_function_calling(
follow_up_gbnf_tool_grammar, verbose=llama.verbose follow_up_gbnf_tool_grammar, verbose=llama.verbose
), ),
) )
response = cast(llama_types.CreateCompletionResponse, response)
tool_name = response["choices"][0]["text"][len("functions.") :] tool_name = response["choices"][0]["text"][len("functions.") :]
tool = next( tool = next(
@ -2638,7 +2645,7 @@ def chatml_function_calling(
) )
# Merge completions # Merge completions
function_call = { function_call_dict: Union[Dict[str, str], Dict[Literal["function_call"], llama_types.ChatCompletionRequestAssistantMessageFunctionCall]] = {
"function_call": { "function_call": {
"name": tool_name, "name": tool_name,
"arguments": completions[0]["choices"][0]["text"], "arguments": completions[0]["choices"][0]["text"],
@ -2653,6 +2660,7 @@ def chatml_function_calling(
{ {
"finish_reason": "tool_calls", "finish_reason": "tool_calls",
"index": 0, "index": 0,
"logprobs": None,
"message": { "message": {
"role": "assistant", "role": "assistant",
"content": None, "content": None,
@ -2673,20 +2681,22 @@ def chatml_function_calling(
zip(completions_tool_name, completions) zip(completions_tool_name, completions)
) )
], ],
**function_call **function_call_dict
}, },
} }
], ],
"usage": { "usage": {
"completion_tokens": sum( "completion_tokens": sum(
completion["usage"]["completion_tokens"] completion["usage"]["completion_tokens"] if "usage" in completion else 0
for completion in completions for completion in completions
), ),
"prompt_tokens": sum( "prompt_tokens": sum(
completion["usage"]["prompt_tokens"] for completion in completions completion["usage"]["prompt_tokens"] if "usage" in completion else 0
for completion in completions
), ),
"total_tokens": sum( "total_tokens": sum(
completion["usage"]["total_tokens"] for completion in completions completion["usage"]["total_tokens"] if "usage" in completion else 0
for completion in completions
), ),
}, },
} }

2
vendor/llama.cpp vendored

@ -1 +1 @@
Subproject commit 60cdf40cc32f0ad4cb11e0ca8fd38f3b93d8d640 Subproject commit 75cd4c77292034ecec587ecb401366f57338f7c0