{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Functions\n",
    "\n",
    "The OpenAI compatible web server in `llama-cpp-python` supports function calling.\n",
    "\n",
    "Function calling allows API clients to specify a schema that gives the model a format it should respond in.\n",
    "Function calling in `llama-cpp-python` works by combining models pretrained for function calling, such as [`functionary`](https://huggingface.co/meetkai), with constrained sampling to produce a response that is compatible with the schema.\n",
    "\n",
    "Note, however, that this improves the reliability of the output but does not guarantee that the response will be compatible with the schema.\n",
    "\n",
    "## Requirements\n",
    "\n",
    "Before we begin you will need the following:\n",
    "\n",
    "- A running `llama-cpp-python` server with a function-calling-compatible model. [See here](https://llama-cpp-python.readthedocs.io/en/latest/server/#function-calling)\n",
    "- The OpenAI Python client: `pip install openai`\n",
    "- (Optional) The `instructor` Python library: `pip install instructor`\n",
    "\n",
    "## Function Calling with OpenAI Python Client\n",
    "\n",
    "We'll start with a basic demo that only uses the OpenAI Python client."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "ChatCompletion(id='chatcmpl-a2d9eb9f-7354-472f-b6ad-4d7a807729a3', choices=[Choice(finish_reason='stop', index=0, message=ChatCompletionMessage(content='The current weather in San Francisco is **72°F** (22°C).\\n ', role='assistant', function_call=None, tool_calls=None))], created=1699638365, model='gpt-3.5-turbo-1106', object='chat.completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=22, prompt_tokens=136, total_tokens=158))\n"
     ]
    }
   ],
   "source": [
    "import openai\n",
    "import json\n",
    "\n",
    "\n",
    "client = openai.OpenAI(\n",
    "    api_key = \"sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\", # can be anything\n",
    "    base_url = \"http://100.64.159.73:8000/v1\" # NOTE: Replace with IP address and port of your llama-cpp-python server\n",
    ")\n",
    "\n",
    "# Example dummy function hard coded to return the same weather\n",
    "# In production, this could be your backend API or an external API\n",
    "def get_current_weather(location, unit=\"fahrenheit\"):\n",
    "    \"\"\"Get the current weather in a given location\"\"\"\n",
    "    if \"tokyo\" in location.lower():\n",
    "        return json.dumps({\"location\": \"Tokyo\", \"temperature\": \"10\", \"unit\": \"celsius\"})\n",
    "    elif \"san francisco\" in location.lower():\n",
    "        return json.dumps({\"location\": \"San Francisco\", \"temperature\": \"72\", \"unit\": \"fahrenheit\"})\n",
    "    elif \"paris\" in location.lower():\n",
    "        return json.dumps({\"location\": \"Paris\", \"temperature\": \"22\", \"unit\": \"celsius\"})\n",
    "    else:\n",
    "        return json.dumps({\"location\": location, \"temperature\": \"unknown\"})\n",
    "\n",
    "def run_conversation():\n",
    "    # Step 1: send the conversation and available functions to the model\n",
    "    messages = [{\"role\": \"user\", \"content\": \"What's the weather like in San Francisco, Tokyo, and Paris?\"}]\n",
    "    tools = [\n",
    "        {\n",
    "            \"type\": \"function\",\n",
    "            \"function\": {\n",
    "                \"name\": \"get_current_weather\",\n",
    "                \"description\": \"Get the current weather in a given location\",\n",
    "                \"parameters\": {\n",
    "                    \"type\": \"object\",\n",
    "                    \"properties\": {\n",
    "                        \"location\": {\n",
    "                            \"type\": \"string\",\n",
    "                            \"description\": \"The city and state, e.g. San Francisco, CA\",\n",
    "                        },\n",
    "                        \"unit\": {\"type\": \"string\", \"enum\": [\"celsius\", \"fahrenheit\"]},\n",
    "                    },\n",
    "                    \"required\": [\"location\"],\n",
    "                },\n",
    "            },\n",
    "        }\n",
    "    ]\n",
    "    response = client.chat.completions.create(\n",
    "        model=\"gpt-3.5-turbo-1106\",\n",
    "        messages=messages,\n",
    "        tools=tools,\n",
    "        tool_choice=\"auto\",  # auto is default, but we'll be explicit\n",
    "    )\n",
    "    response_message = response.choices[0].message\n",
    "    tool_calls = response_message.tool_calls\n",
    "    # Step 2: check if the model wanted to call a function\n",
    "    if tool_calls:\n",
    "        # Step 3: call the function\n",
    "        # Note: the JSON response may not always be valid; be sure to handle errors\n",
    "        available_functions = {\n",
    "            \"get_current_weather\": get_current_weather,\n",
    "        }  # only one function in this example, but you can have multiple\n",
    "        messages.append(response_message)  # extend conversation with assistant's reply\n",
    "        # Step 4: send the info for each function call and function response to the model\n",
    "        for tool_call in tool_calls:\n",
    "            function_name = tool_call.function.name\n",
    "            function_to_call = available_functions[function_name]\n",
    "            function_args = json.loads(tool_call.function.arguments)\n",
    "            function_response = function_to_call(\n",
    "                location=function_args.get(\"location\"),\n",
    "                unit=function_args.get(\"unit\"),\n",
    "            )\n",
    "            messages.append(\n",
    "                {\n",
    "                    \"tool_call_id\": tool_call.id,\n",
    "                    \"role\": \"tool\",\n",
    "                    \"name\": function_name,\n",
    "                    \"content\": function_response,\n",
    "                }\n",
    "            )  # extend conversation with function response\n",
    "        second_response = client.chat.completions.create(\n",
    "            model=\"gpt-3.5-turbo-1106\",\n",
    "            messages=messages,\n",
    "        )  # get a new response from the model where it can see the function response\n",
    "        return second_response\n",
    "\n",
    "print(run_conversation())"
   ]
  },
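  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The demo above passes `tool_choice=\"auto\"`, which lets the model decide whether to call a function. The OpenAI API also accepts a named function in `tool_choice` to force a specific call. Below is a minimal sketch of that variant; it assumes your server build honors a forced `tool_choice` and it reuses the `client` defined above."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Minimal sketch (untested): force a specific tool call by naming the\n",
    "# function in tool_choice instead of letting the model decide with \"auto\".\n",
    "tools = [\n",
    "    {\n",
    "        \"type\": \"function\",\n",
    "        \"function\": {\n",
    "            \"name\": \"get_current_weather\",\n",
    "            \"description\": \"Get the current weather in a given location\",\n",
    "            \"parameters\": {\n",
    "                \"type\": \"object\",\n",
    "                \"properties\": {\n",
    "                    \"location\": {\"type\": \"string\", \"description\": \"The city and state, e.g. San Francisco, CA\"},\n",
    "                    \"unit\": {\"type\": \"string\", \"enum\": [\"celsius\", \"fahrenheit\"]},\n",
    "                },\n",
    "                \"required\": [\"location\"],\n",
    "            },\n",
    "        },\n",
    "    }\n",
    "]\n",
    "response = client.chat.completions.create(\n",
    "    model=\"gpt-3.5-turbo-1106\",\n",
    "    messages=[{\"role\": \"user\", \"content\": \"What's the weather like in Paris?\"}],\n",
    "    tools=tools,\n",
    "    # Forcing the call means the response should contain a tool_call for\n",
    "    # get_current_weather rather than a plain text answer.\n",
    "    tool_choice={\"type\": \"function\", \"function\": {\"name\": \"get_current_weather\"}},\n",
    ")\n",
    "print(response.choices[0].message.tool_calls)"
   ]
  },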
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Function Calling with Instructor\n",
    "\n",
    "The above example is a bit verbose and requires you to manually verify the schema.\n",
    "\n",
    "For our next examples we'll use the `instructor` library to simplify the process and accomplish a number of different tasks with function calling.\n",
    "\n",
    "You'll first need to install the [`instructor`](https://github.com/jxnl/instructor/) library.\n",
    "\n",
    "You can do so by running the following command in your terminal:\n",
    "\n",
    "```bash\n",
    "pip install instructor\n",
    "```\n",
    "\n",
    "Below we'll go through a few basic examples taken directly from the [instructor cookbook](https://jxnl.github.io/instructor/).\n",
    "\n",
    "## Basic Usage"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "name='Jason' age=25\n"
     ]
    }
   ],
   "source": [
    "import instructor\n",
    "from pydantic import BaseModel\n",
    "\n",
    "# Enables `response_model`\n",
    "client = instructor.patch(client=client)\n",
    "\n",
    "class UserDetail(BaseModel):\n",
    "    name: str\n",
    "    age: int\n",
    "\n",
    "user = client.chat.completions.create(\n",
    "    model=\"gpt-3.5-turbo\",\n",
    "    response_model=UserDetail,\n",
    "    messages=[\n",
    "        {\"role\": \"user\", \"content\": \"Extract Jason is 25 years old\"},\n",
    "    ]\n",
    ")\n",
    "\n",
    "assert isinstance(user, UserDetail)\n",
    "assert user.name == \"Jason\"\n",
    "assert user.age == 25\n",
    "\n",
    "print(user)"
   ]
  },
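  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "If the model's first attempt fails `pydantic` validation, `instructor` can re-ask the model with the validation error attached. Below is a minimal sketch using the `max_retries` parameter; the uppercase-name constraint is purely illustrative, chosen so the retry loop has something to correct."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pydantic import field_validator\n",
    "\n",
    "class ValidatedUser(BaseModel):\n",
    "    name: str\n",
    "    age: int\n",
    "\n",
    "    @field_validator(\"name\")\n",
    "    @classmethod\n",
    "    def name_must_be_uppercase(cls, v: str) -> str:\n",
    "        # Illustrative constraint: reject non-uppercase names so the\n",
    "        # re-ask loop has an error message to send back to the model.\n",
    "        assert v.isupper(), \"name must be uppercase\"\n",
    "        return v\n",
    "\n",
    "user = client.chat.completions.create(\n",
    "    model=\"gpt-3.5-turbo\",\n",
    "    response_model=ValidatedUser,\n",
    "    max_retries=2,  # re-send validation errors to the model up to twice\n",
    "    messages=[\n",
    "        {\"role\": \"user\", \"content\": \"Extract jason is 25 years old\"},\n",
    "    ],\n",
    ")\n",
    "print(user)"
   ]
  },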
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Text Classification\n",
    "\n",
    "### Single-Label Classification"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "class_label=<Labels.SPAM: 'spam'>\n"
     ]
    }
   ],
   "source": [
    "import enum\n",
    "\n",
    "class Labels(str, enum.Enum):\n",
    "    \"\"\"Enumeration for single-label text classification.\"\"\"\n",
    "    SPAM = \"spam\"\n",
    "    NOT_SPAM = \"not_spam\"\n",
    "\n",
    "class SinglePrediction(BaseModel):\n",
    "    \"\"\"\n",
    "    Class for a single class label prediction.\n",
    "    \"\"\"\n",
    "    class_label: Labels\n",
    "\n",
    "def classify(data: str) -> SinglePrediction:\n",
    "    \"\"\"Perform single-label classification on the input text.\"\"\"\n",
    "    return client.chat.completions.create(\n",
    "        model=\"gpt-3.5-turbo-0613\",\n",
    "        response_model=SinglePrediction,\n",
    "        messages=[\n",
    "            {\n",
    "                \"role\": \"user\",\n",
    "                \"content\": f\"Classify the following text: {data}\",\n",
    "            },\n",
    "        ],\n",
    "    )  # type: ignore\n",
    "\n",
    "prediction = classify(\"Hello there I'm a Nigerian prince and I want to give you money\")\n",
    "assert prediction.class_label == Labels.SPAM\n",
    "print(prediction)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Multi-Label Classification"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "class_labels=[<MultiLabels.TECH_ISSUE: 'tech_issue'>, <MultiLabels.BILLING: 'billing'>]\n"
     ]
    }
   ],
   "source": [
    "from typing import List\n",
    "\n",
    "# Define Enum class for multiple labels\n",
    "class MultiLabels(str, enum.Enum):\n",
    "    TECH_ISSUE = \"tech_issue\"\n",
    "    BILLING = \"billing\"\n",
    "    GENERAL_QUERY = \"general_query\"\n",
    "\n",
    "# Define the multi-class prediction model\n",
    "class MultiClassPrediction(BaseModel):\n",
    "    \"\"\"\n",
    "    Class for a multi-class label prediction.\n",
    "    \"\"\"\n",
    "    class_labels: List[MultiLabels]\n",
    "\n",
    "def multi_classify(data: str) -> MultiClassPrediction:\n",
    "    \"\"\"Perform multi-label classification on the input text.\"\"\"\n",
    "    return client.chat.completions.create(\n",
    "        model=\"gpt-3.5-turbo-0613\",\n",
    "        response_model=MultiClassPrediction,\n",
    "        messages=[\n",
    "            {\n",
    "                \"role\": \"user\",\n",
    "                \"content\": f\"Classify the following support ticket: {data}\",\n",
    "            },\n",
    "        ],\n",
    "    )  # type: ignore\n",
    "\n",
    "# Test multi-label classification\n",
    "ticket = \"My account is locked and I can't access my billing info.\"\n",
    "prediction = multi_classify(ticket)\n",
    "print(prediction)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Self-Critique"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "question='What is the meaning of life?' answer='According to the Devil, the meaning of life is to live a life of sin and debauchery.'\n",
      "1 validation error for QuestionAnswerNoEvil\n",
      "answer\n",
      "  Assertion failed, The statement promotes sin and debauchery, which can be considered objectionable. [type=assertion_error, input_value='According to the Devil, ... of sin and debauchery.', input_type=str]\n",
      "    For further information visit https://errors.pydantic.dev/2.3/v/assertion_error\n"
     ]
    }
   ],
   "source": [
    "from typing_extensions import Annotated\n",
    "from pydantic import BaseModel, BeforeValidator\n",
    "\n",
    "from instructor import llm_validator\n",
    "\n",
    "\n",
    "question = \"What is the meaning of life?\"\n",
    "context = \"According to the devil, the meaning of life is to live a life of sin and debauchery.\"\n",
    "\n",
    "class QuestionAnswer(BaseModel):\n",
    "    question: str\n",
    "    answer: str\n",
    "\n",
    "qa: QuestionAnswer = client.chat.completions.create(\n",
    "    model=\"gpt-3.5-turbo\",\n",
    "    response_model=QuestionAnswer,\n",
    "    messages=[\n",
    "        {\n",
    "            \"role\": \"system\",\n",
    "            \"content\": \"You are a system that answers questions based on the context. Answer exactly what the question asks using the context.\",\n",
    "        },\n",
    "        {\n",
    "            \"role\": \"user\",\n",
    "            \"content\": f\"using the context: {context}\\n\\nAnswer the following question: {question}\",\n",
    "        },\n",
    "    ],\n",
    ")\n",
    "print(qa)\n",
    "\n",
    "class QuestionAnswerNoEvil(BaseModel):\n",
    "    question: str\n",
    "    answer: Annotated[\n",
    "        str,\n",
    "        BeforeValidator(\n",
    "            llm_validator(\"don't say objectionable things\", allow_override=True)\n",
    "        ),\n",
    "    ]\n",
    "\n",
    "try:\n",
    "    qa: QuestionAnswerNoEvil = client.chat.completions.create(\n",
    "        model=\"gpt-3.5-turbo\",\n",
    "        response_model=QuestionAnswerNoEvil,\n",
    "        messages=[\n",
    "            {\n",
    "                \"role\": \"system\",\n",
    "                \"content\": \"You are a system that answers questions based on the context. Answer exactly what the question asks using the context.\",\n",
    "            },\n",
    "            {\n",
    "                \"role\": \"user\",\n",
    "                \"content\": f\"using the context: {context}\\n\\nAnswer the following question: {question}\",\n",
    "            },\n",
    "        ],\n",
    "    )\n",
    "except Exception as e:\n",
    "    print(e)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Answering Questions with Validated Citations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "question='What did the author do during college?' answer=[Fact(fact='The author, Jason Liu, studied Computational Mathematics and Physics in university.', substring_quote=['Computational Mathematics'])]\n"
     ]
    }
   ],
   "source": [
    "import re\n",
    "from typing import List\n",
    "\n",
    "from pydantic import Field, BaseModel, model_validator, FieldValidationInfo\n",
    "\n",
    "class Fact(BaseModel):\n",
    "    fact: str = Field(...)\n",
    "    substring_quote: List[str] = Field(...)\n",
    "\n",
    "    @model_validator(mode=\"after\")\n",
    "    def validate_sources(self, info: FieldValidationInfo) -> \"Fact\":\n",
    "        text_chunks = info.context.get(\"text_chunk\", None)\n",
    "        spans = list(self.get_spans(text_chunks))\n",
    "        self.substring_quote = [text_chunks[span[0] : span[1]] for span in spans]\n",
    "        return self\n",
    "\n",
    "    def get_spans(self, context):\n",
    "        for quote in self.substring_quote:\n",
    "            yield from self._get_span(quote, context)\n",
    "\n",
    "    def _get_span(self, quote, context):\n",
    "        for match in re.finditer(re.escape(quote), context):\n",
    "            yield match.span()\n",
    "\n",
    "class QuestionAnswer(BaseModel):\n",
    "    question: str = Field(...)\n",
    "    answer: List[Fact] = Field(...)\n",
    "\n",
    "    @model_validator(mode=\"after\")\n",
    "    def validate_sources(self) -> \"QuestionAnswer\":\n",
    "        self.answer = [fact for fact in self.answer if len(fact.substring_quote) > 0]\n",
    "        return self\n",
    "\n",
    "\n",
    "def ask_ai(question: str, context: str) -> QuestionAnswer:\n",
    "    return client.chat.completions.create(\n",
    "        model=\"gpt-3.5-turbo-0613\",\n",
    "        temperature=0.0,\n",
    "        response_model=QuestionAnswer,\n",
    "        messages=[\n",
    "            {\"role\": \"system\", \"content\": \"You are a world class algorithm to answer questions with correct and exact citations.\"},\n",
    "            {\"role\": \"user\", \"content\": f\"{context}\"},\n",
    "            {\"role\": \"user\", \"content\": f\"Question: {question}\"}\n",
    "        ],\n",
    "        validation_context={\"text_chunk\": context},\n",
    "    )\n",
    "\n",
    "question = \"What did the author do during college?\"\n",
    "context = \"\"\"\n",
    "My name is Jason Liu, and I grew up in Toronto Canada but I was born in China.\n",
    "I went to an arts high school but in university I studied Computational Mathematics and physics.\n",
    "As part of coop I worked at many companies including Stitchfix, Facebook.\n",
    "I also started the Data Science club at the University of Waterloo and I was the president of the club for 2 years.\n",
    "\"\"\"\n",
    "\n",
    "qa = ask_ai(question, context)\n",
    "print(qa)"
   ]
  },
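  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "To see the citation validators at work, try a question the context cannot answer. Any `Fact` whose quotes cannot be found verbatim in the context ends up with an empty `substring_quote` list and is filtered out of `answer`, so the result should contain few or no facts. This is an untested sketch that reuses `ask_ai` and `context` from above."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Minimal sketch (untested): facts without a verifiable quote are dropped\n",
    "# by the validators above, so an unanswerable question should yield no facts.\n",
    "qa = ask_ai(\"What does the author do for work today?\", context)\n",
    "print(qa)"
   ]
  }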
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "python-3.8.10",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5+"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}