Merge tag 'v0.2.17' into main

commit ca30d898e9

5 changed files with 119 additions and 21 deletions
CHANGELOG.md

@@ -7,6 +7,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

 ## [Unreleased]

+## [0.2.17]
+
+- Update llama.cpp to ggerganov/llama.cpp@df9d1293defe783f42bc83af732d3c670552c541
+- Hotfix: Set `CUDA_ARCHITECTURES=OFF` for `llava_shared` target on Windows by @abetlen in 4388f3341413110217b98c4f097ac5c590bdf40b
+
 ## [0.2.16]

 - Update llama.cpp to ggerganov/llama.cpp@a75fa576abba9d37f463580c379e4bbf1e1ad03c
CMakeLists.txt

@@ -6,6 +6,8 @@ option(LLAMA_BUILD "Build llama.cpp shared library and install alongside python

 if (LLAMA_BUILD)
     set(BUILD_SHARED_LIBS "On")
+
+    # Building llama
     if (APPLE AND NOT CMAKE_SYSTEM_PROCESSOR MATCHES "arm64")
         # Need to disable these llama.cpp flags on Apple x86_64,
         # otherwise users may encounter invalid instruction errors
@@ -41,8 +43,14 @@ if (LLAMA_BUILD)
         FILES $<TARGET_RUNTIME_DLLS:llama>
         DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/llama_cpp
     )
+
+    # Building llava
     add_subdirectory(vendor/llama.cpp/examples/llava)
     set_target_properties(llava_shared PROPERTIES OUTPUT_NAME "llava")
+    # Set CUDA_ARCHITECTURES to OFF on windows
+    if (WIN32)
+        set_target_properties(llava_shared PROPERTIES CUDA_ARCHITECTURES OFF)
+    endif()
     install(
         TARGETS llava_shared
         LIBRARY DESTINATION ${SKBUILD_PLATLIB_DIR}/llama_cpp
examples/notebooks/Functions.ipynb

@@ -1,15 +1,41 @@
 {
  "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Functions\n",
+    "\n",
+    "The OpenAI compatible web server in `llama-cpp-python` supports function calling.\n",
+    "\n",
+    "Function calling allows API clients to specify a schema that gives the model a format it should respond in.\n",
+    "Function calling in `llama-cpp-python` works by combining models pretrained for function calling such as [`functionary`](https://huggingface.co/abetlen/functionary-7b-v1-GGUF) with constrained sampling to produce a response that is compatible with the schema.\n",
+    "\n",
+    "Note however that this improves but does not guarantee that the response will be compatible with the schema.\n",
+    "\n",
+    "## Requirements\n",
+    "\n",
+    "Before we begin you will need the following:\n",
+    "\n",
+    "- A running `llama-cpp-python` server with a function calling compatible model. [See here](https://llama-cpp-python.readthedocs.io/en/latest/server/#function-calling)\n",
+    "- The OpenAI Python Client `pip install openai`\n",
+    "- (Optional) The Instructor Python Library `pip install instructor`\n",
+    "\n",
+    "## Function Calling with OpenAI Python Client\n",
+    "\n",
+    "We'll start with a basic demo that only uses the OpenAI Python Client."
+   ]
+  },
   {
    "cell_type": "code",
-   "execution_count": 29,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "ChatCompletion(id='chatcmpl-b6dcbb47-1120-4761-8cd9-83542c97647b', choices=[Choice(finish_reason='stop', index=0, message=ChatCompletionMessage(content=\"The current temperature in San Francisco is 72 degrees Fahrenheit. It's a sunny day with clear skies, making it perfect for outdoor activities.\\n \", role='assistant', function_call=None, tool_calls=None))], created=1699602158, model='gpt-3.5-turbo-1106', object='chat.completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=38, prompt_tokens=135, total_tokens=173))\n"
+      "ChatCompletion(id='chatcmpl-a2d9eb9f-7354-472f-b6ad-4d7a807729a3', choices=[Choice(finish_reason='stop', index=0, message=ChatCompletionMessage(content='The current weather in San Francisco is **72°F** (22°C).\\n ', role='assistant', function_call=None, tool_calls=None))], created=1699638365, model='gpt-3.5-turbo-1106', object='chat.completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=22, prompt_tokens=136, total_tokens=158))\n"
      ]
     }
    ],
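To make the added introduction concrete, here is a minimal, self-contained sketch of the pattern the notebook demonstrates: the standard OpenAI Python client pointed at a local `llama-cpp-python` server, issuing a tools request so constrained sampling can steer the model toward the declared schema. The base URL, model name, and `get_current_weather` tool are illustrative assumptions, not part of this commit.

```python
import openai

# Assumed local endpoint; replace with the address of your llama-cpp-python server.
client = openai.OpenAI(
    api_key="sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",  # can be anything
    base_url="http://localhost:8000/v1",
)

# JSON schema for a hypothetical weather tool; the server uses it to
# constrain sampling so the response conforms to the declared format.
tools = [
    {
        "type": "function",
        "function": {
            "name": "get_current_weather",
            "description": "Get the current weather in a given location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The city and state, e.g. San Francisco, CA",
                    },
                    "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                },
                "required": ["location"],
            },
        },
    }
]

response = client.chat.completions.create(
    model="gpt-3.5-turbo-1106",  # routed to whatever model the local server loaded
    messages=[{"role": "user", "content": "What's the weather like in San Francisco?"}],
    tools=tools,
    tool_choice="auto",
)
# Either a direct answer or one or more tool calls to execute locally.
print(response.choices[0].message)
```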
@@ -20,7 +46,7 @@
     "\n",
     "client = openai.OpenAI(\n",
     "    api_key = \"sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\", # can be anything\n",
-    "    base_url = \"http://100.64.159.73:8000/v1\"\n",
+    "    base_url = \"http://100.64.159.73:8000/v1\" # NOTE: Replace with IP address and port of your llama-cpp-python server\n",
     ")\n",
     "\n",
     "# Example dummy function hard coded to return the same weather\n",
@@ -100,9 +126,32 @@
     "print(run_conversation())"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Function Calling with Instructor\n",
+    "\n",
+    "The above example is a bit verbose and requires you to manually verify the schema.\n",
+    "\n",
+    "For our next examples we'll use the `instructor` library to simplify the process and accomplish a number of different tasks with function calling.\n",
+    "\n",
+    "You'll first need to install the [`instructor`](https://github.com/jxnl/instructor/) library.\n",
+    "\n",
+    "You can do so by running the following command in your terminal:\n",
+    "\n",
+    "```bash\n",
+    "pip install instructor\n",
+    "```\n",
+    "\n",
+    "Below we'll go through a few basic examples taken directly from the [instructor cookbook](https://jxnl.github.io/instructor/).\n",
+    "\n",
+    "## Basic Usage"
+   ]
+  },
   {
    "cell_type": "code",
-   "execution_count": 30,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [
     {
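For orientation, a sketch of the basic `instructor` pattern the new markdown cell introduces, adapted from the instructor cookbook it links. `instructor.patch`, the `UserDetail` model, and the local base URL are assumptions about the cookbook-era API rather than code from this commit.

```python
import instructor
import openai
from pydantic import BaseModel

# Assumption: instructor.patch() wraps the client so that
# chat.completions.create accepts response_model and returns a
# validated pydantic object built via function calling.
client = instructor.patch(
    openai.OpenAI(
        api_key="sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",  # can be anything
        base_url="http://localhost:8000/v1",
    )
)


class UserDetail(BaseModel):
    name: str
    age: int


user = client.chat.completions.create(
    model="gpt-3.5-turbo-1106",
    response_model=UserDetail,
    messages=[{"role": "user", "content": "Extract: Jason is 25 years old"}],
)
print(user)  # expected: name='Jason' age=25
```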
@@ -140,10 +189,27 @@
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": 31,
+   "cell_type": "markdown",
    "metadata": {},
-   "outputs": [],
+   "source": [
+    "## Text Classification\n",
+    "\n",
+    "### Single-Label Classification"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "class_label=<Labels.SPAM: 'spam'>\n"
+     ]
+    }
+   ],
    "source": [
     "import enum\n",
     "\n",
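The hunk only shows the edges of the single-label classification cell (`import enum`, the `) # type: ignore` close, the `assert` on `Labels.SPAM`), so here is a plausible reconstruction of the whole cell in the cookbook's style. The `Labels` members beyond `SPAM`, the prompt wording, and the client setup are assumptions.

```python
import enum

import instructor
import openai
from pydantic import BaseModel

client = instructor.patch(
    openai.OpenAI(api_key="sk-xxx", base_url="http://localhost:8000/v1")
)


class Labels(str, enum.Enum):
    """Allowed labels; the enum becomes an enum constraint in the schema."""
    SPAM = "spam"
    NOT_SPAM = "not_spam"


class SinglePrediction(BaseModel):
    class_label: Labels


def classify(data: str) -> SinglePrediction:
    # response_model drives constrained sampling toward one of the labels.
    return client.chat.completions.create(
        model="gpt-3.5-turbo-1106",
        response_model=SinglePrediction,
        messages=[{"role": "user", "content": f"Classify the following text: {data}"}],
    )  # type: ignore


prediction = classify("Hello there I'm a Nigerian prince and I want to give you money")
assert prediction.class_label == Labels.SPAM
print(prediction)  # class_label=<Labels.SPAM: 'spam'>
```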
@@ -172,19 +238,27 @@
     "    ) # type: ignore\n",
     "\n",
     "prediction = classify(\"Hello there I'm a Nigerian prince and I want to give you money\")\n",
-    "assert prediction.class_label == Labels.SPAM"
+    "assert prediction.class_label == Labels.SPAM\n",
+    "print(prediction)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Multi-Label Classification"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 32,
+   "execution_count": 12,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "class_labels=[<MultiLabels.BILLING: 'billing'>, <MultiLabels.TECH_ISSUE: 'tech_issue'>]\n"
+      "class_labels=[<MultiLabels.TECH_ISSUE: 'tech_issue'>, <MultiLabels.BILLING: 'billing'>]\n"
      ]
     }
    ],
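The multi-label variant differs only in returning a list of enum values, which lets the model pick any subset of labels. A sketch consistent with the `class_labels` output shown in the hunk; the `MultiLabels` members and the ticket text are assumptions based on the cookbook.

```python
import enum
from typing import List

import instructor
import openai
from pydantic import BaseModel

client = instructor.patch(
    openai.OpenAI(api_key="sk-xxx", base_url="http://localhost:8000/v1")
)


class MultiLabels(str, enum.Enum):
    TECH_ISSUE = "tech_issue"
    BILLING = "billing"
    GENERAL_QUERY = "general_query"


class MultiClassPrediction(BaseModel):
    # A list field lets the model select any subset of the labels.
    class_labels: List[MultiLabels]


def multi_classify(data: str) -> MultiClassPrediction:
    return client.chat.completions.create(
        model="gpt-3.5-turbo-1106",
        response_model=MultiClassPrediction,
        messages=[{"role": "user", "content": f"Classify the following support ticket: {data}"}],
    )  # type: ignore


prediction = multi_classify("My account is locked and I was billed twice last week")
print(prediction)  # e.g. class_labels=[<MultiLabels.TECH_ISSUE: ...>, <MultiLabels.BILLING: ...>]
```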
@@ -223,16 +297,27 @@
     "print(prediction)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Self-Critique"
+   ]
+  },
   {
    "cell_type": "code",
-   "execution_count": 33,
+   "execution_count": 13,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "question='What is the meaning of life?' answer='The meaning of life, according to the Devil, is to live a life of sin and debauchery.'\n"
+      "question='What is the meaning of life?' answer='According to the Devil, the meaning of life is to live a life of sin and debauchery.'\n",
+      "1 validation error for QuestionAnswerNoEvil\n",
+      "answer\n",
+      "  Assertion failed, The statement promotes sin and debauchery, which can be considered objectionable. [type=assertion_error, input_value='According to the Devil, ... of sin and debauchery.', input_type=str]\n",
+      "    For further information visit https://errors.pydantic.dev/2.3/v/assertion_error\n"
      ]
     }
    ],
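The self-critique output added in this hunk comes from a pydantic validator that asks the model to judge its own answer against a stated rule. A sketch of that setup, assuming the cookbook-era `instructor.llm_validator` helper; its exact signature, the prompts, and the client setup are assumptions, not code from the commit.

```python
from typing import Annotated

import instructor
import openai
from instructor import llm_validator  # assumption: cookbook-era helper
from pydantic import BaseModel, BeforeValidator, ValidationError

client = instructor.patch(
    openai.OpenAI(api_key="sk-xxx", base_url="http://localhost:8000/v1")
)


class QuestionAnswerNoEvil(BaseModel):
    question: str
    # The validator sends the candidate answer back to the model with the
    # stated rule and raises an assertion error when the rule is violated.
    answer: Annotated[str, BeforeValidator(llm_validator("don't say objectionable things"))]


context = "According to the Devil, the meaning of life is to live a life of sin and debauchery."
question = "What is the meaning of life?"

try:
    qa = client.chat.completions.create(
        model="gpt-3.5-turbo-1106",
        response_model=QuestionAnswerNoEvil,
        messages=[
            {"role": "system", "content": "Answer the question using the provided context."},
            {"role": "user", "content": f"Context: {context}\n\nQuestion: {question}"},
        ],
    )
    print(qa)
except ValidationError as e:
    print(e)  # matches the "1 validation error for QuestionAnswerNoEvil" output above
```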
@@ -294,6 +379,13 @@
     "    print(e)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Answering Questions with Validated Citations"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 42,
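Only the header of the citations section is added here; the technique validates that every quoted snippet actually appears in the source text. A simplified sketch of that idea using a plain substring check (the cookbook uses fuzzier matching); the `validation_context` forwarding, model names, and prompts are assumptions.

```python
from typing import List

import instructor
import openai
from pydantic import BaseModel, ValidationInfo, field_validator

client = instructor.patch(
    openai.OpenAI(api_key="sk-xxx", base_url="http://localhost:8000/v1")
)


class Fact(BaseModel):
    fact: str
    substring_quote: List[str]

    @field_validator("substring_quote")
    @classmethod
    def quote_must_exist(cls, quotes: List[str], info: ValidationInfo) -> List[str]:
        # Reject any citation that cannot be found verbatim in the context.
        context = (info.context or {}).get("text_chunk", "")
        for quote in quotes:
            assert quote in context, f"Quote not found in context: {quote!r}"
        return quotes


class QuestionAnswer(BaseModel):
    question: str
    answer: List[Fact]


def ask_ai(question: str, context: str) -> QuestionAnswer:
    return client.chat.completions.create(
        model="gpt-3.5-turbo-1106",
        response_model=QuestionAnswer,
        # Assumption: instructor forwards validation_context to pydantic validation.
        validation_context={"text_chunk": context},
        messages=[
            {"role": "system", "content": "Answer with exact substring citations from the context."},
            {"role": "user", "content": f"{context}\n\nQuestion: {question}"},
        ],
    )  # type: ignore


qa = ask_ai("Where does Jason work?", "Jason is 25 years old and works at Acme Corp.")
print(qa)
```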
@@ -366,13 +458,6 @@
     "qa = ask_ai(question, context)\n",
     "print(qa)"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {
llama_cpp/__init__.py

@@ -1,4 +1,4 @@
 from .llama_cpp import *
 from .llama import *

-__version__ = "0.2.16"
+__version__ = "0.2.17"
vendor/llama.cpp (vendored submodule)

@@ -1 +1 @@
-Subproject commit a75fa576abba9d37f463580c379e4bbf1e1ad03c
+Subproject commit df9d1293defe783f42bc83af732d3c670552c541