llama.cpp/examples/notebooks/Multimodal.ipynb

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<div>\n",
" <img src=\"https://user-images.githubusercontent.com/1991296/230134379-7181e485-c521-4d23-a0d6-f7b3b61ba524.png\" width=\"500\"/>\n",
"</div>"
]
},
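{
"cell_type": "markdown",
"metadata": {},
"source": [
"Send an image plus a text question to an OpenAI-compatible `/v1/chat/completions` endpoint. This assumes a llama.cpp-based server with a multimodal (vision) model is already listening on `http://localhost:8000`; since the request never reaches OpenAI, the `api_key` is just a placeholder and the `model` name is passed through to the local server."
]
},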
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'text': 'Llama C++'}\n"
]
}
],
"source": [
"from openai import OpenAI\n",
"\n",
"client = OpenAI(base_url=\"http://localhost:8000/v1\", api_key=\"llama.cpp\")\n",
"response = client.chat.completions.create(\n",
" model=\"gpt-4-vision-preview\",\n",
" messages=[\n",
" {\n",
" \"role\": \"user\",\n",
" \"content\": [\n",
" {\n",
" \"type\": \"image_url\",\n",
" \"image_url\": {\n",
" \"url\": \"https://user-images.githubusercontent.com/1991296/230134379-7181e485-c521-4d23-a0d6-f7b3b61ba524.png\",\n",
" },\n",
" },\n",
" {\"type\": \"text\", \"text\": \"What does the image say. Format your response as a json object with a single 'text' key.\"},\n",
" ],\n",
" }\n",
" ],\n",
" response_format={ \n",
" \"type\": \"json_object\",\n",
" \"schema\": {\n",
" \"type\": \"object\",\n",
" \"properties\": {\n",
" \"text\": {\n",
" \"type\": \"string\"\n",
" }\n",
" }\n",
" }\n",
" }\n",
")\n",
"import json\n",
"print(json.loads(response.choices[0].message.content))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.5+"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}