From 252e1ff2b4b2d92f8aaa267e3828a74da3084f43 Mon Sep 17 00:00:00 2001
From: Andrei Betlen
Date: Sun, 25 Feb 2024 21:09:41 -0500
Subject: [PATCH] docs(examples): Add huggingface pull example

---
 examples/hf_pull/main.py | 39 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)
 create mode 100644 examples/hf_pull/main.py

diff --git a/examples/hf_pull/main.py b/examples/hf_pull/main.py
new file mode 100644
index 0000000..d3eb11c
--- /dev/null
+++ b/examples/hf_pull/main.py
@@ -0,0 +1,39 @@
+import llama_cpp
+import llama_cpp.llama_tokenizer
+
+
+llama = llama_cpp.Llama.from_pretrained(
+    repo_id="Qwen/Qwen1.5-0.5B-Chat-GGUF",
+    filename="*q8_0.gguf",
+    tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained("Qwen/Qwen1.5-0.5B"),
+    verbose=False
+)
+
+response = llama.create_chat_completion(
+    messages=[
+        {
+            "role": "user",
+            "content": "What is the capital of France?"
+        }
+    ],
+    response_format={
+        "type": "json_object",
+        "schema": {
+            "type": "object",
+            "properties": {
+                "country": {"type": "string"},
+                "capital": {"type": "string"}
+            },
+            "required": ["country", "capital"],
+        }
+    },
+    stream=True
+)
+
+for chunk in response:
+    delta = chunk["choices"][0]["delta"]
+    if "content" not in delta:
+        continue
+    print(delta["content"], end="", flush=True)
+
+print()
\ No newline at end of file
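
Since the example streams a schema-constrained JSON object, a natural follow-up is to accumulate the content deltas and parse the result once the stream ends. Below is a minimal sketch, assuming `llama` has been constructed as in the patch above; the names `parts` and `result` are illustrative and not part of the patch:

import json

# Accumulate the streamed content deltas, then parse the JSON object
# produced by the schema-constrained completion.
# Assumes `llama` was created as in the example above (hypothetical follow-up).
parts = []
for chunk in llama.create_chat_completion(
    messages=[{"role": "user", "content": "What is the capital of France?"}],
    response_format={
        "type": "json_object",
        "schema": {
            "type": "object",
            "properties": {
                "country": {"type": "string"},
                "capital": {"type": "string"},
            },
            "required": ["country", "capital"],
        },
    },
    stream=True,
):
    delta = chunk["choices"][0]["delta"]
    if "content" in delta:
        parts.append(delta["content"])

result = json.loads("".join(parts))
print(result["country"], result["capital"])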