diff --git a/examples/hf_pull/main.py b/examples/hf_pull/main.py
new file mode 100644
index 0000000..d3eb11c
--- /dev/null
+++ b/examples/hf_pull/main.py
@@ -0,0 +1,39 @@
+import llama_cpp
+import llama_cpp.llama_tokenizer
+
+
+llama = llama_cpp.Llama.from_pretrained(
+    repo_id="Qwen/Qwen1.5-0.5B-Chat-GGUF",
+    filename="*q8_0.gguf",
+    tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained("Qwen/Qwen1.5-0.5B"),
+    verbose=False
+)
+
+response = llama.create_chat_completion(
+    messages=[
+        {
+            "role": "user",
+            "content": "What is the capital of France?"
+        }
+    ],
+    response_format={
+        "type": "json_object",
+        "schema": {
+            "type": "object",
+            "properties": {
+                "country": {"type": "string"},
+                "capital": {"type": "string"}
+            },
+            "required": ["country", "capital"],
+        }
+    },
+    stream=True
+)
+
+for chunk in response:
+    delta = chunk["choices"][0]["delta"]
+    if "content" not in delta:
+        continue
+    print(delta["content"], end="", flush=True)
+
+print()
\ No newline at end of file
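
The new example prints the streamed, schema-constrained JSON response as raw text. As a minimal sketch of how the streamed deltas could instead be reassembled into a Python dict (assuming the same `llama` object created in examples/hf_pull/main.py above; the accumulation pattern is illustrative, not part of the PR):

import json

# Re-issue the same schema-constrained request as in the example,
# streaming the output chunk by chunk. `llama` is assumed to be the
# instance built in the diff above.
response = llama.create_chat_completion(
    messages=[{"role": "user", "content": "What is the capital of France?"}],
    response_format={
        "type": "json_object",
        "schema": {
            "type": "object",
            "properties": {
                "country": {"type": "string"},
                "capital": {"type": "string"}
            },
            "required": ["country", "capital"],
        }
    },
    stream=True
)

# Collect the content deltas, then parse the concatenated string as JSON.
parts = []
for chunk in response:
    delta = chunk["choices"][0]["delta"]
    if "content" in delta:
        parts.append(delta["content"])

result = json.loads("".join(parts))
print(result["capital"])  # expected to print something like "Paris"

Because generation is constrained by the grammar derived from the schema, the concatenated output should always parse as a JSON object containing the required "country" and "capital" keys.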