# llama.cpp/examples/high_level_api/high_level_api_inference.py
"""Minimal example of one-shot text completion with llama-cpp-python.

Loads a GGML model (path given via ``-m``/``--model``), runs a single
completion against a fixed question prompt, and prints the full response
object as pretty-printed JSON.
"""
import argparse
import json

from llama_cpp import Llama


def main() -> None:
    """Parse CLI arguments, load the model, run one completion, print JSON."""
    parser = argparse.ArgumentParser()
    parser.add_argument("-m", "--model", type=str, default="../models/7B/ggml-models.bin")
    args = parser.parse_args()

    # Loading the model may take a while and a lot of RAM for large weights.
    llm = Llama(model_path=args.model)

    output = llm(
        "Question: What are the names of the planets in the solar system? Answer: ",
        max_tokens=48,
        stop=["Q:", "\n"],  # stop when the model starts a new question or ends the line
        echo=True,  # include the prompt itself in the returned text
    )

    # The response is a plain dict (choices, usage, etc.) — dump it verbatim.
    print(json.dumps(output, indent=2))


if __name__ == "__main__":
    main()