diff --git a/README.md b/README.md
index 0c84c1f..2c8c0a5 100644
--- a/README.md
+++ b/README.md
@@ -27,14 +27,14 @@ pip install llama-cpp-python
 
 ```python
 >>> from llama_cpp import Llama
->>> llm = Llama(model_path="models/7B/...")
+>>> llm = Llama(model_path="./models/7B/ggml-model.bin")
 >>> output = llm("Q: Name the planets in the solar system? A: ", max_tokens=32, stop=["Q:", "\n"], echo=True)
 >>> print(output)
 {
   "id": "cmpl-xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx",
   "object": "text_completion",
   "created": 1679561337,
-  "model": "models/7B/...",
+  "model": "./models/7B/ggml-model.bin",
   "choices": [
     {
       "text": "Q: Name the planets in the solar system? A: Mercury, Venus, Earth, Mars, Jupiter, Saturn, Uranus, Neptune and Pluto.",
@@ -60,7 +60,7 @@ To install the server package and get started:
 
 ```bash
 pip install llama-cpp-python[server]
-export MODEL=./models/7B
+export MODEL=./models/7B/ggml-model.bin
 python3 -m llama_cpp.server
 ```
 
diff --git a/docs/index.md b/docs/index.md
index efe7fcf..4055155 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -29,14 +29,14 @@ pip install llama-cpp-python
 
 ```python
 >>> from llama_cpp import Llama
->>> llm = Llama(model_path="models/7B/...")
+>>> llm = Llama(model_path="./models/7B/ggml-model.bin")
 >>> output = llm("Q: Name the planets in the solar system? A: ", max_tokens=32, stop=["Q:", "\n"], echo=True)
 >>> print(output)
 {
   "id": "cmpl-xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx",
   "object": "text_completion",
   "created": 1679561337,
-  "model": "models/7B/...",
+  "model": "./models/7B/ggml-model.bin",
   "choices": [
     {
       "text": "Q: Name the planets in the solar system? A: Mercury, Venus, Earth, Mars, Jupiter, Saturn, Uranus, Neptune and Pluto.",
@@ -62,7 +62,7 @@ To install the server package and get started:
 
 ```bash
 pip install llama-cpp-python[server]
-export MODEL=./models/7B
+export MODEL=./models/7B/ggml-model.bin
 python3 -m llama_cpp.server
 ```
 
diff --git a/examples/high_level_api/fastapi_server.py b/examples/high_level_api/fastapi_server.py
index a649692..3ed0eac 100644
--- a/examples/high_level_api/fastapi_server.py
+++ b/examples/high_level_api/fastapi_server.py
@@ -4,7 +4,7 @@ To run this example:
 
 ```bash
 pip install fastapi uvicorn sse-starlette
-export MODEL=../models/7B/...
+export MODEL=../models/7B/ggml-model.bin
 uvicorn fastapi_server_chat:app --reload
 ```
 
diff --git a/examples/high_level_api/high_level_api_embedding.py b/examples/high_level_api/high_level_api_embedding.py
index 8d783f7..feb0ed6 100644
--- a/examples/high_level_api/high_level_api_embedding.py
+++ b/examples/high_level_api/high_level_api_embedding.py
@@ -3,7 +3,7 @@ import argparse
 from llama_cpp import Llama
 
 parser = argparse.ArgumentParser()
-parser.add_argument("-m", "--model", type=str, default=".//models/...")
+parser.add_argument("-m", "--model", type=str, default="../models/7B/ggml-model.bin")
 args = parser.parse_args()
 
 llm = Llama(model_path=args.model, embedding=True)
diff --git a/examples/high_level_api/high_level_api_inference.py b/examples/high_level_api/high_level_api_inference.py
index 0fa9cb7..e41f375 100644
--- a/examples/high_level_api/high_level_api_inference.py
+++ b/examples/high_level_api/high_level_api_inference.py
@@ -4,7 +4,7 @@ import argparse
 from llama_cpp import Llama
 
 parser = argparse.ArgumentParser()
-parser.add_argument("-m", "--model", type=str, default="./models/...")
+parser.add_argument("-m", "--model", type=str, default="../models/7B/ggml-model.bin")
 args = parser.parse_args()
 
 llm = Llama(model_path=args.model)
diff --git a/examples/high_level_api/high_level_api_streaming.py b/examples/high_level_api/high_level_api_streaming.py
index 787bc6e..747c613 100644
--- a/examples/high_level_api/high_level_api_streaming.py
+++ b/examples/high_level_api/high_level_api_streaming.py
@@ -4,7 +4,7 @@ import argparse
 from llama_cpp import Llama
 
 parser = argparse.ArgumentParser()
-parser.add_argument("-m", "--model", type=str, default="./models/...")
+parser.add_argument("-m", "--model", type=str, default="../models/7B/ggml-model.bin")
 args = parser.parse_args()
 
 llm = Llama(model_path=args.model)
diff --git a/examples/high_level_api/langchain_custom_llm.py b/examples/high_level_api/langchain_custom_llm.py
index 6ffd78e..b91632f 100644
--- a/examples/high_level_api/langchain_custom_llm.py
+++ b/examples/high_level_api/langchain_custom_llm.py
@@ -29,7 +29,7 @@ class LlamaLLM(LLM):
 
 
 parser = argparse.ArgumentParser()
-parser.add_argument("-m", "--model", type=str, default="./models/...")
+parser.add_argument("-m", "--model", type=str, default="../models/7B/ggml-model.bin")
 args = parser.parse_args()
 
 # Load the model
diff --git a/examples/low_level_api/low_level_api_llama_cpp.py b/examples/low_level_api/low_level_api_llama_cpp.py
index 2a639aa..b048c0a 100644
--- a/examples/low_level_api/low_level_api_llama_cpp.py
+++ b/examples/low_level_api/low_level_api_llama_cpp.py
@@ -9,7 +9,7 @@ N_THREADS = multiprocessing.cpu_count()
 prompt = b"\n\n### Instruction:\nWhat is the capital of France?\n\n### Response:\n"
 
 lparams = llama_cpp.llama_context_default_params()
-ctx = llama_cpp.llama_init_from_file(b"models/ggml-alpaca-7b-q4.bin", lparams)
+ctx = llama_cpp.llama_init_from_file(b"../models/7B/ggml-model.bin", lparams)
 
 # determine the required inference memory per token:
 tmp = [0, 1, 2, 3]
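As a quick sanity check of the path convention this patch settles on, here is a minimal sketch that mirrors the README snippet above. It is illustrative only (not part of the patch itself) and assumes a converted `ggml-model.bin` already exists under `./models/7B/`:

```python
from llama_cpp import Llama

# Assumption: a converted model file exists at the path used throughout this patch.
llm = Llama(model_path="./models/7B/ggml-model.bin")

# Same completion call as the README example in this patch.
output = llm(
    "Q: Name the planets in the solar system? A: ",
    max_tokens=32,
    stop=["Q:", "\n"],
    echo=True,
)
print(output["choices"][0]["text"])
```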