Update model paths to make it clearer that they should point to a file
This commit is contained in:
parent
a79d3eb732
commit
196650ccb2
8 changed files with 12 additions and 12 deletions
|
@ -27,14 +27,14 @@ pip install llama-cpp-python
|
|||
|
||||
```python
|
||||
>>> from llama_cpp import Llama
|
||||
>>> llm = Llama(model_path="models/7B/...")
|
||||
>>> llm = Llama(model_path="./models/7B/ggml-model.bin")
|
||||
>>> output = llm("Q: Name the planets in the solar system? A: ", max_tokens=32, stop=["Q:", "\n"], echo=True)
|
||||
>>> print(output)
|
||||
{
|
||||
"id": "cmpl-xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx",
|
||||
"object": "text_completion",
|
||||
"created": 1679561337,
|
||||
"model": "models/7B/...",
|
||||
"model": "./models/7B/ggml-model.bin",
|
||||
"choices": [
|
||||
{
|
||||
"text": "Q: Name the planets in the solar system? A: Mercury, Venus, Earth, Mars, Jupiter, Saturn, Uranus, Neptune and Pluto.",
|
||||
|
@ -60,7 +60,7 @@ To install the server package and get started:
|
|||
|
||||
```bash
|
||||
pip install llama-cpp-python[server]
|
||||
export MODEL=./models/7B
|
||||
export MODEL=./models/7B/ggml-model.bin
|
||||
python3 -m llama_cpp.server
|
||||
```
|
||||
|
||||
|
|
|
@ -29,14 +29,14 @@ pip install llama-cpp-python
|
|||
|
||||
```python
|
||||
>>> from llama_cpp import Llama
|
||||
>>> llm = Llama(model_path="models/7B/...")
|
||||
>>> llm = Llama(model_path="./models/7B/ggml-model.bin")
|
||||
>>> output = llm("Q: Name the planets in the solar system? A: ", max_tokens=32, stop=["Q:", "\n"], echo=True)
|
||||
>>> print(output)
|
||||
{
|
||||
"id": "cmpl-xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx",
|
||||
"object": "text_completion",
|
||||
"created": 1679561337,
|
||||
"model": "models/7B/...",
|
||||
"model": "./models/7B/ggml-model.bin",
|
||||
"choices": [
|
||||
{
|
||||
"text": "Q: Name the planets in the solar system? A: Mercury, Venus, Earth, Mars, Jupiter, Saturn, Uranus, Neptune and Pluto.",
|
||||
|
@ -62,7 +62,7 @@ To install the server package and get started:
|
|||
|
||||
```bash
|
||||
pip install llama-cpp-python[server]
|
||||
export MODEL=./models/7B
|
||||
export MODEL=./models/7B/ggml-model.bin
|
||||
python3 -m llama_cpp.server
|
||||
```
|
||||
|
||||
|
|
|
@ -4,7 +4,7 @@ To run this example:
|
|||
|
||||
```bash
|
||||
pip install fastapi uvicorn sse-starlette
|
||||
export MODEL=../models/7B/...
|
||||
export MODEL=../models/7B/ggml-model.bin
|
||||
uvicorn fastapi_server_chat:app --reload
|
||||
```
|
||||
|
||||
|
|
|
@ -3,7 +3,7 @@ import argparse
|
|||
from llama_cpp import Llama
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("-m", "--model", type=str, default=".//models/...")
|
||||
parser.add_argument("-m", "--model", type=str, default="../models/7B/ggml-model.bin")
|
||||
args = parser.parse_args()
|
||||
|
||||
llm = Llama(model_path=args.model, embedding=True)
|
||||
|
|
|
@ -4,7 +4,7 @@ import argparse
|
|||
from llama_cpp import Llama
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("-m", "--model", type=str, default="./models/...")
|
||||
parser.add_argument("-m", "--model", type=str, default="../models/7B/ggml-model.bin")
|
||||
args = parser.parse_args()
|
||||
|
||||
llm = Llama(model_path=args.model)
|
||||
|
|
|
@ -4,7 +4,7 @@ import argparse
|
|||
from llama_cpp import Llama
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("-m", "--model", type=str, default="./models/...")
|
||||
parser.add_argument("-m", "--model", type=str, default="../models/7B/ggml-model.bin")
|
||||
args = parser.parse_args()
|
||||
|
||||
llm = Llama(model_path=args.model)
|
||||
|
|
|
@ -29,7 +29,7 @@ class LlamaLLM(LLM):
|
|||
|
||||
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument("-m", "--model", type=str, default="./models/...")
|
||||
parser.add_argument("-m", "--model", type=str, default="../models/7B/ggml-model.bin")
|
||||
args = parser.parse_args()
|
||||
|
||||
# Load the model
|
||||
|
|
|
@ -9,7 +9,7 @@ N_THREADS = multiprocessing.cpu_count()
|
|||
prompt = b"\n\n### Instruction:\nWhat is the capital of France?\n\n### Response:\n"
|
||||
|
||||
lparams = llama_cpp.llama_context_default_params()
|
||||
ctx = llama_cpp.llama_init_from_file(b"models/ggml-alpaca-7b-q4.bin", lparams)
|
||||
ctx = llama_cpp.llama_init_from_file(b"../models/7B/ggml-model.bin", lparams)
|
||||
|
||||
# determine the required inference memory per token:
|
||||
tmp = [0, 1, 2, 3]
|
||||
|
|
Loading…
Reference in a new issue