Update model paths to make it clear they should point to a file

parent a79d3eb732
commit 196650ccb2

8 changed files with 12 additions and 12 deletions
@@ -27,14 +27,14 @@ pip install llama-cpp-python
 
 ```python
 >>> from llama_cpp import Llama
->>> llm = Llama(model_path="models/7B/...")
+>>> llm = Llama(model_path="./models/7B/ggml-model.bin")
 >>> output = llm("Q: Name the planets in the solar system? A: ", max_tokens=32, stop=["Q:", "\n"], echo=True)
 >>> print(output)
 {
 "id": "cmpl-xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx",
 "object": "text_completion",
 "created": 1679561337,
-"model": "models/7B/...",
+"model": "./models/7B/ggml-model.bin",
 "choices": [
 {
 "text": "Q: Name the planets in the solar system? A: Mercury, Venus, Earth, Mars, Jupiter, Saturn, Uranus, Neptune and Pluto.",
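The README snippet above returns an OpenAI-style completion object. As a minimal sketch of how that result might be consumed, using only the call signature and field layout shown in the hunk (the path and prompt are placeholders to adjust):

```python
from llama_cpp import Llama

# model_path must point to an actual ggml model file, not the 7B directory.
llm = Llama(model_path="./models/7B/ggml-model.bin")

output = llm(
    "Q: Name the planets in the solar system? A: ",
    max_tokens=32,
    stop=["Q:", "\n"],
    echo=True,
)

# The completion text sits under choices[0]["text"], matching the JSON above.
print(output["choices"][0]["text"])
```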
@@ -60,7 +60,7 @@ To install the server package and get started:
 
 ```bash
 pip install llama-cpp-python[server]
-export MODEL=./models/7B
+export MODEL=./models/7B/ggml-model.bin
 python3 -m llama_cpp.server
 ```
 
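Since the server reads the model location from the MODEL environment variable, a launcher script can fail fast when that variable does not name a real file. A small illustrative sketch under that assumption (the validation wrapper is not part of the package):

```python
import os
import subprocess
import sys

# MODEL should point at a model file such as ./models/7B/ggml-model.bin.
model = os.environ.get("MODEL", "")
if not os.path.isfile(model):
    sys.exit(f"MODEL must point to a model file, got: {model!r}")

# Equivalent to running `python3 -m llama_cpp.server` from the shell;
# the child process inherits the MODEL environment variable.
subprocess.run([sys.executable, "-m", "llama_cpp.server"], check=True)
```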
@@ -29,14 +29,14 @@ pip install llama-cpp-python
 
 ```python
 >>> from llama_cpp import Llama
->>> llm = Llama(model_path="models/7B/...")
+>>> llm = Llama(model_path="./models/7B/ggml-model.bin")
 >>> output = llm("Q: Name the planets in the solar system? A: ", max_tokens=32, stop=["Q:", "\n"], echo=True)
 >>> print(output)
 {
 "id": "cmpl-xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx",
 "object": "text_completion",
 "created": 1679561337,
-"model": "models/7B/...",
+"model": "./models/7B/ggml-model.bin",
 "choices": [
 {
 "text": "Q: Name the planets in the solar system? A: Mercury, Venus, Earth, Mars, Jupiter, Saturn, Uranus, Neptune and Pluto.",
@@ -62,7 +62,7 @@ To install the server package and get started:
 
 ```bash
 pip install llama-cpp-python[server]
-export MODEL=./models/7B
+export MODEL=./models/7B/ggml-model.bin
 python3 -m llama_cpp.server
 ```
 
@@ -4,7 +4,7 @@ To run this example:
 
 ```bash
 pip install fastapi uvicorn sse-starlette
-export MODEL=../models/7B/...
+export MODEL=../models/7B/ggml-model.bin
 uvicorn fastapi_server_chat:app --reload
 ```
 
@@ -3,7 +3,7 @@ import argparse
 from llama_cpp import Llama
 
 parser = argparse.ArgumentParser()
-parser.add_argument("-m", "--model", type=str, default=".//models/...")
+parser.add_argument("-m", "--model", type=str, default="../models/7B/ggml-model.bin")
 args = parser.parse_args()
 
 llm = Llama(model_path=args.model, embedding=True)
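The same point applies to the example scripts: their --model argument should name a model file, not a directory. One way to surface a bad path early is an argparse type validator; this is a hypothetical hardening sketch built on the code shown above, not part of the change:

```python
import argparse
import os

def model_file(path: str) -> str:
    # Reject values that do not point to an existing model file.
    if not os.path.isfile(path):
        raise argparse.ArgumentTypeError(f"not a model file: {path}")
    return path

parser = argparse.ArgumentParser()
parser.add_argument("-m", "--model", type=model_file,
                    default="../models/7B/ggml-model.bin")
args = parser.parse_args()
```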
@@ -4,7 +4,7 @@ import argparse
 from llama_cpp import Llama
 
 parser = argparse.ArgumentParser()
-parser.add_argument("-m", "--model", type=str, default="./models/...")
+parser.add_argument("-m", "--model", type=str, default="../models/7B/ggml-models.bin")
 args = parser.parse_args()
 
 llm = Llama(model_path=args.model)
@@ -4,7 +4,7 @@ import argparse
 from llama_cpp import Llama
 
 parser = argparse.ArgumentParser()
-parser.add_argument("-m", "--model", type=str, default="./models/...")
+parser.add_argument("-m", "--model", type=str, default="../models/7B/ggml-models.bin")
 args = parser.parse_args()
 
 llm = Llama(model_path=args.model)
@@ -29,7 +29,7 @@ class LlamaLLM(LLM):
 
 
 parser = argparse.ArgumentParser()
-parser.add_argument("-m", "--model", type=str, default="./models/...")
+parser.add_argument("-m", "--model", type=str, default="../models/7B/ggml-models.bin")
 args = parser.parse_args()
 
 # Load the model
@@ -9,7 +9,7 @@ N_THREADS = multiprocessing.cpu_count()
 prompt = b"\n\n### Instruction:\nWhat is the capital of France?\n\n### Response:\n"
 
 lparams = llama_cpp.llama_context_default_params()
-ctx = llama_cpp.llama_init_from_file(b"models/ggml-alpaca-7b-q4.bin", lparams)
+ctx = llama_cpp.llama_init_from_file(b"../models/7B/ggml-model.bin", lparams)
 
 # determine the required inference memory per token:
 tmp = [0, 1, 2, 3]
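For the low-level binding in the last hunk, the path is passed as bytes. A minimal sketch of initializing a context from a configurable location, assuming only the llama_cpp functions already used above (the path itself is a placeholder):

```python
import os

import llama_cpp

# Placeholder location; point this at your actual ggml model file.
MODEL_PATH = "../models/7B/ggml-model.bin"

if not os.path.isfile(MODEL_PATH):
    raise FileNotFoundError(f"model_path must point to a model file: {MODEL_PATH}")

lparams = llama_cpp.llama_context_default_params()
# The low-level API expects a bytes path, hence the encode().
ctx = llama_cpp.llama_init_from_file(MODEL_PATH.encode("utf-8"), lparams)
```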