simplify loading

commit ecfb4abafb (parent 3b4f45f6bf)

5 changed files with 40 additions and 40 deletions
README.md

@@ -10,7 +10,7 @@ Install dependencies:
 pip install -r requirements.txt
 ```

-Put your model in `models/` and run:
+Run a server:

 ```
 python3 ollama.py serve
@@ -19,17 +19,18 @@ python3 ollama.py serve
 **Start frontend service:**

 Install dependencies:

 ```
 cd desktop
 npm install
 ```

 Run the UI:

 ```
 npm start
 ```

+
 ## Building

 If using Apple silicon, you need a Python version that supports arm64:
@@ -57,3 +58,11 @@ python3 build.py
 cd desktop
 npm run package
 ```
+
+## Update requirements.txt
+
+In the root directory, run:
+
+```
+pipreqs . --force
+```
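For context: `pipreqs` regenerates `requirements.txt` from the imports it actually finds in the source tree, and the `requirements.txt` diff at the end of this commit appears to be the result of running it.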
models/.gitignore (1 change, vendored)

@@ -1 +0,0 @@
-*.bin
models/README.md (deleted)

@@ -1,9 +0,0 @@
-Place models here for use.
-
-Here's some recommendations:
-https://huggingface.co/TheBloke/vicuna-7B-v1.3-GGML
-
-
-https://huggingface.co/TheBloke/orca_mini_3B-GGML/resolve/main/orca-mini-3b.ggmlv3.q4_1.bin
-
-https://huggingface.co/TheBloke/orca_mini_13B-GGML/resolve/main/orca-mini-13b.ggmlv3.q4_1.bin
ollama.py (47 changes)
@@ -27,47 +27,46 @@ def models_directory():
     return models_dir


-def load(model=None, path=None):
+def load(model):
     """
     Load a model.

-    The model can be specified by providing either the path or the model name,
-    but not both. If both are provided, this function will raise a ValueError.
-    If the model does not exist or could not be loaded, this function returns an error.
-
     Args:
-        model (str, optional): The name of the model to load.
-        path (str, optional): The path to the model file.
+        model (str): The name or path of the model to load.

     Returns:
+        str or None: The name of the model
         dict or None: If the model cannot be loaded, a dictionary with an 'error' key is returned.
             If the model is successfully loaded, None is returned.
     """

     with lock:
-        if path is not None and model is not None:
-            raise ValueError(
-                "Both path and model are specified. Please provide only one of them."
-            )
-        elif path is not None:
-            name = os.path.basename(path)
+        load_from = ""
+        if os.path.exists(model) and model.endswith(".bin"):
+            # model is being referenced by path rather than name directly
+            path = os.path.abspath(model)
+            base = os.path.basename(path)
             load_from = path
-        elif model is not None:
-            name = model
-            dir = models_directory()
-            load_from = str(dir / f"{model}.bin")
+            name = os.path.splitext(base)[0]  # Split the filename and extension
         else:
-            raise ValueError("Either path or model must be specified.")
+            # model is being loaded from the ollama models directory
+            dir = models_directory()
+
+            # TODO: download model from a repository if it does not exist
+            load_from = str(dir / f"{model}.bin")
+            name = model
+
+        if load_from == "":
+            return None, {"error": "Model not found."}

         if not os.path.exists(load_from):
-            return {"error": f"The model at {load_from} does not exist."}
+            return None, {"error": f"The model {load_from} does not exist."}

         if name not in llms:
-            # TODO: download model from a repository if it does not exist
             llms[name] = Llama(model_path=load_from)

-        # TODO: this should start a persistent instance of ollama with the model loaded
-        return None
+        return name, None


 def unload(model):
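To make the new calling convention concrete, here is a usage sketch. It is not part of the commit: the import path and the model files are assumptions, chosen only to illustrate the `(name, error)` return shape.

```
# Hypothetical caller of the new load(); assumes ollama.py is importable from
# the repo root and that the referenced .bin files exist.
from ollama import load

# By name: resolved to <models_directory()>/orca-mini-3b.bin
name, error = load("orca-mini-3b")

# By path: any existing file ending in ".bin"; the cache key becomes the
# basename without its extension, e.g. "orca-mini-3b.ggmlv3.q4_1"
name, error = load("./orca-mini-3b.ggmlv3.q4_1.bin")

if error is not None:
    print(error["error"])          # e.g. "The model ... does not exist."
else:
    print(f"model ready: {name}")  # name keys into the llms cache
```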
@@ -84,10 +83,10 @@ def unload(model):

 def generate(model, prompt):
     # auto load
-    error = load(model)
+    name, error = load(model)
     if error is not None:
         return error
-    generated = llms[model](
+    generated = llms[name](
         str(prompt),  # TODO: optimize prompt based on model
         max_tokens=4096,
         stop=["Q:", "\n"],
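One subtlety worth spelling out: `load()` keys the `llms` cache by the resolved name (for a path argument, the basename minus its extension), so the old `llms[model]` lookup would raise a `KeyError` whenever a model had been loaded by path. Indexing with the returned `name` uses the same key that `load()` wrote.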
requirements.txt

@@ -1,5 +1,7 @@
+click==8.1.3
 Flask==2.3.2
-flask_cors==3.0.10
-llama-cpp-python==0.1.65
+Flask_Cors==3.0.10
+llama_cpp_python==0.1.65
 pyinstaller==5.13.0
-pyinstaller-hooks-contrib==2023.3
+setuptools==65.6.3
+tqdm==4.65.0
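The renames here (`flask_cors` → `Flask_Cors`, `llama-cpp-python` → `llama_cpp_python`) and the new `click`, `setuptools`, and `tqdm` pins look like the output of the `pipreqs . --force` step added to the README above: pipreqs rewrites the file with its own canonical package names and pins whatever top-level imports it detects, so hand-edited entries get replaced wholesale.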