feat(server): Add support for pulling models from Huggingface Hub (#1222)

* Basic support for hf pull on server

* Add hf_model_repo_id setting

* Update README
Andrei 2024-02-26 14:35:08 -05:00 committed by GitHub
parent b3e358dee4
commit 4d574bd765
3 changed files with 24 additions and 2 deletions

README.md

@@ -577,6 +577,12 @@ python3 -m llama_cpp.server --model models/7B/llama-model.gguf --chat_format cha
 That will format the prompt according to how the model expects it. You can find the prompt format in the model card.
 For possible options, see [llama_cpp/llama_chat_format.py](llama_cpp/llama_chat_format.py) and look for lines starting with "@register_chat_format".
+If you have `huggingface-hub` installed, you can also use the `--hf_model_repo_id` flag to load a model from the Hugging Face Hub.
+
+```bash
+python3 -m llama_cpp.server --hf_model_repo_id Qwen/Qwen1.5-0.5B-Chat-GGUF --model '*q8_0.gguf'
+```
+
 ### Web Server Features
 - [Local Copilot replacement](https://llama-cpp-python.readthedocs.io/en/latest/server/#code-completion)
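
For context, when `--hf_model_repo_id` is set the server routes model loading through `llama_cpp.Llama.from_pretrained` (see the `model.py` hunk below). A minimal sketch of the equivalent programmatic call, reusing the repo id and filename glob from the README example above:

```python
from llama_cpp import Llama

# Download the file in the repo that matches the glob (requires
# `huggingface-hub`), cache it locally, and load it.
llm = Llama.from_pretrained(
    repo_id="Qwen/Qwen1.5-0.5B-Chat-GGUF",  # same value as --hf_model_repo_id
    filename="*q8_0.gguf",                  # same value as --model
)
```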

llama_cpp/server/model.py

@@ -121,8 +121,19 @@ class LlamaProxy:
                 else:
                     raise ValueError(f"Unknown value type {value_type}")
 
-        _model = llama_cpp.Llama(
-            model_path=settings.model,
+        import functools
+
+        kwargs = {}
+
+        if settings.hf_model_repo_id is not None:
+            create_fn = functools.partial(llama_cpp.Llama.from_pretrained, repo_id=settings.hf_model_repo_id, filename=settings.model)
+        else:
+            create_fn = llama_cpp.Llama
+            kwargs["model_path"] = settings.model
+
+        _model = create_fn(
+            **kwargs,
             # Model Params
             n_gpu_layers=settings.n_gpu_layers,
             main_gpu=settings.main_gpu,
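
The `functools.partial` above pre-binds the Hub-specific arguments so both creation paths can then receive the same long list of keyword arguments (`n_gpu_layers`, `main_gpu`, and so on). A toy sketch of the same dispatch pattern, with all names hypothetical:

```python
import functools

# Two factories with different leading arguments but shared trailing kwargs.
def from_hub(repo_id, filename, n_ctx=512):
    return f"hub:{repo_id}/{filename} (n_ctx={n_ctx})"

def from_path(model_path, n_ctx=512):
    return f"path:{model_path} (n_ctx={n_ctx})"

use_hub = True
kwargs = {}
if use_hub:
    # Pre-bind what only the Hub path needs; the call site stays uniform.
    create_fn = functools.partial(from_hub, repo_id="org/repo", filename="*.gguf")
else:
    create_fn = from_path
    kwargs["model_path"] = "models/7B/llama-model.gguf"

print(create_fn(**kwargs, n_ctx=2048))  # shared kwargs flow to either factory
```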

llama_cpp/server/settings.py

@@ -143,6 +143,11 @@ class ModelSettings(BaseSettings):
         default=None,
         description="The model name or path to a pretrained HuggingFace tokenizer model. Same as you would pass to AutoTokenizer.from_pretrained().",
     )
+    # Loading from HuggingFace Model Hub
+    hf_model_repo_id: Optional[str] = Field(
+        default=None,
+        description="The model repo id to use when loading a model from the HuggingFace Hub.",
+    )
     # Speculative Decoding
     draft_model: Optional[str] = Field(
         default=None,
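
Since `ModelSettings` is a pydantic `BaseSettings` class, the new field composes with the existing ones wherever settings are built. A minimal sketch, reusing the repo id and glob from the README example (the `print` is only for illustration):

```python
from llama_cpp.server.settings import ModelSettings

# A Hub-backed model pairs hf_model_repo_id with a filename glob in `model`;
# with hf_model_repo_id unset, `model` is treated as a local path as before.
settings = ModelSettings(
    model="*q8_0.gguf",
    hf_model_repo_id="Qwen/Qwen1.5-0.5B-Chat-GGUF",
)
print(settings.hf_model_repo_id)
```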