llama.cpp/docker/open_llama/start_server.sh

#!/bin/sh
# For mlock support: raise the locked-memory limit so the model's pages can be pinned in RAM
ulimit -l unlimited
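# $IMAGE is assumed to be set by the container environment (e.g. the
# docker-compose service that builds this image); python:3-slim-bullseye
# is the CPU-only base image, so the GPU offload flag is skipped for it.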
if [ "$IMAGE" = "python:3-slim-bullseye" ]; then
python3 -B -m llama_cpp.server --model /app/model.bin
else
# You may have to reduce --n_gpu_layers=1000 to 20 or less if you don't have enough VRAM
python3 -B -m llama_cpp.server --model /app/model.bin --n_gpu_layers=1000
fi
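
# A hypothetical fallback if the GPU branch above fails with out-of-memory
# errors: offload only part of the model instead of all layers, e.g.
#   python3 -B -m llama_cpp.server --model /app/model.bin --n_gpu_layers=20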