llama.cpp/docker/open_llama/start_server.sh

#!/bin/sh
# For mlock support: raise the locked-memory limit so the model's pages can be pinned in RAM
ulimit -l unlimited
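# $IMAGE is assumed to be set by the container environment (e.g. the
# docker-compose service that builds this image); python:3-slim-bullseye
# is the CPU-only base image, so the GPU offload flag is skipped for it.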
if [ "$IMAGE" = "python:3-slim-bullseye" ]; then
python3 -B -m llama_cpp.server --model /app/model.bin
else
# You may have to reduce --n_gpu_layers=1000 to 20 or less if you don't have enough VRAM
python3 -B -m llama_cpp.server --model /app/model.bin --n_gpu_layers=1000
fi
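
# A hypothetical fallback if the GPU branch above fails with out-of-memory
# errors: offload only part of the model instead of all layers, e.g.
#   python3 -B -m llama_cpp.server --model /app/model.bin --n_gpu_layers=20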