"""llama-cpp-python server from scratch in a single file.
|
||
|
"""
|
||
|
|
||
|

# Low-level llama.cpp bindings, kept commented out until a model file is
# wired in.
# import llama_cpp

# The path is bytes because the low-level bindings hand it straight to the
# C API, which expects a C string.
# path = b"../../models/Qwen1.5-0.5B-Chat-GGUF/qwen1_5-0_5b-chat-q8_0.gguf"

# model_params = llama_cpp.llama_model_default_params()
# model = llama_cpp.llama_load_model_from_file(path, model_params)

# if model is None:
#     raise RuntimeError(f"Failed to load model from file: {path}")
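
# Note: with the low-level bindings the backend usually has to be
# initialized once per process before any model is loaded. Depending on
# the installed llama-cpp-python version the call takes no arguments or a
# NUMA flag; kept commented out like the scaffold above:
# llama_cpp.llama_backend_init()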

# A context holds the KV cache and evaluation state for one session.
# ctx_params = llama_cpp.llama_context_default_params()
# ctx = llama_cpp.llama_new_context_with_model(model, ctx_params)

# if ctx is None:
#     raise RuntimeError("Failed to create context")
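
# For comparison, the high-level API wraps the load-plus-context steps
# above in a single object and already speaks the chat-completion format.
# A sketch, assuming the same GGUF path as above:
# llm = llama_cpp.Llama(model_path="../../models/Qwen1.5-0.5B-Chat-GGUF/qwen1_5-0_5b-chat-q8_0.gguf")
# completion = llm.create_chat_completion(
#     messages=[{"role": "user", "content": "Hello!"}],
# )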

from fastapi import FastAPI

# OpenAI's chat-completions schema types, imported for shaping the
# request/response bodies later.
import openai.types.chat as types

app = FastAPI()
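
# Serve with uvicorn, assuming this file is saved as main.py:
#   uvicorn main:app --reload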

@app.post("/v1/chat/completions")
def create_chat_completions():
    return {"message": "Hello World"}
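
# Where the stub above is headed: accept an OpenAI-style request body and
# answer with a chat.completion-shaped payload. A sketch under assumptions:
# ChatRequest is a hand-rolled subset of the real request schema and the
# /echo route is hypothetical; real generation would call into the
# llama_cpp scaffold above instead of echoing.
import time

from pydantic import BaseModel


class ChatRequest(BaseModel):
    """Hypothetical, minimal subset of the OpenAI chat request body."""

    model: str = "local"
    messages: list[dict]


@app.post("/v1/chat/completions/echo")  # hypothetical sibling route
def create_chat_completions_echo(request: ChatRequest) -> dict:
    # Echo a fixed reply in the response shape OpenAI clients expect.
    return {
        "id": "chatcmpl-0",
        "object": "chat.completion",
        "created": int(time.time()),
        "model": request.model,
        "choices": [
            {
                "index": 0,
                "finish_reason": "stop",
                "message": {"role": "assistant", "content": "Hello World"},
            }
        ],
    }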