Add mkdocs

Parent: a61fd3b509
Commit: df15caa877

5 changed files with 1059 additions and 510 deletions
docs/index.md (new file, 23 lines)

@@ -0,0 +1,23 @@
+# 🦙 Python Bindings for `llama.cpp`
+
+Simple Python bindings for **@ggerganov's** [`llama.cpp`](https://github.com/ggerganov/llama.cpp) library.
+This package provides:
+
+- Low-level access to C API via `ctypes` interface.
+- High-level Python API for text completion
+  - OpenAI-like API
+  - LangChain compatibility
+
+
+## API Reference
+
+::: llama_cpp.Llama
+    options:
+        members:
+            - __init__
+            - __call__
+        show_root_heading: true
+
+::: llama_cpp.llama_cpp
+    options:
+        show_if_no_docstring: true
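The index page above advertises a high-level, OpenAI-like completion API. As orientation, a minimal usage sketch follows; the model path is a placeholder, and the dict-style response fields are an assumption based on the OpenAI-like interface the page describes:

```python
# Minimal sketch of the high-level API the index page describes.
# Assumptions: "./models/ggml-model.bin" is a placeholder path, and
# the response follows the OpenAI-style layout implied by the docs.
from llama_cpp import Llama

llm = Llama(model_path="./models/ggml-model.bin")
output = llm(
    "Q: Name the planets in the solar system. A: ",
    max_tokens=32,
    stop=["Q:", "\n"],  # stop strings, per the __call__ docs below
    echo=True,          # include the prompt in the returned text
)
print(output["choices"][0]["text"])
```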
llama_cpp/llama.py

@@ -8,6 +8,8 @@ from . import llama_cpp
 
 
 class Llama:
+    """High-level Python wrapper for a llama.cpp model."""
+
     def __init__(
         self,
         model_path: str,
@@ -18,7 +20,25 @@ class Llama:
         logits_all: bool = False,
         vocab_only: bool = False,
         n_threads: Optional[int] = None,
-    ):
+    ) -> "Llama":
+        """Load a llama.cpp model from `model_path`.
+
+        Args:
+            model_path: Path to the model directory.
+            n_ctx: Number of tokens to keep in memory.
+            n_parts: Number of parts to split the model into. If -1, the number of parts is automatically determined.
+            seed: Random seed.
+            f16_kv: Use half-precision for key/value matrices.
+            logits_all: Return logits for all tokens, not just the vocabulary.
+            vocab_only: Only use tokens in the vocabulary.
+            n_threads: Number of threads to use. If None, the number of threads is automatically determined.
+
+        Raises:
+            ValueError: If the model path does not exist.
+
+        Returns:
+            A Llama instance.
+        """
         self.model_path = model_path
 
         self.last_n = 64
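A hedged sketch of how the constructor documented in this hunk might be invoked; the model path is a placeholder and the argument values are illustrative, not recommended defaults:

```python
from llama_cpp import Llama

# Illustrative construction using the parameters documented above.
# The model path is a placeholder; the values are examples only.
llm = Llama(
    model_path="./models/ggml-model.bin",
    n_ctx=512,        # tokens kept in memory
    n_parts=-1,       # -1: determine the part count automatically
    seed=1337,        # fixed seed for reproducibility
    f16_kv=True,      # half-precision key/value matrices
    n_threads=None,   # None: pick the thread count automatically
)
# Per the docstring, a nonexistent model path raises ValueError.
```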
@@ -56,6 +76,27 @@ class Llama:
         repeat_penalty: float = 1.1,
         top_k: int = 40,
     ):
+        """Generate text from a prompt.
+
+        Args:
+            prompt: The prompt to generate text from.
+            suffix: A suffix to append to the generated text. If None, no suffix is appended.
+            max_tokens: The maximum number of tokens to generate.
+            temperature: The temperature to use for sampling.
+            top_p: The top-p value to use for sampling.
+            logprobs: The number of logprobs to return. If None, no logprobs are returned.
+            echo: Whether to echo the prompt.
+            stop: A list of strings to stop generation when encountered.
+            repeat_penalty: The penalty to apply to repeated tokens.
+            top_k: The top-k value to use for sampling.
+
+        Raises:
+            ValueError: If the requested tokens exceed the context window.
+            RuntimeError: If the prompt fails to tokenize or the model fails to evaluate the prompt.
+
+        Returns:
+            Response object containing the generated text.
+        """
         text = b""
         finish_reason = "length"
         completion_tokens = 0
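A matching sketch for the completion call documented in this hunk; the prompt, the placeholder model path, and the dict-style response access are illustrative assumptions based on the "Response object" the docstring mentions:

```python
# Illustrative completion call using the sampling parameters
# documented above; the response shape is assumed from the
# OpenAI-like API noted in docs/index.md.
from llama_cpp import Llama

llm = Llama(model_path="./models/ggml-model.bin")  # placeholder path
output = llm(
    "Q: What is the capital of France? A: ",
    max_tokens=16,
    temperature=0.8,     # higher values sample more randomly
    top_p=0.95,          # nucleus-sampling cutoff
    top_k=40,            # sample from the 40 most likely tokens
    repeat_penalty=1.1,  # penalize repeated tokens
    stop=["Q:", "\n"],   # truncate generation at these strings
    echo=False,          # do not repeat the prompt in the output
)
print(output["choices"][0]["text"])
```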
mkdocs.yml (new file, 10 lines)

@@ -0,0 +1,10 @@
+site_name: llama-cpp-python
+
+theme:
+  name: "material"
+
+plugins:
+- mkdocstrings
+
+watch:
+- llama_cpp
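The mkdocstrings plugin enabled above generates the API reference directly from docstrings in the watched `llama_cpp` package. As a sketch, here is a hypothetical function in the Google docstring style that the `:::` directives in docs/index.md would render:

```python
from typing import List


def tokenize(text: str) -> List[int]:
    """Convert `text` into a list of token ids.

    Hypothetical function, shown only to illustrate the Google-style
    docstring layout that mkdocstrings renders into the API reference.

    Args:
        text: The input string to tokenize.

    Returns:
        A list of integer token ids.
    """
    raise NotImplementedError
```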
poetry.lock (generated; 1490 lines changed)

File diff suppressed because it is too large.
pyproject.toml

@@ -19,6 +19,9 @@ python = "^3.8.1"
 [tool.poetry.group.dev.dependencies]
 black = "^23.1.0"
 twine = "^4.0.2"
+mkdocs = "^1.4.2"
+mkdocstrings = {extras = ["python"], version = "^0.20.0"}
+mkdocs-material = "^9.1.4"
 
 [build-system]
 requires = [