From c1d92ce6806c7b5c31cd6354f639565572c10c8f Mon Sep 17 00:00:00 2001 From: Aniket Maurya Date: Tue, 12 Dec 2023 01:40:38 +0000 Subject: [PATCH 1/6] fix minor typo (#958) * fix minor typo * Fix typo --------- Co-authored-by: Andrei --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index dd4eb52..560ca27 100644 --- a/README.md +++ b/README.md @@ -207,7 +207,8 @@ The gguf-converted files for this model can be found here: [functionary-7b-v1](h messages = [ { "role": "system", - "content": "A chat between a curious user and an artificial intelligence assitant. The assistant gives helpful, detailed, and polite answers to the user's questions. The assistant callse functions with appropriate input when necessary" + "content": "A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. The assistant calls functions with appropriate input when necessary" + }, { "role": "user", From 6bbeea07ae49b16cf1cad7a6f2f5fec370c22d01 Mon Sep 17 00:00:00 2001 From: zocainViken <75504411+zocainViken@users.noreply.github.com> Date: Tue, 12 Dec 2023 02:41:38 +0100 Subject: [PATCH 2/6] README.md multimodal params fix (#967) multi modal params fix: add logits = True -> to make llava work --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 560ca27..0aacdf9 100644 --- a/README.md +++ b/README.md @@ -266,7 +266,8 @@ Then you'll need to use a custom chat handler to load the clip model and process >>> llm = Llama( model_path="./path/to/llava/llama-model.gguf", chat_handler=chat_handler, - n_ctx=2048 # n_ctx should be increased to accomodate the image embedding + n_ctx=2048, # n_ctx should be increased to accomodate the image embedding + logits_all=True,# needed to make llava work ) >>> llm.create_chat_completion( messages = [ From b938cccf05ff7b6ae58f7d79f5f7d17591950648 Mon Sep 17 00:00:00 2001 From: chiensen <113402074+chiensen@users.noreply.github.com> Date: Tue, 12 Dec 2023 09:44:04 +0800 Subject: [PATCH 3/6] Add Pygmalion chat format (#986) --- llama_cpp/llama_chat_format.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/llama_cpp/llama_chat_format.py b/llama_cpp/llama_chat_format.py index 24ef9e2..62e3783 100644 --- a/llama_cpp/llama_chat_format.py +++ b/llama_cpp/llama_chat_format.py @@ -637,6 +637,23 @@ def format_zephyr( _prompt = _format_chatml(system_message, _messages, _sep) return ChatFormatterResponse(prompt=_prompt, stop=_sep) + +@register_chat_format("pygmalion") +def format_pygmalion( + messages: List[llama_types.ChatCompletionRequestMessage], + **kwargs: Any, +) -> ChatFormatterResponse: + system_template = """<|system|>{system_message}""" + system_message = _get_system_message(messages) + system_message = system_template.format(system_message=system_message) + _roles = dict(user="<|user|>", assistant="<|model|>") + _sep = "\n" + _messages = _map_roles(messages, _roles) + _messages.append((_roles["assistant"], None)) + _prompt = _format_chatml(system_message, _messages, _sep) + return ChatFormatterResponse(prompt=_prompt, stop=_sep) + + @register_chat_format("chatml") def format_chatml( messages: List[llama_types.ChatCompletionRequestMessage], From ac35f68e4d718a56b2977e5483285c0881dcb116 Mon Sep 17 00:00:00 2001 From: zocainViken <75504411+zocainViken@users.noreply.github.com> Date: Tue, 12 Dec 2023 02:44:51 +0100 Subject: [PATCH 4/6] Fix UnsupportedOperation: fileno in 
suppress_stdout_stderr (#961) * bug fixing * llava from readme got this error: UnsupportedOperation: fileno quick fix by checking hasattr * multi modal params fix: add logits = True -> to make llava work * multi modal params fix: add logits = True -> to make llava work --------- Co-authored-by: Andrei --- llama_cpp/_utils.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/llama_cpp/_utils.py b/llama_cpp/_utils.py index 1b61eec..171f357 100644 --- a/llama_cpp/_utils.py +++ b/llama_cpp/_utils.py @@ -17,14 +17,18 @@ class suppress_stdout_stderr(object): if self.disable: return self + # Check if sys.stdout and sys.stderr have fileno method + if not hasattr(self.sys.stdout, 'fileno') or not hasattr(self.sys.stderr, 'fileno'): + return self # Return the instance without making changes + self.outnull_file = self.open(self.os.devnull, "w") self.errnull_file = self.open(self.os.devnull, "w") self.old_stdout_fileno_undup = self.sys.stdout.fileno() self.old_stderr_fileno_undup = self.sys.stderr.fileno() - self.old_stdout_fileno = self.os.dup(self.sys.stdout.fileno()) - self.old_stderr_fileno = self.os.dup(self.sys.stderr.fileno()) + self.old_stdout_fileno = self.os.dup(self.old_stdout_fileno_undup) + self.old_stderr_fileno = self.os.dup(self.old_stderr_fileno_undup) self.old_stdout = self.sys.stdout self.old_stderr = self.sys.stderr @@ -40,14 +44,16 @@ class suppress_stdout_stderr(object): if self.disable: return - self.sys.stdout = self.old_stdout - self.sys.stderr = self.old_stderr + # Check if sys.stdout and sys.stderr have fileno method + if hasattr(self.sys.stdout, 'fileno') and hasattr(self.sys.stderr, 'fileno'): + self.sys.stdout = self.old_stdout + self.sys.stderr = self.old_stderr - self.os.dup2(self.old_stdout_fileno, self.old_stdout_fileno_undup) - self.os.dup2(self.old_stderr_fileno, self.old_stderr_fileno_undup) + self.os.dup2(self.old_stdout_fileno, self.old_stdout_fileno_undup) + self.os.dup2(self.old_stderr_fileno, self.old_stderr_fileno_undup) - self.os.close(self.old_stdout_fileno) - self.os.close(self.old_stderr_fileno) + self.os.close(self.old_stdout_fileno) + self.os.close(self.old_stderr_fileno) - self.outnull_file.close() - self.errnull_file.close() + self.outnull_file.close() + self.errnull_file.close() From ef22e478db533e89fe3d48577cf9f338b4195b70 Mon Sep 17 00:00:00 2001 From: Tanner Hobson Date: Mon, 11 Dec 2023 20:46:27 -0500 Subject: [PATCH 5/6] Replace logits_to_logprobs implementation with numpy equivalent to llama.cpp (#991) See #990. This change makes the logits_to_logprobs function equivalent to the version in the llama.cpp repository. It uses numpy so it's much faster than the previous version. 
--- llama_cpp/llama.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/llama_cpp/llama.py b/llama_cpp/llama.py index c696804..292378d 100644 --- a/llama_cpp/llama.py +++ b/llama_cpp/llama.py @@ -2280,10 +2280,14 @@ class Llama: return self._model.token_nl() @staticmethod - def logits_to_logprobs(logits: List[float]) -> List[float]: - exps = [math.exp(float(x)) for x in logits] - sum_exps = sum(exps) - return [math.log(x / sum_exps) for x in exps] + def logits_to_logprobs(logits: npt.NDArray[np.single]) -> npt.NDArray[np.single]: + maximum = np.max(logits) + tmp = np.subtract(logits, maximum, dtype=np.single) + np.exp(tmp, out=tmp) + normalizer = 1.0 / np.sum(tmp) + np.multiply(normalizer, tmp, out=tmp) + np.log(tmp, out=tmp) + return tmp @staticmethod def longest_token_prefix(a: Sequence[int], b: Sequence[int]): From 8e44a32075de4aba2fc9877d4a2a34a0e7314c0d Mon Sep 17 00:00:00 2001 From: Radoslav Gerganov Date: Tue, 12 Dec 2023 03:47:11 +0200 Subject: [PATCH 6/6] Add support for running the server with SSL (#994) --- llama_cpp/server/__main__.py | 3 ++- llama_cpp/server/app.py | 7 +++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/llama_cpp/server/__main__.py b/llama_cpp/server/__main__.py index a294ebf..45fc5a8 100644 --- a/llama_cpp/server/__main__.py +++ b/llama_cpp/server/__main__.py @@ -96,5 +96,6 @@ if __name__ == "__main__": app = create_app(settings=settings) uvicorn.run( - app, host=os.getenv("HOST", settings.host), port=int(os.getenv("PORT", settings.port)) + app, host=os.getenv("HOST", settings.host), port=int(os.getenv("PORT", settings.port)), + ssl_keyfile=settings.ssl_keyfile, ssl_certfile=settings.ssl_certfile ) diff --git a/llama_cpp/server/app.py b/llama_cpp/server/app.py index a2dad56..7138cf4 100644 --- a/llama_cpp/server/app.py +++ b/llama_cpp/server/app.py @@ -150,6 +150,13 @@ class Settings(BaseSettings): # Server Params host: str = Field(default="localhost", description="Listen address") port: int = Field(default=8000, description="Listen port") + # SSL Params + ssl_keyfile: Optional[str] = Field( + default=None, description="SSL key file for HTTPS" + ) + ssl_certfile: Optional[str] = Field( + default=None, description="SSL certificate file for HTTPS" + ) interrupt_requests: bool = Field( default=True, description="Whether to interrupt requests when a new request is received.",
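
The SSL support introduced in PATCH 6/6 can be exercised programmatically as well as from the command line. The sketch below is a minimal illustration that mirrors what the updated llama_cpp/server/__main__.py does after the patch; the model path and the key/certificate file names are placeholders, not part of the patch.

    # Sketch: start the OpenAI-compatible server over HTTPS using the new
    # ssl_keyfile / ssl_certfile settings (paths below are placeholders).
    import uvicorn
    from llama_cpp.server.app import create_app, Settings

    settings = Settings(
        model="./models/llama-model.gguf",   # placeholder model path
        ssl_keyfile="./certs/key.pem",       # placeholder private key
        ssl_certfile="./certs/cert.pem",     # placeholder certificate
    )

    app = create_app(settings=settings)
    uvicorn.run(
        app,
        host=settings.host,
        port=settings.port,
        ssl_keyfile=settings.ssl_keyfile,
        ssl_certfile=settings.ssl_certfile,
    )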