From 62630de0f94076d93881e9ace393145c58673400 Mon Sep 17 00:00:00 2001
From: "Timothy J. Baek"
Date: Tue, 21 May 2024 22:43:43 -0700
Subject: [PATCH] feat: llama cpp pipeline

---
 main.py                                  | 10 ++++-
 pipelines/examples/llama_cpp_pipeline.py | 47 ++++++++++++++++++++++++
 2 files changed, 55 insertions(+), 2 deletions(-)
 create mode 100644 pipelines/examples/llama_cpp_pipeline.py

diff --git a/main.py b/main.py
index f735241..63234e7 100644
--- a/main.py
+++ b/main.py
@@ -146,6 +146,10 @@ async def generate_openai_chat_completion(form_data: OpenAIChatCompletionForm):
 
         if isinstance(res, Iterator):
             for line in res:
+                if isinstance(line, BaseModel):
+                    line = line.model_dump_json()
+                    line = f"data: {line}"
+
                 try:
                     line = line.decode("utf-8")
                 except:
@@ -189,18 +193,20 @@ async def generate_openai_chat_completion(form_data: OpenAIChatCompletionForm):
 
         if isinstance(res, dict):
             return res
+        elif isinstance(res, BaseModel):
+            return res.model_dump()
         else:
+            message = ""
 
             if isinstance(res, str):
                 message = res
 
-            elif isinstance(res, Generator):
+            if isinstance(res, Generator):
                 for stream in res:
                     message = f"{message}{stream}"
 
             logging.info(f"stream:false:{message}")
-
             return {
                 "id": f"{form_data.model}-{str(uuid.uuid4())}",
                 "object": "chat.completion",
diff --git a/pipelines/examples/llama_cpp_pipeline.py b/pipelines/examples/llama_cpp_pipeline.py
new file mode 100644
index 0000000..2032558
--- /dev/null
+++ b/pipelines/examples/llama_cpp_pipeline.py
@@ -0,0 +1,47 @@
+from typing import List, Union, Generator, Iterator
+from schemas import OpenAIChatMessage
+
+
+class Pipeline:
+    def __init__(self):
+        # Optionally, you can set the id and name of the pipeline.
+        self.id = "llama_cpp_pipeline"
+        self.name = "Llama C++ Pipeline"
+        self.llm = None
+        pass
+
+    async def on_startup(self):
+        # This function is called when the server is started.
+        print(f"on_startup:{__name__}")
+        from llama_cpp import Llama
+
+        self.llm = Llama(
+            model_path="./models/llama3.gguf",
+            # n_gpu_layers=-1, # Uncomment to use GPU acceleration
+            # seed=1337, # Uncomment to set a specific seed
+            # n_ctx=2048, # Uncomment to increase the context window
+        )
+
+        pass
+
+    async def on_shutdown(self):
+        # This function is called when the server is stopped.
+        print(f"on_shutdown:{__name__}")
+        pass
+
+    def get_response(
+        self, user_message: str, messages: List[OpenAIChatMessage], body: dict
+    ) -> Union[str, Generator, Iterator]:
+        # This is where you can add your custom pipelines like RAG.
+        print(f"get_response:{__name__}")
+
+        print(messages)
+        print(user_message)
+        print(body)
+
+        response = self.llm.create_chat_completion_openai_v1(
+            messages=[message.model_dump() for message in messages],
+            stream=body["stream"],
+        )
+
+        return response
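
For review context only, here is a minimal standalone sketch (not part of the patch) of what the new BaseModel branch in main.py does when a pipeline streams pydantic chunk objects: each chunk is serialized with model_dump_json() and wrapped in an SSE "data:" frame, while plain str/bytes chunks fall through to the existing decode path. FakeChunk below is a hypothetical stand-in for the OpenAI-style chunk objects that llama-cpp-python yields when streaming.

# Minimal sketch (not part of the patch) of the SSE framing added in main.py.
# FakeChunk is a hypothetical stand-in for the pydantic chunk objects a
# pipeline yields when body["stream"] is true.
from typing import Iterator, Union

from pydantic import BaseModel


class FakeChunk(BaseModel):
    content: str


def to_sse_frames(res: Iterator[Union[BaseModel, bytes, str]]) -> Iterator[str]:
    for line in res:
        if isinstance(line, BaseModel):
            # Same conversion the patch adds: pydantic chunk -> JSON -> SSE frame.
            line = line.model_dump_json()
            line = f"data: {line}"

        try:
            # Mirrors the existing decode path for byte chunks.
            line = line.decode("utf-8")
        except (UnicodeDecodeError, AttributeError):
            pass

        yield line


if __name__ == "__main__":
    chunks = iter([FakeChunk(content="Hello"), b"data: [DONE]"])
    for frame in to_sse_frames(chunks):
        print(frame)
    # data: {"content":"Hello"}
    # data: [DONE]

Framing the chunks server-side keeps the /v1/chat/completions endpoint emitting OpenAI-style SSE lines regardless of whether a pipeline yields strings, bytes, or pydantic models.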
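
Similarly, a hedged usage sketch for the new example pipeline: it assumes llama-cpp-python is installed, that ./models/llama3.gguf exists, and that schemas.OpenAIChatMessage exposes role/content fields; the import path is illustrative and depends on how pipelines/examples sits on sys.path.

# Usage sketch for the new example pipeline (assumptions: llama-cpp-python is
# installed, ./models/llama3.gguf exists, and OpenAIChatMessage has
# role/content fields; the import path below is illustrative).
import asyncio

from schemas import OpenAIChatMessage
from llama_cpp_pipeline import Pipeline  # pipelines/examples/llama_cpp_pipeline.py


async def main():
    pipeline = Pipeline()
    await pipeline.on_startup()  # loads the GGUF model via llama_cpp.Llama

    messages = [OpenAIChatMessage(role="user", content="Hello!")]
    response = pipeline.get_response(
        user_message="Hello!",
        messages=messages,
        body={"stream": False},  # True would return an iterator of chunk objects
    )
    print(response)

    await pipeline.on_shutdown()


asyncio.run(main())

As the comments in the new file note, on_startup/on_shutdown are called by the server itself when it starts and stops; the sketch only mirrors that lifecycle for local experimentation.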