feat: ollama pipeline

Timothy J. Baek 2024-05-21 22:03:54 -07:00
parent f1bcd5be0f
commit b3bb653f46
3 changed files with 113 additions and 7 deletions

main.py

@@ -18,6 +18,7 @@ from schemas import OpenAIChatCompletionForm
import os
import importlib.util
import logging
+from concurrent.futures import ThreadPoolExecutor
@@ -37,7 +38,7 @@ def load_modules_from_directory(directory):
for loaded_module in load_modules_from_directory("./pipelines"):
# Do something with the loaded module
print("Loaded:", loaded_module.__name__)
logging.info("Loaded:", loaded_module.__name__)
pipeline = loaded_module.Pipeline()
@@ -105,6 +106,7 @@ async def get_models():
"object": "model",
"created": int(time.time()),
"owned_by": "openai",
"pipeline": True,
}
for pipeline in PIPELINES.values()
]
@@ -123,7 +125,7 @@ async def generate_openai_chat_completion(form_data: OpenAIChatCompletionForm):
)
def job():
-print(form_data.model)
+logging.info(form_data.model)
get_response = app.state.PIPELINES[form_data.model]["module"].get_response
if form_data.stream:
@@ -135,11 +137,11 @@ async def generate_openai_chat_completion(form_data: OpenAIChatCompletionForm):
body=form_data.model_dump(),
)
print(f"stream:true:{res}")
logging.info(f"stream:true:{res}")
if isinstance(res, str):
message = stream_message_template(form_data.model, res)
print(f"stream_content:str:{message}")
logging.info(f"stream_content:str:{message}")
yield f"data: {json.dumps(message)}\n\n"
if isinstance(res, Iterator):
@@ -149,7 +151,7 @@ async def generate_openai_chat_completion(form_data: OpenAIChatCompletionForm):
except:
pass
print(f"stream_content:Generator:{line}")
logging.info(f"stream_content:Generator:{line}")
if line.startswith("data:"):
yield f"{line}\n\n"
@@ -183,7 +185,7 @@ async def generate_openai_chat_completion(form_data: OpenAIChatCompletionForm):
messages=form_data.messages,
body=form_data.model_dump(),
)
print(f"stream:false:{res}")
logging.info(f"stream:false:{res}")
if isinstance(res, dict):
return res
@@ -197,7 +199,7 @@ async def generate_openai_chat_completion(form_data: OpenAIChatCompletionForm):
for stream in res:
message = f"{message}{stream}"
print(f"stream:false:{message}")
logging.info(f"stream:false:{message}")
return {
"id": f"{form_data.model}-{str(uuid.uuid4())}",


@@ -0,0 +1,52 @@
from typing import List, Union, Generator, Iterator
from schemas import OpenAIChatMessage
import requests


class Pipeline:
    def __init__(self):
        # Optionally, you can set the id and name of the pipeline.
        self.id = "ollama_pipeline"
        self.name = "Ollama Pipeline"
        pass

    async def on_startup(self):
        # This function is called when the server is started.
        print(f"on_startup:{__name__}")
        pass

    async def on_shutdown(self):
        # This function is called when the server is stopped.
        print(f"on_shutdown:{__name__}")
        pass

    def get_response(
        self, user_message: str, messages: List[OpenAIChatMessage], body: dict
    ) -> Union[str, Generator, Iterator]:
        # This is where you can add your custom pipelines like RAG.
        print(f"get_response:{__name__}")

        OLLAMA_BASE_URL = "http://localhost:11434"
        MODEL = "llama3"

        if "user" in body:
            print("######################################")
            print(f'# User: {body["user"]["name"]} ({body["user"]["id"]})')
            print(f"# Message: {user_message}")
            print("######################################")

        try:
            r = requests.post(
                url=f"{OLLAMA_BASE_URL}/v1/chat/completions",
                json={**body, "model": MODEL},
                stream=True,
            )

            r.raise_for_status()

            if body["stream"]:
                return r.iter_lines()
            else:
                return r.json()
        except Exception as e:
            return f"Error: {e}"


@@ -0,0 +1,52 @@
from typing import List, Union, Generator, Iterator
from schemas import OpenAIChatMessage
import requests


class Pipeline:
    def __init__(self):
        # Optionally, you can set the id and name of the pipeline.
        self.id = "ollama_pipeline"
        self.name = "Ollama Pipeline"
        pass

    async def on_startup(self):
        # This function is called when the server is started.
        print(f"on_startup:{__name__}")
        pass

    async def on_shutdown(self):
        # This function is called when the server is stopped.
        print(f"on_shutdown:{__name__}")
        pass

    def get_response(
        self, user_message: str, messages: List[OpenAIChatMessage], body: dict
    ) -> Union[str, Generator, Iterator]:
        # This is where you can add your custom pipelines like RAG.
        print(f"get_response:{__name__}")

        OLLAMA_BASE_URL = "http://localhost:11434"
        MODEL = "llama3"

        if "user" in body:
            print("######################################")
            print(f'# User: {body["user"]["name"]} ({body["user"]["id"]})')
            print(f"# Message: {user_message}")
            print("######################################")

        try:
            r = requests.post(
                url=f"{OLLAMA_BASE_URL}/v1/chat/completions",
                json={**body, "model": MODEL},
                stream=True,
            )

            r.raise_for_status()

            if body["stream"]:
                return r.iter_lines()
            else:
                return r.json()
        except Exception as e:
            return f"Error: {e}"