diff --git a/main.py b/main.py
index e8d6aad..45db6e8 100644
--- a/main.py
+++ b/main.py
@@ -129,7 +129,11 @@ async def generate_openai_chat_completion(form_data: OpenAIChatCompletionForm):
     if form_data.stream:

         def stream_content():
-            res = get_response(user_message, messages=form_data.messages)
+            res = get_response(
+                user_message,
+                messages=form_data.messages,
+                body=form_data.model_dump(),
+            )

             print(f"stream:true:{res}")

@@ -164,7 +168,11 @@ async def generate_openai_chat_completion(form_data: OpenAIChatCompletionForm):

         return StreamingResponse(stream_content(), media_type="text/event-stream")
     else:
-        res = get_response(user_message, messages=form_data.messages)
+        res = get_response(
+            user_message,
+            messages=form_data.messages,
+            body=form_data.model_dump(),
+        )
         print(f"stream:false:{res}")

         message = ""
diff --git a/pipelines/examples/haystack_pipeline.py b/pipelines/examples/haystack_pipeline.py
index b7df99d..309e5f3 100644
--- a/pipelines/examples/haystack_pipeline.py
+++ b/pipelines/examples/haystack_pipeline.py
@@ -79,7 +79,7 @@ class Pipeline:
         pass

     def get_response(
-        self, user_message: str, messages: List[OpenAIChatMessage]
+        self, user_message: str, messages: List[OpenAIChatMessage], body: dict
     ) -> Union[str, Generator]:
         # This is where you can add your custom RAG pipeline.
         # Typically, you would retrieve relevant information from your knowledge base and synthesize it to generate a response.
diff --git a/pipelines/examples/llamaindex_ollama_github_pipeline.py b/pipelines/examples/llamaindex_ollama_github_pipeline.py
index 3f04661..09c7e9c 100644
--- a/pipelines/examples/llamaindex_ollama_github_pipeline.py
+++ b/pipelines/examples/llamaindex_ollama_github_pipeline.py
@@ -70,7 +70,7 @@ class Pipeline:
         pass

     def get_response(
-        self, user_message: str, messages: List[OpenAIChatMessage]
+        self, user_message: str, messages: List[OpenAIChatMessage], body: dict
     ) -> Union[str, Generator]:
         # This is where you can add your custom RAG pipeline.
         # Typically, you would retrieve relevant information from your knowledge base and synthesize it to generate a response.
diff --git a/pipelines/examples/llamaindex_ollama_pipeline.py b/pipelines/examples/llamaindex_ollama_pipeline.py
index 8985222..c846c0d 100644
--- a/pipelines/examples/llamaindex_ollama_pipeline.py
+++ b/pipelines/examples/llamaindex_ollama_pipeline.py
@@ -30,7 +30,7 @@ class Pipeline:
         pass

     def get_response(
-        self, user_message: str, messages: List[OpenAIChatMessage]
+        self, user_message: str, messages: List[OpenAIChatMessage], body: dict
     ) -> Union[str, Generator]:
         # This is where you can add your custom RAG pipeline.
         # Typically, you would retrieve relevant information from your knowledge base and synthesize it to generate a response.
diff --git a/pipelines/examples/llamaindex_pipeline.py b/pipelines/examples/llamaindex_pipeline.py
index 975d4e3..d61777b 100644
--- a/pipelines/examples/llamaindex_pipeline.py
+++ b/pipelines/examples/llamaindex_pipeline.py
@@ -25,7 +25,7 @@ class Pipeline:
         pass

     def get_response(
-        self, user_message: str, messages: List[OpenAIChatMessage]
+        self, user_message: str, messages: List[OpenAIChatMessage], body: dict
     ) -> Union[str, Generator]:
         # This is where you can add your custom RAG pipeline.
         # Typically, you would retrieve relevant information from your knowledge base and synthesize it to generate a response.
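Taken together, the hunks above change the pipeline contract: every `get_response` implementation now receives the full chat-completion request as `body`, alongside `user_message` and `messages`. A minimal sketch of a custom pipeline written against the new signature follows; the `model` and `temperature` lookups are illustrative assumptions about keys in the OpenAI-style payload, not part of this diff:

```python
from typing import List, Union, Generator
from schemas import OpenAIChatMessage


class Pipeline:
    async def on_startup(self):
        pass

    async def on_shutdown(self):
        pass

    def get_response(
        self, user_message: str, messages: List[OpenAIChatMessage], body: dict
    ) -> Union[str, Generator]:
        # `body` carries the whole chat-completion request, so options that
        # were previously invisible to pipelines can now be inspected.
        # The keys below are assumed to follow the OpenAI chat schema.
        model = body.get("model", "unknown-model")
        temperature = body.get("temperature", 1.0)
        return f"[{model}, temperature={temperature}] echo: {user_message}"
```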
diff --git a/pipelines/examples/openai_pipeline.py b/pipelines/examples/openai_pipeline.py
new file mode 100644
index 0000000..16e9db5
--- /dev/null
+++ b/pipelines/examples/openai_pipeline.py
@@ -0,0 +1,49 @@
+from typing import List, Union, Generator
+from schemas import OpenAIChatMessage
+import requests
+
+
+class Pipeline:
+    def __init__(self):
+        pass
+
+    async def on_startup(self):
+        # This function is called when the server is started.
+        print(f"on_startup:{__name__}")
+        pass
+
+    async def on_shutdown(self):
+        # This function is called when the server is stopped.
+        print(f"on_shutdown:{__name__}")
+        pass
+
+    def get_response(
+        self, user_message: str, messages: List[OpenAIChatMessage], body: dict
+    ) -> Union[str, Generator]:
+        # This is where you can add your custom pipelines like RAG.
+        print(f"get_response:{__name__}")
+
+        print(messages)
+        print(user_message)
+        OPENAI_API_KEY = "your-api-key-here"
+
+        headers = {}
+        headers["Authorization"] = f"Bearer {OPENAI_API_KEY}"
+        headers["Content-Type"] = "application/json"
+
+        r = requests.request(
+            method="POST",
+            url="https://api.openai.com/v1/chat/completions",
+            json=body,
+            headers=headers,
+            stream=True,
+        )
+
+        r.raise_for_status()
+
+        # Check if the response is streamed as server-sent events.
+        if "text/event-stream" in r.headers.get("Content-Type", ""):
+            return r.iter_content(chunk_size=8192)
+        else:
+            response_data = r.json()
+            return response_data["choices"][0]["message"]["content"]
diff --git a/pipelines/examples/pipeline_example.py b/pipelines/examples/pipeline_example.py
index 4341c97..ec9edfd 100644
--- a/pipelines/examples/pipeline_example.py
+++ b/pipelines/examples/pipeline_example.py
@@ -17,12 +17,13 @@ class Pipeline:
         pass

     def get_response(
-        self, user_message: str, messages: List[OpenAIChatMessage]
+        self, user_message: str, messages: List[OpenAIChatMessage], body: dict
     ) -> Union[str, Generator]:
         # This is where you can add your custom pipelines like RAG.'
         print(f"get_response:{__name__}")

         print(messages)
         print(user_message)
+        print(body)

         return f"{__name__} response to: {user_message}"
diff --git a/pipelines/pipeline.py b/pipelines/pipeline.py
index 4341c97..0fda0ad 100644
--- a/pipelines/pipeline.py
+++ b/pipelines/pipeline.py
@@ -17,7 +17,7 @@ class Pipeline:
         pass

     def get_response(
-        self, user_message: str, messages: List[OpenAIChatMessage]
+        self, user_message: str, messages: List[OpenAIChatMessage], body: dict
     ) -> Union[str, Generator]:
         # This is where you can add your custom pipelines like RAG.'
         print(f"get_response:{__name__}")
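For reference, here is a hypothetical driver that exercises the new OpenAI example outside the FastAPI app. The model name, messages, and the empty `messages` list are placeholders; the `isinstance` branch simply reflects the `Union[str, Generator]` return type, where a plain string means a non-streaming reply and anything else is an iterator of raw SSE byte chunks:

```python
from pipelines.examples.openai_pipeline import Pipeline

pipeline = Pipeline()
body = {
    "model": "gpt-3.5-turbo",  # placeholder model name
    "messages": [{"role": "user", "content": "Hello!"}],
    "stream": False,
}

res = pipeline.get_response(
    user_message="Hello!",
    messages=[],  # simplified; main.py passes parsed OpenAIChatMessage objects
    body=body,
)

if isinstance(res, str):
    # Non-streaming: the assistant message text.
    print(res)
else:
    # Streaming: raw SSE byte chunks from requests.iter_content.
    for chunk in res:
        print(chunk.decode("utf-8"), end="")
```

Note that the example forwards `body` verbatim and ignores `user_message` and `messages`, which keeps it a thin proxy: whatever options the client sent reach the upstream API unchanged.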