diff --git a/main.py b/main.py index 506d235..9e21e5d 100644 --- a/main.py +++ b/main.py @@ -109,7 +109,7 @@ async def get_models(): @app.post("/chat/completions") @app.post("/v1/chat/completions") -async def generate_openai_chat_completion(form_data: OpenAIChatCompletionForm): +def generate_openai_chat_completion(form_data: OpenAIChatCompletionForm): user_message = get_last_user_message(form_data.messages) if form_data.model not in PIPELINES: @@ -119,7 +119,6 @@ async def generate_openai_chat_completion(form_data: OpenAIChatCompletionForm): ) def job(): - get_response = PIPELINES[form_data.model]["module"].get_response if form_data.stream: @@ -138,7 +137,7 @@ async def generate_openai_chat_completion(form_data: OpenAIChatCompletionForm): yield f"data: {json.dumps(message)}\n\n" finish_message = { - "id": f"rag-{str(uuid.uuid4())}", + "id": f"{form_data.model}-{str(uuid.uuid4())}", "object": "chat.completion.chunk", "created": int(time.time()), "model": MODEL_ID, @@ -168,7 +167,7 @@ async def generate_openai_chat_completion(form_data: OpenAIChatCompletionForm): message = f"{message}{stream}" return { - "id": f"rag-{str(uuid.uuid4())}", + "id": f"{form_data.model}-{str(uuid.uuid4())}", "object": "chat.completion", "created": int(time.time()), "model": MODEL_ID, @@ -185,14 +184,7 @@ async def generate_openai_chat_completion(form_data: OpenAIChatCompletionForm): ], } - try: - return await run_in_threadpool(job) - except Exception as e: - print(e) - raise HTTPException( - status_code=500, - detail="{e}", - ) + return job() @app.get("/") diff --git a/pipelines/examples/haystack_pipeline.py b/pipelines/examples/haystack_pipeline.py index 5a87486..b5dc31f 100644 --- a/pipelines/examples/haystack_pipeline.py +++ b/pipelines/examples/haystack_pipeline.py @@ -2,76 +2,7 @@ from typing import List, Union, Generator from schemas import OpenAIChatMessage import os - -os.environ["OPENAI_API_KEY"] = "your_openai_api_key_here" - -from haystack.components.embedders import SentenceTransformersDocumentEmbedder -from haystack.components.embedders import SentenceTransformersTextEmbedder -from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever -from haystack.components.builders import PromptBuilder -from haystack.components.generators import OpenAIGenerator - -from haystack.document_stores.in_memory import InMemoryDocumentStore - - -from datasets import load_dataset -from haystack import Document -from haystack import Pipeline - - -document_store = InMemoryDocumentStore() - -dataset = load_dataset("bilgeyucel/seven-wonders", split="train") -docs = [Document(content=doc["content"], meta=doc["meta"]) for doc in dataset] - - -doc_embedder = SentenceTransformersDocumentEmbedder( - model="sentence-transformers/all-MiniLM-L6-v2" -) -doc_embedder.warm_up() - - -docs_with_embeddings = doc_embedder.run(docs) -document_store.write_documents(docs_with_embeddings["documents"]) - - -text_embedder = SentenceTransformersTextEmbedder( - model="sentence-transformers/all-MiniLM-L6-v2" -) - - -retriever = InMemoryEmbeddingRetriever(document_store) - - -template = """ -Given the following information, answer the question. 
- -Context: -{% for document in documents %} - {{ document.content }} -{% endfor %} - -Question: {{question}} -Answer: -""" - -prompt_builder = PromptBuilder(template=template) - - -generator = OpenAIGenerator(model="gpt-3.5-turbo") - - -basic_rag_pipeline = Pipeline() -# Add components to your pipeline -basic_rag_pipeline.add_component("text_embedder", text_embedder) -basic_rag_pipeline.add_component("retriever", retriever) -basic_rag_pipeline.add_component("prompt_builder", prompt_builder) -basic_rag_pipeline.add_component("llm", generator) - -# Now, connect the components to each other -basic_rag_pipeline.connect("text_embedder.embedding", "retriever.query_embedding") -basic_rag_pipeline.connect("retriever", "prompt_builder.documents") -basic_rag_pipeline.connect("prompt_builder", "llm") +global basic_rag_pipeline def get_response( @@ -92,6 +23,68 @@ def get_response( async def on_startup(): + + os.environ["OPENAI_API_KEY"] = "your_openai_api_key_here" + + from haystack.components.embedders import SentenceTransformersDocumentEmbedder + from haystack.components.embedders import SentenceTransformersTextEmbedder + from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever + from haystack.components.builders import PromptBuilder + from haystack.components.generators import OpenAIGenerator + + from haystack.document_stores.in_memory import InMemoryDocumentStore + + from datasets import load_dataset + from haystack import Document + from haystack import Pipeline + + document_store = InMemoryDocumentStore() + + dataset = load_dataset("bilgeyucel/seven-wonders", split="train") + docs = [Document(content=doc["content"], meta=doc["meta"]) for doc in dataset] + + doc_embedder = SentenceTransformersDocumentEmbedder( + model="sentence-transformers/all-MiniLM-L6-v2" + ) + doc_embedder.warm_up() + + docs_with_embeddings = doc_embedder.run(docs) + document_store.write_documents(docs_with_embeddings["documents"]) + + text_embedder = SentenceTransformersTextEmbedder( + model="sentence-transformers/all-MiniLM-L6-v2" + ) + + retriever = InMemoryEmbeddingRetriever(document_store) + + template = """ + Given the following information, answer the question. + + Context: + {% for document in documents %} + {{ document.content }} + {% endfor %} + + Question: {{question}} + Answer: + """ + + prompt_builder = PromptBuilder(template=template) + + generator = OpenAIGenerator(model="gpt-3.5-turbo") + + basic_rag_pipeline = Pipeline() + # Add components to your pipeline + basic_rag_pipeline.add_component("text_embedder", text_embedder) + basic_rag_pipeline.add_component("retriever", retriever) + basic_rag_pipeline.add_component("prompt_builder", prompt_builder) + basic_rag_pipeline.add_component("llm", generator) + + # Now, connect the components to each other + basic_rag_pipeline.connect("text_embedder.embedding", "retriever.query_embedding") + basic_rag_pipeline.connect("retriever", "prompt_builder.documents") + basic_rag_pipeline.connect("prompt_builder", "llm") + # This function is called when the server is started. 
pass diff --git a/pipelines/examples/llamaindex_ollama_github_pipeline.py b/pipelines/examples/llamaindex_ollama_github_pipeline.py index d343080..a951283 100644 --- a/pipelines/examples/llamaindex_ollama_github_pipeline.py +++ b/pipelines/examples/llamaindex_ollama_github_pipeline.py @@ -3,18 +3,6 @@ from schemas import OpenAIChatMessage import os import asyncio -from llama_index.embeddings.ollama import OllamaEmbedding -from llama_index.llms.ollama import Ollama -from llama_index.core import VectorStoreIndex, Settings -from llama_index.readers.github import GithubRepositoryReader, GithubClient - -Settings.embed_model = OllamaEmbedding( - model_name="nomic-embed-text", - base_url="http://localhost:11434", -) -Settings.llm = Ollama(model="llama3") - - index = None documents = None @@ -35,6 +23,18 @@ def get_response( async def on_startup(): + + from llama_index.embeddings.ollama import OllamaEmbedding + from llama_index.llms.ollama import Ollama + from llama_index.core import VectorStoreIndex, Settings + from llama_index.readers.github import GithubRepositoryReader, GithubClient + + Settings.embed_model = OllamaEmbedding( + model_name="nomic-embed-text", + base_url="http://localhost:11434", + ) + Settings.llm = Ollama(model="llama3") + global index, documents github_token = os.environ.get("GITHUB_TOKEN") diff --git a/pipelines/examples/llamaindex_ollama_pipeline.py b/pipelines/examples/llamaindex_ollama_pipeline.py index 43eb09d..3f427ec 100644 --- a/pipelines/examples/llamaindex_ollama_pipeline.py +++ b/pipelines/examples/llamaindex_ollama_pipeline.py @@ -1,17 +1,6 @@ from typing import List, Union, Generator from schemas import OpenAIChatMessage -from llama_index.embeddings.ollama import OllamaEmbedding -from llama_index.llms.ollama import Ollama -from llama_index.core import Settings, VectorStoreIndex, SimpleDirectoryReader - - -Settings.embed_model = OllamaEmbedding( - model_name="nomic-embed-text", - base_url="http://localhost:11434", -) -Settings.llm = Ollama(model="llama3") - documents = None index = None @@ -29,10 +18,22 @@ def get_response( query_engine = index.as_query_engine(streaming=True) response = query_engine.query(user_message) + print(response) + return response.response_gen async def on_startup(): + from llama_index.embeddings.ollama import OllamaEmbedding + from llama_index.llms.ollama import Ollama + from llama_index.core import Settings, VectorStoreIndex, SimpleDirectoryReader + + Settings.embed_model = OllamaEmbedding( + model_name="nomic-embed-text", + base_url="http://localhost:11434", + ) + Settings.llm = Ollama(model="llama3") + # This function is called when the server is started. 
global documents, index diff --git a/pipelines/examples/llamaindex_pipeline.py b/pipelines/examples/llamaindex_pipeline.py index e709bd0..ea2c6bd 100644 --- a/pipelines/examples/llamaindex_pipeline.py +++ b/pipelines/examples/llamaindex_pipeline.py @@ -1,15 +1,8 @@ from typing import List, Union, Generator from schemas import OpenAIChatMessage -import os - -# Set the OpenAI API key -os.environ["OPENAI_API_KEY"] = "your_openai_api_key_here" - -from llama_index.core import VectorStoreIndex, SimpleDirectoryReader - -documents = SimpleDirectoryReader("./data").load_data() -index = VectorStoreIndex.from_documents(documents) +documents = None +index = None def get_response( @@ -28,6 +21,16 @@ def get_response( async def on_startup(): + global documents, index + import os + + # Set the OpenAI API key + os.environ["OPENAI_API_KEY"] = "your_openai_api_key_here" + + from llama_index.core import VectorStoreIndex, SimpleDirectoryReader + + documents = SimpleDirectoryReader("./data").load_data() + index = VectorStoreIndex.from_documents(documents) # This function is called when the server is started. pass diff --git a/pipelines/examples/pipeline.py b/pipelines/examples/pipeline.py index f867969..2d987c1 100644 --- a/pipelines/examples/pipeline.py +++ b/pipelines/examples/pipeline.py @@ -16,12 +16,7 @@ def get_response( async def on_startup(): # This function is called when the server is started. print(f"on_startup:{__name__}") - - # Optional: return pipeline metadata - # return { - # "id": "pipeline_id", - # "name": "pipeline_name", - # } + pass async def on_shutdown():
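
Taken together, these changes defer the heavy imports and index/pipeline construction from module import time to `on_startup()`, publishing the result through a module-level global that `get_response()` reads on each request. Below is a minimal sketch of a new example pipeline following that pattern; the `index` placeholder, the echo response body, and the exact `get_response` signature are assumptions for illustration and are not part of this diff.

```python
# Minimal sketch of the lazy-initialization pattern used by the example pipelines:
# expensive setup happens in on_startup(), not at import time.
from typing import List, Union, Generator
from schemas import OpenAIChatMessage

index = None  # populated once in on_startup(), read by get_response()


def get_response(
    user_message: str, messages: List[OpenAIChatMessage]
) -> Union[str, Generator]:
    # Called per request; on_startup() has already built `index` by now.
    print(user_message)
    return f"received: {user_message}"


async def on_startup():
    # Called when the server starts: do heavy imports and object construction
    # here, and declare the global so the module-level name is actually rebound.
    global index
    index = {}  # stand-in for an expensive object (vector index, RAG pipeline, ...)


async def on_shutdown():
    # Called when the server stops.
    pass
```

Note that rebinding the module-level name requires the `global` declaration inside `on_startup()`; without it the assignment only creates a function-local variable and `get_response()` would still see `None`.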