diff --git a/examples/haystack_pipeline.py b/examples/haystack_pipeline.py
new file mode 100644
index 0000000..52ef777
--- /dev/null
+++ b/examples/haystack_pipeline.py
@@ -0,0 +1,97 @@
+from typing import List, Union, Generator
+from schemas import OpenAIChatMessage
+import os
+
+
+# Replace the placeholder with a real key; OpenAIGenerator reads it from the environment.
+os.environ["OPENAI_API_KEY"] = "your_openai_api_key_here"
+
+from haystack.components.embedders import SentenceTransformersDocumentEmbedder
+from haystack.components.embedders import SentenceTransformersTextEmbedder
+from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever
+from haystack.components.builders import PromptBuilder
+from haystack.components.generators import OpenAIGenerator
+
+from haystack.document_stores.in_memory import InMemoryDocumentStore
+
+
+from datasets import load_dataset
+from haystack import Document
+from haystack import Pipeline
+
+
+document_store = InMemoryDocumentStore()
+
+# Load the Seven Wonders dataset and wrap each row in a Haystack Document.
+dataset = load_dataset("bilgeyucel/seven-wonders", split="train")
+docs = [Document(content=doc["content"], meta=doc["meta"]) for doc in dataset]
+
+
+# Embed all documents once at startup so the retriever can search by vector.
+doc_embedder = SentenceTransformersDocumentEmbedder(
+    model="sentence-transformers/all-MiniLM-L6-v2"
+)
+doc_embedder.warm_up()
+
+
+docs_with_embeddings = doc_embedder.run(docs)
+document_store.write_documents(docs_with_embeddings["documents"])
+
+
+# Embeds the incoming question with the same model used for the documents.
+text_embedder = SentenceTransformersTextEmbedder(
+    model="sentence-transformers/all-MiniLM-L6-v2"
+)
+
+
+retriever = InMemoryEmbeddingRetriever(document_store)
+
+
+# Jinja2 template; `documents` and `question` are filled in at query time.
+template = """
+Given the following information, answer the question.
+
+Context:
+{% for document in documents %}
+    {{ document.content }}
+{% endfor %}
+
+Question: {{question}}
+Answer:
+"""
+
+prompt_builder = PromptBuilder(template=template)
+
+
+generator = OpenAIGenerator(model="gpt-3.5-turbo")
+
+
+basic_rag_pipeline = Pipeline()
+# Add components to your pipeline
+basic_rag_pipeline.add_component("text_embedder", text_embedder)
+basic_rag_pipeline.add_component("retriever", retriever)
+basic_rag_pipeline.add_component("prompt_builder", prompt_builder)
+basic_rag_pipeline.add_component("llm", generator)
+
+# Now, connect the components to each other
+basic_rag_pipeline.connect("text_embedder.embedding", "retriever.query_embedding")
+basic_rag_pipeline.connect("retriever", "prompt_builder.documents")
+basic_rag_pipeline.connect("prompt_builder", "llm")
+
+
+def get_response(
+    user_message: str, messages: List[OpenAIChatMessage]
+) -> Union[str, Generator]:
+    # This is where you can add your custom RAG pipeline.
+    # Typically, you would retrieve relevant information from your
+    # knowledge base and synthesize it to generate a response.
+
+    print(messages)
+    print(user_message)
+
+    question = user_message
+    response = basic_rag_pipeline.run(
+        {"text_embedder": {"text": question}, "prompt_builder": {"question": question}}
+    )
+
+    return response["llm"]["replies"][0]
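
A quick way to smoke-test the pipeline above, outside of the `get_response` entry point: a minimal sketch, assuming the file is importable as `examples.haystack_pipeline` from the repo root and that the placeholder API key in the file has been replaced with a real one (the module path and question are illustrative). Note that importing the module downloads the dataset and embeds every document, so the first run takes a while.

    # Hypothetical standalone check; the module path depends on the repo layout.
    from examples.haystack_pipeline import basic_rag_pipeline

    question = "Why did people build the Great Pyramid of Giza?"
    result = basic_rag_pipeline.run(
        {"text_embedder": {"text": question}, "prompt_builder": {"question": question}}
    )
    # OpenAIGenerator returns its completions under the "replies" key.
    print(result["llm"]["replies"][0])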