Create haystack_pipeline.py

Timothy J. Baek 2024-05-21 15:01:30 -07:00
parent 025813d392
commit f2630aa9de


@@ -0,0 +1,91 @@
from typing import List, Union, Generator

from schemas import OpenAIChatMessage

import os

# Placeholder key read by OpenAIGenerator; replace it or export OPENAI_API_KEY instead.
os.environ["OPENAI_API_KEY"] = "your_openai_api_key_here"

from haystack.components.embedders import SentenceTransformersDocumentEmbedder
from haystack.components.embedders import SentenceTransformersTextEmbedder
from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever
from haystack.components.builders import PromptBuilder
from haystack.components.generators import OpenAIGenerator
from haystack.document_stores.in_memory import InMemoryDocumentStore
from datasets import load_dataset
from haystack import Document
from haystack import Pipeline
# Index the example dataset: embed each document and store it in memory.
document_store = InMemoryDocumentStore()

dataset = load_dataset("bilgeyucel/seven-wonders", split="train")
docs = [Document(content=doc["content"], meta=doc["meta"]) for doc in dataset]

doc_embedder = SentenceTransformersDocumentEmbedder(
    model="sentence-transformers/all-MiniLM-L6-v2"
)
doc_embedder.warm_up()
docs_with_embeddings = doc_embedder.run(docs)
document_store.write_documents(docs_with_embeddings["documents"])

# Query-side components: embed the incoming question and retrieve similar documents.
text_embedder = SentenceTransformersTextEmbedder(
    model="sentence-transformers/all-MiniLM-L6-v2"
)
retriever = InMemoryEmbeddingRetriever(document_store)
template = """
Given the following information, answer the question.
Context:
{% for document in documents %}
{{ document.content }}
{% endfor %}
Question: {{question}}
Answer:
"""
prompt_builder = PromptBuilder(template=template)
generator = OpenAIGenerator(model="gpt-3.5-turbo")
basic_rag_pipeline = Pipeline()
# Add components to your pipeline
basic_rag_pipeline.add_component("text_embedder", text_embedder)
basic_rag_pipeline.add_component("retriever", retriever)
basic_rag_pipeline.add_component("prompt_builder", prompt_builder)
basic_rag_pipeline.add_component("llm", generator)
# Now, connect the components to each other
basic_rag_pipeline.connect("text_embedder.embedding", "retriever.query_embedding")
basic_rag_pipeline.connect("retriever", "prompt_builder.documents")
basic_rag_pipeline.connect("prompt_builder", "llm")
def get_response(
    user_message: str, messages: List[OpenAIChatMessage]
) -> Union[str, Generator]:
    # This is where you can add your custom RAG pipeline.
    # Typically, you would retrieve relevant information from your knowledge base
    # and synthesize it to generate a response.
    print(messages)
    print(user_message)

    question = user_message
    response = basic_rag_pipeline.run(
        {"text_embedder": {"text": question}, "prompt_builder": {"question": question}}
    )

    return response["llm"]["replies"][0]
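For reference, a minimal invocation sketch (not part of this commit; the role/content fields of OpenAIChatMessage are assumptions based on a typical chat-message schema):

if __name__ == "__main__":
    # Hypothetical smoke test; adjust the OpenAIChatMessage fields to match schemas.py.
    history = [OpenAIChatMessage(role="user", content="What does the Rhodes Statue look like?")]
    print(get_response("What does the Rhodes Statue look like?", history))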