This commit is contained in:
Timothy J. Baek 2024-05-21 18:02:01 -07:00
parent 6b4fba3309
commit 6810ed2166
6 changed files with 104 additions and 120 deletions

16
main.py
View File

@ -109,7 +109,7 @@ async def get_models():
@app.post("/chat/completions")
@app.post("/v1/chat/completions")
async def generate_openai_chat_completion(form_data: OpenAIChatCompletionForm):
def generate_openai_chat_completion(form_data: OpenAIChatCompletionForm):
user_message = get_last_user_message(form_data.messages)
if form_data.model not in PIPELINES:
@ -119,7 +119,6 @@ async def generate_openai_chat_completion(form_data: OpenAIChatCompletionForm):
)
def job():
get_response = PIPELINES[form_data.model]["module"].get_response
if form_data.stream:
@ -138,7 +137,7 @@ async def generate_openai_chat_completion(form_data: OpenAIChatCompletionForm):
yield f"data: {json.dumps(message)}\n\n"
finish_message = {
"id": f"rag-{str(uuid.uuid4())}",
"id": f"{form_data.model}-{str(uuid.uuid4())}",
"object": "chat.completion.chunk",
"created": int(time.time()),
"model": MODEL_ID,
@ -168,7 +167,7 @@ async def generate_openai_chat_completion(form_data: OpenAIChatCompletionForm):
message = f"{message}{stream}"
return {
"id": f"rag-{str(uuid.uuid4())}",
"id": f"{form_data.model}-{str(uuid.uuid4())}",
"object": "chat.completion",
"created": int(time.time()),
"model": MODEL_ID,
@ -185,14 +184,7 @@ async def generate_openai_chat_completion(form_data: OpenAIChatCompletionForm):
],
}
try:
return await run_in_threadpool(job)
except Exception as e:
print(e)
raise HTTPException(
status_code=500,
detail="{e}",
)
return job()
@app.get("/")

View File

@ -2,76 +2,7 @@ from typing import List, Union, Generator
from schemas import OpenAIChatMessage
import os
os.environ["OPENAI_API_KEY"] = "your_openai_api_key_here"
from haystack.components.embedders import SentenceTransformersDocumentEmbedder
from haystack.components.embedders import SentenceTransformersTextEmbedder
from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever
from haystack.components.builders import PromptBuilder
from haystack.components.generators import OpenAIGenerator
from haystack.document_stores.in_memory import InMemoryDocumentStore
from datasets import load_dataset
from haystack import Document
from haystack import Pipeline
document_store = InMemoryDocumentStore()
dataset = load_dataset("bilgeyucel/seven-wonders", split="train")
docs = [Document(content=doc["content"], meta=doc["meta"]) for doc in dataset]
doc_embedder = SentenceTransformersDocumentEmbedder(
model="sentence-transformers/all-MiniLM-L6-v2"
)
doc_embedder.warm_up()
docs_with_embeddings = doc_embedder.run(docs)
document_store.write_documents(docs_with_embeddings["documents"])
text_embedder = SentenceTransformersTextEmbedder(
model="sentence-transformers/all-MiniLM-L6-v2"
)
retriever = InMemoryEmbeddingRetriever(document_store)
template = """
Given the following information, answer the question.
Context:
{% for document in documents %}
{{ document.content }}
{% endfor %}
Question: {{question}}
Answer:
"""
prompt_builder = PromptBuilder(template=template)
generator = OpenAIGenerator(model="gpt-3.5-turbo")
basic_rag_pipeline = Pipeline()
# Add components to your pipeline
basic_rag_pipeline.add_component("text_embedder", text_embedder)
basic_rag_pipeline.add_component("retriever", retriever)
basic_rag_pipeline.add_component("prompt_builder", prompt_builder)
basic_rag_pipeline.add_component("llm", generator)
# Now, connect the components to each other
basic_rag_pipeline.connect("text_embedder.embedding", "retriever.query_embedding")
basic_rag_pipeline.connect("retriever", "prompt_builder.documents")
basic_rag_pipeline.connect("prompt_builder", "llm")
global basic_rag_pipeline
def get_response(
@ -92,6 +23,68 @@ def get_response(
async def on_startup():
    """Build the example Haystack RAG pipeline when the server is started.

    Loads the "bilgeyucel/seven-wonders" dataset, embeds its documents into an
    in-memory document store, and connects a text embedder, retriever, prompt
    builder and OpenAI generator into a ``Pipeline``. The finished pipeline is
    published through the module-level name ``basic_rag_pipeline``.
    """
    # Without this declaration the assignment to ``basic_rag_pipeline`` below
    # would only create a local variable that is discarded when this function
    # returns, so nothing at module scope would ever hold the pipeline.
    global basic_rag_pipeline

    # NOTE(review): placeholder value; this assignment unconditionally
    # overwrites any OPENAI_API_KEY already present in the environment.
    os.environ["OPENAI_API_KEY"] = "your_openai_api_key_here"

    # Imported inside the function, so these packages are only loaded when
    # startup runs rather than when this module is imported.
    from haystack.components.embedders import SentenceTransformersDocumentEmbedder
    from haystack.components.embedders import SentenceTransformersTextEmbedder
    from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever
    from haystack.components.builders import PromptBuilder
    from haystack.components.generators import OpenAIGenerator

    from haystack.document_stores.in_memory import InMemoryDocumentStore

    from datasets import load_dataset
    from haystack import Document
    from haystack import Pipeline

    # Index the dataset: wrap each row in a Document, embed, and store.
    document_store = InMemoryDocumentStore()

    dataset = load_dataset("bilgeyucel/seven-wonders", split="train")
    docs = [Document(content=doc["content"], meta=doc["meta"]) for doc in dataset]

    doc_embedder = SentenceTransformersDocumentEmbedder(
        model="sentence-transformers/all-MiniLM-L6-v2"
    )
    doc_embedder.warm_up()

    docs_with_embeddings = doc_embedder.run(docs)
    document_store.write_documents(docs_with_embeddings["documents"])

    # Query-side components; the text embedder uses the same model name as
    # the document embedder above.
    text_embedder = SentenceTransformersTextEmbedder(
        model="sentence-transformers/all-MiniLM-L6-v2"
    )

    retriever = InMemoryEmbeddingRetriever(document_store)

    template = """
Given the following information, answer the question.
Context:
{% for document in documents %}
{{ document.content }}
{% endfor %}
Question: {{question}}
Answer:
"""

    prompt_builder = PromptBuilder(template=template)

    generator = OpenAIGenerator(model="gpt-3.5-turbo")

    basic_rag_pipeline = Pipeline()
    # Add components to your pipeline
    basic_rag_pipeline.add_component("text_embedder", text_embedder)
    basic_rag_pipeline.add_component("retriever", retriever)
    basic_rag_pipeline.add_component("prompt_builder", prompt_builder)
    basic_rag_pipeline.add_component("llm", generator)

    # Now, connect the components to each other
    basic_rag_pipeline.connect("text_embedder.embedding", "retriever.query_embedding")
    basic_rag_pipeline.connect("retriever", "prompt_builder.documents")
    basic_rag_pipeline.connect("prompt_builder", "llm")

View File

@ -3,18 +3,6 @@ from schemas import OpenAIChatMessage
import os
import asyncio
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.llms.ollama import Ollama
from llama_index.core import VectorStoreIndex, Settings
from llama_index.readers.github import GithubRepositoryReader, GithubClient
Settings.embed_model = OllamaEmbedding(
model_name="nomic-embed-text",
base_url="http://localhost:11434",
)
Settings.llm = Ollama(model="llama3")
index = None
documents = None
@ -35,6 +23,18 @@ def get_response(
async def on_startup():
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.llms.ollama import Ollama
from llama_index.core import VectorStoreIndex, Settings
from llama_index.readers.github import GithubRepositoryReader, GithubClient
Settings.embed_model = OllamaEmbedding(
model_name="nomic-embed-text",
base_url="http://localhost:11434",
)
Settings.llm = Ollama(model="llama3")
global index, documents
github_token = os.environ.get("GITHUB_TOKEN")

View File

@ -1,17 +1,6 @@
from typing import List, Union, Generator
from schemas import OpenAIChatMessage
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.llms.ollama import Ollama
from llama_index.core import Settings, VectorStoreIndex, SimpleDirectoryReader
Settings.embed_model = OllamaEmbedding(
model_name="nomic-embed-text",
base_url="http://localhost:11434",
)
Settings.llm = Ollama(model="llama3")
documents = None
index = None
@ -29,10 +18,22 @@ def get_response(
query_engine = index.as_query_engine(streaming=True)
response = query_engine.query(user_message)
print(response)
return response.response_gen
async def on_startup():
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.llms.ollama import Ollama
from llama_index.core import Settings, VectorStoreIndex, SimpleDirectoryReader
Settings.embed_model = OllamaEmbedding(
model_name="nomic-embed-text",
base_url="http://localhost:11434",
)
Settings.llm = Ollama(model="llama3")
# This function is called when the server is started.
global documents, index

View File

@ -1,15 +1,8 @@
from typing import List, Union, Generator
from schemas import OpenAIChatMessage
import os
# Set the OpenAI API key
os.environ["OPENAI_API_KEY"] = "your_openai_api_key_here"
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
documents = SimpleDirectoryReader("./data").load_data()
index = VectorStoreIndex.from_documents(documents)
documents = None
index = None
def get_response(
@ -28,6 +21,16 @@ def get_response(
async def on_startup():
    """Load the documents and build the vector index when the server is started.

    Reads everything under ``./data`` with ``SimpleDirectoryReader`` and stores
    both the loaded documents and the ``VectorStoreIndex`` built from them in
    the module-level names ``documents`` and ``index``.
    """
    global documents, index

    import os

    # Set the OpenAI API key
    os.environ["OPENAI_API_KEY"] = "your_openai_api_key_here"

    from llama_index.core import VectorStoreIndex, SimpleDirectoryReader

    reader = SimpleDirectoryReader("./data")
    documents = reader.load_data()
    index = VectorStoreIndex.from_documents(documents)

View File

@ -16,12 +16,7 @@ def get_response(
async def on_startup():
    """Run when the server is started; prints the name of this module."""
    print(f"on_startup:{__name__}")

    # Optional: return pipeline metadata
    # return {
    #     "id": "pipeline_id",
    #     "name": "pipeline_name",
    # }
async def on_shutdown():