This commit is contained in:
Timothy J. Baek 2024-06-18 14:55:18 -07:00
parent eb21750466
commit 20e4f6cc16

View File

@ -237,7 +237,7 @@ def get_embedding_function(
def get_rag_context( def get_rag_context(
docs, files,
messages, messages,
embedding_function, embedding_function,
k, k,
@ -245,29 +245,29 @@ def get_rag_context(
r, r,
hybrid_search, hybrid_search,
): ):
log.debug(f"docs: {docs} {messages} {embedding_function} {reranking_function}") log.debug(f"files: {files} {messages} {embedding_function} {reranking_function}")
query = get_last_user_message(messages) query = get_last_user_message(messages)
extracted_collections = [] extracted_collections = []
relevant_contexts = [] relevant_contexts = []
for doc in docs: for file in files:
context = None context = None
collection_names = ( collection_names = (
doc["collection_names"] file["collection_names"]
if doc["type"] == "collection" if file["type"] == "collection"
else [doc["collection_name"]] else [file["collection_name"]]
) )
collection_names = set(collection_names).difference(extracted_collections) collection_names = set(collection_names).difference(extracted_collections)
if not collection_names: if not collection_names:
log.debug(f"skipping {doc} as it has already been extracted") log.debug(f"skipping {file} as it has already been extracted")
continue continue
try: try:
if doc["type"] == "text": if file["type"] == "text":
context = doc["content"] context = file["content"]
else: else:
if hybrid_search: if hybrid_search:
context = query_collection_with_hybrid_search( context = query_collection_with_hybrid_search(
@ -290,7 +290,7 @@ def get_rag_context(
context = None context = None
if context: if context:
relevant_contexts.append({**context, "source": doc}) relevant_contexts.append({**context, "source": file})
extracted_collections.extend(collection_names) extracted_collections.extend(collection_names)