This commit is contained in:
Timothy J. Baek 2024-06-18 14:55:18 -07:00
parent eb21750466
commit 20e4f6cc16

View File

@@ -237,7 +237,7 @@ def get_embedding_function(
def get_rag_context(
-docs,
+files,
messages,
embedding_function,
k,
@@ -245,29 +245,29 @@ def get_rag_context(
r,
hybrid_search,
):
-log.debug(f"docs: {docs} {messages} {embedding_function} {reranking_function}")
+log.debug(f"files: {files} {messages} {embedding_function} {reranking_function}")
query = get_last_user_message(messages)
extracted_collections = []
relevant_contexts = []
-for doc in docs:
+for file in files:
context = None
collection_names = (
-doc["collection_names"]
-if doc["type"] == "collection"
-else [doc["collection_name"]]
+file["collection_names"]
+if file["type"] == "collection"
+else [file["collection_name"]]
)
collection_names = set(collection_names).difference(extracted_collections)
if not collection_names:
-log.debug(f"skipping {doc} as it has already been extracted")
+log.debug(f"skipping {file} as it has already been extracted")
continue
try:
-if doc["type"] == "text":
-context = doc["content"]
+if file["type"] == "text":
+context = file["content"]
else:
if hybrid_search:
context = query_collection_with_hybrid_search(
@@ -290,7 +290,7 @@ def get_rag_context(
context = None
if context:
-relevant_contexts.append({**context, "source": doc})
+relevant_contexts.append({**context, "source": file})
extracted_collections.extend(collection_names)