refac: citations -> sources

This commit is contained in:
Timothy Jaeryang Baek
2024-11-21 19:46:09 -08:00
parent 7062e637e8
commit 81386e9b04
9 changed files with 165 additions and 126 deletions

View File

@@ -902,10 +902,11 @@ def process_file(
Document(
page_content=form_data.content,
metadata={
"name": file.meta.get("name", file.filename),
**file.meta,
"name": file.filename,
"created_by": file.user_id,
"file_id": file.id,
**file.meta,
"source": file.filename,
},
)
]
@@ -932,10 +933,11 @@ def process_file(
Document(
page_content=file.data.get("content", ""),
metadata={
"name": file.meta.get("name", file.filename),
**file.meta,
"name": file.filename,
"created_by": file.user_id,
"file_id": file.id,
**file.meta,
"source": file.filename,
},
)
]
@@ -955,15 +957,30 @@ def process_file(
docs = loader.load(
file.filename, file.meta.get("content_type"), file_path
)
docs = [
Document(
page_content=doc.page_content,
metadata={
**doc.metadata,
"name": file.filename,
"created_by": file.user_id,
"file_id": file.id,
"source": file.filename,
},
)
for doc in docs
]
else:
docs = [
Document(
page_content=file.data.get("content", ""),
metadata={
**file.meta,
"name": file.filename,
"created_by": file.user_id,
"file_id": file.id,
**file.meta,
"source": file.filename,
},
)
]

View File

@@ -307,7 +307,7 @@ def get_embedding_function(
return lambda query: generate_multiple(query, func)
def get_rag_context(
def get_sources_from_files(
files,
queries,
embedding_function,
@@ -387,43 +387,24 @@ def get_rag_context(
del file["data"]
relevant_contexts.append({**context, "file": file})
contexts = []
citations = []
sources = []
for context in relevant_contexts:
try:
if "documents" in context:
file_names = list(
set(
[
metadata["name"]
for metadata in context["metadatas"][0]
if metadata is not None and "name" in metadata
]
)
)
contexts.append(
((", ".join(file_names) + ":\n\n") if file_names else "")
+ "\n\n".join(
[text for text in context["documents"][0] if text is not None]
)
)
if "metadatas" in context:
citation = {
source = {
"source": context["file"],
"document": context["documents"][0],
"metadata": context["metadatas"][0],
}
if "distances" in context and context["distances"]:
citation["distances"] = context["distances"][0]
citations.append(citation)
source["distances"] = context["distances"][0]
sources.append(source)
except Exception as e:
log.exception(e)
print("contexts", contexts)
print("citations", citations)
return contexts, citations
return sources
def get_model_path(model: str, update_model: bool = False):

View File

@@ -56,7 +56,7 @@ def upload_file(file: UploadFile = File(...), user=Depends(get_verified_user)):
FileForm(
**{
"id": id,
"filename": filename,
"filename": name,
"path": file_path,
"meta": {
"name": name,