refac: include source name to citation

This commit is contained in:
Timothy J. Baek 2024-05-06 15:49:00 -07:00
parent 4c490132ba
commit 64ed0d1089
3 changed files with 38 additions and 53 deletions

View File

@ -271,14 +271,14 @@ def rag_messages(
for doc in docs: for doc in docs:
context = None context = None
collection = doc.get("collection_name") collection_names = (
if collection: doc["collection_names"]
collection = [collection] if doc["type"] == "collection"
else: else [doc["collection_name"]]
collection = doc.get("collection_names", []) )
collection = set(collection).difference(extracted_collections) collection_names = set(collection_names).difference(extracted_collections)
if not collection: if not collection_names:
log.debug(f"skipping {doc} as it has already been extracted") log.debug(f"skipping {doc} as it has already been extracted")
continue continue
@ -288,11 +288,7 @@ def rag_messages(
else: else:
if hybrid_search: if hybrid_search:
context = query_collection_with_hybrid_search( context = query_collection_with_hybrid_search(
collection_names=( collection_names=collection_names,
doc["collection_names"]
if doc["type"] == "collection"
else [doc["collection_name"]]
),
query=query, query=query,
embedding_function=embedding_function, embedding_function=embedding_function,
k=k, k=k,
@ -301,11 +297,7 @@ def rag_messages(
) )
else: else:
context = query_collection( context = query_collection(
collection_names=( collection_names=collection_names,
doc["collection_names"]
if doc["type"] == "collection"
else [doc["collection_name"]]
),
query=query, query=query,
embedding_function=embedding_function, embedding_function=embedding_function,
k=k, k=k,
@ -315,9 +307,9 @@ def rag_messages(
context = None context = None
if context: if context:
relevant_contexts.append(context) relevant_contexts.append({**context, "source": doc})
extracted_collections.extend(collection) extracted_collections.extend(collection_names)
context_string = "" context_string = ""
@ -325,11 +317,14 @@ def rag_messages(
for context in relevant_contexts: for context in relevant_contexts:
try: try:
if "documents" in context: if "documents" in context:
items = [item for item in context["documents"][0] if item is not None] context_string += "\n\n".join(
context_string += "\n\n".join(items) [text for text in context["documents"][0] if text is not None]
)
if "metadatas" in context: if "metadatas" in context:
citations.append( citations.append(
{ {
"source": context["source"],
"document": context["documents"][0], "document": context["documents"][0],
"metadata": context["metadatas"][0], "metadata": context["metadatas"][0],
} }

View File

@ -10,10 +10,10 @@
let mergedDocuments = []; let mergedDocuments = [];
onMount(async () => { onMount(async () => {
console.log(citation);
// Merge the document with its metadata // Merge the document with its metadata
mergedDocuments = citation.document?.map((c, i) => { mergedDocuments = citation.document?.map((c, i) => {
return { return {
source: citation.source,
document: c, document: c,
metadata: citation.metadata?.[i] metadata: citation.metadata?.[i]
}; };
@ -54,7 +54,7 @@
{$i18n.t('Source')} {$i18n.t('Source')}
</div> </div>
<div class="text-sm dark:text-gray-400"> <div class="text-sm dark:text-gray-400">
{document.metadata?.source ?? $i18n.t('No source available')} {document.source?.name ?? $i18n.t('No source available')}
</div> </div>
</div> </div>
<div class="flex flex-col w-full"> <div class="flex flex-col w-full">

View File

@ -66,9 +66,8 @@
let showRateComment = false; let showRateComment = false;
let showCitations = {};
// Backend returns a list of citations per collection, we flatten it to citations per source // Backend returns a list of citations per collection, we flatten it to citations per source
let flattenedCitations = {}; let citations = {};
$: tokens = marked.lexer(sanitizeResponseContent(message.content)); $: tokens = marked.lexer(sanitizeResponseContent(message.content));
@ -137,27 +136,21 @@
} }
if (message.citations) { if (message.citations) {
for (const citation of message.citations) { message.citations.forEach((citation) => {
const zipped = (citation?.document ?? []).map(function (document, index) { citation.document.forEach((document, index) => {
return [document, citation.metadata?.[index]]; const metadata = citation.metadata?.[index];
}); const source = citation?.source?.name ?? metadata?.source ?? 'N/A';
for (const [document, metadata] of zipped) { citations[source] = citations[source] || {
const source = metadata?.source ?? 'N/A'; source: citation.source,
if (source in flattenedCitations) { document: [],
flattenedCitations[source].document.push(document); metadata: []
flattenedCitations[source].metadata.push(metadata);
} else {
flattenedCitations[source] = {
document: [document],
metadata: [metadata]
}; };
}
}
}
console.log(flattenedCitations); citations[source].document.push(document);
console.log(Object.keys(flattenedCitations)); citations[source].metadata.push(metadata);
});
});
} }
}; };
@ -474,15 +467,12 @@
</div> </div>
</div> </div>
{#if Object.keys(flattenedCitations).length > 0} {#if Object.keys(citations).length > 0}
<hr class=" dark:border-gray-800" /> <hr class=" dark:border-gray-800" />
<div class="my-2.5 w-full flex overflow-x-auto gap-2 flex-wrap"> <div class="my-2.5 w-full flex overflow-x-auto gap-2 flex-wrap">
{#each [...Object.keys(flattenedCitations)] as source, idx} {#each Object.keys(citations) as source, idx}
<CitationsModal <CitationsModal bind:show={citations[source].show} citation={citations[source]} />
bind:show={showCitations[source]}
citation={flattenedCitations[source]}
/>
<div class="flex gap-1 text-xs font-semibold"> <div class="flex gap-1 text-xs font-semibold">
<div> <div>
@ -492,10 +482,10 @@
<button <button
class="dark:text-gray-500 underline" class="dark:text-gray-500 underline"
on:click={() => { on:click={() => {
showCitations[source] = !showCitations[source]; citations[source].show = !citations[source].show;
}} }}
> >
{source} {citations[source].source.name}
</button> </button>
</div> </div>
{/each} {/each}