mirror of
https://github.com/open-webui/open-webui
synced 2025-04-15 21:13:44 +00:00
feat: retrieval whole document mode
This commit is contained in:
parent
1d8b3b8c51
commit
6d764ee55e
@ -317,58 +317,63 @@ def get_rag_context(
|
|||||||
relevant_contexts = []
|
relevant_contexts = []
|
||||||
|
|
||||||
for file in files:
|
for file in files:
|
||||||
context = None
|
if file.get("context") == "full":
|
||||||
|
context = {
|
||||||
collection_names = (
|
"documents": [[file["content"]]],
|
||||||
file["collection_names"]
|
"metadatas": [[{"file_id": file["id"], "name": file["name"]}]],
|
||||||
if file["type"] == "collection"
|
}
|
||||||
else [file["collection_name"]] if file["collection_name"] else []
|
else:
|
||||||
)
|
|
||||||
|
|
||||||
collection_names = set(collection_names).difference(extracted_collections)
|
|
||||||
if not collection_names:
|
|
||||||
log.debug(f"skipping {file} as it has already been extracted")
|
|
||||||
continue
|
|
||||||
|
|
||||||
try:
|
|
||||||
context = None
|
context = None
|
||||||
if file["type"] == "text":
|
|
||||||
context = file["content"]
|
collection_names = (
|
||||||
else:
|
file["collection_names"]
|
||||||
if hybrid_search:
|
if file["type"] == "collection"
|
||||||
try:
|
else [file["collection_name"]] if file["collection_name"] else []
|
||||||
context = query_collection_with_hybrid_search(
|
)
|
||||||
|
|
||||||
|
collection_names = set(collection_names).difference(extracted_collections)
|
||||||
|
if not collection_names:
|
||||||
|
log.debug(f"skipping {file} as it has already been extracted")
|
||||||
|
continue
|
||||||
|
|
||||||
|
try:
|
||||||
|
context = None
|
||||||
|
if file["type"] == "text":
|
||||||
|
context = file["content"]
|
||||||
|
else:
|
||||||
|
if hybrid_search:
|
||||||
|
try:
|
||||||
|
context = query_collection_with_hybrid_search(
|
||||||
|
collection_names=collection_names,
|
||||||
|
query=query,
|
||||||
|
embedding_function=embedding_function,
|
||||||
|
k=k,
|
||||||
|
reranking_function=reranking_function,
|
||||||
|
r=r,
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
log.debug(
|
||||||
|
"Error when using hybrid search, using"
|
||||||
|
" non hybrid search as fallback."
|
||||||
|
)
|
||||||
|
|
||||||
|
if (not hybrid_search) or (context is None):
|
||||||
|
context = query_collection(
|
||||||
collection_names=collection_names,
|
collection_names=collection_names,
|
||||||
query=query,
|
query=query,
|
||||||
embedding_function=embedding_function,
|
embedding_function=embedding_function,
|
||||||
k=k,
|
k=k,
|
||||||
reranking_function=reranking_function,
|
|
||||||
r=r,
|
|
||||||
)
|
|
||||||
except Exception as e:
|
|
||||||
log.debug(
|
|
||||||
"Error when using hybrid search, using"
|
|
||||||
" non hybrid search as fallback."
|
|
||||||
)
|
)
|
||||||
|
except Exception as e:
|
||||||
|
log.exception(e)
|
||||||
|
|
||||||
if (not hybrid_search) or (context is None):
|
extracted_collections.extend(collection_names)
|
||||||
context = query_collection(
|
|
||||||
collection_names=collection_names,
|
|
||||||
query=query,
|
|
||||||
embedding_function=embedding_function,
|
|
||||||
k=k,
|
|
||||||
)
|
|
||||||
except Exception as e:
|
|
||||||
log.exception(e)
|
|
||||||
|
|
||||||
if context:
|
if context:
|
||||||
relevant_contexts.append({**context, "source": file})
|
relevant_contexts.append({**context, "file": file})
|
||||||
|
|
||||||
extracted_collections.extend(collection_names)
|
|
||||||
|
|
||||||
contexts = []
|
contexts = []
|
||||||
citations = []
|
citations = []
|
||||||
|
|
||||||
for context in relevant_contexts:
|
for context in relevant_contexts:
|
||||||
try:
|
try:
|
||||||
if "documents" in context:
|
if "documents" in context:
|
||||||
@ -381,7 +386,7 @@ def get_rag_context(
|
|||||||
if "metadatas" in context:
|
if "metadatas" in context:
|
||||||
citations.append(
|
citations.append(
|
||||||
{
|
{
|
||||||
"source": context["source"],
|
"source": context["file"],
|
||||||
"document": context["documents"][0],
|
"document": context["documents"][0],
|
||||||
"metadata": context["metadatas"][0],
|
"metadata": context["metadatas"][0],
|
||||||
}
|
}
|
||||||
|
@ -36,6 +36,7 @@
|
|||||||
<FileItem
|
<FileItem
|
||||||
className="w-full"
|
className="w-full"
|
||||||
{file}
|
{file}
|
||||||
|
edit={true}
|
||||||
url={`${file?.url}`}
|
url={`${file?.url}`}
|
||||||
name={file.name}
|
name={file.name}
|
||||||
type={file.type}
|
type={file.type}
|
||||||
|
@ -459,6 +459,7 @@
|
|||||||
size={file?.size}
|
size={file?.size}
|
||||||
status={file.status}
|
status={file.status}
|
||||||
dismissible={true}
|
dismissible={true}
|
||||||
|
edit={true}
|
||||||
on:dismiss={() => {
|
on:dismiss={() => {
|
||||||
files.splice(fileIdx, 1);
|
files.splice(fileIdx, 1);
|
||||||
files = files;
|
files = files;
|
||||||
|
@ -15,7 +15,7 @@
|
|||||||
export let status = 'processed';
|
export let status = 'processed';
|
||||||
|
|
||||||
export let file = null;
|
export let file = null;
|
||||||
export let enableModal = true;
|
export let edit = false;
|
||||||
|
|
||||||
export let name: string;
|
export let name: string;
|
||||||
export let type: string;
|
export let type: string;
|
||||||
@ -25,7 +25,7 @@
|
|||||||
</script>
|
</script>
|
||||||
|
|
||||||
{#if file}
|
{#if file}
|
||||||
<FileItemModal bind:show={showModal} bind:file />
|
<FileItemModal bind:show={showModal} bind:file {edit} />
|
||||||
{/if}
|
{/if}
|
||||||
|
|
||||||
<div class="relative group">
|
<div class="relative group">
|
||||||
|
@ -7,57 +7,95 @@
|
|||||||
import Modal from './Modal.svelte';
|
import Modal from './Modal.svelte';
|
||||||
import XMark from '../icons/XMark.svelte';
|
import XMark from '../icons/XMark.svelte';
|
||||||
import Info from '../icons/Info.svelte';
|
import Info from '../icons/Info.svelte';
|
||||||
|
import Switch from './Switch.svelte';
|
||||||
|
import Tooltip from './Tooltip.svelte';
|
||||||
|
|
||||||
export let file;
|
export let file;
|
||||||
export let show = false;
|
export let show = false;
|
||||||
|
|
||||||
|
export let edit = false;
|
||||||
|
|
||||||
|
let enableFullContent = false;
|
||||||
|
|
||||||
onMount(() => {
|
onMount(() => {
|
||||||
console.log(file);
|
console.log(file);
|
||||||
|
|
||||||
|
if (file?.context === 'full') {
|
||||||
|
enableFullContent = true;
|
||||||
|
}
|
||||||
});
|
});
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
<Modal bind:show size="md">
|
<Modal bind:show size="md">
|
||||||
<div class="font-primary px-6 py-5 w-full flex flex-col justify-center dark:text-gray-400">
|
<div class="font-primary px-6 py-5 w-full flex flex-col justify-center dark:text-gray-400">
|
||||||
<div class="flex items-start justify-between pb-2">
|
<div class=" pb-2">
|
||||||
<div>
|
<div class="flex items-start justify-between">
|
||||||
<div class=" font-medium text-lg dark:text-gray-100">
|
<div>
|
||||||
<a
|
<div class=" font-medium text-lg dark:text-gray-100">
|
||||||
href={file.url ? (file.type === 'file' ? `${file.url}/content` : `${file.url}`) : '#'}
|
<a
|
||||||
target="_blank"
|
href={file.url ? (file.type === 'file' ? `${file.url}/content` : `${file.url}`) : '#'}
|
||||||
class="hover:underline line-clamp-1"
|
target="_blank"
|
||||||
>
|
class="hover:underline line-clamp-1"
|
||||||
{file?.name ?? 'File'}
|
>
|
||||||
</a>
|
{file?.name ?? 'File'}
|
||||||
|
</a>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div>
|
<div>
|
||||||
<div class=" flex text-sm gap-1 text-gray-500">
|
<button
|
||||||
|
on:click={() => {
|
||||||
|
show = false;
|
||||||
|
}}
|
||||||
|
>
|
||||||
|
<XMark />
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div>
|
||||||
|
<div class="flex flex-col md:flex-row gap-1 justify-between w-full">
|
||||||
|
<div class=" flex flex-wrap text-sm gap-1 text-gray-500">
|
||||||
{#if file.size}
|
{#if file.size}
|
||||||
<div class="capitalize">{formatFileSize(file.size)}</div>
|
<div class="capitalize shrink-0">{formatFileSize(file.size)}</div>
|
||||||
•
|
•
|
||||||
{/if}
|
{/if}
|
||||||
|
|
||||||
{#if file.content}
|
{#if file.content}
|
||||||
<div class="capitalize">{getLineCount(file.content)} extracted lines</div>
|
<div class="capitalize shrink-0">{getLineCount(file.content)} extracted lines</div>
|
||||||
|
|
||||||
<div class="flex items-center gap-1">
|
<div class="flex items-center gap-1 shrink-0">
|
||||||
<Info />
|
<Info />
|
||||||
|
|
||||||
Formatting may be inconsistent from source.
|
Formatting may be inconsistent from source.
|
||||||
</div>
|
</div>
|
||||||
{/if}
|
{/if}
|
||||||
</div>
|
</div>
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<div>
|
{#if edit}
|
||||||
<button
|
<div>
|
||||||
on:click={() => {
|
<Tooltip
|
||||||
show = false;
|
content={enableFullContent
|
||||||
}}
|
? 'Inject the entire document as context for comprehensive processing.'
|
||||||
>
|
: 'Default to segmented retrieval for focused and relevant content extraction.'}
|
||||||
<XMark />
|
>
|
||||||
</button>
|
<div class="flex items-center gap-1.5 text-xs">
|
||||||
|
{#if enableFullContent}
|
||||||
|
Use Entire Document
|
||||||
|
{:else}
|
||||||
|
Use Focused Retrieval
|
||||||
|
{/if}
|
||||||
|
<Switch
|
||||||
|
bind:state={enableFullContent}
|
||||||
|
on:change={(e) => {
|
||||||
|
file.context = e.detail ? 'full' : undefined;
|
||||||
|
}}
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
</Tooltip>
|
||||||
|
</div>
|
||||||
|
{/if}
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user