mirror of
https://github.com/open-webui/open-webui
synced 2025-02-20 12:00:22 +00:00
feat: retrieval whole document mode
This commit is contained in:
parent
1d8b3b8c51
commit
6d764ee55e
@ -317,58 +317,63 @@ def get_rag_context(
|
||||
relevant_contexts = []
|
||||
|
||||
for file in files:
|
||||
context = None
|
||||
|
||||
collection_names = (
|
||||
file["collection_names"]
|
||||
if file["type"] == "collection"
|
||||
else [file["collection_name"]] if file["collection_name"] else []
|
||||
)
|
||||
|
||||
collection_names = set(collection_names).difference(extracted_collections)
|
||||
if not collection_names:
|
||||
log.debug(f"skipping {file} as it has already been extracted")
|
||||
continue
|
||||
|
||||
try:
|
||||
if file.get("context") == "full":
|
||||
context = {
|
||||
"documents": [[file["content"]]],
|
||||
"metadatas": [[{"file_id": file["id"], "name": file["name"]}]],
|
||||
}
|
||||
else:
|
||||
context = None
|
||||
if file["type"] == "text":
|
||||
context = file["content"]
|
||||
else:
|
||||
if hybrid_search:
|
||||
try:
|
||||
context = query_collection_with_hybrid_search(
|
||||
|
||||
collection_names = (
|
||||
file["collection_names"]
|
||||
if file["type"] == "collection"
|
||||
else [file["collection_name"]] if file["collection_name"] else []
|
||||
)
|
||||
|
||||
collection_names = set(collection_names).difference(extracted_collections)
|
||||
if not collection_names:
|
||||
log.debug(f"skipping {file} as it has already been extracted")
|
||||
continue
|
||||
|
||||
try:
|
||||
context = None
|
||||
if file["type"] == "text":
|
||||
context = file["content"]
|
||||
else:
|
||||
if hybrid_search:
|
||||
try:
|
||||
context = query_collection_with_hybrid_search(
|
||||
collection_names=collection_names,
|
||||
query=query,
|
||||
embedding_function=embedding_function,
|
||||
k=k,
|
||||
reranking_function=reranking_function,
|
||||
r=r,
|
||||
)
|
||||
except Exception as e:
|
||||
log.debug(
|
||||
"Error when using hybrid search, using"
|
||||
" non hybrid search as fallback."
|
||||
)
|
||||
|
||||
if (not hybrid_search) or (context is None):
|
||||
context = query_collection(
|
||||
collection_names=collection_names,
|
||||
query=query,
|
||||
embedding_function=embedding_function,
|
||||
k=k,
|
||||
reranking_function=reranking_function,
|
||||
r=r,
|
||||
)
|
||||
except Exception as e:
|
||||
log.debug(
|
||||
"Error when using hybrid search, using"
|
||||
" non hybrid search as fallback."
|
||||
)
|
||||
except Exception as e:
|
||||
log.exception(e)
|
||||
|
||||
if (not hybrid_search) or (context is None):
|
||||
context = query_collection(
|
||||
collection_names=collection_names,
|
||||
query=query,
|
||||
embedding_function=embedding_function,
|
||||
k=k,
|
||||
)
|
||||
except Exception as e:
|
||||
log.exception(e)
|
||||
extracted_collections.extend(collection_names)
|
||||
|
||||
if context:
|
||||
relevant_contexts.append({**context, "source": file})
|
||||
|
||||
extracted_collections.extend(collection_names)
|
||||
relevant_contexts.append({**context, "file": file})
|
||||
|
||||
contexts = []
|
||||
citations = []
|
||||
|
||||
for context in relevant_contexts:
|
||||
try:
|
||||
if "documents" in context:
|
||||
@ -381,7 +386,7 @@ def get_rag_context(
|
||||
if "metadatas" in context:
|
||||
citations.append(
|
||||
{
|
||||
"source": context["source"],
|
||||
"source": context["file"],
|
||||
"document": context["documents"][0],
|
||||
"metadata": context["metadatas"][0],
|
||||
}
|
||||
|
@ -36,6 +36,7 @@
|
||||
<FileItem
|
||||
className="w-full"
|
||||
{file}
|
||||
edit={true}
|
||||
url={`${file?.url}`}
|
||||
name={file.name}
|
||||
type={file.type}
|
||||
|
@ -459,6 +459,7 @@
|
||||
size={file?.size}
|
||||
status={file.status}
|
||||
dismissible={true}
|
||||
edit={true}
|
||||
on:dismiss={() => {
|
||||
files.splice(fileIdx, 1);
|
||||
files = files;
|
||||
|
@ -15,7 +15,7 @@
|
||||
export let status = 'processed';
|
||||
|
||||
export let file = null;
|
||||
export let enableModal = true;
|
||||
export let edit = false;
|
||||
|
||||
export let name: string;
|
||||
export let type: string;
|
||||
@ -25,7 +25,7 @@
|
||||
</script>
|
||||
|
||||
{#if file}
|
||||
<FileItemModal bind:show={showModal} bind:file />
|
||||
<FileItemModal bind:show={showModal} bind:file {edit} />
|
||||
{/if}
|
||||
|
||||
<div class="relative group">
|
||||
|
@ -7,57 +7,95 @@
|
||||
import Modal from './Modal.svelte';
|
||||
import XMark from '../icons/XMark.svelte';
|
||||
import Info from '../icons/Info.svelte';
|
||||
import Switch from './Switch.svelte';
|
||||
import Tooltip from './Tooltip.svelte';
|
||||
|
||||
export let file;
|
||||
export let show = false;
|
||||
|
||||
export let edit = false;
|
||||
|
||||
let enableFullContent = false;
|
||||
|
||||
onMount(() => {
|
||||
console.log(file);
|
||||
|
||||
if (file?.context === 'full') {
|
||||
enableFullContent = true;
|
||||
}
|
||||
});
|
||||
</script>
|
||||
|
||||
<Modal bind:show size="md">
|
||||
<div class="font-primary px-6 py-5 w-full flex flex-col justify-center dark:text-gray-400">
|
||||
<div class="flex items-start justify-between pb-2">
|
||||
<div>
|
||||
<div class=" font-medium text-lg dark:text-gray-100">
|
||||
<a
|
||||
href={file.url ? (file.type === 'file' ? `${file.url}/content` : `${file.url}`) : '#'}
|
||||
target="_blank"
|
||||
class="hover:underline line-clamp-1"
|
||||
>
|
||||
{file?.name ?? 'File'}
|
||||
</a>
|
||||
<div class=" pb-2">
|
||||
<div class="flex items-start justify-between">
|
||||
<div>
|
||||
<div class=" font-medium text-lg dark:text-gray-100">
|
||||
<a
|
||||
href={file.url ? (file.type === 'file' ? `${file.url}/content` : `${file.url}`) : '#'}
|
||||
target="_blank"
|
||||
class="hover:underline line-clamp-1"
|
||||
>
|
||||
{file?.name ?? 'File'}
|
||||
</a>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<div class=" flex text-sm gap-1 text-gray-500">
|
||||
<button
|
||||
on:click={() => {
|
||||
show = false;
|
||||
}}
|
||||
>
|
||||
<XMark />
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<div class="flex flex-col md:flex-row gap-1 justify-between w-full">
|
||||
<div class=" flex flex-wrap text-sm gap-1 text-gray-500">
|
||||
{#if file.size}
|
||||
<div class="capitalize">{formatFileSize(file.size)}</div>
|
||||
<div class="capitalize shrink-0">{formatFileSize(file.size)}</div>
|
||||
•
|
||||
{/if}
|
||||
|
||||
{#if file.content}
|
||||
<div class="capitalize">{getLineCount(file.content)} extracted lines</div>
|
||||
<div class="capitalize shrink-0">{getLineCount(file.content)} extracted lines</div>
|
||||
|
||||
<div class="flex items-center gap-1">
|
||||
<div class="flex items-center gap-1 shrink-0">
|
||||
<Info />
|
||||
|
||||
Formatting may be inconsistent from source.
|
||||
</div>
|
||||
{/if}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<button
|
||||
on:click={() => {
|
||||
show = false;
|
||||
}}
|
||||
>
|
||||
<XMark />
|
||||
</button>
|
||||
{#if edit}
|
||||
<div>
|
||||
<Tooltip
|
||||
content={enableFullContent
|
||||
? 'Inject the entire document as context for comprehensive processing.'
|
||||
: 'Default to segmented retrieval for focused and relevant content extraction.'}
|
||||
>
|
||||
<div class="flex items-center gap-1.5 text-xs">
|
||||
{#if enableFullContent}
|
||||
Use Entire Document
|
||||
{:else}
|
||||
Use Focused Retrieval
|
||||
{/if}
|
||||
<Switch
|
||||
bind:state={enableFullContent}
|
||||
on:change={(e) => {
|
||||
file.context = e.detail ? 'full' : undefined;
|
||||
}}
|
||||
/>
|
||||
</div>
|
||||
</Tooltip>
|
||||
</div>
|
||||
{/if}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user