mirror of
https://github.com/open-webui/open-webui
synced 2024-11-24 04:54:12 +00:00
enh: inline citations
This commit is contained in:
parent
5be7cbfdf5
commit
386c976e9a
@ -1181,21 +1181,30 @@ CHUNK_OVERLAP = PersistentConfig(
|
||||
int(os.environ.get("CHUNK_OVERLAP", "100")),
|
||||
)
|
||||
|
||||
DEFAULT_RAG_TEMPLATE = """You are given a user query, some textual context and rules, all inside xml tags. You have to answer the query based on the context while respecting the rules.
|
||||
DEFAULT_RAG_TEMPLATE = """### Task:
|
||||
Respond to the user query using the provided context, incorporating inline citations in the format [source_id].
|
||||
|
||||
### Guidelines:
|
||||
- If you don't know the answer, clearly state that.
|
||||
- If uncertain, ask the user for clarification.
|
||||
- Respond in the same language as the user's query.
|
||||
- If the context is unreadable or of poor quality, inform the user and provide the best possible answer.
|
||||
- If the answer isn't present in the context but you possess the knowledge, explain this to the user and provide the answer using your own understanding.
|
||||
- Include inline citations using [source_id] corresponding to the sources listed in the context.
|
||||
- Do not use XML tags in your response.
|
||||
- Ensure citations are concise and directly related to the information provided.
|
||||
|
||||
### Example of Citation:
|
||||
If the user asks about a specific topic and the information is found in "whitepaper.pdf", the response should include the citation like so:
|
||||
* "According to the study, the proposed method increases efficiency by 20% [whitepaper.pdf]."
|
||||
|
||||
### Output:
|
||||
Provide a clear and direct response to the user's query, including inline citations in the format [source_id] where relevant.
|
||||
|
||||
<context>
|
||||
{{CONTEXT}}
|
||||
</context>
|
||||
|
||||
<rules>
|
||||
- If you don't know, just say so.
|
||||
- If you are not sure, ask for clarification.
|
||||
- Answer in the same language as the user query.
|
||||
- If the context appears unreadable or of poor quality, tell the user then answer as best as you can.
|
||||
- If the answer is not in the context but you think you know the answer, explain that to the user then answer with your own knowledge.
|
||||
- Answer directly and without using xml tags.
|
||||
</rules>
|
||||
|
||||
<user_query>
|
||||
{{QUERY}}
|
||||
</user_query>
|
||||
|
@ -679,7 +679,13 @@ class ChatCompletionMiddleware(BaseHTTPMiddleware):
|
||||
|
||||
# If context is not empty, insert it into the messages
|
||||
if len(contexts) > 0:
|
||||
context_string = "/n".join(contexts).strip()
|
||||
context_string = ""
|
||||
for context_idx, context in enumerate(contexts):
|
||||
print(context)
|
||||
source_id = citations[context_idx].get("source", {}).get("name", "")
|
||||
context_string += f"<source><source_id>{source_id}</source_id><source_context>{context}</source_context></source>\n"
|
||||
|
||||
context_string = context_string.strip()
|
||||
prompt = get_last_user_message(body["messages"])
|
||||
|
||||
if prompt is None:
|
||||
|
@ -94,6 +94,7 @@
|
||||
<div class="flex text-xs font-medium">
|
||||
{#each _citations as citation, idx}
|
||||
<button
|
||||
id={`source-${citation.source.name}`}
|
||||
class="no-toggle outline-none flex dark:text-gray-300 p-1 bg-gray-50 hover:bg-gray-100 dark:bg-gray-900 dark:hover:bg-gray-850 transition rounded-xl max-w-96"
|
||||
on:click={() => {
|
||||
showCitationModal = true;
|
||||
|
@ -11,9 +11,11 @@
|
||||
export let id;
|
||||
export let content;
|
||||
export let model = null;
|
||||
export let citations = null;
|
||||
|
||||
export let save = false;
|
||||
export let floatingButtons = true;
|
||||
export let onSourceClick = () => {};
|
||||
|
||||
let contentContainerElement;
|
||||
let buttonsContainerElement;
|
||||
@ -129,6 +131,8 @@
|
||||
{content}
|
||||
{model}
|
||||
{save}
|
||||
sourceIds={(citations ?? []).map((c) => c?.source?.name)}
|
||||
{onSourceClick}
|
||||
on:update={(e) => {
|
||||
dispatch('update', e.detail);
|
||||
}}
|
||||
|
@ -16,6 +16,9 @@
|
||||
export let model = null;
|
||||
export let save = false;
|
||||
|
||||
export let sourceIds = [];
|
||||
export let onSourceClick = () => {};
|
||||
|
||||
let tokens = [];
|
||||
|
||||
const options = {
|
||||
@ -28,7 +31,7 @@
|
||||
$: (async () => {
|
||||
if (content) {
|
||||
tokens = marked.lexer(
|
||||
replaceTokens(processResponseContent(content), model?.name, $user?.name)
|
||||
replaceTokens(processResponseContent(content), sourceIds, model?.name, $user?.name)
|
||||
);
|
||||
}
|
||||
})();
|
||||
@ -39,6 +42,7 @@
|
||||
{tokens}
|
||||
{id}
|
||||
{save}
|
||||
{onSourceClick}
|
||||
on:update={(e) => {
|
||||
dispatch('update', e.detail);
|
||||
}}
|
||||
|
@ -12,9 +12,11 @@
|
||||
|
||||
import Image from '$lib/components/common/Image.svelte';
|
||||
import KatexRenderer from './KatexRenderer.svelte';
|
||||
import Source from './Source.svelte';
|
||||
|
||||
export let id: string;
|
||||
export let tokens: Token[];
|
||||
export let onSourceClick: Function = () => {};
|
||||
</script>
|
||||
|
||||
{#each tokens as token}
|
||||
@ -26,6 +28,8 @@
|
||||
{@html html}
|
||||
{:else if token.text.includes(`<iframe src="${WEBUI_BASE_URL}/api/v1/files/`)}
|
||||
{@html `${token.text}`}
|
||||
{:else if token.text.includes(`<source_id`)}
|
||||
<Source {token} onClick={onSourceClick} />
|
||||
{:else}
|
||||
{token.text}
|
||||
{/if}
|
||||
|
@ -25,6 +25,7 @@
|
||||
export let top = true;
|
||||
|
||||
export let save = false;
|
||||
export let onSourceClick: Function = () => {};
|
||||
|
||||
const headerComponent = (depth: number) => {
|
||||
return 'h' + depth;
|
||||
@ -62,7 +63,7 @@
|
||||
<hr />
|
||||
{:else if token.type === 'heading'}
|
||||
<svelte:element this={headerComponent(token.depth)}>
|
||||
<MarkdownInlineTokens id={`${id}-${tokenIdx}-h`} tokens={token.tokens} />
|
||||
<MarkdownInlineTokens id={`${id}-${tokenIdx}-h`} tokens={token.tokens} {onSourceClick} />
|
||||
</svelte:element>
|
||||
{:else if token.type === 'code'}
|
||||
{#if token.raw.includes('```')}
|
||||
@ -108,6 +109,7 @@
|
||||
<MarkdownInlineTokens
|
||||
id={`${id}-${tokenIdx}-header-${headerIdx}`}
|
||||
tokens={header.tokens}
|
||||
{onSourceClick}
|
||||
/>
|
||||
</div>
|
||||
</th>
|
||||
@ -126,6 +128,7 @@
|
||||
<MarkdownInlineTokens
|
||||
id={`${id}-${tokenIdx}-row-${rowIdx}-${cellIdx}`}
|
||||
tokens={cell.tokens}
|
||||
{onSourceClick}
|
||||
/>
|
||||
</div>
|
||||
</td>
|
||||
@ -205,19 +208,27 @@
|
||||
></iframe>
|
||||
{:else if token.type === 'paragraph'}
|
||||
<p>
|
||||
<MarkdownInlineTokens id={`${id}-${tokenIdx}-p`} tokens={token.tokens ?? []} />
|
||||
<MarkdownInlineTokens
|
||||
id={`${id}-${tokenIdx}-p`}
|
||||
tokens={token.tokens ?? []}
|
||||
{onSourceClick}
|
||||
/>
|
||||
</p>
|
||||
{:else if token.type === 'text'}
|
||||
{#if top}
|
||||
<p>
|
||||
{#if token.tokens}
|
||||
<MarkdownInlineTokens id={`${id}-${tokenIdx}-t`} tokens={token.tokens} />
|
||||
<MarkdownInlineTokens id={`${id}-${tokenIdx}-t`} tokens={token.tokens} {onSourceClick} />
|
||||
{:else}
|
||||
{unescapeHtml(token.text)}
|
||||
{/if}
|
||||
</p>
|
||||
{:else if token.tokens}
|
||||
<MarkdownInlineTokens id={`${id}-${tokenIdx}-p`} tokens={token.tokens ?? []} />
|
||||
<MarkdownInlineTokens
|
||||
id={`${id}-${tokenIdx}-p`}
|
||||
tokens={token.tokens ?? []}
|
||||
{onSourceClick}
|
||||
/>
|
||||
{:else}
|
||||
{unescapeHtml(token.text)}
|
||||
{/if}
|
||||
|
23
src/lib/components/chat/Messages/Markdown/Source.svelte
Normal file
23
src/lib/components/chat/Messages/Markdown/Source.svelte
Normal file
@ -0,0 +1,23 @@
|
||||
<script lang="ts">
|
||||
export let token;
|
||||
export let onClick: Function = () => {};
|
||||
|
||||
let id = '';
|
||||
function extractDataAttribute(input) {
|
||||
// Use a regular expression to extract the value of the `data` attribute
|
||||
const match = input.match(/data="([^"]*)"/);
|
||||
// Check if a match was found and return the first captured group
|
||||
return match ? match[1] : null;
|
||||
}
|
||||
|
||||
$: id = extractDataAttribute(token.text);
|
||||
</script>
|
||||
|
||||
<button
|
||||
class="text-xs font-medium px-1.5 py-0.5 dark:bg-white/5 dark:hover:bg-white/10 bg-black/5 hover:bg-black/10 transition rounded-lg"
|
||||
on:click={() => {
|
||||
onClick(id);
|
||||
}}
|
||||
>
|
||||
{id}
|
||||
</button>
|
@ -621,9 +621,18 @@
|
||||
<ContentRenderer
|
||||
id={message.id}
|
||||
content={message.content}
|
||||
citations={message.citations}
|
||||
floatingButtons={message?.done}
|
||||
save={!readOnly}
|
||||
{model}
|
||||
onSourceClick={(e) => {
|
||||
console.log(e);
|
||||
const sourceButton = document.getElementById(`source-${e}`);
|
||||
|
||||
if (sourceButton) {
|
||||
sourceButton.click();
|
||||
}
|
||||
}}
|
||||
on:update={(e) => {
|
||||
const { raw, oldContent, newContent } = e.detail;
|
||||
|
||||
|
@ -5,11 +5,7 @@
|
||||
|
||||
import { models, settings } from '$lib/stores';
|
||||
import { user as _user } from '$lib/stores';
|
||||
import {
|
||||
copyToClipboard as _copyToClipboard,
|
||||
processResponseContent,
|
||||
replaceTokens
|
||||
} from '$lib/utils';
|
||||
import { copyToClipboard as _copyToClipboard } from '$lib/utils';
|
||||
|
||||
import Name from './Name.svelte';
|
||||
import ProfileImage from './ProfileImage.svelte';
|
||||
|
@ -8,12 +8,13 @@ import { TTS_RESPONSE_SPLIT } from '$lib/types';
|
||||
// Helper functions
|
||||
//////////////////////////
|
||||
|
||||
export const replaceTokens = (content, char, user) => {
|
||||
export const replaceTokens = (content, sourceIds, char, user) => {
|
||||
const charToken = /{{char}}/gi;
|
||||
const userToken = /{{user}}/gi;
|
||||
const videoIdToken = /{{VIDEO_FILE_ID_([a-f0-9-]+)}}/gi; // Regex to capture the video ID
|
||||
const htmlIdToken = /{{HTML_FILE_ID_([a-f0-9-]+)}}/gi; // Regex to capture the HTML ID
|
||||
|
||||
|
||||
// Replace {{char}} if char is provided
|
||||
if (char !== undefined && char !== null) {
|
||||
content = content.replace(charToken, char);
|
||||
@ -36,6 +37,18 @@ export const replaceTokens = (content, char, user) => {
|
||||
return `<iframe src="${htmlUrl}" width="100%" frameborder="0" onload="this.style.height=(this.contentWindow.document.body.scrollHeight+20)+'px';"></iframe>`;
|
||||
});
|
||||
|
||||
|
||||
// Remove sourceIds from the content and replace them with <source_id>...</source_id>
|
||||
if (Array.isArray(sourceIds)) {
|
||||
sourceIds.forEach((sourceId) => {
|
||||
// Create a token based on the exact `[sourceId]` string
|
||||
const sourceToken = `\\[${sourceId}\\]`; // Escape special characters for RegExp
|
||||
const sourceRegex = new RegExp(sourceToken, 'g'); // Match all occurrences of [sourceId]
|
||||
|
||||
content = content.replace(sourceRegex, `<source_id data="${sourceId}" />`);
|
||||
});
|
||||
}
|
||||
|
||||
return content;
|
||||
};
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user