enh: inline citations

This commit is contained in:
Timothy Jaeryang Baek 2024-11-21 17:58:29 -08:00
parent 5be7cbfdf5
commit 386c976e9a
11 changed files with 102 additions and 22 deletions

View File

@ -1181,21 +1181,30 @@ CHUNK_OVERLAP = PersistentConfig(
int(os.environ.get("CHUNK_OVERLAP", "100")), int(os.environ.get("CHUNK_OVERLAP", "100")),
) )
DEFAULT_RAG_TEMPLATE = """You are given a user query, some textual context and rules, all inside xml tags. You have to answer the query based on the context while respecting the rules. DEFAULT_RAG_TEMPLATE = """### Task:
Respond to the user query using the provided context, incorporating inline citations in the format [source_id].
### Guidelines:
- If you don't know the answer, clearly state that.
- If uncertain, ask the user for clarification.
- Respond in the same language as the user's query.
- If the context is unreadable or of poor quality, inform the user and provide the best possible answer.
- If the answer isn't present in the context but you possess the knowledge, explain this to the user and provide the answer using your own understanding.
- Include inline citations using [source_id] corresponding to the sources listed in the context.
- Do not use XML tags in your response.
- Ensure citations are concise and directly related to the information provided.
### Example of Citation:
If the user asks about a specific topic and the information is found in "whitepaper.pdf", the response should include the citation like so:
* "According to the study, the proposed method increases efficiency by 20% [whitepaper.pdf]."
### Output:
Provide a clear and direct response to the user's query, including inline citations in the format [source_id] where relevant.
<context> <context>
{{CONTEXT}} {{CONTEXT}}
</context> </context>
<rules>
- If you don't know, just say so.
- If you are not sure, ask for clarification.
- Answer in the same language as the user query.
- If the context appears unreadable or of poor quality, tell the user then answer as best as you can.
- If the answer is not in the context but you think you know the answer, explain that to the user then answer with your own knowledge.
- Answer directly and without using xml tags.
</rules>
<user_query> <user_query>
{{QUERY}} {{QUERY}}
</user_query> </user_query>

View File

@ -679,7 +679,13 @@ class ChatCompletionMiddleware(BaseHTTPMiddleware):
# If context is not empty, insert it into the messages # If context is not empty, insert it into the messages
if len(contexts) > 0: if len(contexts) > 0:
context_string = "/n".join(contexts).strip() context_string = ""
# Wrap every retrieved context in <source> tags together with the name of its
# source, one entry per line, so each context carries a source_id.
# NOTE(review): assumes citations[i] describes contexts[i]; confirm against the
# code that builds both lists, since a shorter citations list raises IndexError.
context_parts = []
for context_idx, context in enumerate(contexts):
    # A citation without a "source" or "name" entry falls back to an empty id.
    source_id = citations[context_idx].get("source", {}).get("name", "")
    context_parts.append(
        f"<source><source_id>{source_id}</source_id><source_context>{context}</source_context></source>"
    )
# The debug print of each context was dropped: it wrote the retrieved document
# text to stdout on every request.
context_string = "\n".join(context_parts).strip()
prompt = get_last_user_message(body["messages"]) prompt = get_last_user_message(body["messages"])
if prompt is None: if prompt is None:

View File

@ -94,6 +94,7 @@
<div class="flex text-xs font-medium"> <div class="flex text-xs font-medium">
{#each _citations as citation, idx} {#each _citations as citation, idx}
<button <button
id={`source-${citation.source.name}`}
class="no-toggle outline-none flex dark:text-gray-300 p-1 bg-gray-50 hover:bg-gray-100 dark:bg-gray-900 dark:hover:bg-gray-850 transition rounded-xl max-w-96" class="no-toggle outline-none flex dark:text-gray-300 p-1 bg-gray-50 hover:bg-gray-100 dark:bg-gray-900 dark:hover:bg-gray-850 transition rounded-xl max-w-96"
on:click={() => { on:click={() => {
showCitationModal = true; showCitationModal = true;

View File

@ -11,9 +11,11 @@
export let id; export let id;
export let content; export let content;
export let model = null; export let model = null;
export let citations = null;
export let save = false; export let save = false;
export let floatingButtons = true; export let floatingButtons = true;
export let onSourceClick = () => {};
let contentContainerElement; let contentContainerElement;
let buttonsContainerElement; let buttonsContainerElement;
@ -129,6 +131,8 @@
{content} {content}
{model} {model}
{save} {save}
sourceIds={(citations ?? []).map((c) => c?.source?.name)}
{onSourceClick}
on:update={(e) => { on:update={(e) => {
dispatch('update', e.detail); dispatch('update', e.detail);
}} }}

View File

@ -16,6 +16,9 @@
export let model = null; export let model = null;
export let save = false; export let save = false;
export let sourceIds = [];
export let onSourceClick = () => {};
let tokens = []; let tokens = [];
const options = { const options = {
@ -28,7 +31,7 @@
$: (async () => { $: (async () => {
if (content) { if (content) {
tokens = marked.lexer( tokens = marked.lexer(
replaceTokens(processResponseContent(content), model?.name, $user?.name) replaceTokens(processResponseContent(content), sourceIds, model?.name, $user?.name)
); );
} }
})(); })();
@ -39,6 +42,7 @@
{tokens} {tokens}
{id} {id}
{save} {save}
{onSourceClick}
on:update={(e) => { on:update={(e) => {
dispatch('update', e.detail); dispatch('update', e.detail);
}} }}

View File

@ -12,9 +12,11 @@
import Image from '$lib/components/common/Image.svelte'; import Image from '$lib/components/common/Image.svelte';
import KatexRenderer from './KatexRenderer.svelte'; import KatexRenderer from './KatexRenderer.svelte';
import Source from './Source.svelte';
export let id: string; export let id: string;
export let tokens: Token[]; export let tokens: Token[];
export let onSourceClick: Function = () => {};
</script> </script>
{#each tokens as token} {#each tokens as token}
@ -26,6 +28,8 @@
{@html html} {@html html}
{:else if token.text.includes(`<iframe src="${WEBUI_BASE_URL}/api/v1/files/`)} {:else if token.text.includes(`<iframe src="${WEBUI_BASE_URL}/api/v1/files/`)}
{@html `${token.text}`} {@html `${token.text}`}
{:else if token.text.includes(`<source_id`)}
<Source {token} onClick={onSourceClick} />
{:else} {:else}
{token.text} {token.text}
{/if} {/if}

View File

@ -25,6 +25,7 @@
export let top = true; export let top = true;
export let save = false; export let save = false;
export let onSourceClick: Function = () => {};
const headerComponent = (depth: number) => { const headerComponent = (depth: number) => {
return 'h' + depth; return 'h' + depth;
@ -62,7 +63,7 @@
<hr /> <hr />
{:else if token.type === 'heading'} {:else if token.type === 'heading'}
<svelte:element this={headerComponent(token.depth)}> <svelte:element this={headerComponent(token.depth)}>
<MarkdownInlineTokens id={`${id}-${tokenIdx}-h`} tokens={token.tokens} /> <MarkdownInlineTokens id={`${id}-${tokenIdx}-h`} tokens={token.tokens} {onSourceClick} />
</svelte:element> </svelte:element>
{:else if token.type === 'code'} {:else if token.type === 'code'}
{#if token.raw.includes('```')} {#if token.raw.includes('```')}
@ -108,6 +109,7 @@
<MarkdownInlineTokens <MarkdownInlineTokens
id={`${id}-${tokenIdx}-header-${headerIdx}`} id={`${id}-${tokenIdx}-header-${headerIdx}`}
tokens={header.tokens} tokens={header.tokens}
{onSourceClick}
/> />
</div> </div>
</th> </th>
@ -126,6 +128,7 @@
<MarkdownInlineTokens <MarkdownInlineTokens
id={`${id}-${tokenIdx}-row-${rowIdx}-${cellIdx}`} id={`${id}-${tokenIdx}-row-${rowIdx}-${cellIdx}`}
tokens={cell.tokens} tokens={cell.tokens}
{onSourceClick}
/> />
</div> </div>
</td> </td>
@ -205,19 +208,27 @@
></iframe> ></iframe>
{:else if token.type === 'paragraph'} {:else if token.type === 'paragraph'}
<p> <p>
<MarkdownInlineTokens id={`${id}-${tokenIdx}-p`} tokens={token.tokens ?? []} /> <MarkdownInlineTokens
id={`${id}-${tokenIdx}-p`}
tokens={token.tokens ?? []}
{onSourceClick}
/>
</p> </p>
{:else if token.type === 'text'} {:else if token.type === 'text'}
{#if top} {#if top}
<p> <p>
{#if token.tokens} {#if token.tokens}
<MarkdownInlineTokens id={`${id}-${tokenIdx}-t`} tokens={token.tokens} /> <MarkdownInlineTokens id={`${id}-${tokenIdx}-t`} tokens={token.tokens} {onSourceClick} />
{:else} {:else}
{unescapeHtml(token.text)} {unescapeHtml(token.text)}
{/if} {/if}
</p> </p>
{:else if token.tokens} {:else if token.tokens}
<MarkdownInlineTokens id={`${id}-${tokenIdx}-p`} tokens={token.tokens ?? []} /> <MarkdownInlineTokens
id={`${id}-${tokenIdx}-p`}
tokens={token.tokens ?? []}
{onSourceClick}
/>
{:else} {:else}
{unescapeHtml(token.text)} {unescapeHtml(token.text)}
{/if} {/if}

View File

@ -0,0 +1,23 @@
<script lang="ts">
export let token;
export let onClick: Function = () => {};
let id = '';
/**
 * Pulls the value of the first `data="..."` attribute out of a raw tag string.
 *
 * @param input - Text to search, e.g. `<source_id data="whitepaper.pdf" />`.
 * @returns The text between the double quotes (possibly empty), or `null`
 *          when `input` contains no `data="..."` attribute.
 */
function extractDataAttribute(input) {
	const dataAttributePattern = /data="([^"]*)"/;
	const found = input.match(dataAttributePattern);
	if (!found) {
		return null;
	}
	// Index 0 is the whole match; index 1 is the quoted value.
	const [, value] = found;
	return value;
}
$: id = extractDataAttribute(token.text);
</script>
<button
class="text-xs font-medium px-1.5 py-0.5 dark:bg-white/5 dark:hover:bg-white/10 bg-black/5 hover:bg-black/10 transition rounded-lg"
on:click={() => {
onClick(id);
}}
>
{id}
</button>

View File

@ -621,9 +621,18 @@
<ContentRenderer <ContentRenderer
id={message.id} id={message.id}
content={message.content} content={message.content}
citations={message.citations}
floatingButtons={message?.done} floatingButtons={message?.done}
save={!readOnly} save={!readOnly}
{model} {model}
onSourceClick={(sourceId) => {
	// Forward a click on an inline citation to the element whose id is
	// `source-<sourceId>`; nothing happens when no such element exists.
	// NOTE(review): getElementById returns the first match in the document,
	// so the ids are presumably expected to be unique per page; confirm.
	document.getElementById(`source-${sourceId}`)?.click();
}}
on:update={(e) => { on:update={(e) => {
const { raw, oldContent, newContent } = e.detail; const { raw, oldContent, newContent } = e.detail;

View File

@ -5,11 +5,7 @@
import { models, settings } from '$lib/stores'; import { models, settings } from '$lib/stores';
import { user as _user } from '$lib/stores'; import { user as _user } from '$lib/stores';
import { import { copyToClipboard as _copyToClipboard } from '$lib/utils';
copyToClipboard as _copyToClipboard,
processResponseContent,
replaceTokens
} from '$lib/utils';
import Name from './Name.svelte'; import Name from './Name.svelte';
import ProfileImage from './ProfileImage.svelte'; import ProfileImage from './ProfileImage.svelte';

View File

@ -8,12 +8,13 @@ import { TTS_RESPONSE_SPLIT } from '$lib/types';
// Helper functions // Helper functions
////////////////////////// //////////////////////////
export const replaceTokens = (content, char, user) => { export const replaceTokens = (content, sourceIds, char, user) => {
const charToken = /{{char}}/gi; const charToken = /{{char}}/gi;
const userToken = /{{user}}/gi; const userToken = /{{user}}/gi;
const videoIdToken = /{{VIDEO_FILE_ID_([a-f0-9-]+)}}/gi; // Regex to capture the video ID const videoIdToken = /{{VIDEO_FILE_ID_([a-f0-9-]+)}}/gi; // Regex to capture the video ID
const htmlIdToken = /{{HTML_FILE_ID_([a-f0-9-]+)}}/gi; // Regex to capture the HTML ID const htmlIdToken = /{{HTML_FILE_ID_([a-f0-9-]+)}}/gi; // Regex to capture the HTML ID
// Replace {{char}} if char is provided // Replace {{char}} if char is provided
if (char !== undefined && char !== null) { if (char !== undefined && char !== null) {
content = content.replace(charToken, char); content = content.replace(charToken, char);
@ -36,6 +37,18 @@ export const replaceTokens = (content, char, user) => {
return `<iframe src="${htmlUrl}" width="100%" frameborder="0" onload="this.style.height=(this.contentWindow.document.body.scrollHeight+20)+'px';"></iframe>`; return `<iframe src="${htmlUrl}" width="100%" frameborder="0" onload="this.style.height=(this.contentWindow.document.body.scrollHeight+20)+'px';"></iframe>`;
}); });
// Replace every literal `[sourceId]` marker in the content with a
// `<source_id data="sourceId" />` tag.
if (Array.isArray(sourceIds)) {
	for (const sourceId of sourceIds) {
		// Skip missing or empty names: they would otherwise rewrite the
		// literal text `[undefined]` or `[]` wherever it appears in the content.
		if (typeof sourceId !== 'string' || sourceId === '') {
			continue;
		}

		// Plain-string split/join instead of a RegExp built from the name:
		// names can contain regex metacharacters (`.`, `+`, `(`, ...) that
		// would mis-match or throw, and `$` sequences that String.replace
		// would interpret inside the replacement text.
		content = content.split(`[${sourceId}]`).join(`<source_id data="${sourceId}" />`);
	}
}
return content; return content;
}; };