mirror of
https://github.com/open-webui/open-webui
synced 2025-01-31 06:49:03 +00:00
enh: token text splitter support
This commit is contained in:
parent
8a0da6d376
commit
586e005f0f
@ -392,18 +392,19 @@ async def get_rag_config(user=Depends(get_admin_user)):
|
||||
return {
|
||||
"status": True,
|
||||
"pdf_extract_images": app.state.config.PDF_EXTRACT_IMAGES,
|
||||
"file": {
|
||||
"max_size": app.state.config.FILE_MAX_SIZE,
|
||||
"max_count": app.state.config.FILE_MAX_COUNT,
|
||||
},
|
||||
"content_extraction": {
|
||||
"engine": app.state.config.CONTENT_EXTRACTION_ENGINE,
|
||||
"tika_server_url": app.state.config.TIKA_SERVER_URL,
|
||||
},
|
||||
"chunk": {
|
||||
"text_splitter": app.state.config.TEXT_SPLITTER,
|
||||
"chunk_size": app.state.config.CHUNK_SIZE,
|
||||
"chunk_overlap": app.state.config.CHUNK_OVERLAP,
|
||||
},
|
||||
"file": {
|
||||
"max_size": app.state.config.FILE_MAX_SIZE,
|
||||
"max_count": app.state.config.FILE_MAX_COUNT,
|
||||
},
|
||||
"youtube": {
|
||||
"language": app.state.config.YOUTUBE_LOADER_LANGUAGE,
|
||||
"translation": app.state.YOUTUBE_LOADER_TRANSLATION,
|
||||
@ -442,6 +443,7 @@ class ContentExtractionConfig(BaseModel):
|
||||
|
||||
|
||||
# Chunking options for the RAG configuration update request.
# NOTE(review): presumably the type of `ConfigUpdateForm.chunk`; when that
# section is present, update_rag_config copies each field below into
# app.state.config (TEXT_SPLITTER, CHUNK_SIZE, CHUNK_OVERLAP).
class ChunkParamUpdateForm(BaseModel):
|
||||
# Optional splitter selector; defaults to None when the client omits it.
# The settings UI in this commit sends "" for "Default (Character)" and
# "token" for "Token (Tiktoken)".
text_splitter: Optional[str] = None
|
||||
# Declared without a default; stored as CHUNK_SIZE.
chunk_size: int
|
||||
# Declared without a default; stored as CHUNK_OVERLAP.
chunk_overlap: int
|
||||
|
||||
@ -501,6 +503,7 @@ async def update_rag_config(form_data: ConfigUpdateForm, user=Depends(get_admin_
|
||||
app.state.config.TIKA_SERVER_URL = form_data.content_extraction.tika_server_url
|
||||
|
||||
if form_data.chunk is not None:
|
||||
app.state.config.TEXT_SPLITTER = form_data.chunk.text_splitter
|
||||
app.state.config.CHUNK_SIZE = form_data.chunk.chunk_size
|
||||
app.state.config.CHUNK_OVERLAP = form_data.chunk.chunk_overlap
|
||||
|
||||
@ -547,6 +550,7 @@ async def update_rag_config(form_data: ConfigUpdateForm, user=Depends(get_admin_
|
||||
"tika_server_url": app.state.config.TIKA_SERVER_URL,
|
||||
},
|
||||
"chunk": {
|
||||
"text_splitter": app.state.config.TEXT_SPLITTER,
|
||||
"chunk_size": app.state.config.CHUNK_SIZE,
|
||||
"chunk_overlap": app.state.config.CHUNK_OVERLAP,
|
||||
},
|
||||
@ -607,11 +611,10 @@ class QuerySettingsForm(BaseModel):
|
||||
async def update_query_settings(
|
||||
form_data: QuerySettingsForm, user=Depends(get_admin_user)
|
||||
):
|
||||
app.state.config.RAG_TEMPLATE = (
|
||||
form_data.template if form_data.template != "" else DEFAULT_RAG_TEMPLATE
|
||||
)
|
||||
app.state.config.RAG_TEMPLATE = form_data.template
|
||||
app.state.config.TOP_K = form_data.k if form_data.k else 4
|
||||
app.state.config.RELEVANCE_THRESHOLD = form_data.r if form_data.r else 0.0
|
||||
|
||||
app.state.config.ENABLE_RAG_HYBRID_SEARCH = (
|
||||
form_data.hybrid if form_data.hybrid else False
|
||||
)
|
||||
|
@ -19,6 +19,7 @@ from open_webui.apps.retrieval.vector.connector import VECTOR_DB_CLIENT
|
||||
from open_webui.utils.misc import get_last_user_message
|
||||
|
||||
from open_webui.env import SRC_LOG_LEVELS
|
||||
from open_webui.config import DEFAULT_RAG_TEMPLATE
|
||||
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
@ -239,6 +240,9 @@ def query_collection_with_hybrid_search(
|
||||
|
||||
|
||||
def rag_template(template: str, context: str, query: str):
|
||||
if template == "":
|
||||
template = DEFAULT_RAG_TEMPLATE
|
||||
|
||||
if "[context]" not in template and "{{CONTEXT}}" not in template:
|
||||
log.debug(
|
||||
"WARNING: The RAG template does not contain the '[context]' or '{{CONTEXT}}' placeholder."
|
||||
|
@ -27,6 +27,7 @@
|
||||
import SensitiveInput from '$lib/components/common/SensitiveInput.svelte';
|
||||
import Tooltip from '$lib/components/common/Tooltip.svelte';
|
||||
import Switch from '$lib/components/common/Switch.svelte';
|
||||
import { text } from '@sveltejs/kit';
|
||||
|
||||
const i18n = getContext('i18n');
|
||||
|
||||
@ -49,6 +50,7 @@
|
||||
let tikaServerUrl = '';
|
||||
let showTikaServerUrl = false;
|
||||
|
||||
let textSplitter = '';
|
||||
let chunkSize = 0;
|
||||
let chunkOverlap = 0;
|
||||
let pdfExtractImages = true;
|
||||
@ -178,6 +180,7 @@
|
||||
max_count: fileMaxCount === '' ? null : fileMaxCount
|
||||
},
|
||||
chunk: {
|
||||
text_splitter: textSplitter,
|
||||
chunk_overlap: chunkOverlap,
|
||||
chunk_size: chunkSize
|
||||
},
|
||||
@ -223,11 +226,13 @@
|
||||
await setRerankingConfig();
|
||||
|
||||
querySettings = await getQuerySettings(localStorage.token);
|
||||
|
||||
const res = await getRAGConfig(localStorage.token);
|
||||
|
||||
if (res) {
|
||||
pdfExtractImages = res.pdf_extract_images;
|
||||
|
||||
textSplitter = res.chunk.text_splitter;
|
||||
chunkSize = res.chunk.chunk_size;
|
||||
chunkOverlap = res.chunk.chunk_overlap;
|
||||
|
||||
@ -639,6 +644,19 @@
|
||||
<div class=" ">
|
||||
<div class="mb-1 text-sm font-medium">{$i18n.t('Chunk Params')}</div>
|
||||
|
||||
<div class="flex w-full justify-between mb-1.5">
|
||||
<div class="self-center text-xs font-medium">{$i18n.t('Text Splitter')}</div>
|
||||
<div class="flex items-center relative">
|
||||
<select
|
||||
class="dark:bg-gray-900 w-fit pr-8 rounded px-2 text-xs bg-transparent outline-none text-right"
|
||||
bind:value={textSplitter}
|
||||
>
|
||||
<option value="">{$i18n.t('Default (Character)')} </option>
|
||||
<option value="token">{$i18n.t('Token (Tiktoken)')}</option>
|
||||
</select>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class=" flex gap-1.5">
|
||||
<div class=" w-full justify-between">
|
||||
<div class="self-center text-xs font-medium min-w-fit mb-1">{$i18n.t('Chunk Size')}</div>
|
||||
|
Loading…
Reference in New Issue
Block a user