diff --git a/backend/open_webui/apps/retrieval/main.py b/backend/open_webui/apps/retrieval/main.py index 815f18276..87df03238 100644 --- a/backend/open_webui/apps/retrieval/main.py +++ b/backend/open_webui/apps/retrieval/main.py @@ -392,18 +392,19 @@ async def get_rag_config(user=Depends(get_admin_user)): return { "status": True, "pdf_extract_images": app.state.config.PDF_EXTRACT_IMAGES, - "file": { - "max_size": app.state.config.FILE_MAX_SIZE, - "max_count": app.state.config.FILE_MAX_COUNT, - }, "content_extraction": { "engine": app.state.config.CONTENT_EXTRACTION_ENGINE, "tika_server_url": app.state.config.TIKA_SERVER_URL, }, "chunk": { + "text_splitter": app.state.config.TEXT_SPLITTER, "chunk_size": app.state.config.CHUNK_SIZE, "chunk_overlap": app.state.config.CHUNK_OVERLAP, }, + "file": { + "max_size": app.state.config.FILE_MAX_SIZE, + "max_count": app.state.config.FILE_MAX_COUNT, + }, "youtube": { "language": app.state.config.YOUTUBE_LOADER_LANGUAGE, "translation": app.state.YOUTUBE_LOADER_TRANSLATION, @@ -442,6 +443,7 @@ class ContentExtractionConfig(BaseModel): class ChunkParamUpdateForm(BaseModel): + text_splitter: Optional[str] = None chunk_size: int chunk_overlap: int @@ -501,6 +503,7 @@ async def update_rag_config(form_data: ConfigUpdateForm, user=Depends(get_admin_ app.state.config.TIKA_SERVER_URL = form_data.content_extraction.tika_server_url if form_data.chunk is not None: + app.state.config.TEXT_SPLITTER = form_data.chunk.text_splitter app.state.config.CHUNK_SIZE = form_data.chunk.chunk_size app.state.config.CHUNK_OVERLAP = form_data.chunk.chunk_overlap @@ -547,6 +550,7 @@ async def update_rag_config(form_data: ConfigUpdateForm, user=Depends(get_admin_ "tika_server_url": app.state.config.TIKA_SERVER_URL, }, "chunk": { + "text_splitter": app.state.config.TEXT_SPLITTER, "chunk_size": app.state.config.CHUNK_SIZE, "chunk_overlap": app.state.config.CHUNK_OVERLAP, }, @@ -607,11 +611,10 @@ class QuerySettingsForm(BaseModel): async def update_query_settings( form_data: QuerySettingsForm, user=Depends(get_admin_user) ): - app.state.config.RAG_TEMPLATE = ( - form_data.template if form_data.template != "" else DEFAULT_RAG_TEMPLATE - ) + app.state.config.RAG_TEMPLATE = form_data.template app.state.config.TOP_K = form_data.k if form_data.k else 4 app.state.config.RELEVANCE_THRESHOLD = form_data.r if form_data.r else 0.0 + app.state.config.ENABLE_RAG_HYBRID_SEARCH = ( form_data.hybrid if form_data.hybrid else False ) diff --git a/backend/open_webui/apps/retrieval/utils.py b/backend/open_webui/apps/retrieval/utils.py index ccd2c9a85..a53f2b6b3 100644 --- a/backend/open_webui/apps/retrieval/utils.py +++ b/backend/open_webui/apps/retrieval/utils.py @@ -19,6 +19,7 @@ from open_webui.apps.retrieval.vector.connector import VECTOR_DB_CLIENT from open_webui.utils.misc import get_last_user_message from open_webui.env import SRC_LOG_LEVELS +from open_webui.config import DEFAULT_RAG_TEMPLATE log = logging.getLogger(__name__) @@ -239,6 +240,9 @@ def query_collection_with_hybrid_search( def rag_template(template: str, context: str, query: str): + if template == "": + template = DEFAULT_RAG_TEMPLATE + if "[context]" not in template and "{{CONTEXT}}" not in template: log.debug( "WARNING: The RAG template does not contain the '[context]' or '{{CONTEXT}}' placeholder." diff --git a/src/lib/components/admin/Settings/Documents.svelte b/src/lib/components/admin/Settings/Documents.svelte index 63212b03e..1a5bd00be 100644 --- a/src/lib/components/admin/Settings/Documents.svelte +++ b/src/lib/components/admin/Settings/Documents.svelte @@ -27,6 +27,7 @@ import SensitiveInput from '$lib/components/common/SensitiveInput.svelte'; import Tooltip from '$lib/components/common/Tooltip.svelte'; import Switch from '$lib/components/common/Switch.svelte'; + import { text } from '@sveltejs/kit'; const i18n = getContext('i18n'); @@ -49,6 +50,7 @@ let tikaServerUrl = ''; let showTikaServerUrl = false; + let textSplitter = ''; let chunkSize = 0; let chunkOverlap = 0; let pdfExtractImages = true; @@ -178,6 +180,7 @@ max_count: fileMaxCount === '' ? null : fileMaxCount }, chunk: { + text_splitter: textSplitter, chunk_overlap: chunkOverlap, chunk_size: chunkSize }, @@ -223,11 +226,13 @@ await setRerankingConfig(); querySettings = await getQuerySettings(localStorage.token); + const res = await getRAGConfig(localStorage.token); if (res) { pdfExtractImages = res.pdf_extract_images; + textSplitter = res.chunk.text_splitter; chunkSize = res.chunk.chunk_size; chunkOverlap = res.chunk.chunk_overlap; @@ -639,6 +644,19 @@
{$i18n.t('Chunk Params')}
+
+
{$i18n.t('Text Splitter')}
+
+ +
+
+
{$i18n.t('Chunk Size')}