mirror of
https://github.com/open-webui/open-webui
synced 2025-06-26 18:26:48 +00:00
enh: token text splitter support
This commit is contained in:
@@ -392,18 +392,19 @@ async def get_rag_config(user=Depends(get_admin_user)):
|
||||
return {
|
||||
"status": True,
|
||||
"pdf_extract_images": app.state.config.PDF_EXTRACT_IMAGES,
|
||||
"file": {
|
||||
"max_size": app.state.config.FILE_MAX_SIZE,
|
||||
"max_count": app.state.config.FILE_MAX_COUNT,
|
||||
},
|
||||
"content_extraction": {
|
||||
"engine": app.state.config.CONTENT_EXTRACTION_ENGINE,
|
||||
"tika_server_url": app.state.config.TIKA_SERVER_URL,
|
||||
},
|
||||
"chunk": {
|
||||
"text_splitter": app.state.config.TEXT_SPLITTER,
|
||||
"chunk_size": app.state.config.CHUNK_SIZE,
|
||||
"chunk_overlap": app.state.config.CHUNK_OVERLAP,
|
||||
},
|
||||
"file": {
|
||||
"max_size": app.state.config.FILE_MAX_SIZE,
|
||||
"max_count": app.state.config.FILE_MAX_COUNT,
|
||||
},
|
||||
"youtube": {
|
||||
"language": app.state.config.YOUTUBE_LOADER_LANGUAGE,
|
||||
"translation": app.state.YOUTUBE_LOADER_TRANSLATION,
|
||||
@@ -442,6 +443,7 @@ class ContentExtractionConfig(BaseModel):
|
||||
|
||||
|
||||
class ChunkParamUpdateForm(BaseModel):
|
||||
text_splitter: Optional[str] = None
|
||||
chunk_size: int
|
||||
chunk_overlap: int
|
||||
|
||||
@@ -501,6 +503,7 @@ async def update_rag_config(form_data: ConfigUpdateForm, user=Depends(get_admin_
|
||||
app.state.config.TIKA_SERVER_URL = form_data.content_extraction.tika_server_url
|
||||
|
||||
if form_data.chunk is not None:
|
||||
app.state.config.TEXT_SPLITTER = form_data.chunk.text_splitter
|
||||
app.state.config.CHUNK_SIZE = form_data.chunk.chunk_size
|
||||
app.state.config.CHUNK_OVERLAP = form_data.chunk.chunk_overlap
|
||||
|
||||
@@ -547,6 +550,7 @@ async def update_rag_config(form_data: ConfigUpdateForm, user=Depends(get_admin_
|
||||
"tika_server_url": app.state.config.TIKA_SERVER_URL,
|
||||
},
|
||||
"chunk": {
|
||||
"text_splitter": app.state.config.TEXT_SPLITTER,
|
||||
"chunk_size": app.state.config.CHUNK_SIZE,
|
||||
"chunk_overlap": app.state.config.CHUNK_OVERLAP,
|
||||
},
|
||||
@@ -607,11 +611,10 @@ class QuerySettingsForm(BaseModel):
|
||||
async def update_query_settings(
|
||||
form_data: QuerySettingsForm, user=Depends(get_admin_user)
|
||||
):
|
||||
app.state.config.RAG_TEMPLATE = (
|
||||
form_data.template if form_data.template != "" else DEFAULT_RAG_TEMPLATE
|
||||
)
|
||||
app.state.config.RAG_TEMPLATE = form_data.template
|
||||
app.state.config.TOP_K = form_data.k if form_data.k else 4
|
||||
app.state.config.RELEVANCE_THRESHOLD = form_data.r if form_data.r else 0.0
|
||||
|
||||
app.state.config.ENABLE_RAG_HYBRID_SEARCH = (
|
||||
form_data.hybrid if form_data.hybrid else False
|
||||
)
|
||||
|
||||
@@ -19,6 +19,7 @@ from open_webui.apps.retrieval.vector.connector import VECTOR_DB_CLIENT
|
||||
from open_webui.utils.misc import get_last_user_message
|
||||
|
||||
from open_webui.env import SRC_LOG_LEVELS
|
||||
from open_webui.config import DEFAULT_RAG_TEMPLATE
|
||||
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
@@ -239,6 +240,9 @@ def query_collection_with_hybrid_search(
|
||||
|
||||
|
||||
def rag_template(template: str, context: str, query: str):
|
||||
if template == "":
|
||||
template = DEFAULT_RAG_TEMPLATE
|
||||
|
||||
if "[context]" not in template and "{{CONTEXT}}" not in template:
|
||||
log.debug(
|
||||
"WARNING: The RAG template does not contain the '[context]' or '{{CONTEXT}}' placeholder."
|
||||
|
||||
Reference in New Issue
Block a user