mirror of
https://github.com/open-webui/open-webui
synced 2025-06-09 07:56:42 +00:00
Merge pull request #14787 from vaclcer/vaclavs-custom-docling
feat: Customize Docling's "Describe Pictures" feature
This commit is contained in:
commit
6bf393a480
@ -1968,6 +1968,54 @@ DOCLING_DO_PICTURE_DESCRIPTION = PersistentConfig(
|
||||
os.getenv("DOCLING_DO_PICTURE_DESCRIPTION", "False").lower() == "true",
|
||||
)
|
||||
|
||||
# Selects how Docling produces picture descriptions. DoclingLoader lowercases
# this value and only acts on "local" and "api"; the default empty string adds
# no picture-description options to the request.
DOCLING_PICTURE_DESCRIPTION_MODE = PersistentConfig(
    "DOCLING_PICTURE_DESCRIPTION_MODE",
    "rag.docling_picture_description_mode",
    os.environ.get("DOCLING_PICTURE_DESCRIPTION_MODE", ""),
)
|
||||
|
||||
# Hugging Face repo id of the vision-language model used in "local" mode;
# DoclingLoader forwards it as `repo_id` inside `picture_description_local`.
DOCLING_PICTURE_DESCRIPTION_LOCAL_REPO_ID = PersistentConfig(
    "DOCLING_PICTURE_DESCRIPTION_LOCAL_REPO_ID",
    "rag.docling_picture_description_local_repo_id",
    os.environ.get(
        "DOCLING_PICTURE_DESCRIPTION_LOCAL_REPO_ID",
        "HuggingFaceTB/SmolVLM-256M-Instruct",
    ),
)
|
||||
|
||||
# Token budget for descriptions generated in "local" mode; DoclingLoader
# forwards it as `generation_config.max_new_tokens`.
# An unset OR empty environment variable falls back to 200: int("") raises
# ValueError, and this statement runs at module import time. A non-numeric
# value still raises so that a real misconfiguration is not hidden.
DOCLING_PICTURE_DESCRIPTION_LOCAL_MAX_TOKENS = PersistentConfig(
    "DOCLING_PICTURE_DESCRIPTION_LOCAL_MAX_TOKENS",
    "rag.docling_picture_description_local_max_tokens",
    int(os.getenv("DOCLING_PICTURE_DESCRIPTION_LOCAL_MAX_TOKENS") or 200),
)
|
||||
|
||||
# Prompt used in "local" mode; DoclingLoader forwards it as `prompt` inside
# `picture_description_local`.
DOCLING_PICTURE_DESCRIPTION_LOCAL_PROMPT = PersistentConfig(
    "DOCLING_PICTURE_DESCRIPTION_LOCAL_PROMPT",
    "rag.docling_picture_description_local_prompt",
    os.environ.get(
        "DOCLING_PICTURE_DESCRIPTION_LOCAL_PROMPT",
        "Describe this image in a few sentences.",
    ),
)
|
||||
|
||||
# Endpoint used in "api" mode; DoclingLoader forwards it as `url` inside
# `picture_description_api`. Empty unless configured.
DOCLING_PICTURE_DESCRIPTION_API_URL = PersistentConfig(
    "DOCLING_PICTURE_DESCRIPTION_API_URL",
    "rag.docling_picture_description_api_url",
    os.environ.get("DOCLING_PICTURE_DESCRIPTION_API_URL", ""),
)
|
||||
|
||||
# Model name used in "api" mode; DoclingLoader forwards it as `params.model`
# inside `picture_description_api`. Empty unless configured.
DOCLING_PICTURE_DESCRIPTION_API_MODEL = PersistentConfig(
    "DOCLING_PICTURE_DESCRIPTION_API_MODEL",
    "rag.docling_picture_description_api_model",
    os.environ.get("DOCLING_PICTURE_DESCRIPTION_API_MODEL", ""),
)
|
||||
|
||||
# Prompt used in "api" mode; DoclingLoader forwards it as `prompt` inside
# `picture_description_api`.
DOCLING_PICTURE_DESCRIPTION_API_PROMPT = PersistentConfig(
    "DOCLING_PICTURE_DESCRIPTION_API_PROMPT",
    "rag.docling_picture_description_api_prompt",
    os.environ.get(
        "DOCLING_PICTURE_DESCRIPTION_API_PROMPT",
        "Describe this image in a few sentences.",
    ),
)
|
||||
|
||||
DOCUMENT_INTELLIGENCE_ENDPOINT = PersistentConfig(
|
||||
"DOCUMENT_INTELLIGENCE_ENDPOINT",
|
||||
"rag.document_intelligence_endpoint",
|
||||
|
@ -231,6 +231,13 @@ from open_webui.config import (
|
||||
DOCLING_OCR_ENGINE,
|
||||
DOCLING_OCR_LANG,
|
||||
DOCLING_DO_PICTURE_DESCRIPTION,
|
||||
DOCLING_PICTURE_DESCRIPTION_MODE,
|
||||
DOCLING_PICTURE_DESCRIPTION_LOCAL_REPO_ID,
|
||||
DOCLING_PICTURE_DESCRIPTION_LOCAL_MAX_TOKENS,
|
||||
DOCLING_PICTURE_DESCRIPTION_LOCAL_PROMPT,
|
||||
DOCLING_PICTURE_DESCRIPTION_API_URL,
|
||||
DOCLING_PICTURE_DESCRIPTION_API_MODEL,
|
||||
DOCLING_PICTURE_DESCRIPTION_API_PROMPT,
|
||||
DOCUMENT_INTELLIGENCE_ENDPOINT,
|
||||
DOCUMENT_INTELLIGENCE_KEY,
|
||||
MISTRAL_OCR_API_KEY,
|
||||
@ -701,6 +708,13 @@ app.state.config.DOCLING_SERVER_URL = DOCLING_SERVER_URL
|
||||
app.state.config.DOCLING_OCR_ENGINE = DOCLING_OCR_ENGINE
|
||||
app.state.config.DOCLING_OCR_LANG = DOCLING_OCR_LANG
|
||||
app.state.config.DOCLING_DO_PICTURE_DESCRIPTION = DOCLING_DO_PICTURE_DESCRIPTION
|
||||
app.state.config.DOCLING_PICTURE_DESCRIPTION_MODE = DOCLING_PICTURE_DESCRIPTION_MODE
|
||||
app.state.config.DOCLING_PICTURE_DESCRIPTION_LOCAL_REPO_ID = DOCLING_PICTURE_DESCRIPTION_LOCAL_REPO_ID
|
||||
app.state.config.DOCLING_PICTURE_DESCRIPTION_LOCAL_MAX_TOKENS = DOCLING_PICTURE_DESCRIPTION_LOCAL_MAX_TOKENS
|
||||
app.state.config.DOCLING_PICTURE_DESCRIPTION_LOCAL_PROMPT = DOCLING_PICTURE_DESCRIPTION_LOCAL_PROMPT
|
||||
app.state.config.DOCLING_PICTURE_DESCRIPTION_API_URL = DOCLING_PICTURE_DESCRIPTION_API_URL
|
||||
app.state.config.DOCLING_PICTURE_DESCRIPTION_API_MODEL = DOCLING_PICTURE_DESCRIPTION_API_MODEL
|
||||
app.state.config.DOCLING_PICTURE_DESCRIPTION_API_PROMPT = DOCLING_PICTURE_DESCRIPTION_API_PROMPT
|
||||
app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT = DOCUMENT_INTELLIGENCE_ENDPOINT
|
||||
app.state.config.DOCUMENT_INTELLIGENCE_KEY = DOCUMENT_INTELLIGENCE_KEY
|
||||
app.state.config.MISTRAL_OCR_API_KEY = MISTRAL_OCR_API_KEY
|
||||
|
@ -2,6 +2,7 @@ import requests
|
||||
import logging
|
||||
import ftfy
|
||||
import sys
|
||||
import json
|
||||
|
||||
from langchain_community.document_loaders import (
|
||||
AzureAIDocumentIntelligenceLoader,
|
||||
@ -154,6 +155,41 @@ class DoclingLoader:
|
||||
"do_picture_description"
|
||||
)
|
||||
|
||||
picture_description_mode = self.params.get("picture_description_mode", "").lower()
|
||||
|
||||
if picture_description_mode == "local":
|
||||
|
||||
params["picture_description_local"] = json.dumps({
|
||||
"repo_id": self.params.get(
|
||||
"picture_description_local_repo_id", "HuggingFaceTB/SmolVLM-256M-Instruct"
|
||||
),
|
||||
"generation_config": {
|
||||
"max_new_tokens": self.params.get(
|
||||
"picture_description_local_max_tokens", 200
|
||||
)
|
||||
},
|
||||
"prompt": self.params.get(
|
||||
"picture_description_local_prompt", "Describe this image in a few sentences."
|
||||
)
|
||||
})
|
||||
|
||||
elif picture_description_mode == "api":
|
||||
|
||||
params["picture_description_api"] = json.dumps({
|
||||
"url": self.params.get(
|
||||
"picture_description_api_url", ""
|
||||
),
|
||||
"params": {
|
||||
"model": self.params.get(
|
||||
"picture_description_api_model", ""
|
||||
)
|
||||
},
|
||||
"timeout": 30,
|
||||
"prompt": self.params.get(
|
||||
"picture_description_api_prompt", "Describe this image in a few sentences."
|
||||
)
|
||||
})
|
||||
|
||||
if self.params.get("ocr_engine") and self.params.get("ocr_lang"):
|
||||
params["ocr_engine"] = self.params.get("ocr_engine")
|
||||
params["ocr_lang"] = [
|
||||
@ -281,17 +317,25 @@ class Loader:
|
||||
if self._is_text_file(file_ext, file_content_type):
|
||||
loader = TextLoader(file_path, autodetect_encoding=True)
|
||||
else:
|
||||
# Build params for DoclingLoader
|
||||
params = {
|
||||
"ocr_engine": self.kwargs.get("DOCLING_OCR_ENGINE"),
|
||||
"ocr_lang": self.kwargs.get("DOCLING_OCR_LANG"),
|
||||
"do_picture_description": self.kwargs.get("DOCLING_DO_PICTURE_DESCRIPTION"),
|
||||
"picture_description_mode": self.kwargs.get("DOCLING_PICTURE_DESCRIPTION_MODE"),
|
||||
"picture_description_local_repo_id": self.kwargs.get("DOCLING_PICTURE_DESCRIPTION_LOCAL_REPO_ID"),
|
||||
"picture_description_local_max_tokens": self.kwargs.get("DOCLING_PICTURE_DESCRIPTION_LOCAL_MAX_TOKENS"),
|
||||
"picture_description_local_prompt": self.kwargs.get("DOCLING_PICTURE_DESCRIPTION_LOCAL_PROMPT"),
|
||||
"picture_description_api_url": self.kwargs.get("DOCLING_PICTURE_DESCRIPTION_API_URL"),
|
||||
"picture_description_api_model": self.kwargs.get("DOCLING_PICTURE_DESCRIPTION_API_MODEL"),
|
||||
"picture_description_api_prompt": self.kwargs.get("DOCLING_PICTURE_DESCRIPTION_API_PROMPT")
|
||||
}
|
||||
|
||||
loader = DoclingLoader(
|
||||
url=self.kwargs.get("DOCLING_SERVER_URL"),
|
||||
file_path=file_path,
|
||||
mime_type=file_content_type,
|
||||
params={
|
||||
"ocr_engine": self.kwargs.get("DOCLING_OCR_ENGINE"),
|
||||
"ocr_lang": self.kwargs.get("DOCLING_OCR_LANG"),
|
||||
"do_picture_description": self.kwargs.get(
|
||||
"DOCLING_DO_PICTURE_DESCRIPTION"
|
||||
),
|
||||
},
|
||||
params=params
|
||||
)
|
||||
elif (
|
||||
self.engine == "document_intelligence"
|
||||
|
@ -414,6 +414,13 @@ async def get_rag_config(request: Request, user=Depends(get_admin_user)):
|
||||
"DOCLING_OCR_ENGINE": request.app.state.config.DOCLING_OCR_ENGINE,
|
||||
"DOCLING_OCR_LANG": request.app.state.config.DOCLING_OCR_LANG,
|
||||
"DOCLING_DO_PICTURE_DESCRIPTION": request.app.state.config.DOCLING_DO_PICTURE_DESCRIPTION,
|
||||
"DOCLING_PICTURE_DESCRIPTION_MODE": request.app.state.config.DOCLING_PICTURE_DESCRIPTION_MODE,
|
||||
"DOCLING_PICTURE_DESCRIPTION_LOCAL_REPO_ID": request.app.state.config.DOCLING_PICTURE_DESCRIPTION_LOCAL_REPO_ID,
|
||||
"DOCLING_PICTURE_DESCRIPTION_LOCAL_MAX_TOKENS": request.app.state.config.DOCLING_PICTURE_DESCRIPTION_LOCAL_MAX_TOKENS,
|
||||
"DOCLING_PICTURE_DESCRIPTION_LOCAL_PROMPT": request.app.state.config.DOCLING_PICTURE_DESCRIPTION_LOCAL_PROMPT,
|
||||
"DOCLING_PICTURE_DESCRIPTION_API_URL": request.app.state.config.DOCLING_PICTURE_DESCRIPTION_API_URL,
|
||||
"DOCLING_PICTURE_DESCRIPTION_API_MODEL": request.app.state.config.DOCLING_PICTURE_DESCRIPTION_API_MODEL,
|
||||
"DOCLING_PICTURE_DESCRIPTION_API_PROMPT": request.app.state.config.DOCLING_PICTURE_DESCRIPTION_API_PROMPT,
|
||||
"DOCUMENT_INTELLIGENCE_ENDPOINT": request.app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT,
|
||||
"DOCUMENT_INTELLIGENCE_KEY": request.app.state.config.DOCUMENT_INTELLIGENCE_KEY,
|
||||
"MISTRAL_OCR_API_KEY": request.app.state.config.MISTRAL_OCR_API_KEY,
|
||||
@ -575,6 +582,13 @@ class ConfigForm(BaseModel):
|
||||
DOCLING_OCR_ENGINE: Optional[str] = None
|
||||
DOCLING_OCR_LANG: Optional[str] = None
|
||||
DOCLING_DO_PICTURE_DESCRIPTION: Optional[bool] = None
|
||||
DOCLING_PICTURE_DESCRIPTION_MODE: Optional[str] = None
|
||||
DOCLING_PICTURE_DESCRIPTION_LOCAL_REPO_ID: Optional[str] = None
|
||||
DOCLING_PICTURE_DESCRIPTION_LOCAL_MAX_TOKENS: Optional[int] = None
|
||||
DOCLING_PICTURE_DESCRIPTION_LOCAL_PROMPT: Optional[str] = None
|
||||
DOCLING_PICTURE_DESCRIPTION_API_URL: Optional[str] = None
|
||||
DOCLING_PICTURE_DESCRIPTION_API_MODEL: Optional[str] = None
|
||||
DOCLING_PICTURE_DESCRIPTION_API_PROMPT: Optional[str] = None
|
||||
DOCUMENT_INTELLIGENCE_ENDPOINT: Optional[str] = None
|
||||
DOCUMENT_INTELLIGENCE_KEY: Optional[str] = None
|
||||
MISTRAL_OCR_API_KEY: Optional[str] = None
|
||||
@ -748,6 +762,42 @@ async def update_rag_config(
|
||||
else request.app.state.config.DOCLING_DO_PICTURE_DESCRIPTION
|
||||
)
|
||||
|
||||
request.app.state.config.DOCLING_PICTURE_DESCRIPTION_MODE = (
|
||||
form_data.DOCLING_PICTURE_DESCRIPTION_MODE
|
||||
if form_data.DOCLING_PICTURE_DESCRIPTION_MODE is not None
|
||||
else request.app.state.config.DOCLING_PICTURE_DESCRIPTION_MODE
|
||||
)
|
||||
request.app.state.config.DOCLING_PICTURE_DESCRIPTION_LOCAL_REPO_ID = (
|
||||
form_data.DOCLING_PICTURE_DESCRIPTION_LOCAL_REPO_ID
|
||||
if form_data.DOCLING_PICTURE_DESCRIPTION_LOCAL_REPO_ID is not None
|
||||
else request.app.state.config.DOCLING_PICTURE_DESCRIPTION_LOCAL_REPO_ID
|
||||
)
|
||||
request.app.state.config.DOCLING_PICTURE_DESCRIPTION_LOCAL_MAX_TOKENS = (
|
||||
form_data.DOCLING_PICTURE_DESCRIPTION_LOCAL_MAX_TOKENS
|
||||
if form_data.DOCLING_PICTURE_DESCRIPTION_LOCAL_MAX_TOKENS is not None
|
||||
else request.app.state.config.DOCLING_PICTURE_DESCRIPTION_LOCAL_MAX_TOKENS
|
||||
)
|
||||
request.app.state.config.DOCLING_PICTURE_DESCRIPTION_LOCAL_PROMPT = (
|
||||
form_data.DOCLING_PICTURE_DESCRIPTION_LOCAL_PROMPT
|
||||
if form_data.DOCLING_PICTURE_DESCRIPTION_LOCAL_PROMPT is not None
|
||||
else request.app.state.config.DOCLING_PICTURE_DESCRIPTION_LOCAL_PROMPT
|
||||
)
|
||||
request.app.state.config.DOCLING_PICTURE_DESCRIPTION_API_URL = (
|
||||
form_data.DOCLING_PICTURE_DESCRIPTION_API_URL
|
||||
if form_data.DOCLING_PICTURE_DESCRIPTION_API_URL is not None
|
||||
else request.app.state.config.DOCLING_PICTURE_DESCRIPTION_API_URL
|
||||
)
|
||||
request.app.state.config.DOCLING_PICTURE_DESCRIPTION_API_MODEL = (
|
||||
form_data.DOCLING_PICTURE_DESCRIPTION_API_MODEL
|
||||
if form_data.DOCLING_PICTURE_DESCRIPTION_API_MODEL is not None
|
||||
else request.app.state.config.DOCLING_PICTURE_DESCRIPTION_API_MODEL
|
||||
)
|
||||
request.app.state.config.DOCLING_PICTURE_DESCRIPTION_API_PROMPT = (
|
||||
form_data.DOCLING_PICTURE_DESCRIPTION_API_PROMPT
|
||||
if form_data.DOCLING_PICTURE_DESCRIPTION_API_PROMPT is not None
|
||||
else request.app.state.config.DOCLING_PICTURE_DESCRIPTION_API_PROMPT
|
||||
)
|
||||
|
||||
request.app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT = (
|
||||
form_data.DOCUMENT_INTELLIGENCE_ENDPOINT
|
||||
if form_data.DOCUMENT_INTELLIGENCE_ENDPOINT is not None
|
||||
@ -985,6 +1035,13 @@ async def update_rag_config(
|
||||
"DOCLING_OCR_ENGINE": request.app.state.config.DOCLING_OCR_ENGINE,
|
||||
"DOCLING_OCR_LANG": request.app.state.config.DOCLING_OCR_LANG,
|
||||
"DOCLING_DO_PICTURE_DESCRIPTION": request.app.state.config.DOCLING_DO_PICTURE_DESCRIPTION,
|
||||
"DOCLING_PICTURE_DESCRIPTION_MODE": request.app.state.config.DOCLING_PICTURE_DESCRIPTION_MODE,
|
||||
"DOCLING_PICTURE_DESCRIPTION_LOCAL_REPO_ID": request.app.state.config.DOCLING_PICTURE_DESCRIPTION_LOCAL_REPO_ID,
|
||||
"DOCLING_PICTURE_DESCRIPTION_LOCAL_MAX_TOKENS": request.app.state.config.DOCLING_PICTURE_DESCRIPTION_LOCAL_MAX_TOKENS,
|
||||
"DOCLING_PICTURE_DESCRIPTION_LOCAL_PROMPT": request.app.state.config.DOCLING_PICTURE_DESCRIPTION_LOCAL_PROMPT,
|
||||
"DOCLING_PICTURE_DESCRIPTION_API_URL": request.app.state.config.DOCLING_PICTURE_DESCRIPTION_API_URL,
|
||||
"DOCLING_PICTURE_DESCRIPTION_API_MODEL": request.app.state.config.DOCLING_PICTURE_DESCRIPTION_API_MODEL,
|
||||
"DOCLING_PICTURE_DESCRIPTION_API_PROMPT": request.app.state.config.DOCLING_PICTURE_DESCRIPTION_API_PROMPT,
|
||||
"DOCUMENT_INTELLIGENCE_ENDPOINT": request.app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT,
|
||||
"DOCUMENT_INTELLIGENCE_KEY": request.app.state.config.DOCUMENT_INTELLIGENCE_KEY,
|
||||
"MISTRAL_OCR_API_KEY": request.app.state.config.MISTRAL_OCR_API_KEY,
|
||||
@ -1334,6 +1391,13 @@ def process_file(
|
||||
DOCLING_OCR_ENGINE=request.app.state.config.DOCLING_OCR_ENGINE,
|
||||
DOCLING_OCR_LANG=request.app.state.config.DOCLING_OCR_LANG,
|
||||
DOCLING_DO_PICTURE_DESCRIPTION=request.app.state.config.DOCLING_DO_PICTURE_DESCRIPTION,
|
||||
DOCLING_PICTURE_DESCRIPTION_MODE=request.app.state.config.DOCLING_PICTURE_DESCRIPTION_MODE,
|
||||
DOCLING_PICTURE_DESCRIPTION_LOCAL_REPO_ID=request.app.state.config.DOCLING_PICTURE_DESCRIPTION_LOCAL_REPO_ID,
|
||||
DOCLING_PICTURE_DESCRIPTION_LOCAL_MAX_TOKENS=request.app.state.config.DOCLING_PICTURE_DESCRIPTION_LOCAL_MAX_TOKENS,
|
||||
DOCLING_PICTURE_DESCRIPTION_LOCAL_PROMPT=request.app.state.config.DOCLING_PICTURE_DESCRIPTION_LOCAL_PROMPT,
|
||||
DOCLING_PICTURE_DESCRIPTION_API_URL=request.app.state.config.DOCLING_PICTURE_DESCRIPTION_API_URL,
|
||||
DOCLING_PICTURE_DESCRIPTION_API_MODEL=request.app.state.config.DOCLING_PICTURE_DESCRIPTION_API_MODEL,
|
||||
DOCLING_PICTURE_DESCRIPTION_API_PROMPT=request.app.state.config.DOCLING_PICTURE_DESCRIPTION_API_PROMPT,
|
||||
PDF_EXTRACT_IMAGES=request.app.state.config.PDF_EXTRACT_IMAGES,
|
||||
DOCUMENT_INTELLIGENCE_ENDPOINT=request.app.state.config.DOCUMENT_INTELLIGENCE_ENDPOINT,
|
||||
DOCUMENT_INTELLIGENCE_KEY=request.app.state.config.DOCUMENT_INTELLIGENCE_KEY,
|
||||
|
@ -510,6 +510,140 @@
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
{#if RAGConfig.DOCLING_DO_PICTURE_DESCRIPTION}
|
||||
<div class="flex w-full mt-2">
|
||||
<div class="flex-1 flex items-center gap-4">
|
||||
<label class="flex items-center gap-1 text-xs font-medium">
|
||||
<Tooltip
|
||||
content={$i18n.t('Use a model locally executed by Docling for picture description.')}
|
||||
placement="top-start"
|
||||
>
|
||||
<input
|
||||
type="radio"
|
||||
name="picture-description-mode"
|
||||
value="local"
|
||||
bind:group={RAGConfig.DOCLING_PICTURE_DESCRIPTION_MODE}
|
||||
checked={RAGConfig.DOCLING_PICTURE_DESCRIPTION_MODE === 'local'}
|
||||
/>
|
||||
<span style="padding-left: 0.5em">{$i18n.t('Local Description')}</span>
|
||||
</Tooltip>
|
||||
</label>
|
||||
<label class="flex items-center gap-1 text-xs font-medium">
|
||||
<Tooltip
|
||||
content={$i18n.t('Use a remote API for picture description.')}
|
||||
placement="top-start"
|
||||
>
|
||||
<input
|
||||
type="radio"
|
||||
name="picture-description-mode"
|
||||
value="api"
|
||||
bind:group={RAGConfig.DOCLING_PICTURE_DESCRIPTION_MODE}
|
||||
checked={RAGConfig.DOCLING_PICTURE_DESCRIPTION_MODE === 'api'}
|
||||
/>
|
||||
<span style="padding-left: 0.5em">{$i18n.t('Remote Description')}</span>
|
||||
</Tooltip>
|
||||
</label>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{#if RAGConfig.DOCLING_PICTURE_DESCRIPTION_MODE === 'local'}
|
||||
<div class="flex flex-col gap-2 mt-2 ml-4">
|
||||
<div class="flex items-center gap-2">
|
||||
<div class="min-w-fit text-xs font-medium">
|
||||
<Tooltip
|
||||
content={$i18n.t('The HuggingFace repo ID for the local vision-language model.')}
|
||||
placement="top-start"
|
||||
>
|
||||
{$i18n.t('Repo ID')}
|
||||
</Tooltip>
|
||||
</div>
|
||||
<input
|
||||
class=" w-full rounded-lg py-1.5 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
|
||||
placeholder={$i18n.t('HuggingFaceTB/SmolVLM-256M-Instruct')}
|
||||
bind:value={RAGConfig.DOCLING_PICTURE_DESCRIPTION_LOCAL_REPO_ID}
|
||||
/>
|
||||
</div>
|
||||
<div class="flex items-center gap-2">
|
||||
<div class="min-w-fit text-xs font-medium">
|
||||
<Tooltip
|
||||
content={$i18n.t('Maximum number of tokens for the generated description.')}
|
||||
placement="top-start"
|
||||
>
|
||||
{$i18n.t('Max Tokens')}
|
||||
</Tooltip>
|
||||
</div>
|
||||
<input
|
||||
class=" w-full rounded-lg py-1.5 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
|
||||
placeholder={$i18n.t('200')}
|
||||
bind:value={RAGConfig.DOCLING_PICTURE_DESCRIPTION_LOCAL_MAX_TOKENS}
|
||||
/>
|
||||
</div>
|
||||
<div class="flex items-center gap-2">
|
||||
<div class="min-w-fit text-xs font-medium">
|
||||
<Tooltip
|
||||
content={$i18n.t('Prompt to use for describing the image.')}
|
||||
placement="top-start"
|
||||
>
|
||||
{$i18n.t('Prompt')}
|
||||
</Tooltip>
|
||||
</div>
|
||||
<input
|
||||
class=" w-full rounded-lg py-1.5 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
|
||||
placeholder={$i18n.t('Describe this image in a few sentences.')}
|
||||
bind:value={RAGConfig.DOCLING_PICTURE_DESCRIPTION_LOCAL_PROMPT}
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
{:else if RAGConfig.DOCLING_PICTURE_DESCRIPTION_MODE === 'api'}
|
||||
<div class="flex flex-col gap-2 mt-2 ml-4">
|
||||
<div class="flex items-center gap-2">
|
||||
<div class="min-w-fit text-xs font-medium">
|
||||
<Tooltip
|
||||
content={$i18n.t('The remote API endpoint for picture description.')}
|
||||
placement="top-start"
|
||||
>
|
||||
{$i18n.t('URL')}
|
||||
</Tooltip>
|
||||
</div>
|
||||
<input
|
||||
class=" w-full rounded-lg py-1.5 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
|
||||
placeholder={$i18n.t('Enter Remote API URL')}
|
||||
bind:value={RAGConfig.DOCLING_PICTURE_DESCRIPTION_API_URL}
|
||||
/>
|
||||
</div>
|
||||
<div class="flex items-center gap-2">
|
||||
<div class="min-w-fit text-xs font-medium">
|
||||
<Tooltip
|
||||
content={$i18n.t('The model name to use for remote picture description.')}
|
||||
placement="top-start"
|
||||
>
|
||||
{$i18n.t('Model')}
|
||||
</Tooltip>
|
||||
</div>
|
||||
<input
|
||||
class=" w-full rounded-lg py-1.5 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
|
||||
placeholder={$i18n.t('Enter Model Name')}
|
||||
bind:value={RAGConfig.DOCLING_PICTURE_DESCRIPTION_API_MODEL}
|
||||
/>
|
||||
</div>
|
||||
<div class="flex items-center gap-2">
|
||||
<div class="min-w-fit text-xs font-medium">
|
||||
<Tooltip
|
||||
content={$i18n.t('Prompt to use for describing the image via remote API.')}
|
||||
placement="top-start"
|
||||
>
|
||||
{$i18n.t('Prompt')}
|
||||
</Tooltip>
|
||||
</div>
|
||||
<input
|
||||
class=" w-full rounded-lg py-1.5 px-4 text-sm bg-gray-50 dark:text-gray-300 dark:bg-gray-850 outline-hidden"
|
||||
placeholder={$i18n.t('Describe this image in a few sentences.')}
|
||||
bind:value={RAGConfig.DOCLING_PICTURE_DESCRIPTION_API_PROMPT}
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
{/if}
|
||||
{/if}
|
||||
{:else if RAGConfig.CONTENT_EXTRACTION_ENGINE === 'document_intelligence'}
|
||||
<div class="my-0.5 flex gap-2 pr-2">
|
||||
<input
|
||||
|
Loading…
Reference in New Issue
Block a user