mirror of
https://github.com/open-webui/open-webui
synced 2025-06-26 18:26:48 +00:00
feat: non-english youtube support
This commit is contained in:
parent
87daf122db
commit
d3822f782c
@ -124,6 +124,10 @@ app.state.OPENAI_API_KEY = RAG_OPENAI_API_KEY
|
|||||||
app.state.PDF_EXTRACT_IMAGES = PDF_EXTRACT_IMAGES
|
app.state.PDF_EXTRACT_IMAGES = PDF_EXTRACT_IMAGES
|
||||||
|
|
||||||
|
|
||||||
|
app.state.YOUTUBE_LOADER_LANGUAGE = ["en"]
|
||||||
|
app.state.YOUTUBE_LOADER_TRANSLATION = None
|
||||||
|
|
||||||
|
|
||||||
def update_embedding_model(
|
def update_embedding_model(
|
||||||
embedding_model: str,
|
embedding_model: str,
|
||||||
update_model: bool = False,
|
update_model: bool = False,
|
||||||
@ -314,6 +318,10 @@ async def get_rag_config(user=Depends(get_admin_user)):
|
|||||||
"chunk_overlap": app.state.CHUNK_OVERLAP,
|
"chunk_overlap": app.state.CHUNK_OVERLAP,
|
||||||
},
|
},
|
||||||
"web_loader_ssl_verification": app.state.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION,
|
"web_loader_ssl_verification": app.state.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION,
|
||||||
|
"youtube": {
|
||||||
|
"language": app.state.YOUTUBE_LOADER_LANGUAGE,
|
||||||
|
"translation": app.state.YOUTUBE_LOADER_TRANSLATION,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -322,10 +330,16 @@ class ChunkParamUpdateForm(BaseModel):
|
|||||||
chunk_overlap: int
|
chunk_overlap: int
|
||||||
|
|
||||||
|
|
||||||
|
class YoutubeLoaderConfig(BaseModel):
|
||||||
|
language: List[str]
|
||||||
|
translation: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
class ConfigUpdateForm(BaseModel):
|
class ConfigUpdateForm(BaseModel):
|
||||||
pdf_extract_images: Optional[bool] = None
|
pdf_extract_images: Optional[bool] = None
|
||||||
chunk: Optional[ChunkParamUpdateForm] = None
|
chunk: Optional[ChunkParamUpdateForm] = None
|
||||||
web_loader_ssl_verification: Optional[bool] = None
|
web_loader_ssl_verification: Optional[bool] = None
|
||||||
|
youtube: Optional[YoutubeLoaderConfig] = None
|
||||||
|
|
||||||
|
|
||||||
@app.post("/config/update")
|
@app.post("/config/update")
|
||||||
@ -352,6 +366,18 @@ async def update_rag_config(form_data: ConfigUpdateForm, user=Depends(get_admin_
|
|||||||
else app.state.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION
|
else app.state.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION
|
||||||
)
|
)
|
||||||
|
|
||||||
|
app.state.YOUTUBE_LOADER_LANGUAGE = (
|
||||||
|
form_data.youtube.language
|
||||||
|
if form_data.youtube != None
|
||||||
|
else app.state.YOUTUBE_LOADER_LANGUAGE
|
||||||
|
)
|
||||||
|
|
||||||
|
app.state.YOUTUBE_LOADER_TRANSLATION = (
|
||||||
|
form_data.youtube.translation
|
||||||
|
if form_data.youtube != None
|
||||||
|
else app.state.YOUTUBE_LOADER_TRANSLATION
|
||||||
|
)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"status": True,
|
"status": True,
|
||||||
"pdf_extract_images": app.state.PDF_EXTRACT_IMAGES,
|
"pdf_extract_images": app.state.PDF_EXTRACT_IMAGES,
|
||||||
@ -360,6 +386,10 @@ async def update_rag_config(form_data: ConfigUpdateForm, user=Depends(get_admin_
|
|||||||
"chunk_overlap": app.state.CHUNK_OVERLAP,
|
"chunk_overlap": app.state.CHUNK_OVERLAP,
|
||||||
},
|
},
|
||||||
"web_loader_ssl_verification": app.state.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION,
|
"web_loader_ssl_verification": app.state.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION,
|
||||||
|
"youtube": {
|
||||||
|
"language": app.state.YOUTUBE_LOADER_LANGUAGE,
|
||||||
|
"translation": app.state.YOUTUBE_LOADER_TRANSLATION,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -486,7 +516,12 @@ def query_collection_handler(
|
|||||||
@app.post("/youtube")
|
@app.post("/youtube")
|
||||||
def store_youtube_video(form_data: UrlForm, user=Depends(get_current_user)):
|
def store_youtube_video(form_data: UrlForm, user=Depends(get_current_user)):
|
||||||
try:
|
try:
|
||||||
loader = YoutubeLoader.from_youtube_url(form_data.url, add_video_info=False)
|
loader = YoutubeLoader.from_youtube_url(
|
||||||
|
form_data.url,
|
||||||
|
add_video_info=True,
|
||||||
|
language=app.state.YOUTUBE_LOADER_LANGUAGE,
|
||||||
|
translation=app.state.YOUTUBE_LOADER_TRANSLATION,
|
||||||
|
)
|
||||||
data = loader.load()
|
data = loader.load()
|
||||||
|
|
||||||
collection_name = form_data.collection_name
|
collection_name = form_data.collection_name
|
||||||
|
@ -57,3 +57,4 @@ PyJWT[crypto]==2.8.0
|
|||||||
black==24.4.2
|
black==24.4.2
|
||||||
langfuse==2.27.3
|
langfuse==2.27.3
|
||||||
youtube-transcript-api==0.6.2
|
youtube-transcript-api==0.6.2
|
||||||
|
pytube
|
@ -32,10 +32,16 @@ type ChunkConfigForm = {
|
|||||||
chunk_overlap: number;
|
chunk_overlap: number;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
type YoutubeConfigForm = {
|
||||||
|
language: string[];
|
||||||
|
translation?: string | null;
|
||||||
|
};
|
||||||
|
|
||||||
type RAGConfigForm = {
|
type RAGConfigForm = {
|
||||||
pdf_extract_images?: boolean;
|
pdf_extract_images?: boolean;
|
||||||
chunk?: ChunkConfigForm;
|
chunk?: ChunkConfigForm;
|
||||||
web_loader_ssl_verification?: boolean;
|
web_loader_ssl_verification?: boolean;
|
||||||
|
youtube?: YoutubeConfigForm;
|
||||||
};
|
};
|
||||||
|
|
||||||
export const updateRAGConfig = async (token: string, payload: RAGConfigForm) => {
|
export const updateRAGConfig = async (token: string, payload: RAGConfigForm) => {
|
||||||
|
@ -11,9 +11,16 @@
|
|||||||
|
|
||||||
let webLoaderSSLVerification = true;
|
let webLoaderSSLVerification = true;
|
||||||
|
|
||||||
|
let youtubeLanguage = 'en';
|
||||||
|
let youtubeTranslation = null;
|
||||||
|
|
||||||
const submitHandler = async () => {
|
const submitHandler = async () => {
|
||||||
const res = await updateRAGConfig(localStorage.token, {
|
const res = await updateRAGConfig(localStorage.token, {
|
||||||
web_loader_ssl_verification: webLoaderSSLVerification
|
web_loader_ssl_verification: webLoaderSSLVerification,
|
||||||
|
youtube: {
|
||||||
|
language: youtubeLanguage.split(',').map((lang) => lang.trim()),
|
||||||
|
translation: youtubeTranslation
|
||||||
|
}
|
||||||
});
|
});
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -22,6 +29,8 @@
|
|||||||
|
|
||||||
if (res) {
|
if (res) {
|
||||||
webLoaderSSLVerification = res.web_loader_ssl_verification;
|
webLoaderSSLVerification = res.web_loader_ssl_verification;
|
||||||
|
youtubeLanguage = res.youtube.language.join(',');
|
||||||
|
youtubeTranslation = res.youtube.translation;
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
</script>
|
</script>
|
||||||
@ -36,7 +45,7 @@
|
|||||||
<div class=" space-y-3 pr-1.5 overflow-y-scroll h-full max-h-[22rem]">
|
<div class=" space-y-3 pr-1.5 overflow-y-scroll h-full max-h-[22rem]">
|
||||||
<div>
|
<div>
|
||||||
<div class=" mb-1 text-sm font-medium">
|
<div class=" mb-1 text-sm font-medium">
|
||||||
{$i18n.t('Retrieval Augmented Generation Settings')}
|
{$i18n.t('Web Loader Settings')}
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div>
|
<div>
|
||||||
@ -61,6 +70,25 @@
|
|||||||
</button>
|
</button>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<div class=" mt-2 mb-1 text-sm font-medium">
|
||||||
|
{$i18n.t('Youtube Loader Settings')}
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div>
|
||||||
|
<div class=" py-0.5 flex w-full justify-between">
|
||||||
|
<div class=" w-20 text-xs font-medium self-center">{$i18n.t('Language')}</div>
|
||||||
|
<div class=" flex-1 self-center">
|
||||||
|
<input
|
||||||
|
class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
|
||||||
|
type="text"
|
||||||
|
placeholder={$i18n.t('Enter language codes')}
|
||||||
|
bind:value={youtubeLanguage}
|
||||||
|
autocomplete="off"
|
||||||
|
/>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<div class="flex justify-end pt-3 text-sm font-medium">
|
<div class="flex justify-end pt-3 text-sm font-medium">
|
||||||
|
Loading…
Reference in New Issue
Block a user