mirror of
https://github.com/open-webui/open-webui
synced 2025-01-18 00:30:51 +00:00
feat: non-english youtube support
This commit is contained in:
parent
87daf122db
commit
d3822f782c
@ -124,6 +124,10 @@ app.state.OPENAI_API_KEY = RAG_OPENAI_API_KEY
|
||||
app.state.PDF_EXTRACT_IMAGES = PDF_EXTRACT_IMAGES
|
||||
|
||||
|
||||
app.state.YOUTUBE_LOADER_LANGUAGE = ["en"]
|
||||
app.state.YOUTUBE_LOADER_TRANSLATION = None
|
||||
|
||||
|
||||
def update_embedding_model(
|
||||
embedding_model: str,
|
||||
update_model: bool = False,
|
||||
@ -314,6 +318,10 @@ async def get_rag_config(user=Depends(get_admin_user)):
|
||||
"chunk_overlap": app.state.CHUNK_OVERLAP,
|
||||
},
|
||||
"web_loader_ssl_verification": app.state.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION,
|
||||
"youtube": {
|
||||
"language": app.state.YOUTUBE_LOADER_LANGUAGE,
|
||||
"translation": app.state.YOUTUBE_LOADER_TRANSLATION,
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
@ -322,10 +330,16 @@ class ChunkParamUpdateForm(BaseModel):
|
||||
chunk_overlap: int
|
||||
|
||||
|
||||
class YoutubeLoaderConfig(BaseModel):
|
||||
language: List[str]
|
||||
translation: Optional[str] = None
|
||||
|
||||
|
||||
class ConfigUpdateForm(BaseModel):
|
||||
pdf_extract_images: Optional[bool] = None
|
||||
chunk: Optional[ChunkParamUpdateForm] = None
|
||||
web_loader_ssl_verification: Optional[bool] = None
|
||||
youtube: Optional[YoutubeLoaderConfig] = None
|
||||
|
||||
|
||||
@app.post("/config/update")
|
||||
@ -352,6 +366,18 @@ async def update_rag_config(form_data: ConfigUpdateForm, user=Depends(get_admin_
|
||||
else app.state.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION
|
||||
)
|
||||
|
||||
app.state.YOUTUBE_LOADER_LANGUAGE = (
|
||||
form_data.youtube.language
|
||||
if form_data.youtube != None
|
||||
else app.state.YOUTUBE_LOADER_LANGUAGE
|
||||
)
|
||||
|
||||
app.state.YOUTUBE_LOADER_TRANSLATION = (
|
||||
form_data.youtube.translation
|
||||
if form_data.youtube != None
|
||||
else app.state.YOUTUBE_LOADER_TRANSLATION
|
||||
)
|
||||
|
||||
return {
|
||||
"status": True,
|
||||
"pdf_extract_images": app.state.PDF_EXTRACT_IMAGES,
|
||||
@ -360,6 +386,10 @@ async def update_rag_config(form_data: ConfigUpdateForm, user=Depends(get_admin_
|
||||
"chunk_overlap": app.state.CHUNK_OVERLAP,
|
||||
},
|
||||
"web_loader_ssl_verification": app.state.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION,
|
||||
"youtube": {
|
||||
"language": app.state.YOUTUBE_LOADER_LANGUAGE,
|
||||
"translation": app.state.YOUTUBE_LOADER_TRANSLATION,
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
@ -486,7 +516,12 @@ def query_collection_handler(
|
||||
@app.post("/youtube")
|
||||
def store_youtube_video(form_data: UrlForm, user=Depends(get_current_user)):
|
||||
try:
|
||||
loader = YoutubeLoader.from_youtube_url(form_data.url, add_video_info=False)
|
||||
loader = YoutubeLoader.from_youtube_url(
|
||||
form_data.url,
|
||||
add_video_info=True,
|
||||
language=app.state.YOUTUBE_LOADER_LANGUAGE,
|
||||
translation=app.state.YOUTUBE_LOADER_TRANSLATION,
|
||||
)
|
||||
data = loader.load()
|
||||
|
||||
collection_name = form_data.collection_name
|
||||
|
@ -57,3 +57,4 @@ PyJWT[crypto]==2.8.0
|
||||
black==24.4.2
|
||||
langfuse==2.27.3
|
||||
youtube-transcript-api==0.6.2
|
||||
pytube
|
@ -32,10 +32,16 @@ type ChunkConfigForm = {
|
||||
chunk_overlap: number;
|
||||
};
|
||||
|
||||
type YoutubeConfigForm = {
|
||||
language: string[];
|
||||
translation?: string | null;
|
||||
};
|
||||
|
||||
type RAGConfigForm = {
|
||||
pdf_extract_images?: boolean;
|
||||
chunk?: ChunkConfigForm;
|
||||
web_loader_ssl_verification?: boolean;
|
||||
youtube?: YoutubeConfigForm;
|
||||
};
|
||||
|
||||
export const updateRAGConfig = async (token: string, payload: RAGConfigForm) => {
|
||||
|
@ -11,9 +11,16 @@
|
||||
|
||||
let webLoaderSSLVerification = true;
|
||||
|
||||
let youtubeLanguage = 'en';
|
||||
let youtubeTranslation = null;
|
||||
|
||||
const submitHandler = async () => {
|
||||
const res = await updateRAGConfig(localStorage.token, {
|
||||
web_loader_ssl_verification: webLoaderSSLVerification
|
||||
web_loader_ssl_verification: webLoaderSSLVerification,
|
||||
youtube: {
|
||||
language: youtubeLanguage.split(',').map((lang) => lang.trim()),
|
||||
translation: youtubeTranslation
|
||||
}
|
||||
});
|
||||
};
|
||||
|
||||
@ -22,6 +29,8 @@
|
||||
|
||||
if (res) {
|
||||
webLoaderSSLVerification = res.web_loader_ssl_verification;
|
||||
youtubeLanguage = res.youtube.language.join(',');
|
||||
youtubeTranslation = res.youtube.translation;
|
||||
}
|
||||
});
|
||||
</script>
|
||||
@ -36,7 +45,7 @@
|
||||
<div class=" space-y-3 pr-1.5 overflow-y-scroll h-full max-h-[22rem]">
|
||||
<div>
|
||||
<div class=" mb-1 text-sm font-medium">
|
||||
{$i18n.t('Retrieval Augmented Generation Settings')}
|
||||
{$i18n.t('Web Loader Settings')}
|
||||
</div>
|
||||
|
||||
<div>
|
||||
@ -61,6 +70,25 @@
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class=" mt-2 mb-1 text-sm font-medium">
|
||||
{$i18n.t('Youtube Loader Settings')}
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<div class=" py-0.5 flex w-full justify-between">
|
||||
<div class=" w-20 text-xs font-medium self-center">{$i18n.t('Language')}</div>
|
||||
<div class=" flex-1 self-center">
|
||||
<input
|
||||
class="w-full rounded-lg py-2 px-4 text-sm dark:text-gray-300 dark:bg-gray-850 outline-none"
|
||||
type="text"
|
||||
placeholder={$i18n.t('Enter language codes')}
|
||||
bind:value={youtubeLanguage}
|
||||
autocomplete="off"
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="flex justify-end pt-3 text-sm font-medium">
|
||||
|
Loading…
Reference in New Issue
Block a user