diff --git a/backend/open_webui/config.py b/backend/open_webui/config.py index 1bdf50073..76b211fb1 100644 --- a/backend/open_webui/config.py +++ b/backend/open_webui/config.py @@ -307,6 +307,18 @@ GOOGLE_CLIENT_SECRET = PersistentConfig( os.environ.get("GOOGLE_CLIENT_SECRET", ""), ) +GOOGLE_DRIVE_CLIENT_ID = PersistentConfig( + "GOOGLE_DRIVE_CLIENT_ID", + "google_drive.client_id", + os.environ.get("GOOGLE_DRIVE_CLIENT_ID", ""), +) + +GOOGLE_DRIVE_API_KEY = PersistentConfig( + "GOOGLE_DRIVE_API_KEY", + "google_drive.api_key", + os.environ.get("GOOGLE_DRIVE_API_KEY", ""), +) + GOOGLE_OAUTH_SCOPE = PersistentConfig( "GOOGLE_OAUTH_SCOPE", "oauth.google.scope", @@ -1426,6 +1438,13 @@ RAG_WEB_SEARCH_DOMAIN_FILTER_LIST = PersistentConfig( ], ) +# If configured, Google Drive will be available as an upload option. +ENABLE_GOOGLE_DRIVE = PersistentConfig( + "ENABLE_GOOGLE_DRIVE", + "rag.drive.enable", + os.getenv("ENABLE_GOOGLE_DRIVE", "False").lower() == "true", +) + SEARXNG_QUERY_URL = PersistentConfig( "SEARXNG_QUERY_URL", "rag.web.search.searxng_query_url", diff --git a/backend/open_webui/main.py b/backend/open_webui/main.py index e7f602311..3a47702f2 100644 --- a/backend/open_webui/main.py +++ b/backend/open_webui/main.py @@ -177,10 +177,13 @@ from open_webui.config import ( MOJEEK_SEARCH_API_KEY, GOOGLE_PSE_API_KEY, GOOGLE_PSE_ENGINE_ID, + GOOGLE_DRIVE_CLIENT_ID, + GOOGLE_DRIVE_API_KEY, ENABLE_RAG_HYBRID_SEARCH, ENABLE_RAG_LOCAL_WEB_FETCH, ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION, ENABLE_RAG_WEB_SEARCH, + ENABLE_GOOGLE_DRIVE, UPLOAD_DIR, # WebUI WEBUI_AUTH, @@ -483,6 +486,7 @@ app.state.config.ENABLE_RAG_WEB_SEARCH = ENABLE_RAG_WEB_SEARCH app.state.config.RAG_WEB_SEARCH_ENGINE = RAG_WEB_SEARCH_ENGINE app.state.config.RAG_WEB_SEARCH_DOMAIN_FILTER_LIST = RAG_WEB_SEARCH_DOMAIN_FILTER_LIST +app.state.config.ENABLE_GOOGLE_DRIVE = ENABLE_GOOGLE_DRIVE app.state.config.SEARXNG_QUERY_URL = SEARXNG_QUERY_URL app.state.config.GOOGLE_PSE_API_KEY = GOOGLE_PSE_API_KEY app.state.config.GOOGLE_PSE_ENGINE_ID = GOOGLE_PSE_ENGINE_ID @@ -935,6 +939,7 @@ async def get_app_config(request: Request): **( { "enable_web_search": app.state.config.ENABLE_RAG_WEB_SEARCH, + "enable_google_drive": app.state.config.ENABLE_GOOGLE_DRIVE, "enable_image_generation": app.state.config.ENABLE_IMAGE_GENERATION, "enable_community_sharing": app.state.config.ENABLE_COMMUNITY_SHARING, "enable_message_rating": app.state.config.ENABLE_MESSAGE_RATING, @@ -945,6 +950,10 @@ async def get_app_config(request: Request): else {} ), }, + "google_drive": { + "client_id": GOOGLE_DRIVE_CLIENT_ID.value, + "api_key": GOOGLE_DRIVE_API_KEY.value, + }, **( { "default_models": app.state.config.DEFAULT_MODELS, diff --git a/backend/open_webui/routers/knowledge.py b/backend/open_webui/routers/knowledge.py index 0dff2bc02..04ebcf507 100644 --- a/backend/open_webui/routers/knowledge.py +++ b/backend/open_webui/routers/knowledge.py @@ -11,7 +11,12 @@ from open_webui.models.knowledge import ( ) from open_webui.models.files import Files, FileModel from open_webui.retrieval.vector.connector import VECTOR_DB_CLIENT -from open_webui.routers.retrieval import process_file, ProcessFileForm, process_files_batch, BatchProcessFilesForm +from open_webui.routers.retrieval import ( + process_file, + ProcessFileForm, + process_files_batch, + BatchProcessFilesForm, +) from open_webui.constants import ERROR_MESSAGES @@ -519,6 +524,7 @@ async def reset_knowledge_by_id(id: str, user=Depends(get_verified_user)): # AddFilesToKnowledge ############################ + @router.post("/{id}/files/batch/add", response_model=Optional[KnowledgeFilesResponse]) def add_files_to_knowledge_batch( id: str, @@ -555,27 +561,25 @@ def add_files_to_knowledge_batch( # Process files try: - result = process_files_batch(BatchProcessFilesForm( - files=files, - collection_name=id - )) - except Exception as e: - log.error(f"add_files_to_knowledge_batch: Exception occurred: {e}", exc_info=True) - raise HTTPException( - status_code=status.HTTP_400_BAD_REQUEST, - detail=str(e) + result = process_files_batch( + BatchProcessFilesForm(files=files, collection_name=id) ) - + except Exception as e: + log.error( + f"add_files_to_knowledge_batch: Exception occurred: {e}", exc_info=True + ) + raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=str(e)) + # Add successful files to knowledge base data = knowledge.data or {} existing_file_ids = data.get("file_ids", []) - + # Only add files that were successfully processed successful_file_ids = [r.file_id for r in result.results if r.status == "completed"] for file_id in successful_file_ids: if file_id not in existing_file_ids: existing_file_ids.append(file_id) - + data["file_ids"] = existing_file_ids knowledge = Knowledges.update_knowledge_data_by_id(id=id, data=data) @@ -587,11 +591,10 @@ def add_files_to_knowledge_batch( files=Files.get_files_by_ids(existing_file_ids), warnings={ "message": "Some files failed to process", - "errors": error_details - } + "errors": error_details, + }, ) return KnowledgeFilesResponse( - **knowledge.model_dump(), - files=Files.get_files_by_ids(existing_file_ids) + **knowledge.model_dump(), files=Files.get_files_by_ids(existing_file_ids) ) diff --git a/backend/open_webui/routers/retrieval.py b/backend/open_webui/routers/retrieval.py index c6a3a0cca..d19940197 100644 --- a/backend/open_webui/routers/retrieval.py +++ b/backend/open_webui/routers/retrieval.py @@ -347,6 +347,7 @@ async def get_rag_config(request: Request, user=Depends(get_admin_user)): return { "status": True, "pdf_extract_images": request.app.state.config.PDF_EXTRACT_IMAGES, + "enable_google_drive": request.app.state.config.ENABLE_GOOGLE_DRIVE, "content_extraction": { "engine": request.app.state.config.CONTENT_EXTRACTION_ENGINE, "tika_server_url": request.app.state.config.TIKA_SERVER_URL, @@ -369,6 +370,7 @@ async def get_rag_config(request: Request, user=Depends(get_admin_user)): "web_loader_ssl_verification": request.app.state.config.ENABLE_RAG_WEB_LOADER_SSL_VERIFICATION, "search": { "enabled": request.app.state.config.ENABLE_RAG_WEB_SEARCH, + "drive": request.app.state.config.ENABLE_GOOGLE_DRIVE, "engine": request.app.state.config.RAG_WEB_SEARCH_ENGINE, "searxng_query_url": request.app.state.config.SEARXNG_QUERY_URL, "google_pse_api_key": request.app.state.config.GOOGLE_PSE_API_KEY, @@ -445,6 +447,7 @@ class WebConfig(BaseModel): class ConfigUpdateForm(BaseModel): pdf_extract_images: Optional[bool] = None + enable_google_drive: Optional[bool] = None file: Optional[FileConfig] = None content_extraction: Optional[ContentExtractionConfig] = None chunk: Optional[ChunkParamUpdateForm] = None @@ -462,6 +465,12 @@ async def update_rag_config( else request.app.state.config.PDF_EXTRACT_IMAGES ) + request.app.state.config.ENABLE_GOOGLE_DRIVE = ( + form_data.enable_google_drive + if form_data.enable_google_drive is not None + else request.app.state.config.ENABLE_GOOGLE_DRIVE + ) + if form_data.file is not None: request.app.state.config.FILE_MAX_SIZE = form_data.file.max_size request.app.state.config.FILE_MAX_COUNT = form_data.file.max_count diff --git a/backend/open_webui/static/swagger-ui/swagger-ui-bundle.js b/backend/open_webui/static/swagger-ui/swagger-ui-bundle.js index b2e982f1d..dcd1c5313 100644 --- a/backend/open_webui/static/swagger-ui/swagger-ui-bundle.js +++ b/backend/open_webui/static/swagger-ui/swagger-ui-bundle.js @@ -37007,16 +37007,14 @@ Pe.createElement('span', { className: 'brace-close' }, '}') ), pe.size - ? pe - .entrySeq() - .map(([s, o]) => - Pe.createElement(xe, { - key: `${s}-${o}`, - propKey: s, - propVal: o, - propClass: 'property' - }) - ) + ? pe.entrySeq().map(([s, o]) => + Pe.createElement(xe, { + key: `${s}-${o}`, + propKey: s, + propVal: o, + propClass: 'property' + }) + ) : null ); } @@ -37167,16 +37165,14 @@ ) : null, C && z.size - ? z - .entrySeq() - .map(([s, o]) => - Pe.createElement(le, { - key: `${s}-${o}`, - propKey: s, - propVal: o, - propClass: rs - }) - ) + ? z.entrySeq().map(([s, o]) => + Pe.createElement(le, { + key: `${s}-${o}`, + propKey: s, + propVal: o, + propClass: rs + }) + ) : null, U ? Pe.createElement(ie, { source: U }) : null, Z && @@ -57290,20 +57286,18 @@ Pe.createElement( 'div', { className: 'modal-ux-content' }, - x - .valueSeq() - .map((x, j) => - Pe.createElement(C, { - key: j, - AST: w, - definitions: x, - getComponent: i, - errSelectors: u, - authSelectors: s, - authActions: o, - specSelectors: _ - }) - ) + x.valueSeq().map((x, j) => + Pe.createElement(C, { + key: j, + AST: w, + definitions: x, + getComponent: i, + errSelectors: u, + authSelectors: s, + authActions: o, + specSelectors: _ + }) + ) ) ) ) diff --git a/backend/requirements.txt b/backend/requirements.txt index 79e898c6a..e38624879 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -90,6 +90,11 @@ extract_msg pydub duckduckgo-search~=6.3.5 +## Google Drive +google-api-python-client +google-auth-httplib2 +google-auth-oauthlib + ## Tests docker~=7.1.0 pytest~=8.3.2 diff --git a/package-lock.json b/package-lock.json index 16542ed99..7f0f368fb 100644 --- a/package-lock.json +++ b/package-lock.json @@ -2260,9 +2260,9 @@ } }, "node_modules/@sveltejs/kit": { - "version": "2.9.0", - "resolved": "https://registry.npmjs.org/@sveltejs/kit/-/kit-2.9.0.tgz", - "integrity": "sha512-W3E7ed3ChB6kPqRs2H7tcHp+Z7oiTFC6m+lLyAQQuyXeqw6LdNuuwEUla+5VM0OGgqQD+cYD6+7Xq80vVm17Vg==", + "version": "2.12.1", + "resolved": "https://registry.npmjs.org/@sveltejs/kit/-/kit-2.12.1.tgz", + "integrity": "sha512-M3rPijGImeOkI0DBJSwjqz+YFX2DyOf6NzWgHVk3mqpT06dlYCpcv5xh1q4rYEqB58yQlk4QA1Y35PUqnUiFKw==", "hasInstallScript": true, "license": "MIT", "dependencies": { @@ -8267,15 +8267,16 @@ } }, "node_modules/nanoid": { - "version": "5.0.6", - "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-5.0.6.tgz", - "integrity": "sha512-rRq0eMHoGZxlvaFOUdK1Ev83Bd1IgzzR+WJ3IbDJ7QOSdAxYjlurSPqFs9s4lJg29RT6nPwizFtJhQS6V5xgiA==", + "version": "5.0.9", + "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-5.0.9.tgz", + "integrity": "sha512-Aooyr6MXU6HpvvWXKoVoXwKMs/KyVakWwg7xQfv5/S/RIgJMy0Ifa45H9qqYy7pTCszrHzP21Uk4PZq2HpEM8Q==", "funding": [ { "type": "github", "url": "https://github.com/sponsors/ai" } ], + "license": "MIT", "bin": { "nanoid": "bin/nanoid.js" }, @@ -8976,15 +8977,16 @@ "dev": true }, "node_modules/postcss/node_modules/nanoid": { - "version": "3.3.7", - "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.7.tgz", - "integrity": "sha512-eSRppjcPIatRIMC1U6UngP8XFcz8MQWGQdt1MTBQ7NaAmvXDfvNxbvWV3x2y6CdEUciCSsDHDQZbhYaB8QEo2g==", + "version": "3.3.8", + "resolved": "https://registry.npmjs.org/nanoid/-/nanoid-3.3.8.tgz", + "integrity": "sha512-WNLf5Sd8oZxOm+TzppcYk8gVOgP+l58xNy58D0nbUnOxOWRWvlcCV4kUF7ltmI6PsrLl/BgKEyS4mqsGChFN0w==", "funding": [ { "type": "github", "url": "https://github.com/sponsors/ai" } ], + "license": "MIT", "bin": { "nanoid": "bin/nanoid.cjs" }, diff --git a/src/lib/apis/retrieval/index.ts b/src/lib/apis/retrieval/index.ts index 21ae792fa..7b8d836ce 100644 --- a/src/lib/apis/retrieval/index.ts +++ b/src/lib/apis/retrieval/index.ts @@ -45,6 +45,7 @@ type YoutubeConfigForm = { type RAGConfigForm = { pdf_extract_images?: boolean; + enable_google_drive?: boolean; chunk?: ChunkConfigForm; content_extraction?: ContentExtractConfigForm; web_loader_ssl_verification?: boolean; diff --git a/src/lib/components/admin/Settings/Documents.svelte b/src/lib/components/admin/Settings/Documents.svelte index a596c293c..ff580a972 100644 --- a/src/lib/components/admin/Settings/Documents.svelte +++ b/src/lib/components/admin/Settings/Documents.svelte @@ -56,6 +56,8 @@ let chunkOverlap = 0; let pdfExtractImages = true; + let enableGoogleDrive = false; + let OpenAIUrl = ''; let OpenAIKey = ''; @@ -175,6 +177,7 @@ } const res = await updateRAGConfig(localStorage.token, { pdf_extract_images: pdfExtractImages, + enable_google_drive: enableGoogleDrive, file: { max_size: fileMaxSize === '' ? null : fileMaxSize, max_count: fileMaxCount === '' ? null : fileMaxCount @@ -245,6 +248,8 @@ fileMaxSize = res?.file.max_size ?? ''; fileMaxCount = res?.file.max_count ?? ''; + + enableGoogleDrive = res.enable_google_drive; } }); @@ -571,6 +576,8 @@ +