From 09874ab83dcf7c39babde9e5142b4caf9b2c9193 Mon Sep 17 00:00:00 2001 From: Timothy Jaeryang Baek Date: Thu, 24 Apr 2025 01:40:34 +0900 Subject: [PATCH] fix: FireCrawlLoader --- backend/open_webui/retrieval/web/utils.py | 7 ++++++- backend/open_webui/routers/retrieval.py | 4 ++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/backend/open_webui/retrieval/web/utils.py b/backend/open_webui/retrieval/web/utils.py index fc46d78c4..aec2a8730 100644 --- a/backend/open_webui/retrieval/web/utils.py +++ b/backend/open_webui/retrieval/web/utils.py @@ -228,7 +228,10 @@ class SafeFireCrawlLoader(BaseLoader, RateLimitMixin, URLProcessingMixin): mode=self.mode, params=self.params, ) - yield from loader.lazy_load() + for document in loader.lazy_load(): + if not document.metadata.get("source"): + document.metadata["source"] = document.metadata.get("sourceURL") + yield document except Exception as e: if self.continue_on_failure: log.exception(f"Error loading {url}: {e}") @@ -248,6 +251,8 @@ class SafeFireCrawlLoader(BaseLoader, RateLimitMixin, URLProcessingMixin): params=self.params, ) async for document in loader.alazy_load(): + if not document.metadata.get("source"): + document.metadata["source"] = document.metadata.get("sourceURL") yield document except Exception as e: if self.continue_on_failure: diff --git a/backend/open_webui/routers/retrieval.py b/backend/open_webui/routers/retrieval.py index bcb8aa413..76d7fc76c 100644 --- a/backend/open_webui/routers/retrieval.py +++ b/backend/open_webui/routers/retrieval.py @@ -1536,8 +1536,8 @@ async def process_web_search( ) docs = await loader.aload() urls = [ - doc.metadata["source"] for doc in docs - ] # only keep URLs which could be retrieved + doc.metadata.get("source") for doc in docs if doc.metadata.get("source") + ] # only keep URLs if request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL: return {