mirror of
https://github.com/open-webui/open-webui
synced 2025-04-29 18:51:03 +00:00
fix: FireCrawlLoader
This commit is contained in:
parent
7baca2b483
commit
09874ab83d
@@ -228,7 +228,10 @@ class SafeFireCrawlLoader(BaseLoader, RateLimitMixin, URLProcessingMixin):
|
||||
mode=self.mode,
|
||||
params=self.params,
|
||||
)
|
||||
yield from loader.lazy_load()
|
||||
for document in loader.lazy_load():
|
||||
if not document.metadata.get("source"):
|
||||
document.metadata["source"] = document.metadata.get("sourceURL")
|
||||
yield document
|
||||
except Exception as e:
|
||||
if self.continue_on_failure:
|
||||
log.exception(f"Error loading {url}: {e}")
|
||||
@@ -248,6 +251,8 @@ class SafeFireCrawlLoader(BaseLoader, RateLimitMixin, URLProcessingMixin):
|
||||
params=self.params,
|
||||
)
|
||||
async for document in loader.alazy_load():
|
||||
if not document.metadata.get("source"):
|
||||
document.metadata["source"] = document.metadata.get("sourceURL")
|
||||
yield document
|
||||
except Exception as e:
|
||||
if self.continue_on_failure:
|
||||
|
@@ -1536,8 +1536,8 @@ async def process_web_search(
|
||||
)
|
||||
docs = await loader.aload()
|
||||
urls = [
|
||||
doc.metadata["source"] for doc in docs
|
||||
] # only keep URLs which could be retrieved
|
||||
doc.metadata.get("source") for doc in docs if doc.metadata.get("source")
|
||||
] # only keep URLs
|
||||
|
||||
if request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL:
|
||||
return {
|
||||
|
Loading…
Reference in New Issue
Block a user