fix: FireCrawlLoader

This commit is contained in:
Timothy Jaeryang Baek 2025-04-24 01:40:34 +09:00
parent 7baca2b483
commit 09874ab83d
2 changed files with 8 additions and 3 deletions

View File

@@ -228,7 +228,10 @@ class SafeFireCrawlLoader(BaseLoader, RateLimitMixin, URLProcessingMixin):
mode=self.mode,
params=self.params,
)
yield from loader.lazy_load()
for document in loader.lazy_load():
if not document.metadata.get("source"):
document.metadata["source"] = document.metadata.get("sourceURL")
yield document
except Exception as e:
if self.continue_on_failure:
log.exception(f"Error loading {url}: {e}")
@@ -248,6 +251,8 @@ class SafeFireCrawlLoader(BaseLoader, RateLimitMixin, URLProcessingMixin):
params=self.params,
)
async for document in loader.alazy_load():
if not document.metadata.get("source"):
document.metadata["source"] = document.metadata.get("sourceURL")
yield document
except Exception as e:
if self.continue_on_failure:

View File

@@ -1536,8 +1536,8 @@ async def process_web_search(
)
docs = await loader.aload()
urls = [
doc.metadata["source"] for doc in docs
] # only keep URLs which could be retrieved
doc.metadata.get("source") for doc in docs if doc.metadata.get("source")
] # only keep URLs
if request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL:
return {