fix: FireCrawlLoader

This commit is contained in:
Timothy Jaeryang Baek 2025-04-24 01:40:34 +09:00
parent 7baca2b483
commit 09874ab83d
2 changed files with 8 additions and 3 deletions

View File

@@ -228,7 +228,10 @@ class SafeFireCrawlLoader(BaseLoader, RateLimitMixin, URLProcessingMixin):
mode=self.mode, mode=self.mode,
params=self.params, params=self.params,
) )
yield from loader.lazy_load() for document in loader.lazy_load():
if not document.metadata.get("source"):
document.metadata["source"] = document.metadata.get("sourceURL")
yield document
except Exception as e: except Exception as e:
if self.continue_on_failure: if self.continue_on_failure:
log.exception(f"Error loading {url}: {e}") log.exception(f"Error loading {url}: {e}")
@@ -248,6 +251,8 @@ class SafeFireCrawlLoader(BaseLoader, RateLimitMixin, URLProcessingMixin):
params=self.params, params=self.params,
) )
async for document in loader.alazy_load(): async for document in loader.alazy_load():
if not document.metadata.get("source"):
document.metadata["source"] = document.metadata.get("sourceURL")
yield document yield document
except Exception as e: except Exception as e:
if self.continue_on_failure: if self.continue_on_failure:

View File

@@ -1536,8 +1536,8 @@ async def process_web_search(
) )
docs = await loader.aload() docs = await loader.aload()
urls = [ urls = [
doc.metadata["source"] for doc in docs doc.metadata.get("source") for doc in docs if doc.metadata.get("source")
] # only keep URLs which could be retrieved ] # only keep URLs
if request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL: if request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL:
return { return {