mirror of
https://github.com/open-webui/open-webui
synced 2025-04-30 19:14:05 +00:00
fix: FireCrawlLoader
This commit is contained in:
parent
7baca2b483
commit
09874ab83d
@ -228,7 +228,10 @@ class SafeFireCrawlLoader(BaseLoader, RateLimitMixin, URLProcessingMixin):
|
|||||||
mode=self.mode,
|
mode=self.mode,
|
||||||
params=self.params,
|
params=self.params,
|
||||||
)
|
)
|
||||||
yield from loader.lazy_load()
|
for document in loader.lazy_load():
|
||||||
|
if not document.metadata.get("source"):
|
||||||
|
document.metadata["source"] = document.metadata.get("sourceURL")
|
||||||
|
yield document
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
if self.continue_on_failure:
|
if self.continue_on_failure:
|
||||||
log.exception(f"Error loading {url}: {e}")
|
log.exception(f"Error loading {url}: {e}")
|
||||||
@ -248,6 +251,8 @@ class SafeFireCrawlLoader(BaseLoader, RateLimitMixin, URLProcessingMixin):
|
|||||||
params=self.params,
|
params=self.params,
|
||||||
)
|
)
|
||||||
async for document in loader.alazy_load():
|
async for document in loader.alazy_load():
|
||||||
|
if not document.metadata.get("source"):
|
||||||
|
document.metadata["source"] = document.metadata.get("sourceURL")
|
||||||
yield document
|
yield document
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
if self.continue_on_failure:
|
if self.continue_on_failure:
|
||||||
|
@ -1536,8 +1536,8 @@ async def process_web_search(
|
|||||||
)
|
)
|
||||||
docs = await loader.aload()
|
docs = await loader.aload()
|
||||||
urls = [
|
urls = [
|
||||||
doc.metadata["source"] for doc in docs
|
doc.metadata.get("source") for doc in docs if doc.metadata.get("source")
|
||||||
] # only keep URLs which could be retrieved
|
] # only keep URLs
|
||||||
|
|
||||||
if request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL:
|
if request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL:
|
||||||
return {
|
return {
|
||||||
|
Loading…
Reference in New Issue
Block a user