Merge pull request #12517 from Ithanil/only_keep_retrieved_urls

fix: only keep URLs as sources for which the content was actually retrieved
This commit is contained in:
Timothy Jaeryang Baek 2025-04-06 15:08:47 -07:00 committed by GitHub
commit 635c08a5ab
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -1478,6 +1478,7 @@ async def process_web_search(
trust_env=request.app.state.config.RAG_WEB_SEARCH_TRUST_ENV,
)
docs = await loader.aload()
urls = [doc.metadata["source"] for doc in docs] # only keep URLs which could be retrieved
if request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL:
return {