refac: web search performance

Co-Authored-By: Mabeck <64421281+mmabeck@users.noreply.github.com>
This commit is contained in:
Timothy Jaeryang Baek
2025-05-10 17:54:41 +04:00
parent 015ac2f532
commit 34ec10a78c
2 changed files with 95 additions and 117 deletions

View File

@@ -353,8 +353,6 @@ async def chat_web_search_handler(
)
return form_data
all_results = []
await event_emitter(
{
"type": "status",
@@ -366,106 +364,75 @@ async def chat_web_search_handler(
}
)
gathered_results = await asyncio.gather(
*(
process_web_search(
request,
SearchForm(**{"query": searchQuery}),
user=user,
)
for searchQuery in queries
),
return_exceptions=True,
)
try:
results = await process_web_search(
request,
SearchForm(queries=queries),
user=user,
)
for searchQuery, results in zip(queries, gathered_results):
try:
if isinstance(results, Exception):
raise Exception(f"Error searching {searchQuery}: {str(results)}")
if results:
files = form_data.get("files", [])
if results:
all_results.append(results)
files = form_data.get("files", [])
if results.get("collection_names"):
for col_idx, collection_name in enumerate(
results.get("collection_names")
):
files.append(
{
"collection_name": collection_name,
"name": ", ".join(queries),
"type": "web_search",
"urls": results["filenames"],
}
)
elif results.get("docs"):
# Invoked when bypass embedding and retrieval is set to True
docs = results["docs"]
files.append(
{
"docs": docs,
"name": ", ".join(queries),
"type": "web_search",
"urls": results["filenames"],
}
)
if results.get("collection_names"):
for col_idx, collection_name in enumerate(
results.get("collection_names")
):
files.append(
{
"collection_name": collection_name,
"name": searchQuery,
"type": "web_search",
"urls": [results["filenames"][col_idx]],
}
)
elif results.get("docs"):
# Invoked when bypass embedding and retrieval is set to True
docs = results["docs"]
form_data["files"] = files
if len(docs) == len(results["filenames"]):
# the number of docs and filenames (urls) should be the same
for doc_idx, doc in enumerate(docs):
files.append(
{
"docs": [doc],
"name": searchQuery,
"type": "web_search",
"urls": [results["filenames"][doc_idx]],
}
)
else:
# edge case when the number of docs and filenames (urls) are not the same
# this should not happen, but if it does, we will just append the docs
files.append(
{
"docs": results.get("docs", []),
"name": searchQuery,
"type": "web_search",
"urls": results["filenames"],
}
)
form_data["files"] = files
except Exception as e:
log.exception(e)
await event_emitter(
{
"type": "status",
"data": {
"action": "web_search",
"description": 'Error searching "{{searchQuery}}"',
"query": searchQuery,
"description": "Searched {{count}} sites",
"urls": results["filenames"],
"done": True,
},
}
)
else:
await event_emitter(
{
"type": "status",
"data": {
"action": "web_search",
"description": "No search results found",
"done": True,
"error": True,
},
}
)
if all_results:
urls = []
for results in all_results:
if "filenames" in results:
urls.extend(results["filenames"])
except Exception as e:
log.exception(e)
await event_emitter(
{
"type": "status",
"data": {
"action": "web_search",
"description": "Searched {{count}} sites",
"urls": urls,
"done": True,
},
}
)
else:
await event_emitter(
{
"type": "status",
"data": {
"action": "web_search",
"description": "No search results found",
"description": "An error occurred while searching the web",
"queries": queries,
"done": True,
"error": True,
},