This commit is contained in:
Timothy Jaeryang Baek 2025-04-06 15:45:48 -07:00
parent 9825d03602
commit 155dbd5a66

View File

@ -174,7 +174,7 @@ class ProcessUrlForm(CollectionNameForm):
url: str
class SearchForm(CollectionNameForm):
class SearchForm(BaseModel):
query: str
@ -1464,10 +1464,6 @@ async def process_web_search(
log.debug(f"web_results: {web_results}")
try:
collection_basename = form_data.collection_name
if collection_basename == "" or collection_basename is None:
collection_basename = "web-search"
urls = [result.link for result in web_results]
loader = get_web_loader(
urls,
@ -1476,7 +1472,9 @@ async def process_web_search(
trust_env=request.app.state.config.RAG_WEB_SEARCH_TRUST_ENV,
)
docs = await loader.aload()
urls = [doc.metadata["source"] for doc in docs] # only keep URLs which could be retrieved
urls = [
doc.metadata["source"] for doc in docs
] # only keep URLs which could be retrieved
if request.app.state.config.BYPASS_WEB_SEARCH_EMBEDDING_AND_RETRIEVAL:
return {
@ -1495,14 +1493,17 @@ async def process_web_search(
else:
collection_names = []
for doc_idx, doc in enumerate(docs):
collection_sha = calculate_sha256_string(f"{form_data.query}-{urls[doc_idx]}")
doc_collection_name = f"{collection_basename}-{collection_sha}"[:63]
collection_names.append(doc_collection_name)
collection_name = f"web-search-{calculate_sha256_string(
f"{form_data.query}-{urls[doc_idx]}"
)}"[:63]
collection_names.append(collection_name)
await run_in_threadpool(
save_docs_to_vector_db,
request,
[doc],
doc_collection_name,
collection_name,
overwrite=True,
user=user,
)