open-webui/backend/open_webui/retrieval/vector/dbs/qdrant.py

185 lines
6.9 KiB
Python
Raw Normal View History

2024-10-09 10:51:43 +00:00
from typing import Optional
from qdrant_client import QdrantClient as Qclient
from qdrant_client.http.models import PointStruct
from qdrant_client.models import models
from open_webui.apps.retrieval.vector.main import VectorItem, SearchResult, GetResult
2024-11-14 20:06:46 +00:00
from open_webui.config import QDRANT_URI, QDRANT_API_KEY
2024-10-09 10:51:43 +00:00
2024-10-09 16:33:39 +00:00
# Effectively "no limit": passed as the `limit` argument whenever the caller
# does not supply one, because Qdrant would otherwise cap results at 10.
NO_LIMIT = 999_999_999
2024-10-21 01:38:06 +00:00
2024-10-09 10:51:43 +00:00
class QdrantClient:
    """Vector-store adapter that keeps Open WebUI collections in Qdrant.

    Every collection name handed to a public method is stored in Qdrant as
    ``f"{collection_prefix}_{collection_name}"``. Each point's payload holds
    the item's ``"text"`` and ``"metadata"``.
    """

    def __init__(self):
        """Build the underlying client from ``QDRANT_URI`` / ``QDRANT_API_KEY``.

        ``self.client`` is ``None`` when ``QDRANT_URI`` is falsy.
        """
        self.collection_prefix = "open-webui"
        self.QDRANT_URI = QDRANT_URI
        self.QDRANT_API_KEY = QDRANT_API_KEY
        self.client = (
            Qclient(url=self.QDRANT_URI, api_key=self.QDRANT_API_KEY)
            if self.QDRANT_URI
            else None
        )

    def _prefixed(self, collection_name: str) -> str:
        """Return the name under which ``collection_name`` is stored in Qdrant."""
        return f"{self.collection_prefix}_{collection_name}"

    def _result_to_get_result(self, points) -> GetResult:
        """Convert an iterable of Qdrant points into a ``GetResult``.

        Each point must expose ``id`` and a ``payload`` containing the
        ``"text"`` and ``"metadata"`` keys written by ``_create_points``.
        The three result lists are each wrapped in one outer list.
        """
        ids = []
        documents = []
        metadatas = []
        for point in points:
            payload = point.payload
            ids.append(point.id)
            documents.append(payload["text"])
            metadatas.append(payload["metadata"])
        return GetResult(ids=[ids], documents=[documents], metadatas=[metadatas])

    def _create_collection(self, collection_name: str, dimension: int):
        """Create the prefixed collection with cosine-distance vectors of size ``dimension``."""
        collection_name_with_prefix = self._prefixed(collection_name)
        self.client.create_collection(
            collection_name=collection_name_with_prefix,
            vectors_config=models.VectorParams(
                size=dimension, distance=models.Distance.COSINE
            ),
        )
        print(f"collection {collection_name_with_prefix} successfully created!")

    def _create_collection_if_not_exists(self, collection_name, dimension):
        """Create the collection unless ``has_collection`` reports it already exists."""
        if not self.has_collection(collection_name=collection_name):
            self._create_collection(
                collection_name=collection_name, dimension=dimension
            )

    def _create_points(self, items: list[VectorItem]):
        """Map each item to a ``PointStruct`` carrying its id, vector, text and metadata."""
        return [
            PointStruct(
                id=item["id"],
                vector=item["vector"],
                payload={"text": item["text"], "metadata": item["metadata"]},
            )
            for item in items
        ]

    def has_collection(self, collection_name: str) -> bool:
        """Return whether the prefixed collection exists in Qdrant."""
        return self.client.collection_exists(self._prefixed(collection_name))

    def delete_collection(self, collection_name: str):
        """Delete the prefixed collection and return the client's response."""
        return self.client.delete_collection(
            collection_name=self._prefixed(collection_name)
        )

    def search(
        self, collection_name: str, vectors: list[list[float | int]], limit: int
    ) -> Optional[SearchResult]:
        """Return the nearest neighbours of the first query vector.

        Only ``vectors[0]`` is used as the query. ``distances`` holds the
        score Qdrant reports for each returned point. Returns ``None`` when
        ``vectors`` is empty.
        """
        if not vectors:
            # Nothing to query with; previously this raised IndexError.
            return None
        if limit is None:
            limit = NO_LIMIT  # otherwise qdrant would set limit to 10!

        query_response = self.client.query_points(
            collection_name=self._prefixed(collection_name),
            query=vectors[0],
            limit=limit,
        )
        get_result = self._result_to_get_result(query_response.points)
        return SearchResult(
            ids=get_result.ids,
            documents=get_result.documents,
            metadatas=get_result.metadatas,
            distances=[[point.score for point in query_response.points]],
        )

    def query(self, collection_name: str, filter: dict, limit: Optional[int] = None):
        """Return the points whose metadata matches every key/value in ``filter``.

        Each ``filter`` key is matched against ``metadata.<key>`` in the
        payload. Conditions are combined with ``must`` (logical AND), the
        same way ``delete`` combines them. Returns ``None`` when the
        collection does not exist or when the lookup raises (the exception
        is printed, not propagated).
        """
        if not self.has_collection(collection_name):
            return None
        try:
            if limit is None:
                limit = NO_LIMIT  # otherwise qdrant would set limit to 10!

            field_conditions = [
                models.FieldCondition(
                    key=f"metadata.{key}", match=models.MatchValue(value=value)
                )
                for key, value in filter.items()
            ]
            points = self.client.query_points(
                collection_name=self._prefixed(collection_name),
                query_filter=models.Filter(must=field_conditions),
                limit=limit,
            )
            return self._result_to_get_result(points.points)
        except Exception as e:
            # Best-effort lookup: report the failure and signal "no result".
            print(e)
            return None

    def get(self, collection_name: str) -> Optional[GetResult]:
        """Return all items in the collection (up to ``NO_LIMIT`` points)."""
        points = self.client.query_points(
            collection_name=self._prefixed(collection_name),
            limit=NO_LIMIT,  # otherwise qdrant would set limit to 10!
        )
        return self._result_to_get_result(points.points)

    def insert(self, collection_name: str, items: list[VectorItem]):
        """Upload ``items``, creating the collection first if it does not exist.

        The vector size of a new collection is taken from the first item.
        An empty ``items`` list is a no-op.
        """
        if not items:
            # No first item to read the dimension from, and nothing to upload.
            return None
        self._create_collection_if_not_exists(collection_name, len(items[0]["vector"]))
        points = self._create_points(items)
        self.client.upload_points(self._prefixed(collection_name), points)

    def upsert(self, collection_name: str, items: list[VectorItem]):
        """Insert or update ``items``, creating the collection first if needed.

        Returns the client's upsert response, or ``None`` when ``items`` is
        empty (in which case nothing is sent to Qdrant).
        """
        if not items:
            # No first item to read the dimension from, and nothing to upsert.
            return None
        self._create_collection_if_not_exists(collection_name, len(items[0]["vector"]))
        points = self._create_points(items)
        return self.client.upsert(self._prefixed(collection_name), points)

    def delete(
        self,
        collection_name: str,
        ids: Optional[list[str]] = None,
        filter: Optional[dict] = None,
    ):
        """Delete points selected by ``ids`` or, failing that, by ``filter``.

        ``ids`` are matched against ``metadata.id`` in the payload; a point
        is deleted when its ``metadata.id`` equals any of them. ``filter``
        keys are matched against ``metadata.<key>`` and must all match.
        When neither selector is provided nothing is deleted and ``None``
        is returned.
        """
        field_conditions = []
        if ids:
            # One "match any" condition: separate per-id `must` conditions
            # would require metadata.id to equal every id at once.
            field_conditions.append(
                models.FieldCondition(
                    key="metadata.id",
                    match=models.MatchAny(any=list(ids)),
                )
            )
        elif filter:
            for key, value in filter.items():
                field_conditions.append(
                    models.FieldCondition(
                        key=f"metadata.{key}",
                        match=models.MatchValue(value=value),
                    )
                )

        if not field_conditions:
            # A filter with no conditions places no constraint on the points
            # and would select the whole collection; refuse to send it.
            return None

        return self.client.delete(
            collection_name=self._prefixed(collection_name),
            points_selector=models.FilterSelector(
                filter=models.Filter(must=field_conditions)
            ),
        )

    def reset(self):
        """Delete every Qdrant collection whose name starts with ``collection_prefix``."""
        for collection in self.client.get_collections().collections:
            if collection.name.startswith(self.collection_prefix):
                self.client.delete_collection(collection_name=collection.name)