Merge branch 'open-webui:main' into main

This commit is contained in:
JT
2025-02-04 13:06:04 -08:00
committed by GitHub
200 changed files with 7965 additions and 4116 deletions

View File

@@ -11,6 +11,8 @@ from langchain.retrievers import ContextualCompressionRetriever, EnsembleRetriev
from langchain_community.retrievers import BM25Retriever
from langchain_core.documents import Document
from open_webui.config import VECTOR_DB
from open_webui.retrieval.vector.connector import VECTOR_DB_CLIENT
from open_webui.utils.misc import get_last_user_message
@@ -203,7 +205,12 @@ def query_collection(
else:
pass
return merge_and_sort_query_results(results, k=k)
if VECTOR_DB == "chroma":
# Chroma uses unconventional cosine similarity, so we don't need to reverse the results
# https://docs.trychroma.com/docs/collections/configure#configuring-chroma-collections
return merge_and_sort_query_results(results, k=k, reverse=False)
else:
return merge_and_sort_query_results(results, k=k, reverse=True)
def query_collection_with_hybrid_search(
@@ -239,7 +246,12 @@ def query_collection_with_hybrid_search(
"Hybrid search failed for all collections. Using Non hybrid search as fallback."
)
return merge_and_sort_query_results(results, k=k, reverse=True)
if VECTOR_DB == "chroma":
# Chroma uses unconventional cosine similarity, so we don't need to reverse the results
# https://docs.trychroma.com/docs/collections/configure#configuring-chroma-collections
return merge_and_sort_query_results(results, k=k, reverse=False)
else:
return merge_and_sort_query_results(results, k=k, reverse=True)
def get_embedding_function(

View File

@@ -51,8 +51,8 @@ class ChromaClient:
def has_collection(self, collection_name: str) -> bool:
# Check if the collection exists based on the collection name.
collections = self.client.list_collections()
return collection_name in [collection.name for collection in collections]
collection_names = self.client.list_collections()
return collection_name in collection_names
def delete_collection(self, collection_name: str):
# Delete the collection based on the collection name.

View File

@@ -7,13 +7,14 @@ from typing import Optional
from open_webui.retrieval.vector.main import VectorItem, SearchResult, GetResult
from open_webui.config import (
MILVUS_URI,
MILVUS_DB,
)
class MilvusClient:
def __init__(self):
self.collection_prefix = "open_webui"
self.client = Client(uri=MILVUS_URI)
self.client = Client(uri=MILVUS_URI, database=MILVUS_DB)
def _result_to_get_result(self, result) -> GetResult:
ids = []

View File

@@ -9,6 +9,7 @@ from sqlalchemy import (
select,
text,
Text,
Table,
values,
)
from sqlalchemy.sql import true
@@ -18,6 +19,7 @@ from sqlalchemy.orm import declarative_base, scoped_session, sessionmaker
from sqlalchemy.dialects.postgresql import JSONB, array
from pgvector.sqlalchemy import Vector
from sqlalchemy.ext.mutable import MutableDict
from sqlalchemy.exc import NoSuchTableError
from open_webui.retrieval.vector.main import VectorItem, SearchResult, GetResult
from open_webui.config import PGVECTOR_DB_URL, PGVECTOR_INITIALIZE_MAX_VECTOR_LENGTH
@@ -92,31 +94,34 @@ class PgvectorClient:
Raises an exception if there is a mismatch.
"""
metadata = MetaData()
metadata.reflect(bind=self.session.bind, only=["document_chunk"])
try:
# Attempt to reflect the 'document_chunk' table
document_chunk_table = Table(
"document_chunk", metadata, autoload_with=self.session.bind
)
except NoSuchTableError:
# Table does not exist; no action needed
return
if "document_chunk" in metadata.tables:
document_chunk_table = metadata.tables["document_chunk"]
if "vector" in document_chunk_table.columns:
vector_column = document_chunk_table.columns["vector"]
vector_type = vector_column.type
if isinstance(vector_type, Vector):
db_vector_length = vector_type.dim
if db_vector_length != VECTOR_LENGTH:
raise Exception(
f"VECTOR_LENGTH {VECTOR_LENGTH} does not match existing vector column dimension {db_vector_length}. "
"Cannot change vector size after initialization without migrating the data."
)
else:
# Proceed to check the vector column
if "vector" in document_chunk_table.columns:
vector_column = document_chunk_table.columns["vector"]
vector_type = vector_column.type
if isinstance(vector_type, Vector):
db_vector_length = vector_type.dim
if db_vector_length != VECTOR_LENGTH:
raise Exception(
"The 'vector' column exists but is not of type 'Vector'."
f"VECTOR_LENGTH {VECTOR_LENGTH} does not match existing vector column dimension {db_vector_length}. "
"Cannot change vector size after initialization without migrating the data."
)
else:
raise Exception(
"The 'vector' column does not exist in the 'document_chunk' table."
"The 'vector' column exists but is not of type 'Vector'."
)
else:
# Table does not exist yet; no action needed
pass
raise Exception(
"The 'vector' column does not exist in the 'document_chunk' table."
)
def adjust_vector_length(self, vector: List[float]) -> List[float]:
# Adjust vector to have length VECTOR_LENGTH

View File

@@ -23,7 +23,7 @@ def search_bing(
filter_list: Optional[list[str]] = None,
) -> list[SearchResult]:
mkt = locale
params = {"q": query, "mkt": mkt, "answerCount": count}
params = {"q": query, "mkt": mkt, "count": count}
headers = {"Ocp-Apim-Subscription-Key": subscription_key}
try: