mirror of
https://github.com/open-webui/open-webui
synced 2025-01-19 09:16:44 +00:00
Merge pull request #8298 from jk-f5/feat/pg_vector_size
feat: Allow setting the initial vector length on pgvector document_chunk table
This commit is contained in:
commit
0e805e7dc4
@ -1211,6 +1211,9 @@ if VECTOR_DB == "pgvector" and not PGVECTOR_DB_URL.startswith("postgres"):
|
||||
raise ValueError(
|
||||
"Pgvector requires setting PGVECTOR_DB_URL or using Postgres with vector extension as the primary database."
|
||||
)
|
||||
PGVECTOR_INITIALIZE_MAX_VECTOR_LENGTH = int(
|
||||
os.environ.get("PGVECTOR_INITIALIZE_MAX_VECTOR_LENGTH", "1536")
|
||||
)
|
||||
|
||||
####################################
|
||||
# Information Retrieval (RAG)
|
||||
|
@ -5,6 +5,7 @@ from sqlalchemy import (
|
||||
create_engine,
|
||||
Column,
|
||||
Integer,
|
||||
MetaData,
|
||||
select,
|
||||
text,
|
||||
Text,
|
||||
@ -19,9 +20,9 @@ from pgvector.sqlalchemy import Vector
|
||||
from sqlalchemy.ext.mutable import MutableDict
|
||||
|
||||
from open_webui.retrieval.vector.main import VectorItem, SearchResult, GetResult
|
||||
from open_webui.config import PGVECTOR_DB_URL
|
||||
from open_webui.config import PGVECTOR_DB_URL, PGVECTOR_INITIALIZE_MAX_VECTOR_LENGTH
|
||||
|
||||
VECTOR_LENGTH = 1536
|
||||
VECTOR_LENGTH = PGVECTOR_INITIALIZE_MAX_VECTOR_LENGTH
|
||||
Base = declarative_base()
|
||||
|
||||
|
||||
@ -56,6 +57,9 @@ class PgvectorClient:
|
||||
# Ensure the pgvector extension is available
|
||||
self.session.execute(text("CREATE EXTENSION IF NOT EXISTS vector;"))
|
||||
|
||||
# Check vector length consistency
|
||||
self.check_vector_length()
|
||||
|
||||
# Create the tables if they do not exist
|
||||
# Base.metadata.create_all requires a bind (engine or connection)
|
||||
# Get the connection from the session
|
||||
@ -82,6 +86,38 @@ class PgvectorClient:
|
||||
print(f"Error during initialization: {e}")
|
||||
raise
|
||||
|
||||
def check_vector_length(self) -> None:
|
||||
"""
|
||||
Check if the VECTOR_LENGTH matches the existing vector column dimension in the database.
|
||||
Raises an exception if there is a mismatch.
|
||||
"""
|
||||
metadata = MetaData()
|
||||
metadata.reflect(bind=self.session.bind, only=["document_chunk"])
|
||||
|
||||
if "document_chunk" in metadata.tables:
|
||||
document_chunk_table = metadata.tables["document_chunk"]
|
||||
if "vector" in document_chunk_table.columns:
|
||||
vector_column = document_chunk_table.columns["vector"]
|
||||
vector_type = vector_column.type
|
||||
if isinstance(vector_type, Vector):
|
||||
db_vector_length = vector_type.dim
|
||||
if db_vector_length != VECTOR_LENGTH:
|
||||
raise Exception(
|
||||
f"VECTOR_LENGTH {VECTOR_LENGTH} does not match existing vector column dimension {db_vector_length}. "
|
||||
"Cannot change vector size after initialization without migrating the data."
|
||||
)
|
||||
else:
|
||||
raise Exception(
|
||||
"The 'vector' column exists but is not of type 'Vector'."
|
||||
)
|
||||
else:
|
||||
raise Exception(
|
||||
"The 'vector' column does not exist in the 'document_chunk' table."
|
||||
)
|
||||
else:
|
||||
# Table does not exist yet; no action needed
|
||||
pass
|
||||
|
||||
def adjust_vector_length(self, vector: List[float]) -> List[float]:
|
||||
# Adjust vector to have length VECTOR_LENGTH
|
||||
current_length = len(vector)
|
||||
|
Loading…
Reference in New Issue
Block a user