mirror of
				https://github.com/open-webui/open-webui
				synced 2025-06-26 18:26:48 +00:00 
			
		
		
		
	feat: Allow setting the initial vector length on pgvector document_chunk table
This commit is contained in:
		
							parent
							
								
									996ade9090
								
							
						
					
					
						commit
						70b74b5217
					
				@ -1211,6 +1211,9 @@ if VECTOR_DB == "pgvector" and not PGVECTOR_DB_URL.startswith("postgres"):
 | 
			
		||||
    raise ValueError(
 | 
			
		||||
        "Pgvector requires setting PGVECTOR_DB_URL or using Postgres with vector extension as the primary database."
 | 
			
		||||
    )
 | 
			
		||||
PGVECTOR_INITIALIZE_MAX_VECTOR_LENGTH = int(
 | 
			
		||||
    os.environ.get("PGVECTOR_INITIALIZE_MAX_VECTOR_LENGTH", "1536")
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
####################################
 | 
			
		||||
# Information Retrieval (RAG)
 | 
			
		||||
 | 
			
		||||
@ -5,6 +5,7 @@ from sqlalchemy import (
 | 
			
		||||
    create_engine,
 | 
			
		||||
    Column,
 | 
			
		||||
    Integer,
 | 
			
		||||
    MetaData,
 | 
			
		||||
    select,
 | 
			
		||||
    text,
 | 
			
		||||
    Text,
 | 
			
		||||
@ -19,9 +20,9 @@ from pgvector.sqlalchemy import Vector
 | 
			
		||||
from sqlalchemy.ext.mutable import MutableDict
 | 
			
		||||
 | 
			
		||||
from open_webui.retrieval.vector.main import VectorItem, SearchResult, GetResult
 | 
			
		||||
from open_webui.config import PGVECTOR_DB_URL
 | 
			
		||||
from open_webui.config import PGVECTOR_DB_URL, PGVECTOR_INITIALIZE_MAX_VECTOR_LENGTH
 | 
			
		||||
 | 
			
		||||
VECTOR_LENGTH = 1536
 | 
			
		||||
VECTOR_LENGTH = PGVECTOR_INITIALIZE_MAX_VECTOR_LENGTH
 | 
			
		||||
Base = declarative_base()
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -56,6 +57,9 @@ class PgvectorClient:
 | 
			
		||||
            # Ensure the pgvector extension is available
 | 
			
		||||
            self.session.execute(text("CREATE EXTENSION IF NOT EXISTS vector;"))
 | 
			
		||||
 | 
			
		||||
            # Check vector length consistency
 | 
			
		||||
            self.check_vector_length()
 | 
			
		||||
 | 
			
		||||
            # Create the tables if they do not exist
 | 
			
		||||
            # Base.metadata.create_all requires a bind (engine or connection)
 | 
			
		||||
            # Get the connection from the session
 | 
			
		||||
@ -82,6 +86,38 @@ class PgvectorClient:
 | 
			
		||||
            print(f"Error during initialization: {e}")
 | 
			
		||||
            raise
 | 
			
		||||
 | 
			
		||||
    def check_vector_length(self) -> None:
 | 
			
		||||
        """
 | 
			
		||||
        Check if the VECTOR_LENGTH matches the existing vector column dimension in the database.
 | 
			
		||||
        Raises an exception if there is a mismatch.
 | 
			
		||||
        """
 | 
			
		||||
        metadata = MetaData()
 | 
			
		||||
        metadata.reflect(bind=self.session.bind, only=["document_chunk"])
 | 
			
		||||
 | 
			
		||||
        if "document_chunk" in metadata.tables:
 | 
			
		||||
            document_chunk_table = metadata.tables["document_chunk"]
 | 
			
		||||
            if "vector" in document_chunk_table.columns:
 | 
			
		||||
                vector_column = document_chunk_table.columns["vector"]
 | 
			
		||||
                vector_type = vector_column.type
 | 
			
		||||
                if isinstance(vector_type, Vector):
 | 
			
		||||
                    db_vector_length = vector_type.dim
 | 
			
		||||
                    if db_vector_length != VECTOR_LENGTH:
 | 
			
		||||
                        raise Exception(
 | 
			
		||||
                            f"VECTOR_LENGTH {VECTOR_LENGTH} does not match existing vector column dimension {db_vector_length}. "
 | 
			
		||||
                            "Cannot change vector size after initialization without migrating the data."
 | 
			
		||||
                        )
 | 
			
		||||
                else:
 | 
			
		||||
                    raise Exception(
 | 
			
		||||
                        "The 'vector' column exists but is not of type 'Vector'."
 | 
			
		||||
                    )
 | 
			
		||||
            else:
 | 
			
		||||
                raise Exception(
 | 
			
		||||
                    "The 'vector' column does not exist in the 'document_chunk' table."
 | 
			
		||||
                )
 | 
			
		||||
        else:
 | 
			
		||||
            # Table does not exist yet; no action needed
 | 
			
		||||
            pass
 | 
			
		||||
 | 
			
		||||
    def adjust_vector_length(self, vector: List[float]) -> List[float]:
 | 
			
		||||
        # Adjust vector to have length VECTOR_LENGTH
 | 
			
		||||
        current_length = len(vector)
 | 
			
		||||
 | 
			
		||||
		Loading…
	
		Reference in New Issue
	
	Block a user