diff --git a/backend/open_webui/apps/retrieval/main.py b/backend/open_webui/apps/retrieval/main.py
index 8a9d410e6..d54cc8f4a 100644
--- a/backend/open_webui/apps/retrieval/main.py
+++ b/backend/open_webui/apps/retrieval/main.py
@@ -710,7 +710,9 @@ def save_docs_to_vector_db(
             VECTOR_DB_CLIENT.delete_collection(collection_name=collection_name)
             log.info(f"deleting existing collection {collection_name}")
         elif add is False:
-            log.info(f"collection {collection_name} already exists, overwrite is False and add is False")
+            log.info(
+                f"collection {collection_name} already exists, overwrite is False and add is False"
+            )
             return True
 
     log.info(f"adding to collection {collection_name}")
@@ -823,7 +825,7 @@ def process_file(
 
         # Process the file and save the content
         # Usage: /files/
-        file_path = file.meta.get("path", None)
+        file_path = file.path
         if file_path:
             loader = Loader(
                 engine=app.state.config.CONTENT_EXTRACTION_ENGINE,
diff --git a/backend/open_webui/apps/webui/models/files.py b/backend/open_webui/apps/webui/models/files.py
index 20e0ffe6d..f27fdd259 100644
--- a/backend/open_webui/apps/webui/models/files.py
+++ b/backend/open_webui/apps/webui/models/files.py
@@ -17,14 +17,15 @@ log.setLevel(SRC_LOG_LEVELS["MODELS"])
 
 
 class File(Base):
     __tablename__ = "file"
-
     id = Column(String, primary_key=True)
     user_id = Column(String)
     hash = Column(Text, nullable=True)
     filename = Column(Text)
+    path = Column(Text, nullable=True)
+    data = Column(JSON, nullable=True)
 
-    meta = Column(JSONField)
+    meta = Column(JSON, nullable=True)
 
     created_at = Column(BigInteger)
     updated_at = Column(BigInteger)
@@ -38,8 +39,10 @@ class FileModel(BaseModel):
 
     hash: Optional[str] = None
     filename: str
+    path: Optional[str] = None
+    data: Optional[dict] = None
 
-    meta: dict
+    meta: Optional[dict] = None
 
     created_at: int  # timestamp in epoch
     updated_at: int  # timestamp in epoch
@@ -82,6 +85,7 @@ class FileForm(BaseModel):
     id: str
     hash: Optional[str] = None
     filename: str
+    path: str
     data: dict = {}
     meta: dict = {}
 
diff --git a/backend/open_webui/apps/webui/routers/files.py b/backend/open_webui/apps/webui/routers/files.py
index 8185971d1..8167ffc0a 100644
--- a/backend/open_webui/apps/webui/routers/files.py
+++ b/backend/open_webui/apps/webui/routers/files.py
@@ -57,11 +57,11 @@ def upload_file(file: UploadFile = File(...), user=Depends(get_verified_user)):
             **{
                 "id": id,
                 "filename": filename,
+                "path": file_path,
                 "meta": {
                     "name": name,
                     "content_type": file.content_type,
                     "size": len(contents),
-                    "path": file_path,
                 },
             }
         ),
@@ -244,7 +244,7 @@ async def get_file_content_by_id(id: str, user=Depends(get_verified_user)):
     file = Files.get_file_by_id(id)
 
     if file and (file.user_id == user.id or user.role == "admin"):
-        file_path = file.meta.get("path")
+        file_path = file.path
         if file_path:
             file_path = Path(file_path)
 
diff --git a/backend/open_webui/migrations/versions/c29facfe716b_update_file_table_path.py b/backend/open_webui/migrations/versions/c29facfe716b_update_file_table_path.py
new file mode 100644
index 000000000..de82854b8
--- /dev/null
+++ b/backend/open_webui/migrations/versions/c29facfe716b_update_file_table_path.py
@@ -0,0 +1,79 @@
+"""Update file table path
+
+Revision ID: c29facfe716b
+Revises: c69f45358db4
+Create Date: 2024-10-20 17:02:35.241684
+
+"""
+
+from alembic import op
+import sqlalchemy as sa
+import json
+from sqlalchemy.sql import table, column
+from sqlalchemy import String, Text, JSON, and_
+
+
+revision = "c29facfe716b"
+down_revision = "c69f45358db4"
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    # 1. Add the `path` column to the "file" table.
+    op.add_column("file", sa.Column("path", sa.Text(), nullable=True))
+
+    # 2. Convert the `meta` column from Text/JSONField to `JSON()`
+    # Use Alembic's default batch_op for dialect compatibility.
+    with op.batch_alter_table("file", schema=None) as batch_op:
+        batch_op.alter_column(
+            "meta",
+            type_=sa.JSON(),
+            existing_type=sa.Text(),
+            existing_nullable=True,
+            nullable=True,
+            postgresql_using="meta::json",
+        )
+
+    # 3. Migrate legacy data from `meta` JSONField
+    # Fetch and process `meta` data from the table, add values to the new `path` column as necessary.
+    # We will use SQLAlchemy core bindings to ensure safety across different databases.
+
+    file_table = table(
+        "file", column("id", String), column("meta", JSON), column("path", Text)
+    )
+
+    # Create connection to the database
+    connection = op.get_bind()
+
+    # Get the rows where `meta` has a path and `path` column is null (new column)
+    # Loop through each row in the result set to update the path
+    results = connection.execute(
+        sa.select(file_table.c.id, file_table.c.meta).where(
+            and_(file_table.c.path.is_(None), file_table.c.meta.isnot(None))
+        )
+    ).fetchall()
+
+    # Iterate over each row to extract and update the `path` from `meta` column
+    for row in results:
+        if "path" in row.meta:
+            # Extract the `path` field from the `meta` JSON
+            path = row.meta.get("path")
+
+            # Update the `file` table with the new `path` value
+            connection.execute(
+                file_table.update()
+                .where(file_table.c.id == row.id)
+                .values({"path": path})
+            )
+
+
+def downgrade():
+    # 1. Remove the `path` column
+    op.drop_column("file", "path")
+
+    # 2. Revert the `meta` column back to Text/JSONField
+    with op.batch_alter_table("file", schema=None) as batch_op:
+        batch_op.alter_column(
+            "meta", type_=sa.Text(), existing_type=sa.JSON(), existing_nullable=True
+        )