mirror of
https://github.com/open-webui/open-webui
synced 2025-05-23 14:24:22 +00:00
refac: file table migration
This commit is contained in:
parent
c5787a2b55
commit
4d46bfe03b
@ -710,7 +710,9 @@ def save_docs_to_vector_db(
|
|||||||
VECTOR_DB_CLIENT.delete_collection(collection_name=collection_name)
|
VECTOR_DB_CLIENT.delete_collection(collection_name=collection_name)
|
||||||
log.info(f"deleting existing collection {collection_name}")
|
log.info(f"deleting existing collection {collection_name}")
|
||||||
elif add is False:
|
elif add is False:
|
||||||
log.info(f"collection {collection_name} already exists, overwrite is False and add is False")
|
log.info(
|
||||||
|
f"collection {collection_name} already exists, overwrite is False and add is False"
|
||||||
|
)
|
||||||
return True
|
return True
|
||||||
|
|
||||||
log.info(f"adding to collection {collection_name}")
|
log.info(f"adding to collection {collection_name}")
|
||||||
@ -823,7 +825,7 @@ def process_file(
|
|||||||
# Process the file and save the content
|
# Process the file and save the content
|
||||||
# Usage: /files/
|
# Usage: /files/
|
||||||
|
|
||||||
file_path = file.meta.get("path", None)
|
file_path = file.path
|
||||||
if file_path:
|
if file_path:
|
||||||
loader = Loader(
|
loader = Loader(
|
||||||
engine=app.state.config.CONTENT_EXTRACTION_ENGINE,
|
engine=app.state.config.CONTENT_EXTRACTION_ENGINE,
|
||||||
|
@ -17,14 +17,15 @@ log.setLevel(SRC_LOG_LEVELS["MODELS"])
|
|||||||
|
|
||||||
class File(Base):
|
class File(Base):
|
||||||
__tablename__ = "file"
|
__tablename__ = "file"
|
||||||
|
|
||||||
id = Column(String, primary_key=True)
|
id = Column(String, primary_key=True)
|
||||||
user_id = Column(String)
|
user_id = Column(String)
|
||||||
hash = Column(Text, nullable=True)
|
hash = Column(Text, nullable=True)
|
||||||
|
|
||||||
filename = Column(Text)
|
filename = Column(Text)
|
||||||
|
path = Column(Text, nullable=True)
|
||||||
|
|
||||||
data = Column(JSON, nullable=True)
|
data = Column(JSON, nullable=True)
|
||||||
meta = Column(JSONField)
|
meta = Column(JSON, nullable=True)
|
||||||
|
|
||||||
created_at = Column(BigInteger)
|
created_at = Column(BigInteger)
|
||||||
updated_at = Column(BigInteger)
|
updated_at = Column(BigInteger)
|
||||||
@ -38,8 +39,10 @@ class FileModel(BaseModel):
|
|||||||
hash: Optional[str] = None
|
hash: Optional[str] = None
|
||||||
|
|
||||||
filename: str
|
filename: str
|
||||||
|
path: Optional[str] = None
|
||||||
|
|
||||||
data: Optional[dict] = None
|
data: Optional[dict] = None
|
||||||
meta: dict
|
meta: Optional[dict] = None
|
||||||
|
|
||||||
created_at: int # timestamp in epoch
|
created_at: int # timestamp in epoch
|
||||||
updated_at: int # timestamp in epoch
|
updated_at: int # timestamp in epoch
|
||||||
@ -82,6 +85,7 @@ class FileForm(BaseModel):
|
|||||||
id: str
|
id: str
|
||||||
hash: Optional[str] = None
|
hash: Optional[str] = None
|
||||||
filename: str
|
filename: str
|
||||||
|
path: str
|
||||||
data: dict = {}
|
data: dict = {}
|
||||||
meta: dict = {}
|
meta: dict = {}
|
||||||
|
|
||||||
|
@ -57,11 +57,11 @@ def upload_file(file: UploadFile = File(...), user=Depends(get_verified_user)):
|
|||||||
**{
|
**{
|
||||||
"id": id,
|
"id": id,
|
||||||
"filename": filename,
|
"filename": filename,
|
||||||
|
"path": file_path,
|
||||||
"meta": {
|
"meta": {
|
||||||
"name": name,
|
"name": name,
|
||||||
"content_type": file.content_type,
|
"content_type": file.content_type,
|
||||||
"size": len(contents),
|
"size": len(contents),
|
||||||
"path": file_path,
|
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
),
|
),
|
||||||
@ -244,7 +244,7 @@ async def get_file_content_by_id(id: str, user=Depends(get_verified_user)):
|
|||||||
file = Files.get_file_by_id(id)
|
file = Files.get_file_by_id(id)
|
||||||
|
|
||||||
if file and (file.user_id == user.id or user.role == "admin"):
|
if file and (file.user_id == user.id or user.role == "admin"):
|
||||||
file_path = file.meta.get("path")
|
file_path = file.path
|
||||||
if file_path:
|
if file_path:
|
||||||
file_path = Path(file_path)
|
file_path = Path(file_path)
|
||||||
|
|
||||||
|
@ -0,0 +1,79 @@
|
|||||||
|
"""Update file table path
|
||||||
|
|
||||||
|
Revision ID: c29facfe716b
|
||||||
|
Revises: c69f45358db4
|
||||||
|
Create Date: 2024-10-20 17:02:35.241684
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
from alembic import op
|
||||||
|
import sqlalchemy as sa
|
||||||
|
import json
|
||||||
|
from sqlalchemy.sql import table, column
|
||||||
|
from sqlalchemy import String, Text, JSON, and_
|
||||||
|
|
||||||
|
|
||||||
|
revision = "c29facfe716b"
|
||||||
|
down_revision = "c69f45358db4"
|
||||||
|
branch_labels = None
|
||||||
|
depends_on = None
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade():
    """Add `file.path`, convert `file.meta` to JSON, and backfill `path`.

    Steps:
      1. Add the nullable `path` column to the "file" table.
      2. Convert the `meta` column from Text/JSONField to a native JSON type.
      3. Copy any legacy `meta["path"]` value into the new `path` column.
    """
    # 1. Add the `path` column to the "file" table.
    op.add_column("file", sa.Column("path", sa.Text(), nullable=True))

    # 2. Convert the `meta` column from Text/JSONField to `JSON()`.
    # Use Alembic's batch_alter_table for dialect compatibility (SQLite
    # cannot ALTER COLUMN in place); `postgresql_using` handles the cast on PG.
    with op.batch_alter_table("file", schema=None) as batch_op:
        batch_op.alter_column(
            "meta",
            type_=sa.JSON(),
            existing_type=sa.Text(),
            existing_nullable=True,
            nullable=True,
            postgresql_using="meta::json",
        )

    # 3. Migrate legacy data: lift `meta["path"]` into the new `path` column.
    # Use SQLAlchemy Core bindings for safety across different databases.
    file_table = table(
        "file", column("id", String), column("meta", JSON), column("path", Text)
    )

    connection = op.get_bind()

    # Only rows that have meta data and no path yet (the column is brand new,
    # so in practice that is every row with a non-null meta).
    results = connection.execute(
        sa.select(file_table.c.id, file_table.c.meta).where(
            and_(file_table.c.path.is_(None), file_table.c.meta.isnot(None))
        )
    ).fetchall()

    for row in results:
        # NOTE(review): depending on backend/driver, the pre-migration Text
        # value may come back as a raw JSON string rather than a decoded dict
        # (the original `if "path" in row.meta` would then do a substring test
        # or raise). Decode defensively; skip rows with undecodable meta.
        meta = row.meta
        if isinstance(meta, str):
            try:
                meta = json.loads(meta)
            except ValueError:
                continue
        if not isinstance(meta, dict):
            continue

        path = meta.get("path")
        if path is not None:
            # Update the `file` table with the extracted `path` value.
            connection.execute(
                file_table.update()
                .where(file_table.c.id == row.id)
                .values({"path": path})
            )
|
def downgrade():
    """Reverse the upgrade: drop `file.path` and restore `meta` as Text."""
    # 1. Remove the `path` column added by the upgrade.
    op.drop_column("file", "path")

    # 2. Revert the `meta` column back to Text (legacy JSONField storage),
    # again via batch mode for SQLite compatibility.
    with op.batch_alter_table("file", schema=None) as batch_op:
        batch_op.alter_column(
            "meta",
            type_=sa.Text(),
            existing_type=sa.JSON(),
            existing_nullable=True,
        )
Loading…
Reference in New Issue
Block a user