This commit is contained in:
Timothy J. Baek
2024-10-02 21:14:58 -07:00
parent a2eadb30f5
commit 351b1dbf31
6 changed files with 225 additions and 19 deletions

View File

@@ -1,3 +1,5 @@
# TODO: Merge this with the webui_app and make it a single app
import json
import logging
import mimetypes
@@ -728,11 +730,13 @@ def process_file(
docs = loader.load(file.filename, file.meta.get("content_type"), file_path)
text_content = " ".join([doc.page_content for doc in docs])
log.debug(f"text_content: {text_content}")
hash = calculate_sha256_string(text_content)
Files.update_files_data_by_id(
Files.update_file_data_by_id(
form_data.file_id,
{"content": text_content},
)
Files.update_file_hash_by_id(form_data.file_id, hash)
try:
result = save_docs_to_vector_db(

View File

@@ -20,7 +20,7 @@ class File(Base):
id = Column(String, primary_key=True)
user_id = Column(String)
hash = Column(String)
hash = Column(Text, nullable=True)
filename = Column(Text)
data = Column(JSON)
@@ -35,7 +35,7 @@ class FileModel(BaseModel):
id: str
user_id: str
hash: str
hash: Optional[str] = None
filename: str
data: dict
@@ -53,7 +53,7 @@ class FileModel(BaseModel):
class FileModelResponse(BaseModel):
id: str
user_id: str
hash: str
hash: Optional[str] = None
filename: str
data: dict
@@ -65,6 +65,7 @@ class FileModelResponse(BaseModel):
class FileForm(BaseModel):
id: str
hash: Optional[str] = None
filename: str
meta: dict = {}
@@ -120,7 +121,18 @@ class FilesTable:
for file in db.query(File).filter_by(user_id=user_id).all()
]
def update_files_data_by_id(self, id: str, data: dict) -> Optional[FileModel]:
def update_file_hash_by_id(self, id: str, hash: str) -> Optional[FileModel]:
    """Store a new content hash on the file row identified by ``id``.

    Args:
        id: Primary key of the file row to update.
        hash: Hash value to write to the row's ``hash`` column.

    Returns:
        The updated row validated as a ``FileModel``, or ``None`` when no
        row matches ``id`` or the update raises.
    """
    with get_db() as db:
        try:
            file = db.query(File).filter_by(id=id).first()
            # ``first()`` returns None when nothing matches. Handle that
            # explicitly instead of relying on the AttributeError that
            # assigning to ``None.hash`` would raise.
            if file is None:
                return None
            file.hash = hash
            db.commit()
            return FileModel.model_validate(file)
        except Exception:
            # Best-effort contract kept from the original: callers get
            # None on failure rather than an exception.
            return None
def update_file_data_by_id(self, id: str, data: dict) -> Optional[FileModel]:
with get_db() as db:
try:
file = db.query(File).filter_by(id=id).first()
@@ -131,7 +143,7 @@ class FilesTable:
except Exception:
return None
def update_files_metadata_by_id(self, id: str, meta: dict) -> Optional[FileModel]:
def update_file_metadata_by_id(self, id: str, meta: dict) -> Optional[FileModel]:
with get_db() as db:
try:
file = db.query(File).filter_by(id=id).first()

View File

@@ -20,7 +20,7 @@ depends_on: Union[str, Sequence[str], None] = None
def upgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.add_column("file", sa.Column("hash", sa.String(), nullable=True))
op.add_column("file", sa.Column("hash", sa.Text(), nullable=True))
op.add_column("file", sa.Column("data", sa.JSON(), nullable=True))
op.add_column("file", sa.Column("updated_at", sa.BigInteger(), nullable=True))