mirror of
https://github.com/open-webui/open-webui
synced 2025-03-20 20:08:52 +00:00
refac
This commit is contained in:
parent
f2ec020b64
commit
92dd173b27
@ -726,7 +726,6 @@ def process_file(
|
||||
)
|
||||
docs = loader.load(file.filename, file.meta.get("content_type"), file_path)
|
||||
text_content = " ".join([doc.page_content for doc in docs])
|
||||
|
||||
log.debug(f"text_content: {text_content}")
|
||||
|
||||
Files.update_files_metadata_by_id(
|
||||
@ -795,10 +794,17 @@ def process_text(
|
||||
metadata={"name": form_data.name, "created_by": user.id},
|
||||
)
|
||||
]
|
||||
text_content = form_data.content
|
||||
log.debug(f"text_content: {text_content}")
|
||||
|
||||
result = save_docs_to_vector_db(docs, collection_name)
|
||||
|
||||
if result:
|
||||
return {"status": True, "collection_name": collection_name}
|
||||
return {
|
||||
"status": True,
|
||||
"collection_name": collection_name,
|
||||
"content": text_content,
|
||||
}
|
||||
else:
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
@ -806,68 +812,6 @@ def process_text(
|
||||
)
|
||||
|
||||
|
||||
@app.get("/process/dir")
|
||||
def process_docs_dir(user=Depends(get_admin_user)):
|
||||
for path in Path(DOCS_DIR).rglob("./**/*"):
|
||||
try:
|
||||
if path.is_file() and not path.name.startswith("."):
|
||||
tags = extract_folders_after_data_docs(path)
|
||||
filename = path.name
|
||||
file_content_type = mimetypes.guess_type(path)
|
||||
|
||||
with open(path, "rb") as f:
|
||||
collection_name = calculate_sha256(f)[:63]
|
||||
|
||||
loader = Loader(
|
||||
engine=app.state.config.CONTENT_EXTRACTION_ENGINE,
|
||||
TIKA_SERVER_URL=app.state.config.TIKA_SERVER_URL,
|
||||
PDF_EXTRACT_IMAGES=app.state.config.PDF_EXTRACT_IMAGES,
|
||||
)
|
||||
docs = loader.load(filename, file_content_type[0], str(path))
|
||||
|
||||
try:
|
||||
result = save_docs_to_vector_db(docs, collection_name)
|
||||
|
||||
if result:
|
||||
sanitized_filename = sanitize_filename(filename)
|
||||
doc = Documents.get_doc_by_name(sanitized_filename)
|
||||
|
||||
if doc is None:
|
||||
doc = Documents.insert_new_doc(
|
||||
user.id,
|
||||
DocumentForm(
|
||||
**{
|
||||
"name": sanitized_filename,
|
||||
"title": filename,
|
||||
"collection_name": collection_name,
|
||||
"filename": filename,
|
||||
"content": (
|
||||
json.dumps(
|
||||
{
|
||||
"tags": list(
|
||||
map(
|
||||
lambda name: {"name": name},
|
||||
tags,
|
||||
)
|
||||
)
|
||||
}
|
||||
)
|
||||
if len(tags)
|
||||
else "{}"
|
||||
),
|
||||
}
|
||||
),
|
||||
)
|
||||
except Exception as e:
|
||||
log.exception(e)
|
||||
pass
|
||||
|
||||
except Exception as e:
|
||||
log.exception(e)
|
||||
|
||||
return True
|
||||
|
||||
|
||||
@app.post("/process/youtube")
|
||||
def process_youtube_video(form_data: ProcessUrlForm, user=Depends(get_verified_user)):
|
||||
try:
|
||||
@ -882,12 +826,15 @@ def process_youtube_video(form_data: ProcessUrlForm, user=Depends(get_verified_u
|
||||
translation=app.state.YOUTUBE_LOADER_TRANSLATION,
|
||||
)
|
||||
docs = loader.load()
|
||||
text_content = " ".join([doc.page_content for doc in docs])
|
||||
log.debug(f"text_content: {text_content}")
|
||||
save_docs_to_vector_db(docs, collection_name, overwrite=True)
|
||||
|
||||
return {
|
||||
"status": True,
|
||||
"collection_name": collection_name,
|
||||
"filename": form_data.url,
|
||||
"content": text_content,
|
||||
}
|
||||
except Exception as e:
|
||||
log.exception(e)
|
||||
@ -910,12 +857,15 @@ def process_web(form_data: ProcessUrlForm, user=Depends(get_verified_user)):
|
||||
requests_per_second=app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS,
|
||||
)
|
||||
docs = loader.load()
|
||||
text_content = " ".join([doc.page_content for doc in docs])
|
||||
log.debug(f"text_content: {text_content}")
|
||||
save_docs_to_vector_db(docs, collection_name, overwrite=True)
|
||||
|
||||
return {
|
||||
"status": True,
|
||||
"collection_name": collection_name,
|
||||
"filename": form_data.url,
|
||||
"content": text_content,
|
||||
}
|
||||
except Exception as e:
|
||||
log.exception(e)
|
||||
@ -1067,6 +1017,7 @@ def process_web_search(form_data: SearchForm, user=Depends(get_verified_user)):
|
||||
|
||||
loader = get_web_loader(urls)
|
||||
docs = loader.load()
|
||||
|
||||
save_docs_to_vector_db(docs, collection_name, overwrite=True)
|
||||
|
||||
return {
|
||||
@ -1082,6 +1033,68 @@ def process_web_search(form_data: SearchForm, user=Depends(get_verified_user)):
|
||||
)
|
||||
|
||||
|
||||
@app.get("/process/dir")
|
||||
def process_docs_dir(user=Depends(get_admin_user)):
|
||||
for path in Path(DOCS_DIR).rglob("./**/*"):
|
||||
try:
|
||||
if path.is_file() and not path.name.startswith("."):
|
||||
tags = extract_folders_after_data_docs(path)
|
||||
filename = path.name
|
||||
file_content_type = mimetypes.guess_type(path)
|
||||
|
||||
with open(path, "rb") as f:
|
||||
collection_name = calculate_sha256(f)[:63]
|
||||
|
||||
loader = Loader(
|
||||
engine=app.state.config.CONTENT_EXTRACTION_ENGINE,
|
||||
TIKA_SERVER_URL=app.state.config.TIKA_SERVER_URL,
|
||||
PDF_EXTRACT_IMAGES=app.state.config.PDF_EXTRACT_IMAGES,
|
||||
)
|
||||
docs = loader.load(filename, file_content_type[0], str(path))
|
||||
|
||||
try:
|
||||
result = save_docs_to_vector_db(docs, collection_name)
|
||||
|
||||
if result:
|
||||
sanitized_filename = sanitize_filename(filename)
|
||||
doc = Documents.get_doc_by_name(sanitized_filename)
|
||||
|
||||
if doc is None:
|
||||
doc = Documents.insert_new_doc(
|
||||
user.id,
|
||||
DocumentForm(
|
||||
**{
|
||||
"name": sanitized_filename,
|
||||
"title": filename,
|
||||
"collection_name": collection_name,
|
||||
"filename": filename,
|
||||
"content": (
|
||||
json.dumps(
|
||||
{
|
||||
"tags": list(
|
||||
map(
|
||||
lambda name: {"name": name},
|
||||
tags,
|
||||
)
|
||||
)
|
||||
}
|
||||
)
|
||||
if len(tags)
|
||||
else "{}"
|
||||
),
|
||||
}
|
||||
),
|
||||
)
|
||||
except Exception as e:
|
||||
log.exception(e)
|
||||
pass
|
||||
|
||||
except Exception as e:
|
||||
log.exception(e)
|
||||
|
||||
return True
|
||||
|
||||
|
||||
class QueryDocForm(BaseModel):
|
||||
collection_name: str
|
||||
query: str
|
||||
|
@ -30,7 +30,7 @@
|
||||
const uploadWeb = async (url) => {
|
||||
console.log(url);
|
||||
|
||||
const doc = {
|
||||
const fileItem = {
|
||||
type: 'doc',
|
||||
name: url,
|
||||
collection_name: '',
|
||||
@ -40,12 +40,14 @@
|
||||
};
|
||||
|
||||
try {
|
||||
files = [...files, doc];
|
||||
files = [...files, fileItem];
|
||||
const res = await processWeb(localStorage.token, '', url);
|
||||
|
||||
if (res) {
|
||||
doc.status = 'processed';
|
||||
doc.collection_name = res.collection_name;
|
||||
fileItem.status = 'processed';
|
||||
fileItem.collection_name = res.collection_name;
|
||||
fileItem.content = res.content;
|
||||
|
||||
files = files;
|
||||
}
|
||||
} catch (e) {
|
||||
@ -58,7 +60,7 @@
|
||||
const uploadYoutubeTranscription = async (url) => {
|
||||
console.log(url);
|
||||
|
||||
const doc = {
|
||||
const fileItem = {
|
||||
type: 'doc',
|
||||
name: url,
|
||||
collection_name: '',
|
||||
@ -68,12 +70,13 @@
|
||||
};
|
||||
|
||||
try {
|
||||
files = [...files, doc];
|
||||
files = [...files, fileItem];
|
||||
const res = await processYoutubeVideo(localStorage.token, url);
|
||||
|
||||
if (res) {
|
||||
doc.status = 'processed';
|
||||
doc.collection_name = res.collection_name;
|
||||
fileItem.status = 'processed';
|
||||
fileItem.collection_name = res.collection_name;
|
||||
fileItem.content = res.content;
|
||||
files = files;
|
||||
}
|
||||
} catch (e) {
|
||||
|
@ -39,6 +39,8 @@
|
||||
if (url) {
|
||||
if (type === 'file') {
|
||||
window.open(`${url}/content`, '_blank').focus();
|
||||
} else {
|
||||
window.open(`${url}`, '_blank').focus();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -20,8 +20,14 @@
|
||||
<div class="font-primary px-6 py-5 w-full flex flex-col justify-center dark:text-gray-400">
|
||||
<div class="flex items-start justify-between pb-2">
|
||||
<div>
|
||||
<div class=" font-medium text-lg line-clamp-1 dark:text-gray-100">
|
||||
{file?.name ?? 'File'}
|
||||
<div class=" font-medium text-lg dark:text-gray-100">
|
||||
<a
|
||||
href={file.url ? (file.type === 'file' ? `${file.url}/content` : `${file.url}`) : '#'}
|
||||
target="_blank"
|
||||
class="hover:underline line-clamp-1"
|
||||
>
|
||||
{file?.name ?? 'File'}
|
||||
</a>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
|
Loading…
Reference in New Issue
Block a user