mirror of
https://github.com/open-webui/open-webui
synced 2025-04-16 05:22:46 +00:00
refac
This commit is contained in:
parent
f2ec020b64
commit
92dd173b27
@ -726,7 +726,6 @@ def process_file(
|
|||||||
)
|
)
|
||||||
docs = loader.load(file.filename, file.meta.get("content_type"), file_path)
|
docs = loader.load(file.filename, file.meta.get("content_type"), file_path)
|
||||||
text_content = " ".join([doc.page_content for doc in docs])
|
text_content = " ".join([doc.page_content for doc in docs])
|
||||||
|
|
||||||
log.debug(f"text_content: {text_content}")
|
log.debug(f"text_content: {text_content}")
|
||||||
|
|
||||||
Files.update_files_metadata_by_id(
|
Files.update_files_metadata_by_id(
|
||||||
@ -795,10 +794,17 @@ def process_text(
|
|||||||
metadata={"name": form_data.name, "created_by": user.id},
|
metadata={"name": form_data.name, "created_by": user.id},
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
|
text_content = form_data.content
|
||||||
|
log.debug(f"text_content: {text_content}")
|
||||||
|
|
||||||
result = save_docs_to_vector_db(docs, collection_name)
|
result = save_docs_to_vector_db(docs, collection_name)
|
||||||
|
|
||||||
if result:
|
if result:
|
||||||
return {"status": True, "collection_name": collection_name}
|
return {
|
||||||
|
"status": True,
|
||||||
|
"collection_name": collection_name,
|
||||||
|
"content": text_content,
|
||||||
|
}
|
||||||
else:
|
else:
|
||||||
raise HTTPException(
|
raise HTTPException(
|
||||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||||
@ -806,68 +812,6 @@ def process_text(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@app.get("/process/dir")
|
|
||||||
def process_docs_dir(user=Depends(get_admin_user)):
|
|
||||||
for path in Path(DOCS_DIR).rglob("./**/*"):
|
|
||||||
try:
|
|
||||||
if path.is_file() and not path.name.startswith("."):
|
|
||||||
tags = extract_folders_after_data_docs(path)
|
|
||||||
filename = path.name
|
|
||||||
file_content_type = mimetypes.guess_type(path)
|
|
||||||
|
|
||||||
with open(path, "rb") as f:
|
|
||||||
collection_name = calculate_sha256(f)[:63]
|
|
||||||
|
|
||||||
loader = Loader(
|
|
||||||
engine=app.state.config.CONTENT_EXTRACTION_ENGINE,
|
|
||||||
TIKA_SERVER_URL=app.state.config.TIKA_SERVER_URL,
|
|
||||||
PDF_EXTRACT_IMAGES=app.state.config.PDF_EXTRACT_IMAGES,
|
|
||||||
)
|
|
||||||
docs = loader.load(filename, file_content_type[0], str(path))
|
|
||||||
|
|
||||||
try:
|
|
||||||
result = save_docs_to_vector_db(docs, collection_name)
|
|
||||||
|
|
||||||
if result:
|
|
||||||
sanitized_filename = sanitize_filename(filename)
|
|
||||||
doc = Documents.get_doc_by_name(sanitized_filename)
|
|
||||||
|
|
||||||
if doc is None:
|
|
||||||
doc = Documents.insert_new_doc(
|
|
||||||
user.id,
|
|
||||||
DocumentForm(
|
|
||||||
**{
|
|
||||||
"name": sanitized_filename,
|
|
||||||
"title": filename,
|
|
||||||
"collection_name": collection_name,
|
|
||||||
"filename": filename,
|
|
||||||
"content": (
|
|
||||||
json.dumps(
|
|
||||||
{
|
|
||||||
"tags": list(
|
|
||||||
map(
|
|
||||||
lambda name: {"name": name},
|
|
||||||
tags,
|
|
||||||
)
|
|
||||||
)
|
|
||||||
}
|
|
||||||
)
|
|
||||||
if len(tags)
|
|
||||||
else "{}"
|
|
||||||
),
|
|
||||||
}
|
|
||||||
),
|
|
||||||
)
|
|
||||||
except Exception as e:
|
|
||||||
log.exception(e)
|
|
||||||
pass
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
log.exception(e)
|
|
||||||
|
|
||||||
return True
|
|
||||||
|
|
||||||
|
|
||||||
@app.post("/process/youtube")
|
@app.post("/process/youtube")
|
||||||
def process_youtube_video(form_data: ProcessUrlForm, user=Depends(get_verified_user)):
|
def process_youtube_video(form_data: ProcessUrlForm, user=Depends(get_verified_user)):
|
||||||
try:
|
try:
|
||||||
@ -882,12 +826,15 @@ def process_youtube_video(form_data: ProcessUrlForm, user=Depends(get_verified_u
|
|||||||
translation=app.state.YOUTUBE_LOADER_TRANSLATION,
|
translation=app.state.YOUTUBE_LOADER_TRANSLATION,
|
||||||
)
|
)
|
||||||
docs = loader.load()
|
docs = loader.load()
|
||||||
|
text_content = " ".join([doc.page_content for doc in docs])
|
||||||
|
log.debug(f"text_content: {text_content}")
|
||||||
save_docs_to_vector_db(docs, collection_name, overwrite=True)
|
save_docs_to_vector_db(docs, collection_name, overwrite=True)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"status": True,
|
"status": True,
|
||||||
"collection_name": collection_name,
|
"collection_name": collection_name,
|
||||||
"filename": form_data.url,
|
"filename": form_data.url,
|
||||||
|
"content": text_content,
|
||||||
}
|
}
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
log.exception(e)
|
log.exception(e)
|
||||||
@ -910,12 +857,15 @@ def process_web(form_data: ProcessUrlForm, user=Depends(get_verified_user)):
|
|||||||
requests_per_second=app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS,
|
requests_per_second=app.state.config.RAG_WEB_SEARCH_CONCURRENT_REQUESTS,
|
||||||
)
|
)
|
||||||
docs = loader.load()
|
docs = loader.load()
|
||||||
|
text_content = " ".join([doc.page_content for doc in docs])
|
||||||
|
log.debug(f"text_content: {text_content}")
|
||||||
save_docs_to_vector_db(docs, collection_name, overwrite=True)
|
save_docs_to_vector_db(docs, collection_name, overwrite=True)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"status": True,
|
"status": True,
|
||||||
"collection_name": collection_name,
|
"collection_name": collection_name,
|
||||||
"filename": form_data.url,
|
"filename": form_data.url,
|
||||||
|
"content": text_content,
|
||||||
}
|
}
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
log.exception(e)
|
log.exception(e)
|
||||||
@ -1067,6 +1017,7 @@ def process_web_search(form_data: SearchForm, user=Depends(get_verified_user)):
|
|||||||
|
|
||||||
loader = get_web_loader(urls)
|
loader = get_web_loader(urls)
|
||||||
docs = loader.load()
|
docs = loader.load()
|
||||||
|
|
||||||
save_docs_to_vector_db(docs, collection_name, overwrite=True)
|
save_docs_to_vector_db(docs, collection_name, overwrite=True)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
@ -1082,6 +1033,68 @@ def process_web_search(form_data: SearchForm, user=Depends(get_verified_user)):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/process/dir")
|
||||||
|
def process_docs_dir(user=Depends(get_admin_user)):
|
||||||
|
for path in Path(DOCS_DIR).rglob("./**/*"):
|
||||||
|
try:
|
||||||
|
if path.is_file() and not path.name.startswith("."):
|
||||||
|
tags = extract_folders_after_data_docs(path)
|
||||||
|
filename = path.name
|
||||||
|
file_content_type = mimetypes.guess_type(path)
|
||||||
|
|
||||||
|
with open(path, "rb") as f:
|
||||||
|
collection_name = calculate_sha256(f)[:63]
|
||||||
|
|
||||||
|
loader = Loader(
|
||||||
|
engine=app.state.config.CONTENT_EXTRACTION_ENGINE,
|
||||||
|
TIKA_SERVER_URL=app.state.config.TIKA_SERVER_URL,
|
||||||
|
PDF_EXTRACT_IMAGES=app.state.config.PDF_EXTRACT_IMAGES,
|
||||||
|
)
|
||||||
|
docs = loader.load(filename, file_content_type[0], str(path))
|
||||||
|
|
||||||
|
try:
|
||||||
|
result = save_docs_to_vector_db(docs, collection_name)
|
||||||
|
|
||||||
|
if result:
|
||||||
|
sanitized_filename = sanitize_filename(filename)
|
||||||
|
doc = Documents.get_doc_by_name(sanitized_filename)
|
||||||
|
|
||||||
|
if doc is None:
|
||||||
|
doc = Documents.insert_new_doc(
|
||||||
|
user.id,
|
||||||
|
DocumentForm(
|
||||||
|
**{
|
||||||
|
"name": sanitized_filename,
|
||||||
|
"title": filename,
|
||||||
|
"collection_name": collection_name,
|
||||||
|
"filename": filename,
|
||||||
|
"content": (
|
||||||
|
json.dumps(
|
||||||
|
{
|
||||||
|
"tags": list(
|
||||||
|
map(
|
||||||
|
lambda name: {"name": name},
|
||||||
|
tags,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
}
|
||||||
|
)
|
||||||
|
if len(tags)
|
||||||
|
else "{}"
|
||||||
|
),
|
||||||
|
}
|
||||||
|
),
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
log.exception(e)
|
||||||
|
pass
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
log.exception(e)
|
||||||
|
|
||||||
|
return True
|
||||||
|
|
||||||
|
|
||||||
class QueryDocForm(BaseModel):
|
class QueryDocForm(BaseModel):
|
||||||
collection_name: str
|
collection_name: str
|
||||||
query: str
|
query: str
|
||||||
|
@ -30,7 +30,7 @@
|
|||||||
const uploadWeb = async (url) => {
|
const uploadWeb = async (url) => {
|
||||||
console.log(url);
|
console.log(url);
|
||||||
|
|
||||||
const doc = {
|
const fileItem = {
|
||||||
type: 'doc',
|
type: 'doc',
|
||||||
name: url,
|
name: url,
|
||||||
collection_name: '',
|
collection_name: '',
|
||||||
@ -40,12 +40,14 @@
|
|||||||
};
|
};
|
||||||
|
|
||||||
try {
|
try {
|
||||||
files = [...files, doc];
|
files = [...files, fileItem];
|
||||||
const res = await processWeb(localStorage.token, '', url);
|
const res = await processWeb(localStorage.token, '', url);
|
||||||
|
|
||||||
if (res) {
|
if (res) {
|
||||||
doc.status = 'processed';
|
fileItem.status = 'processed';
|
||||||
doc.collection_name = res.collection_name;
|
fileItem.collection_name = res.collection_name;
|
||||||
|
fileItem.content = res.content;
|
||||||
|
|
||||||
files = files;
|
files = files;
|
||||||
}
|
}
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
@ -58,7 +60,7 @@
|
|||||||
const uploadYoutubeTranscription = async (url) => {
|
const uploadYoutubeTranscription = async (url) => {
|
||||||
console.log(url);
|
console.log(url);
|
||||||
|
|
||||||
const doc = {
|
const fileItem = {
|
||||||
type: 'doc',
|
type: 'doc',
|
||||||
name: url,
|
name: url,
|
||||||
collection_name: '',
|
collection_name: '',
|
||||||
@ -68,12 +70,13 @@
|
|||||||
};
|
};
|
||||||
|
|
||||||
try {
|
try {
|
||||||
files = [...files, doc];
|
files = [...files, fileItem];
|
||||||
const res = await processYoutubeVideo(localStorage.token, url);
|
const res = await processYoutubeVideo(localStorage.token, url);
|
||||||
|
|
||||||
if (res) {
|
if (res) {
|
||||||
doc.status = 'processed';
|
fileItem.status = 'processed';
|
||||||
doc.collection_name = res.collection_name;
|
fileItem.collection_name = res.collection_name;
|
||||||
|
fileItem.content = res.content;
|
||||||
files = files;
|
files = files;
|
||||||
}
|
}
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
|
@ -39,6 +39,8 @@
|
|||||||
if (url) {
|
if (url) {
|
||||||
if (type === 'file') {
|
if (type === 'file') {
|
||||||
window.open(`${url}/content`, '_blank').focus();
|
window.open(`${url}/content`, '_blank').focus();
|
||||||
|
} else {
|
||||||
|
window.open(`${url}`, '_blank').focus();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -20,8 +20,14 @@
|
|||||||
<div class="font-primary px-6 py-5 w-full flex flex-col justify-center dark:text-gray-400">
|
<div class="font-primary px-6 py-5 w-full flex flex-col justify-center dark:text-gray-400">
|
||||||
<div class="flex items-start justify-between pb-2">
|
<div class="flex items-start justify-between pb-2">
|
||||||
<div>
|
<div>
|
||||||
<div class=" font-medium text-lg line-clamp-1 dark:text-gray-100">
|
<div class=" font-medium text-lg dark:text-gray-100">
|
||||||
|
<a
|
||||||
|
href={file.url ? (file.type === 'file' ? `${file.url}/content` : `${file.url}`) : '#'}
|
||||||
|
target="_blank"
|
||||||
|
class="hover:underline line-clamp-1"
|
||||||
|
>
|
||||||
{file?.name ?? 'File'}
|
{file?.name ?? 'File'}
|
||||||
|
</a>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div>
|
<div>
|
||||||
|
Loading…
Reference in New Issue
Block a user