From a84e488a4ea681c580a2b9cca22fe176f8c0014c Mon Sep 17 00:00:00 2001 From: Rory <16675082+roryeckel@users.noreply.github.com> Date: Sat, 1 Feb 2025 22:58:28 -0600 Subject: [PATCH] Fix playwright in docker by updating unstructured --- backend/open_webui/retrieval/web/utils.py | 6 +++--- backend/requirements.txt | 2 +- pyproject.toml | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/backend/open_webui/retrieval/web/utils.py b/backend/open_webui/retrieval/web/utils.py index 3c0c34074..0568c795c 100644 --- a/backend/open_webui/retrieval/web/utils.py +++ b/backend/open_webui/retrieval/web/utils.py @@ -175,7 +175,7 @@ class SafePlaywrightURLLoader(PlaywrightURLLoader): yield document except Exception as e: if self.continue_on_failure: - log.error(f"Error processing {url}, exception: {e}") + log.exception("Error loading %s", url) continue raise e @@ -190,7 +190,7 @@ class SafePlaywrightURLLoader(PlaywrightURLLoader): yield document except Exception as e: if self.continue_on_failure: - log.error(f"Error processing {url}, exception: {e}") + log.exception("Error loading %s", url) continue raise e @@ -210,7 +210,7 @@ class SafeWebBaseLoader(WebBaseLoader): yield Document(page_content=text, metadata=metadata) except Exception as e: # Log the error and continue with the next URL - log.error(f"Error loading {path}: {e}") + log.exception("Error loading %s", path) RAG_WEB_LOADERS = defaultdict(lambda: SafeWebBaseLoader) RAG_WEB_LOADERS["playwright"] = SafePlaywrightURLLoader diff --git a/backend/requirements.txt b/backend/requirements.txt index bb124bf11..cf5cb4a2f 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -60,7 +60,7 @@ fpdf2==2.8.2 pymdown-extensions==10.11.2 docx2txt==0.8 python-pptx==1.0.0 -unstructured==0.15.9 +unstructured==0.16.17 nltk==3.9.1 Markdown==3.7 pypandoc==1.13 diff --git a/pyproject.toml b/pyproject.toml index 41c79ddb8..6e7f607b3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -66,7 +66,7 @@ 
dependencies = [ "pymdown-extensions==10.11.2", "docx2txt==0.8", "python-pptx==1.0.0", - "unstructured==0.15.9", + "unstructured==0.16.17", "nltk==3.9.1", "Markdown==3.7", "pypandoc==1.13",