Fix playwright in docker by updating unstructured

This commit is contained in:
Rory 2025-02-01 22:58:28 -06:00
parent 77ae73e659
commit a84e488a4e
3 changed files with 5 additions and 5 deletions

View File

@@ -175,7 +175,7 @@ class SafePlaywrightURLLoader(PlaywrightURLLoader):
yield document
except Exception as e:
if self.continue_on_failure:
-log.error(f"Error processing {url}, exception: {e}")
+log.exception(e, "Error loading %s", url)
continue
raise e
@@ -190,7 +190,7 @@ class SafePlaywrightURLLoader(PlaywrightURLLoader):
yield document
except Exception as e:
if self.continue_on_failure:
-log.error(f"Error processing {url}, exception: {e}")
+log.exception(e, "Error loading %s", url)
continue
raise e
@@ -210,7 +210,7 @@ class SafeWebBaseLoader(WebBaseLoader):
yield Document(page_content=text, metadata=metadata)
except Exception as e:
# Log the error and continue with the next URL
-log.error(f"Error loading {path}: {e}")
+log.exception(e, "Error loading %s", path)
RAG_WEB_LOADERS = defaultdict(lambda: SafeWebBaseLoader)
RAG_WEB_LOADERS["playwright"] = SafePlaywrightURLLoader

View File

@@ -60,7 +60,7 @@ fpdf2==2.8.2
pymdown-extensions==10.11.2
docx2txt==0.8
python-pptx==1.0.0
-unstructured==0.15.9
+unstructured==0.16.17
nltk==3.9.1
Markdown==3.7
pypandoc==1.13

View File

@@ -66,7 +66,7 @@ dependencies = [
"pymdown-extensions==10.11.2",
"docx2txt==0.8",
"python-pptx==1.0.0",
-"unstructured==0.15.9",
+"unstructured==0.16.17",
"nltk==3.9.1",
"Markdown==3.7",
"pypandoc==1.13",