This commit is contained in:
Timothy Jaeryang Baek
2025-12-20 18:12:03 +04:00
parent 4c2e5c93e9
commit fe653a1336
3 changed files with 160 additions and 164 deletions

View File

@@ -331,13 +331,21 @@ class Loader:
elif self.engine == "mineru" and file_ext in [
"pdf"
]: # MinerU currently only supports PDF
mineru_timeout = self.kwargs.get("MINERU_API_TIMEOUT", 300)
if mineru_timeout:
try:
mineru_timeout = int(mineru_timeout)
except ValueError:
mineru_timeout = 300
loader = MinerULoader(
file_path=file_path,
api_mode=self.kwargs.get("MINERU_API_MODE", "local"),
api_url=self.kwargs.get("MINERU_API_URL", "http://localhost:8000"),
api_key=self.kwargs.get("MINERU_API_KEY", ""),
params=self.kwargs.get("MINERU_PARAMS", {}),
timeout=int(self.kwargs.get("MINERU_API_TIMEOUT", 300)),
timeout=mineru_timeout,
)
elif (
self.engine == "mistral_ocr"

View File

@@ -26,7 +26,7 @@ class MinerULoader:
api_url: str = "http://localhost:8000",
api_key: str = "",
params: dict = None,
timeout: int = 300,
timeout: Optional[int] = 300,
):
self.file_path = file_path
self.api_mode = api_mode.lower()
@@ -103,7 +103,7 @@ class MinerULoader:
f"{self.api_url}/file_parse",
data=form_data,
files=files,
timeout=self.timeout, # 5 minute timeout for large documents
timeout=self.timeout,
)
response.raise_for_status()
@@ -302,7 +302,7 @@ class MinerULoader:
response = requests.put(
upload_url,
data=f,
timeout=self.timeout, # 5 minute timeout for large files
timeout=self.timeout,
)
response.raise_for_status()
except FileNotFoundError: