refactor: update MistralLoader documentation and adjust parameters for signed URL retrieval

This commit is contained in:
Patrick Wachter 2025-04-01 19:14:26 +02:00
parent 93d7702e8c
commit c5a8d2f857
No known key found for this signature in database

View File

@ -14,7 +14,7 @@ log.setLevel(SRC_LOG_LEVELS["RAG"])
class MistralLoader: class MistralLoader:
""" """
Loads documents by processing them through the Mistral OCR API using requests. Loads documents by processing them through the Mistral OCR API.
""" """
BASE_API_URL = "https://api.mistral.ai/v1" BASE_API_URL = "https://api.mistral.ai/v1"
@ -64,7 +64,7 @@ class MistralLoader:
with open(self.file_path, "rb") as f: with open(self.file_path, "rb") as f:
files = {"file": (file_name, f, "application/pdf")} files = {"file": (file_name, f, "application/pdf")}
data = {"purpose": "ocr"} data = {"purpose": "ocr"}
# No explicit Content-Type header needed here, requests handles it for multipart/form-data
upload_headers = self.headers.copy() # Avoid modifying self.headers upload_headers = self.headers.copy() # Avoid modifying self.headers
response = requests.post( response = requests.post(
@ -85,8 +85,7 @@ class MistralLoader:
"""Retrieves a temporary signed URL for the uploaded file.""" """Retrieves a temporary signed URL for the uploaded file."""
log.info(f"Getting signed URL for file ID: {file_id}") log.info(f"Getting signed URL for file ID: {file_id}")
url = f"{self.BASE_API_URL}/files/{file_id}/url" url = f"{self.BASE_API_URL}/files/{file_id}/url"
# Using expiry=24 as per the curl example; adjust if needed. params = {"expiry": 1}
params = {"expiry": 24}
signed_url_headers = {**self.headers, "Accept": "application/json"} signed_url_headers = {**self.headers, "Accept": "application/json"}
try: try:
@ -116,7 +115,7 @@ class MistralLoader:
"type": "document_url", "type": "document_url",
"document_url": signed_url, "document_url": signed_url,
}, },
# "include_image_base64": False # Explicitly set if needed, default seems false "include_image_base64": False,
} }
try: try: