From e12a79c0e2d6651ce8d2748f1973634a5b138f53 Mon Sep 17 00:00:00 2001 From: Hisma Date: Tue, 27 May 2025 01:12:03 -0400 Subject: [PATCH] fix: handle json output format correctly --- .../loaders/datalab_marker_loader.py | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/backend/open_webui/retrieval/loaders/datalab_marker_loader.py b/backend/open_webui/retrieval/loaders/datalab_marker_loader.py index 8bb1b4356..784c803b8 100644 --- a/backend/open_webui/retrieval/loaders/datalab_marker_loader.py +++ b/backend/open_webui/retrieval/loaders/datalab_marker_loader.py @@ -92,13 +92,7 @@ class DatalabMarkerLoader: "output_format": self.output_format, } - request_params = { - "filename": filename, - "mime_type": mime_type, - **form_data, - } - - log.info(f"Datalab Marker POST request parameters: {request_params}") + log.info(f"Datalab Marker POST request parameters: {{'filename': '{filename}', 'mime_type': '{mime_type}', **{form_data}}}") try: with open(self.file_path, "rb") as f: @@ -138,7 +132,6 @@ class DatalabMarkerLoader: success_val = poll_result.get("success") if status_val == "complete": - # Log key details summary = { k: poll_result.get(k) for k in ("status", "output_format", "success", "error", "page_count", "total_cost") @@ -158,11 +151,18 @@ class DatalabMarkerLoader: raise HTTPException(status.HTTP_400_BAD_REQUEST, detail=f"Final processing failed: {error_msg}") content_key = self.output_format.lower() - full_text = poll_result.get(content_key, "").strip() + raw_content = poll_result.get(content_key) + + if content_key == "json": + full_text = json.dumps(raw_content, indent=2) + elif content_key in {"markdown", "html"}: + full_text = str(raw_content).strip() + else: + raise HTTPException(status.HTTP_400_BAD_REQUEST, detail=f"Unsupported output format: {self.output_format}") + if not full_text: raise HTTPException(status.HTTP_400_BAD_REQUEST, detail="Datalab Marker returned empty content") - # Write output to uploads/marker_output marker_output_dir = os.path.join("/app/backend/data/uploads", "marker_output") os.makedirs(marker_output_dir, exist_ok=True) @@ -197,4 +197,4 @@ class DatalabMarkerLoader: elif v is None: metadata[k] = "" - return [Document(page_content=full_text, metadata=metadata)] \ No newline at end of file + return [Document(page_content=full_text, metadata=metadata)]