From 115e46a6a207b1fefc40a6b438e3bb3fe808f730 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Iv=C3=A1n=20Baldo?= Date: Tue, 25 Mar 2025 14:53:14 -0300 Subject: [PATCH] Fix: Tika 3.1.0.0 sends a lot of blank lines, which degrades the RAG results; strip leading and trailing whitespace from the extracted text. --- backend/open_webui/retrieval/loaders/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/open_webui/retrieval/loaders/main.py b/backend/open_webui/retrieval/loaders/main.py index 5bcd2d321..85f871925 100644 --- a/backend/open_webui/retrieval/loaders/main.py +++ b/backend/open_webui/retrieval/loaders/main.py @@ -105,7 +105,7 @@ class TikaLoader: if r.ok: raw_metadata = r.json() - text = raw_metadata.get("X-TIKA:content", "") + text = raw_metadata.get("X-TIKA:content", "").strip() if "Content-Type" in raw_metadata: headers["Content-Type"] = raw_metadata["Content-Type"]