From d064c42adbc7e48ca2b16b600e0d0b00577acdd0 Mon Sep 17 00:00:00 2001 From: vox hunter Date: Sun, 6 Apr 2025 12:26:08 +0700 Subject: [PATCH 1/3] added docling --- docs/tutorials/integrations/docling.md | 66 ++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 docs/tutorials/integrations/docling.md diff --git a/docs/tutorials/integrations/docling.md b/docs/tutorials/integrations/docling.md new file mode 100644 index 0000000..007e938 --- /dev/null +++ b/docs/tutorials/integrations/docling.md @@ -0,0 +1,66 @@ +--- +sidebar_position: 4000 +title: "🐤 Docling Document Extraction" +--- + +:::warning +This tutorial is a community contribution and is not supported by the Open WebUI team. It serves only as a demonstration on how to customize Open WebUI for your specific use case. Want to contribute? Check out the contributing tutorial. +::: + +## 🐤 Docling Document Extraction + +This documentation provides a step-by-step guide to integrating Docling with Open WebUI. Docling is a document processing library designed to transform a wide range of file formats—including PDFs, Word documents, spreadsheets, HTML, and images—into structured data such as JSON or Markdown. With built-in support for layout detection, table parsing, and language-aware processing, Docling streamlines document preparation for AI applications like search, summarization, and retrieval-augmented generation, all through a unified and extensible interface. + +Prerequisites +------------ + +* Open WebUI instance +* Docker installed on your system +* Docker network set up for Open WebUI + +Integration Steps +---------------- + +### Step 1: Run the Docker Command for Docling-Serve + +```bash +docker run -p 5001:5001 -e DOCLING_SERVE_ENABLE_UI=true quay.io/docling-project/docling-serve +``` + +### Step 2: Configure Open WebUI to use Docling + +* Log in to your Open WebUI instance. +* Navigate to the `Admin Panel` settings menu. +* Click on `Settings`. +* Click on the `Documents` tab. 
+* Change the `Default` content extraction engine dropdown to `Docling`. +* Update the content extraction engine URL to `http://host.docker.internal:5001`. +* Save the changes. + +Verifying Docling in Docker +===================================== + +To verify that Docling is working correctly in a Docker environment, you can follow these steps: + +### 1. Start the Docling Docker Container + +First, ensure that the Docling Docker container is running. You can start it using the following command: + +```bash +docker run -p 5001:5001 -e DOCLING_SERVE_ENABLE_UI=true quay.io/docling-project/docling-serve +``` + +This command starts the Docling container and maps port 5001 from the container to port 5001 on your local machine. + +### 2. Verify the Server is Running + +* Go to `http://127.0.0.1:5001/ui/` +* The URL should lead to a UI to use Docling + +### 3. Verify the Integration + +* You can try uploading some files via the UI and it should return output in MD format or your desired format + +### Conclusion + +Integrating Docling with Open WebUI is a simple and effective way to enhance document processing and content extraction capabilities. By following the steps in this guide, you can set up Docling as the default extraction engine and verify it’s working smoothly in a Docker environment. Once configured, Docling enables powerful, format-agnostic document parsing to support more advanced AI features in Open WebUI. 
\ No newline at end of file From b147e40254d65764d5b99b85b4c78aaefc6b13a6 Mon Sep 17 00:00:00 2001 From: vox hunter Date: Sun, 6 Apr 2025 12:30:44 +0700 Subject: [PATCH 2/3] update docusaurus.config.ts --- docusaurus.config.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docusaurus.config.ts b/docusaurus.config.ts index 8efa918..8071952 100644 --- a/docusaurus.config.ts +++ b/docusaurus.config.ts @@ -12,7 +12,7 @@ const config: Config = { url: "https://openwebui.com", // Set the // pathname under which your site is served // For GitHub pages deployment, it is often '//' - baseUrl: "/", + baseUrl: "/docs/", // GitHub pages deployment config. // If you aren't using GitHub pages, you don't need these. From 22ba6ce248ac1d737d9f8d6488988014cf482065 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=8E=9D=E2=8E=9D=EF=BC=B6=EF=BC=AF=EF=BC=B8=20=E2=8E=A0?= =?UTF-8?q?=E2=8E=A0?= Date: Mon, 7 Apr 2025 09:27:31 +0700 Subject: [PATCH 3/3] Update docusaurus.config.ts --- docusaurus.config.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docusaurus.config.ts b/docusaurus.config.ts index 8071952..8efa918 100644 --- a/docusaurus.config.ts +++ b/docusaurus.config.ts @@ -12,7 +12,7 @@ const config: Config = { url: "https://openwebui.com", // Set the // pathname under which your site is served // For GitHub pages deployment, it is often '//' - baseUrl: "/docs/", + baseUrl: "/", // GitHub pages deployment config. // If you aren't using GitHub pages, you don't need these.