From f74c64de423936873d360c4351f972562312b278 Mon Sep 17 00:00:00 2001 From: vox hunter Date: Mon, 7 Apr 2025 12:09:31 +0700 Subject: [PATCH 1/7] organize: document-extraction --- .../integrations/{ => document-extraction}/apachetika.md | 0 docs/tutorials/integrations/{ => document-extraction}/docling.md | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename docs/tutorials/integrations/{ => document-extraction}/apachetika.md (100%) rename docs/tutorials/integrations/{ => document-extraction}/docling.md (100%) diff --git a/docs/tutorials/integrations/apachetika.md b/docs/tutorials/integrations/document-extraction/apachetika.md similarity index 100% rename from docs/tutorials/integrations/apachetika.md rename to docs/tutorials/integrations/document-extraction/apachetika.md diff --git a/docs/tutorials/integrations/docling.md b/docs/tutorials/integrations/document-extraction/docling.md similarity index 100% rename from docs/tutorials/integrations/docling.md rename to docs/tutorials/integrations/document-extraction/docling.md From 6724006fc83449ddc6b0dc9d7fbb9c9eee75284f Mon Sep 17 00:00:00 2001 From: vox hunter Date: Mon, 7 Apr 2025 12:11:13 +0700 Subject: [PATCH 2/7] test --- docusaurus.config.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docusaurus.config.ts b/docusaurus.config.ts index bae2306..a026ed9 100644 --- a/docusaurus.config.ts +++ b/docusaurus.config.ts @@ -12,7 +12,7 @@ const config: Config = { url: "https://openwebui.com", // Set the /<baseUrl>/ pathname under which your site is served // For GitHub pages deployment, it is often '/<projectName>/' - baseUrl: "/", + baseUrl: "/docs/", // GitHub pages deployment config. // If you aren't using GitHub pages, you don't need these. 
From 5c3f44d2a03492f51fe9932b336c5623a2a38939 Mon Sep 17 00:00:00 2001 From: vox hunter Date: Mon, 7 Apr 2025 12:17:17 +0700 Subject: [PATCH 3/7] Renamed --- .../integrations/document-extraction/_category_.json | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 docs/tutorials/integrations/document-extraction/_category_.json diff --git a/docs/tutorials/integrations/document-extraction/_category_.json b/docs/tutorials/integrations/document-extraction/_category_.json new file mode 100644 index 0000000..a997f4e --- /dev/null +++ b/docs/tutorials/integrations/document-extraction/_category_.json @@ -0,0 +1,7 @@ +{ + "label": "📄 Documents", + "position": 2, + "link": { + "type": "generated-index" + } +} From 550b25e2626f6c1fa1cb99ce519f14870eb7a74b Mon Sep 17 00:00:00 2001 From: vox hunter Date: Mon, 7 Apr 2025 12:28:39 +0700 Subject: [PATCH 4/7] testing --- .../integrations/document-extraction/_category_.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/tutorials/integrations/document-extraction/_category_.json b/docs/tutorials/integrations/document-extraction/_category_.json index a997f4e..3f7501c 100644 --- a/docs/tutorials/integrations/document-extraction/_category_.json +++ b/docs/tutorials/integrations/document-extraction/_category_.json @@ -2,6 +2,6 @@ "label": "📄 Documents", "position": 2, "link": { - "type": "generated-index" + "type": "generated-index" } -} + } \ No newline at end of file From 5165a6b1bafe68ed16c703f0e4dddcb74184cbae Mon Sep 17 00:00:00 2001 From: vox hunter Date: Mon, 7 Apr 2025 12:34:44 +0700 Subject: [PATCH 5/7] changed position --- docs/tutorials/integrations/document-extraction/_category_.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tutorials/integrations/document-extraction/_category_.json b/docs/tutorials/integrations/document-extraction/_category_.json index 3f7501c..efee8d7 100644 --- a/docs/tutorials/integrations/document-extraction/_category_.json +++ 
b/docs/tutorials/integrations/document-extraction/_category_.json @@ -1,6 +1,6 @@ { "label": "📄 Documents", - "position": 2, + "position": 12, "link": { "type": "generated-index" } From 3dd87be4d6e10b343c6b05d490c6f8fce2517c03 Mon Sep 17 00:00:00 2001 From: vox hunter Date: Mon, 7 Apr 2025 13:25:59 +0700 Subject: [PATCH 6/7] feat: enhance document extraction capabilities with new tutorials and integrations for Apache Tika and Docling --- docs/features/chat-features/index.mdx | 18 ++++++- docs/features/code-execution/index.md | 12 ++++- .../document-extraction/apachetika.md | 0 .../document-extraction/docling.md | 0 docs/features/document-extraction/index.md | 29 +++++++++++ .../document-extraction/mistral-ocr.md | 52 +++++++++++++++++++ docs/features/index.mdx | 2 + .../document-extraction/_category_.json | 7 --- 8 files changed, 110 insertions(+), 10 deletions(-) rename docs/{tutorials/integrations => features}/document-extraction/apachetika.md (100%) rename docs/{tutorials/integrations => features}/document-extraction/docling.md (100%) create mode 100644 docs/features/document-extraction/index.md create mode 100644 docs/features/document-extraction/mistral-ocr.md delete mode 100644 docs/tutorials/integrations/document-extraction/_category_.json diff --git a/docs/features/chat-features/index.mdx b/docs/features/chat-features/index.mdx index 6ed06be..6edff57 100644 --- a/docs/features/chat-features/index.mdx +++ b/docs/features/chat-features/index.mdx @@ -1,4 +1,18 @@ --- sidebar_position: 1 -title: "😏 Chat Features" ---- \ No newline at end of file +title: "💬 Chat Features" +--- + +# Chat Features Overview + +Open WebUI provides a comprehensive set of chat features designed to enhance your interactions with AI models. This page provides an overview of the key chat capabilities, with links to dedicated pages for more detailed information. 
+ +## Core Chat Features + +- **[🗂️ Conversation Organization](./conversation-organization.md)**: Organize chats with folders and tags to keep your workspace tidy and structured. + +- **[🔗 URL Parameters](./url-params.md)**: Configure chat sessions through URL parameters, enabling quick setup of models, tools, and other features. + +- **[⚙️ Chat Parameters](./chat-params.md)**: Control system prompts and advanced parameters at different levels (per-chat, per-account, or per-model). + +- **[🗨️ Chat Sharing](./chatshare.md)**: Share conversations locally or via the Open WebUI Community platform with controllable privacy settings. diff --git a/docs/features/code-execution/index.md b/docs/features/code-execution/index.md index 41be1d4..05cb47e 100644 --- a/docs/features/code-execution/index.md +++ b/docs/features/code-execution/index.md @@ -3,4 +3,14 @@ sidebar_position: 5 title: "🐍 Code Execution" --- -COMING SOON! +Open WebUI offers powerful code execution capabilities directly within your chat interface, enabling you to transform ideas into actionable results without leaving the platform. + +## Key Features + +- **Python Code Execution**: Run Python scripts directly in your browser using Pyodide, with support for popular libraries like pandas and matplotlib, with no setup required. + +- **MermaidJS Rendering**: Create and visualize flowcharts, diagrams, and other visual representations with MermaidJS syntax that automatically renders in your chat. + +- **Interactive Artifacts**: Generate and interact with rich content like HTML websites, SVG graphics, and JavaScript visualizations directly within your conversations. + +These execution capabilities bridge the gap between conversation and implementation, allowing you to explore ideas, analyze data, and create visual content seamlessly while chatting with AI models. 
\ No newline at end of file diff --git a/docs/tutorials/integrations/document-extraction/apachetika.md b/docs/features/document-extraction/apachetika.md similarity index 100% rename from docs/tutorials/integrations/document-extraction/apachetika.md rename to docs/features/document-extraction/apachetika.md diff --git a/docs/tutorials/integrations/document-extraction/docling.md b/docs/features/document-extraction/docling.md similarity index 100% rename from docs/tutorials/integrations/document-extraction/docling.md rename to docs/features/document-extraction/docling.md diff --git a/docs/features/document-extraction/index.md b/docs/features/document-extraction/index.md new file mode 100644 index 0000000..9d0184c --- /dev/null +++ b/docs/features/document-extraction/index.md @@ -0,0 +1,29 @@ +--- +sidebar_position: 6 +title: "📄 Document Extraction" +--- + +# Document Extraction in Open WebUI + +Open WebUI provides powerful document extraction capabilities that allow you to process and analyze various types of documents within your RAG (Retrieval Augmented Generation) workflows. Document extraction is essential for transforming unstructured document content into structured data that can be effectively used by language models. + +## What is Document Extraction? + +Document extraction refers to the process of automatically identifying and extracting text and data from various file formats, including: +- PDFs (both text-based and scanned) +- Images containing text +- Handwritten documents +- And more + +With proper document extraction, Open WebUI can help you: +- Convert image-based documents to searchable text +- Preserve document structure and layout information +- Extract data in structured formats for further processing +- Support multilingual content recognition + +## Available Extraction Methods + +Open WebUI supports multiple document extraction engines to accommodate different needs and document types. 
Each extraction method has its own strengths and is suitable for different scenarios. + +Explore the documentation for each available extraction method to learn how to set it up and use it effectively with your Open WebUI instance. + diff --git a/docs/features/document-extraction/mistral-ocr.md b/docs/features/document-extraction/mistral-ocr.md new file mode 100644 index 0000000..002cce0 --- /dev/null +++ b/docs/features/document-extraction/mistral-ocr.md @@ -0,0 +1,52 @@ +--- +sidebar_position: 4000 +title: "👁️ Mistral OCR" +--- + +:::warning +This tutorial is a community contribution and is not supported by the Open WebUI team. It serves only as a demonstration on how to customize Open WebUI for your specific use case. Want to contribute? Check out the contributing tutorial. +::: + +## 👁️ Mistral OCR + +This documentation provides a step-by-step guide to integrating Mistral OCR with Open WebUI. Mistral OCR is an optical character recognition library designed to extract text from a variety of image-based file formats—including scanned PDFs, images, and handwritten documents—into structured data such as JSON or plain text. With advanced support for multilingual text recognition, layout analysis, and handwriting interpretation, Mistral OCR simplifies the process of digitizing and processing documents for AI applications like search, summarization, and data extraction, all through a robust and customizable interface. 
+ +Prerequisites +------------ + +* Open WebUI instance +* Mistral AI account + +Integration Steps +---------------- + +### Step 1: Sign Up or Log In to the Mistral AI Console + +* Go to `https://console.mistral.ai` +* Follow the on-screen instructions to sign up or log in +* After successful authorization, you should be welcomed to the Console Home + +### Step 2: Generate an API key + +* Go to `API Keys` or `https://console.mistral.ai/api-keys` +* Create a new key and make sure to copy it + +### Step 3: Configure Open WebUI to use Mistral OCR + +* Log in to your Open WebUI instance. +* Navigate to the `Admin Panel` settings menu. +* Click on `Settings`. +* Click on the `Documents` tab. +* Change the `Default` content extraction engine dropdown to `Mistral OCR`. +* Paste the API key into the field. +* Save the Admin Panel. + +Verifying Mistral OCR +------------------------------------- + +To verify that Mistral OCR is working correctly from a script, please refer to `https://docs.mistral.ai/capabilities/document/` + + +### Conclusion + +Integrating Mistral OCR with Open WebUI is a simple and effective way to enhance document processing and content extraction capabilities. By following the steps in this guide, you can set up Mistral OCR as the default extraction engine and leverage its advanced text recognition features. Once configured, Mistral OCR enables powerful, multilingual document parsing with support for various formats, enhancing AI-driven document analysis capabilities in Open WebUI. diff --git a/docs/features/index.mdx b/docs/features/index.mdx index 74fee00..6ea5851 100644 --- a/docs/features/index.mdx +++ b/docs/features/index.mdx @@ -33,6 +33,8 @@ import { TopBanners } from "@site/src/components/TopBanners"; - 📚 **Local and Remote RAG Integration**: Dive into the future of chat interactions and explore your documents with our cutting-edge Retrieval Augmented Generation (RAG) technology within your chats. 
Documents can be loaded into the `Documents` tab of the Workspace, after which they can be accessed using the pound key [`#`] before a query, or by starting the prompt with the pound key [`#`], followed by a URL for webpage content integration. +- 📄 **Document Extraction**: Extract text and data from various document formats including PDFs, Word documents, Excel spreadsheets, PowerPoint presentations, and more. Our advanced document processing capabilities enable seamless integration with your knowledge base, allowing for accurate retrieval and generation of information from complex documents while preserving their structure and formatting. + - 🔍 **Web Search for RAG**: You can perform web searches using a selection of various search providers and inject the results directly into your local Retrieval Augmented Generation (RAG) experience. - 🌐 **Web Browsing Capabilities**: Integrate websites seamlessly into your chat experience by using the `#` command followed by a URL. This feature enables the incorporation of web content directly into your conversations, thereby enhancing the richness and depth of your interactions. 
diff --git a/docs/tutorials/integrations/document-extraction/_category_.json b/docs/tutorials/integrations/document-extraction/_category_.json deleted file mode 100644 index efee8d7..0000000 --- a/docs/tutorials/integrations/document-extraction/_category_.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "label": "📄 Documents", - "position": 12, - "link": { - "type": "generated-index" - } - } \ No newline at end of file From 969b32939fc02703779d7ff825719862eb32f7db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E2=8E=9D=E2=8E=9D=EF=BC=B6=EF=BC=AF=EF=BC=B8=20=E2=8E=A0?= =?UTF-8?q?=E2=8E=A0?= Date: Mon, 7 Apr 2025 13:35:48 +0700 Subject: [PATCH 7/7] Update docusaurus.config.ts --- docusaurus.config.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docusaurus.config.ts b/docusaurus.config.ts index a026ed9..bae2306 100644 --- a/docusaurus.config.ts +++ b/docusaurus.config.ts @@ -12,7 +12,7 @@ const config: Config = { url: "https://openwebui.com", // Set the /<baseUrl>/ pathname under which your site is served // For GitHub pages deployment, it is often '/<projectName>/' - baseUrl: "/docs/", + baseUrl: "/", // GitHub pages deployment config. // If you aren't using GitHub pages, you don't need these.