mirror of
https://github.com/open-webui/open-webui
synced 2024-11-06 16:59:42 +00:00
Merge pull request #6156 from noczero/add-pdf-generator
Feat: Enhance PDF Downloader to Export Chat
This commit is contained in:
commit
9c1820f785
@ -61,6 +61,9 @@ class ChatModel(BaseModel):
|
||||
class ChatForm(BaseModel):
|
||||
chat: dict
|
||||
|
||||
class ChatTitleMessagesForm(BaseModel):
|
||||
title: str
|
||||
messages: list[dict]
|
||||
|
||||
class ChatTitleForm(BaseModel):
|
||||
title: str
|
||||
|
@ -1,16 +1,14 @@
|
||||
import site
|
||||
from pathlib import Path
|
||||
|
||||
import black
|
||||
import markdown
|
||||
|
||||
from open_webui.apps.webui.models.chats import ChatTitleMessagesForm
|
||||
from open_webui.config import DATA_DIR, ENABLE_ADMIN_EXPORT
|
||||
from open_webui.env import FONTS_DIR
|
||||
from open_webui.constants import ERROR_MESSAGES
|
||||
from fastapi import APIRouter, Depends, HTTPException, Response, status
|
||||
from fpdf import FPDF
|
||||
from pydantic import BaseModel
|
||||
from starlette.responses import FileResponse
|
||||
from open_webui.utils.misc import get_gravatar_url
|
||||
from open_webui.utils.pdf_generator import PDFGenerator
|
||||
from open_webui.utils.utils import get_admin_user
|
||||
|
||||
router = APIRouter()
|
||||
@ -56,58 +54,10 @@ class ChatForm(BaseModel):
|
||||
|
||||
@router.post("/pdf")
|
||||
async def download_chat_as_pdf(
|
||||
form_data: ChatForm,
|
||||
form_data: ChatTitleMessagesForm,
|
||||
):
|
||||
global FONTS_DIR
|
||||
|
||||
pdf = FPDF()
|
||||
pdf.add_page()
|
||||
|
||||
# When running using `pip install` the static directory is in the site packages.
|
||||
if not FONTS_DIR.exists():
|
||||
FONTS_DIR = Path(site.getsitepackages()[0]) / "static/fonts"
|
||||
# When running using `pip install -e .` the static directory is not in the site packages.
|
||||
# This path only works if `open-webui serve` is run from the root of this project.
|
||||
if not FONTS_DIR.exists():
|
||||
FONTS_DIR = Path("./backend/static/fonts")
|
||||
|
||||
pdf.add_font("NotoSans", "", f"{FONTS_DIR}/NotoSans-Regular.ttf")
|
||||
pdf.add_font("NotoSans", "b", f"{FONTS_DIR}/NotoSans-Bold.ttf")
|
||||
pdf.add_font("NotoSans", "i", f"{FONTS_DIR}/NotoSans-Italic.ttf")
|
||||
pdf.add_font("NotoSansKR", "", f"{FONTS_DIR}/NotoSansKR-Regular.ttf")
|
||||
pdf.add_font("NotoSansJP", "", f"{FONTS_DIR}/NotoSansJP-Regular.ttf")
|
||||
pdf.add_font("NotoSansSC", "", f"{FONTS_DIR}/NotoSansSC-Regular.ttf")
|
||||
|
||||
pdf.set_font("NotoSans", size=12)
|
||||
pdf.set_fallback_fonts(["NotoSansKR", "NotoSansJP", "NotoSansSC"])
|
||||
|
||||
pdf.set_auto_page_break(auto=True, margin=15)
|
||||
|
||||
# Adjust the effective page width for multi_cell
|
||||
effective_page_width = (
|
||||
pdf.w - 2 * pdf.l_margin - 10
|
||||
) # Subtracted an additional 10 for extra padding
|
||||
|
||||
# Add chat messages
|
||||
for message in form_data.messages:
|
||||
role = message["role"]
|
||||
content = message["content"]
|
||||
pdf.set_font("NotoSans", "B", size=14) # Bold for the role
|
||||
pdf.multi_cell(effective_page_width, 10, f"{role.upper()}", 0, "L")
|
||||
pdf.ln(1) # Extra space between messages
|
||||
|
||||
pdf.set_font("NotoSans", size=10) # Regular for content
|
||||
pdf.multi_cell(effective_page_width, 6, content, 0, "L")
|
||||
pdf.ln(1.5) # Extra space between messages
|
||||
|
||||
# Save the pdf with name .pdf
|
||||
pdf_bytes = pdf.output()
|
||||
|
||||
return Response(
|
||||
content=bytes(pdf_bytes),
|
||||
media_type="application/pdf",
|
||||
headers={"Content-Disposition": "attachment;filename=chat.pdf"},
|
||||
)
|
||||
response = PDFGenerator(form_data).generate_chat_pdf()
|
||||
return response
|
||||
|
||||
|
||||
@router.get("/db/download")
|
||||
|
283
backend/open_webui/static/assets/pdf-style.css
Normal file
283
backend/open_webui/static/assets/pdf-style.css
Normal file
@ -0,0 +1,283 @@
|
||||
/* HTML and Body */
|
||||
html {
|
||||
box-sizing: border-box;
|
||||
font-size: 14px; /* Default font size */
|
||||
line-height: 1.5;
|
||||
}
|
||||
|
||||
*, *::before, *::after {
|
||||
box-sizing: inherit;
|
||||
}
|
||||
|
||||
body {
|
||||
margin: 0;
|
||||
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
|
||||
color: #212529;
|
||||
background-color: #fff;
|
||||
width: auto;
|
||||
}
|
||||
|
||||
/* Typography */
|
||||
h1, h2, h3, h4, h5, h6 {
|
||||
font-weight: 500;
|
||||
margin: 0;
|
||||
}
|
||||
|
||||
h1 {
|
||||
font-size: 2.5rem;
|
||||
}
|
||||
|
||||
h2 {
|
||||
font-size: 2rem;
|
||||
}
|
||||
|
||||
h3 {
|
||||
font-size: 1.75rem;
|
||||
}
|
||||
|
||||
h4 {
|
||||
font-size: 1.5rem;
|
||||
}
|
||||
|
||||
h5 {
|
||||
font-size: 1.25rem;
|
||||
}
|
||||
|
||||
h6 {
|
||||
font-size: 1rem;
|
||||
}
|
||||
|
||||
p {
|
||||
margin-top: 0;
|
||||
margin-bottom: 1rem;
|
||||
}
|
||||
|
||||
/* Grid System */
|
||||
.container {
|
||||
width: 100%;
|
||||
padding-right: 15px;
|
||||
padding-left: 15px;
|
||||
margin-right: auto;
|
||||
margin-left: auto;
|
||||
}
|
||||
|
||||
/* Utilities */
|
||||
.text-center {
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
/* Additional Text Utilities */
|
||||
.text-muted {
|
||||
color: #6c757d; /* Muted text color */
|
||||
}
|
||||
|
||||
/* Small Text */
|
||||
small {
|
||||
font-size: 80%; /* Smaller font size relative to the base */
|
||||
color: #6c757d; /* Lighter text color for secondary information */
|
||||
margin-bottom: 0;
|
||||
margin-top: 0;
|
||||
}
|
||||
|
||||
/* Strong Element Styles */
|
||||
strong {
|
||||
font-weight: bolder; /* Ensures the text is bold */
|
||||
color: inherit; /* Inherits the color from its parent element */
|
||||
}
|
||||
|
||||
/* link */
|
||||
a {
|
||||
color: #007bff;
|
||||
text-decoration: none;
|
||||
background-color: transparent;
|
||||
}
|
||||
|
||||
a:hover {
|
||||
color: #0056b3;
|
||||
text-decoration: underline;
|
||||
}
|
||||
|
||||
/* General styles for lists */
|
||||
ol, ul, li {
|
||||
padding-left: 40px; /* Increase padding to move bullet points to the right */
|
||||
margin-left: 20px; /* Indent lists from the left */
|
||||
}
|
||||
|
||||
/* Ordered list styles */
|
||||
ol {
|
||||
list-style-type: decimal; /* Use numbers for ordered lists */
|
||||
margin-bottom: 10px; /* Space after each list */
|
||||
}
|
||||
|
||||
|
||||
ol li {
|
||||
margin-bottom: 0.5rem; /* Space between ordered list items */
|
||||
}
|
||||
|
||||
/* Unordered list styles */
|
||||
ul {
|
||||
list-style-type: disc; /* Use bullets for unordered lists */
|
||||
margin-bottom: 10px; /* Space after each list */
|
||||
}
|
||||
|
||||
ul li {
|
||||
margin-bottom: 0.5rem; /* Space between unordered list items */
|
||||
}
|
||||
|
||||
/* List item styles */
|
||||
li {
|
||||
margin-bottom: 5px; /* Space between list items */
|
||||
line-height: 1.5; /* Line height for better readability */
|
||||
}
|
||||
|
||||
/* Nested lists */
|
||||
ol ol, ol ul, ul ol, ul ul {
|
||||
padding-left: 20px;
|
||||
margin-left: 30px; /* Further indent nested lists */
|
||||
margin-bottom: 0; /* Remove extra margin at the bottom of nested lists */
|
||||
}
|
||||
|
||||
/* Code blocks */
|
||||
pre {
|
||||
background-color: #f4f4f4;
|
||||
padding: 10px;
|
||||
overflow-x: auto;
|
||||
max-width: 100%; /* Ensure it doesn't overflow the page */
|
||||
width: 80%; /* Set a specific width for a container-like appearance */
|
||||
margin: 0 1em; /* Center the pre block */
|
||||
box-sizing: border-box; /* Include padding in the width */
|
||||
border: 1px solid #ccc; /* Optional: Add a border for better definition */
|
||||
border-radius: 4px; /* Optional: Add rounded corners */
|
||||
}
|
||||
|
||||
code {
|
||||
font-family: 'Courier New', Courier, monospace;
|
||||
background-color: #f4f4f4;
|
||||
padding: 2px 4px;
|
||||
border-radius: 4px;
|
||||
box-sizing: border-box; /* Include padding in the width */
|
||||
}
|
||||
|
||||
.message {
|
||||
margin-top: 8px;
|
||||
margin-bottom: 8px;
|
||||
}
|
||||
|
||||
/* Table Styles */
|
||||
table {
|
||||
width: 100%;
|
||||
margin-bottom: 1rem;
|
||||
color: #212529;
|
||||
border-collapse: collapse; /* Removes the space between borders */
|
||||
}
|
||||
|
||||
th, td {
|
||||
margin: 0;
|
||||
padding: 0.75rem;
|
||||
vertical-align: top;
|
||||
border-top: 1px solid #dee2e6;
|
||||
}
|
||||
|
||||
thead th {
|
||||
vertical-align: bottom;
|
||||
border-bottom: 2px solid #dee2e6;
|
||||
}
|
||||
|
||||
tbody + tbody {
|
||||
border-top: 2px solid #dee2e6;
|
||||
}
|
||||
|
||||
/* markdown-section styles */
|
||||
.markdown-section blockquote,
|
||||
.markdown-section h1,
|
||||
.markdown-section h2,
|
||||
.markdown-section h3,
|
||||
.markdown-section h4,
|
||||
.markdown-section h5,
|
||||
.markdown-section h6,
|
||||
.markdown-section p,
|
||||
.markdown-section pre,
|
||||
.markdown-section table,
|
||||
.markdown-section ul {
|
||||
/* Give most block elements margin top and bottom */
|
||||
margin-top: 1rem;
|
||||
}
|
||||
|
||||
/* Remove top margin if it's the first child */
|
||||
.markdown-section blockquote:first-child,
|
||||
.markdown-section h1:first-child,
|
||||
.markdown-section h2:first-child,
|
||||
.markdown-section h3:first-child,
|
||||
.markdown-section h4:first-child,
|
||||
.markdown-section h5:first-child,
|
||||
.markdown-section h6:first-child,
|
||||
.markdown-section p:first-child,
|
||||
.markdown-section pre:first-child,
|
||||
.markdown-section table:first-child,
|
||||
.markdown-section ul:first-child {
|
||||
margin-top: 0;
|
||||
}
|
||||
|
||||
|
||||
/* Remove top margin of <ul> following a <p> */
|
||||
.markdown-section p + ul {
|
||||
margin-top: 0;
|
||||
}
|
||||
|
||||
/* Remove bottom margin of <p> if it is followed by a <ul> */
|
||||
/* Note: :has() is not used here (support cannot be assumed in the PDF renderer), so the bottom margin is removed from every <p> instead */
|
||||
.markdown-section p {
|
||||
margin-bottom: 0;
|
||||
}
|
||||
|
||||
/* Repeats the rule above: remove the top margin of a <ul> that directly follows a <p> */
|
||||
.markdown-section p + ul {
|
||||
margin-top: 0;
|
||||
}
|
||||
|
||||
/* List item styles */
|
||||
.markdown-section li {
|
||||
padding: 2px;
|
||||
}
|
||||
|
||||
.markdown-section li p {
|
||||
margin-bottom: 0;
|
||||
padding: 0;
|
||||
}
|
||||
|
||||
/* Avoid margins for nested lists */
|
||||
.markdown-section li > ul {
|
||||
margin-top: 0;
|
||||
margin-bottom: 0;
|
||||
}
|
||||
|
||||
/* Table styles */
|
||||
.markdown-section table {
|
||||
width: 100%;
|
||||
border-collapse: collapse;
|
||||
margin: 1rem 0;
|
||||
}
|
||||
|
||||
.markdown-section th,
|
||||
.markdown-section td {
|
||||
border: 1px solid #ddd;
|
||||
padding: 0.5rem;
|
||||
text-align: left;
|
||||
}
|
||||
|
||||
.markdown-section th {
|
||||
background-color: #f2f2f2;
|
||||
}
|
||||
|
||||
.markdown-section pre {
|
||||
padding: 10px;
|
||||
margin: 10px;
|
||||
}
|
||||
|
||||
.markdown-section pre code {
|
||||
position: relative;
|
||||
color: rgb(172, 0, 95);
|
||||
}
|
||||
|
||||
|
||||
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
134
backend/open_webui/utils/pdf_generator.py
Normal file
134
backend/open_webui/utils/pdf_generator.py
Normal file
@ -0,0 +1,134 @@
|
||||
from datetime import datetime
|
||||
from io import BytesIO
|
||||
from pathlib import Path
|
||||
from typing import Dict, Any, List
|
||||
|
||||
from markdown import markdown
|
||||
from starlette.responses import Response
|
||||
from xhtml2pdf import pisa
|
||||
|
||||
from open_webui.apps.webui.models.chats import ChatTitleMessagesForm
|
||||
|
||||
|
||||
class PDFGenerator:
    """
    Render a chat transcript (title plus messages) as a PDF document.

    Each message's markdown content is converted to HTML, the messages are
    wrapped in a full HTML page that links the PDF stylesheet, and that page
    is converted to PDF with xhtml2pdf
    (https://github.com/xhtml2pdf/xhtml2pdf).

    xhtml2pdf has known issues when rendering HTML list tags, see
    https://github.com/xhtml2pdf/xhtml2pdf/issues/550 and
    https://github.com/xhtml2pdf/xhtml2pdf/issues/756.

    Attributes:
        form_data: A `ChatTitleMessagesForm` holding the chat title and the
            list of message dicts.
        css_style_file: Path of the stylesheet linked from the HTML page.
        messages_html: HTML for all messages; set by `generate_chat_pdf`.
        html_body: Full HTML page; set by `generate_chat_pdf`.
    """

    def __init__(self, form_data: "ChatTitleMessagesForm"):
        """Store the form data and locate the PDF stylesheet."""
        self.html_body = None
        self.messages_html = None
        self.form_data = form_data

        # This module lives in <package>/utils/ and the stylesheet in
        # <package>/static/assets/, so resolve it relative to this file first.
        # That keeps the lookup independent of the process working directory.
        # The original project-root relative path is kept as a fallback.
        packaged_css = (
            Path(__file__).resolve().parent.parent
            / "static"
            / "assets"
            / "pdf-style.css"
        )
        legacy_css = Path("./backend/open_webui/static/assets/pdf-style.css")
        self.css_style_file = packaged_css if packaged_css.exists() else legacy_css

    def build_html_message(self, message: Dict[str, Any]) -> str:
        """
        Build the HTML fragment for a single chat message.

        Args:
            message: Message dict. The keys read are `role` (defaults to
                "user"), `content` (markdown text), `timestamp` (UNIX
                timestamp) and, for assistant messages, `model`.

        Returns:
            A `<div class="message">` fragment with the date, the role and
            model heading, and the markdown content rendered as HTML.
        """
        from html import escape

        role = message.get("role", "user")
        # A present-but-None content would make markdown() fail; render it empty.
        content = message.get("content") or ""
        timestamp = message.get("timestamp")

        # Only assistant messages carry a model name; a missing one must not
        # be rendered as the literal text "None".
        model = (message.get("model") or "") if role == "assistant" else ""

        date_str = self.format_timestamp(timestamp) if timestamp else ""

        # Uses the pymdownx extension set to convert markdown to HTML.
        # - https://facelessuser.github.io/pymdown-extensions/usage_notes/
        # NOTE(review): markdown passes raw HTML in `content` through as-is.
        html_content = markdown(content, extensions=["pymdownx.extra"])

        # Role and model come from the request, so escape them before
        # placing them in markup.
        role_html = escape(role.title())
        model_html = escape(str(model))

        html_message = f"""
            <div class="message">
                <small> {date_str} </small>
                <div>
                    <h2>
                        <strong>{role_html}</strong>
                        <small class="text-muted">{model_html}</small>
                    </h2>
                </div>
                <div class="markdown-section">
                    {html_content}
                </div>
            </div>
        """
        return html_message

    def create_pdf_from_html(self) -> bytes:
        """
        Convert `self.html_body` to PDF.

        Returns:
            The bytes of the generated PDF.

        Raises:
            RuntimeError: If xhtml2pdf reports an error.
        """
        pdf_buffer = BytesIO()
        pisa_status = pisa.CreatePDF(src=self.html_body, dest=pdf_buffer)
        if pisa_status.err:
            raise RuntimeError("Error generating PDF")

        return pdf_buffer.getvalue()

    def format_timestamp(self, timestamp: float) -> str:
        """
        Convert a UNIX timestamp to a `YYYY-MM-DD, HH:MM:SS` string.

        The conversion uses the local timezone of the process.

        Returns:
            The formatted date, or an empty string when the timestamp cannot
            be converted (wrong type, NaN, or out of range).
        """
        try:
            date_time = datetime.fromtimestamp(timestamp)
            return date_time.strftime("%Y-%m-%d, %H:%M:%S")
        except (ValueError, TypeError, OverflowError, OSError):
            # fromtimestamp raises OverflowError/OSError for values outside
            # the platform's supported range; a bad timestamp should only
            # drop the date, not fail the whole export.
            return ""

    def generate_chat_pdf(self) -> "Response":
        """
        Generate a PDF from the chat messages in `self.form_data`.

        Returns:
            A response with the PDF as an attachment named `chat.pdf`, or a
            500 response with an error message if generation fails.
        """
        import logging

        log = logging.getLogger(__name__)

        try:
            # Build HTML messages
            messages_html_list: List[str] = [
                self.build_html_message(msg) for msg in self.form_data.messages
            ]
            self.messages_html = "<div>" + "".join(messages_html_list) + "</div>"

            # Generate full HTML body
            self.html_body = self.generate_html_body()

            # Create PDF
            pdf_bytes = self.create_pdf_from_html()

            # Return PDF as response
            return Response(
                content=pdf_bytes,
                media_type="application/pdf",
                headers={"Content-Disposition": "attachment;filename=chat.pdf"},
            )
        except RuntimeError as pdf_error:
            # Handle PDF generation errors
            log.exception("PDF generation failed")
            return Response(content=str(pdf_error), status_code=500)
        except Exception:
            # Handle other unexpected errors; keep the client message generic
            # but record the cause for operators.
            log.exception("Unexpected error while generating chat PDF")
            return Response(content="An unexpected error occurred.", status_code=500)

    def generate_html_body(self) -> str:
        """
        Generate the full HTML page for the PDF.

        Reads `self.form_data.title`, `self.messages_html` and
        `self.css_style_file`.

        Returns:
            The HTML document as a string.
        """
        from html import escape

        # The title is user-supplied text, not markup.
        title_html = escape(self.form_data.title)

        return f"""
        <html>
            <head>
                <meta charset="UTF-8">
                <meta name="viewport" content="width=device-width, initial-scale=1.0">
                <link rel="stylesheet" href="{self.css_style_file.as_posix()}">
            </head>
            <body>
                <div class="container">
                    <div class="text-center">
                        <h1>{title_html}</h1>
                    </div>
                    <div>
                        {self.messages_html}
                    </div>
                </div>
            </body>
        </html>
        """
|
@ -51,6 +51,8 @@ einops==0.8.0
|
||||
|
||||
ftfy==6.2.3
|
||||
pypdf==4.3.1
|
||||
xhtml2pdf==0.2.16
|
||||
pymdown-extensions==10.11.2
|
||||
docx2txt==0.8
|
||||
python-pptx==1.0.0
|
||||
unstructured==0.15.9
|
||||
|
Loading…
Reference in New Issue
Block a user