Merge pull request #6156 from noczero/add-pdf-generator

Feat: Enhance PDF Downloader to Export Chat
This commit is contained in:
Timothy Jaeryang Baek 2024-10-12 22:46:50 -07:00 committed by GitHub
commit 9c1820f785
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 428 additions and 56 deletions

View File

@ -61,6 +61,9 @@ class ChatModel(BaseModel):
# Pydantic model with a single free-form `chat` dictionary field.
# The structure of that dictionary is not constrained here.
class ChatForm(BaseModel):
    chat: dict
# Pydantic model pairing a chat title with its list of messages.
# It is the request body type of the `/pdf` export route and the input
# type accepted by `PDFGenerator`.
class ChatTitleMessagesForm(BaseModel):
    # Title of the chat; the PDF generator renders it as the page heading.
    title: str
    # One dict per message. The PDF generator reads the keys "role",
    # "content", "timestamp" and "model" with `.get()`, so all are optional.
    messages: list[dict]
class ChatTitleForm(BaseModel):
title: str

View File

@ -1,16 +1,14 @@
import site
from pathlib import Path
import black
import markdown
from open_webui.apps.webui.models.chats import ChatTitleMessagesForm
from open_webui.config import DATA_DIR, ENABLE_ADMIN_EXPORT
from open_webui.env import FONTS_DIR
from open_webui.constants import ERROR_MESSAGES
from fastapi import APIRouter, Depends, HTTPException, Response, status
from fpdf import FPDF
from pydantic import BaseModel
from starlette.responses import FileResponse
from open_webui.utils.misc import get_gravatar_url
from open_webui.utils.pdf_generator import PDFGenerator
from open_webui.utils.utils import get_admin_user
router = APIRouter()
@ -56,58 +54,10 @@ class ChatForm(BaseModel):
@router.post("/pdf")
async def download_chat_as_pdf(
    form_data: ChatTitleMessagesForm,
):
    """Render the posted chat (title + messages) as a PDF download.

    The diff view interleaved the previous FPDF-based body with the new
    implementation, which left ``form_data`` declared twice and placed the
    ``PDFGenerator`` call after an earlier ``return``. The previous body also
    read ``form_data.messages``, which ``ChatForm`` (``chat: dict``) does not
    provide. Only the ``PDFGenerator`` path is kept.

    Args:
        form_data: Chat title and list of message dicts to export.

    Returns:
        The response built by ``PDFGenerator.generate_chat_pdf()``: the PDF
        bytes on success, or a status-500 response if generation fails.
    """
    # NOTE(review): generate_chat_pdf() is a synchronous call inside an async
    # handler; if rendering large chats proves slow, consider moving it to a
    # worker thread.
    return PDFGenerator(form_data).generate_chat_pdf()
@router.get("/db/download")

View File

@ -0,0 +1,283 @@
/* HTML and Body */
/* Base document rules: box model, root font size, page colors. */
html {
    box-sizing: border-box;
    font-size: 14px; /* Default font size; the rem values in this stylesheet are defined relative to it */
    line-height: 1.5;
}

/* Every element (and its ::before/::after) inherits the box model set on <html>. */
*, *::before, *::after {
    box-sizing: inherit;
}

body {
    margin: 0;
    font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
    color: #212529;
    background-color: #fff;
    width: auto;
}
/* Typography */
/* Shared heading reset: medium weight, no margins. Sizes step down from h1 to h6. */
h1, h2, h3, h4, h5, h6 {
    font-weight: 500;
    margin: 0;
}

h1 {
    font-size: 2.5rem;
}

h2 {
    font-size: 2rem;
}

h3 {
    font-size: 1.75rem;
}

h4 {
    font-size: 1.5rem;
}

h5 {
    font-size: 1.25rem;
}

h6 {
    font-size: 1rem;
}

/* Paragraphs: spacing only below, none above. */
p {
    margin-top: 0;
    margin-bottom: 1rem;
}
/* Grid System */
/* Full-width wrapper with 15px horizontal padding. */
.container {
    width: 100%;
    padding-right: 15px;
    padding-left: 15px;
    margin-right: auto;
    margin-left: auto;
}

/* Utilities */
.text-center {
    text-align: center;
}

/* Additional Text Utilities */
.text-muted {
    color: #6c757d; /* Muted text color */
}

/* Small Text */
small {
    font-size: 80%; /* Smaller font size relative to the base */
    color: #6c757d; /* Lighter text color for secondary information */
    margin-bottom: 0;
    margin-top: 0;
}

/* Strong Element Styles */
strong {
    font-weight: bolder; /* Ensures the text is bold */
    color: inherit; /* Inherits the color from its parent element */
}

/* Links */
a {
    color: #007bff;
    text-decoration: none;
    background-color: transparent;
}

/* Hover state: darker color and underline. */
a:hover {
    color: #0056b3;
    text-decoration: underline;
}
/* General styles for lists */
/* Note: this selector also matches <li>, so each list item receives the same
   padding and margin as its parent list. */
ol, ul, li {
    padding-left: 40px; /* Increase padding to move bullet points to the right */
    margin-left: 20px; /* Indent lists from the left */
}

/* Ordered list styles */
ol {
    list-style-type: decimal; /* Use numbers for ordered lists */
    margin-bottom: 10px; /* Space after each list */
}

ol li {
    margin-bottom: 0.5rem; /* Space between ordered list items */
}

/* Unordered list styles */
ul {
    list-style-type: disc; /* Use bullets for unordered lists */
    margin-bottom: 10px; /* Space after each list */
}

ul li {
    margin-bottom: 0.5rem; /* Space between unordered list items */
}

/* List item styles */
/* The more specific `ol li` / `ul li` rules above take precedence for the
   bottom margin of items inside a list; this rule still sets line-height. */
li {
    margin-bottom: 5px; /* Space between list items */
    line-height: 1.5; /* Line height for better readability */
}

/* Nested lists */
/* Overrides the padding/margin of the general list rule for lists inside lists. */
ol ol, ol ul, ul ol, ul ul {
    padding-left: 20px;
    margin-left: 30px; /* Further indent nested lists */
    margin-bottom: 0; /* Remove extra margin at the bottom of nested lists */
}
/* Code blocks */
/* Preformatted blocks: light grey panel with a border, 80% wide. */
pre {
    background-color: #f4f4f4;
    padding: 10px;
    overflow-x: auto;
    max-width: 100%; /* Ensure it doesn't overflow the page */
    width: 80%; /* Set a specific width for a container-like appearance */
    margin: 0 1em; /* No vertical margin, 1em on the left and right (this does not center the block) */
    box-sizing: border-box; /* Include padding in the width */
    border: 1px solid #ccc; /* Optional: Add a border for better definition */
    border-radius: 4px; /* Optional: Add rounded corners */
}

/* Inline and block code: monospace font on the same grey background as <pre>. */
code {
    font-family: 'Courier New', Courier, monospace;
    background-color: #f4f4f4;
    padding: 2px 4px;
    border-radius: 4px;
    box-sizing: border-box; /* Include padding in the width */
}

/* Vertical spacing around each element with the `message` class. */
.message {
    margin-top: 8px;
    margin-bottom: 8px;
}
/* Table Styles */
/* Generic tables: full width with collapsed borders. */
table {
    width: 100%;
    margin-bottom: 1rem;
    color: #212529;
    border-collapse: collapse; /* Removes the space between borders */
}

/* Cells: padded, top-aligned, separated by a thin top rule. */
th, td {
    margin: 0;
    padding: 0.75rem;
    vertical-align: top;
    border-top: 1px solid #dee2e6;
}

/* Header cells: bottom-aligned with a heavier rule underneath. */
thead th {
    vertical-align: bottom;
    border-bottom: 2px solid #dee2e6;
}

/* Heavier rule between consecutive <tbody> groups. */
tbody + tbody {
    border-top: 2px solid #dee2e6;
}
/* markdown-section styles */
/* Rules scoped to elements inside a `.markdown-section` container. */

/* Give most block elements a top margin. */
/* NOTE(review): `ol` is not in this list (nor in the first-child list below),
   so ordered lists are spaced differently from unordered ones; confirm
   whether that is intended. */
.markdown-section blockquote,
.markdown-section h1,
.markdown-section h2,
.markdown-section h3,
.markdown-section h4,
.markdown-section h5,
.markdown-section h6,
.markdown-section p,
.markdown-section pre,
.markdown-section table,
.markdown-section ul {
    margin-top: 1rem;
}

/* Remove top margin if it's the first child */
.markdown-section blockquote:first-child,
.markdown-section h1:first-child,
.markdown-section h2:first-child,
.markdown-section h3:first-child,
.markdown-section h4:first-child,
.markdown-section h5:first-child,
.markdown-section h6:first-child,
.markdown-section p:first-child,
.markdown-section pre:first-child,
.markdown-section table:first-child,
.markdown-section ul:first-child {
    margin-top: 0;
}

/* Remove top margin of <ul> following a <p> */
/* (This rule was previously declared twice with identical content; one copy
   is kept.) */
.markdown-section p + ul {
    margin-top: 0;
}

/* Remove the bottom margin of every <p> in the section. */
/* Limiting this to a <p> that is followed by a <ul> would require the :has()
   selector. NOTE(review): presumably not supported by the PDF renderer;
   confirm before relying on it. */
.markdown-section p {
    margin-bottom: 0;
}

/* List item styles */
.markdown-section li {
    padding: 2px;
}

.markdown-section li p {
    margin-bottom: 0;
    padding: 0;
}

/* Avoid margins for nested lists */
.markdown-section li > ul {
    margin-top: 0;
    margin-bottom: 0;
}

/* Table styles */
.markdown-section table {
    width: 100%;
    border-collapse: collapse;
    margin: 1rem 0;
}

.markdown-section th,
.markdown-section td {
    border: 1px solid #ddd;
    padding: 0.5rem;
    text-align: left;
}

.markdown-section th {
    background-color: #f2f2f2;
}

/* Code blocks inside rendered markdown */
.markdown-section pre {
    padding: 10px;
    margin: 10px;
}

.markdown-section pre code {
    position: relative;
    color: rgb(172, 0, 95);
}

View File

@ -0,0 +1,134 @@
import logging
from datetime import datetime
from html import escape
from io import BytesIO
from pathlib import Path
from typing import Dict, Any, List

from markdown import markdown
from starlette.responses import Response
from xhtml2pdf import pisa

from open_webui.apps.webui.models.chats import ChatTitleMessagesForm
log = logging.getLogger(__name__)


class PDFGenerator:
    """
    Description:
    The `PDFGenerator` class creates a PDF document from chat messages.
    Each message's markdown content is converted to HTML, the messages are
    assembled into one HTML page, and that page is converted to PDF bytes that
    are returned as a response to the route.

    It depends on xhtml2pdf for converting HTML to PDF
    (more details at https://github.com/xhtml2pdf/xhtml2pdf).
    xhtml2pdf has known issues when rendering HTML list tags, see
    https://github.com/xhtml2pdf/xhtml2pdf/issues/550 and
    https://github.com/xhtml2pdf/xhtml2pdf/issues/756.

    Attributes:
    - `form_data`: An instance of `ChatTitleMessagesForm` containing title and messages.
    - `css_style_file`: Path of the stylesheet linked from the generated HTML.
    - `messages_html`: HTML for all messages; set by `generate_chat_pdf`.
    - `html_body`: Full HTML page; set by `generate_chat_pdf`.
    """

    def __init__(self, form_data: "ChatTitleMessagesForm"):
        self.html_body = None
        self.messages_html = None
        self.form_data = form_data
        self.css_style_file = self._resolve_css_style_file()

    @staticmethod
    def _resolve_css_style_file() -> Path:
        """Locate the PDF stylesheet, preferring the original cwd-relative path."""
        # This path only resolves when the process is started from the project root.
        cwd_relative = Path("./backend/open_webui/static/assets/pdf-style.css")
        if cwd_relative.exists():
            return cwd_relative
        # Fallback for other working directories.
        # NOTE(review): assumes this module lives at <package>/utils/ and the
        # stylesheet at <package>/static/assets/ -- confirm for packaged installs.
        packaged = (
            Path(__file__).resolve().parent.parent
            / "static"
            / "assets"
            / "pdf-style.css"
        )
        return packaged if packaged.exists() else cwd_relative

    def build_html_message(self, message: Dict[str, Any]) -> str:
        """Build HTML for a single message.

        Args:
            message: Message dict. The keys "role", "content", "timestamp" and
                "model" are read; all are optional. A missing or empty role is
                shown as "user".

        Returns:
            An HTML fragment (a ``<div class="message">``) for the message.
        """
        role = str(message.get("role") or "user")
        content = message.get("content") or ""
        timestamp = message.get("timestamp")
        # `or ""` keeps a missing model from being rendered as the text "None".
        model = (message.get("model") or "") if role == "assistant" else ""
        date_str = self.format_timestamp(timestamp) if timestamp else ""

        # extends pymdownx extension to convert markdown to html.
        # - https://facelessuser.github.io/pymdown-extensions/usage_notes/
        # NOTE(review): raw HTML contained in the markdown source is passed
        # through by the converter; it is not sanitized here.
        html_content = markdown(content, extensions=["pymdownx.extra"])

        # Role and model come from the request body, so they are escaped
        # before being placed into the markup.
        return f"""
            <div class="message">
                <small> {date_str} </small>
                <div>
                    <h2>
                        <strong>{escape(role.title())}</strong>
                        <small class="text-muted">{escape(str(model))}</small>
                    </h2>
                </div>
                <div class="markdown-section">
                    {html_content}
                </div>
            </div>
        """

    def create_pdf_from_html(self) -> bytes:
        """Convert `self.html_body` to PDF and return the bytes.

        Raises:
            RuntimeError: If xhtml2pdf reports an error.
        """
        pdf_buffer = BytesIO()
        pisa_status = pisa.CreatePDF(src=self.html_body, dest=pdf_buffer)
        if pisa_status.err:
            raise RuntimeError("Error generating PDF")
        return pdf_buffer.getvalue()

    def format_timestamp(self, timestamp: float) -> str:
        """Convert a UNIX timestamp to a formatted date string.

        The timestamp is interpreted in the server's local timezone.

        Returns:
            A string such as "2024-10-12, 22:46:50", or an empty string when
            the value cannot be converted.
        """
        try:
            date_time = datetime.fromtimestamp(timestamp)
            return date_time.strftime("%Y-%m-%d, %H:%M:%S")
        except (ValueError, TypeError, OverflowError, OSError):
            # fromtimestamp raises OverflowError/OSError for out-of-range
            # values; a bad timestamp must not abort the whole export.
            return ""

    def generate_chat_pdf(self) -> "Response":
        """
        Generate a PDF from chat messages.

        Returns:
            A FastAPI Response with the generated PDF or an error message
            (status code 500). Failures are logged with their traceback.
        """
        try:
            # Build HTML messages
            messages_html = "".join(
                self.build_html_message(msg) for msg in self.form_data.messages
            )
            self.messages_html = "<div>" + messages_html + "</div>"

            # Generate full HTML body
            self.html_body = self.generate_html_body()

            # Create PDF
            pdf_bytes = self.create_pdf_from_html()

            # Return PDF as response
            return Response(
                content=pdf_bytes,
                media_type="application/pdf",
                headers={"Content-Disposition": "attachment;filename=chat.pdf"},
            )
        except RuntimeError as pdf_error:
            # Handle PDF generation errors
            log.exception("PDF generation failed")
            return Response(content=str(pdf_error), status_code=500)
        except Exception:
            # Handle other unexpected errors; the client gets a generic
            # message while the details go to the log.
            log.exception("Unexpected error while generating chat PDF")
            return Response(content="An unexpected error occurred.", status_code=500)

    def generate_html_body(self) -> str:
        """Generate the full HTML page for the PDF.

        Reads `self.form_data.title`, `self.messages_html` and
        `self.css_style_file`. The title is HTML-escaped because it comes from
        the request body.
        """
        return f"""
        <html>
            <head>
                <meta charset="UTF-8">
                <meta name="viewport" content="width=device-width, initial-scale=1.0">
                <link rel="stylesheet" href="{self.css_style_file.as_posix()}">
            </head>
            <body>
                <div class="container">
                    <div class="text-center">
                        <h1>{escape(str(self.form_data.title))}</h1>
                    </div>
                    <div>
                        {self.messages_html}
                    </div>
                </div>
            </body>
        </html>
        """

View File

@ -51,6 +51,8 @@ einops==0.8.0
ftfy==6.2.3
pypdf==4.3.1
xhtml2pdf==0.2.16
pymdown-extensions==10.11.2
docx2txt==0.8
python-pptx==1.0.0
unstructured==0.15.9