This commit is contained in:
Timothy J. Baek 2024-10-13 00:21:06 -07:00
parent 112cbdccbb
commit 5273dc4535
2 changed files with 11 additions and 45 deletions

View File

@ -171,9 +171,9 @@ def update_embedding_model(
auto_update: bool = False, auto_update: bool = False,
): ):
if embedding_model and app.state.config.RAG_EMBEDDING_ENGINE == "": if embedding_model and app.state.config.RAG_EMBEDDING_ENGINE == "":
import sentence_transformers from sentence_transformers import SentenceTransformer
app.state.sentence_transformer_ef = sentence_transformers.SentenceTransformer( app.state.sentence_transformer_ef = SentenceTransformer(
get_model_path(embedding_model, auto_update), get_model_path(embedding_model, auto_update),
device=DEVICE_TYPE, device=DEVICE_TYPE,
trust_remote_code=RAG_EMBEDDING_MODEL_TRUST_REMOTE_CODE, trust_remote_code=RAG_EMBEDDING_MODEL_TRUST_REMOTE_CODE,

View File

@ -4,7 +4,6 @@ from pathlib import Path
from typing import Dict, Any, List from typing import Dict, Any, List
from markdown import markdown from markdown import markdown
from xhtml2pdf import pisa
import site import site
from fpdf import FPDF from fpdf import FPDF
@ -17,12 +16,7 @@ class PDFGenerator:
""" """
Description: Description:
The `PDFGenerator` class is designed to create PDF documents from chat messages. The `PDFGenerator` class is designed to create PDF documents from chat messages.
The process involves transforming markdown content into HTML and then into a PDF format, The process involves transforming markdown content into HTML and then into a PDF format
which can be easily returned as a response to the routes.
It depends on xhtml2pdf for converting HTML to PDF (more details at https://github.com/xhtml2pdf/xhtml2pdf).
I found xhtml2pdf issues when rendering list html tag, see https://github.com/xhtml2pdf/xhtml2pdf/issues/550
and https://github.com/xhtml2pdf/xhtml2pdf/issues/756.
Attributes: Attributes:
- `form_data`: An instance of `ChatTitleMessagesForm` containing title and messages. - `form_data`: An instance of `ChatTitleMessagesForm` containing title and messages.
@ -75,25 +69,6 @@ class PDFGenerator:
""" """
return html_message return html_message
def _fetch_resources(self, uri: str, rel: str) -> str:
print(str(STATIC_DIR / uri))
return str(STATIC_DIR / uri)
def _create_pdf_from_html(self) -> bytes:
"""Convert HTML content to PDF and return the bytes."""
pdf_buffer = BytesIO()
pisa_status = pisa.CreatePDF(
src=self.html_body.encode("UTF-8"),
dest=pdf_buffer,
encoding="UTF-8",
link_callback=self._fetch_resources,
)
if pisa_status.err:
raise RuntimeError("Error generating PDF")
return pdf_buffer.getvalue()
def _generate_html_body(self) -> str: def _generate_html_body(self) -> str:
"""Generate the full HTML body for the PDF.""" """Generate the full HTML body for the PDF."""
return f""" return f"""
@ -101,9 +76,6 @@ class PDFGenerator:
<head> <head>
<meta charset="UTF-8"> <meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0"> <meta name="viewport" content="width=device-width, initial-scale=1.0">
<style type="text/css">
{self.css}
</style>
</head> </head>
<body> <body>
<div class="container"> <div class="container">
@ -148,22 +120,16 @@ class PDFGenerator:
pdf.set_auto_page_break(auto=True, margin=15) pdf.set_auto_page_break(auto=True, margin=15)
# Adjust the effective page width for multi_cell # Build HTML messages
effective_page_width = ( messages_html_list: List[str] = [
pdf.w - 2 * pdf.l_margin - 10 self._build_html_message(msg) for msg in self.form_data.messages
) # Subtracted an additional 10 for extra padding ]
self.messages_html = "<div>" + "".join(messages_html_list) + "</div>"
# Add chat messages # Generate full HTML body
for message in self.form_data.messages: self.html_body = self._generate_html_body()
role = message["role"]
content = message["content"]
pdf.set_font("NotoSans", "B", size=14) # Bold for the role
pdf.multi_cell(effective_page_width, 10, f"{role.upper()}", 0, "L")
pdf.ln(1) # Extra space between messages
pdf.set_font("NotoSans", size=10) # Regular for content pdf.write_html(self.html_body)
pdf.multi_cell(effective_page_width, 6, content, 0, "L")
pdf.ln(1.5) # Extra space between messages
# Save the pdf with name .pdf # Save the pdf with name .pdf
pdf_bytes = pdf.output() pdf_bytes = pdf.output()