Merge pull request #7919 from denispol/main
Some checks are pending
Deploy to HuggingFace Spaces / check-secret (push) Waiting to run
Deploy to HuggingFace Spaces / deploy (push) Blocked by required conditions
Create and publish Docker images with specific build args / build-main-image (linux/amd64) (push) Waiting to run
Create and publish Docker images with specific build args / build-main-image (linux/arm64) (push) Waiting to run
Create and publish Docker images with specific build args / build-cuda-image (linux/amd64) (push) Waiting to run
Create and publish Docker images with specific build args / build-cuda-image (linux/arm64) (push) Waiting to run
Create and publish Docker images with specific build args / build-ollama-image (linux/amd64) (push) Waiting to run
Create and publish Docker images with specific build args / build-ollama-image (linux/arm64) (push) Waiting to run
Create and publish Docker images with specific build args / merge-main-images (push) Blocked by required conditions
Create and publish Docker images with specific build args / merge-cuda-images (push) Blocked by required conditions
Create and publish Docker images with specific build args / merge-ollama-images (push) Blocked by required conditions
Python CI / Format Backend (3.11) (push) Waiting to run
Frontend Build / Format & Build Frontend (push) Waiting to run
Frontend Build / Frontend Unit Tests (push) Waiting to run
Integration Test / Run Cypress Integration Tests (push) Waiting to run
Integration Test / Run Migration Tests (push) Waiting to run

fix: enhance Markdown text cleaning for TTS compatibility
This commit is contained in:
Timothy Jaeryang Baek 2024-12-17 13:54:58 -08:00 committed by GitHub
commit c7e3692678
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -552,7 +552,31 @@ export const removeEmojis = (str: string) => {
};
/**
 * Strips Markdown formatting from `str` so the remaining text can be read
 * aloud by a text-to-speech engine.
 *
 * Fenced code blocks, table rows, reference-link definitions and footnote
 * markers are removed entirely. Inline emphasis, strikethrough, inline code,
 * links and images are unwrapped so that their visible text is kept. Leading
 * header, list, blockquote and definition-list markers are dropped, and runs
 * of blank lines are collapsed to a single newline.
 *
 * The order of the replacements matters: block-level removals run first so
 * that code inside fences is never touched by the inline rules.
 *
 * @param str - Markdown source text.
 * @returns The text with Markdown syntax removed.
 */
export const removeFormattings = (str: string): string => {
	return (
		str
			// Block elements (remove completely)
			.replace(/(```[\s\S]*?```)/g, '') // Code blocks
			.replace(/^\|.*\|$/gm, '') // Tables
			// Inline elements (preserve content)
			.replace(/(?:\*\*|__)(.*?)(?:\*\*|__)/g, '$1') // Bold
			.replace(/(?:[*_])(.*?)(?:[*_])/g, '$1') // Italic
			.replace(/~~(.*?)~~/g, '$1') // Strikethrough
			.replace(/`([^`]+)`/g, '$1') // Inline code
			// Links and images (keep the link text / alt text only)
			.replace(/!?\[([^\]]*)\](?:\([^)]+\)|\[[^\]]*\])/g, '$1') // Links & images
			.replace(/^\[[^\]]+\]:\s*.*$/gm, '') // Reference definitions
			// Block formatting (strip the leading marker, keep the line content)
			.replace(/^#{1,6}\s+/gm, '') // Headers
			.replace(/^\s*[-*+]\s+/gm, '') // Lists
			.replace(/^\s*(?:\d+\.)\s+/gm, '') // Numbered lists
			.replace(/^\s*>[> ]*/gm, '') // Blockquotes
			.replace(/^\s*:\s+/gm, '') // Definition lists
			// Cleanup
			.replace(/\[\^[^\]]*\]/g, '') // Footnotes
			// NOTE(review): this also strips '-', '_' and '~' inside ordinary words
			// (e.g. "well-known" becomes "wellknown"); confirm that is acceptable for TTS.
			.replace(/[-*_~]/g, '') // Remaining markers
			.replace(/\n{2,}/g, '\n') // Multiple newlines
	);
};
export const cleanText = (content: string) => {