From eab74e356139905bf943d23cdcfd9dd8f4fa559c Mon Sep 17 00:00:00 2001
From: SimonOriginal
Date: Fri, 28 Jun 2024 16:51:37 +0200
Subject: [PATCH] Fix table delimiters in translated text

---
NOTE(review): this copy of the patch had its line breaks and indentation
collapsed. The layout below is reconstructed from the hunk headers and the
diffstat; the indentation of context and added lines is assumed, so verify
it against the original commit before applying.
NOTE(review): the default `target_assistant` changes from "es" to "uk",
which the subject line does not mention.
NOTE(review): `clean_table_delimiters` removes only U+0020 although `\s`
also matches tabs and newlines, leaves colon-aligned delimiter cells such
as `| :--- |` untouched, and also rewrites body cells that begin with a
dash (e.g. `| -5 |` becomes `|-5 |`).

 .../filters/google_translation_filter_pipeline.py | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/examples/filters/google_translation_filter_pipeline.py b/examples/filters/google_translation_filter_pipeline.py
index 32b6b7d..9e89b39 100644
--- a/examples/filters/google_translation_filter_pipeline.py
+++ b/examples/filters/google_translation_filter_pipeline.py
@@ -8,6 +8,7 @@ description: This pipeline integrates Google Translate for automatic translation
 without requiring an API key. It supports multilingual communication by translating based on specified source
 and target languages.
 """
+
 import re
 from typing import List, Optional
 from schemas import OpenAIChatMessage
@@ -27,7 +28,7 @@ class Pipeline:
         source_user: Optional[str] = "auto"
         target_user: Optional[str] = "en"
         source_assistant: Optional[str] = "en"
-        target_assistant: Optional[str] = "es"
+        target_assistant: Optional[str] = "uk"
 
     def __init__(self):
         self.type = "filter"
@@ -86,6 +87,10 @@ class Pipeline:
         else:
             return [text, ""]
 
+    def clean_table_delimiters(self, text: str) -> str:
+        # Remove extra spaces from table delimiters
+        return re.sub(r'(\|\s*-+\s*)+', lambda m: m.group(0).replace(' ', ''), text)
+
     async def inlet(self, body: dict, user: Optional[dict] = None) -> dict:
         print(f"inlet:{__name__}")
 
@@ -109,6 +114,9 @@ class Pipeline:
 
         translated_user_message = translated_before_table + table_text
 
+        # Clean table delimiters
+        translated_user_message = self.clean_table_delimiters(translated_user_message)
+
         print(f"Translated user message: {translated_user_message}")
 
         for message in reversed(messages):
@@ -142,6 +150,9 @@ class Pipeline:
 
         translated_assistant_message = translated_before_table + table_text
 
+        # Clean table delimiters
+        translated_assistant_message = self.clean_table_delimiters(translated_assistant_message)
+
         print(f"Translated assistant message: {translated_assistant_message}")
 
         for message in reversed(messages):