Merge pull request #130 from OriginalSimon/main

Google Translate Integration
This commit is contained in:
Timothy Jaeryang Baek 2024-06-29 19:31:34 -07:00 committed by GitHub
commit 9fd6d1eba0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -0,0 +1,185 @@
"""
title: Google Translate Filter
author: SimonOriginal
date: 2024-06-28
version: 1.0
license: MIT
description: This pipeline integrates Google Translate for automatic translation of user and assistant messages
without requiring an API key. It supports multilingual communication by translating based on specified source
and target languages.
"""
import re
from typing import List, Optional
from schemas import OpenAIChatMessage
from pydantic import BaseModel
import requests
import os
import time
import asyncio
from functools import lru_cache
from utils.pipelines.main import get_last_user_message, get_last_assistant_message
class Pipeline:
    """Filter pipeline that translates chat messages via Google Translate.

    ``inlet`` translates the last user message from ``valves.source_user`` to
    ``valves.target_user``; ``outlet`` translates the last assistant message
    from ``valves.source_assistant`` to ``valves.target_assistant``.

    Fenced code blocks are swapped for a placeholder before translation and
    restored afterwards. Only the text preceding the first detected table
    (see ``split_text_around_table``) is sent to the translation endpoint;
    everything from that table onwards is passed through untranslated.
    """

    # Unofficial, key-less Google Translate endpoint used by ``translate``.
    _TRANSLATE_URL = "https://translate.googleapis.com/translate_a/single"
    # Upper bound on a single HTTP request so a stalled connection cannot hang
    # a chat request indefinitely.
    _REQUEST_TIMEOUT_SECONDS = 10
    # Total number of HTTP attempts per translation (first try + retries).
    _MAX_ATTEMPTS = 3
    _RETRY_DELAY_SECONDS = 1
    # Maximum number of cached translations before the oldest is evicted.
    _CACHE_MAX_ENTRIES = 1024

    _CODE_BLOCK_PLACEHOLDER = "__CODE_BLOCK__"
    _CODE_BLOCK_PATTERN = re.compile(r"```[\s\S]+?```")
    _TABLE_PATTERN = re.compile(
        r"((?:^.*?\|.*?\n)+)(?=\n[^\|\s].*?\|)", re.MULTILINE
    )
    _TABLE_DELIMITER_PATTERN = re.compile(r"(\|\s*-+\s*)+")

    class Valves(BaseModel):
        # ``pipelines`` and ``priority`` are not read anywhere in this class;
        # presumably they are consumed by the hosting framework (TODO confirm).
        pipelines: List[str] = []
        priority: int = 0
        # Source/target language codes used by ``inlet`` for user messages.
        source_user: Optional[str] = "auto"
        target_user: Optional[str] = "en"
        # Source/target language codes used by ``outlet`` for assistant
        # messages.
        source_assistant: Optional[str] = "en"
        target_assistant: Optional[str] = "uk"

    def __init__(self):
        self.type = "filter"
        self.name = "Google Translate Filter"
        self.valves = self.Valves(pipelines=["*"])
        # Maps (text, source, target) -> translated text. The language pair is
        # part of the key because inlet and outlet translate in different
        # directions and must never share an entry.
        self.translation_cache = {}
        # Retained for backward compatibility only. Code blocks are now kept
        # in per-call locals so concurrent requests cannot overwrite each
        # other's state.
        self.code_blocks = []

    async def on_startup(self):
        print(f"on_startup:{__name__}")

    async def on_shutdown(self):
        print(f"on_shutdown:{__name__}")

    async def on_valves_updated(self):
        pass

    def _fetch_translation(
        self, text: str, source: str, target: str
    ) -> Optional[str]:
        """Request a translation, returning ``None`` when none was obtained.

        Request-level failures are retried up to ``_MAX_ATTEMPTS`` times with
        a short pause in between; an unparseable response is not retried.
        This method blocks (HTTP request and ``time.sleep``).
        """
        params = {
            "client": "gtx",
            "sl": source,
            "tl": target,
            "dt": "t",
            "q": text,
        }
        for attempt in range(1, self._MAX_ATTEMPTS + 1):
            try:
                response = requests.get(
                    self._TRANSLATE_URL,
                    params=params,
                    timeout=self._REQUEST_TIMEOUT_SECONDS,
                )
                response.raise_for_status()
            except requests.exceptions.RequestException as e:
                print(f"Network error: {e}")
                if attempt < self._MAX_ATTEMPTS:
                    time.sleep(self._RETRY_DELAY_SECONDS)
                continue

            try:
                payload = response.json()
                # payload[0] is iterated as a list of segments whose first
                # item is the translated text; empty segments are skipped.
                translated = "".join(
                    segment[0]
                    for segment in payload[0]
                    if segment and segment[0]
                )
            except (ValueError, TypeError, IndexError, KeyError) as e:
                print(f"Error translating text: {e}")
                return None
            # An empty result for non-empty input is treated as a failure so
            # the caller falls back to the original text.
            return translated or None
        return None

    def translate(self, text: str, source: str, target: str) -> str:
        """Translate ``text`` from ``source`` to ``target``.

        Returns the translated text, or ``text`` unchanged when it is blank
        or when no translation could be obtained. Never raises for network
        or response-format errors. This call blocks.
        """
        if not text or not text.strip():
            return text
        translated = self._fetch_translation(text, source, target)
        return text if translated is None else translated

    def split_text_around_table(self, text: str) -> List[str]:
        """Split ``text`` into ``[text_before_table, rest]``.

        A table is detected as one or more consecutive lines containing ``|``
        that are followed by a blank line and then a line which contains
        ``|`` but does not start with ``|`` or whitespace. When no such table
        is found the result is ``[text, ""]``.

        ``rest`` starts at the detected table and runs to the end of
        ``text``, so the two parts always concatenate back to ``text``.
        """
        pieces = self._TABLE_PATTERN.split(text)
        if len(pieces) > 1:
            before = pieces[0]
            # re.split with a capturing group yields [before, table, after,
            # ...]; slicing the original keeps the table and everything
            # after it instead of dropping the trailing pieces.
            return [before, text[len(before):]]
        return [text, ""]

    def clean_table_delimiters(self, text: str) -> str:
        """Replace spaces with ``-`` inside runs of ``|---`` delimiter cells."""
        return self._TABLE_DELIMITER_PATTERN.sub(
            lambda m: m.group(0).replace(" ", "-"), text
        )

    async def _cached_translate(
        self, text: str, source: str, target: str
    ) -> str:
        """Translate ``text`` using the cache, fetching off the event loop.

        Only successful translations are cached, so a transient failure is
        retried on the next request rather than remembered.
        """
        if not text or not text.strip():
            return text

        key = (text, source, target)
        cached = self.translation_cache.get(key)
        if cached is not None:
            return cached

        # The fetch blocks on network I/O and sleeps between retries, so it
        # runs in the default executor to keep the event loop responsive.
        loop = asyncio.get_running_loop()
        translated = await loop.run_in_executor(
            None, self._fetch_translation, text, source, target
        )
        if translated is None:
            return text

        if len(self.translation_cache) >= self._CACHE_MAX_ENTRIES:
            # dicts preserve insertion order, so the first key is the oldest.
            self.translation_cache.pop(next(iter(self.translation_cache)), None)
        self.translation_cache[key] = translated
        return translated

    async def _translate_message(
        self, message: str, source: str, target: str
    ) -> str:
        """Translate one message while preserving code blocks and tables."""
        code_blocks = self._CODE_BLOCK_PATTERN.findall(message)
        masked = self._CODE_BLOCK_PATTERN.sub(
            self._CODE_BLOCK_PLACEHOLDER, message
        )

        text_before_table, table_text = self.split_text_around_table(masked)
        translated_prefix = await self._cached_translate(
            text_before_table, source, target
        )
        result = self.clean_table_delimiters(translated_prefix + table_text)

        # Restore code blocks in a single left-to-right pass so placeholder
        # text inside a restored block is never substituted again. Surplus
        # placeholders are left as they are.
        remaining = iter(code_blocks)
        return re.sub(
            re.escape(self._CODE_BLOCK_PLACEHOLDER),
            lambda m: next(remaining, m.group(0)),
            result,
        )

    async def _translate_last_message(
        self,
        body: dict,
        role: str,
        content: Optional[str],
        source: str,
        target: str,
        label: str,
    ) -> dict:
        """Translate ``content`` and write it to the last ``role`` message.

        Returns ``body`` untouched when ``content`` is not a non-empty string.
        """
        if not isinstance(content, str) or not content:
            return body

        translated = await self._translate_message(content, source, target)
        print(f"Translated {label} message: {translated}")

        messages = body["messages"]
        for message in reversed(messages):
            if message["role"] == role:
                message["content"] = translated
                break
        return {**body, "messages": messages}

    async def inlet(self, body: dict, user: Optional[dict] = None) -> dict:
        """Translate the last user message in ``body["messages"]``.

        The message dict is updated in place and a shallow copy of ``body``
        is returned. ``user`` is accepted but not used.
        """
        print(f"inlet:{__name__}")
        user_message = get_last_user_message(body["messages"])
        print(f"User message: {user_message}")
        return await self._translate_last_message(
            body,
            "user",
            user_message,
            self.valves.source_user,
            self.valves.target_user,
            "user",
        )

    async def outlet(self, body: dict, user: Optional[dict] = None) -> dict:
        """Translate the last assistant message in ``body["messages"]``.

        The message dict is updated in place and a shallow copy of ``body``
        is returned. ``user`` is accepted but not used.
        """
        print(f"outlet:{__name__}")
        assistant_message = get_last_assistant_message(body["messages"])
        print(f"Assistant message: {assistant_message}")
        return await self._translate_last_message(
            body,
            "assistant",
            assistant_message,
            self.valves.source_assistant,
            self.valves.target_assistant,
            "assistant",
        )