|
|
|
|
|
import logging |
|
import sqlite3 |
|
from concurrent.futures import ThreadPoolExecutor |
|
import functools |
|
from deep_translator import GoogleTranslator, exceptions |
|
|
|
|
|
TRANSLATION_DATABASE_FILE = 'translation_database.db' |
|
SUPPORTED_LANGUAGES = {"af", "sq", "am", "ar", "hy", "az", "eu", "be", "bn", "bs", "bg", "ca", "ceb", "ny", "zh-CN", "zh-TW", "co", "hr", "cs", "da", "nl", "en", "eo", "et", "tl", "fi", "fr", "fy", "gl", "ka", "de", "el", "gu", "ht", "ha", "haw", "iw", "hi", "hmn", "hu", "is", "ig", "id", "ga", "it", "ja", "jw", "kn", "kk", "km", "ko", "ku", "ky", "lo", "la", "lv", "lt", "lb", "mk", "mg", "ms", "ml", "mt", "mi", "mr", "mn", "my", "ne", "no", "ps", "fa", "pl", "pt", "pa", "ro", "ru", "sm", "gd", "sr", "st", "sn", "sd", "si", "sk", "sl", "so", "es", "su", "sw", "sv", "tg", "ta", "te", "th", "tr", "uk", "ur", "uz", "vi", "cy", "xh", "yi", "yo", "zu"} |
|
|
|
|
|
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') |
|
logger = logging.getLogger(__name__) |
|
|
|
|
|
def create_translation_table(): |
|
"""Creates the translation table if it doesn't exist.""" |
|
try: |
|
with sqlite3.connect(TRANSLATION_DATABASE_FILE) as conn: |
|
cursor = conn.cursor() |
|
cursor.execute(''' |
|
CREATE TABLE IF NOT EXISTS translations ( |
|
phrase TEXT PRIMARY KEY |
|
) |
|
''') |
|
|
|
|
|
cursor.execute("PRAGMA table_info(translations)") |
|
existing_columns = {col[1] for col in cursor.fetchall()} |
|
for lang_code in SUPPORTED_LANGUAGES: |
|
column_name = lang_code.replace('-', '_') |
|
if column_name == "is": |
|
column_name = "is_" |
|
if column_name not in existing_columns: |
|
try: |
|
cursor.execute(f"ALTER TABLE translations ADD COLUMN `{column_name}` TEXT") |
|
logger.info(f"Added column '{column_name}' to translations table.") |
|
except sqlite3.OperationalError as e: |
|
if "duplicate column name" in str(e).lower(): |
|
logger.debug(f"Column '{column_name}' already exists. Skipping.") |
|
else: |
|
logger.error(f"Error adding column '{column_name}': {e}") |
|
conn.commit() |
|
except Exception as e: |
|
logger.error(f"An unexpected error occurred in create_translation_table: {e}") |
|
|
|
|
|
@functools.lru_cache(maxsize=1000) |
|
def translate_cached(text, target_language, source_language="auto"): |
|
"""Translates text using Google Translate with caching.""" |
|
if not text: |
|
return "" |
|
|
|
try: |
|
translator = GoogleTranslator(source=source_language, target=target_language) |
|
translated = translator.translate(text) |
|
return translated |
|
except exceptions.TranslationNotFound: |
|
logger.error(f"Translation not found for: {text}") |
|
except Exception as e: |
|
logger.exception(f"Translation error: {e}") |
|
return None |
|
|
|
|
|
def get_translation(phrase, target_language, source_language="auto"): |
|
"""Retrieves a translation from the database or translates and stores it.""" |
|
if target_language not in SUPPORTED_LANGUAGES: |
|
logger.error(f"Unsupported target language: {target_language}") |
|
return None, False |
|
|
|
try: |
|
with sqlite3.connect(TRANSLATION_DATABASE_FILE) as conn: |
|
cursor = conn.cursor() |
|
column_name = target_language.replace('-', '_') |
|
if column_name == "is": |
|
column_name = "is_" |
|
|
|
cursor.execute(f"SELECT `{column_name}` FROM translations WHERE phrase=?", (phrase,)) |
|
result = cursor.fetchone() |
|
|
|
if result and result[0]: |
|
return result[0], True |
|
|
|
translated_text = translate_cached(phrase, target_language, source_language) |
|
if translated_text: |
|
cursor.execute(f""" |
|
INSERT INTO translations (phrase, `{column_name}`) VALUES (?, ?) |
|
ON CONFLICT(phrase) DO UPDATE SET `{column_name}`=excluded.`{column_name}` |
|
""", (phrase, translated_text)) |
|
|
|
conn.commit() |
|
return translated_text, True |
|
else: |
|
return None, False |
|
|
|
|
|
except sqlite3.Error as e: |
|
logger.error(f"Database error: {e}") |
|
return None, False |
|
except Exception as e: |
|
logger.exception(f"Unexpected error in get_translation: {e}") |
|
return None, False |
|
|
|
|
|
def batch_translate(phrases, target_language, source_languages): |
|
"""Translates multiple phrases concurrently, respecting source language.""" |
|
|
|
with ThreadPoolExecutor() as executor: |
|
|
|
futures = [executor.submit(get_translation, phrase, target_language, source_languages[i]) |
|
for i, phrase in enumerate(phrases) if phrase] |
|
|
|
results = [future.result() for future in futures] |
|
|
|
|
|
translations = {} |
|
j=0 |
|
for i, phrase in enumerate(phrases): |
|
if phrase: |
|
translations[phrase] = results[j][0] |
|
j+=1 |
|
else: |
|
translations[phrase] = None |
|
|
|
return translations |
|
|