File size: 7,351 Bytes
d14edbf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
from transformers import pipeline
import torch
from config import Config
from typing import Dict, Any
import re

# Target language codes accepted by the translation entry points, mapped to
# the English name of each language. Codes listed here without an entry in
# HELSINKI_MODELS are accepted but have no translation model configured.
LANGUAGE_CODES = {
    "en": "English",
    "fr": "French",
    "de": "German",
    "es": "Spanish",
    "pt": "Portuguese",
    "ru": "Russian",
    "ja": "Japanese",
    "ko": "Korean",
    "zh": "Chinese",
    "ar": "Arabic",
    "hi": "Hindi",
    "nl": "Dutch",
    "sv": "Swedish",
    "da": "Danish",
    "no": "Norwegian",
    "fi": "Finnish",
    "pl": "Polish",
    "cs": "Czech",
    "hu": "Hungarian",
    "ro": "Romanian",
    "bg": "Bulgarian",
    "hr": "Croatian",
    "sk": "Slovak",
    "sl": "Slovenian",
    "et": "Estonian",
    "lv": "Latvian",
    "lt": "Lithuanian",
    "mt": "Maltese",
    "el": "Greek",
    "tr": "Turkish",
}

# Helsinki-NLP translation model names (Italian -> target language), keyed by
# target language code. Every name follows the same
# "Helsinki-NLP/opus-mt-it-<code>" pattern, so the mapping is derived from the
# ordered list of target codes.
HELSINKI_MODELS = {
    code: f"Helsinki-NLP/opus-mt-it-{code}"
    for code in (
        "en", "fr", "de", "es", "pt", "ru",
        "nl", "sv", "da", "no", "fi", "pl",
        "cs", "hu", "ro", "bg", "hr", "sk",
        "sl", "et", "lv", "lt", "el", "tr",
    )
}


class TranslationService:
    """Translate Italian text into other languages while preserving markup.

    Translation pipelines are created lazily, one per target language, and
    cached on the instance. HTML tags and ``{...}`` placeholders are swapped
    for opaque tokens before translation and restored afterwards.
    """

    # Matches one HTML-like tag, e.g. ``<b>`` or ``</a>``.
    _HTML_TAG_RE = re.compile(r'<[^>]+>')
    # Matches one non-empty brace placeholder, e.g. ``{name}``.
    _BRACE_RE = re.compile(r'\{[^}]+\}')

    def __init__(self, device: str = "cpu"):
        """Create a service that runs its models on *device*.

        Args:
            device: ``"cuda"`` selects GPU 0 with float16 weights; any other
                value selects the CPU with float32 weights.
        """
        self.device = device
        self.translators = {}  # target language code -> loaded pipeline

    def _get_translator(self, target_language: str):
        """Return the cached pipeline for *target_language*, loading it if needed.

        Returns:
            The translation pipeline, or ``None`` when no model is configured
            for the language or the model could not be loaded.
        """
        cached = self.translators.get(target_language)
        if cached is not None:
            return cached

        model_name = HELSINKI_MODELS.get(target_language)
        if model_name is None:
            return None

        on_gpu = self.device == "cuda"
        try:
            translator = pipeline(
                "translation",
                model=model_name,
                device=0 if on_gpu else -1,
                torch_dtype=torch.float16 if on_gpu else torch.float32,
            )
        except Exception as e:
            # Deliberate best-effort: a model that cannot be loaded must not
            # abort the caller, which falls back to a marker string instead.
            print(f"Modello non disponibile per {target_language}, usando fallback: {e}")
            return None

        self.translators[target_language] = translator
        return translator

    def _extract_placeholders(self, text: str) -> tuple[str, Dict[str, str]]:
        """Replace HTML tags and brace placeholders in *text* with tokens.

        Tags become ``HTMLTAG<i>`` and brace expressions become
        ``PLACEHOLDER<i>``, each numbered in order of appearance.

        Returns:
            A ``(processed_text, placeholders)`` pair where *placeholders*
            maps every token to the original substring it stands for.
        """
        placeholders = {}

        # Both scans run on the original text, before any substitution.
        html_matches = self._HTML_TAG_RE.findall(text)
        brace_matches = self._BRACE_RE.findall(text)

        processed_text = text
        for i, match in enumerate(html_matches):
            token = f"HTMLTAG{i}"
            placeholders[token] = match
            # count=1: earlier occurrences are already tokens, so the first
            # remaining occurrence is the one this match refers to.
            processed_text = processed_text.replace(match, token, 1)

        for i, match in enumerate(brace_matches):
            token = f"PLACEHOLDER{i}"
            placeholders[token] = match
            processed_text = processed_text.replace(match, token, 1)

        return processed_text, placeholders

    def _restore_placeholders(self, text: str, placeholders: Dict[str, str]) -> str:
        """Put the original substrings back in place of their tokens.

        At most one space on each side of a token is dropped together with
        the token.

        Args:
            text: Translated text that still contains tokens.
            placeholders: Token -> original substring mapping.

        Returns:
            The text with every token replaced by its original substring.
        """
        # Longest tokens first: "HTMLTAG1" is a prefix of "HTMLTAG10", so
        # replacing in insertion order would corrupt the two-digit tokens.
        ordered = sorted(placeholders.items(), key=lambda item: len(item[0]), reverse=True)
        for token, original in ordered:
            # NOTE(review): this also removes spaces that were already present
            # around the placeholder in the source text - confirm intended.
            text = text.replace(f" {token} ", original)
            text = text.replace(f" {token}", original)
            text = text.replace(f"{token} ", original)
            text = text.replace(token, original)
        return text

    def translate_text(self, text: str, target_language: str) -> str:
        """Translate *text* from Italian into *target_language*.

        Args:
            text: Source text; may contain HTML tags and ``{...}`` placeholders.
            target_language: A key of ``LANGUAGE_CODES``.

        Returns:
            The stripped translation with tags and placeholders restored.
            Empty or whitespace-only input is returned unchanged. If no
            translator is available the result is
            ``"[TRANSLATION NOT AVAILABLE: <text>]"``; if translation fails it
            is ``"[TRANSLATION ERROR: <text>]"``.

        Raises:
            ValueError: If *target_language* is not in ``LANGUAGE_CODES``.
        """
        if target_language not in LANGUAGE_CODES:
            raise ValueError(f"Lingua non supportata: {target_language}")

        # Nothing to translate: do not load or run a model for blank input.
        if not text.strip():
            return text

        clean_text, placeholders = self._extract_placeholders(text)

        translator = self._get_translator(target_language)
        if translator is None:
            # Fallback keeps the original text, wrapped in a visible marker.
            return f"[TRANSLATION NOT AVAILABLE: {text}]"

        try:
            result = translator(clean_text, max_length=512)

            if isinstance(result, list) and len(result) > 0:
                translated_text = result[0]['translation_text']
            else:
                translated_text = str(result)

            translated_text = self._restore_placeholders(translated_text, placeholders)
            return translated_text.strip()

        except Exception as e:
            # Best-effort: one failing string must not abort a whole locale.
            print(f"Errore durante la traduzione: {e}")
            return f"[TRANSLATION ERROR: {text}]"

    def translate_locale_object(self, locale_data: Dict[str, Any], target_language: str) -> Dict[str, Any]:
        """Translate every top-level string value of *locale_data*.

        Non-string values (including nested containers) are copied through
        unchanged.

        Raises:
            ValueError: If *target_language* is not in ``LANGUAGE_CODES``.
        """
        if target_language not in LANGUAGE_CODES:
            raise ValueError(f"Lingua non supportata: {target_language}")

        return {
            key: (
                self.translate_text(content, target_language)
                if isinstance(content, str)
                else content
            )
            for key, content in locale_data.items()
        }


# Module-wide TranslationService instance, created on first request.
translation_service = None


def get_translation_service():
    """Return the shared TranslationService, building it on the first call.

    The instance is created with the device read from ``Config().DEVICE`` and
    stored in the module-level ``translation_service`` for later calls.
    """
    global translation_service
    if translation_service is not None:
        return translation_service

    translation_service = TranslationService(device=Config().DEVICE)
    return translation_service

def translate_locale(locale_data: Dict[str, Any], target_language: str) -> Dict[str, Any]:
    """Translate a locale mapping using the shared translation service.

    Delegates to ``translate_locale_object`` on the instance returned by
    ``get_translation_service()`` and returns its result.
    """
    return get_translation_service().translate_locale_object(locale_data, target_language)

def get_supported_languages():
    """Return the language codes that have an entry in ``HELSINKI_MODELS``."""
    return list(HELSINKI_MODELS)