# Transport / translation_service.py
# TuanMinhajSeedin's picture
# Upload 19 files
# 04722ba verified
#!/usr/bin/env python3
"""
Enhanced Translation Service for Multi-Language Support
Handles translation of queries and responses for Sinhala, Tamil, Singlish, and English
"""
import requests
import json
import re
import openai
import google.generativeai as genai
from typing import Dict, Any, Optional
from config import Config
from logger import get_logger
from language_detector import LanguageDetector
class TranslationService:
def __init__(self):
    """Set up configuration, credentials, feature switches and term tables.

    Optional ``OPENAI_API_KEY``, ``GOOGLE_API_KEY`` and ``GOOGLE_MODEL``
    attributes are read from ``Config``. Two boolean switches come from the
    environment (``USE_PATTERN_TRANSLATION`` and ``FORCE_LLM_TRANSLATION``);
    each is enabled only when set to ``true`` (case-insensitive).
    """
    import os

    def env_flag(name: str) -> bool:
        # Anything other than "true" (any casing) leaves the switch off.
        return os.getenv(name, 'false').lower() == 'true'

    self.config = Config()
    self.openai_api_key = getattr(self.config, 'OPENAI_API_KEY', None)
    self.google_api_key = getattr(self.config, 'GOOGLE_API_KEY', None)
    self.google_model = getattr(self.config, 'GOOGLE_MODEL', 'gemini-1.5-flash')
    self.logger = get_logger(self.__class__.__name__)
    self.language_detector = LanguageDetector()

    # Behaviour switches
    self.use_pattern_translation = env_flag('USE_PATTERN_TRANSLATION')
    self.force_llm_translation = env_flag('FORCE_LLM_TRANSLATION')
    # Name of the backend that produced the most recent translation.
    self.last_translation_method: Optional[str] = None

    # Endpoints of the free translation services
    self.libre_translate_url = "https://libretranslate.de/translate"  # Free public instance
    self.mymemory_url = "https://api.mymemory.translated.net/get"

    # Tamil transport vocabulary -> English
    self.tamil_transport_terms = {
        # Fares
        'எவ்வளவு': 'how much',
        'விலை': 'price',
        'கட்டணம்': 'fare',
        'செலவு': 'cost',
        'பேருந்து கட்டணம்': 'bus fare',
        'ரயில் கட்டணம்': 'train fare',
        'டிக்கெட் விலை': 'ticket price',
        # Places
        'கொழும்பு': 'Colombo',
        'கண்டி': 'Kandy',
        'காலி': 'Galle',
        'மாத்தறை': 'Matara',
        'அனுராதபுரம்': 'Anuradhapura',
        'பனதுரை': 'Panadura',
        'அலுத்துகமா': 'Aluthgama',
        'நுகேகோடா': 'Nugegoda',
        'தெஹிவாலா': 'Dehiwala',
        'மொரட்டுவா': 'Moratuwa',
        # Directions
        'இருந்து': 'from',
        'வரை': 'to',
        'வழியாக': 'via',
        'மூலம்': 'through',
        # Interrogatives
        'எங்கே': 'where',
        'எப்போது': 'when',
        'எப்படி': 'how',
        'என்ன': 'what',
        'யார்': 'who',
        # Comparison / conjunctions
        'உடன்': 'with',
        'மற்றும்': 'and',
        'அல்லது': 'or',
        'அதிகம்': 'more',
        'குறைவு': 'less',
        'ஒரே': 'same',
        'வேறு': 'different',
        'ஒப்பிடு': 'compare',
        'வித்தியாசம்': 'difference',
        # Time
        'இப்போது': 'now',
        'இன்று': 'today',
        'நாளை': 'tomorrow',
        'நேற்று': 'yesterday',
        # Verbs
        'போ': 'go',
        'வா': 'come',
        'பார்': 'see',
        'தெரிந்து கொள்': 'know',
        'கண்டுபிடி': 'find',
        'கற்றுக்கொள்': 'learn',
        'பரிந்துரை': 'recommend',
        'காட்டு': 'show',
        # Currency
        'ரூபாய்': 'rupees',
        'ரூ': 'rupees',
        # Miscellaneous phrases
        'இடையில்': 'between',
        'பாதைகள்': 'routes',
        'பிரபலமான': 'popular',
        'சராசரி': 'average',
        'தரவு': 'data',
        'புள்ளிவிவரங்கள்': 'statistics',
    }

    # Sinhala transport vocabulary -> English
    self.transport_terms = {
        # Fares
        'කීයද': 'how much',
        'මිල': 'price',
        'වාරික': 'fare',
        'වාරිකය': 'fare',
        'වාරිකව': 'fare',
        'ගාස්තු': 'fare',
        'ගාස්තුව': 'fare',
        'ප්‍රවාහන ගාස්තු': 'transport fare',
        'බස් ගාස්තු': 'bus fare',
        'බස් ගාස්තුව': 'bus fare',
        'රේල් ගාස්තු': 'train fare',
        'රේල් ගාස්තුව': 'train fare',
        # Places (bare form and dative "-ට" form)
        'කොළඹ': 'Colombo',
        'මහනුවර': 'Kandy',
        'මහනුවරට': 'Kandy',
        'ගාල්ල': 'Galle',
        'ගාල්ලට': 'Galle',
        'මාතර': 'Matara',
        'මාතරට': 'Matara',
        'අනුරාධපුර': 'Anuradhapura',
        'අනුරාධපුරට': 'Anuradhapura',
        'පානදුර': 'Panadura',
        'පානදුරට': 'Panadura',
        'අලුත්ගම': 'Aluthgama',
        'අලුත්ගමට': 'Aluthgama',
        'නුගේගොඩ': 'Nugegoda',
        'නුගේගොඩට': 'Nugegoda',
        'දෙහිවල': 'Dehiwala',
        'දෙහිවලට': 'Dehiwala',
        'මොරටුව': 'Moratuwa',
        'මොරටුවට': 'Moratuwa',
        # Directions
        'වලින්': 'from',
        'වල': 'from',
        'ට': 'to',
        'වෙත': 'to',
        'සිට': 'from',
        'දක්වා': 'to',
        'සි': 'from',
        # Interrogatives
        'කොහෙද': 'where',
        'කවදාද': 'when',
        'කොහොමද': 'how',
        'මොනවාද': 'what',
        'කවුද': 'who',
        # Comparison / conjunctions
        'සමඟ': 'with',
        'සහ': 'and',
        'හෝ': 'or',
        'වඩා': 'more',
        'අඩු': 'less',
        'සමාන': 'same',
        'වෙනස': 'different',
        'සසඳන්න': 'compare',
        'සසඳන': 'compare',
        # Time
        'දැන්': 'now',
        'අද': 'today',
        'හෙට': 'tomorrow',
        'ඊයේ': 'yesterday',
        # Verbs
        'යන්න': 'go',
        'යන': 'go',
        'එන්න': 'come',
        'බලන්න': 'see',
        'දැනගන්න': 'know',
        'සොයන්න': 'find',
        'සොයන': 'find',
        'ඉගෙනගන්න': 'learn',
        'නිර්දේශ': 'recommend',
        'නිර්දේශ කරන්න': 'recommend',
        'පෙන්වන්න': 'show',
        'පෙන්වන': 'show',
        # Currency
        'රුපියල්': 'rupees',
        'රු': 'rupees',
        'රුපියල': 'rupees',
        # Miscellaneous phrases
        'අතර': 'between',
        'සහිත': 'with',
        'මාර්ග': 'routes',
        'මාර්ගවල': 'routes',
        'ගමනාන්ත': 'destinations',
        'ප්‍රසිද්ධ': 'popular',
        'සාමාන්‍ය': 'average',
        'සාමාන්‍යය': 'average',
        'දත්ත': 'data',
        'සංඛ්‍යාලේඛන': 'statistics',
    }

    # Matches any code point in the Sinhala Unicode block.
    self.sinhala_pattern = re.compile(r'[\u0D80-\u0DFF]')
def is_sinhala_text(self, text: str) -> bool:
    """Return True when *text* has at least one Sinhala-block character."""
    first_hit = self.sinhala_pattern.search(text)
    detected = first_hit is not None
    self.logger.debug(f"Sinhala detection: detected={detected}, text='{text}'")
    return detected
def is_tamil_text(self, text: str) -> bool:
    """Return True when *text* has at least one Tamil-block character."""
    # U+0B80..U+0BFF is the Tamil Unicode block.
    detected = re.search(r'[\u0B80-\u0BFF]', text) is not None
    self.logger.debug(f"Tamil detection: detected={detected}, text='{text}'")
    return detected
def is_singlish_text(self, text: str) -> bool:
    """Return True when the language detector labels *text* as 'singlish'."""
    language = self.language_detector.detect_language(text)['language']
    return language == 'singlish'
def _map_sinhala_place(self, text: str) -> str:
    """Map a Sinhala place token to its English name.

    The token is first looked up verbatim in ``self.transport_terms``. If
    that fails, one trailing case particle (ට, වෙත, දක්වා, වලින්, වල, සිට)
    is removed and the lookup is retried.

    Args:
        text: Sinhala place token, optionally carrying a case particle that
            is either attached ("මහනුවරට") or space-separated ("මහනුවර වෙත").

    Returns:
        The English place name when known, otherwise the trimmed input.
    """
    candidate = text.strip()
    terms = self.transport_terms
    if candidate in terms:
        return terms[candidate]
    # Re-trim after removing the particle: a space-separated particle would
    # otherwise leave a trailing blank that makes the lookup miss.
    base = re.sub(r'(ට|වෙත|දක්වා|වලින්|වල|සිට)$', '', candidate).strip()
    if base in terms:
        return terms[base]
    return candidate
def _map_tamil_place(self, text: str) -> str:
    """Map a Tamil place token to its English name.

    The token is first looked up verbatim in ``self.tamil_transport_terms``.
    If that fails, one trailing particle (இருந்து, வரை, வழியாக, மூலம்) is
    removed and the lookup is retried.

    Args:
        text: Tamil place token, optionally followed by a particle that is
            either attached or space-separated ("கண்டி வரை").

    Returns:
        The English place name when known, otherwise the trimmed input.
    """
    candidate = text.strip()
    terms = self.tamil_transport_terms
    if candidate in terms:
        return terms[candidate]
    # Re-trim after removing the particle: Tamil particles are normally
    # written as separate words, which would leave a trailing blank.
    base = re.sub(r'(இருந்து|வரை|வழியாக|மூலம்)$', '', candidate).strip()
    if base in terms:
        return terms[base]
    return candidate
def _parse_sinhala_fare_query(self, query: str) -> Optional[str]:
    """Detect a simple Sinhala fare query and build a clean English query.

    Example handled: "කොළඹ සිට මහනුවරට ගාස්තුව කීයද?" -> "What is the fare from Colombo to Kandy?"

    Args:
        query: Raw user query.

    Returns:
        The English fare question, or None when the query has no fare
        keyword, has no "<source> සිට <destination>ට/වෙත/දක්වා" route, or
        cannot be processed.
    """
    try:
        # Cheap keyword gate so unrelated queries are not parsed as fares.
        if not any(tok in query for tok in ['ගාස්තු', 'ගාස්තුව', 'වාරික', 'වාරිකය', 'මිල']):
            return None
        # The "to" particle must END a word. Without the look-ahead the lazy
        # destination group stops at the first "ට" it sees, which truncates
        # place names that contain that letter (e.g. මොරටුව -> "මොර").
        m = re.search(
            r'([\u0D80-\u0DFF\s]+?)\s*සිට\s*([\u0D80-\u0DFF\s]+?)(?:ට|වෙත|දක්වා)(?![\u0D80-\u0DFF])',
            query,
        )
        if not m:
            return None
        src_en = self._map_sinhala_place(m.group(1).strip())
        dst_en = self._map_sinhala_place(m.group(2).strip())
        return f"What is the fare from {src_en} to {dst_en}?"
    except Exception:
        # Best effort only: the caller falls back to full translation.
        return None
def _parse_tamil_fare_query(self, query: str) -> Optional[str]:
    """Turn a simple Tamil fare question into a canonical English one.

    Example handled: "கொழும்பு இருந்து கண்டி வரை கட்டணம் எவ்வளவு?" -> "What is the fare from Colombo to Kandy?"

    Returns None when no fare keyword or no "from ... to" route is present,
    or when anything goes wrong while parsing.
    """
    fare_markers = ('கட்டணம்', 'விலை', 'செலவு', 'எவ்வளவு')
    route_regex = r'([\u0B80-\u0BFF\s]+?)\s*இருந்து\s*([\u0B80-\u0BFF\s]+?)(?:வரை|வழியாக)'
    try:
        # Keyword gate: bail out unless at least one fare marker occurs.
        if all(marker not in query for marker in fare_markers):
            return None
        found = re.search(route_regex, query)
        if found is None:
            return None
        origin_ta, destination_ta = (part.strip() for part in found.groups())
        origin_en = self._map_tamil_place(origin_ta)
        destination_en = self._map_tamil_place(destination_ta)
        return f"What is the fare from {origin_en} to {destination_en}?"
    except Exception:
        return None
def translate_with_llm(self, text: str, target_lang: str, source_lang: str = 'auto') -> Optional[str]:
    """Translate *text* with Google Gemini, falling back to OpenAI.

    Args:
        text: Text to translate.
        target_lang: 'en', 'si' or 'ta'.
        source_lang: 'si', 'ta', 'singlish', 'en', or 'auto' to ask the
            language detector.

    Returns:
        The translated text, or None when no API key is configured, the
        language pair is unsupported, or every provider failed. Provider
        errors are logged as warnings and never raised.

    Side effects:
        Sets ``self.last_translation_method`` to 'gemini' or 'llm' when that
        provider produced a non-empty translation.
    """
    if not (self.google_api_key or self.openai_api_key):
        return None
    try:
        if source_lang == 'auto':
            detected_lang = self.language_detector.detect_language(text)['language']
            source_lang = {
                'sinhala': 'si',
                'tamil': 'ta',
                'singlish': 'singlish',
            }.get(detected_lang, 'en')

        lang_map = {
            ('si', 'en'): 'Sinhala to English',
            ('en', 'si'): 'English to Sinhala',
            ('ta', 'en'): 'Tamil to English',
            ('en', 'ta'): 'English to Tamil',
            ('singlish', 'en'): 'Singlish to English',
        }
        direction = lang_map.get((source_lang, target_lang))
        if not direction:
            return None
        # Last word of the direction label, e.g. "English" / "Sinhala" / "Tamil".
        target_name = direction.rsplit(' ', 1)[-1]
        to_english = target_lang == 'en'

        # Few-shot examples, keyed by source language, that demonstrate the
        # expected phrasing (questions, comparisons, imperatives).
        examples = []
        if source_lang == 'si':
            examples = [
                (
                    "කොළඹ සිට මහනුවරට යන බස් ගාස්තුව කීයද?",
                    "What is the bus fare from Colombo to Kandy?"
                ),
                (
                    "කොළඹ සිට ගාල්ල දක්වා ටිකට් මිල කීයද?",
                    "What is the ticket price from Colombo to Galle?"
                ),
                (
                    "කොළඹ සිට පානදුර දක්වා සහ කොළඹ සිට ගාල්ල දක්වා ගාස්තු සසඳා බලන්න.",
                    "Compare fares from Colombo to Panadura and from Colombo to Galle."
                ),
                (
                    "රුපියල් 500 ට අඩු ගාස්තු සහිත මාර්ග පෙන්වන්න.",
                    "Show routes with fares under 500 rupees."
                ),
                (
                    "අඩු මිලේ මාර්ග නිර්දේශ කරන්න.",
                    "Recommend cheap routes."
                ),
            ]
        elif source_lang == 'ta':
            examples = [
                (
                    "கொழும்பு இருந்து கண்டி வரை பேருந்து கட்டணம் எவ்வளவு?",
                    "What is the bus fare from Colombo to Kandy?"
                ),
                (
                    "கொழும்பு இருந்து காலி வரை டிக்கெட் விலை எவ்வளவு?",
                    "What is the ticket price from Colombo to Galle?"
                ),
                (
                    "கொழும்பு இருந்து பனதுரை வரை மற்றும் கொழும்பு இருந்து காலி வரை கட்டணம் ஒப்பிடு.",
                    "Compare fares from Colombo to Panadura and from Colombo to Galle."
                ),
                (
                    "ரூபாய் 500 க்கு குறைவான கட்டணம் உள்ள பாதைகளை காட்டு.",
                    "Show routes with fares under 500 rupees."
                ),
                (
                    "குறைந்த விலையில் பாதைகளை பரிந்துரை.",
                    "Recommend cheap routes."
                ),
            ]
        elif source_lang == 'singlish':
            examples = [
                (
                    "කොළඹ සිට Kandy ගාස්තුව කීයද?",
                    "What is the fare from Colombo to Kandy?"
                ),
                (
                    "Colombo සිට ගාල්ලට bus fare කීයද?",
                    "What is the bus fare from Colombo to Galle?"
                ),
                (
                    "කොළඹ සිට Panadura සහ Colombo සිට Galle fares compare කරන්න.",
                    "Compare fares from Colombo to Panadura and from Colombo to Galle."
                ),
            ]

        # The prompts name the actual direction and target language; a fixed
        # "English translation" instruction would make English -> Sinhala/Tamil
        # requests come back in English.
        base_instructions = (
            f"You are a professional translator. Translate from {direction}. "
            "Translate ONLY the specific text provided. "
            "Do not include examples or additional text. "
            f"Return only the {target_name} translation without quotes. "
        )

        def build_messages(txt: str):
            """Build the chat messages (system rules, few-shot turns, request)."""
            system_text = base_instructions
            if to_english:
                # Canonical English phrasing only makes sense for English output.
                system_text += (
                    "Canonical phrasing rules (use exactly): \n"
                    "- Use 'Compare' for comparison requests.\n"
                    "- Use 'Show' for requests like 'පෙන්වන්න' (do not use Provide/List).\n"
                    "- Use 'How much is the' for 'කීයද' fare/price questions.\n"
                    "- Use 'cheap' (not 'affordable').\n"
                    "- Use 'under' (not 'below') for '< value'.\n"
                    "- Use exact place names: මහනුවර=Kandy, කොළඹ=Colombo, ගාල්ල=Galle, මාතර=Matara, අනුරාධපුර=Anuradhapura.\n"
                    "- Use 'from' for 'සිට' and 'to' for 'ට/වෙත/දක්වා'.\n"
                )
            msgs = [{"role": "system", "content": system_text}]
            # Only two examples, to keep the model focused on the real input.
            for src, en in examples[:2]:
                msgs.append({"role": "user", "content": f"Translate: {src}"})
                msgs.append({"role": "assistant", "content": en})
            msgs.append({"role": "user", "content": f"Translate: {txt}"})
            return msgs

        translated = None

        # Preferred provider: Google Gemini.
        if self.google_api_key:
            try:
                genai.configure(api_key=self.google_api_key)
                model = genai.GenerativeModel(self.google_model)
                system_prompt = base_instructions
                if to_english:
                    system_prompt += (
                        "Use exact place names: මහනුවර=Kandy, කොළඹ=Colombo, ගාල්ල=Galle, මාතර=Matara, අනුරාධපුර=Anuradhapura. "
                        "Use 'from' for 'සිට' and 'to' for 'ට/වෙත/දක්වා'. "
                        "Use 'How much is the' for fare questions. Use 'Compare' for comparison requests."
                    )
                examples_text = "".join(
                    f"Example: {src} -> {en}\n" for src, en in examples[:2]
                )
                prompt_text = f"{system_prompt}\n\n{examples_text}\nNow translate: {text}"
                response = model.generate_content(prompt_text)
                translated = (response.text or "").strip()
                if translated:
                    # Keep the first line that is not an echo of the prompt.
                    for line in translated.split('\n'):
                        line = line.strip()
                        if line and not line.startswith(('Example:', 'Now translate:')):
                            translated = line
                            break
                    self.last_translation_method = 'gemini'
            except Exception as e:
                self.logger.warning(f"Gemini translation error: {e}")
                translated = None

        # Fallback provider: OpenAI (current SDK first, then the legacy API).
        if not translated and self.openai_api_key:
            try:
                from openai import OpenAI
                client = OpenAI(api_key=self.openai_api_key)
                response = client.chat.completions.create(
                    model=self.config.OPENAI_MODEL,
                    max_tokens=150,
                    temperature=0.3,
                    messages=build_messages(text)
                )
                translated = (response.choices[0].message.content or "").strip()
            except Exception as sdk_err:
                import openai
                try:
                    openai.api_key = self.openai_api_key
                    response = openai.ChatCompletion.create(
                        model=self.config.OPENAI_MODEL,
                        max_tokens=150,
                        temperature=0.3,
                        messages=build_messages(text)
                    )
                    translated = (response.choices[0].message.content or "").strip()
                except Exception:
                    # Report the error from the primary SDK path.
                    raise sdk_err
            if translated:
                self.last_translation_method = 'llm'

        if not translated:
            return None
        # Strip one pair of surrounding double quotes the model may add.
        if len(translated) >= 2 and translated.startswith('"') and translated.endswith('"'):
            translated = translated[1:-1]
        return translated if translated else None
    except Exception as e:
        self.logger.warning(f"LLM translation error: {e}")
        return None
def translate_with_libre_translate(self, text: str, target_lang: str, source_lang: str = 'auto') -> Optional[str]:
    """Translate *text* through the LibreTranslate HTTP API.

    Args:
        text: Text to translate.
        target_lang: 'en', 'si' or 'ta'.
        source_lang: 'en', 'si', 'ta' or 'auto'; any other value is sent as
            'auto'.

    Returns:
        The translated text, or None when the target language is not
        supported here, the service does not answer with HTTP 200, no
        translation is present in the reply, or the request fails.

    Side effects:
        Sets ``self.last_translation_method`` to 'libretranslate' when a
        translation was returned.
    """
    try:
        lang_map = {
            'si': 'si',  # Sinhala
            'ta': 'ta',  # Tamil
            'en': 'en',  # English
            'auto': 'auto',
        }
        source = lang_map.get(source_lang, 'auto')
        target = lang_map.get(target_lang)
        # 'auto' is only meaningful for the source. Silently defaulting an
        # unknown target to English would return text in the wrong language.
        if target is None or target == 'auto':
            return None
        payload = {
            'q': text,
            'source': source,
            'target': target,
            'format': 'text'
        }
        headers = {
            'Content-Type': 'application/json'
        }
        response = requests.post(
            self.libre_translate_url,
            json=payload,
            headers=headers,
            timeout=10
        )
        if response.status_code != 200:
            return None
        translated = response.json().get('translatedText')
        if not translated:
            return None
        self.logger.debug(f"LibreTranslate success: '{text}' -> '{translated}'")
        self.last_translation_method = 'libretranslate'
        return translated
    except Exception as e:
        self.logger.warning(f"LibreTranslate error: {e}")
        return None
def translate_with_mymemory(self, text: str, target_lang: str, source_lang: str = 'auto') -> Optional[str]:
    """Translate *text* through the MyMemory HTTP API.

    Args:
        text: Text to translate.
        target_lang: Target language code, used as given in ``langpair``.
        source_lang: 'en', 'si', 'ta' or 'auto'; any other value is treated
            as 'auto'.

    Returns:
        The translated text, or None when the HTTP status or the
        ``responseStatus`` field in the reply is not 200, no translation is
        present, or the request fails.

    Side effects:
        Sets ``self.last_translation_method`` to 'mymemory' when a
        translation was returned.
    """
    try:
        lang_map = {
            'si': 'si',  # Sinhala
            'ta': 'ta',  # Tamil
            'en': 'en',  # English
            # NOTE(review): MyMemory appears to expect "autodetect" rather
            # than "auto" as the source keyword - confirm against the API
            # spec. A rejected pair is filtered by the responseStatus check.
            'auto': 'autodetect',
        }
        source = lang_map.get(source_lang, lang_map['auto'])
        params = {
            'q': text,
            'langpair': f"{source}|{target_lang}"
        }
        response = requests.get(
            self.mymemory_url,
            params=params,
            timeout=10
        )
        if response.status_code != 200:
            return None
        result = response.json()
        # The reply carries its own status. When it is not 200 the
        # translatedText field can hold an error message instead of a
        # translation, so it must not be passed on.
        if str(result.get('responseStatus', 200)) != '200':
            self.logger.debug(f"MyMemory rejected request: {result.get('responseDetails')}")
            return None
        translated = (result.get('responseData') or {}).get('translatedText')
        if not translated:
            return None
        self.logger.debug(f"MyMemory success: '{text}' -> '{translated}'")
        self.last_translation_method = 'mymemory'
        return translated
    except Exception as e:
        self.logger.warning(f"MyMemory translation error: {e}")
        return None
def translate_with_dictionary(self, text: str, target_lang: str, source_lang: str = 'auto') -> str:
    """Translate by substituting known transport terms.

    Only terms found in ``self.transport_terms`` (Sinhala) or
    ``self.tamil_transport_terms`` (Tamil) are replaced; everything else is
    left as it is.

    Args:
        text: Text to translate.
        target_lang: 'en' translates Sinhala/Tamil terms to English; 'si' or
            'ta' translates English terms to that language.
        source_lang: Used only when ``target_lang`` is 'en': 'si', 'ta', or
            'auto' to ask the language detector (anything it does not label
            'tamil' is handled as Sinhala).

    Returns:
        The text with known terms replaced; the unchanged text for an
        unsupported language combination.
    """
    if target_lang == 'en':
        if source_lang == 'auto':
            detected_lang = self.language_detector.detect_language(text)['language']
            source_lang = 'ta' if detected_lang == 'tamil' else 'si'
        if source_lang == 'si':
            terms = self.transport_terms
        elif source_lang == 'ta':
            terms = self.tamil_transport_terms
        else:
            return text
        translated = text
        # Longest terms first, so a short particle such as "ට" cannot break
        # up a longer entry ("මහනුවරට") before that entry is matched.
        for native in sorted(terms, key=len, reverse=True):
            translated = translated.replace(native, terms[native])
        return translated

    if target_lang == 'si':
        terms = self.transport_terms
    elif target_lang == 'ta':
        terms = self.tamil_transport_terms
    else:
        return text

    # Several native terms share one English gloss; the first one listed wins.
    reverse = {}
    for native, english in terms.items():
        reverse.setdefault(english.lower(), native)
    if not reverse:
        return text
    # Whole-word, case-insensitive matching: "to" must not be replaced
    # inside "tomorrow", nor "or" inside "for".
    alternatives = '|'.join(
        re.escape(english) for english in sorted(reverse, key=len, reverse=True)
    )
    pattern = re.compile(r'\b(?:' + alternatives + r')\b', re.IGNORECASE)
    return pattern.sub(lambda m: reverse[m.group(0).lower()], text)
def translate_text(self, text: str, target_lang: str, source_lang: str = 'auto') -> str:
    """Translate *text*, trying each backend in turn until one succeeds.

    The order is LLM, MyMemory, LibreTranslate, dictionary. When
    ``self.force_llm_translation`` is set only the LLM is tried, and the
    dictionary is used if it yields nothing.

    Args:
        text: Text to translate; empty or blank input is returned unchanged.
        target_lang: Target language code.
        source_lang: Source language code, or 'auto'.

    Returns:
        The trimmed result of the first backend that produced non-blank
        output, otherwise the dictionary translation.

    Side effects:
        ``self.last_translation_method`` names the backend whose output was
        returned.
    """
    if not text or not text.strip():
        return text

    translation_methods = [
        ('LLM', lambda: self.translate_with_llm(text, target_lang, source_lang)),
    ]
    if not self.force_llm_translation:
        translation_methods += [
            ('MyMemory', lambda: self.translate_with_mymemory(text, target_lang, source_lang)),
            ('LibreTranslate', lambda: self.translate_with_libre_translate(text, target_lang, source_lang)),
            ('Dictionary', lambda: self.translate_with_dictionary(text, target_lang, source_lang)),
        ]

    for method_name, method_func in translation_methods:
        # Clear the tracker before every attempt so neither an earlier call
        # nor a backend that failed part-way is credited with this result.
        self.last_translation_method = None
        try:
            result = method_func()
        except Exception as e:
            self.logger.warning(f"{method_name} translation failed: {e}")
            continue
        if result and result.strip():
            self.logger.info(f"Translation successful using {method_name}")
            # Backends may record a more specific name (e.g. 'gemini').
            if not self.last_translation_method:
                self.last_translation_method = method_name.lower()
            return result.strip()

    # Final fallback
    result = self.translate_with_dictionary(text, target_lang, source_lang)
    self.last_translation_method = 'dictionary'
    return result
def translate_query(self, query: str) -> Dict[str, Any]:
    """Translate a user query from any supported language to English.

    Args:
        query: Raw user query.

    Returns:
        A dict with the keys ``is_sinhala``, ``is_tamil``, ``is_singlish``,
        ``detected_language``, ``original_query``, ``translated_query``,
        ``translation_method`` and ``detection_confidence``. For English
        input the query is returned untouched with method 'none'; a query
        matched by the optional fare pattern has method 'pattern'.
    """
    detection_result = self.language_detector.detect_language(query)
    detected_language = detection_result['language']

    def build_result(translated_query: str, method: str) -> Dict[str, Any]:
        return {
            'is_sinhala': detected_language == 'sinhala',
            'is_tamil': detected_language == 'tamil',
            'is_singlish': detected_language == 'singlish',
            'detected_language': detected_language,
            'original_query': query,
            'translated_query': translated_query,
            'translation_method': method,
            'detection_confidence': detection_result['confidence']
        }

    # English needs no translation.
    if detected_language == 'english':
        return build_result(query, 'none')

    # Optional fast path: regex-based parsing of simple fare questions.
    if self.use_pattern_translation:
        parsed = None
        if detected_language == 'sinhala':
            parsed = self._parse_sinhala_fare_query(query)
        elif detected_language == 'tamil':
            parsed = self._parse_tamil_fare_query(query)
        if parsed:
            self.logger.info(f"Pattern-based {detected_language} fare parse: '{query}' -> '{parsed}'")
            return build_result(parsed, 'pattern')

    # Everything that is not Tamil (including Singlish) is sent as Sinhala.
    source_lang = 'ta' if detected_language == 'tamil' else 'si'

    # Forget the backend recorded by an earlier request so the reported
    # method always belongs to this translation.
    self.last_translation_method = None
    translated = self.translate_text(query, 'en', source_lang)
    # Normalize English synonyms to the vocabulary expected downstream.
    translated = self._normalize_english_query(translated)
    method = self.last_translation_method or ('llm' if self.openai_api_key else 'dictionary')
    self.logger.info(f"Translated {detected_language} query ({method}): '{query}' -> '{translated}'")
    return build_result(translated, method)
def _normalize_english_query(self, text: str) -> str:
    """Normalize English synonyms to the fare/cost vocabulary.

    The text is lowercased and the listed synonyms are replaced as whole
    words, in the listed order.

    Args:
        text: English query text; falsy input is returned unchanged.

    Returns:
        The lowercased, normalized text.
    """
    if not text:
        return text
    # Order matters: plural before singular, and "ticket price" before
    # "bus ticket" so that "bus ticket price" becomes "bus fare".
    replacements = (
        ('fees', 'fare'),
        ('fee', 'fare'),
        ('charges', 'cost'),
        ('charge', 'cost'),
        ('ticket price', 'fare'),
        ('ticket fare', 'fare'),
        ('bus ticket', 'bus fare'),
    )
    # The result stays lowercase; the original comment notes that downstream
    # code lowercases the query anyway.
    lower = text.lower()
    for old, new in replacements:
        # Word boundaries keep "coffee" or "recharge" from being rewritten.
        lower = re.sub(r'\b' + re.escape(old) + r'\b', new, lower)
    return lower
def translate_response(self, response: Dict[str, Any], target_language: str = None) -> Dict[str, Any]:
    """Translate an English response into the user's language.

    The ``message``, every entry of ``suggestions`` and the ``original`` /
    ``corrected`` fields of every entry of ``corrections`` are translated.
    The input dict is not modified.

    Args:
        response: Response dict. May carry ``translation_info`` with a
            ``detected_language`` entry.
        target_language: 'si' or 'ta'. When omitted it is taken from
            ``translation_info['detected_language']``: 'tamil' selects 'ta',
            anything else (or no information) selects 'si'.

    Returns:
        A shallow copy of *response* with translated fields and a new
        ``translation_info`` holding ``translated``, ``target_language``
        and ``translation_method``.
    """
    translated_response = response.copy()

    if target_language is None:
        info = response.get('translation_info') or {}
        # Sinhala is the default for everything that is not Tamil.
        target_language = 'ta' if info.get('detected_language') == 'tamil' else 'si'

    # Start from a clean tracker so the reported method reflects the
    # translations made here rather than an earlier request.
    self.last_translation_method = None

    if 'message' in response:
        translated_response['message'] = self.translate_text(
            response['message'], target_language, 'en'
        )

    if 'suggestions' in response and response['suggestions']:
        translated_response['suggestions'] = [
            self.translate_text(suggestion, target_language, 'en')
            for suggestion in response['suggestions']
        ]

    if 'corrections' in response and response['corrections']:
        translated_corrections = []
        for correction in response['corrections']:
            translated_correction = correction.copy()
            for field in ('original', 'corrected'):
                if field in correction:
                    translated_correction[field] = self.translate_text(
                        correction[field], target_language, 'en'
                    )
            translated_corrections.append(translated_correction)
        translated_response['corrections'] = translated_corrections

    # Report the backend recorded by the most recent translation. The
    # key-based guess is kept only for when nothing was recorded, because
    # on its own it mislabels Gemini, MyMemory and dictionary output.
    method = self.last_translation_method or ('llm' if self.openai_api_key else 'dictionary')
    translated_response['translation_info'] = {
        'translated': True,
        'target_language': target_language,
        'translation_method': method
    }
    return translated_response
def get_sinhala_examples(self) -> Dict[str, Any]:
    """Return sample Sinhala queries grouped by query category.

    Each category maps to a list of dicts with a ``query`` and a
    ``description`` entry.
    """
    # (query, description) pairs per category
    catalogue = {
        'fare_queries': [
            ('කොළඹ සිට මහනුවරට යන බස් ගාස්තුව කීයද?',
             'කොළඹ සිට මහනුවරට යන බස් ගාස්තුව සොයන්න'),
            ('මාතර සිට ගාල්ලට යන මිල කීයද?',
             'මාතර සිට ගාල්ලට යන මිල සොයන්න'),
            ('අනුරාධපුර සිට කොළඹට යන වාරිකය',
             'අනුරාධපුර සිට කොළඹට යන වාරිකය සොයන්න'),
        ],
        'comparison_queries': [
            ('කොළඹ සිට මහනුවරට සහ කොළඹ සිට ගාල්ලට යන ගාස්තු සසඳන්න',
             'විවිධ මාර්ගවල ගාස්තු සසඳන්න'),
            ('කොළඹ සිට මහනුවරට සහ කොළඹ සිට අනුරාධපුරට යන ගාස්තුවල වෙනස කීයද?',
             'මාර්ග දෙකක ගාස්තු වෙනස සොයන්න'),
        ],
        'range_queries': [
            ('රුපියල් 500 ට අඩු ගාස්තු සහිත මාර්ග සොයන්න',
             'රුපියල් 500 ට අඩු ගාස්තු සහිත මාර්ග සොයන්න'),
            ('රුපියල් 200 සහ 800 අතර ගාස්තු සහිත මාර්ග පෙන්වන්න',
             'රුපියල් 200 සහ 800 අතර ගාස්තු සහිත මාර්ග සොයන්න'),
        ],
        'recommendation_queries': [
            ('අඩු මිලේ මාර්ග නිර්දේශ කරන්න',
             'අඩු මිලේ මාර්ග නිර්දේශ කරන්න'),
            ('ප්‍රසිද්ධ ගමනාන්ත පෙන්වන්න',
             'ප්‍රසිද්ධ ගමනාන්ත සොයන්න'),
        ],
        'statistical_queries': [
            ('සාමාන්‍ය ගාස්තුව කීයද?',
             'සාමාන්‍ය ගාස්තුව සොයන්න'),
            ('දත්ත ගබඩා සංඛ්‍යාලේඛන',
             'දත්ත ගබඩා සංඛ්‍යාලේඛන සොයන්න'),
        ],
    }
    return {
        category: [
            {'query': query, 'description': description}
            for query, description in pairs
        ]
        for category, pairs in catalogue.items()
    }
def get_tamil_examples(self) -> Dict[str, Any]:
    """Return sample Tamil queries grouped by query category.

    Each category maps to a list of dicts with a ``query`` and a
    ``description`` entry.
    """
    # (query, description) pairs per category
    catalogue = {
        'fare_queries': [
            ('கொழும்பு இருந்து கண்டி வரை பேருந்து கட்டணம் எவ்வளவு?',
             'கொழும்பு இருந்து கண்டி வரை பேருந்து கட்டணம் கண்டுபிடி'),
            ('மாத்தறை இருந்து காலி வரை விலை எவ்வளவு?',
             'மாத்தறை இருந்து காலி வரை விலை கண்டுபிடி'),
            ('அனுராதபுரம் இருந்து கொழும்பு வரை கட்டணம்',
             'அனுராதபுரம் இருந்து கொழும்பு வரை கட்டணம் கண்டுபிடி'),
        ],
        'comparison_queries': [
            ('கொழும்பு இருந்து கண்டி வரை மற்றும் கொழும்பு இருந்து காலி வரை கட்டணம் ஒப்பிடு',
             'வெவ்வேறு பாதைகளின் கட்டணம் ஒப்பிடு'),
            ('கொழும்பு இருந்து கண்டி வரை மற்றும் கொழும்பு இருந்து அனுராதபுரம் வரை கட்டணத்தின் வித்தியாசம் எவ்வளவு?',
             'இரண்டு பாதைகளின் கட்டண வித்தியாசம் கண்டுபிடி'),
        ],
        'range_queries': [
            ('ரூபாய் 500 க்கு குறைவான கட்டணம் உள்ள பாதைகளை கண்டுபிடி',
             'ரூபாய் 500 க்கு குறைவான கட்டணம் உள்ள பாதைகளை கண்டுபிடி'),
            ('ரூபாய் 200 மற்றும் 800 இடையில் கட்டணம் உள்ள பாதைகளை காட்டு',
             'ரூபாய் 200 மற்றும் 800 இடையில் கட்டணம் உள்ள பாதைகளை கண்டுபிடி'),
        ],
        'recommendation_queries': [
            ('குறைந்த விலையில் பாதைகளை பரிந்துரை',
             'குறைந்த விலையில் பாதைகளை பரிந்துரை'),
            ('பிரபலமான இலக்குகளை காட்டு',
             'பிரபலமான இலக்குகளை கண்டுபிடி'),
        ],
        'statistical_queries': [
            ('சராசரி கட்டணம் எவ்வளவு?',
             'சராசரி கட்டணம் கண்டுபிடி'),
            ('தரவு சேமிப்பக புள்ளிவிவரங்கள்',
             'தரவு சேமிப்பக புள்ளிவிவரங்கள் கண்டுபிடி'),
        ],
    }
    return {
        category: [
            {'query': query, 'description': description}
            for query, description in pairs
        ]
        for category, pairs in catalogue.items()
    }
def test_translation(self) -> Dict[str, Any]:
    """Run a built-in self-check of translation on sample transport queries.

    Every sample is passed through the language detector and
    ``translate_text`` (target 'en', source 'auto'); the output is compared
    with the expected English case-insensitively:

    * 'exact'   - identical to the expected text
    * 'good'    - non-empty, and one text contains the other
    * 'partial' - anything else, including an empty translation

    Returns:
        A dict with ``translation_service_status``, ``supported_languages``,
        ``available_methods``, a ``summary`` (counts and the percentage of
        results rated 'good' or better) and the per-sample ``test_results``.
    """
    # (language, original query, expected English)
    test_cases = [
        # Sinhala
        ('sinhala', 'කොළඹ සිට මහනුවරට යන බස් ගාස්තුව කීයද?',
         'What is the bus fare from Colombo to Kandy?'),
        ('sinhala', 'මාතර සිට ගාල්ලට යන මිල කීයද?',
         'How much is the price from Matara to Galle?'),
        ('sinhala', 'කොළඹ සිට පානදුර දක්වා සහ කොළඹ සිට ගාල්ල දක්වා ගාස්තු සසඳා බලන්න.',
         'Compare fares from Colombo to Panadura and from Colombo to Galle.'),
        ('sinhala', 'රුපියල් 500 ට අඩු ගාස්තු සහිත මාර්ග පෙන්වන්න.',
         'Show routes with fares under 500 rupees.'),
        ('sinhala', 'අඩු මිලේ මාර්ග නිර්දේශ කරන්න.',
         'Recommend cheap routes.'),
        # Tamil
        ('tamil', 'கொழும்பு இருந்து கண்டி வரை பேருந்து கட்டணம் எவ்வளவு?',
         'What is the bus fare from Colombo to Kandy?'),
        ('tamil', 'மாத்தறை இருந்து காலி வரை விலை எவ்வளவு?',
         'How much is the price from Matara to Galle?'),
        ('tamil', 'கொழும்பு இருந்து பனதுரை வரை மற்றும் கொழும்பு இருந்து காலி வரை கட்டணம் ஒப்பிடு.',
         'Compare fares from Colombo to Panadura and from Colombo to Galle.'),
        ('tamil', 'ரூபாய் 500 க்கு குறைவான கட்டணம் உள்ள பாதைகளை காட்டு.',
         'Show routes with fares under 500 rupees.'),
        ('tamil', 'குறைந்த விலையில் பாதைகளை பரிந்துரை.',
         'Recommend cheap routes.'),
        # Singlish
        ('singlish', 'කොළඹ සිට Kandy ගාස්තුව කීයද?',
         'What is the fare from Colombo to Kandy?'),
        ('singlish', 'Colombo සිට ගාල්ලට bus fare කීයද?',
         'What is the bus fare from Colombo to Galle?'),
        ('singlish', 'කොළඹ සිට Panadura සහ Colombo සිට Galle fares compare කරන්න.',
         'Compare fares from Colombo to Panadura and from Colombo to Galle.'),
        # English
        ('english', 'What is the fare from Colombo to Kandy?',
         'What is the fare from Colombo to Kandy?'),
        ('english', 'Show me routes from Panadura',
         'Show me routes from Panadura'),
    ]

    results = []
    total_exact = 0
    total_good = 0
    total_tests = len(test_cases)

    for language, original, expected in test_cases:
        detection_result = self.language_detector.detect_language(original)
        detected_language = detection_result['language']

        # Reset the tracker so 'method_used' belongs to this sample only.
        self.last_translation_method = None
        translated = self.translate_text(original, 'en', 'auto') or ''
        tr = translated.strip()
        ex = expected.strip()
        tr_low = tr.lower()
        ex_low = ex.lower()

        if tr_low == ex_low:
            accuracy = 'exact'
            total_exact += 1
            total_good += 1
        elif tr_low and (tr_low in ex_low or ex_low in tr_low):
            # An empty string is a substring of everything, so a failed
            # translation has to be excluded explicitly.
            accuracy = 'good'
            total_good += 1
        else:
            accuracy = 'partial'

        # Comparison requests must still read as comparisons.
        intent_preserved = True
        if language in ['sinhala', 'tamil'] and ('සසඳ' in original or 'ஒப்பிடு' in original):
            intent_preserved = ('compare' in tr_low)

        results.append({
            'original_query': original,
            'language': language,
            'detected_language': detected_language,
            'translated_english': tr,
            'expected_english': ex,
            'translation_accuracy': accuracy,
            'intent_preserved': intent_preserved,
            'method_used': self.last_translation_method or ('llm' if self.openai_api_key else 'dictionary'),
            'detection_confidence': detection_result['confidence']
        })

    summary = {
        'total_tests': total_tests,
        'exact_matches': total_exact,
        'good_or_better': total_good,
        'accuracy_rate_percent': round((total_good / total_tests) * 100, 2) if total_tests else 0
    }
    self.logger.info(f"Translation test summary: {summary}")

    return {
        'translation_service_status': 'active',
        'supported_languages': ['sinhala', 'tamil', 'singlish', 'english'],
        'available_methods': {
            # LLM translation works with either a Google or an OpenAI key.
            'llm': self.google_api_key is not None or self.openai_api_key is not None,
            'libre_translate': True,
            'mymemory': True,
            'dictionary': True
        },
        'summary': summary,
        'test_results': results
    }