File size: 4,217 Bytes
9d96850
fcfd7f5
 
a47d303
 
9d96850
 
18e757e
9d96850
 
18e757e
9d96850
d0b5518
 
c2e4f74
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d0b5518
 
fcfd7f5
 
 
 
 
 
 
c2e4f74
 
 
fcfd7f5
 
 
 
 
 
 
 
 
 
 
c2e4f74
 
 
 
b853b65
31ed4a8
2d10fd6
9d96850
18e757e
c2e4f74
 
18e757e
 
 
 
 
 
a47d303
18e757e
 
 
 
 
 
 
fcfd7f5
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import html
import io
import os

import streamlit as st
from datasets import load_dataset
from gtts import gTTS
from transformers import MarianMTModel, MarianTokenizer

# Page heading for the translator app
st.title(body="Text Translator with Voice")

# Source text typed by the user
user_text = st.text_input(label="Enter the text you want to translate:")

# Maps each human-readable language name (the key) to its short language
# code (the value). Insertion order is the order in which the names are
# offered in the language picker, so it is kept alphabetical.
language_names = {
    "Afrikaans": "af",
    "Albanian": "sq",
    "Amharic": "am",
    "Arabic": "ar",
    "Armenian": "hy",
    "Azerbaijani": "az",
    "Basque": "eu",
    "Belarusian": "be",
    "Bengali": "bn",
    "Bosnian": "bs",
    "Bulgarian": "bg",
    "Catalan": "ca",
    "Cebuano": "ceb",
    "Chichewa": "ny",
    "Chinese (Simplified)": "zh-cn",
    "Corsican": "co",
    "Croatian": "hr",
    "Czech": "cs",
    "Danish": "da",
    "Dutch": "nl",
    "English": "en",
    "Esperanto": "eo",
    "Estonian": "et",
    "Filipino": "tl",
    "Finnish": "fi",
    "French": "fr",
    "Frisian": "fy",
    "Galician": "gl",
    "Georgian": "ka",
    "German": "de",
    "Greek": "el",
    "Gujarati": "gu",
    "Haitian Creole": "ht",
    "Hausa": "ha",
    "Hawaiian": "haw",
    "Hebrew": "iw",
    "Hindi": "hi",
    "Hmong": "hmn",
    "Hungarian": "hu",
    "Icelandic": "is",
    "Igbo": "ig",
    "Indonesian": "id",
    "Irish": "ga",
    "Italian": "it",
    "Japanese": "ja",
    "Javanese": "jw",
    "Kannada": "kn",
    "Kazakh": "kk",
    "Khmer": "km",
    "Korean": "ko",
    "Kurdish": "ku",
    "Kyrgyz": "ky",
    "Lao": "lo",
    "Latin": "la",
    "Latvian": "lv",
    "Lithuanian": "lt",
    "Luxembourgish": "lb",
    "Macedonian": "mk",
    "Malagasy": "mg",
    "Malay": "ms",
    "Malayalam": "ml",
    "Maltese": "mt",
    "Maori": "mi",
    "Marathi": "mr",
    "Mongolian": "mn",
    "Myanmar (Burmese)": "my",
    "Nepali": "ne",
    "Norwegian": "no",
    "Odia": "or",
    "Pashto": "ps",
    "Persian": "fa",
    "Polish": "pl",
    "Portuguese": "pt",
    "Punjabi": "pa",
    "Romanian": "ro",
    "Russian": "ru",
    "Samoan": "sm",
    "Scots Gaelic": "gd",
    "Serbian": "sr",
    "Sesotho": "st",
    "Shona": "sn",
    "Sindhi": "sd",
    "Sinhala": "si",
    "Slovak": "sk",
    "Slovenian": "sl",
    "Somali": "so",
    "Spanish": "es",
    "Sundanese": "su",
    "Swahili": "sw",
    "Swedish": "sv",
    "Tajik": "tg",
    "Tamil": "ta",
    "Tatar": "tt",
    "Telugu": "te",
    "Thai": "th",
    "Turkish": "tr",
    "Turkmen": "tk",
    "Ukrainian": "uk",
    "Urdu": "ur",
    "Uyghur": "ug",
    "Uzbek": "uz",
    "Vietnamese": "vi",
    "Welsh": "cy",
    "Xhosa": "xh",
    "Yiddish": "yi",
    "Yoruba": "yo",
    "Zulu": "zu",
}

# Streamlit re-executes this script on every widget interaction, so without a
# cache that survives reruns the Marian weights would be loaded again for each
# translation. st.cache_resource is used when the installed Streamlit
# provides it; otherwise loading simply stays uncached.
if hasattr(st, "cache_resource"):
    # Bounded so that trying many target languages cannot keep an unlimited
    # number of models alive in memory at once.
    _cache_translation_model = st.cache_resource(max_entries=4)
else:
    def _cache_translation_model(func):
        """Fallback decorator: return *func* unchanged (no caching)."""
        return func


@_cache_translation_model
def load_translation_model(language_pair):
    """Load the Marian model and tokenizer for one language pair.

    Args:
        language_pair: Suffix of the checkpoint name, e.g. ``'en-fr'``; the
            checkpoint loaded is ``Helsinki-NLP/opus-mt-<language_pair>``.

    Returns:
        A ``(model, tokenizer)`` tuple. When caching is active the same
        objects are handed back on later calls with the same pair.

    Raises:
        Whatever ``from_pretrained`` raises when the checkpoint cannot be
        found or loaded; the error is not handled here.
    """
    model_name = f'Helsinki-NLP/opus-mt-{language_pair}'
    model = MarianMTModel.from_pretrained(model_name)
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    return model, tokenizer

# Target codes whose Helsinki-NLP/opus-mt-en-<tag> checkpoint uses a different
# tag than the code passed in by the app.
_MARIAN_TARGET_ALIASES = {
    'zh-cn': 'zh',
    'iw': 'he',
}


def translate_text(text, target_language_code):
    """Translate English *text* into the requested target language.

    Args:
        text: Source text; the model pair is always built as ``en-<target>``,
            so the input is treated as English.
        target_language_code: Code of the language to translate into.

    Returns:
        The translated string. An empty *text* yields ``""`` and an English
        target returns *text* unchanged, both without loading a model. On any
        failure the function does not raise: it returns a message that starts
        with ``"Translation failed: "`` followed by the error text.
    """
    if not text:
        return ""
    # Source and target are the same language: there is no en-en checkpoint,
    # and nothing to translate anyway.
    if target_language_code == 'en':
        return text

    try:
        marian_code = _MARIAN_TARGET_ALIASES.get(target_language_code, target_language_code)
        model, tokenizer = load_translation_model(f'en-{marian_code}')

        # truncation=True keeps over-long input within the model's maximum
        # sequence length instead of feeding it more tokens than it supports.
        inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)

        translated_tokens = model.generate(**inputs)
        return tokenizer.decode(translated_tokens[0], skip_special_tokens=True)
    except Exception as e:
        # Deliberate best-effort boundary: callers receive the failure as a
        # string rather than an exception.
        return f"Translation failed: {e}"

# Target language selection
st.subheader("Select the target language for translation:")
selected_target_language = st.selectbox("Select a target language:", list(language_names))

# Translate, display the result, and offer it as audio
if user_text:
    target_code = language_names[selected_target_language]
    translated_text = translate_text(user_text, target_code)

    if translated_text.startswith("Translation failed:"):
        # translate_text reports errors through its return value; show them as
        # an error instead of rendering and speaking them as a translation.
        st.error(translated_text)
    else:
        # Bordered box for the result. The text is escaped because the box is
        # rendered with unsafe_allow_html=True and the content derives from
        # user input.
        styled_text = (
            '<div style="border: 2px solid #0072B5; padding: 10px; '
            'background-color: #E5E5E5; border-radius: 5px; color: black;">'
            f'{html.escape(translated_text)}</div>'
        )
        st.markdown(styled_text, unsafe_allow_html=True)

        # Text-to-Speech (TTS): synthesize into memory rather than a fixed
        # file name, so concurrent sessions cannot overwrite or delete each
        # other's audio and no cleanup is needed.
        try:
            audio_buffer = io.BytesIO()
            gTTS(translated_text, lang=target_code).write_to_fp(audio_buffer)
        except Exception as exc:
            # UI boundary: a TTS problem (for example a language the speech
            # service does not offer, or a network error) must not take down
            # the page after the translation has already been shown.
            st.warning(f"Audio playback is unavailable: {exc}")
        else:
            st.audio(audio_buffer.getvalue(), format="audio/mp3")