Spaces:
Running
Running
File size: 6,361 Bytes
e44a4c2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 |
import streamlit as st
from transformers import MarianMTModel, MarianTokenizer
# Hugging Face repo-id prefix shared by every English -> X entry in the table.
_OPUS_MT_EN_PREFIX = 'Helsinki-NLP/opus-mt-en-'

# (display name, language code) pairs, listed in the order in which the
# languages appear in LANGUAGE_MODELS.
_LANGUAGE_CODES = (
    ('Afrikaans', 'af'),
    ('Albanian', 'sq'),
    ('Amharic', 'am'),
    ('Arabic', 'ar'),
    ('Armenian', 'hy'),
    ('Bengali', 'bn'),
    ('Bosnian', 'bs'),
    ('Catalan', 'ca'),
    ('Croatian', 'hr'),
    ('Czech', 'cs'),
    ('Danish', 'da'),
    ('Dutch', 'nl'),
    ('Esperanto', 'eo'),
    ('Estonian', 'et'),
    ('Finnish', 'fi'),
    ('French', 'fr'),
    ('German', 'de'),
    ('Greek', 'el'),
    ('Gujarati', 'gu'),
    ('Haitian Creole', 'ht'),
    ('Hausa', 'ha'),
    ('Hawaiian', 'haw'),
    ('Hebrew', 'he'),
    ('Hindi', 'hi'),
    ('Hungarian', 'hu'),
    ('Icelandic', 'is'),
    ('Igbo', 'ig'),
    ('Indonesian', 'id'),
    ('Irish', 'ga'),
    ('Italian', 'it'),
    ('Japanese', 'ja'),
    ('Javanese', 'jw'),
    ('Kannada', 'kn'),
    ('Khmer', 'km'),
    ('Korean', 'ko'),
    ('Latin', 'la'),
    ('Latvian', 'lv'),
    ('Lithuanian', 'lt'),
    ('Luxembourgish', 'lb'),
    ('Macedonian', 'mk'),
    ('Malagasy', 'mg'),
    ('Malayalam', 'ml'),
    ('Maltese', 'mt'),
    ('Maori', 'mi'),
    ('Marathi', 'mr'),
    ('Myanmar', 'my'),
    ('Nepali', 'ne'),
    ('Norwegian', 'no'),
    ('Nyanja', 'ny'),
    ('Odia', 'or'),
    ('Oromo', 'om'),
    ('Pashto', 'ps'),
    ('Persian', 'fa'),
    ('Polish', 'pl'),
    ('Portuguese', 'pt'),
    ('Punjabi', 'pa'),
    ('Quechua', 'qu'),
    ('Romanian', 'ro'),
    ('Russian', 'ru'),
    ('Samoan', 'sm'),
    ('Scots Gaelic', 'gd'),
    ('Serbian', 'sr'),
    ('Sesotho', 'st'),
    ('Shona', 'sn'),
    ('Sindhi', 'sd'),
    ('Sinhala', 'si'),
    ('Slovak', 'sk'),
    ('Slovenian', 'sl'),
    ('Somali', 'so'),
    ('Spanish', 'es'),
    ('Sundanese', 'su'),
    ('Swahili', 'sw'),
    ('Swedish', 'sv'),
    ('Tajik', 'tg'),
    ('Tamil', 'ta'),
    ('Telugu', 'te'),
    ('Thai', 'th'),
    ('Turkmen', 'tk'),
    ('Turkish', 'tr'),
    ('Ukrainian', 'uk'),
    ('Urdu', 'ur'),
    ('Vietnamese', 'vi'),
    ('Welsh', 'cy'),
    ('Xhosa', 'xh'),
    ('Yiddish', 'yi'),
    ('Yoruba', 'yo'),
    ('Zulu', 'zu'),
)

# Maps a language's display name to (language code, model repo id). Each repo
# id is the shared prefix followed by the language code.
# NOTE(review): nothing here verifies that every derived repo id is actually
# published on the Hugging Face Hub -- confirm before relying on an entry.
LANGUAGE_MODELS = {
    name: (code, _OPUS_MT_EN_PREFIX + code)
    for name, code in _LANGUAGE_CODES
}
@st.cache_resource
def load_model(target_language):
    """Load the Marian tokenizer and model registered for a language.

    The model name is looked up in ``LANGUAGE_MODELS`` under
    ``target_language``. The function is wrapped in ``st.cache_resource``.

    Args:
        target_language: Display name used as a key in ``LANGUAGE_MODELS``.

    Returns:
        A ``(tokenizer, model)`` pair, or ``(None, None)`` -- after reporting
        the problem with ``st.error`` -- when no model name is registered
        for the language.
    """
    # Only the repo id matters here; the language code is not used.
    _, repo_id = LANGUAGE_MODELS.get(target_language, (None, None))

    if repo_id:
        # Tokenizer is loaded before the model.
        tokenizer = MarianTokenizer.from_pretrained(repo_id)
        model = MarianMTModel.from_pretrained(repo_id)
        return tokenizer, model

    st.error(f"Model for language '{target_language}' not found.")
    return None, None
def translate_text(text, target_language):
    """Translate English ``text`` into ``target_language``.

    Args:
        text: English source text.
        target_language: Display name passed on to ``load_model``.

    Returns:
        The decoded translation as a string. An empty string is returned
        when ``text`` is empty or whitespace-only, when ``load_model`` yields
        no tokenizer/model, or when loading the model fails with ``OSError``
        (that failure is reported through ``st.error``). Input beyond the
        tokenizer's maximum length is truncated, so very long text is only
        translated up to that limit.
    """
    # Nothing to translate: skip the model load entirely.
    if not text or not text.strip():
        return ""

    try:
        tokenizer, model = load_model(target_language)
    except OSError as exc:
        # transformers reports a repo id that cannot be resolved or
        # downloaded as OSError; show it the same way load_model reports an
        # unknown language instead of letting a raw traceback reach the page.
        st.error(f"Could not load the model for '{target_language}': {exc}")
        return ""

    if tokenizer is None or model is None:
        return ""

    # truncation=True caps the encoded input at the tokenizer's maximum
    # length; without it, over-long text can fail inside generate().
    inputs = tokenizer(
        text,
        return_tensors="pt",
        padding=True,
        truncation=True,
    )

    # Generate the translation and decode the single returned sequence.
    output_ids = model.generate(**inputs)
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)
def main():
    """Render the translator page and run a translation on request.

    Shows a title, an English text area and a target-language select box
    populated from ``LANGUAGE_MODELS``. When the "Translate" button is
    pressed, the text is passed to ``translate_text`` and the result is
    written to the page; empty or whitespace-only input produces a warning
    instead.
    """
    st.title("Language Translator")
    st.write("Translate English text to any language.")

    # Input text from the user
    source_text = st.text_area("Enter text in English:", "")

    # Target language, offered in the table's own order
    target_language = st.selectbox(
        "Select target language:",
        options=list(LANGUAGE_MODELS),
    )

    if not st.button("Translate"):
        return

    # Treat whitespace-only input like empty input so that a stray space or
    # newline does not trigger a model load and a meaningless translation.
    if not source_text.strip():
        st.warning("Please enter text to translate.")
        return

    translated_text = translate_text(source_text, target_language)
    st.write(f"Translated text ({target_language}):")
    st.write(translated_text)
# Build the page only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()
|