import streamlit as st
from transformers import MarianMTModel, MarianTokenizer

# Title and Description
st.title("Language Translation App")
st.markdown("""
Translate text between multiple languages, including Urdu, for communication or learning.
Powered by Helsinki-NLP translation models.
""")

# Supported Language Pairs
language_pairs = {
    "English to Urdu": "Helsinki-NLP/opus-mt-en-ur",
    "Urdu to English": "Helsinki-NLP/opus-mt-ur-en",
    "English to French": "Helsinki-NLP/opus-mt-en-fr",
    "French to English": "Helsinki-NLP/opus-mt-fr-en",
    # Add more language pairs as needed
}

# Language Selection (deduplicated while preserving order, so each language appears once)
source_languages = list(dict.fromkeys(pair.split(" to ")[0] for pair in language_pairs))
target_languages = list(dict.fromkeys(pair.split(" to ")[1] for pair in language_pairs))
input_language = st.selectbox("Select Input Language", source_languages)
output_language = st.selectbox("Select Output Language", target_languages)

# Determine the model based on the selected language pair
language_pair = f"{input_language} to {output_language}"
model_name = language_pairs.get(language_pair)

if model_name is None:
    st.error("Translation for the selected language pair is not supported.")
else:
    # Load the Model and Tokenizer (cached so each model is downloaded and loaded only once)
    @st.cache_resource
    def load_model_and_tokenizer(model_name):
        model = MarianMTModel.from_pretrained(model_name)
        tokenizer = MarianTokenizer.from_pretrained(model_name)
        return model, tokenizer

    model, tokenizer = load_model_and_tokenizer(model_name)

    # Text Input
    input_text = st.text_area("Enter text to translate")

    if st.button("Translate"):
        if not input_text.strip():
            st.error("Please enter text to translate.")
        else:
            # Translate Text
            try:
                tokenized_text = tokenizer(input_text, return_tensors="pt", padding=True)
                translated_tokens = model.generate(**tokenized_text)
                translated_text = tokenizer.decode(translated_tokens[0], skip_special_tokens=True)
                st.text_area("Translated Text", translated_text, height=150)
            except Exception as e:
                st.error(f"An error occurred during translation: {e}")
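
# Usage note (assumption: this file is saved as app.py; the filename is not given in the source).
# The app needs the streamlit, transformers, torch, and sentencepiece packages installed, then run:
#   streamlit run app.py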