File size: 4,349 Bytes
ead05da b20ddb6 fcf5e2e ead05da 77d167a b20ddb6 ead05da 741f54e ead05da abc5e07 ead05da b2c81ec aaaa88a ead05da 5875e8f e918be7 5875e8f abc5e07 21b204f abc5e07 5875e8f abc5e07 21b204f abc5e07 21b204f abc5e07 e918be7 5875e8f ead05da 056989e ead05da 5875e8f abc5e07 5875e8f abc5e07 056989e 5875e8f abc5e07 5875e8f abc5e07 aaaa88a 056989e 5875e8f aaaa88a abc5e07 5875e8f aaaa88a abc5e07 056989e 5875e8f 056989e 5875e8f abc5e07 5875e8f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 |
import streamlit as st
from transformers import AutoTokenizer, BartForConditionalGeneration, MarianMTModel, MarianTokenizer
@st.cache_resource
def load_summarization_model(model_name='facebook/bart-large-cnn'):
    """Load the BART summarization model and its tokenizer.

    The loader is wrapped in ``st.cache_resource`` because Streamlit re-runs
    the whole script on every widget interaction; without the cache the
    checkpoint would be loaded again on each rerun.

    Args:
        model_name: Hugging Face checkpoint to load. Defaults to
            ``'facebook/bart-large-cnn'``.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    model = BartForConditionalGeneration.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    return model, tokenizer
@st.cache_resource
def load_translation_models():
    """Load every supported MarianMT translation model and tokenizer.

    The loader is wrapped in ``st.cache_resource`` because Streamlit re-runs
    the whole script on every widget interaction; without the cache all six
    checkpoints would be loaded again on each rerun.

    Returns:
        A ``(models, tokenizers)`` tuple of dicts, both keyed by a
        ``'<source>-<target>'`` language-pair string such as ``'en-fr'``.
    """
    language_pairs = {
        'en-fr': 'Helsinki-NLP/opus-mt-en-fr',
        'fr-en': 'Helsinki-NLP/opus-mt-fr-en',
        'en-de': 'Helsinki-NLP/opus-mt-en-de',
        'de-en': 'Helsinki-NLP/opus-mt-de-en',
        'en-hi': 'Helsinki-NLP/opus-mt-en-hi',
        'hi-en': 'Helsinki-NLP/opus-mt-hi-en',
    }
    models = {
        pair: MarianMTModel.from_pretrained(model_name)
        for pair, model_name in language_pairs.items()
    }
    tokenizers = {
        pair: MarianTokenizer.from_pretrained(model_name)
        for pair, model_name in language_pairs.items()
    }
    return models, tokenizers
# Module-level model handles. These run whenever this script is executed and
# are read as globals by summarize_text() and translate_text().
summarization_model, summarization_tokenizer = load_summarization_model()
# Both dicts are keyed by '<source>-<target>' language-pair strings.
translation_models, translation_tokenizers = load_translation_models()
def summarize_text(article):
    """Return an abstractive summary of *article*.

    Args:
        article: Text to summarize. Input longer than 1024 tokens is
            truncated by the tokenizer.

    Returns:
        The decoded summary, or ``""`` when *article* is empty or contains
        only whitespace (generation with ``min_length=50`` would otherwise
        invent text for an empty input).
    """
    article = article.strip() if article else ""
    if not article:
        return ""
    # BART needs no task prefix; a "summarize: " prefix is a T5 convention and
    # would simply be treated as part of the article text.
    inputs = summarization_tokenizer.encode(
        article, return_tensors="pt", max_length=1024, truncation=True
    )
    summary_ids = summarization_model.generate(
        inputs,
        max_length=256,
        min_length=50,
        length_penalty=2.0,
        num_beams=4,
        early_stopping=True,
    )
    return summarization_tokenizer.decode(summary_ids[0], skip_special_tokens=True)
def translate_text(text, source_lang, target_lang, pivot_lang='en'):
    """Translate *text* from *source_lang* to *target_lang*.

    A direct model is used when one is loaded for the pair. Otherwise the
    text is translated in two hops through *pivot_lang* (for example
    ``fr -> en -> de``), so pairs without a dedicated model no longer fail
    with a bare ``KeyError``.

    Args:
        text: Text to translate.
        source_lang: Language code of *text*, e.g. ``'en'``.
        target_lang: Language code to translate into.
        pivot_lang: Intermediate language used when no direct model exists.

    Returns:
        The translated text. *text* is returned unchanged when both
        languages are equal or when it is empty or whitespace only.

    Raises:
        KeyError: If neither a direct model nor a route through
            *pivot_lang* is available for the requested pair.
    """
    if source_lang == target_lang or not text.strip():
        return text

    direct_pair = f'{source_lang}-{target_lang}'
    if direct_pair in translation_models:
        hops = [direct_pair]
    else:
        hops = [f'{source_lang}-{pivot_lang}', f'{pivot_lang}-{target_lang}']
        missing = [hop for hop in hops if hop not in translation_models]
        if missing:
            raise KeyError(
                f"No translation model for '{direct_pair}' "
                f"(missing: {', '.join(missing)})"
            )

    for hop in hops:
        tokenizer = translation_tokenizers[hop]
        # truncation=True cuts input that exceeds the tokenizer's maximum
        # length, and only the first generated sequence is decoded.
        inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
        translated = translation_models[hop].generate(**inputs)
        text = tokenizer.decode(translated[0], skip_special_tokens=True)
    return text
# Page-wide CSS injected through st.markdown; the string is kept exactly as
# authored (flush-left, leading and trailing newline included).
_PAGE_CSS = '''
<style>
h1 {
color: #592f2f;
font-size: 3em;
font-weight: bold;
}
h2 {
color: #f28e2c;
font-size: 2em;
}
h3 {
color: #e15759;
font-size: 1.5em;
}
.stButton>button {
background-color: #17499c;
color: white;
font-size: 1em;
border-radius: 8px;
}
.stButton>button:hover {
background-color: #0a2b61;
color: white;
}
</style>
'''

# Language codes offered in every language selectbox.
_LANGUAGES = ('en', 'fr', 'de', 'hi')


def _translate_and_show(text, source_lang, target_lang, heading):
    """Translate *text* and render it under *heading*, reporting problems in the UI."""
    if not text or not text.strip():
        st.warning("Please enter some text to translate.")
        return
    try:
        translated = translate_text(text, source_lang, target_lang)
    except KeyError:
        # translate_text looks models up by language pair; a missing pair
        # should show a message rather than crash the page.
        st.error(f"Translation from '{source_lang}' to '{target_lang}' is not supported.")
        return
    st.subheader(heading)
    st.write(translated)


def _render_summarize_page():
    """Render the summarization form plus the optional summary translation."""
    st.header("Summarize Text")
    article = st.text_area("Enter the text here:")
    if st.button("Generate Summary"):
        if article.strip():
            st.session_state.summary = summarize_text(article)
        else:
            st.warning("Please enter some text to summarize.")

    if not st.session_state.summary:
        st.warning("Please generate a summary first.")
        return

    # Render from session state rather than only inside the button branch, so
    # the summary stays visible when a later interaction reruns the script.
    st.subheader("Summary:")
    st.write(st.session_state.summary)

    st.header("Translate Summary")
    source_lang = st.selectbox('Source Language', _LANGUAGES, key='source_lang_summary')
    target_lang = st.selectbox('Translate Summary into', _LANGUAGES, key='target_lang_summary')
    if st.button("Translate Summary"):
        _translate_and_show(
            st.session_state.summary, source_lang, target_lang, "Translated Summary:"
        )


def _render_translate_page():
    """Render the free-text translation form."""
    st.header("Translate Text")
    article = st.text_area("Enter the text here:")
    source_lang = st.selectbox('Source Language', _LANGUAGES, key='source_lang_article')
    target_lang = st.selectbox('Translate into', _LANGUAGES, key='target_lang_article')
    if st.button("Translate Article"):
        _translate_and_show(article, source_lang, target_lang, "Translated Article:")


def main():
    """Build the Streamlit page: styling, title, task picker and the chosen task view."""
    st.markdown(_PAGE_CSS, unsafe_allow_html=True)
    st.title("Multilingual Text :blue[Summarizer] and :blue[Translator] :sunglasses:")
    task = st.selectbox('Choose a task', ('Summarize', 'Translate'))

    # The summary is kept in session state so it survives script reruns.
    if 'summary' not in st.session_state:
        st.session_state.summary = None

    if task == 'Summarize':
        _render_summarize_page()
    elif task == 'Translate':
        _render_translate_page()
# Script entry point: build the page only when this file is run directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|