Abhishek-D7 committed on
Commit
ead05da
1 Parent(s): 4b0f080

Create app.py

Files changed (1)
  1. app.py +80 -0
app.py ADDED
@@ -0,0 +1,80 @@
+ import streamlit as st
+ from transformers import T5Tokenizer, T5ForConditionalGeneration, MarianMTModel, MarianTokenizer
+
+ # Cache the loaded models so Streamlit does not reload them on every rerun.
+ @st.cache_resource
+ def load_summarization_model():
+     model = T5ForConditionalGeneration.from_pretrained('t5-small')
+     tokenizer = T5Tokenizer.from_pretrained('t5-small')
+     return model, tokenizer
+
+ summarization_model, summarization_tokenizer = load_summarization_model()
+
+ @st.cache_resource
+ def load_translation_models():
+     models = {}
+     tokenizers = {}
+     language_pairs = {
+         'en-fr': 'Helsinki-NLP/opus-mt-en-fr',
+         'fr-en': 'Helsinki-NLP/opus-mt-fr-en',
+         'en-de': 'Helsinki-NLP/opus-mt-en-de',
+         'de-en': 'Helsinki-NLP/opus-mt-de-en',
+         'en-hi': 'Helsinki-NLP/opus-mt-en-hi',
+         'hi-en': 'Helsinki-NLP/opus-mt-hi-en',
+         'fr-de': 'Helsinki-NLP/opus-mt-fr-de',
+         'de-fr': 'Helsinki-NLP/opus-mt-de-fr',
+         'fr-hi': 'Helsinki-NLP/opus-mt-fr-hi',
+         'hi-fr': 'Helsinki-NLP/opus-mt-hi-fr',
+         'de-hi': 'Helsinki-NLP/opus-mt-de-hi',
+         'hi-de': 'Helsinki-NLP/opus-mt-hi-de'
+     }
+     for pair, model_name in language_pairs.items():
+         models[pair] = MarianMTModel.from_pretrained(model_name)
+         tokenizers[pair] = MarianTokenizer.from_pretrained(model_name)
+     return models, tokenizers
+
+ translation_models, translation_tokenizers = load_translation_models()
+
+ def summarize_text(article):
+     inputs = summarization_tokenizer.encode("summarize: " + article, return_tensors="pt", max_length=512, truncation=True)
+     summary_ids = summarization_model.generate(inputs, max_length=64, min_length=10, length_penalty=2.0, num_beams=4, early_stopping=True)
+     return summarization_tokenizer.decode(summary_ids[0], skip_special_tokens=True)
+
+ def translate_text(text, source_lang, target_lang):
+     if source_lang == target_lang:
+         return text
+     language_pair = f'{source_lang}-{target_lang}'
+     model = translation_models[language_pair]
+     tokenizer = translation_tokenizers[language_pair]
+     inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
+     translated = model.generate(**inputs)
+     return tokenizer.decode(translated[0], skip_special_tokens=True)
+
+
+ st.title("Multilingual Text Summarizer and Translator")
+
+ option = st.selectbox('Choose a task', ('Summarize and Translate',))
+
+ if option == 'Summarize and Translate':
+     article = st.text_area("Enter the article text here:")
+     source_lang = st.selectbox('Source Language', ('en', 'fr', 'de', 'hi'))
+
+     if st.button("Generate Summary"):
+         # Keep the summary in session state so it survives the rerun
+         # triggered by the buttons below.
+         st.session_state['summary'] = summarize_text(article)
+     if 'summary' in st.session_state:
+         st.write("Summary:", st.session_state['summary'])
+
+     target_lang = st.selectbox('Translate Summary to', ('en', 'fr', 'de', 'hi'))
+     if st.button("Translate Summary"):
+         if 'summary' in st.session_state:
+             translated_summary = translate_text(st.session_state['summary'], source_lang, target_lang)
+             st.write("Translated Summary:", translated_summary)
+         else:
+             st.warning("Generate a summary first.")
+
+     if st.checkbox("Translate Original Article"):
+         target_lang_article = st.selectbox('Translate Article to', ('en', 'fr', 'de', 'hi'), key='article')
+         translated_article = translate_text(article, source_lang, target_lang_article)
+         st.write("Translated Article:", translated_article)