shubh2014shiv committed on
Commit
0ab7e65
•
1 Parent(s): 2f6ce67

Added Text Summarization

Browse files
Files changed (1) hide show
  1. app.py +62 -1
app.py CHANGED
@@ -13,7 +13,7 @@ import numpy as np
13
  st.set_page_config(layout="wide")
14
  st.title("Project - Japanese Natural Language Processing (自然言語処理) using Transformers")
15
  st.sidebar.subheader("自然言語処理 トピック")
16
- topic = st.sidebar.radio(label="Select the NLP project topics", options=["Sentiment Analysis"])
17
 
18
  st.write("-" * 5)
19
  jp_review_text = None
@@ -174,3 +174,64 @@ if topic == "Sentiment Analysis":
174
  fig.update_traces(marker_color=['#FF7F7F','#32CD32'])
175
  st.plotly_chart(fig)
176
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  st.set_page_config(layout="wide")
14
  st.title("Project - Japanese Natural Language Processing (自然言語処理) using Transformers")
15
  st.sidebar.subheader("自然言語処理 トピック")
16
+ topic = st.sidebar.radio(label="Select the NLP project topics", options=["Sentiment Analysis","Text Summarization"])
17
 
18
  st.write("-" * 5)
19
  jp_review_text = None
 
174
  fig.update_traces(marker_color=['#FF7F7F','#32CD32'])
175
  st.plotly_chart(fig)
176
 
177
+ elif topic == "Text Summarization":
178
+ st.markdown(
179
+ "<h2 style='text-align: left; color:#EE82EE; font-size:25px;'><b>Summarizing Japanese News Article using multi-Lingual T5 (mT5)<b></h2>",
180
+ unsafe_allow_html=True)
181
+ st.markdown(
182
+ "<h3 style='text-align: center; color:#F63366; font-size:18px;'><b>Japanese News Article Data<b></h3>",
183
+ unsafe_allow_html=True)
184
+
185
+ news_articles = pd.read_csv(JAPANESE_SENTIMENT_PROJECT_PATH + "jp_news_articles.csv").sample(frac=0.75,
186
+ random_state=42)
187
+ gb = GridOptionsBuilder.from_dataframe(news_articles)
188
+ gb.configure_pagination()
189
+ gb.configure_selection(selection_mode="single", use_checkbox=True, suppressRowDeselection=False)
190
+ gridOptions = gb.build()
191
+ jp_article = AgGrid(news_articles, gridOptions=gridOptions, theme='material',
192
+ enable_enterprise_modules=True,
193
+ allow_unsafe_jscode=True, update_mode=GridUpdateMode.SELECTION_CHANGED)
194
+
195
+ # WHITESPACE_HANDLER = lambda k: re.sub('\s+', ' ', re.sub('\n+', ' ', k.strip()))
196
+ if len(jp_article['selected_rows']) == 0:
197
+ st.info("Pick any one Japanese News Article by selecting the checkbox. News articles can be navigated by clicking on page navigator at right-bottom")
198
+ else:
199
+ article_text = jp_article['selected_rows'][0]['News Articles']
200
+
201
+ text = st.text_area(label="Text from selected Japanese News Article(ニュース記事)", value=article_text, height=500)
202
+ summary_length = st.slider(label="Select the maximum length of summary (要約の最大長を選択します )", min_value=120,max_value=160,step=5)
203
+
204
+ if text and st.button("Summarize it! (要約しよう)"):
205
+ waitPlaceholder = st.image(JAPANESE_SENTIMENT_PROJECT_PATH + "wait.gif")
206
+ summarization_model_name = "csebuetnlp/mT5_multilingual_XLSum"
207
+ tokenizer = AutoTokenizer.from_pretrained(summarization_model_name )
208
+ model = AutoModelForSeq2SeqLM.from_pretrained(summarization_model_name )
209
+
210
+ input_ids = tokenizer(
211
+ article_text,
212
+ return_tensors="pt",
213
+ padding="max_length",
214
+ truncation=True,
215
+ max_length=512
216
+ )["input_ids"]
217
+
218
+ output_ids = model.generate(
219
+ input_ids=input_ids,
220
+ max_length=summary_length,
221
+ no_repeat_ngram_size=2,
222
+ num_beams=4
223
+ )[0]
224
+
225
+ summary = tokenizer.decode(
226
+ output_ids,
227
+ skip_special_tokens=True,
228
+ clean_up_tokenization_spaces=False
229
+ )
230
+
231
+ waitPlaceholder.empty()
232
+
233
+ st.markdown(
234
+ "<h2 style='text-align: left; color:#32CD32; font-size:25px;'><b>Summary （要約文）<b></h2>",
235
+ unsafe_allow_html=True)
236
+
237
+ st.write(summary)