Spaces:

shubh2014shiv
/

Japanese_NLP

Runtime error

App Files Files Community

shubh2014shiv committed on Nov 7, 2021

Commit

0ab7e65

•

1 Parent(s): 2f6ce67

Added Text Summarization

Browse files

Files changed (1) hide show

app.py +62 -1

app.py CHANGED Viewed

@@ -13,7 +13,7 @@ import numpy as np
 st.set_page_config(layout="wide")
 st.title("Project - Japanese Natural Language Processing (自然言語処理) using Transformers")
 st.sidebar.subheader("自然言語処理 トピック")
-topic = st.sidebar.radio(label="Select the NLP project topics", options=["Sentiment Analysis"])
 st.write("-" * 5)
 jp_review_text = None
@@ -174,3 +174,64 @@ if topic == "Sentiment Analysis":
                     fig.update_traces(marker_color=['#FF7F7F','#32CD32'])
                     st.plotly_chart(fig)

 st.set_page_config(layout="wide")
 st.title("Project - Japanese Natural Language Processing (自然言語処理) using Transformers")
 st.sidebar.subheader("自然言語処理 トピック")
+topic = st.sidebar.radio(label="Select the NLP project topics", options=["Sentiment Analysis","Text Summarization"])
 st.write("-" * 5)
 jp_review_text = None
                     fig.update_traces(marker_color=['#FF7F7F','#32CD32'])
                     st.plotly_chart(fig)
+elif topic == "Text Summarization":
+    # Text Summarization page: the user picks one Japanese news article from a
+    # paginated grid, may edit it in a text area, and gets a summary generated
+    # by the multilingual mT5 XLSum checkpoint.
+    st.markdown(
+        "<h2 style='text-align: left; color:#EE82EE; font-size:25px;'><b>Summarizing Japanese News Article using multi-Lingual T5 (mT5)</b></h2>",
+        unsafe_allow_html=True)
+    st.markdown(
+        "<h3 style='text-align: center; color:#F63366; font-size:18px;'><b>Japanese News Article Data</b></h3>",
+        unsafe_allow_html=True)
+    # Display a 75% sample of the articles; the fixed random_state keeps the
+    # sample identical on every script rerun.
+    news_articles = pd.read_csv(JAPANESE_SENTIMENT_PROJECT_PATH + "jp_news_articles.csv").sample(frac=0.75,
+                                                                                                 random_state=42)
+    gb = GridOptionsBuilder.from_dataframe(news_articles)
+    gb.configure_pagination()
+    gb.configure_selection(selection_mode="single", use_checkbox=True, suppressRowDeselection=False)
+    gridOptions = gb.build()
+    jp_article = AgGrid(news_articles, gridOptions=gridOptions, theme='material',
+                        enable_enterprise_modules=True,
+                        allow_unsafe_jscode=True, update_mode=GridUpdateMode.SELECTION_CHANGED)
+    # NOTE(review): a whitespace-normalisation step (collapsing runs of
+    # whitespace/newlines before tokenizing) was left disabled by the author;
+    # confirm whether the summarization checkpoint expects it.
+    if len(jp_article['selected_rows']) == 0:
+        st.info("Pick any one Japanese News Article by selecting the checkbox. News articles can be navigated by clicking on page navigator at right-bottom")
+    else:
+        article_text = jp_article['selected_rows'][0]['News Articles']
+        text = st.text_area(label="Text from selected Japanese News Article(ニュース記事)", value=article_text, height=500)
+        summary_length = st.slider(label="Select the maximum length of summary (要約の最大長を選択します )", min_value=120, max_value=160, step=5)
+        if text and st.button("Summarize it! (要約しよう)"):
+            waitPlaceholder = st.image(JAPANESE_SENTIMENT_PROJECT_PATH + "wait.gif")
+            try:
+                summarization_model_name = "csebuetnlp/mT5_multilingual_XLSum"
+                tokenizer = AutoTokenizer.from_pretrained(summarization_model_name)
+                model = AutoModelForSeq2SeqLM.from_pretrained(summarization_model_name)
+                # Tokenize the text-area content (not the raw grid selection) so
+                # that any edits the user made are what actually gets summarized.
+                input_ids = tokenizer(
+                    text,
+                    return_tensors="pt",
+                    padding="max_length",
+                    truncation=True,
+                    max_length=512
+                )["input_ids"]
+                # Beam search capped at the slider-selected length; take the
+                # first (only) sequence of the returned batch.
+                output_ids = model.generate(
+                    input_ids=input_ids,
+                    max_length=summary_length,
+                    no_repeat_ngram_size=2,
+                    num_beams=4
+                )[0]
+                summary = tokenizer.decode(
+                    output_ids,
+                    skip_special_tokens=True,
+                    clean_up_tokenization_spaces=False
+                )
+            finally:
+                # Always remove the wait animation, even if loading or
+                # generation fails, so it does not linger next to the error.
+                waitPlaceholder.empty()
+            st.markdown(
+                "<h2 style='text-align: left; color:#32CD32; font-size:25px;'><b>Summary （要約文）</b></h2>",
+                unsafe_allow_html=True)
+            st.write(summary)