Spaces:
Runtime error
Runtime error
shubh2014shiv
committed on
Commit
•
0ab7e65
1
Parent(s):
2f6ce67
Added Text Summarization
Browse files
app.py
CHANGED
@@ -13,7 +13,7 @@ import numpy as np
|
|
13 |
st.set_page_config(layout="wide")
|
14 |
st.title("Project - Japanese Natural Language Processing (自然言語処理) using Transformers")
|
15 |
st.sidebar.subheader("自然言語処理 トピック")
|
16 |
-
topic = st.sidebar.radio(label="Select the NLP project topics", options=["Sentiment Analysis"])
|
17 |
|
18 |
st.write("-" * 5)
|
19 |
jp_review_text = None
|
@@ -174,3 +174,64 @@ if topic == "Sentiment Analysis":
|
|
174 |
fig.update_traces(marker_color=['#FF7F7F','#32CD32'])
|
175 |
st.plotly_chart(fig)
|
176 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
st.set_page_config(layout="wide")
|
14 |
st.title("Project - Japanese Natural Language Processing (自然言語処理) using Transformers")
|
15 |
st.sidebar.subheader("自然言語処理 トピック")
|
16 |
+
topic = st.sidebar.radio(label="Select the NLP project topics", options=["Sentiment Analysis","Text Summarization"])
|
17 |
|
18 |
st.write("-" * 5)
|
19 |
jp_review_text = None
|
|
|
174 |
fig.update_traces(marker_color=['#FF7F7F','#32CD32'])
|
175 |
st.plotly_chart(fig)
|
176 |
|
177 |
+
elif topic == "Text Summarization":
|
178 |
+
st.markdown(
|
179 |
+
"<h2 style='text-align: left; color:#EE82EE; font-size:25px;'><b>Summarizing Japanese News Article using multi-Lingual T5 (mT5)<b></h2>",
|
180 |
+
unsafe_allow_html=True)
|
181 |
+
st.markdown(
|
182 |
+
"<h3 style='text-align: center; color:#F63366; font-size:18px;'><b>Japanese News Article Data<b></h3>",
|
183 |
+
unsafe_allow_html=True)
|
184 |
+
|
185 |
+
news_articles = pd.read_csv(JAPANESE_SENTIMENT_PROJECT_PATH + "jp_news_articles.csv").sample(frac=0.75,
|
186 |
+
random_state=42)
|
187 |
+
gb = GridOptionsBuilder.from_dataframe(news_articles)
|
188 |
+
gb.configure_pagination()
|
189 |
+
gb.configure_selection(selection_mode="single", use_checkbox=True, suppressRowDeselection=False)
|
190 |
+
gridOptions = gb.build()
|
191 |
+
jp_article = AgGrid(news_articles, gridOptions=gridOptions, theme='material',
|
192 |
+
enable_enterprise_modules=True,
|
193 |
+
allow_unsafe_jscode=True, update_mode=GridUpdateMode.SELECTION_CHANGED)
|
194 |
+
|
195 |
+
# WHITESPACE_HANDLER = lambda k: re.sub('\s+', ' ', re.sub('\n+', ' ', k.strip()))
|
196 |
+
if len(jp_article['selected_rows']) == 0:
|
197 |
+
st.info("Pick any one Japanese News Article by selecting the checkbox. News articles can be navigated by clicking on page navigator at right-bottom")
|
198 |
+
else:
|
199 |
+
article_text = jp_article['selected_rows'][0]['News Articles']
|
200 |
+
|
201 |
+
text = st.text_area(label="Text from selected Japanese News Article(ニュース記事)", value=article_text, height=500)
|
202 |
+
summary_length = st.slider(label="Select the maximum length of summary (要約の最大長を選択します )", min_value=120,max_value=160,step=5)
|
203 |
+
|
204 |
+
if text and st.button("Summarize it! (่ฆ็ดใใใ)"):
|
205 |
+
waitPlaceholder = st.image(JAPANESE_SENTIMENT_PROJECT_PATH + "wait.gif")
|
206 |
+
summarization_model_name = "csebuetnlp/mT5_multilingual_XLSum"
|
207 |
+
tokenizer = AutoTokenizer.from_pretrained(summarization_model_name )
|
208 |
+
model = AutoModelForSeq2SeqLM.from_pretrained(summarization_model_name )
|
209 |
+
|
210 |
+
input_ids = tokenizer(
|
211 |
+
article_text,
|
212 |
+
return_tensors="pt",
|
213 |
+
padding="max_length",
|
214 |
+
truncation=True,
|
215 |
+
max_length=512
|
216 |
+
)["input_ids"]
|
217 |
+
|
218 |
+
output_ids = model.generate(
|
219 |
+
input_ids=input_ids,
|
220 |
+
max_length=summary_length,
|
221 |
+
no_repeat_ngram_size=2,
|
222 |
+
num_beams=4
|
223 |
+
)[0]
|
224 |
+
|
225 |
+
summary = tokenizer.decode(
|
226 |
+
output_ids,
|
227 |
+
skip_special_tokens=True,
|
228 |
+
clean_up_tokenization_spaces=False
|
229 |
+
)
|
230 |
+
|
231 |
+
waitPlaceholder.empty()
|
232 |
+
|
233 |
+
st.markdown(
|
234 |
+
"<h2 style='text-align: left; color:#32CD32; font-size:25px;'><b>Summary （要約文）<b></h2>",
|
235 |
+
unsafe_allow_html=True)
|
236 |
+
|
237 |
+
st.write(summary)
|