Add more whitespace and implement scoring and display of different summaries at the end of the blog post
Browse files
app.py
CHANGED
@@ -10,6 +10,7 @@ import numpy as np
|
|
10 |
import base64
|
11 |
|
12 |
from spacy_streamlit.util import get_svg
|
|
|
13 |
|
14 |
from custom_renderer import render_sentence_custom
|
15 |
from sentence_transformers import SentenceTransformer
|
@@ -174,15 +175,28 @@ def get_and_compare_entities(first_time: bool):
|
|
174 |
|
175 |
matched_entities = list(dict.fromkeys(matched_entities))
|
176 |
unmatched_entities = list(dict.fromkeys(unmatched_entities))
|
|
|
|
|
|
|
|
|
177 |
for entity in matched_entities:
|
178 |
for substring_entity in matched_entities:
|
179 |
if entity != substring_entity and entity.lower() in substring_entity.lower():
|
180 |
-
|
181 |
|
182 |
for entity in unmatched_entities:
|
183 |
for substring_entity in unmatched_entities:
|
184 |
if entity != substring_entity and entity.lower() in substring_entity.lower():
|
185 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
186 |
return matched_entities, unmatched_entities
|
187 |
|
188 |
|
@@ -292,7 +306,7 @@ st.title('π Summarization fact checker π')
|
|
292 |
# INTRODUCTION
|
293 |
st.header("🧑‍🏫 Introduction")
|
294 |
|
295 |
-
introduction_checkbox = st.checkbox("Show introduction text", value
|
296 |
if introduction_checkbox:
|
297 |
st.markdown("""
|
298 |
Recent work using 🤗 **transformers** 🤗 on large text corpora has shown great success when fine-tuned on
|
@@ -346,6 +360,7 @@ if summarize_button:
|
|
346 |
"factual a summary is for a given article. The idea is that in production, you could generate a set of "
|
347 |
"summaries for the same article, with different parameters (or even different models). By using "
|
348 |
"post-processing error detection, we can then select the best possible summary.*")
|
|
|
349 |
if st.session_state.article_text:
|
350 |
with st.spinner('Generating summary, this might take a while...'):
|
351 |
if selected_article != "Provide your own input" and article_text == fetch_article_contents(
|
@@ -381,6 +396,7 @@ if summarize_button:
|
|
381 |
"entities we find, the lower the factualness score of the summary. ")
|
382 |
with st.spinner("Calculating and matching entities..."):
|
383 |
entity_match_html = highlight_entities()
|
|
|
384 |
st.write(entity_match_html, unsafe_allow_html=True)
|
385 |
red_text = """<font color="black"><span style="background-color: rgb(238, 135, 135); opacity:
|
386 |
1;">red</span></font> """
|
@@ -463,43 +479,79 @@ if summarize_button:
|
|
463 |
"empirically tested they are definitely not sufficiently robust for general use-cases.")
|
464 |
st.markdown("####")
|
465 |
st.markdown(
|
466 |
-
"
|
467 |
-
"
|
468 |
-
"
|
469 |
-
"the
|
470 |
-
|
471 |
-
|
472 |
-
|
473 |
-
|
474 |
-
|
475 |
-
|
476 |
-
|
477 |
-
|
478 |
-
|
479 |
-
|
480 |
-
|
481 |
-
|
482 |
-
|
483 |
-
|
484 |
-
|
485 |
-
|
486 |
-
|
487 |
-
|
488 |
-
|
489 |
-
|
490 |
-
|
491 |
-
|
492 |
-
|
493 |
-
|
494 |
-
|
495 |
-
|
496 |
-
|
497 |
-
|
498 |
-
|
499 |
-
|
500 |
-
|
501 |
-
|
502 |
-
|
503 |
-
|
504 |
-
|
505 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
import base64
|
11 |
|
12 |
from spacy_streamlit.util import get_svg
|
13 |
+
from streamlit.proto.SessionState_pb2 import SessionState
|
14 |
|
15 |
from custom_renderer import render_sentence_custom
|
16 |
from sentence_transformers import SentenceTransformer
|
|
|
175 |
|
176 |
matched_entities = list(dict.fromkeys(matched_entities))
|
177 |
unmatched_entities = list(dict.fromkeys(unmatched_entities))
|
178 |
+
|
179 |
+
matched_entities_to_remove = []
|
180 |
+
unmatched_entities_to_remove = []
|
181 |
+
|
182 |
for entity in matched_entities:
|
183 |
for substring_entity in matched_entities:
|
184 |
if entity != substring_entity and entity.lower() in substring_entity.lower():
|
185 |
+
matched_entities_to_remove.append(entity)
|
186 |
|
187 |
for entity in unmatched_entities:
|
188 |
for substring_entity in unmatched_entities:
|
189 |
if entity != substring_entity and entity.lower() in substring_entity.lower():
|
190 |
+
unmatched_entities_to_remove.append(entity)
|
191 |
+
|
192 |
+
matched_entities_to_remove = list(dict.fromkeys(matched_entities_to_remove))
|
193 |
+
unmatched_entities_to_remove = list(dict.fromkeys(unmatched_entities_to_remove))
|
194 |
+
|
195 |
+
for entity in matched_entities_to_remove:
|
196 |
+
matched_entities.remove(entity)
|
197 |
+
for entity in unmatched_entities_to_remove:
|
198 |
+
unmatched_entities.remove(entity)
|
199 |
+
|
200 |
return matched_entities, unmatched_entities
|
201 |
|
202 |
|
|
|
306 |
# INTRODUCTION
|
307 |
st.header("🧑‍🏫 Introduction")
|
308 |
|
309 |
+
introduction_checkbox = st.checkbox("Show introduction text", value=True)
|
310 |
if introduction_checkbox:
|
311 |
st.markdown("""
|
312 |
Recent work using 🤗 **transformers** 🤗 on large text corpora has shown great success when fine-tuned on
|
|
|
360 |
"factual a summary is for a given article. The idea is that in production, you could generate a set of "
|
361 |
"summaries for the same article, with different parameters (or even different models). By using "
|
362 |
"post-processing error detection, we can then select the best possible summary.*")
|
363 |
+
st.markdown("####")
|
364 |
if st.session_state.article_text:
|
365 |
with st.spinner('Generating summary, this might take a while...'):
|
366 |
if selected_article != "Provide your own input" and article_text == fetch_article_contents(
|
|
|
396 |
"entities we find, the lower the factualness score of the summary. ")
|
397 |
with st.spinner("Calculating and matching entities..."):
|
398 |
entity_match_html = highlight_entities()
|
399 |
+
st.markdown("####")
|
400 |
st.write(entity_match_html, unsafe_allow_html=True)
|
401 |
red_text = """<font color="black"><span style="background-color: rgb(238, 135, 135); opacity:
|
402 |
1;">red</span></font> """
|
|
|
479 |
"empirically tested they are definitely not sufficiently robust for general use-cases.")
|
480 |
st.markdown("####")
|
481 |
st.markdown(
|
482 |
+
"Below we generate 3 different kind of summaries, and based on the two discussed methods, their errors are "
|
483 |
+
"detected to estimate a factualness score. Based on this basic approach, "
|
484 |
+
"the best summary (read: the one that a human would prefer or indicate as the best one) "
|
485 |
+
"will hopefully be at the top. Summaries with the same scores will get the same rank displayed.")
|
486 |
+
st.markdown("####")
|
487 |
+
|
488 |
+
with st.spinner("Calculating more summaries and scoring them, this might take a while..."):
|
489 |
+
summaries_list = []
|
490 |
+
deduction_points = []
|
491 |
+
# ENTITIES
|
492 |
+
_, amount_unmatched = get_and_compare_entities(False)
|
493 |
+
|
494 |
+
# DEPS
|
495 |
+
summary_deps = check_dependency(False)
|
496 |
+
article_deps = check_dependency(True)
|
497 |
+
total_unmatched_deps = []
|
498 |
+
for summ_dep in summary_deps:
|
499 |
+
if not any(summ_dep['identifier'] in art_dep['identifier'] for art_dep in article_deps):
|
500 |
+
total_unmatched_deps.append(summ_dep)
|
501 |
+
|
502 |
+
summaries_list.append(st.session_state.summary_output)
|
503 |
+
deduction_points.append(len(amount_unmatched) + len(total_unmatched_deps))
|
504 |
+
|
505 |
+
# FOR NEW GENERATED SUMMARY
|
506 |
+
st.session_state.summary_output = generate_abstractive_summary(st.session_state.article_text,
|
507 |
+
type="beam",
|
508 |
+
do_sample=True, num_beams=15,
|
509 |
+
no_repeat_ngram_size=5)
|
510 |
+
_, amount_unmatched = get_and_compare_entities(False)
|
511 |
+
|
512 |
+
summary_deps = check_dependency(False)
|
513 |
+
article_deps = check_dependency(True)
|
514 |
+
total_unmatched_deps = []
|
515 |
+
for summ_dep in summary_deps:
|
516 |
+
if not any(summ_dep['identifier'] in art_dep['identifier'] for art_dep in article_deps):
|
517 |
+
total_unmatched_deps.append(summ_dep)
|
518 |
+
|
519 |
+
summaries_list.append(st.session_state.summary_output)
|
520 |
+
deduction_points.append(len(amount_unmatched) + len(total_unmatched_deps))
|
521 |
+
|
522 |
+
# FOR NEW GENERATED SUMMARY
|
523 |
+
st.session_state.summary_output = generate_abstractive_summary(st.session_state.article_text,
|
524 |
+
type="top_p",
|
525 |
+
do_sample=True,
|
526 |
+
no_repeat_ngram_size=5)
|
527 |
+
_, amount_unmatched = get_and_compare_entities(False)
|
528 |
+
|
529 |
+
summary_deps = check_dependency(False)
|
530 |
+
article_deps = check_dependency(True)
|
531 |
+
total_unmatched_deps = []
|
532 |
+
for summ_dep in summary_deps:
|
533 |
+
if not any(summ_dep['identifier'] in art_dep['identifier'] for art_dep in article_deps):
|
534 |
+
total_unmatched_deps.append(summ_dep)
|
535 |
+
|
536 |
+
summaries_list.append(st.session_state.summary_output)
|
537 |
+
deduction_points.append(len(amount_unmatched) + len(total_unmatched_deps))
|
538 |
+
|
539 |
+
# RANKING AND SHOWING THE SUMMARIES
|
540 |
+
deduction_points, summaries_list = (list(t) for t in zip(*sorted(zip(deduction_points, summaries_list))))
|
541 |
+
|
542 |
+
cur_rank = 1
|
543 |
+
rank_downgrade = 0
|
544 |
+
for i in range(len(deduction_points)):
|
545 |
+
st.write(f'π Rank {cur_rank} summary: π', display_summary(summaries_list[i]), unsafe_allow_html=True)
|
546 |
+
if i < len(deduction_points) - 1:
|
547 |
+
rank_downgrade += 1
|
548 |
+
if not deduction_points[i + 1] == deduction_points[i]:
|
549 |
+
cur_rank += rank_downgrade
|
550 |
+
rank_downgrade = 0
|
551 |
+
|
552 |
+
# session = SessionState.get(code=print("TEST"))
|
553 |
+
# a = st.radio("Edit or show", ['Edit', 'Show'], 1)
|
554 |
+
# if a == 'Edit':
|
555 |
+
# session.code = st.text_input('Edit code', session.code)
|
556 |
+
# else:
|
557 |
+
# st.write(session.code)
|