MatthiasC committed on
Commit
1ba4f32
•
1 Parent(s): 0e6b382

Fix last things and add cached extra summaries for the examples

Browse files
app.py CHANGED
@@ -53,7 +53,7 @@ def get_summarizer_model():
53
 
54
  # Page setup
55
  st.set_page_config(
56
- page_title="📜 Post-processing summarization fact checker 📜",
57
  page_icon="",
58
  layout="centered",
59
  initial_sidebar_state="auto",
@@ -101,6 +101,12 @@ def fetch_dependency_specific_contents(filename: str) -> AnyStr:
101
  return data
102
 
103
 
 
 
 
 
 
 
104
  def fetch_dependency_svg(filename: str) -> AnyStr:
105
  with open(f'./dependency-images/{filename}.txt', 'r') as f:
106
  lines = [line.rstrip() for line in f]
@@ -301,7 +307,9 @@ nlp = get_spacy()
301
  summarization_model = get_summarizer_model()
302
 
303
  # Page
304
- st.title('📜 Summarization fact checker 📜')
 
 
305
 
306
  # INTRODUCTION
307
  st.header("🧑‍🏫 Introduction")
@@ -471,88 +479,53 @@ if summarize_button:
471
  render_dependency_parsing(current_drawing_list)
472
 
473
  # CURRENTLY DISABLED
474
- # # OUTRO/CONCLUSION
475
- # st.header("🤝 Bringing it together")
476
- # st.markdown("We have presented 2 methods that try to detect errors in summaries via post-processing steps. Entity "
477
- # "matching can be used to solve hallucinations, while dependency comparison can be used to filter out "
478
- # "some bad sentences (and thus worse summaries). These methods highlight the possibilities of "
479
- # "post-processing AI-made summaries, but are only a first introduction. As the methods were "
480
- # "empirically tested they are definitely not sufficiently robust for general use-cases.")
481
- # st.markdown("####")
482
- # st.markdown(
483
- # "Below we generate 3 different kind of summaries, and based on the two discussed methods, their errors are "
484
- # "detected to estimate a factualness score. Based on this basic approach, "
485
- # "the best summary (read: the one that a human would prefer or indicate as the best one) "
486
- # "will hopefully be at the top. Summaries with the same scores will get the same rank displayed.")
487
- # st.markdown("####")
488
- #
489
- # with st.spinner("Calculating more summaries and scoring them, this might take a minute or two..."):
490
- # summaries_list = []
491
- # deduction_points = []
492
- # # ENTITIES
493
- # _, amount_unmatched = get_and_compare_entities(False)
494
- #
495
- # # DEPS
496
- # summary_deps = check_dependency(False)
497
- # article_deps = check_dependency(True)
498
- # total_unmatched_deps = []
499
- # for summ_dep in summary_deps:
500
- # if not any(summ_dep['identifier'] in art_dep['identifier'] for art_dep in article_deps):
501
- # total_unmatched_deps.append(summ_dep)
502
- #
503
- # summaries_list.append(st.session_state.summary_output)
504
- # deduction_points.append(len(amount_unmatched) + len(total_unmatched_deps))
505
- #
506
- # # FOR NEW GENERATED SUMMARY
507
- # st.session_state.summary_output = generate_abstractive_summary(st.session_state.article_text,
508
- # type="beam",
509
- # do_sample=True, num_beams=15,
510
- # no_repeat_ngram_size=5)
511
- # _, amount_unmatched = get_and_compare_entities(False)
512
- #
513
- # summary_deps = check_dependency(False)
514
- # article_deps = check_dependency(True)
515
- # total_unmatched_deps = []
516
- # for summ_dep in summary_deps:
517
- # if not any(summ_dep['identifier'] in art_dep['identifier'] for art_dep in article_deps):
518
- # total_unmatched_deps.append(summ_dep)
519
- #
520
- # summaries_list.append(st.session_state.summary_output)
521
- # deduction_points.append(len(amount_unmatched) + len(total_unmatched_deps))
522
- #
523
- # # FOR NEW GENERATED SUMMARY
524
- # st.session_state.summary_output = generate_abstractive_summary(st.session_state.article_text,
525
- # type="top_p",
526
- # do_sample=True,
527
- # no_repeat_ngram_size=5)
528
- # _, amount_unmatched = get_and_compare_entities(False)
529
- #
530
- # summary_deps = check_dependency(False)
531
- # article_deps = check_dependency(True)
532
- # total_unmatched_deps = []
533
- # for summ_dep in summary_deps:
534
- # if not any(summ_dep['identifier'] in art_dep['identifier'] for art_dep in article_deps):
535
- # total_unmatched_deps.append(summ_dep)
536
- #
537
- # summaries_list.append(st.session_state.summary_output)
538
- # deduction_points.append(len(amount_unmatched) + len(total_unmatched_deps))
539
- #
540
- # # RANKING AND SHOWING THE SUMMARIES
541
- # deduction_points, summaries_list = (list(t) for t in zip(*sorted(zip(deduction_points, summaries_list))))
542
- #
543
- # cur_rank = 1
544
- # rank_downgrade = 0
545
- # for i in range(len(deduction_points)):
546
- # st.write(f'🏆 Rank {cur_rank} summary: 🏆', display_summary(summaries_list[i]), unsafe_allow_html=True)
547
- # if i < len(deduction_points) - 1:
548
- # rank_downgrade += 1
549
- # if not deduction_points[i + 1] == deduction_points[i]:
550
- # cur_rank += rank_downgrade
551
- # rank_downgrade = 0
552
-
553
- # session = SessionState.get(code=print("TEST"))
554
- # a = st.radio("Edit or show", ['Edit', 'Show'], 1)
555
- # if a == 'Edit':
556
- # session.code = st.text_input('Edit code', session.code)
557
- # else:
558
- # st.write(session.code)
 
53
 
54
  # Page setup
55
  st.set_page_config(
56
+ page_title="📜 Hallucination detection in summaries 📜",
57
  page_icon="",
58
  layout="centered",
59
  initial_sidebar_state="auto",
 
101
  return data
102
 
103
 
104
+ def fetch_ranked_summaries(filename: str, ranknumber: int) -> AnyStr:
105
+ with open(f'./ranked-summaries/{filename}/Rank{ranknumber}.txt', 'r') as f:
106
+ data = f.read()
107
+ return data
108
+
109
+
110
  def fetch_dependency_svg(filename: str) -> AnyStr:
111
  with open(f'./dependency-images/{filename}.txt', 'r') as f:
112
  lines = [line.rstrip() for line in f]
 
307
  summarization_model = get_summarizer_model()
308
 
309
  # Page
310
+ st.title('📜 Hallucination detection 📜')
311
+ st.subheader("🔎 Detecting errors in generated abstractive summaries")
312
+ #st.title('📜 Error detection in summaries 📜')
313
 
314
  # INTRODUCTION
315
  st.header("🧑‍🏫 Introduction")
 
479
  render_dependency_parsing(current_drawing_list)
480
 
481
  # CURRENTLY DISABLED
482
+ # OUTRO/CONCLUSION
483
+ st.header("🤝 Bringing it together")
484
+ st.markdown("We have presented 2 methods that try to detect errors in summaries via post-processing steps. Entity "
485
+ "matching can be used to solve hallucinations, while dependency comparison can be used to filter out "
486
+ "some bad sentences (and thus worse summaries). These methods highlight the possibilities of "
487
+ "post-processing AI-made summaries, but are only a first introduction. As the methods were "
488
+ "empirically tested they are definitely not sufficiently robust for general use-cases.")
489
+ st.markdown("####")
490
+ st.markdown(
491
+ "Below we generate 3 different kind of summaries, and based on the two discussed methods, their errors are "
492
+ "detected to estimate a factualness score. Based on this basic approach, "
493
+ "the best summary (read: the one that a human would prefer or indicate as the best one) "
494
+ "will hopefully be at the top. Summaries with the same scores will get the same rank displayed. We currently "
495
+ "only do this for the example articles (for which the different summmaries are already generated). The reason "
496
+ "for this is that HuggingFace spaces are limited in their CPU memory.")
497
+ st.markdown("####")
498
+
499
+ if selected_article != "Provide your own input" and article_text == fetch_article_contents(selected_article):
500
+ with st.spinner("Calculating more summaries and scoring them, this might take a minute or two..."):
501
+ summaries_list = []
502
+ deduction_points = []
503
+
504
+ # FOR NEW GENERATED SUMMARY
505
+ for i in range(1 , 4):
506
+ st.session_state.summary_output = fetch_ranked_summaries(selected_article, i)
507
+ _, amount_unmatched = get_and_compare_entities(False)
508
+
509
+ summary_deps = check_dependency(False)
510
+ article_deps = check_dependency(True)
511
+ total_unmatched_deps = []
512
+ for summ_dep in summary_deps:
513
+ if not any(summ_dep['identifier'] in art_dep['identifier'] for art_dep in article_deps):
514
+ total_unmatched_deps.append(summ_dep)
515
+
516
+ summaries_list.append(st.session_state.summary_output)
517
+ deduction_points.append(len(amount_unmatched) + len(total_unmatched_deps))
518
+
519
+
520
+ # RANKING AND SHOWING THE SUMMARIES
521
+ deduction_points, summaries_list = (list(t) for t in zip(*sorted(zip(deduction_points, summaries_list))))
522
+
523
+ cur_rank = 1
524
+ rank_downgrade = 0
525
+ for i in range(len(deduction_points)):
526
+ st.write(f'🏆 Rank {cur_rank} summary: 🏆', display_summary(summaries_list[i]), unsafe_allow_html=True)
527
+ if i < len(deduction_points) - 1:
528
+ rank_downgrade += 1
529
+ if not deduction_points[i + 1] == deduction_points[i]:
530
+ cur_rank += rank_downgrade
531
+ rank_downgrade = 0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dependency-specific-text/Tencent Holdings.txt CHANGED
@@ -0,0 +1 @@
 
 
1
+ Nothing suspicious about this summary
entity-specific-text/Novak Djokovic.txt CHANGED
@@ -0,0 +1 @@
 
 
1
+ Nothing suspicious about this summary
entity-specific-text/White House.txt CHANGED
@@ -0,0 +1 @@
 
 
1
+ Nothing suspicious about this summary
ranked-summaries/Mark Levinson Airpods/Rank1.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ The Mark Levinson No. 5909 is the company's first wireless headphones. It packs 40mm Beryllium drivers "expertly tuned to the Harman curve" The headphones will be available in black, pewter and red color options starting today for $999. A quick-charge feature will give you up to six hours of play time in 15 minutes; an app for Android and iOS will give you some control over the headphones, but the company didn't go into specifics there. Harman-owned brand Mark Levinson also owns AKG, JBL and Harman Kardon.
ranked-summaries/Mark Levinson Airpods/Rank2.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ The Mark Levinson No. 5909 is the company's first wireless headphones. It packs 40mm Beryllium drivers and adaptive active noise cancellation. The headphones start at $999 and come in black, pewter and red color options. They'll be available starting today for the same price through Mark Levinson's website and Apple's site for pre-order. Harman-owned brand Mark Levinson also owns AKG, JBL and Harman Kardon, and is a Samsung subsidiary. The No. 5909 will be available in black, Pewter and red color options starting today for $999.
ranked-summaries/Mark Levinson Airpods/Rank3.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ The Mark Levinson No. 5909 is the company's first wireless headphones. It's "reference class" and comes in black, pewter and red color options.. The headphones start at $999 and will be available starting today in the U.S. A quick-charge feature will give you up to six hours of play time in 15 minutes, the company says, via an app for Android and iOS.The company also packed in four microphones for calls that are equipped with a so-called Smart Wind Adaption feature., via Bluetooth 5.1.
ranked-summaries/Novak Djokovic/Rank1.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ Novak Djokovic has been granted permission to play in the Australian Open. All players and staff at the tournament must be vaccinated or have an exemption. Djokovic has not spoken about his vaccination status, but last year said he was "opposed to vaccination" Australian Prime Minister Scott Morrison said there should be no special rules. He said Djokovic would be required to present evidence upon arrival that he has a genuine medical exemption from vaccination, or he would be "on the next plane home" The controversy comes as the country is seeing tens of thousands of Covid-19 cases for the first time.
ranked-summaries/Novak Djokovic/Rank2.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ Novak Djokovic has been granted permission to play in the Australian Open. All players and staff at the tournament must be vaccinated or have an exemption. Djokovic has not spoken about his vaccination status, but last year said he was "opposed to vaccination" Australian Prime Minister Scott Morrison said there should be no special rules. The controversy comes as the country is seeing tens of thousands of Covid-19 cases. Over 90% of Australia's over-16 population is fully vaccinated, but some Australians still cannot travel interstate or globally because of restrictions. A&E doctor Stephen Parnis tweeted: "I don't care how good a tennis player he is. If he's refusing to get vaccinated, he shouldn't be allowed in"
ranked-summaries/Novak Djokovic/Rank3.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ Novak Djokovic has been granted permission to play in the Australian Open. All players and staff at the tournament must be vaccinated or have an exemption.. Djokovic has not spoken about his vaccination status, but last year said he was "opposed to vaccination" Australian Prime Minister Scott Morrison says there should be no special rules for Djokovic, but adds he would be "on the next plane home" if he did not have the right evidence. in Australia is seeing tens of thousands of Covid-19 cases for the first time after enduring some of the world's strictest restrictions.
ranked-summaries/OnePlus 10 Pro/Rank1.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ The OnePlus 10 Pro is the company's first flagship phone. It will have the brand-new Qualcomm Snapdragon 8 Gen 1 processor. The phone is launching in China first on January 11. We don't have a price yet, but OnePlus' flagship prices have gone up every year so far. There's also no word on a US release date yet. This is not an official picture, but OnLeaks' clearly accurate render from November is still our only look at the front of the phone. We don't actually have a picture of the front yet, so above is OnLeaks' unofficial render from a few months ago. This has the camera hole on the left side instead of the middle. Other than that, it looks like every other Android phone on the market. It might be because of Oppo's influence, but OnePlus' launch is all sorts of weird this year.
ranked-summaries/OnePlus 10 Pro/Rank2.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ The OnePlus 10 Pro is the company's first flagship phone. It will have the brand-new Qualcomm Snapdragon 8 Gen 1 processor. The phone is launching in China first on January 11. There's no price yet, but the 9 Pro was $969. There's also no word on a US release date yet. The phone will go on sale in China on January 11 and then the U.S. later this year. We're already seeing the effects of the Oppo merger on the OnePlus 10 Pro. We got a glimpse of this design direction via the OnePlus 9's Android 12 update, and the reviews were not kind. But what really matters is the software, which will see OnePlus adopt Oppo's Color OS Android skin with a few custom tweaks rather than the separates the two companies were running.
ranked-summaries/OnePlus 10 Pro/Rank3.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ The OnePlus 10 Pro is the company's first flagship phone. It's the result of a merger between OnePlus and Oppo, which will be called "SuperVOOC" The phone is launching in China first on January 11. There's also no word on a US release date yet. The 10 Pro will have a 6.7-inch display and three cameras on the back. We don't have a price yet, but OnePlus' flagship prices have gone up every year so far, and the 9 Pro was $969. The phone will go on sale January 11 in China and January 18 in the U.S.
ranked-summaries/Tencent Holdings/Rank1.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ Tencent said late on Tuesday it had entered into a deal to reduce its stake in the Singapore-based gaming and e-commerce group to 18.7% from 21.3%. The company plans to retain the substantial majority of its stake in Sea for the long term. The sale comes after Tencent said last month it would divest $16.4 billion of its stake in JD.com. Sea's shares fell 11.4% on Tuesday in New York to $197.8 following the divestment news. Tencent will be subject to a lockup period that restricts further sale of Sea shares by Tencent during the next six months.
ranked-summaries/Tencent Holdings/Rank2.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ Tencent said late on Tuesday it had entered into a deal to reduce its stake in the Singapore-based gaming and e-commerce group to 18.7% from 21.3%. The company plans to retain the substantial majority of its stake in Sea for the long term. The sale comes after Tencent said last month it would divest $16.4 billion of its stake in JD.com. Sea's shares fell 11.4% on Tuesday in New York to $197.8 following the divestment news. Sea is expanding its e-commerce operations globally, benefiting from roaring demand for its services from consumers.
ranked-summaries/Tencent Holdings/Rank3.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ Tencent Holdings Ltd has raised $3 billion by selling 14.5 million shares in Sea. Sea owns e-commerce firm Shopee, according to a term sheet seen by Reuters on Wednesday. Tencent said late on Tuesday it had entered into a deal to reduce its stake in the Singapore-based group to 18.7% from 21.3%. The sale comes after Tencent said last month it would divest $16.4 billion of its stakes in JD.com and Six9, weakening its ties to China's second-biggest e- commerce firm. SEA's shares fell 11.4% on Tuesday in New York to $197.8 following the divestment news.
ranked-summaries/White House/Rank1.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ Mark Meadows will no longer cooperate with the House select committee, according to a letter from his attorney. The committee says it will move forward with a scheduled deposition with Meadows on Wednesday. By proceeding with the scheduled deposition, the committee is setting up a path to hold Meadows in criminal contempt. The panel is weighing multiple options, including immunity, that could pave the way for it to get the information it wants from Meadows. A source familiar with the matter told CNN that among the 6,000 pages of documents Meadows has already provided to the committee are communications from January 6.
ranked-summaries/White House/Rank2.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ Mark Meadows will no longer cooperate with the House select committee, his attorney says. The committee says it will move forward with a scheduled deposition with Meadows on Wednesday. By proceeding with the deposition, the committee is setting up a path to hold Meadows in criminal contempt. The panel is weighing multiple options, including immunity, that could pave the way for it to get the information it wants from Meadows. A source familiar with the matter told CNN that among the 6,000 pages of documents Meadows has already provided to the committee are communications from January 6. That is still unclear who communicated with Meadows but the source said that "many people had Meadows' cell phone"
ranked-summaries/White House/Rank3.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ Former White House chief of staff Mark Meadows will no longer cooperate with the House select committee. The committee says it will move forward with a scheduled deposition with Meadows on Wednesday. By proceeding with the scheduled deposition, the committee is setting up a path to hold Meadows in criminal contempt. A source familiar with the matter told CNN that among the 6,000 pages of documents Meadows has already provided to the committee are communications from January 6. A Democratic member of the committee said Meadows' about-face is due in part to learning over the weekend that the committee had "issued wide ranging subpoenas for information from a third party communications provider".