Blaise-g committed on
Commit
e519161
·
1 Parent(s): f02c32b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -16
app.py CHANGED
@@ -77,7 +77,7 @@ def proc_submission(
77
 
78
  if processed["was_truncated"]:
79
  tr_in = processed["truncated_text"]
80
- msg = f"Input text was truncated to {max_input_length} words to fit within the computational constraints"
81
  logging.warning(msg)
82
  history["WARNING"] = msg
83
  else:
@@ -92,18 +92,12 @@ def proc_submission(
92
  **settings,
93
  )
94
  sum_text = [f"Section {i}: " + s["summary"][0] for i, s in enumerate(_summaries)]
95
- sum_scores = [
96
- f" - Section {i}: {round(s['summary_score'],4)}"
97
- for i, s in enumerate(_summaries)
98
- ]
99
  rates = [
100
  f" - Section {i}: {round(s['compression_rate'],3)}"
101
  for i, s in enumerate(_summaries)
102
  ]
103
 
104
  sum_text_out = "\n".join(sum_text)
105
- history["Summary Scores"] = "<br><br>"
106
- scores_out = "\n".join(sum_scores)
107
  history["Compression Rates"] = "<br><br>"
108
  rates_out = "\n".join(rates)
109
  rt = round((time.perf_counter() - st) / 60, 2)
@@ -188,7 +182,7 @@ if __name__ == "__main__":
188
  )
189
  num_beams = gr.Radio(
190
  choices=[2, 3, 4],
191
- label="Beam Search: # of Beams",
192
  value=2,
193
  )
194
  gr.Markdown(
@@ -249,12 +243,6 @@ if __name__ == "__main__":
249
  gr.Markdown(
250
  "The summary scores can be thought of as representing the quality of the summary. less-negative numbers (closer to 0) are better:"
251
  )
252
- summary_scores = gr.Textbox(
253
- label="Summary Scores ", placeholder="Summary scores will appear here"
254
- )
255
- gr.Markdown(
256
- "The compression rate indicates the ratio between the machine-generated summary length and the input text (from 0% to 100%). The higher the compression rate the more extreme the summary is."
257
- )
258
  compression_rate = gr.Textbox(
259
  label="Compression rate πŸ—œ", placeholder="The πŸ—œ will appear here"
260
  )
@@ -266,7 +254,7 @@ if __name__ == "__main__":
266
  "- [Blaise-g/longt5_tglobal_large_sumpubmed](https://huggingface.co/Blaise-g/longt5_tglobal_large_sumpubmed) is a fine-tuned checkpoint of [Stancld/longt5-tglobal-large-16384-pubmed-3k_steps](https://huggingface.co/Stancld/longt5-tglobal-large-16384-pubmed-3k_steps) on the [SumPubMed dataset](https://aclanthology.org/2021.acl-srw.30/). [Blaise-g/longt5_tglobal_large_scitldr](https://huggingface.co/Blaise-g/longt5_tglobal_large_scitldr) is a fine-tuned checkpoint of [Blaise-g/longt5_tglobal_large_sumpubmed](https://huggingface.co/Blaise-g/longt5_tglobal_large_sumpubmed) on the [Scitldr dataset](https://arxiv.org/abs/2004.15011). The goal was to create two models capable of handling the complex information contained in long biomedical documents and subsequently producing scientific summaries according to one of the two possible levels of conciseness: 1) A long explanatory synopsis that retains the majority of domain-specific language used in the original source text. 2)A one sentence long, TLDR style summary."
267
  )
268
  gr.Markdown(
269
- "- The two most important text generation parameters are the `num_beams` and `token_batch_length`. However, increasing them will also increase the amount of execution time needed to generate a summary."
270
  )
271
  gr.Markdown("---")
272
 
@@ -287,7 +275,7 @@ if __name__ == "__main__":
287
  token_batch_length,
288
  length_penalty,
289
  ],
290
- outputs=[output_text, summary_text, summary_scores, compression_rate],
291
  )
292
 
293
  demo.launch(enable_queue=True, share=False)
 
77
 
78
  if processed["was_truncated"]:
79
  tr_in = processed["truncated_text"]
80
+ msg = f"Input text was truncated to {max_input_length} words to fit within the computational constraints of the inference API"
81
  logging.warning(msg)
82
  history["WARNING"] = msg
83
  else:
 
92
  **settings,
93
  )
94
  sum_text = [f"Section {i}: " + s["summary"][0] for i, s in enumerate(_summaries)]
 
 
 
 
95
  rates = [
96
  f" - Section {i}: {round(s['compression_rate'],3)}"
97
  for i, s in enumerate(_summaries)
98
  ]
99
 
100
  sum_text_out = "\n".join(sum_text)
 
 
101
  history["Compression Rates"] = "<br><br>"
102
  rates_out = "\n".join(rates)
103
  rt = round((time.perf_counter() - st) / 60, 2)
 
182
  )
183
  num_beams = gr.Radio(
184
  choices=[2, 3, 4],
185
+ label="Beam Search: Number of Beams",
186
  value=2,
187
  )
188
  gr.Markdown(
 
243
  gr.Markdown(
244
  "The summary scores can be thought of as representing the quality of the summary. less-negative numbers (closer to 0) are better:"
245
  )
 
 
 
 
 
 
246
  compression_rate = gr.Textbox(
247
  label="Compression rate πŸ—œ", placeholder="The πŸ—œ will appear here"
248
  )
 
254
  "- [Blaise-g/longt5_tglobal_large_sumpubmed](https://huggingface.co/Blaise-g/longt5_tglobal_large_sumpubmed) is a fine-tuned checkpoint of [Stancld/longt5-tglobal-large-16384-pubmed-3k_steps](https://huggingface.co/Stancld/longt5-tglobal-large-16384-pubmed-3k_steps) on the [SumPubMed dataset](https://aclanthology.org/2021.acl-srw.30/). [Blaise-g/longt5_tglobal_large_scitldr](https://huggingface.co/Blaise-g/longt5_tglobal_large_scitldr) is a fine-tuned checkpoint of [Blaise-g/longt5_tglobal_large_sumpubmed](https://huggingface.co/Blaise-g/longt5_tglobal_large_sumpubmed) on the [Scitldr dataset](https://arxiv.org/abs/2004.15011). The goal was to create two models capable of handling the complex information contained in long biomedical documents and subsequently producing scientific summaries according to one of the two possible levels of conciseness: 1) A long explanatory synopsis that retains the majority of domain-specific language used in the original source text. 2)A one sentence long, TLDR style summary."
255
  )
256
  gr.Markdown(
257
+ "- The two most important text generation parameters are the `num_beams` and 'length_penalty': 1) Choosing a higher number of beams for the beam search algorithm results in generating a summary with higher probability (hence theoretically higher quality) at the cost of increasing computation times and memory usage. 2) The length penalty encourages the model to generate longer or shorter summary sequences by placing an exponential penalty on the beam score according to the current sequence length."
258
  )
259
  gr.Markdown("---")
260
 
 
275
  token_batch_length,
276
  length_penalty,
277
  ],
278
+ outputs=[output_text, summary_text, compression_rate],
279
  )
280
 
281
  demo.launch(enable_queue=True, share=False)