pszemraj committed on
Commit
c13ffb4
•
1 Parent(s): 665f924

Signed-off-by: peter szemraj <peterszemraj@gmail.com>

Files changed (1) hide show
  1. app.py +33 -34
app.py CHANGED
@@ -72,7 +72,7 @@ def proc_submission(
72
  # create elaborate HTML warning
73
  input_wc = re.split(r"\s+", input_text)
74
  msg = f"""
75
- <div style="background-color: #f44336; color: white; padding: 20px;">
76
  <h3>Warning</h3>
77
  <p>Input text was truncated to {max_input_length} words. This is about {100*max_input_length/len(input_wc):.2f}% of the submission.</p>
78
  </div>
@@ -104,7 +104,7 @@ def proc_submission(
104
  html = ""
105
  html += f"<p>Runtime: {rt} minutes on CPU</p>"
106
  if msg is not None:
107
- html += f"<h2>WARNING:</h2><hr><b>{msg}</b><br><br>"
108
 
109
  html += ""
110
 
@@ -225,36 +225,7 @@ if __name__ == "__main__":
225
  label="Beam Search: # of Beams",
226
  value=2,
227
  )
228
- gr.Markdown(
229
- "_The base model is less performant than the large model, but is faster and will accept up to 2048 words per input (Large model accepts up to 768)._"
230
- )
231
- with gr.Row():
232
- length_penalty = gr.inputs.Slider(
233
- minimum=0.5,
234
- maximum=1.0,
235
- label="length penalty",
236
- default=0.7,
237
- step=0.05,
238
- )
239
- token_batch_length = gr.Radio(
240
- choices=[512, 768, 1024, 1536],
241
- label="token batch length",
242
- value=1024,
243
- )
244
 
245
- with gr.Row():
246
- repetition_penalty = gr.inputs.Slider(
247
- minimum=1.0,
248
- maximum=5.0,
249
- label="repetition penalty",
250
- default=3.5,
251
- step=0.1,
252
- )
253
- no_repeat_ngram_size = gr.Radio(
254
- choices=[2, 3, 4],
255
- label="no repeat ngram size",
256
- value=3,
257
- )
258
  with gr.Row():
259
  example_name = gr.Dropdown(
260
  list(name_to_path.keys()),
@@ -268,10 +239,10 @@ if __name__ == "__main__":
268
  label="Input Text (for summarization)",
269
  placeholder="Enter text to summarize, the text will be cleaned and truncated on Spaces. Narrative, academic (both papers and lecture transcription), and article text work well. May take a bit to generate depending on the input text :)",
270
  )
271
- gr.Markdown("Upload your own file:")
272
  with gr.Row():
273
  uploaded_file = gr.File(
274
- label="Upload a text file",
275
  file_count="single",
276
  type="file",
277
  )
@@ -302,9 +273,37 @@ if __name__ == "__main__":
302
  )
303
 
304
  gr.Markdown("---")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
305
 
 
 
 
 
 
 
 
 
 
 
 
 
 
306
  with gr.Column():
307
- gr.Markdown("## About the Model")
308
  gr.Markdown(
309
  "- [This model](https://huggingface.co/pszemraj/led-large-book-summary) is a fine-tuned checkpoint of [allenai/led-large-16384](https://huggingface.co/allenai/led-large-16384) on the [BookSum dataset](https://arxiv.org/abs/2105.08209).The goal was to create a model that can generalize well and is useful in summarizing lots of text in academic and daily usage."
310
  )
 
72
  # create elaborate HTML warning
73
  input_wc = re.split(r"\s+", input_text)
74
  msg = f"""
75
+ <div style="background-color: #FFA500; color: white; padding: 20px;">
76
  <h3>Warning</h3>
77
  <p>Input text was truncated to {max_input_length} words. This is about {100*max_input_length/len(input_wc):.2f}% of the submission.</p>
78
  </div>
 
104
  html = ""
105
  html += f"<p>Runtime: {rt} minutes on CPU</p>"
106
  if msg is not None:
107
+ html += msg
108
 
109
  html += ""
110
 
 
225
  label="Beam Search: # of Beams",
226
  value=2,
227
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
228
 
 
 
 
 
 
 
 
 
 
 
 
 
 
229
  with gr.Row():
230
  example_name = gr.Dropdown(
231
  list(name_to_path.keys()),
 
239
  label="Input Text (for summarization)",
240
  placeholder="Enter text to summarize, the text will be cleaned and truncated on Spaces. Narrative, academic (both papers and lecture transcription), and article text work well. May take a bit to generate depending on the input text :)",
241
  )
242
+ gr.Markdown("Upload a file (`.txt` or `.pdf`)")
243
  with gr.Row():
244
  uploaded_file = gr.File(
245
+ label="Upload file",
246
  file_count="single",
247
  type="file",
248
  )
 
273
  )
274
 
275
  gr.Markdown("---")
276
+ with gr.Column():
277
+ gr.Markdown("### Advanced Settings")
278
+ with gr.Row():
279
+ length_penalty = gr.inputs.Slider(
280
+ minimum=0.5,
281
+ maximum=1.0,
282
+ label="length penalty",
283
+ default=0.7,
284
+ step=0.05,
285
+ )
286
+ token_batch_length = gr.Radio(
287
+ choices=[512, 768, 1024, 1536],
288
+ label="token batch length",
289
+ value=1024,
290
+ )
291
 
292
+ with gr.Row():
293
+ repetition_penalty = gr.inputs.Slider(
294
+ minimum=1.0,
295
+ maximum=5.0,
296
+ label="repetition penalty",
297
+ default=3.5,
298
+ step=0.1,
299
+ )
300
+ no_repeat_ngram_size = gr.Radio(
301
+ choices=[2, 3, 4],
302
+ label="no repeat ngram size",
303
+ value=3,
304
+ )
305
  with gr.Column():
306
+ gr.Markdown("### About the Model")
307
  gr.Markdown(
308
  "- [This model](https://huggingface.co/pszemraj/led-large-book-summary) is a fine-tuned checkpoint of [allenai/led-large-16384](https://huggingface.co/allenai/led-large-16384) on the [BookSum dataset](https://arxiv.org/abs/2105.08209).The goal was to create a model that can generalize well and is useful in summarizing lots of text in academic and daily usage."
309
  )