Peter committed on
Commit
98a3ea7
·
1 Parent(s): 8281a66

add base model for faster rt

Browse files
Files changed (1) hide show
  1. app.py +7 -3
app.py CHANGED
@@ -43,6 +43,7 @@ def truncate_word_count(text, max_words=512):
43
 
44
  def proc_submission(
45
  input_text: str,
 
46
  num_beams,
47
  token_batch_length,
48
  length_penalty,
@@ -74,6 +75,7 @@ def proc_submission(
74
 
75
  history = {}
76
  clean_text = clean(input_text, lower=False)
 
77
  processed = truncate_word_count(clean_text, max_input_length)
78
  if processed["was_truncated"]:
79
  tr_in = processed["truncated_text"]
@@ -86,8 +88,8 @@ def proc_submission(
86
 
87
  _summaries = summarize_via_tokenbatches(
88
  tr_in,
89
- model,
90
- tokenizer,
91
  batch_length=token_batch_length,
92
  **settings,
93
  )
@@ -128,6 +130,7 @@ def load_examples(examples_dir="examples"):
128
  if __name__ == "__main__":
129
 
130
  model, tokenizer = load_model_and_tokenizer("pszemraj/led-large-book-summary")
 
131
  title = "Long-Form Summarization: LED & BookSum"
132
  description = "A simple demo of how to use a fine-tuned LED model to summarize long-form text. [This model](https://huggingface.co/pszemraj/led-large-book-summary) is a fine-tuned version of [allenai/led-large-16384](https://huggingface.co/allenai/led-large-16384) on the [BookSum dataset](https://arxiv.org/abs/2105.08209). The goal was to create a model that can generalize well and is useful in summarizing lots of text in academic and daily usage."
133
 
@@ -137,8 +140,9 @@ if __name__ == "__main__":
137
  gr.inputs.Textbox(
138
  lines=10,
139
  label="input text",
140
- placeholder="Enter text to summarize, the text will be cleaned and truncated to 512 words on Spaces. Narrative, academic (both papers and lecture transcription), and article text work well.",
141
  ),
 
142
  gr.inputs.Slider(
143
  minimum=1, maximum=6, label="num_beams", default=4, step=1
144
  ),
 
43
 
44
  def proc_submission(
45
  input_text: str,
46
+ model_size: str,
47
  num_beams,
48
  token_batch_length,
49
  length_penalty,
 
75
 
76
  history = {}
77
  clean_text = clean(input_text, lower=False)
78
+ max_input_length = 1024 if model_size == "base" else max_input_length
79
  processed = truncate_word_count(clean_text, max_input_length)
80
  if processed["was_truncated"]:
81
  tr_in = processed["truncated_text"]
 
88
 
89
  _summaries = summarize_via_tokenbatches(
90
  tr_in,
91
+ model_sm if model_size == "base" else model,
92
+ tokenizer_sm if model_size == "base" else tokenizer,
93
  batch_length=token_batch_length,
94
  **settings,
95
  )
 
130
  if __name__ == "__main__":
131
 
132
  model, tokenizer = load_model_and_tokenizer("pszemraj/led-large-book-summary")
133
+ model_sm, tokenizer_sm = load_model_and_tokenizer("pszemraj/led-base-book-summary")
134
  title = "Long-Form Summarization: LED & BookSum"
135
  description = "A simple demo of how to use a fine-tuned LED model to summarize long-form text. [This model](https://huggingface.co/pszemraj/led-large-book-summary) is a fine-tuned version of [allenai/led-large-16384](https://huggingface.co/allenai/led-large-16384) on the [BookSum dataset](https://arxiv.org/abs/2105.08209). The goal was to create a model that can generalize well and is useful in summarizing lots of text in academic and daily usage."
136
 
 
140
  gr.inputs.Textbox(
141
  lines=10,
142
  label="input text",
143
+ placeholder="Enter text to summarize, the text will be cleaned and truncated on Spaces. Narrative, academic (both papers and lecture transcription), and article text work well. May take a bit to generate depending on the input text :)",
144
  ),
145
+ gr.inputs.radio(choices=['base', 'large'], label='model size', default='base'),
146
  gr.inputs.Slider(
147
  minimum=1, maximum=6, label="num_beams", default=4, step=1
148
  ),