Peter committed on
Commit
4dc1508
·
1 Parent(s): a40d998

:sparkles: add file upload widget

Browse files
Files changed (1) hide show
  1. app.py +67 -23
app.py CHANGED
@@ -29,14 +29,20 @@ def proc_submission(
29
  max_input_length: int = 768,
30
  ):
31
  """
32
- proc_submission - a helper function for the gradio module
33
- Parameters
34
- ----------
35
- input_text : str, required, the text to be processed
36
- max_input_length : int, optional, the maximum length of the input text, default=512
37
- Returns
38
- -------
39
- str of HTML, the interactive HTML form for the model
 
 
 
 
 
 
40
  """
41
 
42
  settings = {
@@ -110,6 +116,27 @@ def load_single_example_text(
110
  text = clean(raw_text, lower=False)
111
  return text
112
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
 
114
  if __name__ == "__main__":
115
 
@@ -130,21 +157,21 @@ if __name__ == "__main__":
130
 
131
  gr.Markdown("## Load Inputs & Select Parameters")
132
  gr.Markdown(
133
- "Enter your text below or choose an example, and select the model size and parameters. Press the button to load examples."
134
  )
135
 
136
- model_size = gr.inputs.Radio(
137
- choices=["base", "large"], label="model size", default="large"
138
  )
139
- num_beams = gr.inputs.Slider(
140
- minimum=2, maximum=4, label="num_beams", default=2, step=1
 
 
141
  )
142
- token_batch_length = gr.inputs.Slider(
143
- minimum=512,
144
- maximum=1024,
145
- label="token_batch_length",
146
- default=512,
147
- step=256,
148
  )
149
  length_penalty = gr.inputs.Slider(
150
  minimum=0.5, maximum=1.0, label="length penalty", default=0.7, step=0.05
@@ -156,12 +183,14 @@ if __name__ == "__main__":
156
  default=3.5,
157
  step=0.1,
158
  )
159
- no_repeat_ngram_size = gr.inputs.Slider(
160
- minimum=2, maximum=4, label="no repeat ngram size", default=3, step=1
 
 
161
  )
162
  example_name = gr.Dropdown(
163
  list(name_to_path.keys()),
164
- label="Load Example",
165
  )
166
  load_examples_button = gr.Button(
167
  "Load Example",
@@ -171,10 +200,21 @@ if __name__ == "__main__":
171
  label="input text",
172
  placeholder="Enter text to summarize, the text will be cleaned and truncated on Spaces. Narrative, academic (both papers and lecture transcription), and article text work well. May take a bit to generate depending on the input text :)",
173
  )
 
 
 
 
 
 
 
 
 
174
 
175
  with gr.Column():
176
  gr.Markdown("## Generate Summary")
177
- gr.Markdown("Summary generation should take approximately 1-2 minutes for most settings.")
 
 
178
  summarize_button = gr.Button("Summarize!")
179
 
180
  output_text = gr.HTML("<p><em>Output will appear below:</em></p>")
@@ -202,6 +242,10 @@ if __name__ == "__main__":
202
  fn=load_single_example_text, inputs=[example_name], outputs=[input_text]
203
  )
204
 
 
 
 
 
205
  summarize_button.click(
206
  fn=proc_submission,
207
  inputs=[
 
29
  max_input_length: int = 768,
30
  ):
31
  """
32
+ proc_submission - a helper function for the gradio module to process submissions
33
+
34
+ Args:
35
+ input_text (str): the input text to summarize
36
+ model_size (str): the size of the model to use
37
+ num_beams (int): the number of beams to use
38
+ token_batch_length (int): the length of the token batches to use
39
+ length_penalty (float): the length penalty to use
40
+ repetition_penalty (float): the repetition penalty to use
41
+ no_repeat_ngram_size (int): the no repeat ngram size to use
42
+ max_input_length (int, optional): the maximum input length to use. Defaults to 768.
43
+
44
+ Returns:
45
+ str in HTML format, string of the summary, str of score
46
  """
47
 
48
  settings = {
 
116
  text = clean(raw_text, lower=False)
117
  return text
118
 
119
def load_uploaded_file(file_obj):
    """
    load_uploaded_file - read an uploaded file and return its cleaned text

    Args:
        file_obj: the value delivered by the upload widget. Either a single
            object exposing the file's path as ``.name``, or a list/tuple of
            such objects (only the first one is read). May be None or empty
            when nothing has been uploaded.

    Returns:
        str, the cleaned file contents, or an error message if no file was
        provided or the file could not be read
    """
    error_message = "Error: Could not read file. Ensure that it is a valid text file with encoding UTF-8."

    # The previous version always indexed [0], i.e. it expected a list. Accept a
    # single object as well so the function works regardless of which shape the
    # widget hands over.
    if isinstance(file_obj, (list, tuple)):
        file_obj = file_obj[0] if file_obj else None

    if file_obj is None:
        # Button clicked without an upload: report it instead of raising.
        logging.info("Trying to load an uploaded file, but no file was provided")
        return error_message

    file_path = None
    try:
        # Kept inside the try so an object without a usable `.name` is reported
        # through the same error message rather than crashing the callback.
        file_path = Path(file_obj.name)
        # errors="ignore" drops undecodable bytes instead of failing on them.
        with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
            raw_text = f.read()
        return clean(raw_text, lower=False)
    except Exception as e:
        # Deliberately broad: this is a UI boundary and the user should get a
        # readable message for any failure; the details go to the log.
        logging.info(f"Trying to load file with path {file_path}, error: {e}")
        return error_message
140
 
141
  if __name__ == "__main__":
142
 
 
157
 
158
  gr.Markdown("## Load Inputs & Select Parameters")
159
  gr.Markdown(
160
+ "Enter text below in the text area. The text will be summarized using the selected parameters. Optionally load an example from the list below or upload a file."
161
  )
162
 
163
+ model_size = gr.Radio(
164
+ choices=["base", "large"], label="model size", value="large"
165
  )
166
+ num_beams = gr.Radio(
167
+ choices=[2, 3, 4],
168
+ label="num beams",
169
+ value=2,
170
  )
171
+ token_batch_length = gr.Radio(
172
+ choices=[512, 768, 1024],
173
+ label="token batch length",
174
+ value=512,
 
 
175
  )
176
  length_penalty = gr.inputs.Slider(
177
  minimum=0.5, maximum=1.0, label="length penalty", default=0.7, step=0.05
 
183
  default=3.5,
184
  step=0.1,
185
  )
186
+ no_repeat_ngram_size = gr.Radio(
187
+ choices=[2, 3, 4],
188
+ label="no repeat ngram size",
189
+ value=3,
190
  )
191
  example_name = gr.Dropdown(
192
  list(name_to_path.keys()),
193
+ label="Choose an Example",
194
  )
195
  load_examples_button = gr.Button(
196
  "Load Example",
 
200
  label="input text",
201
  placeholder="Enter text to summarize, the text will be cleaned and truncated on Spaces. Narrative, academic (both papers and lecture transcription), and article text work well. May take a bit to generate depending on the input text :)",
202
  )
203
+ gr.Markdown("Upload your own file:")
204
+ uploaded_file = gr.File(
205
+ label="Upload a text file",
206
+ file_count=1,
207
+ type="file",
208
+ )
209
+ load_file_button = gr.Button(
210
+ "Load Uploaded File"
211
+ )
212
 
213
  with gr.Column():
214
  gr.Markdown("## Generate Summary")
215
+ gr.Markdown(
216
+ "Summary generation should take approximately 1-2 minutes for most settings."
217
+ )
218
  summarize_button = gr.Button("Summarize!")
219
 
220
  output_text = gr.HTML("<p><em>Output will appear below:</em></p>")
 
242
  fn=load_single_example_text, inputs=[example_name], outputs=[input_text]
243
  )
244
 
245
+ load_file_button.click(
246
+ fn=load_uploaded_file, inputs=[uploaded_file], outputs=[input_text]
247
+ )
248
+
249
  summarize_button.click(
250
  fn=proc_submission,
251
  inputs=[