pszemraj committed on
Commit
55b49e6
1 Parent(s): 73feb19

Signed-off-by: peter szemraj <peterszemraj@gmail.com>

Files changed (3) hide show
  1. app.py +1 -0
  2. summarize.py +2 -0
  3. utils.py +3 -3
app.py CHANGED
@@ -334,6 +334,7 @@ if __name__ == "__main__":
334
  uploaded_file = gr.File(
335
  label="File Upload",
336
  file_count="single",
 
337
  type="file",
338
  )
339
  with gr.Row():
 
334
  uploaded_file = gr.File(
335
  label="File Upload",
336
  file_count="single",
337
+ file_types=[".txt", ".md", ".pdf"],
338
  type="file",
339
  )
340
  with gr.Row():
summarize.py CHANGED
@@ -114,7 +114,9 @@ def summarize_via_tokenbatches(
114
  tokenizer (): the tokenizer to use for summarization
115
  batch_length (int, optional): the length of each batch. Defaults to 2048.
116
  batch_stride (int, optional): the stride of each batch. Defaults to 16. The stride is the number of tokens that overlap between batches.
 
117
 
 
118
  Returns:
119
  list: a list of dictionaries containing the input tokens, the summary, and the summary score
120
  """
 
114
  tokenizer (): the tokenizer to use for summarization
115
  batch_length (int, optional): the length of each batch. Defaults to 2048.
116
  batch_stride (int, optional): the stride of each batch. Defaults to 16. The stride is the number of tokens that overlap between batches.
117
+ min_batch_length (int, optional): the minimum length of each batch. Defaults to 512.
118
 
119
+ **kwargs: any additional arguments to pass to the model for inference
120
  Returns:
121
  list: a list of dictionaries containing the input tokens, the summary, and the summary score
122
  """
utils.py CHANGED
@@ -156,7 +156,7 @@ def extract_keywords(
156
  for keyword in keywords:
157
  if not any(fuzz.ratio(keyword, other) > 70 for other in final_keywords):
158
  final_keywords.append(keyword)
159
- logger.info(f"Keywords (final):\t{final_keywords}")
160
  return final_keywords
161
 
162
 
@@ -178,9 +178,9 @@ def saves_summary(
178
  full_summary = "\n".join(sum_text)
179
 
180
  keywords = "_".join(extract_keywords(full_summary))
181
- logger.info(f"kw:\t{keywords}")
182
  outpath = (
183
- Path.cwd() / f"document_summary_{get_timestamp()}_{keywords}.txt"
184
  if outpath is None
185
  else Path(outpath)
186
  )
 
156
  for keyword in keywords:
157
  if not any(fuzz.ratio(keyword, other) > 70 for other in final_keywords):
158
  final_keywords.append(keyword)
159
+ logger.debug(f"Keywords (final):\t{final_keywords}")
160
  return final_keywords
161
 
162
 
 
178
  full_summary = "\n".join(sum_text)
179
 
180
  keywords = "_".join(extract_keywords(full_summary))
181
+ logger.debug(f"kw:\t{keywords}")
182
  outpath = (
183
+ Path.cwd() / f"document_summary_{keywords}_{get_timestamp()}.txt"
184
  if outpath is None
185
  else Path(outpath)
186
  )