Peter committed on
Commit
34de38e
1 Parent(s): 50d040d

✨ add ability to download outputs

Browse files

Signed-off-by: Peter <74869040+pszemraj@users.noreply.github.com>

Files changed (2) hide show
  1. app.py +13 -3
  2. utils.py +49 -2
app.py CHANGED
@@ -14,7 +14,7 @@ from doctr.models import ocr_predictor
14
  from pdf2text import convert_PDF_to_Text
15
 
16
  from summarize import load_model_and_tokenizer, summarize_via_tokenbatches
17
- from utils import load_example_filenames, truncate_word_count
18
 
19
  _here = Path(__file__).parent
20
 
@@ -125,7 +125,10 @@ def proc_submission(
125
 
126
  html += ""
127
 
128
- return html, sum_text_out, scores_out
 
 
 
129
 
130
 
131
  def load_single_example_text(
@@ -295,6 +298,13 @@ if __name__ == "__main__":
295
  label="Summary Scores", placeholder="Summary scores will appear here"
296
  )
297
 
 
 
 
 
 
 
 
298
  gr.Markdown("---")
299
  with gr.Column():
300
  gr.Markdown("### Advanced Settings")
@@ -351,7 +361,7 @@ if __name__ == "__main__":
351
  repetition_penalty,
352
  no_repeat_ngram_size,
353
  ],
354
- outputs=[output_text, summary_text, summary_scores],
355
  )
356
 
357
  demo.launch(enable_queue=True)
 
14
  from pdf2text import convert_PDF_to_Text
15
 
16
  from summarize import load_model_and_tokenizer, summarize_via_tokenbatches
17
+ from utils import load_example_filenames, truncate_word_count, saves_summary
18
 
19
  _here = Path(__file__).parent
20
 
 
125
 
126
  html += ""
127
 
128
+ # save to file
129
+ saved_file = saves_summary(_summaries)
130
+
131
+ return html, sum_text_out, scores_out, saved_file
132
 
133
 
134
  def load_single_example_text(
 
298
  label="Summary Scores", placeholder="Summary scores will appear here"
299
  )
300
 
301
+ text_file = gr.File(
302
+ label="Download Summary as Text File",
303
+ file_count="single",
304
+ type="file",
305
+ interactive=False,
306
+ )
307
+
308
  gr.Markdown("---")
309
  with gr.Column():
310
  gr.Markdown("### Advanced Settings")
 
361
  repetition_penalty,
362
  no_repeat_ngram_size,
363
  ],
364
+ outputs=[output_text, summary_text, summary_scores, text_file],
365
  )
366
 
367
  demo.launch(enable_queue=True)
utils.py CHANGED
@@ -4,11 +4,17 @@
4
 
5
  import re
6
  from pathlib import Path
7
-
8
  from natsort import natsorted
9
  import subprocess
10
 
11
-
 
 
 
 
 
 
12
  def truncate_word_count(text, max_words=512):
13
  """
14
  truncate_word_count - a helper function for the gradio module
@@ -67,3 +73,44 @@ def load_example_filenames(example_path: str or Path):
67
  # load the examples into a list
68
  examples = {f.name: f for f in example_path.glob("*.txt")}
69
  return examples
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
  import re
6
  from pathlib import Path
7
+ from datetime import datetime
8
  from natsort import natsorted
9
  import subprocess
10
 
11
def get_timestamp(fmt: str = "%Y%m%d_%H%M%S") -> str:
    """
    get_timestamp - get a timestamp for the current local time

    Args:
        fmt (str, optional): strftime format string used to render the time.
            Defaults to "%Y%m%d_%H%M%S" (e.g. 20230115_134501).

    Returns:
        str, the timestamp
    """
    # datetime.now() is the naive local time; the default format has no
    # separators other than "_" so the result can be embedded in a filename
    return datetime.now().strftime(fmt)
18
  def truncate_word_count(text, max_words=512):
19
  """
20
  truncate_word_count - a helper function for the gradio module
 
73
  # load the examples into a list
74
  examples = {f.name: f for f in example_path.glob("*.txt")}
75
  return examples
76
+
77
def saves_summary(summarize_output, outpath: str or Path = None, add_signature=True):
    """
    saves_summary - save the summary generated from summarize_via_tokenbatches() to a text file

    The file contains, in order: an optional signature line, the per-batch
    summaries joined by a newline + tab, and a "Section Scores" list with each
    score rounded to 4 decimal places.

    Example of producing the expected input:

        _summaries = summarize_via_tokenbatches(
            text,
            batch_length=token_batch_length,
            batch_stride=batch_stride,
            **settings,
        )

    Args:
        summarize_output (list of dict): one entry per batch; each entry is read
            as entry["summary"][0] and entry["summary_score"]
        outpath (str or Path, optional): where to write the file. Defaults to
            "document_summary_<timestamp>.txt" in the current working directory.
        add_signature (bool, optional): write the attribution header before the
            summary. Defaults to True.

    Returns:
        Path, the path of the file that was written
    """
    outpath = (
        Path.cwd() / f"document_summary_{get_timestamp()}.txt"
        if outpath is None
        else Path(outpath)
    )
    sum_text = [s["summary"][0] for s in summarize_output]
    sum_scores = [f"\n - {round(s['summary_score'], 4)}" for s in summarize_output]
    scores_text = "\n".join(sum_scores)
    full_summary = "\n\t".join(sum_text)

    # explicit UTF-8: summaries may contain non-ASCII text and the platform
    # default encoding is not guaranteed to be able to represent it
    with open(outpath, "w", encoding="utf-8") as fo:
        if add_signature:
            fo.write(
                "Generated with the Document Summarization space :) https://hf.co/spaces/pszemraj/document-summarization\n\n"
            )
        fo.write(full_summary)
        # spacer between the summary body and the scores section
        fo.write("\n" * 3)
        fo.write("\n\nSection Scores:\n")
        fo.write(scores_text)
        fo.write("\n\n---\n")

    return outpath