Alioth86 committed on
Commit
f76c355
1 Parent(s): dba7062

Add application file

Browse files
Files changed (1) hide show
  1. app.py +12 -6
app.py CHANGED
@@ -14,11 +14,13 @@ import sentencepiece as spm
14
  import os
15
  import tempfile
16
  import gradio as gr
 
17
 
18
- description = """**SpeechAbstractor**\n
19
  This app enables users to upload academic articles in PDF format, specifically focusing on abstracts.
20
  It efficiently summarizes the abstract and provides an audio playback of the summarized content.
21
- Below are some example PDFs for you to experiment with. Feel free to explore the functionality of SpeechAbstractor!"""
 
22
 
23
  examples = [
24
  ["Article_7.pdf"],["Article_11.pdf"]
@@ -90,11 +92,15 @@ def extract_abstract(text_per_pagy):
90
  if page_text:
91
  page_text = page_text.replace("- ", "")
92
 
93
- start_index = page_text.find("Abstract")
94
- if start_index != -1:
95
- start_index += len("Abstract") + 1
 
 
 
96
 
97
- end_markers = ["Introduction", "Summary", "Overview", "Background", "Contents"]
 
98
  end_index = -1
99
 
100
  for marker in end_markers:
 
14
  import os
15
  import tempfile
16
  import gradio as gr
17
+ from IPython.display import display, Markdown
18
 
19
+ description = display(Markdown("""##**SpeechAbstractor**\n
20
  This app enables users to upload academic articles in PDF format, specifically focusing on abstracts.
21
  It efficiently summarizes the abstract and provides an audio playback of the summarized content.
22
+ Below are some example PDFs for you to experiment with. Feel free to explore the functionality of SpeechAbstractor!
23
+ (Please note: it works only with articles with an Abstract)."""))
24
 
25
  examples = [
26
  ["Article_7.pdf"],["Article_11.pdf"]
 
92
  if page_text:
93
  page_text = page_text.replace("- ", "")
94
 
95
+ start_index = -1
96
+ for variant in ["Abstract", "abstract", "ABSTRACT"]:
97
+ start_index = page_text.find(variant)
98
+ if start_index != -1:
99
+ start_index += len(variant) + 1
100
+ break
101
 
102
+ if start_index != -1:
103
+ end_markers = ["Introduction", "INTRODUCTION", "Background", "Contents", "Keywords"]
104
  end_index = -1
105
 
106
  for marker in end_markers: