raannakasturi committed on
Commit
9183d8e
·
1 Parent(s): 4a5f826

Refactor app.py for improved response handling and formatting; update extract_text.py and math_summarizer.py to use double-quoted string literals consistently

Browse files
Files changed (3) hide show
  1. app.py +14 -13
  2. extract_text.py +1 -1
  3. math_summarizer.py +2 -2
app.py CHANGED
@@ -4,15 +4,16 @@ import json
4
 
5
  def rexplore_summarizer(url, id, access_key):
6
  response = json.loads(main(url, id, access_key))
 
7
  if response["mindmap_status"] != "success":
8
- mindmap = "Error generating Mindmap"
9
  else:
10
  mindmap = response["mindmap"]
11
  if response["summary_status"] != "success":
12
- summary = "Error generating Summary"
13
  else:
14
  summary = response["summary"]
15
- return response, summary, mindmap
16
 
17
  def clear_everything(url, id, access_key, raw_data, summary, mindmap):
18
  return None, None, None, None, None, None
@@ -22,29 +23,29 @@ theme = gr.themes.Soft(
22
  secondary_hue="cyan",
23
  neutral_hue="slate",
24
  font=[
25
- gr.themes.GoogleFont('Syne'),
26
- gr.themes.GoogleFont('Poppins'),
27
- gr.themes.GoogleFont('Poppins'),
28
- gr.themes.GoogleFont('Poppins')
29
  ],
30
  )
31
 
32
  with gr.Blocks(theme=theme, title="ReXplore Summarizer", fill_height=True) as app:
33
  gr.HTML(
34
- value ='''
35
- <h1 style="text-align: center;">ReXplore Summarizer <p style="text-align: center;">Designed and Developed by <a href='https://raannakasturi.eu.org' target="_blank" rel="nofollow noreferrer external">Nayan Kasturi</a></p> </h1>
36
  <p style="text-align: center;">This app uses a hybrid approach to summarize PDF documents based on CPU as well as GPU.</p>
37
  <p style="text-align: center;">The app uses traditional methodologies such as TextRank, LSA, Luhn algorithms as well as large language model (LLM) to generate summaries as well as mindmaps.</p>
38
  <p style="text-align: center;">The summarization process can take some time depending on the size of the text corpus and the complexity of the content.</p>
39
- ''')
40
  with gr.Row():
41
  with gr.Column():
42
  url = gr.Textbox(label="PDF URL", placeholder="Paste the PDF URL here")
43
  id = gr.Textbox(label="DOI/arXiv ID", placeholder="Enter the DOI or arXiv ID of the document")
44
- access_key = gr.Textbox(label="Access Key", placeholder="Enter the Access Key", type='password')
45
  with gr.Row():
46
- clear_btn = gr.Button(value="Clear", variant='stop')
47
- summarize_btn = gr.Button(value="Summarize", variant='primary')
48
  raw_data = gr.TextArea(label="Raw Data", placeholder="The generated raw data will be displayed here", lines=7, interactive=False, show_copy_button=True)
49
  with gr.Row():
50
  summary = gr.TextArea(label="Summary", placeholder="The generated summary will be displayed here", lines=7, interactive=False, show_copy_button=True)
 
4
 
5
  def rexplore_summarizer(url, id, access_key):
6
  response = json.loads(main(url, id, access_key))
7
+ data = json.dumps(response, indent=4, ensure_ascii=False)
8
  if response["mindmap_status"] != "success":
9
+ mindmap = "error"
10
  else:
11
  mindmap = response["mindmap"]
12
  if response["summary_status"] != "success":
13
+ summary = "error"
14
  else:
15
  summary = response["summary"]
16
+ return data, summary, mindmap
17
 
18
  def clear_everything(url, id, access_key, raw_data, summary, mindmap):
19
  return None, None, None, None, None, None
 
23
  secondary_hue="cyan",
24
  neutral_hue="slate",
25
  font=[
26
+ gr.themes.GoogleFont("Syne"),
27
+ gr.themes.GoogleFont("Poppins"),
28
+ gr.themes.GoogleFont("Poppins"),
29
+ gr.themes.GoogleFont("Poppins")
30
  ],
31
  )
32
 
33
  with gr.Blocks(theme=theme, title="ReXplore Summarizer", fill_height=True) as app:
34
  gr.HTML(
35
+ value ="""
36
+ <h1 style="text-align: center;">ReXplore Summarizer <p style="text-align: center;">Designed and Developed by <a href="https://raannakasturi.eu.org" target="_blank" rel="nofollow noreferrer external">Nayan Kasturi</a></p> </h1>
37
  <p style="text-align: center;">This app uses a hybrid approach to summarize PDF documents based on CPU as well as GPU.</p>
38
  <p style="text-align: center;">The app uses traditional methodologies such as TextRank, LSA, Luhn algorithms as well as large language model (LLM) to generate summaries as well as mindmaps.</p>
39
  <p style="text-align: center;">The summarization process can take some time depending on the size of the text corpus and the complexity of the content.</p>
40
+ """)
41
  with gr.Row():
42
  with gr.Column():
43
  url = gr.Textbox(label="PDF URL", placeholder="Paste the PDF URL here")
44
  id = gr.Textbox(label="DOI/arXiv ID", placeholder="Enter the DOI or arXiv ID of the document")
45
+ access_key = gr.Textbox(label="Access Key", placeholder="Enter the Access Key", type="password")
46
  with gr.Row():
47
+ clear_btn = gr.Button(value="Clear", variant="stop")
48
+ summarize_btn = gr.Button(value="Summarize", variant="primary")
49
  raw_data = gr.TextArea(label="Raw Data", placeholder="The generated raw data will be displayed here", lines=7, interactive=False, show_copy_button=True)
50
  with gr.Row():
51
  summary = gr.TextArea(label="Summary", placeholder="The generated summary will be displayed here", lines=7, interactive=False, show_copy_button=True)
extract_text.py CHANGED
@@ -10,7 +10,7 @@ def download_pdf(url, id):
10
  try:
11
  response = requests.get(url)
12
  response.raise_for_status() # Raise an error for bad responses
13
- with open(file_path, 'wb') as file:
14
  file.write(response.content)
15
  except Exception as e:
16
  print(f"Error downloading PDF: {e}")
 
10
  try:
11
  response = requests.get(url)
12
  response.raise_for_status() # Raise an error for bad responses
13
+ with open(file_path, "wb") as file:
14
  file.write(response.content)
15
  except Exception as e:
16
  print(f"Error downloading PDF: {e}")
math_summarizer.py CHANGED
@@ -19,8 +19,8 @@ def generate_luhn_summary(research_paper_text):
19
 
20
  def sanitize_text(input_string):
21
  try:
22
- encoded_bytes = input_string.encode('utf-8')
23
- decoded_string = encoded_bytes.decode('utf-8')
24
  return decoded_string
25
  except UnicodeEncodeError as e:
26
  print(f"Encoding error: {e}")
 
19
 
20
  def sanitize_text(input_string):
21
  try:
22
+ encoded_bytes = input_string.encode("utf-8")
23
+ decoded_string = encoded_bytes.decode("utf-8")
24
  return decoded_string
25
  except UnicodeEncodeError as e:
26
  print(f"Encoding error: {e}")