Commit
·
9183d8e
1
Parent(s):
4a5f826
Refactor app.py for improved response handling and formatting; update extract_text.py and math_summarizer.py for consistent string encoding
Browse files
- app.py +14 -13
- extract_text.py +1 -1
- math_summarizer.py +2 -2
app.py
CHANGED
|
@@ -4,15 +4,16 @@ import json
|
|
| 4 |
|
| 5 |
def rexplore_summarizer(url, id, access_key):
|
| 6 |
response = json.loads(main(url, id, access_key))
|
|
|
|
| 7 |
if response["mindmap_status"] != "success":
|
| 8 |
-
mindmap = "
|
| 9 |
else:
|
| 10 |
mindmap = response["mindmap"]
|
| 11 |
if response["summary_status"] != "success":
|
| 12 |
-
summary = "
|
| 13 |
else:
|
| 14 |
summary = response["summary"]
|
| 15 |
-
return
|
| 16 |
|
| 17 |
def clear_everything(url, id, access_key, raw_data, summary, mindmap):
|
| 18 |
return None, None, None, None, None, None
|
|
@@ -22,29 +23,29 @@ theme = gr.themes.Soft(
|
|
| 22 |
secondary_hue="cyan",
|
| 23 |
neutral_hue="slate",
|
| 24 |
font=[
|
| 25 |
-
gr.themes.GoogleFont(
|
| 26 |
-
gr.themes.GoogleFont(
|
| 27 |
-
gr.themes.GoogleFont(
|
| 28 |
-
gr.themes.GoogleFont(
|
| 29 |
],
|
| 30 |
)
|
| 31 |
|
| 32 |
with gr.Blocks(theme=theme, title="ReXplore Summarizer", fill_height=True) as app:
|
| 33 |
gr.HTML(
|
| 34 |
-
value =
|
| 35 |
-
<h1 style="text-align: center;">ReXplore Summarizer <p style="text-align: center;">Designed and Developed by <a href=
|
| 36 |
<p style="text-align: center;">This app uses a hybrid approach to summarize PDF documents based on CPU as well as GPU.</p>
|
| 37 |
<p style="text-align: center;">The app uses traditional methodologies such as TextRank, LSA, Luhn algorithms as well as large language model (LLM) to generate summaries as well as mindmaps.</p>
|
| 38 |
<p style="text-align: center;">The summarization process can take some time depending on the size of the text corpus and the complexity of the content.</p>
|
| 39 |
-
|
| 40 |
with gr.Row():
|
| 41 |
with gr.Column():
|
| 42 |
url = gr.Textbox(label="PDF URL", placeholder="Paste the PDF URL here")
|
| 43 |
id = gr.Textbox(label="DOI/arXiv ID", placeholder="Enter the DOI or arXiv ID of the document")
|
| 44 |
-
access_key = gr.Textbox(label="Access Key", placeholder="Enter the Access Key", type=
|
| 45 |
with gr.Row():
|
| 46 |
-
clear_btn = gr.Button(value="Clear", variant=
|
| 47 |
-
summarize_btn = gr.Button(value="Summarize", variant=
|
| 48 |
raw_data = gr.TextArea(label="Raw Data", placeholder="The generated raw data will be displayed here", lines=7, interactive=False, show_copy_button=True)
|
| 49 |
with gr.Row():
|
| 50 |
summary = gr.TextArea(label="Summary", placeholder="The generated summary will be displayed here", lines=7, interactive=False, show_copy_button=True)
|
|
|
|
| 4 |
|
| 5 |
def rexplore_summarizer(url, id, access_key):
|
| 6 |
response = json.loads(main(url, id, access_key))
|
| 7 |
+
data = json.dumps(response, indent=4, ensure_ascii=False)
|
| 8 |
if response["mindmap_status"] != "success":
|
| 9 |
+
mindmap = "error"
|
| 10 |
else:
|
| 11 |
mindmap = response["mindmap"]
|
| 12 |
if response["summary_status"] != "success":
|
| 13 |
+
summary = "error"
|
| 14 |
else:
|
| 15 |
summary = response["summary"]
|
| 16 |
+
return data, summary, mindmap
|
| 17 |
|
| 18 |
def clear_everything(url, id, access_key, raw_data, summary, mindmap):
|
| 19 |
return None, None, None, None, None, None
|
|
|
|
| 23 |
secondary_hue="cyan",
|
| 24 |
neutral_hue="slate",
|
| 25 |
font=[
|
| 26 |
+
gr.themes.GoogleFont("Syne"),
|
| 27 |
+
gr.themes.GoogleFont("Poppins"),
|
| 28 |
+
gr.themes.GoogleFont("Poppins"),
|
| 29 |
+
gr.themes.GoogleFont("Poppins")
|
| 30 |
],
|
| 31 |
)
|
| 32 |
|
| 33 |
with gr.Blocks(theme=theme, title="ReXplore Summarizer", fill_height=True) as app:
|
| 34 |
gr.HTML(
|
| 35 |
+
value ="""
|
| 36 |
+
<h1 style="text-align: center;">ReXplore Summarizer <p style="text-align: center;">Designed and Developed by <a href="https://raannakasturi.eu.org" target="_blank" rel="nofollow noreferrer external">Nayan Kasturi</a></p> </h1>
|
| 37 |
<p style="text-align: center;">This app uses a hybrid approach to summarize PDF documents based on CPU as well as GPU.</p>
|
| 38 |
<p style="text-align: center;">The app uses traditional methodologies such as TextRank, LSA, Luhn algorithms as well as large language model (LLM) to generate summaries as well as mindmaps.</p>
|
| 39 |
<p style="text-align: center;">The summarization process can take some time depending on the size of the text corpus and the complexity of the content.</p>
|
| 40 |
+
""")
|
| 41 |
with gr.Row():
|
| 42 |
with gr.Column():
|
| 43 |
url = gr.Textbox(label="PDF URL", placeholder="Paste the PDF URL here")
|
| 44 |
id = gr.Textbox(label="DOI/arXiv ID", placeholder="Enter the DOI or arXiv ID of the document")
|
| 45 |
+
access_key = gr.Textbox(label="Access Key", placeholder="Enter the Access Key", type="password")
|
| 46 |
with gr.Row():
|
| 47 |
+
clear_btn = gr.Button(value="Clear", variant="stop")
|
| 48 |
+
summarize_btn = gr.Button(value="Summarize", variant="primary")
|
| 49 |
raw_data = gr.TextArea(label="Raw Data", placeholder="The generated raw data will be displayed here", lines=7, interactive=False, show_copy_button=True)
|
| 50 |
with gr.Row():
|
| 51 |
summary = gr.TextArea(label="Summary", placeholder="The generated summary will be displayed here", lines=7, interactive=False, show_copy_button=True)
|
extract_text.py
CHANGED
|
@@ -10,7 +10,7 @@ def download_pdf(url, id):
|
|
| 10 |
try:
|
| 11 |
response = requests.get(url)
|
| 12 |
response.raise_for_status() # Raise an error for bad responses
|
| 13 |
-
with open(file_path,
|
| 14 |
file.write(response.content)
|
| 15 |
except Exception as e:
|
| 16 |
print(f"Error downloading PDF: {e}")
|
|
|
|
| 10 |
try:
|
| 11 |
response = requests.get(url)
|
| 12 |
response.raise_for_status() # Raise an error for bad responses
|
| 13 |
+
with open(file_path, "wb") as file:
|
| 14 |
file.write(response.content)
|
| 15 |
except Exception as e:
|
| 16 |
print(f"Error downloading PDF: {e}")
|
math_summarizer.py
CHANGED
|
@@ -19,8 +19,8 @@ def generate_luhn_summary(research_paper_text):
|
|
| 19 |
|
| 20 |
def sanitize_text(input_string):
|
| 21 |
try:
|
| 22 |
-
encoded_bytes = input_string.encode(
|
| 23 |
-
decoded_string = encoded_bytes.decode(
|
| 24 |
return decoded_string
|
| 25 |
except UnicodeEncodeError as e:
|
| 26 |
print(f"Encoding error: {e}")
|
|
|
|
| 19 |
|
| 20 |
def sanitize_text(input_string):
|
| 21 |
try:
|
| 22 |
+
encoded_bytes = input_string.encode("utf-8")
|
| 23 |
+
decoded_string = encoded_bytes.decode("utf-8")
|
| 24 |
return decoded_string
|
| 25 |
except UnicodeEncodeError as e:
|
| 26 |
print(f"Encoding error: {e}")
|