Spaces:

mrsk1883
/

testingwspace

Sleeping

App Files Community

mrsk1883 committed on Dec 9, 2023

Commit

6416b3a

•

1 Parent(s): ac303b5

Update app.py

Browse files

Files changed (1) hide show

app.py +48 -45

app.py CHANGED Viewed

@@ -10,53 +10,56 @@ model_name = "pszemraj/led-large-book-summary"
 summarizer = pipeline("summarization", model=model_name, tokenizer=model_name)
 def extract_abstract_and_summarize(pdf_file):
-  try:
-    with open(pdf_file, "rb") as file:
-      pdf_reader = PdfReader(file)
-      abstract_text = ""
-      for page_num in range(len(pdf_reader.pages)):
-        page = pdf_reader.pages[page_num]
-        text = page.extract_text()
-        abstract_match = re.search(r"\bAbstract\b", text, re.IGNORECASE)
-        if abstract_match:
-          start_index = abstract_match.end()
-          introduction_match = re.search(r"\bIntroduction\b", text[start_index:], re.IGNORECASE)
-          if introduction_match:
-            end_index = start_index + introduction_match.start()
-          else:
-            end_index = None
-          abstract_text = text[start_index:end_index]
-          break
-      # Summarize the extracted abstract using the LED-large model
-      result = summarizer(abstract_text)
-      # Print the entire result for debugging
-      print("Result:", result)
-      # Check if 'summary' is present in the result
-      if result and isinstance(result, list) and len(result) > 0:
-        summary = result[0].get('summary', 'Summary not available.')
-      else:
-        summary = "Summary not available."
-      # Generate audio
-      speech = gTTS(text=summary, lang="en")
-      speech_bytes = BytesIO()
-      speech.write_to_fp(speech_bytes)
-      # Return individual output values
-      return summary, speech_bytes.getvalue(), abstract_text.strip()
-  except Exception as e:
-    raise Exception(str(e))
 interface = gr.Interface(
-  fn=extract_abstract_and_summarize,
-  inputs=[gr.File(label="Upload PDF")],
-  outputs=[gr.Textbox(label="Summary"), gr.Audio()],
-  title="PDF Summarization & Audio Tool",
-  description="""PDF Summarization App. This app extracts the abstract from a PDF, summarizes it using the 'pszemraj/led-large-book-summary' model, and generates an audio of it. Only upload PDFs with abstracts. Please read the README.MD for information about the app and sample PDFs."""
 )
 interface.launch()

 # Module-level summarization pipeline; model_name is used for both the model and its tokenizer.
 summarizer = pipeline("summarization", model=model_name, tokenizer=model_name)
 def extract_abstract_and_summarize(pdf_file):
+    """Extract a PDF's abstract, summarize it, and synthesize the summary as speech.
+
+    The abstract is the text between the first whole-word "Abstract" and the
+    next whole-word "Introduction" on the same page (case-insensitive). If no
+    "Introduction" follows on that page, the rest of the page is used.
+
+    Args:
+        pdf_file: Path of the uploaded PDF; it is passed directly to ``open``.
+
+    Returns:
+        A ``(summary, audio_bytes, abstract)`` tuple: the summary text, the
+        gTTS audio of that summary as raw bytes, and the stripped abstract.
+
+    Raises:
+        Exception: On any failure (missing upload, unreadable PDF, no abstract
+            found, model or TTS error). The original error is chained as
+            ``__cause__``.
+    """
+    try:
+        if pdf_file is None:
+            raise ValueError("PDF file is not provided.")
+
+        abstract_text = ""
+        with open(pdf_file, "rb") as file:
+            # Only the first page that mentions "Abstract" is considered.
+            for page in PdfReader(file).pages:
+                # Guard against pages that yield no text at all.
+                text = page.extract_text() or ""
+                abstract_match = re.search(r"\bAbstract\b", text, re.IGNORECASE)
+                if not abstract_match:
+                    continue
+                start_index = abstract_match.end()
+                introduction_match = re.search(
+                    r"\bIntroduction\b", text[start_index:], re.IGNORECASE
+                )
+                # The second search ran on a slice, so its offset is
+                # relative to start_index.
+                end_index = (
+                    start_index + introduction_match.start()
+                    if introduction_match
+                    else None
+                )
+                abstract_text = text[start_index:end_index].strip()
+                break
+
+        # Fail early with a clear message instead of feeding an empty string
+        # to the model and the speech synthesizer.
+        if not abstract_text:
+            raise ValueError("No abstract found in the PDF.")
+
+        # Summarize the extracted abstract using the LED-large model
+        result = summarizer(abstract_text)
+        # Print the entire result for debugging
+        print("Result:", result)
+
+        # The summarization pipeline returns [{"summary_text": ...}], so that
+        # is the key to read; fall back to a placeholder if it is absent.
+        if result and isinstance(result, list):
+            summary = result[0].get("summary_text", "Summary not available.")
+        else:
+            summary = "Summary not available."
+
+        # Generate audio
+        speech = gTTS(text=summary, lang="en")
+        speech_bytes = BytesIO()
+        speech.write_to_fp(speech_bytes)
+
+        # Return individual output values
+        return summary, speech_bytes.getvalue(), abstract_text
+    except Exception as e:
+        # Keep the generic exception type callers see, but chain the cause so
+        # the original traceback is not lost.
+        raise Exception(str(e)) from e
 # Gradio UI: one PDF upload in; summary text, summary audio and the extracted abstract out.
 interface = gr.Interface(
+    fn=extract_abstract_and_summarize,
+    inputs=[gr.File(label="Upload PDF")],
+    # One output component per value returned by extract_abstract_and_summarize
+    # (summary, audio bytes, abstract); with only two components the abstract
+    # was never displayed.
+    outputs=[
+        gr.Textbox(label="Summary"),
+        gr.Audio(),
+        gr.Textbox(label="Extracted Abstract"),
+    ],
+    title="PDF Summarization & Audio Tool",
+    description="""PDF Summarization App. This app extracts the abstract from a PDF, summarizes it using the 'pszemraj/led-large-book-summary' model, and generates an audio of it. Only upload PDFs with abstracts. Please read the README.MD for information about the app and sample PDFs."""
 )
 interface.launch()