Spaces:

gianb
/

PDF_Summarizer_and_TTS

Running

App Files Files Community

gianb committed on Dec 4, 2023

Commit

8a04506

•

1 Parent(s): 507b907

Update app.py

Browse files

Files changed (1) hide show

app.py +24 -15

app.py CHANGED Viewed

@@ -4,7 +4,6 @@
 import gradio as gr
 import PyPDF2
-import pdfplumber
 from transformers import pipeline, AutoProcessor, AutoModel, AutoTokenizer
 from PyPDF2 import PdfReader
 import torch
@@ -16,33 +15,43 @@ import io
 #Here is the code
-summarization = pipeline ('summarization', model = "pszemraj/long-t5-tglobal-base-16384-book-summary")
 synthesiser = pipeline("text-to-speech", model='facebook/mms-tts-eng')
 def summarize_and_speech(pdf_file):
-    pdf_bytes_io = io.BytesIO(pdf_file)
-    pdf_reader = PyPDF2.PdfReader(pdf_bytes_io)
-    abstract_text = pdf_reader.pages[0].extract_text()
-    summary = summarization(abstract_text, max_length=13, min_length=10)[0]['summary_text']
-    print(summary)
-    # Use a text-to-speech model to generate audio
-    synthesiser = pipeline("text-to-speech", model='facebook/mms-tts-eng')
     tts_output = synthesiser(summary)
-    print(tts_output)
     audio_data = tts_output[0]["audio"]
     return summary, audio_data
 iface = gr.Interface(
-    fn= summarize_and_speech,
     inputs=gr.File(label="Upload PDF", type="binary"),
-    outputs=[gr.Textbox(label="Abstract Summary:"), gr.Audio(type="filepath", label="Summary_Speech")],
     live=True,
-    title="Abstract_Research_Paper_Summarizer",
-    description="Upload a Research Paper PDF File. The model will generate a one line summary of the Abstract section and a speech audio.",
 )
 iface.launch()

 import gradio as gr
 import PyPDF2
 from transformers import pipeline, AutoProcessor, AutoModel, AutoTokenizer
 from PyPDF2 import PdfReader
 import torch
 #Here is the code
+# Both pipelines are created once at module import and reused by
+# summarize_and_speech on every call.
+summarization = pipeline('summarization', model='pszemraj/long-t5-tglobal-base-16384-book-summary')
 synthesiser = pipeline("text-to-speech", model='facebook/mms-tts-eng')
+def abstract_extract(uploaded_file):
+    """Return the abstract section of a PDF supplied as raw bytes.
+
+    Pages are scanned in order. On the first page whose extracted text
+    contains "abstract" (case-insensitive), the text from that word up to
+    the next occurrence of "introduction" is returned. If "introduction"
+    does not follow on that page, the rest of the page is returned.
+
+    Args:
+        uploaded_file: The PDF file content as bytes.
+
+    Returns:
+        The extracted abstract text, or "" when no page mentions "abstract".
+    """
+    # Imported locally so the function does not depend on a module-level
+    # `from io import BytesIO` being present.
+    from io import BytesIO
+
+    pdf_reader = PyPDF2.PdfReader(BytesIO(uploaded_file))
+    for page in pdf_reader.pages:
+        # Guard against pages that yield no text at all.
+        text = page.extract_text() or ""
+        lowered = text.lower()
+        start_index = lowered.find("abstract")
+        if start_index == -1:
+            continue
+        # Look for the end marker only AFTER the start marker; an earlier
+        # "introduction" (e.g. in a header) must not produce an empty slice.
+        end_index = lowered.find("introduction", start_index)
+        if end_index == -1:
+            # find() returns -1 when missing; using that as a slice bound
+            # would silently cut the last character instead of signalling
+            # "not found", so fall back to the end of the page.
+            end_index = len(text)
+        return text[start_index:end_index]
+    return ""
 def summarize_and_speech(pdf_file):
+    """Summarize the abstract of an uploaded PDF and synthesize it as speech.
+
+    Args:
+        pdf_file: The PDF content as bytes (the interface passes the upload
+            with type="binary"); may be empty/None when nothing is uploaded.
+
+    Returns:
+        A (summary, audio) pair. `summary` is the generated summary text.
+        `audio` is a (sampling_rate, waveform) tuple for the audio output,
+        or None when there is nothing to speak.
+    """
+    if not pdf_file:
+        # No upload yet (or the upload was cleared): nothing to process.
+        return "", None
+    abstract_text = abstract_extract(pdf_file)
+    if not abstract_text.strip():
+        # Avoid running the models on empty input.
+        return "No abstract section was found in the uploaded PDF.", None
+    summary = summarization(abstract_text, max_length=15, min_length=10)[0]['summary_text']
     tts_output = synthesiser(summary)
+    # For a single input string the text-to-speech pipeline is documented to
+    # return one dict ("audio", "sampling_rate"); accept the list form too.
+    speech = tts_output[0] if isinstance(tts_output, list) else tts_output
+    # The audio output component takes (sample_rate, samples); squeeze()
+    # drops a leading channel axis of size 1 if the waveform has one.
+    audio_data = (speech["sampling_rate"], speech["audio"].squeeze())
     return summary, audio_data
+# Web UI: a single PDF upload goes in (handed to the callback as bytes,
+# per type="binary"); a summary textbox and an audio player come out.
 iface = gr.Interface(
+    fn=summarize_and_speech,
     inputs=gr.File(label="Upload PDF", type="binary"),
+    outputs=[gr.Textbox(label="Abstract Summary:"), gr.Audio(type="filepath", label="Summary Speech")],
+    # NOTE(review): live=True presumably re-runs the callback whenever the
+    # upload changes, without a submit click - confirm against Gradio docs.
     live=True,
+    title="Abstract Research Paper Summarizer",
+    description="Upload a Research Paper PDF File. The model will generate a one line summary of the Abstract section and a speech audio."
 )
+# Starts the app server at import/run time of this script.
 iface.launch()