gianb committed on
Commit
8a04506
1 Parent(s): 507b907

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -15
app.py CHANGED
@@ -4,7 +4,6 @@
4
 
5
  import gradio as gr
6
  import PyPDF2
7
- import pdfplumber
8
  from transformers import pipeline, AutoProcessor, AutoModel, AutoTokenizer
9
  from PyPDF2 import PdfReader
10
  import torch
@@ -16,33 +15,43 @@ import io
16
 
17
  #Here is the code
18
 
# Summarization pipeline backed by the long-T5 "book summary" checkpoint.
summarization = pipeline(
    task='summarization',
    model="pszemraj/long-t5-tglobal-base-16384-book-summary",
)
# Text-to-speech pipeline (MMS English voice) used to read the summary aloud.
synthesiser = pipeline(
    task="text-to-speech",
    model='facebook/mms-tts-eng',
)
21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
def summarize_and_speech(pdf_file):
    """Summarize the first page of a PDF and synthesize speech for the summary.

    Args:
        pdf_file: Raw PDF content as a bytes-like object (the value Gradio
            passes for ``gr.File(type="binary")``). ``None`` or empty input
            means no file is currently uploaded.

    Returns:
        A ``(summary, audio_data)`` tuple. ``audio_data`` is the ``"audio"``
        entry of the text-to-speech pipeline output, or ``None`` when there is
        nothing to summarize.
    """
    # With ``live=True`` the callback can fire with no file (e.g. after the
    # upload is cleared); return empty outputs instead of crashing.
    if not pdf_file:
        return "", None

    pdf_reader = PyPDF2.PdfReader(io.BytesIO(pdf_file))
    if len(pdf_reader.pages) == 0:
        return "The uploaded PDF has no pages.", None

    # Only the first page is read; the abstract is assumed to live there.
    abstract_text = pdf_reader.pages[0].extract_text() or ""
    if not abstract_text.strip():
        return "No text could be extracted from the first page of the PDF.", None

    summary = summarization(abstract_text, max_length=13, min_length=10)[0]['summary_text']

    # Reuse the module-level ``synthesiser`` pipeline rather than reloading the
    # model on every request.
    tts_output = synthesiser(summary)
    # The pipeline may return a single dict or a list of dicts.
    if isinstance(tts_output, (list, tuple)):
        tts_output = tts_output[0]
    audio_data = tts_output["audio"]

    return summary, audio_data
37
 
38
-
# Gradio UI: one PDF upload in, a text summary and its spoken version out.
iface = gr.Interface(
    fn=summarize_and_speech,
    # The file is handed to the callback as raw bytes.
    inputs=gr.File(label="Upload PDF", type="binary"),
    outputs=[
        gr.Textbox(label="Abstract Summary:"),
        gr.Audio(type="filepath", label="Summary_Speech"),
    ],
    # Re-run the callback automatically whenever the input changes.
    live=True,
    title="Abstract_Research_Paper_Summarizer",
    description=(
        "Upload a Research Paper PDF File. The model will generate a one line summary of the Abstract section and a speech audio."
    ),
)

# Start the web server for the interface.
iface.launch()
 
4
 
5
  import gradio as gr
6
  import PyPDF2
 
7
  from transformers import pipeline, AutoProcessor, AutoModel, AutoTokenizer
8
  from PyPDF2 import PdfReader
9
  import torch
 
15
 
16
  #Here is the code
17
 
# Both models are loaded once at import time and shared by every request.
summarization = pipeline(
    task='summarization',
    model='pszemraj/long-t5-tglobal-base-16384-book-summary',
)
synthesiser = pipeline(
    task="text-to-speech",
    model='facebook/mms-tts-eng',
)
20
 
def abstract_extract(uploaded_file):
    """Return the abstract section of a PDF supplied as raw bytes.

    Pages are scanned in order. On the first page whose text contains the word
    "abstract" (case-insensitive), the text from that word up to the next
    occurrence of "introduction" is returned. When no "introduction" follows
    on that page, the remainder of the page is returned.

    Args:
        uploaded_file: The PDF content as a bytes-like object. ``None`` or
            empty input is treated as "no document".

    Returns:
        The extracted abstract text, or an empty string when the input is
        empty or no page mentions an abstract.
    """
    if not uploaded_file:
        return ""

    # Imported locally so the function works regardless of whether the module
    # imports ``io`` or ``BytesIO`` at the top of the file.
    from io import BytesIO

    pdf_reader = PyPDF2.PdfReader(BytesIO(uploaded_file))

    for page in pdf_reader.pages:
        text = page.extract_text() or ""
        lowered = text.lower()

        start_index = lowered.find("abstract")
        if start_index == -1:
            continue

        # Search for the end marker only *after* the abstract heading, and fall
        # back to the end of the page when it is missing; a raw -1 from find()
        # would otherwise silently drop the last character of the page.
        end_index = lowered.find("introduction", start_index)
        if end_index == -1:
            end_index = len(text)
        return text[start_index:end_index]

    return ""
37
+
def summarize_and_speech(pdf_file):
    """Summarize a paper's abstract and synthesize speech for the summary.

    Args:
        pdf_file: Raw PDF content as a bytes-like object (the value Gradio
            passes for ``gr.File(type="binary")``). ``None`` or empty input
            means no file is currently uploaded.

    Returns:
        A ``(summary, audio_data)`` tuple. ``audio_data`` is the ``"audio"``
        entry of the text-to-speech pipeline output, or ``None`` when there is
        nothing to summarize.
    """
    # With ``live=True`` the callback can fire with no file (e.g. after the
    # upload is cleared); return empty outputs instead of crashing.
    if not pdf_file:
        return "", None

    abstract_text = abstract_extract(pdf_file)
    # Do not feed an empty string to the summarizer when no abstract was found.
    if not abstract_text.strip():
        return "No abstract section was found in the uploaded PDF.", None

    summary = summarization(abstract_text, max_length=15, min_length=10)[0]['summary_text']

    tts_output = synthesiser(summary)
    # The pipeline may return a single dict or a list of dicts.
    if isinstance(tts_output, (list, tuple)):
        tts_output = tts_output[0]
    # NOTE(review): the Gradio Audio output may expect a
    # (sampling_rate, waveform) tuple or a file path rather than a bare
    # array -- confirm against the Gradio version in use.
    audio_data = tts_output["audio"]

    return summary, audio_data
47
 
 
# Gradio UI: one PDF upload in, a text summary and its spoken version out.
iface = gr.Interface(
    fn=summarize_and_speech,
    # The file is handed to the callback as raw bytes.
    inputs=gr.File(label="Upload PDF", type="binary"),
    outputs=[
        gr.Textbox(label="Abstract Summary:"),
        gr.Audio(type="filepath", label="Summary Speech"),
    ],
    # Re-run the callback automatically whenever the input changes.
    live=True,
    title="Abstract Research Paper Summarizer",
    description=(
        "Upload a Research Paper PDF File. The model will generate a one line summary of the Abstract section and a speech audio."
    ),
)

# Start the web server for the interface.
iface.launch()