File size: 1,478 Bytes
a582605
2753e83
a582605
 
2753e83
 
a582605
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37c818f
 
a582605
 
 
 
 
 
 
 
 
4c3923d
a582605
 
 
5e46dcf
a582605
 
37c818f
5e46dcf
 
a582605
 
4c3923d
a582605
b8e363b
5e46dcf
5cdcbb9
c2e2b8a
 
 
a582605
 
4c3923d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import gradio as gr
import torch
import PyPDF2
from transformers import pipeline 
import numpy
import scipy
from gtts import gTTS
from io import BytesIO

def extract_text(pdf_file):
    """Return the text extracted from the first page of a PDF.

    Only page index 0 is read; later pages are ignored.

    Args:
        pdf_file: Whatever ``PyPDF2.PdfReader`` accepts as its source
            (presumably a path or binary file object -- confirm against
            the caller).

    Returns:
        The result of ``extract_text()`` on the first page.
    """
    return PyPDF2.PdfReader(pdf_file).pages[0].extract_text()

# Lazily-built summarization pipeline, shared by every call so the model is
# loaded once instead of on each request.
_SUMMARIZER = None

def _get_summarizer():
    """Return the shared summarization pipeline, creating it on first use."""
    global _SUMMARIZER
    if _SUMMARIZER is None:
        _SUMMARIZER = pipeline("summarization",
                               model="facebook/bart-large-cnn")
    return _SUMMARIZER

def _select_abstract(text):
    """Return the four sentences following the first one mentioning "Abstract".

    Sentences are delimited by ". ". When no sentence contains "Abstract",
    the first four sentences of *text* are used instead.
    """
    sentences = text.split(". ")
    start = 0  # fallback: no "Abstract" marker, start from the beginning
    for i, sentence in enumerate(sentences):
        if "Abstract" in sentence:
            # The matching sentence itself is skipped; the excerpt starts
            # with the sentence after it.
            start = i + 1
            break
    return ". ".join(sentences[start:start + 4])

def summarize_text(text):
    """Summarize the abstract found in *text*.

    The excerpt to summarize is the four ". "-separated sentences that follow
    the first sentence containing "Abstract". If the word never appears, the
    first four sentences of the text are summarized instead of failing.

    Args:
        text: Raw text, e.g. the first page of a paper.

    Returns:
        The generated summary string.

    Raises:
        ValueError: If *text* is empty or there is nothing to summarize
            after the "Abstract" marker.
    """
    abstract = _select_abstract(text or "")
    if not abstract.strip():
        # Fail with a clear message before loading or invoking the model.
        raise ValueError("No text available to summarize.")
    summary = _get_summarizer()(
        abstract,
        max_length=50,
        min_length=30,
        do_sample=False,
        # Guard against excerpts longer than the model's input limit.
        truncation=True,
    )
    return summary[0]['summary_text']

def text_to_audio(text):
    """Convert *text* to English speech with gTTS and return the audio bytes.

    The audio is written to an in-memory buffer rather than to disk.

    Args:
        text: The text to be spoken.

    Returns:
        The complete contents of the buffer gTTS wrote to, as ``bytes``.
    """
    speech = gTTS(text, lang='en')
    with BytesIO() as stream:
        speech.write_to_fp(stream)
        # getvalue() returns the whole buffer regardless of cursor position.
        return stream.getvalue()

def audio_pdf(pdf_file):
    """Run the full pipeline: PDF -> extracted text -> summary -> speech.

    Args:
        pdf_file: The PDF source handed to ``extract_text``.

    Returns:
        A ``(summary, audio)`` tuple: the summary string and the value
        returned by ``text_to_audio`` for that summary.
    """
    summary = summarize_text(extract_text(pdf_file))
    return summary, text_to_audio(summary)

# Gradio components: one file input, and two outputs that receive the
# (summary, audio) tuple returned by audio_pdf, in that order.
inputs = gr.File() 
summary_text = gr.Text()
audio_summary = gr.Audio()


# Wire audio_pdf to the components above.
iface = gr.Interface(
    fn=audio_pdf,
    inputs=inputs,
    outputs=[summary_text,audio_summary],
    title="PDF Audio Summarizer ",
    description="App to turn an abstract into audio",
    # NOTE(review): these example PDFs are referenced by relative path, so
    # they presumably must sit next to this script -- confirm they are shipped.
    examples=["Attention_is_all_you_need.pdf", 
              "ImageNet_Classification.pdf"
             ]
)

# Starts the app as soon as the module is executed (no __main__ guard).
iface.launch()