File size: 1,623 Bytes
a582605 2753e83 a582605 2753e83 a582605 5f6f8b5 a582605 1647f6a a582605 5f6f8b5 a582605 5f6f8b5 8c41423 37c818f a582605 4c3923d a582605 5e46dcf a582605 37c818f 5e46dcf a582605 4c3923d a582605 b8e363b 9ce669c 2d4f77c c2e2b8a a582605 4c3923d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
import gradio as gr
import torch
import PyPDF2
from transformers import pipeline
import numpy
import scipy
from gtts import gTTS
from io import BytesIO
from transformers import BartTokenizer
def extract_text(pdf_file):
    """Return the text extracted from the first page of a PDF.

    Only page 0 is read; any further pages are ignored.

    Args:
        pdf_file: The PDF source handed straight to ``PyPDF2.PdfReader``.

    Returns:
        Whatever ``extract_text()`` yields for the first page.

    Raises:
        IndexError: If the document has no pages.
    """
    first_page = PyPDF2.PdfReader(pdf_file).pages[0]
    return first_page.extract_text()
_ABSTRACT_MARKER = "Abstract"
_ABSTRACT_SENTENCES = 4  # number of sentences taken after the marker sentence
_SUMMARIZER_MODEL = "facebook/bart-large-cnn"
_summarizer = None  # built lazily by _get_summarizer()


def _get_summarizer():
    """Return the shared summarization pipeline, creating it on first use."""
    global _summarizer
    if _summarizer is None:
        # Building the pipeline loads the model weights, so do it once
        # instead of on every request.
        tokenizer = BartTokenizer.from_pretrained(_SUMMARIZER_MODEL)
        _summarizer = pipeline(
            "summarization", model=_SUMMARIZER_MODEL, tokenizer=tokenizer
        )
    return _summarizer


def _select_abstract(text):
    """Pick the sentences of ``text`` that should be summarized."""
    sentences = text.split(". ")
    marker_idx = next(
        (i for i, sentence in enumerate(sentences)
         if _ABSTRACT_MARKER in sentence),
        None,
    )
    if marker_idx is None:
        # No "Abstract" heading on the page: fall back to the opening
        # sentences rather than failing on an unbound index.
        window = sentences[:_ABSTRACT_SENTENCES]
    else:
        first = marker_idx + 1
        window = sentences[first:first + _ABSTRACT_SENTENCES]
        if not window:
            # The marker sits in the last sentence, so nothing follows it;
            # keep whatever text trails the marker inside that sentence.
            window = [sentences[marker_idx].partition(_ABSTRACT_MARKER)[2]]
    return ". ".join(window)


def summarize_text(text):
    """Summarize the abstract found in ``text``.

    The text is split on ``". "``. The four sentences following the first
    sentence that contains "Abstract" are joined back together and passed to
    the ``facebook/bart-large-cnn`` summarization pipeline with
    ``max_length`` and ``min_length`` both set to 30 and sampling disabled.
    If no sentence contains "Abstract", the first four sentences of the text
    are summarized instead.

    Args:
        text: Plain text, typically the first page of a paper.

    Returns:
        The ``summary_text`` field of the pipeline's first result.

    Raises:
        ValueError: If ``text`` is empty or blank, or if no text to summarize
            can be located in it.
    """
    if not text or not text.strip():
        raise ValueError("Cannot summarize empty text.")

    abstract = _select_abstract(text)
    if not abstract.strip():
        raise ValueError("No abstract text found to summarize.")

    result = _get_summarizer()(
        abstract, max_length=30, min_length=30, do_sample=False
    )
    return result[0]['summary_text']
def text_to_audio(text):
    """Convert ``text`` to English speech and return the encoded audio.

    The speech is synthesized with gTTS into an in-memory buffer, so no
    file is written to disk.

    Args:
        text: The text to be spoken.

    Returns:
        The complete contents of the buffer gTTS wrote to, as ``bytes``.
    """
    audio_stream = BytesIO()
    gTTS(text, lang='en').write_to_fp(audio_stream)
    # getvalue() returns the whole buffer regardless of the stream position.
    return audio_stream.getvalue()
def audio_pdf(pdf_file):
    """Produce a written and a spoken summary for an uploaded PDF.

    Chains the three steps of the app: extract the text, summarize it,
    then turn the summary into audio.

    Args:
        pdf_file: The PDF to process, forwarded to ``extract_text``.

    Returns:
        A ``(summary, audio)`` tuple: the summary string and the audio
        returned by ``text_to_audio`` for that summary.
    """
    summary = summarize_text(extract_text(pdf_file))
    return summary, text_to_audio(summary)
# Gradio components: one uploaded file in, the summary text and its audio out.
inputs = gr.File()
summary_text = gr.Text()
audio_summary = gr.Audio()

# Wire the components to audio_pdf; the output order matches its
# (summary, audio) return value.
iface = gr.Interface(
    fn=audio_pdf,
    inputs=inputs,
    outputs=[summary_text, audio_summary],
    title="PDF Audio Summarizer 📻",
    description="App that converts an abstract into audio",
    examples=[
        "Attention_is_all_you_need.pdf",
        "ImageNet_Classification.pdf",
    ],
)

iface.launch()