"""Gradio app: read the first page of a PDF, summarize its abstract, speak it."""

import functools
from io import BytesIO

import gradio as gr
import numpy
import PyPDF2
import scipy
import torch
from gtts import gTTS
from transformers import pipeline

# Word whose presence marks the sentence preceding the abstract body.
ABSTRACT_MARKER = "Abstract"
# Number of ". "-separated sentences fed to the summarizer.
ABSTRACT_SENTENCE_COUNT = 4
SUMMARY_MODEL = "facebook/bart-large-cnn"


def extract_text(pdf_file):
    """Return the text extracted from the first page of *pdf_file*.

    Only page 0 is read; later pages are ignored.
    """
    reader = PyPDF2.PdfReader(pdf_file)
    return reader.pages[0].extract_text()


@functools.lru_cache(maxsize=1)
def _get_summarizer():
    """Build the summarization pipeline once and reuse it across requests."""
    return pipeline("summarization", model=SUMMARY_MODEL)


def _abstract_excerpt(text):
    """Return the sentences that follow the first one containing the marker.

    Falls back to the leading sentences of *text* when the marker is absent
    or when nothing follows it, so the caller always gets a usable excerpt
    for non-empty input.
    """
    sentences = text.split(". ")
    marker_index = next(
        (i for i, sentence in enumerate(sentences) if ABSTRACT_MARKER in sentence),
        None,
    )
    # The sentence holding the marker itself is skipped, as before.
    start = 0 if marker_index is None else marker_index + 1
    excerpt = sentences[start:start + ABSTRACT_SENTENCE_COUNT]
    if not excerpt:
        excerpt = sentences[:ABSTRACT_SENTENCE_COUNT]
    return ". ".join(excerpt)


def summarize_text(text):
    """Summarize the abstract found in *text* and return the summary string.

    The abstract is approximated as the few sentences after the first
    sentence containing "Abstract"; if no such sentence exists the start of
    *text* is summarized instead of failing.
    """
    abstract = _abstract_excerpt(text)
    summarizer = _get_summarizer()
    summary = summarizer(abstract, max_length=50, min_length=30, do_sample=False)
    return summary[0]['summary_text']


def text_to_audio(text):
    """Synthesize English speech for *text* with gTTS and return the raw bytes."""
    tts = gTTS(text, lang='en')
    buffer = BytesIO()
    tts.write_to_fp(buffer)
    return buffer.getvalue()


def audio_pdf(pdf_file):
    """Gradio handler: return ``(summary, audio_bytes)`` for an uploaded PDF.

    Raises:
        gr.Error: if no text could be extracted from the first page, so the
            user sees a message instead of a failure deeper in the pipeline.
    """
    text = extract_text(pdf_file)
    if not text or not text.strip():
        raise gr.Error("No extractable text found on the first page of the PDF.")
    summary = summarize_text(text)
    audio = text_to_audio(summary)
    return summary, audio


inputs = gr.File()
summary_text = gr.Text()
audio_summary = gr.Audio()

iface = gr.Interface(
    fn=audio_pdf,
    inputs=inputs,
    outputs=[summary_text, audio_summary],
    title="PDF Audio Summarizer ",
    description="App to turn an abstract into audio",
    examples=[
        "Attention_is_all_you_need.pdf",
        "ImageNet_Classification.pdf",
    ],
)

iface.launch()