"""Gradio app: summarize the abstract on a PDF's first page and speak it."""

import functools
from io import BytesIO

import gradio as gr
import numpy  # not referenced in the visible code; kept in case it is relied on
import PyPDF2
import scipy  # not referenced in the visible code; kept in case it is relied on
import torch  # not referenced in the visible code; kept in case it is relied on
from gtts import gTTS
from transformers import pipeline

# Substring that marks the sentence preceding the abstract.
_ABSTRACT_MARKER = "Abstract"
# Number of sentences taken after the marker sentence.
_ABSTRACT_SENTENCES = 4
# Separator used both to split the page text and to re-join the selection.
_SENTENCE_SEPARATOR = ". "


def extract_text(pdf_file):
    """Return the text of the first page of ``pdf_file``.

    Args:
        pdf_file: Whatever ``PyPDF2.PdfReader`` accepts; here it is the value
            Gradio passes for the ``gr.File`` input.

    Returns:
        The result of ``extract_text()`` on page 0. Later pages are ignored.

    Raises:
        IndexError: If the document has no page 0.
    """
    reader = PyPDF2.PdfReader(pdf_file)
    first_page = reader.pages[0]
    return first_page.extract_text()


@functools.lru_cache(maxsize=1)
def _get_summarizer():
    """Build the summarization pipeline once and reuse it on later calls."""
    return pipeline("summarization", model="facebook/bart-large-cnn")


def _select_abstract(text):
    """Return the sentences that follow the first one containing the marker.

    When no sentence contains the marker, the selection starts at the
    beginning of ``text`` instead of failing.
    """
    sentences = text.split(_SENTENCE_SEPARATOR)
    # NOTE(review): the sentence containing the marker is itself skipped, so
    # abstract text sharing that sentence is dropped -- confirm this is wanted.
    start = next(
        (
            index + 1
            for index, sentence in enumerate(sentences)
            if _ABSTRACT_MARKER in sentence
        ),
        0,
    )
    selected = sentences[start:start + _ABSTRACT_SENTENCES]
    return _SENTENCE_SEPARATOR.join(selected)


def summarize_text(text):
    """Summarize the abstract found in ``text``.

    Args:
        text: Page text, split into sentences on ``". "``.

    Returns:
        The ``summary_text`` of the first summarizer result.

    Raises:
        ValueError: If there is no non-blank text to summarize.
    """
    abstract = _select_abstract(text)
    if not abstract.strip():
        raise ValueError("No text found to summarize.")

    summarizer = _get_summarizer()
    # truncation=True clips input that exceeds the model's maximum length
    # rather than letting the call fail on an over-long "sentence".
    summary = summarizer(
        abstract,
        max_length=50,
        min_length=30,
        truncation=True,
    )
    return summary[0]['summary_text']


def text_to_audio(text):
    """Convert ``text`` to English speech and return the encoded audio bytes.

    Args:
        text: The text to speak.

    Returns:
        Everything gTTS wrote to an in-memory buffer, as ``bytes``.
    """
    tts = gTTS(text, lang='en')
    buffer = BytesIO()
    tts.write_to_fp(buffer)
    return buffer.getvalue()


def audio_pdf(pdf_file):
    """Run the full pipeline: PDF -> first-page text -> summary -> audio.

    Args:
        pdf_file: The uploaded PDF, forwarded unchanged to ``extract_text``.

    Returns:
        The audio bytes produced by ``text_to_audio`` for the summary.
    """
    text = extract_text(pdf_file)
    summary = summarize_text(text)
    return text_to_audio(summary)


inputs = gr.File()
audio_summary = gr.Audio()

iface = gr.Interface(
    fn=audio_pdf,
    inputs=inputs,
    outputs=audio_summary,
    title="PDF Summarizer",
)

if __name__ == "__main__":
    # Start the web UI only when run as a script, so importing this module
    # does not launch a server.
    iface.launch()