# https://huggingface.co/spaces/azsalihu/AbstractSummary_To_Audio

# Here are the imports
from functools import lru_cache
from io import BytesIO

import gradio as gr
import numpy as np
import PyPDF2
import scipy
import torch
from gtts import gTTS
from IPython.display import Audio, display
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from transformers import pipeline

# Hugging Face checkpoint used for summarization.
SUMMARY_MODEL_NAME = "pszemraj/led-base-book-summary"

# Number of sentences kept after the sentence that mentions "Abstract".
ABSTRACT_SENTENCE_COUNT = 7


# Extracting Text function
def extract_text(article):
    """Return the text of the first page of a PDF.

    Args:
        article: The PDF to read, passed straight to ``PyPDF2.PdfReader``
            (presumably a path or file-like object supplied by the Gradio
            file input -- verify against the installed Gradio version).

    Returns:
        The text extracted from page 0 only; later pages are ignored.

    Raises:
        IndexError: If the PDF contains no pages.
    """
    pdf_reader = PyPDF2.PdfReader(article)
    first_page = pdf_reader.pages[0]
    return first_page.extract_text()


@lru_cache(maxsize=1)
def _load_summarizer():
    """Load the tokenizer and model once and reuse them across requests."""
    tokenizer = AutoTokenizer.from_pretrained(SUMMARY_MODEL_NAME)
    model = AutoModelForSeq2SeqLM.from_pretrained(SUMMARY_MODEL_NAME)
    return tokenizer, model


def _extract_abstract(text):
    """Return the sentences that follow the first mention of "Abstract".

    The text is split on ". " and the ``ABSTRACT_SENTENCE_COUNT`` sentences
    after the first one containing "Abstract" are joined back together.
    When no sentence contains "Abstract", the first sentences of the text
    are used instead, so the caller always gets something to summarize
    rather than an UnboundLocalError.
    """
    sentences = text.split(". ")
    start = 0  # fallback: no "Abstract" marker found on the page
    for i, sentence in enumerate(sentences):
        if "Abstract" in sentence:
            start = i + 1
            break
    return ". ".join(sentences[start:start + ABSTRACT_SENTENCE_COUNT])


def _trim_to_last_sentence(summary):
    """Cut ``summary`` after its last period; return it unchanged if none."""
    last_period = summary.rfind(".")
    if last_period == -1:
        return summary
    return summary[:last_period + 1]


# Summarization Function
def summarize_abstract(text):
    """Summarize the abstract found in ``text``.

    Args:
        text: Page text, expected to contain a sentence mentioning
            "Abstract" followed by the abstract itself.

    Returns:
        The generated summary, truncated after its last complete sentence
        (the generation length cap can otherwise leave a dangling fragment).
    """
    abstract = _extract_abstract(text)
    tokenizer, model = _load_summarizer()

    # Tokenize abstract
    inputs = tokenizer(
        abstract, max_length=1024, return_tensors="pt", truncation=True
    )

    # Generate summary
    summary_ids = model.generate(
        inputs["input_ids"],
        max_length=50,
        min_length=30,
        no_repeat_ngram_size=3,
        encoder_no_repeat_ngram_size=3,
        repetition_penalty=3.5,
        num_beams=4,
        do_sample=True,
        early_stopping=False,
    )
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    return _trim_to_last_sentence(summary)


# Abstract to Audio Function
def abstract_to_audio(text):
    """Convert ``text`` to English speech with gTTS.

    Args:
        text: The text to be spoken.

    Returns:
        The bytes gTTS wrote to an in-memory buffer.
    """
    tts = gTTS(text, lang="en")
    buffer = BytesIO()
    tts.write_to_fp(buffer)
    # Rewind so the read below starts at the beginning of the written data.
    buffer.seek(0)
    return buffer.read()


# Combining Extracting text, Summarization, Abstract to Audio functions
def abstract_audio(article):
    """Run the full pipeline: PDF -> first-page text -> summary -> audio.

    Args:
        article: The uploaded PDF, forwarded to ``extract_text``.

    Returns:
        A ``(summary, audio)`` tuple matching the two Interface outputs.
    """
    text = extract_text(article)
    summary = summarize_abstract(text)
    audio = abstract_to_audio(summary)
    return summary, audio


inputs = gr.File()
summary_text = gr.Text()
audio_summary = gr.Audio()

# Building Gradio Interface
myApp = gr.Interface(
    fn=abstract_audio,
    inputs=inputs,
    outputs=[summary_text, audio_summary],
    title="Summary of Abstract to Audio ",
    # The backslash is escaped so the displayed text is unchanged while
    # avoiding the invalid "\J" escape sequence.
    description="An App that helps you summarises the abstract of an Article\\Journal and gives the audio of the summary",
    examples=["NIPS-2015-hidden-technical-debt-in-machine-learning-systems-Paper.pdf"],
)

myApp.launch()