# Spaces: Runtime error
# https://huggingface.co/spaces/azsalihu/AbstractSummary_To_Audio | |
# Here are the imports | |
import torch | |
import PyPDF2 | |
import gradio as gr | |
from IPython.display import Audio, display | |
from transformers import pipeline | |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM | |
import numpy as np | |
import scipy | |
from gtts import gTTS | |
from io import BytesIO | |
# Extracting Text function | |
def extract_text(article):
    """Return the text found on the first page of a PDF.

    Only page index 0 is read; any further pages are ignored.

    Args:
        article: The PDF source handed straight to ``PyPDF2.PdfReader``
            (presumably the uploaded file coming from the Gradio input --
            confirm against the caller).

    Returns:
        Whatever ``extract_text()`` yields for the first page.
    """
    reader = PyPDF2.PdfReader(article)
    first_page = reader.pages[0]
    page_text = first_page.extract_text()
    return page_text
# Summarization Function | |
# Checkpoint used for summarization.
_SUMMARY_MODEL_NAME = "pszemraj/led-base-book-summary"

# (tokenizer, model) pairs keyed by checkpoint name, so the weights are loaded
# once per process instead of on every request.
_SUMMARIZER_CACHE = {}


def _select_abstract(text, marker="Abstract", max_sentences=7):
    """Pick the sentences that follow the first sentence containing *marker*."""
    sentences = text.split(". ")
    # Start right after the marker sentence; fall back to the beginning of the
    # text when the marker never appears (previously an UnboundLocalError).
    start = next(
        (i + 1 for i, sentence in enumerate(sentences) if marker in sentence),
        0,
    )
    selected = sentences[start:start + max_sentences]
    if not selected:
        # The marker sat in the very last sentence: nothing follows it, so use
        # the leading sentences rather than feeding the model an empty string.
        selected = sentences[:max_sentences]
    return ". ".join(selected)


def _load_summarizer(model_name=_SUMMARY_MODEL_NAME):
    """Return the cached (tokenizer, model) pair for *model_name*."""
    if model_name not in _SUMMARIZER_CACHE:
        _SUMMARIZER_CACHE[model_name] = (
            AutoTokenizer.from_pretrained(model_name),
            AutoModelForSeq2SeqLM.from_pretrained(model_name),
        )
    return _SUMMARIZER_CACHE[model_name]


def summarize_abstract(text):
    """Summarize the abstract found in *text*.

    The text is split on ``". "``; up to seven sentences following the first
    sentence that contains ``"Abstract"`` are treated as the abstract. When
    no sentence contains the marker, or nothing follows it, the first seven
    sentences of the text are used instead. Generation uses sampling
    (``do_sample=True``), so repeated calls may return different summaries.

    Args:
        text: Plain text of the article page.

    Returns:
        The generated summary, cut after its last ``"."`` when it has one.

    Raises:
        ValueError: If *text* is empty or contains only whitespace.
    """
    if not text or not text.strip():
        raise ValueError("No text was provided to summarize.")

    abstract = _select_abstract(text)
    tokenizer, model = _load_summarizer()

    # Tokenize abstract
    inputs = tokenizer(abstract, max_length=1024, return_tensors="pt", truncation=True)

    # Generate summary
    summary_ids = model.generate(
        inputs["input_ids"],
        max_length=50,
        min_length=30,
        no_repeat_ngram_size=3,
        encoder_no_repeat_ngram_size=3,
        repetition_penalty=3.5,
        num_beams=4,
        do_sample=True,
        early_stopping=False,
    )
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

    # max_length can cut generation mid-sentence; drop the dangling fragment
    # after the last full stop, if there is one.
    last_period = summary.rfind(".")
    if last_period != -1:
        summary = summary[:last_period + 1]
    return summary
# Abstract to Audio Function
def abstract_to_audio(text):
    """Synthesize English speech for *text* and return it as raw bytes.

    The audio is written by gTTS into an in-memory buffer, so no file is
    created on disk.

    Args:
        text: The text to be spoken.

    Returns:
        The complete contents of the buffer gTTS wrote to (presumably
        MP3-encoded audio -- confirm against the gTTS documentation).
    """
    speech = gTTS(text, lang='en')
    audio_stream = BytesIO()
    speech.write_to_fp(audio_stream)
    # getvalue() returns the whole buffer regardless of the stream position.
    return audio_stream.getvalue()
# Combining Extracting text, Summarization, Abstract to Audio functions | |
def abstract_audio(article):
    """Run the full pipeline: PDF -> text -> summary -> spoken audio.

    Args:
        article: The PDF input, forwarded unchanged to ``extract_text``.

    Returns:
        A ``(summary, audio)`` tuple: the value returned by
        ``summarize_abstract`` and the value returned by
        ``abstract_to_audio`` for that summary.
    """
    summary = summarize_abstract(extract_text(article))
    return summary, abstract_to_audio(summary)
# Gradio components for the interface: one file upload in, summary text and
# audio out. They are passed to the Interface below instead of building a
# second, duplicate set of components there.
inputs = gr.File()
summary_text = gr.Text()
audio_summary = gr.Audio()

# Building Gradio Interface
myApp = gr.Interface(
    fn=abstract_audio,
    inputs=inputs,
    outputs=[summary_text, audio_summary],
    title="Summary of Abstract to Audio ",
    # The backslash is escaped explicitly: "\J" is not a valid escape sequence
    # and triggers a warning on current Python versions. The displayed text is
    # unchanged.
    description=(
        "An App that helps you summarises the abstract of an "
        "Article\\Journal and gives the audio of the summary"
    ),
    examples=["NIPS-2015-hidden-technical-debt-in-machine-learning-systems-Paper.pdf"],
)
myApp.launch()