# azsalihu's picture
# Update app.py
# 0b067a3
# raw
# history blame contribute delete
# No virus
# 2.43 kB
# https://huggingface.co/spaces/azsalihu/AbstractSummary_To_Audio
# Here are the imports
import torch
import PyPDF2
import gradio as gr
from IPython.display import Audio, display
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import numpy as np
import scipy
from gtts import gTTS
from io import BytesIO
# Extracting Text function
def extract_text(article):
    """Return the text of the first page of a PDF.

    `article` is passed unchanged to `PyPDF2.PdfReader`; only the page at
    index 0 is read, and the result of its `extract_text()` call is returned.
    """
    first_page = PyPDF2.PdfReader(article).pages[0]
    return first_page.extract_text()
# Summarization Function
# Checkpoint used for both the tokenizer and the seq2seq model.
_SUMMARY_CHECKPOINT = "pszemraj/led-base-book-summary"
# How many sentences are taken after the one that mentions "Abstract".
_ABSTRACT_SENTENCE_COUNT = 7
# Filled on first use so the weights are not reloaded on every call.
_summarizer_cache = {}


def _load_summarizer():
    """Return the (tokenizer, model) pair, loading it only on first use."""
    if "pair" not in _summarizer_cache:
        _summarizer_cache["pair"] = (
            AutoTokenizer.from_pretrained(_SUMMARY_CHECKPOINT),
            AutoModelForSeq2SeqLM.from_pretrained(_SUMMARY_CHECKPOINT),
        )
    return _summarizer_cache["pair"]


def _select_abstract(text):
    """Return the sentences that follow the first one containing "Abstract"."""
    sentences = text.split(". ")
    # The sentence holding the marker itself is skipped (start = index + 1).
    # When no sentence mentions "Abstract" we fall back to the start of the
    # text; previously this case crashed with UnboundLocalError.
    start = next(
        (i + 1 for i, sentence in enumerate(sentences) if "Abstract" in sentence),
        0,
    )
    return ". ".join(sentences[start:start + _ABSTRACT_SENTENCE_COUNT])


def _trim_to_last_sentence(summary):
    """Drop any trailing fragment after the final full stop, if there is one."""
    last_stop = summary.rfind(".")
    return summary if last_stop == -1 else summary[:last_stop + 1]


def summarize_abstract(text):
    """Summarize the abstract found in `text`.

    The text is split on ". "; the seven sentences after the first sentence
    containing "Abstract" are joined and fed to the summarization model. If
    no sentence contains "Abstract", the first seven sentences are used.

    Args:
        text: Plain text to search for the abstract.

    Returns:
        The decoded summary, cut after its last "." when it contains one;
        otherwise the decoded summary unchanged.
    """
    abstract = _select_abstract(text)
    tokenizer, model = _load_summarizer()
    # Tokenize abstract
    inputs = tokenizer(abstract, max_length=1024, return_tensors="pt", truncation=True)
    # Generate summary
    summary_ids = model.generate(
        inputs["input_ids"],
        max_length=50,
        min_length=30,
        no_repeat_ngram_size=3,
        encoder_no_repeat_ngram_size=3,
        repetition_penalty=3.5,
        num_beams=4,
        do_sample=True,
        early_stopping=False,
    )
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    return _trim_to_last_sentence(summary)
# Abstract to Audio Function
def abstract_to_audio(text):
    """Synthesize `text` as English speech and return the audio as bytes.

    gTTS writes its output into an in-memory buffer, whose full contents
    are returned.
    """
    audio_stream = BytesIO()
    gTTS(text, lang='en').write_to_fp(audio_stream)
    # getvalue() returns the whole buffer regardless of the stream position.
    return audio_stream.getvalue()
# Combining Extracting text, Summarization, Abstract to Audio functions
def abstract_audio(article):
    """Run the full pipeline on `article` and return `(summary, audio)`.

    The text from `extract_text` is summarized by `summarize_abstract`, and
    that summary is converted to audio by `abstract_to_audio`.
    """
    summary = summarize_abstract(extract_text(article))
    return summary, abstract_to_audio(summary)
# Components wired into the interface below (previously created but unused,
# while duplicate components were built inline).
inputs = gr.File()
summary_text = gr.Text()
audio_summary = gr.Audio()
# Building Gradio Interface
myApp = gr.Interface(
    fn=abstract_audio,
    inputs=inputs,
    outputs=[summary_text, audio_summary],
    title="Summary of Abstract to Audio ",
    # The backslash is escaped so the displayed text is unchanged without
    # relying on the invalid "\J" escape sequence.
    description="An App that helps you summarises the abstract of an Article\\Journal and gives the audio of the summary",
    examples=["NIPS-2015-hidden-technical-debt-in-machine-learning-systems-Paper.pdf"],
)
myApp.launch()