# hannahisrael03's picture
# Update app.py
# 0abd3dc verified
# Set Up
import torch
from transformers import pipeline
from datasets import load_dataset
import soundfile as sf
from transformers import AutoTokenizer
from transformers import AutoModelForSeq2SeqLM
from datasets import load_dataset
import gradio as gr
# Loading the dataset
# NOTE(review): `dataset` is not referenced by any other code visible in this
# file -- confirm it is still needed before paying for the download at start-up.
dataset = load_dataset("multi_news")

# AUDIO
# Checkpoint id handed to the text-to-speech pipeline.
speech_name = "microsoft/speecht5_tts"
synthesiser = pipeline("text-to-speech", model=speech_name)

# Table of speaker x-vectors; a single row supplies the speaker embedding.
embeddings_dataset = load_dataset(
    "Matthijs/cmu-arctic-xvectors",
    split="validation",
)
_speaker_xvector = embeddings_dataset[7306]["xvector"]
# Add a leading dimension of size 1 in front of the vector.
speaker_embedding = torch.unsqueeze(torch.tensor(_speaker_xvector), dim=0)
# Web Scraping
from goose3 import Goose
# GRADIO: NEWS SUMMARIZER + AUDIO READER (URL + TEXT INPUT)
# The tokenizer and the seq2seq model are both loaded from the same
# fine-tuned T5 news summarizer repository.
_SUMMARIZER_REPO = "hannahisrael03/t5_news_summarizer"
tokenizer = AutoTokenizer.from_pretrained(_SUMMARIZER_REPO)
model = AutoModelForSeq2SeqLM.from_pretrained(_SUMMARIZER_REPO)
def summarize_and_synthesize(input_type, input_value):
    """Summarize a news article and synthesize the summary as speech.

    Args:
        input_type: "URL" to scrape the article found at ``input_value``,
            or "Text" to use ``input_value`` as the article body directly.
        input_value: The article URL or the raw article text.

    Returns:
        A ``(summary_text, audio_path)`` tuple. When the request cannot be
        processed (nothing entered, unknown input type, or a scraped article
        that is too short) the first element is a user-facing message and
        the second element is ``None``.
    """
    article_minimum_length = 600

    # Nothing to work with: answer with a message instead of failing deeper
    # inside the scraper or the tokenizer.
    stripped_value = (input_value or "").strip()
    if not stripped_value:
        return "Please enter a URL or paste the article text.", None

    if input_type == "URL":
        # Pass the user's value as the URL (the previous code referenced an
        # undefined name here). The context manager closes the Goose instance
        # once extraction is done.
        with Goose() as g:
            article = g.extract(url=stripped_value)
            article_body = article.cleaned_text or ""
        if len(article_body) < article_minimum_length:
            return "The article is too short or could not be properly scraped.", None
    elif input_type == "Text":
        article_body = input_value
    else:
        # E.g. no radio option selected; without this branch `article_body`
        # would be unbound below.
        return "Please select an input type: 'URL' or 'Text'.", None

    # Summarize the news article.
    # A single sequence needs no padding; truncation still caps it at 512
    # tokens, and the attention mask is passed to generate() explicitly.
    inputs = tokenizer(
        article_body,
        return_tensors="pt",
        max_length=512,
        truncation=True,
    )
    summary_ids = model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        min_length=30,
        max_length=100,
        length_penalty=2.0,
        num_beams=4,
        early_stopping=True,
    )
    summary_text = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

    # Synthesize the summary into audio.
    speech = synthesiser(
        summary_text,
        forward_params={"speaker_embeddings": speaker_embedding},
    )
    # NOTE(review): every call writes to the same file name, so overlapping
    # requests would overwrite each other's audio -- confirm whether the app
    # is expected to serve concurrent users.
    audio_path = "summary_speech.wav"
    sf.write(audio_path, speech["audio"], samplerate=speech["sampling_rate"])
    return summary_text, audio_path
# Input widgets: a selector for the kind of input plus one field that holds
# either the URL or the pasted article text.
_input_components = [
    gr.Radio(["URL", "Text"], label="Input Type"),
    gr.Textbox(label="Input Value"),
]

# Output widgets: the written summary and the audio file produced from it.
_output_components = [
    gr.Textbox(label="Summary Text"),
    gr.Audio(label="Summary Audio", type="filepath"),
]

_description = "Select 'URL' to enter the URL of a news article, or select 'Text' to paste the article text directly. You will get a summary and hear the summary read aloud."

# Wire the handler to the widgets and start the app.
iface = gr.Interface(
    fn=summarize_and_synthesize,
    inputs=_input_components,
    outputs=_output_components,
    title="News Article Summarizer and Reader",
    description=_description,
)
iface.launch()