# Page header captured along with this source: "Spaces: Runtime error".
# Set Up
import gradio as gr
import soundfile as sf
import torch
from datasets import load_dataset
from transformers import AutoModelForSeq2SeqLM
from transformers import AutoTokenizer
from transformers import pipeline
# Loading the dataset
# NOTE(review): `dataset` is not referenced anywhere else in the visible code;
# confirm whether this load is still needed at startup.
dataset = load_dataset("multi_news")

# AUDIO
# Text-to-speech pipeline used to read the generated summary aloud.
speech_name = "microsoft/speecht5_tts"
synthesiser = pipeline("text-to-speech", speech_name)

# Speaker x-vector handed to the synthesiser on every call; unsqueeze(0) adds
# a leading dimension of size 1 to the loaded vector.
embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
speaker_embedding = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)

# Web Scraping
from goose3 import Goose

# GRADIO: NEWS SUMMARIZER + AUDIO READER (URL + TEXT INPUT)
# Load the fine-tuned T5 news summarizer
tokenizer = AutoTokenizer.from_pretrained("hannahisrael03/t5_news_summarizer")
model = AutoModelForSeq2SeqLM.from_pretrained("hannahisrael03/t5_news_summarizer")
# Scraped articles shorter than this many characters are rejected.
ARTICLE_MINIMUM_LENGTH = 600


def summarize_and_synthesize(input_type, input_value):
    """Summarize a news article and synthesize the summary as speech.

    Args:
        input_type: "URL" to scrape the article from a web page, or "Text"
            to summarize ``input_value`` directly.
        input_value: The article URL or the raw article text, depending on
            ``input_type``.

    Returns:
        A ``(summary_text, audio_path)`` tuple. When the input cannot be
        processed (empty value, unknown input type, scraped article too
        short) the first element is an explanatory message and the second
        is ``None``.
    """
    # Guard against an empty textbox before doing any expensive work.
    if not input_value or not input_value.strip():
        return "Please provide a URL or article text to summarize.", None

    if input_type == "URL":
        # The context manager closes Goose's fetching session afterwards.
        with Goose() as g:
            # Fixed: the original passed an undefined name `url` here.
            article = g.extract(url=input_value.strip())
        article_body = article.cleaned_text or ""
        if len(article_body) < ARTICLE_MINIMUM_LENGTH:
            return "The article is too short or could not be properly scraped.", None
    elif input_type == "Text":
        article_body = input_value
    else:
        # E.g. no radio option selected; previously `article_body` was unbound.
        return "Please select an input type: 'URL' or 'Text'.", None

    # Summarize the news article
    inputs = tokenizer(
        article_body,
        return_tensors="pt",
        max_length=512,
        truncation=True,
        padding="max_length",
    )
    # The input is padded to max_length, so pass the attention mask explicitly
    # rather than relying on generate() to infer it from the pad tokens.
    summary_ids = model.generate(
        inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        min_length=30,
        max_length=100,
        length_penalty=2.0,
        num_beams=4,
        early_stopping=True,
    )
    summary_text = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

    # Synthesize the summary into audio
    speech = synthesiser(
        summary_text,
        forward_params={"speaker_embeddings": speaker_embedding},
    )
    audio_path = "summary_speech.wav"
    sf.write(audio_path, speech["audio"], samplerate=speech["sampling_rate"])
    return summary_text, audio_path
# Gradio UI: a radio button selects how the textbox value is interpreted, and
# the outputs show the summary text alongside the synthesized audio file.
iface = gr.Interface(
    fn=summarize_and_synthesize,
    inputs=[
        gr.Radio(["URL", "Text"], label="Input Type"),
        gr.Textbox(label="Input Value"),
    ],
    outputs=[
        gr.Textbox(label="Summary Text"),
        gr.Audio(label="Summary Audio", type="filepath"),
    ],
    title="News Article Summarizer and Reader",
    description="Select 'URL' to enter the URL of a news article, or select 'Text' to paste the article text directly. You will get a summary and hear the summary read aloud.",
)
iface.launch()