|
|
|
|
|
import torch |
|
from transformers import pipeline |
|
from datasets import load_dataset |
|
import soundfile as sf |
|
from transformers import AutoTokenizer |
|
from transformers import AutoModelForSeq2SeqLM |
|
from datasets import load_dataset |
|
import gradio as gr |
|
|
|
|
|
# Multi-News dataset, loaded once at import time.
dataset = load_dataset(path="multi_news", trust_remote_code=True)

# Text-to-speech pipeline used to read the generated summaries aloud.
speech_name = "microsoft/speecht5_tts"
synthesiser = pipeline(task="text-to-speech", model=speech_name)

# Speaker x-vector handed to the synthesiser on every call; entry 7306 of the
# validation split is used, with a leading dimension added via unsqueeze.
embeddings_dataset = load_dataset(
    path="Matthijs/cmu-arctic-xvectors",
    split="validation",
)
speaker_embedding = torch.unsqueeze(
    torch.tensor(embeddings_dataset[7306]["xvector"]),
    0,
)
|
|
|
|
|
|
|
from goose3 import Goose |
|
|
|
|
|
|
|
|
|
# Summarization checkpoint; the tokenizer and the seq2seq model are loaded
# from the same checkpoint id so they stay in sync.
summarizer_name = "hannahisrael03/t5_news_summarizer"
tokenizer = AutoTokenizer.from_pretrained(summarizer_name)
model = AutoModelForSeq2SeqLM.from_pretrained(summarizer_name)
|
|
|
def summarize_and_synthesize(input_type, input_value):
    """Summarize a news article and synthesize the summary as speech.

    Args:
        input_type: ``"URL"`` to scrape the article found at ``input_value``,
            or ``"Text"`` to use ``input_value`` directly as the article body.
        input_value: The article URL or the raw article text.

    Returns:
        A ``(summary_text, audio_path)`` tuple. When the input is invalid or
        the scraped article is too short, the first element is a user-facing
        message and the second element is ``None``.
    """
    ARTICLE_MINIMUM_LENGTH = 600

    # Validate up front so a missing radio selection or an empty textbox
    # yields a message instead of an unbound-variable crash further down.
    if input_type not in ("URL", "Text"):
        return "Unsupported input type. Please choose 'URL' or 'Text'.", None
    if not input_value or not input_value.strip():
        return "Please provide a URL or some article text.", None

    if input_type == "URL":
        # The context manager closes the scraper's resources when done.
        with Goose() as g:
            article = g.extract(url=input_value.strip())
            article_body = article.cleaned_text
        if len(article_body) < ARTICLE_MINIMUM_LENGTH:
            return "The article is too short or could not be properly scraped.", None
    else:
        article_body = input_value

    # The article is truncated/padded to 512 tokens before summarization.
    inputs = tokenizer(
        article_body,
        return_tensors="pt",
        max_length=512,
        truncation=True,
        padding="max_length",
    )
    summary_ids = model.generate(
        inputs["input_ids"],
        # Pass the mask explicitly so padding positions are ignored.
        attention_mask=inputs["attention_mask"],
        min_length=30,
        max_length=100,
        length_penalty=2.0,
        num_beams=4,
        early_stopping=True,
    )
    summary_text = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

    # Read the summary aloud with the module-level speaker embedding and
    # write the waveform to a WAV file whose path is returned to the caller.
    speech = synthesiser(
        summary_text,
        forward_params={"speaker_embeddings": speaker_embedding},
    )
    audio_path = "summary_speech.wav"
    sf.write(audio_path, speech["audio"], samplerate=speech["sampling_rate"])

    return summary_text, audio_path
|
|
|
|
|
# Gradio front end: a radio button selects how the textbox content is
# interpreted, and the outputs are the summary text plus the audio file.
iface = gr.Interface(
    fn=summarize_and_synthesize,
    inputs=[
        gr.Radio(choices=["URL", "Text"], label="Input Type"),
        gr.Textbox(label="Input Value"),
    ],
    outputs=[
        gr.Textbox(label="Summary Text"),
        gr.Audio(label="Summary Audio", type="filepath"),
    ],
    title="News Article Summarizer and Reader",
    description=(
        "Select 'URL' to enter the URL of a news article, or select 'Text' "
        "to paste the article text directly. You will get a summary and "
        "hear the summary read aloud."
    ),
)

# Start the web app.
iface.launch()
|
|