# News article summarizer + audio reader, served through a Gradio interface.
#
# The app accepts either a URL (scraped with goose3) or raw article text,
# summarizes it with a fine-tuned T5 checkpoint, and reads the summary aloud
# with the SpeechT5 text-to-speech pipeline.

# Set Up
import gradio as gr
import soundfile as sf
import torch
from datasets import load_dataset
from goose3 import Goose  # Web scraping
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline

# Scraped articles whose cleaned text is shorter than this many characters are
# rejected as "too short or not properly scraped".
ARTICLE_MINIMUM_LENGTH = 600

# Loading the dataset
# NOTE(review): `dataset` is not referenced anywhere else in the code visible
# here; it is kept so the module-level name stays available. Consider removing
# it if nothing else needs it, since it is loaded on every start-up.
dataset = load_dataset("multi_news")

# AUDIO
speech_name = "microsoft/speecht5_tts"
synthesiser = pipeline("text-to-speech", speech_name)
embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
# Speaker x-vector passed to the TTS pipeline; unsqueeze adds a leading axis.
speaker_embedding = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)

# GRADIO: NEWS SUMMARIZER + AUDIO READER (URL + TEXT INPUT)

# Load the fine-tuned T5 news summarizer
tokenizer = AutoTokenizer.from_pretrained("hannahisrael03/t5_news_summarizer")
model = AutoModelForSeq2SeqLM.from_pretrained("hannahisrael03/t5_news_summarizer")


def summarize_and_synthesize(input_type, input_value):
    """Summarize a news article and synthesize the summary as speech.

    Args:
        input_type: ``"URL"`` to scrape the article from a web page, or
            ``"Text"`` to use ``input_value`` as the article body directly.
        input_value: The article URL or the article text, depending on
            ``input_type``.

    Returns:
        A ``(summary_text, audio_path)`` tuple. On invalid input or an
        unusable scrape, ``summary_text`` is a human-readable message and
        ``audio_path`` is ``None``.
    """
    cleaned_value = (input_value or "").strip()
    if not cleaned_value:
        return "Please provide a URL or some article text.", None

    if input_type == "URL":
        # The context manager closes the extractor once the page is parsed.
        with Goose() as extractor:
            article = extractor.extract(url=cleaned_value)
        article_body = article.cleaned_text
        if len(article_body) < ARTICLE_MINIMUM_LENGTH:
            return "The article is too short or could not be properly scraped.", None
    elif input_type == "Text":
        article_body = input_value
    else:
        # E.g. the radio button was left unselected.
        return "Please select an input type: 'URL' or 'Text'.", None

    # Summarize the news article
    inputs = tokenizer(
        article_body,
        return_tensors="pt",
        max_length=512,
        truncation=True,
        padding="max_length",
    )
    summary_ids = model.generate(
        inputs["input_ids"],
        # The input is padded to max_length, so hand over the mask explicitly.
        attention_mask=inputs["attention_mask"],
        min_length=30,
        max_length=100,
        length_penalty=2.0,
        num_beams=4,
        early_stopping=True,
    )
    summary_text = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

    # Synthesize the summary into audio
    speech = synthesiser(
        summary_text,
        forward_params={"speaker_embeddings": speaker_embedding},
    )
    audio_path = "summary_speech.wav"
    sf.write(audio_path, speech["audio"], samplerate=speech["sampling_rate"])

    return summary_text, audio_path


iface = gr.Interface(
    fn=summarize_and_synthesize,
    inputs=[
        gr.Radio(["URL", "Text"], label="Input Type"),
        gr.Textbox(label="Input Value"),
    ],
    outputs=[
        gr.Textbox(label="Summary Text"),
        gr.Audio(label="Summary Audio", type="filepath"),
    ],
    title="News Article Summarizer and Reader",
    description="Select 'URL' to enter the URL of a news article, or select 'Text' to paste the article text directly. You will get a summary and hear the summary read aloud.",
)

iface.launch()