"""Gradio demo that summarizes news articles with hosted Hugging Face models.

Articles can be typed in, sampled from the CNN/DailyMail validation split, or
pulled from NewsAPI top headlines and scraped from the publisher's page.
"""

import os
import random

import gradio as gr
import requests
from bs4 import BeautifulSoup
from datasets import load_dataset

dataset = load_dataset("cnn_dailymail", "3.0.0")

# Both secrets are required; a missing variable fails fast at start-up.
NEWS_API_KEY = os.environ['NEWS_API_KEY']
HF_TOKEN = os.environ['HF_TOKEN']

# Seconds to wait on any outbound HTTP call before giving up, so a stalled
# remote server cannot block a UI callback forever.
REQUEST_TIMEOUT = 60


def summarize(model_name, article):
    """Summarize *article* with the hosted model *model_name*.

    Args:
        model_name: Hugging Face model id, appended to the Inference API URL.
        article: Text to summarize.

    Returns:
        The summary text on success, otherwise a human-readable error string
        (errors are returned rather than raised so they show in the UI).
    """
    api_url = f"https://api-inference.huggingface.co/models/{model_name}"
    # The f-prefix matters: without it the literal text "{HF_TOKEN}" is sent
    # and the request is never authenticated.
    headers = {"Authorization": f"Bearer {HF_TOKEN}"}
    payload = {"inputs": article}

    try:
        response = requests.post(
            api_url, headers=headers, json=payload, timeout=REQUEST_TIMEOUT
        )
    except requests.RequestException as exc:
        return f"Error: Request failed ({exc}). Please try again."

    if response.status_code == 200:
        return format(response.json())
    if response.status_code == 401:
        return "Error: Unauthorized. Check your API token."
    if response.status_code == 503:
        return "Error: Service unavailable or model is currently loading."
    return f"{response} - Error: Encountered an issue (status code: {response.status_code}). Please try again."


def format(response):
    """Extract the generated summary from a decoded Inference API response.

    NOTE: the name shadows the ``format`` builtin; it is kept because it is
    this module's existing public name.

    Args:
        response: Decoded JSON body; expected to be a list whose first item is
            a dict holding the generated text.

    Returns:
        The text under ``generated_text`` or, failing that, ``summary_text``.

    Raises:
        KeyError: If the first item has neither key.
    """
    first = response[0]
    if 'generated_text' in first:
        return first['generated_text']
    # Summarization pipelines report their output under 'summary_text'.
    return first['summary_text']


def get_random_article():
    """Return the first 1024 characters of a random validation article."""
    validation = dataset["validation"]
    # Pick one random row directly instead of shuffling the whole split.
    index = random.randrange(len(validation))
    return validation[index]['article'][:1024]


def load_article():
    """Gradio callback: load a random article from the validation dataset."""
    return get_random_article()


def get_news_article():
    """Fetch a random US top headline from NewsAPI and scrape its text.

    Returns:
        A ``(text, title)`` tuple. On failure ``text`` carries a message and
        ``title`` is empty; a tuple is always returned because the click
        handler is wired to two output components.
    """
    url = 'https://newsapi.org/v2/top-headlines'
    params = {
        'apiKey': NEWS_API_KEY,
        'country': 'us',  # You can change this as needed
        'pageSize': 100,
    }

    try:
        response = requests.get(url, params=params, timeout=REQUEST_TIMEOUT)
        articles = response.json().get('articles', [])
    except (requests.RequestException, ValueError) as exc:
        # ValueError covers a body that is not valid JSON.
        return "Error fetching news: " + str(exc), ""

    if not articles:
        return "No news article found.", ""

    news_url = random.choice(articles).get('url')
    if not news_url:
        return "No news article found.", ""

    full_article, title = scrape_article(news_url)
    return full_article, title


def scrape_article(url):
    """Download *url* and extract a rough plain-text article and its title.

    Args:
        url: Address of the article page.

    Returns:
        A ``(text, title)`` tuple where ``text`` is the concatenated ``<p>``
        contents truncated to the first 512 words. On any failure ``text`` is
        an error message and ``title`` is empty.
    """
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
        soup = BeautifulSoup(response.content, 'html.parser')

        # Extracting the title - this is a general approach. ``.string`` is
        # None when <title> contains nested tags, so fall back in that case.
        if soup.title and soup.title.string:
            title = soup.title.string
        else:
            title = "No Title Available"

        # This is a simplification: every <p> on the page counts as content.
        paragraphs = soup.find_all('p')
        text = ' '.join(p.get_text() for p in paragraphs)

        # Truncate to the first 512 words.
        truncated_text = ' '.join(text.split()[:512])
        return truncated_text, title
    except Exception as e:
        # Deliberate best-effort: report any failure in the UI, don't raise.
        return "Error scraping article: " + str(e), ""


# Using Gradio Blocks with improved layout and styling
with gr.Blocks() as demo:
    gr.Markdown("# News Summary App", elem_id="header")
    gr.Markdown("Enter a news text and get its summary, or load a random article.")

    with gr.Row():
        with gr.Column():
            with gr.Row():
                load_dataset_article_button = gr.Button("Load Random Article from Val Dataset")
                load_news_article_button = gr.Button("Pull Random News Article from NewsAPI")
            article_title = gr.Label()  # Component to display the article title
            input_text = gr.Textbox(lines=10, label="Input Text")
        with gr.Column():
            with gr.Row():
                summarize_button = gr.Button("Summarize")
                model_name = gr.Dropdown(
                    label="Model Name",
                    choices=[
                        "liamvbetts/bart-news-summary-v1",
                        "liamvbetts/bart-base-cnn-v1",
                        "liamvbetts/bart-large-cnn-v2",
                        "liamvbetts/bart-large-cnn-v4",
                    ],
                    value="liamvbetts/bart-news-summary-v1",
                )
            output_text = gr.Textbox(label="Summary")

    load_dataset_article_button.click(fn=load_article, inputs=[], outputs=input_text)
    load_news_article_button.click(fn=get_news_article, inputs=[], outputs=[input_text, article_title])
    summarize_button.click(fn=summarize, inputs=[model_name, input_text], outputs=output_text)

demo.launch()