import gradio as gr
import random
from datasets import load_dataset
import requests
from bs4 import BeautifulSoup
import os

# Load the CNN/DailyMail dataset (config "3.0.0") when the module is imported;
# get_random_article() reads its "validation" split.
dataset = load_dataset("cnn_dailymail", "3.0.0")

# Credentials come from the environment. Indexing os.environ raises KeyError
# at import time if either variable is not set.
NEWS_API_KEY = os.environ['NEWS_API_KEY']  # sent as 'apiKey' to newsapi.org in get_news_article()
HF_TOKEN = os.environ['HF_TOKEN']  # bearer token for the inference API call in summarize()

def summarize(model_name, article):
    """Summarize *article* with a model hosted on the Hugging Face Inference API.

    Args:
        model_name: Hub repository id of the model to query; it is
            interpolated into the inference endpoint URL.
        article: The text to summarize.

    Returns:
        The text extracted by ``format()`` from a successful response, or a
        human-readable error message string when the input is empty, the
        request fails, the API answers with a non-200 status, or the
        response body does not have the expected shape. This function does
        not raise for those conditions because its return value is shown
        directly in the UI.
    """
    # Skip the network round trip when there is nothing to summarize.
    if not article or not article.strip():
        return "Error: No text to summarize."

    api_url = f"https://api-inference.huggingface.co/models/{model_name}"
    headers = {"Authorization": f"Bearer {HF_TOKEN}"}
    payload = {"inputs": article}

    try:
        # Without a timeout, requests can block forever on a stalled connection.
        response = requests.post(api_url, headers=headers, json=payload, timeout=60)
    except requests.RequestException as e:
        return f"Error: Request to the inference API failed ({e})."

    if response.status_code == 401:
        return "Error: Unauthorized. Check your API token."
    if response.status_code == 503:
        return "Error: Service unavailable or model is currently loading."
    if response.status_code != 200:
        return f"{response} - Error: Encountered an issue (status code: {response.status_code}). Please try again."

    try:
        return format(response.json())
    except (ValueError, KeyError, IndexError, TypeError):
        # A 200 response whose body is not JSON or lacks the expected
        # list-of-dicts structure.
        return "Error: Unexpected response format from the inference API."

def format(response):
    """Extract the generated text from a parsed inference API response.

    NOTE: this name shadows the builtin ``format``; it is kept because
    ``summarize()`` calls it by this name.

    Args:
        response: Parsed JSON body, expected to be a non-empty sequence whose
            first element is a mapping.

    Returns:
        The first element's ``'generated_text'`` value if present, otherwise
        its ``'summary_text'`` value (the key used by summarization-style
        responses).

    Raises:
        KeyError: If the first element has neither key.
        IndexError: If ``response`` is an empty sequence.
    """
    first = response[0]
    # Prefer the original key so existing behavior is unchanged.
    for key in ('generated_text', 'summary_text'):
        if key in first:
            return first[key]
    raise KeyError('generated_text')

def get_random_article(max_chars=1024):
    """Return the start of a randomly chosen article from the validation split.

    Args:
        max_chars: Maximum number of characters of the article to return.
            Defaults to 1024.

    Returns:
        The first ``max_chars`` characters of the ``'article'`` field of one
        randomly selected row of ``dataset["validation"]``.
    """
    validation_split = dataset["validation"]
    # Pick one row by index instead of shuffling the whole split just to
    # take its first element. The module-level RNG is already seeded at
    # import, so no explicit random.seed() call is needed.
    index = random.randrange(len(validation_split))
    return validation_split[index]['article'][:max_chars]

def load_article():
    """Return a random dataset article excerpt.

    Thin wrapper that delegates to ``get_random_article()`` and returns its
    result (a single string) unchanged.
    """
    article_text = get_random_article()
    return article_text

def get_news_article():
    """Fetch a random US top headline from NewsAPI and scrape its text.

    Returns:
        A ``(text, title)`` tuple. On success this is the result of
        ``scrape_article()`` for the chosen headline's URL. If the NewsAPI
        request fails, or no article with a URL is available, ``text`` is a
        human-readable message and ``title`` is an empty string. A tuple is
        always returned because this function feeds two UI components.
    """
    url = 'https://newsapi.org/v2/top-headlines'
    params = {
        'apiKey': NEWS_API_KEY,
        'country': 'us',  # You can change this as needed
        'pageSize': 100
    }

    try:
        response = requests.get(url, params=params, timeout=10)
        # `or []` also covers a JSON body where "articles" is null.
        articles = response.json().get('articles') or []
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a response body that is not valid JSON.
        return "Error fetching news: " + str(e), ""

    # Only consider entries that actually carry a URL to scrape.
    candidate_urls = [a.get('url') for a in articles if a.get('url')]
    if not candidate_urls:
        return "No news article found.", ""

    news_url = random.choice(candidate_urls)
    full_article, title = scrape_article(news_url)
    return full_article, title

def scrape_article(url, max_words=512):
    """Download a web page and return its paragraph text and title.

    Args:
        url: Address of the page to fetch.
        max_words: Maximum number of whitespace-separated words of body text
            to keep. Defaults to 512.

    Returns:
        A ``(text, title)`` tuple. ``text`` is the text of all ``<p>``
        elements joined by spaces and truncated to ``max_words`` words;
        ``title`` is the page's ``<title>`` string or
        ``"No Title Available"``. If anything goes wrong, ``text`` is an
        error message starting with ``"Error scraping article: "`` and
        ``title`` is an empty string.
    """
    try:
        # A timeout keeps a stalled server from hanging the UI callback.
        response = requests.get(url, timeout=10)
        # Treat HTTP error pages as failures instead of scraping their text.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        # Extracting the title - this is a general approach.
        # `.string` is None when <title> is empty or has nested tags.
        title = soup.title.string if soup.title and soup.title.string else "No Title Available"

        paragraphs = soup.find_all('p')  # This is a simplification

        text = ' '.join(p.get_text() for p in paragraphs)
        words = text.split()
        truncated_text = ' '.join(words[:max_words])  # Keep only the first max_words words

        return truncated_text, title
    except Exception as e:
        # Deliberately broad: scraping is best-effort and the message is
        # shown to the user instead of crashing the callback.
        return "Error scraping article: " + str(e), ""

def _load_dataset_article():
    """Adapt load_article() to the two-output dataset button.

    load_article() returns only the article text, while the button updates
    both the input textbox and the title label; dataset articles have no
    title, so an empty one is supplied (which also clears any previous title).
    """
    return load_article(), ""


# Build the UI: article loading controls on the left, model selection and
# summary output on the right.
with gr.Blocks() as demo:
    gr.Markdown("# News Summary App")
    gr.Markdown("Enter a news text and get its summary, or load a random article.")
    
    with gr.Row():
        with gr.Column():
            load_dataset_article_button = gr.Button("Load Random Article from Dataset")
            load_news_article_button = gr.Button("Load News Article")
            article_title = gr.Label()  # Component to display the article title
            input_text = gr.Textbox(lines=10, label="Input Text", placeholder="Enter article text or load a random article...")
        with gr.Column():
            model_name = gr.Dropdown(label="Model Name", choices=["liamvbetts/bart-news-summary-v1", "liamvbetts/bart-base-cnn-v1", "liamvbetts/bart-large-cnn-v2", "liamvbetts/bart-large-cnn-v4"], value="liamvbetts/bart-news-summary-v1")
            summarize_button = gr.Button("Summarize")
            output_text = gr.Textbox(label="Summary", placeholder="Summary will appear here...")

    # Each handler must return one value per listed output component.
    load_dataset_article_button.click(fn=_load_dataset_article, inputs=[], outputs=[input_text, article_title])
    load_news_article_button.click(fn=get_news_article, inputs=[], outputs=[input_text, article_title])
    summarize_button.click(fn=summarize, inputs=[model_name, input_text], outputs=output_text)

demo.launch()