File size: 4,043 Bytes
d61cd07
e673826
 
bc1a0a8
 
8df474c
d2894f7
e673826
 
8df474c
 
 
 
 
7218a55
8df474c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bc1a0a8
8df474c
 
d2894f7
e673826
 
 
9b95b6c
e673826
 
9b95b6c
5d174b9
 
bc1a0a8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8df474c
bc1a0a8
 
 
 
 
9b95b6c
7218a55
bc1a0a8
8df474c
9b95b6c
8df474c
7218a55
 
8df474c
7218a55
8df474c
 
7218a55
 
bc1a0a8
7218a55
bc1a0a8
8df474c
d2894f7
9b95b6c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import gradio as gr
import random
from datasets import load_dataset
import requests
from bs4 import BeautifulSoup
import os

# Load the CNN/DailyMail dataset (config "3.0.0") once, at import time; the
# "validation" split is what get_random_article() reads from.
dataset = load_dataset("cnn_dailymail", "3.0.0")

# Credentials come from the environment. A missing variable raises KeyError
# right here, at import time, before the UI is built.
NEWS_API_KEY = os.environ['NEWS_API_KEY']  # sent as 'apiKey' to newsapi.org
HF_TOKEN = os.environ['HF_TOKEN']  # bearer token for the HF inference API

def summarize(model_name, article):
    """Summarize *article* with a model hosted on the HF Inference API.

    Args:
        model_name: Model repository id; interpolated into the endpoint URL.
        article: Text to summarize.

    Returns:
        The text extracted from the API response by ``format`` on success.
        On any failure a human-readable message containing ``"Error:"`` is
        returned instead of raising, so the UI can display it directly.
    """
    # Nothing to summarize: answer locally instead of spending an API call.
    if not article or not article.strip():
        return "Error: No article text provided."

    api_url = f"https://api-inference.huggingface.co/models/{model_name}"
    headers = {"Authorization": f"Bearer {HF_TOKEN}"}
    payload = {"inputs": article}

    try:
        # Without a timeout a stalled connection would block the UI forever.
        response = requests.post(api_url, headers=headers, json=payload, timeout=60)
    except requests.RequestException as exc:
        return f"Error: Could not reach the inference API ({exc})."

    # Handle different types of errors
    if response.status_code == 401:
        return "Error: Unauthorized. Check your API token."
    if response.status_code == 503:
        return "Error: Service unavailable or model is currently loading."
    if response.status_code != 200:
        return f"{response} - Error: Encountered an issue (status code: {response.status_code}). Please try again."

    try:
        return format(response.json())
    except (ValueError, KeyError, IndexError, TypeError):
        # Body was not JSON, or did not have the expected list-of-dicts shape.
        return "Error: Unexpected response format from the inference API."

def format(response):
    """Extract the output text from a decoded Inference API response.

    Args:
        response: Decoded JSON payload; expected to be a non-empty sequence
            whose first element is a mapping.

    Returns:
        The value stored under ``'generated_text'`` when that key is present,
        otherwise the value stored under ``'summary_text'``.

    Raises:
        KeyError: If the first element contains neither key.
        IndexError: If *response* is an empty sequence.
    """
    # NOTE: this name shadows the ``format`` builtin; it is kept because
    # summarize() calls it by this name.
    first = response[0]
    if 'generated_text' in first:
        return first['generated_text']
    # Summarization pipelines report their output under 'summary_text'.
    return first['summary_text']

def get_random_article():
    """Return the text of one randomly chosen validation article.

    Returns:
        The first 1024 characters of the ``'article'`` field of a random row
        of ``dataset["validation"]``.
    """
    validation = dataset["validation"]
    # Index a single random row directly rather than shuffling the whole
    # split only to keep its first element.
    row = validation[random.randrange(len(validation))]
    return row['article'][:1024]

def load_article():
    """Load a random dataset article for the UI.

    Returns:
        An ``(article_text, title)`` tuple. The click handler that uses this
        function fills two components (the input box and the title label),
        so two values are required; ``get_random_article`` supplies only the
        text, so the title is an empty string, which also clears any title
        left over from a previously loaded news article.
    """
    return get_random_article(), ""

def get_news_article():
    """Fetch a random current US top headline and scrape its text.

    Returns:
        An ``(article_text, title)`` tuple. On success it is whatever
        ``scrape_article`` returns for the chosen URL. When the headlines
        cannot be fetched or contain no article URL, the text slot carries a
        short message and the title is an empty string, so the caller always
        receives two values.
    """
    url = 'https://newsapi.org/v2/top-headlines'
    params = {
        'apiKey': NEWS_API_KEY,
        'country': 'us',  # You can change this as needed
        'pageSize': 100,
    }
    try:
        response = requests.get(url, params=params, timeout=30)
        articles = response.json().get('articles') or []
    except (requests.RequestException, ValueError):
        # The exception text is deliberately not shown: it can contain the
        # request URL, whose query string includes the API key.
        return "Error: Could not fetch news headlines.", ""

    # Only consider entries that actually carry a link to scrape.
    candidate_urls = [item.get('url') for item in articles if item.get('url')]
    if not candidate_urls:
        return "No news article found.", ""

    return scrape_article(random.choice(candidate_urls))

def scrape_article(url, max_words=512):
    """Download *url* and return its paragraph text and page title.

    Args:
        url: Address of the page to fetch.
        max_words: Maximum number of whitespace-separated words of paragraph
            text to keep.

    Returns:
        A ``(text, title)`` tuple. ``text`` is the content of all ``<p>``
        elements joined by spaces and truncated to ``max_words`` words;
        ``title`` is the page ``<title>`` text, or ``"No Title Available"``
        when the page has none. On any failure the tuple is
        ``("Error scraping article: <reason>", "")``.
    """
    try:
        response = requests.get(url, timeout=30)
        # Treat HTTP error pages as failures instead of scraping their text.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        # Extracting the title - this is a general approach. get_text() is
        # used because .string is None when <title> contains nested markup.
        title = soup.title.get_text(strip=True) if soup.title else ""
        if not title:
            title = "No Title Available"

        paragraphs = soup.find_all('p')  # This is a simplification

        words = ' '.join(p.get_text() for p in paragraphs).split()
        truncated_text = ' '.join(words[:max_words])  # Keep the first max_words words

        return truncated_text, title
    except Exception as e:
        # Deliberately broad: this is a best-effort scrape of arbitrary pages
        # and the UI shows the message rather than crashing.
        return "Error scraping article: " + str(e), ""

# Build the Gradio UI: a two-column layout with article loading on the left
# and model selection / summarization on the right.
with gr.Blocks() as demo:
    gr.Markdown("# News Summary App")
    gr.Markdown("Enter a news text and get its summary, or load a random article.")
    
    with gr.Row():
        # Left column: buttons that fill the input box, plus the title label.
        with gr.Column():
            load_dataset_article_button = gr.Button("Load Random Article from Dataset")
            load_news_article_button = gr.Button("Load News Article")
            article_title = gr.Label()  # Component to display the article title
            input_text = gr.Textbox(lines=10, label="Input Text", placeholder="Enter article text or load a random article...")
        # Right column: model choice, the summarize trigger and the result.
        with gr.Column():
            model_name = gr.Dropdown(label="Model Name", choices=["liamvbetts/bart-news-summary-v1", "liamvbetts/bart-base-cnn-v1", "liamvbetts/bart-large-cnn-v2", "liamvbetts/bart-large-cnn-v4"], value="liamvbetts/bart-news-summary-v1")
            summarize_button = gr.Button("Summarize")
            output_text = gr.Textbox(label="Summary", placeholder="Summary will appear here...")

    # Event wiring: both load buttons target the input box and the title
    # label; the summarize button sends (model name, input text) to summarize().
    load_dataset_article_button.click(fn=load_article, inputs=[], outputs=[input_text, article_title])
    load_news_article_button.click(fn=get_news_article, inputs=[], outputs=[input_text, article_title])
    summarize_button.click(fn=summarize, inputs=[model_name, input_text], outputs=output_text)

# Start the app server when this module is executed.
demo.launch()