Spaces:
Sleeping
Sleeping
liamvbetts
committed on
Commit
•
bc1a0a8
1
Parent(s):
9b95b6c
news api integration
Browse files- app.py +53 -3
- requirements.txt +1 -0
app.py
CHANGED
@@ -2,12 +2,16 @@ import gradio as gr
|
|
2 |
import random
|
3 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
4 |
from datasets import load_dataset
|
|
|
|
|
5 |
|
6 |
tokenizer = AutoTokenizer.from_pretrained("liamvbetts/bart-large-cnn-v4")
|
7 |
model = AutoModelForSeq2SeqLM.from_pretrained("liamvbetts/bart-large-cnn-v4")
|
8 |
|
9 |
dataset = load_dataset("cnn_dailymail", "3.0.0")
|
10 |
|
|
|
|
|
11 |
def summarize(article):
|
12 |
inputs = tokenizer(article, return_tensors="pt").input_ids
|
13 |
outputs = model.generate(inputs, max_new_tokens=128, do_sample=False)
|
@@ -23,15 +27,61 @@ def get_random_article():
|
|
23 |
def load_article():
|
24 |
return get_random_article()
|
25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
# Using Gradio Blocks
|
27 |
with gr.Blocks() as demo:
|
28 |
gr.Markdown("## News Summary App")
|
29 |
-
gr.Markdown("Enter a news text and get its summary, or load a random article
|
30 |
with gr.Row():
|
|
|
31 |
input_text = gr.Textbox(lines=10, label="Input Text")
|
32 |
output_text = gr.Textbox(label="Summary")
|
33 |
-
|
34 |
-
|
|
|
|
|
|
|
|
|
|
|
35 |
summarize_button = gr.Button("Summarize")
|
36 |
summarize_button.click(fn=summarize, inputs=input_text, outputs=output_text)
|
37 |
|
|
|
2 |
import os
import random

import requests
from bs4 import BeautifulSoup
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
7 |
|
8 |
# Hugging Face checkpoint id; the tokenizer and the seq2seq model are both
# loaded from this same repository.
MODEL_CHECKPOINT = "liamvbetts/bart-large-cnn-v4"

tokenizer = AutoTokenizer.from_pretrained(MODEL_CHECKPOINT)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_CHECKPOINT)

# CNN/DailyMail corpus, configuration "3.0.0", loaded once at import time.
# NOTE(review): presumably the pool that get_random_article samples from -
# confirm against that function.
dataset = load_dataset("cnn_dailymail", "3.0.0")
|
12 |
|
13 |
+
# NewsAPI credential. Read from the environment (for example a Space secret
# named NEWS_API_KEY) so the key is never committed to the repository.
# Empty string when the variable is not set.
NEWS_API_KEY = os.environ.get("NEWS_API_KEY", "")
|
14 |
+
|
15 |
def summarize(article):
|
16 |
inputs = tokenizer(article, return_tensors="pt").input_ids
|
17 |
outputs = model.generate(inputs, max_new_tokens=128, do_sample=False)
|
|
|
27 |
def load_article():
    """Return whatever ``get_random_article`` produces.

    Thin zero-argument wrapper, suitable for use as a button callback.
    """
    article = get_random_article()
    return article
|
29 |
|
30 |
+
def get_news_article():
    """Fetch a random US top headline and return its scraped text and title.

    Queries the NewsAPI ``top-headlines`` endpoint, picks one of the returned
    articles that has a URL at random, and scrapes it with ``scrape_article``.

    Returns:
        A ``(text, title)`` tuple. Two values are returned on every path
        because the caller binds this function to two output components;
        failures are reported as a message in ``text`` with an empty
        ``title`` instead of raising.
    """
    if not NEWS_API_KEY:
        return "News API key is not configured.", ""

    url = 'https://newsapi.org/v2/top-headlines'
    params = {
        'country': 'us',  # You can change this as needed
        'pageSize': 100,
    }
    # Send the key as a header rather than a query parameter so it does not
    # end up in URLs that appear in exception messages or logs.
    headers = {'X-Api-Key': NEWS_API_KEY}

    try:
        response = requests.get(url, params=params, headers=headers, timeout=10)
        response.raise_for_status()
        articles = response.json().get('articles') or []
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a response body that is not valid JSON.
        return "Error fetching news: " + str(e), ""

    # Only choose among entries that actually carry a link to scrape.
    candidate_urls = [a.get('url') for a in articles if a.get('url')]
    if not candidate_urls:
        return "No news article found.", ""

    return scrape_article(random.choice(candidate_urls))
|
51 |
+
|
52 |
+
def scrape_article(url):
    """Download a web page and extract its title and paragraph text.

    Args:
        url: Address of the article page to fetch.

    Returns:
        A ``(text, title)`` tuple. ``text`` is the text of every ``<p>``
        element joined by spaces and cut to the first 1024
        whitespace-separated words; ``title`` is the page ``<title>`` text,
        or "No Title Available" when it is missing or empty. On any failure
        ``text`` holds an error message and ``title`` is the empty string.
    """
    try:
        # Bound the wait so one slow site cannot hang the UI callback, and
        # treat HTTP error pages as failures instead of scraping them.
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        # Extracting the title - this is a general approach.
        # ``soup.title.string`` is None when <title> is empty or contains
        # nested tags, so fall back in that case as well.
        raw_title = soup.title.string if soup.title else None
        title = raw_title.strip() if raw_title else ""
        if not title:
            title = "No Title Available"

        paragraphs = soup.find_all('p')  # This is a simplification

        text = ' '.join(p.get_text() for p in paragraphs)
        truncated_text = ' '.join(text.split()[:1024])  # First 1024 words

        return truncated_text, title
    except Exception as e:
        # Deliberately broad: this runs behind a UI button, so any failure is
        # reported as text rather than crashing the callback.
        return "Error scraping article: " + str(e), ""
|
69 |
+
|
70 |
# Using Gradio Blocks
|
71 |
with gr.Blocks() as demo:
|
72 |
gr.Markdown("## News Summary App")
|
73 |
+
gr.Markdown("Enter a news text and get its summary, or load a random article.")
|
74 |
with gr.Row():
|
75 |
+
article_title = gr.Label() # Component to display the article title
|
76 |
input_text = gr.Textbox(lines=10, label="Input Text")
|
77 |
output_text = gr.Textbox(label="Summary")
|
78 |
+
|
79 |
+
load_dataset_article_button = gr.Button("Load Random Article from Dataset")
|
80 |
+
load_news_article_button = gr.Button("Load News Article")
|
81 |
+
|
82 |
+
load_dataset_article_button.click(fn=load_article, inputs=[], outputs=input_text)
|
83 |
+
load_news_article_button.click(fn=get_news_article, inputs=[], outputs=[input_text, article_title])
|
84 |
+
|
85 |
summarize_button = gr.Button("Summarize")
|
86 |
summarize_button.click(fn=summarize, inputs=input_text, outputs=output_text)
|
87 |
|
requirements.txt
CHANGED
@@ -4,3 +4,4 @@ datasets
|
|
4 |
evaluate
|
5 |
accelerate
|
6 |
torch
|
|
|
|
4 |
evaluate
|
5 |
accelerate
|
6 |
torch
|
7 |
+
beautifulsoup4
|