liamvbetts committed on
Commit
bc1a0a8
1 Parent(s): 9b95b6c

news api integration

Browse files
Files changed (2) hide show
  1. app.py +53 -3
  2. requirements.txt +1 -0
app.py CHANGED
@@ -2,12 +2,16 @@ import gradio as gr
2
  import random
3
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
4
  from datasets import load_dataset
 
 
5
 
6
  tokenizer = AutoTokenizer.from_pretrained("liamvbetts/bart-large-cnn-v4")
7
  model = AutoModelForSeq2SeqLM.from_pretrained("liamvbetts/bart-large-cnn-v4")
8
 
9
  dataset = load_dataset("cnn_dailymail", "3.0.0")
10
 
 
 
11
  def summarize(article):
12
  inputs = tokenizer(article, return_tensors="pt").input_ids
13
  outputs = model.generate(inputs, max_new_tokens=128, do_sample=False)
@@ -23,15 +27,61 @@ def get_random_article():
23
  def load_article():
24
  return get_random_article()
25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  # Using Gradio Blocks
27
  with gr.Blocks() as demo:
28
  gr.Markdown("## News Summary App")
29
- gr.Markdown("Enter a news text and get its summary, or load a random article from the validation set.")
30
  with gr.Row():
 
31
  input_text = gr.Textbox(lines=10, label="Input Text")
32
  output_text = gr.Textbox(label="Summary")
33
- load_article_button = gr.Button("Load Random Article")
34
- load_article_button.click(fn=load_article, inputs=[], outputs=input_text)
 
 
 
 
 
35
  summarize_button = gr.Button("Summarize")
36
  summarize_button.click(fn=summarize, inputs=input_text, outputs=output_text)
37
 
 
2
  import random
3
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
4
  from datasets import load_dataset
5
+ import requests
6
+ from bs4 import BeautifulSoup
7
 
8
  tokenizer = AutoTokenizer.from_pretrained("liamvbetts/bart-large-cnn-v4")
9
  model = AutoModelForSeq2SeqLM.from_pretrained("liamvbetts/bart-large-cnn-v4")
10
 
11
  dataset = load_dataset("cnn_dailymail", "3.0.0")
12
 
13
import os

# The NewsAPI credential is read from the environment instead of being
# committed with the source. Set NEWS_API_KEY in the deployment's secret or
# environment configuration. The key that was previously committed here
# should be treated as leaked and rotated.
# With no key configured this is an empty string and the API request is
# sent without a valid credential.
NEWS_API_KEY = os.environ.get("NEWS_API_KEY", "")
14
+
15
  def summarize(article):
16
  inputs = tokenizer(article, return_tensors="pt").input_ids
17
  outputs = model.generate(inputs, max_new_tokens=128, do_sample=False)
 
27
def load_article():
    """Return the result of ``get_random_article()``.

    Thin wrapper used as the click callback of the dataset-article button;
    it takes no inputs and passes the helper's result through unchanged.
    """
    article = get_random_article()
    return article
29
 
30
def get_news_article():
    """Fetch a random US top headline from NewsAPI and return its scraped text.

    Requests up to 100 entries from the ``top-headlines`` endpoint, picks one
    of the returned article URLs at random and passes it to
    ``scrape_article``.

    Returns:
        A ``(article_text, title)`` tuple. Two values are always returned
        because the click handler binds this function to two output
        components. When the request fails or no article URL is available,
        the first element is a human-readable message and the title is an
        empty string.
    """
    url = 'https://newsapi.org/v2/top-headlines'
    params = {
        'apiKey': NEWS_API_KEY,
        'country': 'us',  # You can change this as needed
        'pageSize': 100,
    }

    try:
        # A timeout keeps a stalled API call from hanging the UI callback.
        response = requests.get(url, params=params, timeout=10)
        # ``articles`` may be missing or null (e.g. on an API error payload).
        articles = response.json().get('articles') or []
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a response body that is not valid JSON.
        return "Error fetching news: " + str(e), ""

    # Only entries that actually carry a URL can be scraped.
    candidate_urls = [item.get('url') for item in articles if item.get('url')]
    if not candidate_urls:
        return "No news article found.", ""

    # scrape_article already returns the (text, title) pair expected here.
    return scrape_article(random.choice(candidate_urls))
51
+
52
def scrape_article(url):
    """Download a web page and extract its paragraph text and title.

    Args:
        url: Address of the article page to fetch.

    Returns:
        A ``(text, title)`` tuple. ``text`` is the text of every ``<p>``
        element joined with spaces and cut to the first 1024 words.
        ``title`` is the page's ``<title>`` text, or "No Title Available"
        when the page has no usable title. If anything goes wrong, ``text``
        is an error message and ``title`` is an empty string.
    """
    try:
        # Bound the wait, and treat HTTP error responses as failures so that
        # an error page's boilerplate is not returned as article text.
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        # Extracting the title - this is a general approach.
        # get_text() is used instead of .string because .string is None when
        # <title> is empty or contains nested markup, which would skip the
        # fallback below.
        title = soup.title.get_text(strip=True) if soup.title else ""
        title = title or "No Title Available"

        # This is a simplification: every <p> on the page counts as body text.
        paragraphs = soup.find_all('p')
        text = ' '.join(p.get_text() for p in paragraphs)

        # Truncate to first 1024 words
        truncated_text = ' '.join(text.split()[:1024])

        return truncated_text, title
    except Exception as e:
        # Deliberately broad: this runs as a UI callback, so any failure is
        # reported as text in the input box rather than raised.
        return "Error scraping article: " + str(e), ""
69
+
70
  # Using Gradio Blocks
71
  with gr.Blocks() as demo:
72
  gr.Markdown("## News Summary App")
73
+ gr.Markdown("Enter a news text and get its summary, or load a random article.")
74
  with gr.Row():
75
+ article_title = gr.Label() # Component to display the article title
76
  input_text = gr.Textbox(lines=10, label="Input Text")
77
  output_text = gr.Textbox(label="Summary")
78
+
79
+ load_dataset_article_button = gr.Button("Load Random Article from Dataset")
80
+ load_news_article_button = gr.Button("Load News Article")
81
+
82
+ load_dataset_article_button.click(fn=load_article, inputs=[], outputs=input_text)
83
+ load_news_article_button.click(fn=get_news_article, inputs=[], outputs=[input_text, article_title])
84
+
85
  summarize_button = gr.Button("Summarize")
86
  summarize_button.click(fn=summarize, inputs=input_text, outputs=output_text)
87
 
requirements.txt CHANGED
@@ -4,3 +4,4 @@ datasets
4
  evaluate
5
  accelerate
6
  torch
 
 
4
  evaluate
5
  accelerate
6
  torch
7
+ beautifulsoup4