liamvbetts committed on
Commit
caaf3b3
1 Parent(s): 7218a55

searchable

Browse files
Files changed (2) hide show
  1. app.py +26 -34
  2. requirements.txt +0 -1
app.py CHANGED
@@ -1,12 +1,9 @@
1
  import gradio as gr
2
  import random
3
- from datasets import load_dataset
4
  import requests
5
  from bs4 import BeautifulSoup
6
  import os
7
 
8
- dataset = load_dataset("cnn_dailymail", "3.0.0")
9
-
10
  NEWS_API_KEY = os.environ['NEWS_API_KEY']
11
  HF_TOKEN = os.environ['HF_TOKEN']
12
 
@@ -17,12 +14,9 @@ def summarize(model_name, article):
17
  payload = {"inputs": article}
18
  response = requests.post(API_URL, headers=headers, json=payload)
19
 
20
- # Check if the response is successful
21
  if response.status_code == 200:
22
- # Assuming the response structure has a 'generated_text' field
23
  return format(response.json())
24
  else:
25
- # Handle different types of errors
26
  if response.status_code == 401:
27
  return "Error: Unauthorized. Check your API token."
28
  elif response.status_code == 503:
@@ -34,23 +28,23 @@ def summarize(model_name, article):
34
  def format(response):
35
  return response[0]['generated_text']
36
 
37
def get_random_article():
    """Return the first 1024 characters of a random validation article.

    Reads the module-level ``dataset`` and picks one row of its
    ``"validation"`` split uniformly at random.

    The previous version shuffled the entire split just to keep a single
    row, and called ``random.seed()`` first even though that does not
    influence ``Dataset.shuffle``. Indexing one random row gives the same
    kind of result without building a full permutation on every click.

    Returns:
        The ``article`` field of the chosen row, truncated to 1024 characters.
    """
    validation_split = dataset["validation"]
    row_index = random.randrange(len(validation_split))
    return validation_split[row_index]['article'][:1024]
42
-
43
def load_article():
    """Load a random dataset article for the "Load Random Article" button.

    The button's click handler is registered with two output components
    (the input textbox and the article-title label), so this callback must
    return two values. The previous version returned only the article text,
    which leaves the handler one output short. Dataset articles have no
    title here, so an empty string is returned for the label; this also
    clears any title left over from a previously loaded news article.

    Returns:
        A ``(article_text, title)`` tuple where ``title`` is always ``""``.
    """
    return get_random_article(), ""
 
 
 
 
 
 
 
 
45
 
46
- def get_news_article():
47
- url = 'https://newsapi.org/v2/top-headlines'
48
- news_url = ''
49
- params = {
50
- 'apiKey': NEWS_API_KEY,
51
- 'country': 'us', # You can change this as needed
52
- 'pageSize': 100
53
- }
54
  response = requests.get(url, params=params)
55
  articles = response.json().get('articles', [])
56
  if articles:
@@ -58,7 +52,7 @@ def get_news_article():
58
  news_url = random_article.get('url')
59
  else:
60
  return None
61
-
62
  if news_url:
63
  full_article, title = scrape_article(news_url)
64
  return full_article, title
@@ -70,36 +64,34 @@ def scrape_article(url):
70
  response = requests.get(url)
71
  soup = BeautifulSoup(response.content, 'html.parser')
72
 
73
- # Extracting the title - this is a general approach
74
  title = soup.title.string if soup.title else "No Title Available"
75
 
76
- article_content = soup.find_all('p') # This is a simplification
77
 
78
  text = ' '.join([p.get_text() for p in article_content])
79
  words = text.split()
80
- truncated_text = ' '.join(words[:512]) # Truncate to first 1024 words
81
-
82
  return truncated_text, title
83
  except Exception as e:
84
  return "Error scraping article: " + str(e), ""
85
 
86
  with gr.Blocks() as demo:
87
  gr.Markdown("# News Summary App")
88
- gr.Markdown("Enter a news text and get its summary, or load a random article.")
89
 
90
  with gr.Row():
91
  with gr.Column():
92
- load_dataset_article_button = gr.Button("Load Random Article from Dataset")
93
- load_news_article_button = gr.Button("Load News Article")
94
- article_title = gr.Label() # Component to display the article title
95
- input_text = gr.Textbox(lines=10, label="Input Text", placeholder="Enter article text or load a random article...")
96
  with gr.Column():
97
  model_name = gr.Dropdown(label="Model Name", choices=["liamvbetts/bart-news-summary-v1", "liamvbetts/bart-base-cnn-v1", "liamvbetts/bart-large-cnn-v2", "liamvbetts/bart-large-cnn-v4"], value="liamvbetts/bart-news-summary-v1")
98
  summarize_button = gr.Button("Summarize")
99
  output_text = gr.Textbox(label="Summary", placeholder="Summary will appear here...")
100
 
101
- load_dataset_article_button.click(fn=load_article, inputs=[], outputs=[input_text, article_title])
102
- load_news_article_button.click(fn=get_news_article, inputs=[], outputs=[input_text, article_title])
103
  summarize_button.click(fn=summarize, inputs=[model_name, input_text], outputs=output_text)
104
 
105
  demo.launch()
 
1
  import gradio as gr
2
  import random
 
3
  import requests
4
  from bs4 import BeautifulSoup
5
  import os
6
 
 
 
7
  NEWS_API_KEY = os.environ['NEWS_API_KEY']
8
  HF_TOKEN = os.environ['HF_TOKEN']
9
 
 
14
  payload = {"inputs": article}
15
  response = requests.post(API_URL, headers=headers, json=payload)
16
 
 
17
  if response.status_code == 200:
 
18
  return format(response.json())
19
  else:
 
20
  if response.status_code == 401:
21
  return "Error: Unauthorized. Check your API token."
22
  elif response.status_code == 503:
 
28
  def format(response):
29
  return response[0]['generated_text']
30
 
31
+ def get_news_article(search_query):
32
+ if search_query.strip():
33
+ url = 'https://newsapi.org/v2/everything'
34
+ params = {
35
+ 'apiKey': NEWS_API_KEY,
36
+ 'q': search_query,
37
+ 'pageSize': 100,
38
+ 'language': 'en'
39
+ }
40
+ else:
41
+ url = 'https://newsapi.org/v2/top-headlines'
42
+ params = {
43
+ 'apiKey': NEWS_API_KEY,
44
+ 'country': 'us',
45
+ 'pageSize': 100
46
+ }
47
 
 
 
 
 
 
 
 
 
48
  response = requests.get(url, params=params)
49
  articles = response.json().get('articles', [])
50
  if articles:
 
52
  news_url = random_article.get('url')
53
  else:
54
  return None
55
+
56
  if news_url:
57
  full_article, title = scrape_article(news_url)
58
  return full_article, title
 
64
  response = requests.get(url)
65
  soup = BeautifulSoup(response.content, 'html.parser')
66
 
 
67
  title = soup.title.string if soup.title else "No Title Available"
68
 
69
+ article_content = soup.find_all('p')
70
 
71
  text = ' '.join([p.get_text() for p in article_content])
72
  words = text.split()
73
+ truncated_text = ' '.join(words[:512])
74
+
75
  return truncated_text, title
76
  except Exception as e:
77
  return "Error scraping article: " + str(e), ""
78
 
79
  with gr.Blocks() as demo:
80
  gr.Markdown("# News Summary App")
81
+ gr.Markdown("Enter a news text, search for news articles, or load a random article.")
82
 
83
  with gr.Row():
84
  with gr.Column():
85
+ search_query_input = gr.Textbox(label="Search for News", placeholder="Enter a topic to search...")
86
+ load_news_article_button = gr.Button("Search News Article")
87
+ article_title = gr.Label()
88
+ input_text = gr.Textbox(lines=10, label="Input Text", placeholder="Enter article text, load a random article, or search for news...")
89
  with gr.Column():
90
  model_name = gr.Dropdown(label="Model Name", choices=["liamvbetts/bart-news-summary-v1", "liamvbetts/bart-base-cnn-v1", "liamvbetts/bart-large-cnn-v2", "liamvbetts/bart-large-cnn-v4"], value="liamvbetts/bart-news-summary-v1")
91
  summarize_button = gr.Button("Summarize")
92
  output_text = gr.Textbox(label="Summary", placeholder="Summary will appear here...")
93
 
94
+ load_news_article_button.click(fn=get_news_article, inputs=[search_query_input], outputs=[input_text, article_title])
 
95
  summarize_button.click(fn=summarize, inputs=[model_name, input_text], outputs=output_text)
96
 
97
  demo.launch()
requirements.txt CHANGED
@@ -1,3 +1,2 @@
1
  gradio
2
- datasets
3
  beautifulsoup4
 
1
  gradio
 
2
  beautifulsoup4