lalithadevi committed on
Commit
67a9e75
1 Parent(s): c4461d3

Update news_extractor/news_extractor.py

Browse files
Files changed (1) hide show
  1. news_extractor/news_extractor.py +3 -15
news_extractor/news_extractor.py CHANGED
@@ -6,6 +6,7 @@ import regex as re
6
  from dateutil import parser
7
  import logging
8
  import multiprocessing
 
9
 
10
 
11
  def date_time_parser(dt):
@@ -91,9 +92,8 @@ def news_agg(rss):
91
  'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36'
92
  }
93
 
94
- timeout = 5
95
 
96
- resp = r.get(rss, timeout=timeout, headers=headers)
97
  logging.warning(f'{rss}: {resp.status_code}')
98
  b = BeautifulSoup(resp.content, "xml")
99
  items = b.find_all("item")
@@ -121,19 +121,7 @@ def news_agg(rss):
121
 
122
 
123
  # List of RSS feeds
124
- rss = ['https://www.economictimes.indiatimes.com/rssfeedstopstories.cms',
125
- 'https://www.thehindu.com/news/feeder/default.rss',
126
- # 'https://telanganatoday.com/feed',
127
- 'https://www.businesstoday.in/rssfeeds/?id=225346',
128
- 'https://feeds.feedburner.com/ndtvnews-latest',
129
- 'https://www.hindustantimes.com/feeds/rss/world-news/rssfeed.xml',
130
- 'https://www.indiatoday.in/rss/1206578',
131
-
132
- 'https://www.moneycontrol.com/rss/latestnews.xml',
133
- 'https://www.livemint.com/rss/news',
134
-
135
- 'https://www.zeebiz.com/latest.xml/feed',
136
- 'https://www.timesofindia.indiatimes.com/rssfeedmostrecent.cms']
137
 
138
 
139
  def get_news_rss(url):
 
6
  from dateutil import parser
7
  import logging
8
  import multiprocessing
9
+ from config import NEWS_EXTRACTOR_URL_TIMEOUT, RSS_FEEDS_TO_EXTRACT
10
 
11
 
12
  def date_time_parser(dt):
 
92
  'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36'
93
  }
94
 
 
95
 
96
+ resp = r.get(rss, timeout=NEWS_EXTRACTOR_URL_TIMEOUT, headers=headers)
97
  logging.warning(f'{rss}: {resp.status_code}')
98
  b = BeautifulSoup(resp.content, "xml")
99
  items = b.find_all("item")
 
121
 
122
 
123
  # List of RSS feeds
124
+ rss = RSS_FEEDS_TO_EXTRACT
 
 
 
 
 
 
 
 
 
 
 
 
125
 
126
 
127
  def get_news_rss(url):