lalithadevi committed on
Commit
18c9a32
1 Parent(s): 93cee42

Update news_extractor/news_extractor.py

Browse files
Files changed (1) hide show
  1. news_extractor/news_extractor.py +4 -5
news_extractor/news_extractor.py CHANGED
@@ -47,8 +47,8 @@ def rss_parser(i):
47
  date = "Sat, 12 Aug 2000 13:39:15 +05:30" if ((b1.find("pubDate") == "") or (b1.find("pubDate") is None)) else b1.find("pubDate").get_text()
48
  if url.find("businesstoday.in") >= 0:
49
  date = date.replace("GMT", "+05:30")
50
- if url.find("bbc") >= 0:
51
- date = date.replace(" GMT", "+00:00")
52
  date1 = parser.parse(date)
53
  return pd.DataFrame({"title": title,
54
  "url": url,
@@ -66,8 +66,7 @@ def src_parse(rss):
66
  rss = 'ndtv profit'
67
  if rss.find('ndtv') >= 0:
68
  rss = 'ndtv.com'
69
- if rss.find('bbc') >= 0:
70
- rss = 'bbc.co.uk'
71
  rss = rss.replace("https://www.", "")
72
  rss = rss.split("/")
73
  return rss[0]
@@ -112,7 +111,7 @@ def news_agg(rss):
112
  # List of RSS feeds
113
  rss = ['https://www.economictimes.indiatimes.com/rssfeedstopstories.cms',
114
  'https://www.thehindu.com/news/feeder/default.rss',
115
- 'https://feeds.bbci.co.uk/news/world/rss.xml',
116
  'https://www.businesstoday.in/rssfeeds/?id=225346',
117
  'https://feeds.feedburner.com/ndtvnews-latest',
118
  'https://www.hindustantimes.com/feeds/rss/world-news/rssfeed.xml',
 
47
  date = "Sat, 12 Aug 2000 13:39:15 +05:30" if ((b1.find("pubDate") == "") or (b1.find("pubDate") is None)) else b1.find("pubDate").get_text()
48
  if url.find("businesstoday.in") >= 0:
49
  date = date.replace("GMT", "+05:30")
50
+ if url.find("deccanchronicle") >= 0:
51
+ date = date.replace("GMT", "+05:30")
52
  date1 = parser.parse(date)
53
  return pd.DataFrame({"title": title,
54
  "url": url,
 
66
  rss = 'ndtv profit'
67
  if rss.find('ndtv') >= 0:
68
  rss = 'ndtv.com'
69
+
 
70
  rss = rss.replace("https://www.", "")
71
  rss = rss.split("/")
72
  return rss[0]
 
111
  # List of RSS feeds
112
  rss = ['https://www.economictimes.indiatimes.com/rssfeedstopstories.cms',
113
  'https://www.thehindu.com/news/feeder/default.rss',
114
+ 'https://www.deccanchronicle.com/google_feeds.xml',
115
  'https://www.businesstoday.in/rssfeeds/?id=225346',
116
  'https://feeds.feedburner.com/ndtvnews-latest',
117
  'https://www.hindustantimes.com/feeds/rss/world-news/rssfeed.xml',