Commit
•
18c9a32
1
Parent(s):
93cee42
Update news_extractor/news_extractor.py
Browse files
news_extractor/news_extractor.py
CHANGED
@@ -47,8 +47,8 @@ def rss_parser(i):
|
|
47 |
date = "Sat, 12 Aug 2000 13:39:15 +05:30" if ((b1.find("pubDate") == "") or (b1.find("pubDate") is None)) else b1.find("pubDate").get_text()
|
48 |
if url.find("businesstoday.in") >= 0:
|
49 |
date = date.replace("GMT", "+05:30")
|
50 |
-
if url.find("… [remainder of this removed line was lost in extraction]
|
51 |
-
date = date.replace("… [remainder of this removed line was lost in extraction]
|
52 |
date1 = parser.parse(date)
|
53 |
return pd.DataFrame({"title": title,
|
54 |
"url": url,
|
@@ -66,8 +66,7 @@ def src_parse(rss):
|
|
66 |
rss = 'ndtv profit'
|
67 |
if rss.find('ndtv') >= 0:
|
68 |
rss = 'ndtv.com'
|
69 |
-
|
70 |
-
rss = 'bbc.co.uk'
|
71 |
rss = rss.replace("https://www.", "")
|
72 |
rss = rss.split("/")
|
73 |
return rss[0]
|
@@ -112,7 +111,7 @@ def news_agg(rss):
|
|
112 |
# List of RSS feeds
|
113 |
rss = ['https://www.economictimes.indiatimes.com/rssfeedstopstories.cms',
|
114 |
'https://www.thehindu.com/news/feeder/default.rss',
|
115 |
-
'https://… [remainder of this removed feed URL was lost in extraction]
|
116 |
'https://www.businesstoday.in/rssfeeds/?id=225346',
|
117 |
'https://feeds.feedburner.com/ndtvnews-latest',
|
118 |
'https://www.hindustantimes.com/feeds/rss/world-news/rssfeed.xml',
|
|
|
47 |
date = "Sat, 12 Aug 2000 13:39:15 +05:30" if ((b1.find("pubDate") == "") or (b1.find("pubDate") is None)) else b1.find("pubDate").get_text()
|
48 |
if url.find("businesstoday.in") >= 0:
|
49 |
date = date.replace("GMT", "+05:30")
|
50 |
+
if url.find("deccanchronicle") >= 0:
|
51 |
+
date = date.replace("GMT", "+05:30")
|
52 |
date1 = parser.parse(date)
|
53 |
return pd.DataFrame({"title": title,
|
54 |
"url": url,
|
|
|
66 |
rss = 'ndtv profit'
|
67 |
if rss.find('ndtv') >= 0:
|
68 |
rss = 'ndtv.com'
|
69 |
+
|
|
|
70 |
rss = rss.replace("https://www.", "")
|
71 |
rss = rss.split("/")
|
72 |
return rss[0]
|
|
|
111 |
# List of RSS feeds
|
112 |
rss = ['https://www.economictimes.indiatimes.com/rssfeedstopstories.cms',
|
113 |
'https://www.thehindu.com/news/feeder/default.rss',
|
114 |
+
'https://www.deccanchronicle.com/google_feeds.xml',
|
115 |
'https://www.businesstoday.in/rssfeeds/?id=225346',
|
116 |
'https://feeds.feedburner.com/ndtvnews-latest',
|
117 |
'https://www.hindustantimes.com/feeds/rss/world-news/rssfeed.xml',
|