lalithadevi
committed on
Commit
•
68c6e68
1
Parent(s):
ca6c453
Update news_extractor/news_extractor.py
Browse files
news_extractor/news_extractor.py
CHANGED
@@ -82,7 +82,9 @@ def news_agg(rss):
|
|
82 |
b = BeautifulSoup(resp.content, "xml")
|
83 |
items = b.find_all("item")
|
84 |
for i in items:
|
85 |
-
rss_df = rss_df.append(rss_parser(i)).copy()
|
|
|
|
|
86 |
rss_df["description"] = rss_df["description"].replace([" NULL", ''], np.nan)
|
87 |
rss_df.dropna(inplace=True)
|
88 |
rss_df["src"] = src_parse(rss)
|
@@ -114,7 +116,9 @@ rss = ['https://www.economictimes.indiatimes.com/rssfeedstopstories.cms',
|
|
114 |
def get_news():
|
115 |
final_df = pd.DataFrame()
|
116 |
for i in rss:
|
117 |
-
final_df = final_df.append(news_agg(i))
|
|
|
|
|
118 |
|
119 |
final_df.sort_values(by="elapsed_time", inplace=True)
|
120 |
# final_df['src_time'] = final_df['src'] + (" " * 5) + final_df["elapsed_time_str"]
|
|
|
82 |
b = BeautifulSoup(resp.content, "xml")
|
83 |
items = b.find_all("item")
|
84 |
for i in items:
|
85 |
+
# rss_df = rss_df.append(rss_parser(i)).copy()
|
86 |
+
rss_df = pd.concat([rss_df, rss_parser(i)], axis=0)
|
87 |
+
rss_df.reset_index(drop=True, inplace=True)
|
88 |
rss_df["description"] = rss_df["description"].replace([" NULL", ''], np.nan)
|
89 |
rss_df.dropna(inplace=True)
|
90 |
rss_df["src"] = src_parse(rss)
|
|
|
116 |
def get_news():
|
117 |
final_df = pd.DataFrame()
|
118 |
for i in rss:
|
119 |
+
# final_df = final_df.append(news_agg(i))
|
120 |
+
final_df = pd.concat([final_df, news_agg(i)], axis=0)
|
121 |
+
final_df.reset_index(drop=True, inplace=True)
|
122 |
|
123 |
final_df.sort_values(by="elapsed_time", inplace=True)
|
124 |
# final_df['src_time'] = final_df['src'] + (" " * 5) + final_df["elapsed_time_str"]
|