lalithadevi committed on
Commit
68c6e68
1 Parent(s): ca6c453

Update news_extractor/news_extractor.py

Browse files
Files changed (1) hide show
  1. news_extractor/news_extractor.py +6 -2
news_extractor/news_extractor.py CHANGED
@@ -82,7 +82,9 @@ def news_agg(rss):
82
  b = BeautifulSoup(resp.content, "xml")
83
  items = b.find_all("item")
84
  for i in items:
85
- rss_df = rss_df.append(rss_parser(i)).copy()
 
 
86
  rss_df["description"] = rss_df["description"].replace([" NULL", ''], np.nan)
87
  rss_df.dropna(inplace=True)
88
  rss_df["src"] = src_parse(rss)
@@ -114,7 +116,9 @@ rss = ['https://www.economictimes.indiatimes.com/rssfeedstopstories.cms',
114
  def get_news():
115
  final_df = pd.DataFrame()
116
  for i in rss:
117
- final_df = final_df.append(news_agg(i))
 
 
118
 
119
  final_df.sort_values(by="elapsed_time", inplace=True)
120
  # final_df['src_time'] = final_df['src'] + (" " * 5) + final_df["elapsed_time_str"]
 
82
  b = BeautifulSoup(resp.content, "xml")
83
  items = b.find_all("item")
84
  for i in items:
85
+ # rss_df = rss_df.append(rss_parser(i)).copy()
86
+ rss_df = pd.concat([rss_df, rss_parser(i)], axis=0)
87
+ rss_df.reset_index(drop=True, inplace=True)
88
  rss_df["description"] = rss_df["description"].replace([" NULL", ''], np.nan)
89
  rss_df.dropna(inplace=True)
90
  rss_df["src"] = src_parse(rss)
 
116
  def get_news():
117
  final_df = pd.DataFrame()
118
  for i in rss:
119
+ # final_df = final_df.append(news_agg(i))
120
+ final_df = pd.concat([final_df, news_agg(i)], axis=0)
121
+ final_df.reset_index(drop=True, inplace=True)
122
 
123
  final_df.sort_values(by="elapsed_time", inplace=True)
124
  # final_df['src_time'] = final_df['src'] + (" " * 5) + final_df["elapsed_time_str"]