lalithadevi committed on
Commit
4db51e3
1 Parent(s): 03d3a0e

Update news_extractor/news_extractor.py

Browse files
Files changed (1) hide show
  1. news_extractor/news_extractor.py +1 -13
news_extractor/news_extractor.py CHANGED
@@ -12,16 +12,6 @@ from logger import get_logger
12
  logger = get_logger()
13
 
14
 
15
- def date_time_parser(dt):
16
- """
17
- Computes the minutes elapsed since published time.
18
- :param dt: date
19
- :return: int, minutes elapsed.
20
- """
21
- try:
22
- return int(np.round((dt.now(dt.tz) - dt).total_seconds() / 60, 0))
23
- except:
24
- return 100000
25
 
26
  def text_clean(desc):
27
  """
@@ -117,7 +107,6 @@ def news_agg(rss):
117
  rss_df["description"] = rss_df["description"].replace([" NULL", ''], np.nan)
118
 
119
  rss_df["src"] = src_parse(rss)
120
- rss_df["elapsed_time"] = rss_df["parsed_date"].apply(date_time_parser)
121
  rss_df["parsed_date"] = rss_df["parsed_date"].astype("str")
122
 
123
  if len(rss_df) == 0:
@@ -141,8 +130,7 @@ def get_news_rss(url):
141
  if final_df is not None:
142
  final_df.reset_index(drop=True, inplace=True)
143
 
144
- final_df.sort_values(by="elapsed_time", inplace=True)
145
- final_df.drop(columns=['elapsed_time'], inplace=True)
146
 
147
  final_df.drop_duplicates(subset='url', inplace=True)
148
 
 
12
  logger = get_logger()
13
 
14
 
 
 
 
 
 
 
 
 
 
 
15
 
16
  def text_clean(desc):
17
  """
 
107
  rss_df["description"] = rss_df["description"].replace([" NULL", ''], np.nan)
108
 
109
  rss_df["src"] = src_parse(rss)
 
110
  rss_df["parsed_date"] = rss_df["parsed_date"].astype("str")
111
 
112
  if len(rss_df) == 0:
 
130
  if final_df is not None:
131
  final_df.reset_index(drop=True, inplace=True)
132
 
133
+
 
134
 
135
  final_df.drop_duplicates(subset='url', inplace=True)
136