lalithadevi committed on
Commit
07d0294
1 Parent(s): 59792ae

Update news_category_similar_news_prediction.py

Browse files
news_category_similar_news_prediction.py CHANGED
@@ -15,6 +15,7 @@ logger = get_logger()
15
  from dateutil import parser
16
  def correct_date(x):
17
  if (not isinstance(x, str)) or (str(x).find(":") == -1):
 
18
  return "2020-11-07 00:36:44+05:30"
19
  return x
20
 
@@ -27,16 +28,17 @@ def date_time_parser(dt):
27
  try:
28
  return int(np.round((dt.now(dt.tz) - dt).total_seconds() / 60, 0))
29
  except:
 
30
  return 100000
31
 
32
  def delete_outdated_news(final_df: pd.DataFrame):
33
  logger.warning("Entering delete_outdated_news()")
34
  final_df = final_df.copy()
35
- # final_df["parsed_date"] = final_df["parsed_date"].map(correct_date)
36
- final_df["parsed_date"] = final_df["parsed_date"].map(parser.parse)
37
- final_df["elapsed_time"] = final_df["parsed_date"].apply(date_time_parser)
38
  final_df = final_df.loc[final_df["elapsed_time"] <= 720, :].copy() # 1440=24 hrs and 720=12 hrs
39
- final_df.drop(columns='elapsed_time', inplace=True)
40
  final_df.reset_index(drop=True, inplace=True)
41
  logger.warning("Exiting delete_outdated_news()")
42
  return final_df
 
15
  from dateutil import parser
16
  def correct_date(x):
17
  if (not isinstance(x, str)) or (str(x).find(":") == -1):
18
+ logger.warning(f'correct_date() error: {x} is not the right date format')
19
  return "2020-11-07 00:36:44+05:30"
20
  return x
21
 
 
28
  try:
29
  return int(np.round((dt.now(dt.tz) - dt).total_seconds() / 60, 0))
30
  except:
31
+ logger.warning(f'date_time_parser() error: {dt} is not the right date format')
32
  return 100000
33
 
34
  def delete_outdated_news(final_df: pd.DataFrame):
35
  logger.warning("Entering delete_outdated_news()")
36
  final_df = final_df.copy()
37
+ final_df["parsed_date_1"] = final_df["parsed_date"].map(correct_date)
38
+ final_df["parsed_date_1"] = final_df["parsed_date_1"].map(parser.parse)
39
+ final_df["elapsed_time"] = final_df["parsed_date_1"].apply(date_time_parser)
40
  final_df = final_df.loc[final_df["elapsed_time"] <= 720, :].copy() # 1440=24 hrs and 720=12 hrs
41
+ final_df.drop(columns=['elapsed_time', 'parsed_date_1'], inplace=True)
42
  final_df.reset_index(drop=True, inplace=True)
43
  logger.warning("Exiting delete_outdated_news()")
44
  return final_df