lalithadevi
committed on
Commit
•
07d0294
1
Parent(s):
59792ae
Update news_category_similar_news_prediction.py
Browse files
news_category_similar_news_prediction.py
CHANGED
@@ -15,6 +15,7 @@ logger = get_logger()
|
|
15 |
from dateutil import parser
|
16 |
def correct_date(x):
|
17 |
if (not isinstance(x, str)) or (str(x).find(":") == -1):
|
|
|
18 |
return "2020-11-07 00:36:44+05:30"
|
19 |
return x
|
20 |
|
@@ -27,16 +28,17 @@ def date_time_parser(dt):
|
|
27 |
try:
|
28 |
return int(np.round((dt.now(dt.tz) - dt).total_seconds() / 60, 0))
|
29 |
except:
|
|
|
30 |
return 100000
|
31 |
|
32 |
def delete_outdated_news(final_df: pd.DataFrame):
|
33 |
logger.warning("Entering delete_outdated_news()")
|
34 |
final_df = final_df.copy()
|
35 |
-
|
36 |
-
final_df["
|
37 |
-
final_df["elapsed_time"] = final_df["
|
38 |
final_df = final_df.loc[final_df["elapsed_time"] <= 720, :].copy() # 1440=24 hrs and 720=12 hrs
|
39 |
-
final_df.drop(columns='elapsed_time', inplace=True)
|
40 |
final_df.reset_index(drop=True, inplace=True)
|
41 |
logger.warning("Exiting delete_outdated_news()")
|
42 |
return final_df
|
|
|
15 |
from dateutil import parser
|
16 |
def correct_date(x):
|
17 |
if (not isinstance(x, str)) or (str(x).find(":") == -1):
|
18 |
+
logger.warning(f'correct_date() error: {x} is not the right date format')
|
19 |
return "2020-11-07 00:36:44+05:30"
|
20 |
return x
|
21 |
|
|
|
28 |
try:
|
29 |
return int(np.round((dt.now(dt.tz) - dt).total_seconds() / 60, 0))
|
30 |
except:
|
31 |
+
logger.warning(f'date_time_parser() error: {dt} is not the right date format')
|
32 |
return 100000
|
33 |
|
34 |
def delete_outdated_news(final_df: pd.DataFrame):
|
35 |
logger.warning("Entering delete_outdated_news()")
|
36 |
final_df = final_df.copy()
|
37 |
+
final_df["parsed_date_1"] = final_df["parsed_date"].map(correct_date)
|
38 |
+
final_df["parsed_date_1"] = final_df["parsed_date_1"].map(parser.parse)
|
39 |
+
final_df["elapsed_time"] = final_df["parsed_date_1"].apply(date_time_parser)
|
40 |
final_df = final_df.loc[final_df["elapsed_time"] <= 720, :].copy() # 1440=24 hrs and 720=12 hrs
|
41 |
+
final_df.drop(columns=['elapsed_time', 'parsed_date_1'], inplace=True)
|
42 |
final_df.reset_index(drop=True, inplace=True)
|
43 |
logger.warning("Exiting delete_outdated_news()")
|
44 |
return final_df
|