lalithadevi
committed on
Commit
•
60be9e8
1
Parent(s):
06d4f50
Update news_category_similar_news_prediction.py
Browse files
news_category_similar_news_prediction.py
CHANGED
@@ -10,6 +10,39 @@ from find_similar_news import find_similar_news
|
|
10 |
|
11 |
logger = get_logger()
|
12 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
def parse_prediction(tflite_pred, label_encoder):
|
14 |
tflite_pred_argmax = np.argmax(tflite_pred, axis=1)
|
15 |
tflite_pred_label = label_encoder.inverse_transform(tflite_pred_argmax)
|
@@ -53,12 +86,14 @@ def predict_news_category_similar_news(old_news: pd.DataFrame, new_news: pd.Data
|
|
53 |
if not isinstance(new_news, pd.DataFrame):
|
54 |
raise Exception('No New News Found')
|
55 |
else:
|
56 |
-
new_news = new_news.copy()
|
|
|
57 |
logger.warning(f'new news columns: {[*new_news.columns]}')
|
58 |
logger.warning(f'{len(new_news)} new news items found')
|
59 |
|
60 |
if isinstance(old_news, pd.DataFrame):
|
61 |
-
old_news = old_news.copy()
|
|
|
62 |
logger.warning(f'old news columns: {[*old_news.columns]}')
|
63 |
logger.warning(f'{len(old_news)} old news items found')
|
64 |
old_news.drop(columns='_id', inplace=True)
|
|
|
10 |
|
11 |
logger = get_logger()
|
12 |
|
13 |
+
##########################
|
14 |
+
|
15 |
+
from dateutil import parser
|
16 |
+
def correct_date(x):
    """
    Returns a date string that carries a time part, or a fixed placeholder date.

    :param x: raw published-date value of any type.
    :return: ``x`` unchanged when it is a string containing ":"; otherwise the
        fixed string "2020-11-07 00:36:44+05:30".
    """
    # A string without ":" has no time-of-day component, and non-strings
    # (None, NaN, numbers) are not usable as date strings at all.
    if isinstance(x, str) and ":" in x:
        return x
    # NOTE(review): the placeholder is an old timestamp, presumably chosen so
    # that downstream age filtering discards the row - confirm with the caller.
    return "2020-11-07 00:36:44+05:30"
|
20 |
+
|
21 |
+
def date_time_parser(dt):
    """
    Computes the minutes elapsed since published time.

    :param dt: published time as a ``datetime.datetime`` or a subclass of it
        (such as ``pandas.Timestamp``); may be naive or timezone-aware.
    :return: int, minutes elapsed, rounded to the nearest minute. Returns
        100000 when ``dt`` is not a usable datetime (wrong type, NaT, ...).
    """
    try:
        # ``tzinfo`` is defined on plain datetimes as well as on pandas
        # Timestamps, whereas ``tz`` exists only on Timestamps. "Now" is taken
        # in the same timezone as ``dt`` so naive and aware values both work.
        elapsed = dt.now(dt.tzinfo) - dt
        return int(round(elapsed.total_seconds() / 60))
    except (AttributeError, TypeError, ValueError, OverflowError):
        # Best-effort fallback: report a very large age for unusable values
        # instead of failing the whole batch.
        return 100000
|
31 |
+
|
32 |
+
def delete_outdated_news(final_df: pd.DataFrame, max_age_minutes: int = 720):
    """
    Drops news items whose published time is older than the allowed age.

    :param final_df: news items; must contain a "parsed_date" column. The
        passed-in frame is not modified, a copy is processed instead.
    :param max_age_minutes: maximum age, in minutes, of the rows to keep
        (720 = 12 hrs, 1440 = 24 hrs).
    :return: new DataFrame holding only the rows whose elapsed time is at most
        ``max_age_minutes``; its "parsed_date" column contains the values
        returned by ``parser.parse``.
    """
    recent_df = final_df.copy()
    # Normalise unusable values first so that ``parser.parse`` is only given
    # strings that went through ``correct_date``.
    recent_df["parsed_date"] = recent_df["parsed_date"].map(correct_date)
    recent_df["parsed_date"] = recent_df["parsed_date"].map(parser.parse)
    # Age of every row in minutes; kept as a separate mask so no temporary
    # column has to be added to and then removed from the frame.
    elapsed_minutes = recent_df["parsed_date"].apply(date_time_parser)
    return recent_df.loc[elapsed_minutes <= max_age_minutes, :].copy()
|
40 |
+
|
41 |
+
|
42 |
+
|
43 |
+
############################################
|
44 |
+
|
45 |
+
|
46 |
def parse_prediction(tflite_pred, label_encoder):
|
47 |
tflite_pred_argmax = np.argmax(tflite_pred, axis=1)
|
48 |
tflite_pred_label = label_encoder.inverse_transform(tflite_pred_argmax)
|
|
|
86 |
if not isinstance(new_news, pd.DataFrame):
|
87 |
raise Exception('No New News Found')
|
88 |
else:
|
89 |
+
# new_news = new_news.copy()
|
90 |
+
new_news = delete_outdated_news(new_news)
|
91 |
logger.warning(f'new news columns: {[*new_news.columns]}')
|
92 |
logger.warning(f'{len(new_news)} new news items found')
|
93 |
|
94 |
if isinstance(old_news, pd.DataFrame):
|
95 |
+
# old_news = old_news.copy()
|
96 |
+
old_news = delete_outdated_news(old_news)
|
97 |
logger.warning(f'old news columns: {[*old_news.columns]}')
|
98 |
logger.warning(f'{len(old_news)} old news items found')
|
99 |
old_news.drop(columns='_id', inplace=True)
|