lalithadevi
committed on
Commit
•
ad4ed94
1
Parent(s):
aff870b
Update news_category_similar_news_prediction.py
Browse files
news_category_similar_news_prediction.py
CHANGED
@@ -3,7 +3,7 @@ import numpy as np
|
|
3 |
import tensorflow as tf
|
4 |
from config import (DISTILBERT_TOKENIZER_N_TOKENS,
|
5 |
NEWS_CATEGORY_CLASSIFIER_N_CLASSES,
|
6 |
-
CLASSIFIER_THRESHOLD)
|
7 |
|
8 |
from logger import get_logger
|
9 |
from find_similar_news import find_similar_news
|
@@ -37,7 +37,7 @@ def delete_outdated_news(final_df: pd.DataFrame):
|
|
37 |
final_df["parsed_date_1"] = [correct_date(date_) for date_ in final_df['parsed_date']]
|
38 |
final_df["parsed_date_1"] = [parser.parse(date_) for date_ in final_df['parsed_date_1']]
|
39 |
final_df["elapsed_time"] = [date_time_parser(date_) for date_ in final_df['parsed_date_1']]
|
40 |
-
final_df = final_df.loc[final_df["elapsed_time"] <=
|
41 |
final_df.drop(columns=['elapsed_time', 'parsed_date_1'], inplace=True)
|
42 |
final_df.reset_index(drop=True, inplace=True)
|
43 |
logger.warning("Exiting delete_outdated_news()")
|
|
|
3 |
import tensorflow as tf
|
4 |
from config import (DISTILBERT_TOKENIZER_N_TOKENS,
|
5 |
NEWS_CATEGORY_CLASSIFIER_N_CLASSES,
|
6 |
+
CLASSIFIER_THRESHOLD, NEWS_RETENTION_SECONDS)
|
7 |
|
8 |
from logger import get_logger
|
9 |
from find_similar_news import find_similar_news
|
|
|
37 |
final_df["parsed_date_1"] = [correct_date(date_) for date_ in final_df['parsed_date']]
|
38 |
final_df["parsed_date_1"] = [parser.parse(date_) for date_ in final_df['parsed_date_1']]
|
39 |
final_df["elapsed_time"] = [date_time_parser(date_) for date_ in final_df['parsed_date_1']]
|
40 |
+
final_df = final_df.loc[final_df["elapsed_time"] <= NEWS_RETENTION_SECONDS, :].copy()
|
41 |
final_df.drop(columns=['elapsed_time', 'parsed_date_1'], inplace=True)
|
42 |
final_df.reset_index(drop=True, inplace=True)
|
43 |
logger.warning("Exiting delete_outdated_news()")
|