Commit e7474d5 (1 parent: 561ed17)
Update news_category_similar_news_prediction.py
news_category_similar_news_prediction.py
CHANGED
@@ -137,6 +137,8 @@ def predict_news_category_similar_news(old_news: pd.DataFrame, new_news: pd.Data
|
|
137 |
final_df['similar_news'] = sim_news
|
138 |
final_df.reset_index(drop=True, inplace=True)
|
139 |
final_df.loc[final_df['pred_proba']<CLASSIFIER_THRESHOLD, 'category'] = 'OTHERS'
|
|
|
|
|
140 |
else:
|
141 |
logger.warning('Prior predictions found in old news')
|
142 |
if not cols_check([*new_news.columns], [*old_news.columns][:-3]):
|
@@ -155,6 +157,8 @@ def predict_news_category_similar_news(old_news: pd.DataFrame, new_news: pd.Data
|
|
155 |
final_df.drop_duplicates(subset='url', keep='first', inplace=True)
|
156 |
final_df.reset_index(drop=True, inplace=True)
|
157 |
final_df.loc[final_df['pred_proba']<CLASSIFIER_THRESHOLD, 'category'] = 'OTHERS'
|
|
|
|
|
158 |
else:
|
159 |
logger.warning('INFO: Old & New Articles are the same. There is no requirement of updating them in the database. Database is not updated.')
|
160 |
db_updation_required = 0
|
|
|
137 |
final_df['similar_news'] = sim_news
|
138 |
final_df.reset_index(drop=True, inplace=True)
|
139 |
final_df.loc[final_df['pred_proba']<CLASSIFIER_THRESHOLD, 'category'] = 'OTHERS'
|
140 |
+
final_df.loc[(final_df['title'].str.contains('Pakistan')) & (final_df['category'] == 'NATION'), 'category'] = 'WORLD'
|
141 |
+
logger.warning('Updated category of articles having Pakistan in title and category=NATION to WORLD')
|
142 |
else:
|
143 |
logger.warning('Prior predictions found in old news')
|
144 |
if not cols_check([*new_news.columns], [*old_news.columns][:-3]):
|
|
|
157 |
final_df.drop_duplicates(subset='url', keep='first', inplace=True)
|
158 |
final_df.reset_index(drop=True, inplace=True)
|
159 |
final_df.loc[final_df['pred_proba']<CLASSIFIER_THRESHOLD, 'category'] = 'OTHERS'
|
160 |
+
final_df.loc[(final_df['title'].str.contains('Pakistan')) & (final_df['category'] == 'NATION'), 'category'] = 'WORLD'
|
161 |
+
logger.warning('Updated category of articles having Pakistan in title and category=NATION to WORLD')
|
162 |
else:
|
163 |
logger.warning('INFO: Old & New Articles are the same. There is no requirement of updating them in the database. Database is not updated.')
|
164 |
db_updation_required = 0
|