lalithadevi committed on
Commit
e7474d5
1 Parent(s): 561ed17

Update news_category_similar_news_prediction.py

Browse files
news_category_similar_news_prediction.py CHANGED
@@ -137,6 +137,8 @@ def predict_news_category_similar_news(old_news: pd.DataFrame, new_news: pd.Data
137
  final_df['similar_news'] = sim_news
138
  final_df.reset_index(drop=True, inplace=True)
139
  final_df.loc[final_df['pred_proba']<CLASSIFIER_THRESHOLD, 'category'] = 'OTHERS'
 
 
140
  else:
141
  logger.warning('Prior predictions found in old news')
142
  if not cols_check([*new_news.columns], [*old_news.columns][:-3]):
@@ -155,6 +157,8 @@ def predict_news_category_similar_news(old_news: pd.DataFrame, new_news: pd.Data
155
  final_df.drop_duplicates(subset='url', keep='first', inplace=True)
156
  final_df.reset_index(drop=True, inplace=True)
157
  final_df.loc[final_df['pred_proba']<CLASSIFIER_THRESHOLD, 'category'] = 'OTHERS'
 
 
158
  else:
159
  logger.warning('INFO: Old & New Articles are the same. There is no requirement of updating them in the database. Database is not updated.')
160
  db_updation_required = 0
 
137
  final_df['similar_news'] = sim_news
138
  final_df.reset_index(drop=True, inplace=True)
139
  final_df.loc[final_df['pred_proba']<CLASSIFIER_THRESHOLD, 'category'] = 'OTHERS'
140
+ final_df.loc[(final_df['title'].str.contains('Pakistan')) & (final_df['category'] == 'NATION'), 'category'] = 'WORLD'
141
+ logger.warning('Updated category of articles having Pakistan in title and category=NATION to WORLD')
142
  else:
143
  logger.warning('Prior predictions found in old news')
144
  if not cols_check([*new_news.columns], [*old_news.columns][:-3]):
 
157
  final_df.drop_duplicates(subset='url', keep='first', inplace=True)
158
  final_df.reset_index(drop=True, inplace=True)
159
  final_df.loc[final_df['pred_proba']<CLASSIFIER_THRESHOLD, 'category'] = 'OTHERS'
160
+ final_df.loc[(final_df['title'].str.contains('Pakistan')) & (final_df['category'] == 'NATION'), 'category'] = 'WORLD'
161
+ logger.warning('Updated category of articles having Pakistan in title and category=NATION to WORLD')
162
  else:
163
  logger.warning('INFO: Old & New Articles are the same. There is no requirement of updating them in the database. Database is not updated.')
164
  db_updation_required = 0