lalithadevi committed on
Commit
a364621
1 Parent(s): 367d701

Update news_category_similar_news_prediction.py

Browse files
news_category_similar_news_prediction.py CHANGED
@@ -170,12 +170,6 @@ def predict_news_category_similar_news(old_news: pd.DataFrame, new_news: pd.Data
170
  final_df['category'] = label
171
  final_df['pred_proba'] = prob
172
  final_df['similar_news'] = sim_news
173
- # final_df.reset_index(drop=True, inplace=True)
174
- # final_df.loc[final_df['pred_proba']<CLASSIFIER_THRESHOLD, 'category'] = 'NATION'
175
- # final_df.loc[(final_df['title'].str.contains('Pakistan')) & (final_df['category'] == 'NATION'), 'category'] = 'WORLD'
176
- # logger.warning('Updated category of articles having Pakistan in title and category=NATION to WORLD')
177
- # final_df.loc[(final_df['title'].str.contains('Zodiac Sign', case=False)) | (final_df['title'].str.contains('Horoscope', case=False)), 'category'] = 'SCIENCE'
178
- # logger.warning('Updated category of articles having Zodiac Sign in title to SCIENCE')
179
 
180
  final_df = process_prediction_df(final_df, df_type="production & prediction")
181
  prediction_df = final_df.copy()
@@ -208,13 +202,6 @@ def predict_news_category_similar_news(old_news: pd.DataFrame, new_news: pd.Data
208
  prediction_df = new_news.copy()
209
  prediction_df = add_flags_to_prediction_df(prediction_df)
210
 
211
- # final_df.drop_duplicates(subset='url', keep='first', inplace=True)
212
- # final_df.reset_index(drop=True, inplace=True)
213
- # final_df.loc[final_df['pred_proba']<CLASSIFIER_THRESHOLD, 'category'] = 'NATION'
214
- # final_df.loc[(final_df['title'].str.contains('Pakistan')) & (final_df['category'] == 'NATION'), 'category'] = 'WORLD'
215
- # logger.warning('Updated category of articles having Pakistan in title and category=NATION to WORLD')
216
- # final_df.loc[(final_df['title'].str.contains('Zodiac Sign', case=False)) | (final_df['title'].str.contains('Horoscope', case=False)), 'category'] = 'SCIENCE'
217
- # logger.warning('Updated category of articles having Zodiac Sign in title to SCIENCE')
218
  else:
219
  logger.warning('INFO: Old & New Articles are the same. There is no requirement of updating them in the database. Database is not updated.')
220
  db_updation_required = 0
 
170
  final_df['category'] = label
171
  final_df['pred_proba'] = prob
172
  final_df['similar_news'] = sim_news
 
 
 
 
 
 
173
 
174
  final_df = process_prediction_df(final_df, df_type="production & prediction")
175
  prediction_df = final_df.copy()
 
202
  prediction_df = new_news.copy()
203
  prediction_df = add_flags_to_prediction_df(prediction_df)
204
 
 
 
 
 
 
 
 
205
  else:
206
  logger.warning('INFO: Old & New Articles are the same. There is no requirement of updating them in the database. Database is not updated.')
207
  db_updation_required = 0