lalithadevi
committed on
Commit
•
c1b9bc4
1
Parent(s):
203e844
Update news_category_similar_news_prediction.py
Browse files
news_category_similar_news_prediction.py
CHANGED
@@ -143,7 +143,7 @@ def predict_news_category_similar_news(old_news: pd.DataFrame, new_news: pd.Data
|
|
143 |
final_df['pred_proba'] = prob
|
144 |
final_df['similar_news'] = sim_news
|
145 |
final_df.reset_index(drop=True, inplace=True)
|
146 |
-
final_df.loc[final_df['pred_proba']<CLASSIFIER_THRESHOLD, 'category'] = '
|
147 |
final_df.loc[(final_df['title'].str.contains('Pakistan')) & (final_df['category'] == 'NATION'), 'category'] = 'WORLD'
|
148 |
logger.warning('Updated category of articles having Pakistan in title and category=NATION to WORLD')
|
149 |
final_df.loc[(final_df['title'].str.contains('Zodiac Sign', case=False)) | (final_df['title'].str.contains('Horoscope', case=False)), 'category'] = 'SCIENCE'
|
@@ -172,7 +172,7 @@ def predict_news_category_similar_news(old_news: pd.DataFrame, new_news: pd.Data
|
|
172 |
final_df = pd.concat([old_news, new_news], axis=0, ignore_index=True)
|
173 |
final_df.drop_duplicates(subset='url', keep='first', inplace=True)
|
174 |
final_df.reset_index(drop=True, inplace=True)
|
175 |
-
final_df.loc[final_df['pred_proba']<CLASSIFIER_THRESHOLD, 'category'] = '
|
176 |
final_df.loc[(final_df['title'].str.contains('Pakistan')) & (final_df['category'] == 'NATION'), 'category'] = 'WORLD'
|
177 |
logger.warning('Updated category of articles having Pakistan in title and category=NATION to WORLD')
|
178 |
final_df.loc[(final_df['title'].str.contains('Zodiac Sign', case=False)) | (final_df['title'].str.contains('Horoscope', case=False)), 'category'] = 'SCIENCE'
|
|
|
143 |
final_df['pred_proba'] = prob
|
144 |
final_df['similar_news'] = sim_news
|
145 |
final_df.reset_index(drop=True, inplace=True)
|
146 |
+
final_df.loc[final_df['pred_proba']<CLASSIFIER_THRESHOLD, 'category'] = 'NATION'
|
147 |
final_df.loc[(final_df['title'].str.contains('Pakistan')) & (final_df['category'] == 'NATION'), 'category'] = 'WORLD'
|
148 |
logger.warning('Updated category of articles having Pakistan in title and category=NATION to WORLD')
|
149 |
final_df.loc[(final_df['title'].str.contains('Zodiac Sign', case=False)) | (final_df['title'].str.contains('Horoscope', case=False)), 'category'] = 'SCIENCE'
|
|
|
172 |
final_df = pd.concat([old_news, new_news], axis=0, ignore_index=True)
|
173 |
final_df.drop_duplicates(subset='url', keep='first', inplace=True)
|
174 |
final_df.reset_index(drop=True, inplace=True)
|
175 |
+
final_df.loc[final_df['pred_proba']<CLASSIFIER_THRESHOLD, 'category'] = 'NATION'
|
176 |
final_df.loc[(final_df['title'].str.contains('Pakistan')) & (final_df['category'] == 'NATION'), 'category'] = 'WORLD'
|
177 |
logger.warning('Updated category of articles having Pakistan in title and category=NATION to WORLD')
|
178 |
final_df.loc[(final_df['title'].str.contains('Zodiac Sign', case=False)) | (final_df['title'].str.contains('Horoscope', case=False)), 'category'] = 'SCIENCE'
|