lalithadevi committed on
Commit
61f5068
1 Parent(s): 178e147

Update news_category_similar_news_prediction.py

Browse files
news_category_similar_news_prediction.py CHANGED
@@ -75,7 +75,7 @@ def predict_news_category_similar_news(old_news: pd.DataFrame, new_news: pd.Data
75
  final_df.drop_duplicates(subset='url', keep='first', inplace=True)
76
  headlines = [*final_df['title']].copy()
77
  label, prob = inference(headlines, interpreter, label_encoder, tokenizer)
78
- sent_embs = vectorizer.vectorize_(headlines)
79
  sim_news = [find_similar_news(search_vec, collection, vectorizer, sent_model, ce_model) for search_vec in sent_embs]
80
  final_df['category'] = label
81
  final_df['pred_proba'] = prob
@@ -91,7 +91,7 @@ def predict_news_category_similar_news(old_news: pd.DataFrame, new_news: pd.Data
91
  if len(new_news) > 0:
92
  headlines = [*new_news['title']].copy()
93
  label, prob = inference(headlines, interpreter, label_encoder, tokenizer)
94
- sent_embs = vectorizer.vectorize_(headlines)
95
  sim_news = [find_similar_news(search_vec, collection, vectorizer, sent_model, ce_model) for search_vec in sent_embs]
96
  new_news['category'] = label
97
  new_news['pred_proba'] = prob
 
75
  final_df.drop_duplicates(subset='url', keep='first', inplace=True)
76
  headlines = [*final_df['title']].copy()
77
  label, prob = inference(headlines, interpreter, label_encoder, tokenizer)
78
+ sent_embs = vectorizer.vectorize_(headlines, sent_model)
79
  sim_news = [find_similar_news(search_vec, collection, vectorizer, sent_model, ce_model) for search_vec in sent_embs]
80
  final_df['category'] = label
81
  final_df['pred_proba'] = prob
 
91
  if len(new_news) > 0:
92
  headlines = [*new_news['title']].copy()
93
  label, prob = inference(headlines, interpreter, label_encoder, tokenizer)
94
+ sent_embs = vectorizer.vectorize_(headlines, sent_model)
95
  sim_news = [find_similar_news(search_vec, collection, vectorizer, sent_model, ce_model) for search_vec in sent_embs]
96
  new_news['category'] = label
97
  new_news['pred_proba'] = prob