Commit
•
61f5068
1
Parent(s):
178e147
Update news_category_similar_news_prediction.py
Browse files
news_category_similar_news_prediction.py
CHANGED
@@ -75,7 +75,7 @@ def predict_news_category_similar_news(old_news: pd.DataFrame, new_news: pd.Data
|
|
75 |
final_df.drop_duplicates(subset='url', keep='first', inplace=True)
|
76 |
headlines = [*final_df['title']].copy()
|
77 |
label, prob = inference(headlines, interpreter, label_encoder, tokenizer)
|
78 |
-
sent_embs = vectorizer.vectorize_(headlines)
|
79 |
sim_news = [find_similar_news(search_vec, collection, vectorizer, sent_model, ce_model) for search_vec in sent_embs]
|
80 |
final_df['category'] = label
|
81 |
final_df['pred_proba'] = prob
|
@@ -91,7 +91,7 @@ def predict_news_category_similar_news(old_news: pd.DataFrame, new_news: pd.Data
|
|
91 |
if len(new_news) > 0:
|
92 |
headlines = [*new_news['title']].copy()
|
93 |
label, prob = inference(headlines, interpreter, label_encoder, tokenizer)
|
94 |
-
sent_embs = vectorizer.vectorize_(headlines)
|
95 |
sim_news = [find_similar_news(search_vec, collection, vectorizer, sent_model, ce_model) for search_vec in sent_embs]
|
96 |
new_news['category'] = label
|
97 |
new_news['pred_proba'] = prob
|
|
|
75 |
final_df.drop_duplicates(subset='url', keep='first', inplace=True)
|
76 |
headlines = [*final_df['title']].copy()
|
77 |
label, prob = inference(headlines, interpreter, label_encoder, tokenizer)
|
78 |
+
sent_embs = vectorizer.vectorize_(headlines, sent_model)
|
79 |
sim_news = [find_similar_news(search_vec, collection, vectorizer, sent_model, ce_model) for search_vec in sent_embs]
|
80 |
final_df['category'] = label
|
81 |
final_df['pred_proba'] = prob
|
|
|
91 |
if len(new_news) > 0:
|
92 |
headlines = [*new_news['title']].copy()
|
93 |
label, prob = inference(headlines, interpreter, label_encoder, tokenizer)
|
94 |
+
sent_embs = vectorizer.vectorize_(headlines, sent_model)
|
95 |
sim_news = [find_similar_news(search_vec, collection, vectorizer, sent_model, ce_model) for search_vec in sent_embs]
|
96 |
new_news['category'] = label
|
97 |
new_news['pred_proba'] = prob
|