latest_news_backend_with_cat_pred_similar_news

Running

lalithadevi committed on Mar 9

Commit

bb7fe56

•

1 Parent(s): fa2ba95

Update news_category_prediction.py

Files changed (1) hide show

news_category_prediction.py CHANGED Viewed

@@ -12,18 +12,16 @@ def parse_prediction(tflite_pred, label_encoder):
 def inference(text, interpreter, label_encoder, tokenizer):
     batch_size = len(text)
-    MAX_LEN = 80
-    N_CLASSES = 8
     if text != "":
-        tokens = tokenizer(text, max_length=MAX_LEN, padding="max_length", truncation=True, return_tensors="tf")
         # tflite model inference
         interpreter.allocate_tensors()
         input_details = interpreter.get_input_details()
         output_details = interpreter.get_output_details()[0]
         attention_mask, input_ids = tokens['attention_mask'], tokens['input_ids']
-        interpreter.resize_tensor_input(input_details[0]['index'],[batch_size, MAX_LEN])
-        interpreter.resize_tensor_input(input_details[1]['index'],[batch_size, MAX_LEN])
-        interpreter.resize_tensor_input(output_details['index'],[batch_size, N_CLASSES])
         interpreter.allocate_tensors()
         interpreter.set_tensor(input_details[0]["index"], attention_mask)
         interpreter.set_tensor(input_details[1]["index"], input_ids)
@@ -66,6 +64,6 @@ def predict_news_category(old_news: pd.DataFrame, new_news: pd.DataFrame, interp
         final_df = pd.concat([old_news, new_news], axis=0, ignore_index=True)
         final_df.drop_duplicates(subset='url', keep='first', inplace=True)
     final_df.reset_index(drop=True, inplace=True)
-    final_df.loc[final_df['pred_proba']<0.65, 'category'] = 'OTHERS'
     return final_df

 def inference(text, interpreter, label_encoder, tokenizer):
     batch_size = len(text)
     if text != "":
+        tokens = tokenizer(text, max_length=DISTILBERT_TOKENIZER_N_TOKENS, padding="max_length", truncation=True, return_tensors="tf")
         # tflite model inference
         interpreter.allocate_tensors()
         input_details = interpreter.get_input_details()
         output_details = interpreter.get_output_details()[0]
         attention_mask, input_ids = tokens['attention_mask'], tokens['input_ids']
+        interpreter.resize_tensor_input(input_details[0]['index'],[batch_size, DISTILBERT_TOKENIZER_N_TOKENS])
+        interpreter.resize_tensor_input(input_details[1]['index'],[batch_size, DISTILBERT_TOKENIZER_N_TOKENS])
+        interpreter.resize_tensor_input(output_details['index'],[batch_size, NEWS_CATEGORY_CLASSIFIER_N_CLASSES])
         interpreter.allocate_tensors()
         interpreter.set_tensor(input_details[0]["index"], attention_mask)
         interpreter.set_tensor(input_details[1]["index"], input_ids)
         final_df = pd.concat([old_news, new_news], axis=0, ignore_index=True)
         final_df.drop_duplicates(subset='url', keep='first', inplace=True)
     final_df.reset_index(drop=True, inplace=True)
+    final_df.loc[final_df['pred_proba']<CLASSIFIER_THRESHOLD, 'category'] = 'OTHERS'
     return final_df