ajit committed on
Commit
db6839f
1 Parent(s): 026e9d4

CS predictions unconditionally override CI predictions for non-ensemble

Browse files

use case. TBD: re-examine this for ensembling too. In general, if CS
predictions are very confident, then CI is subservient to CS. CS
predictions will not be confident when there is true ambiguity in that
position for a model trained for that domain. So overriding CS with CI
needs to be done only when CS allows for it by not being so
confident. This needs to be factored into the ensemble case too.

Files changed (1) hide show
  1. aggregate_server_json.py +6 -3
aggregate_server_json.py CHANGED
@@ -248,7 +248,8 @@ class AggregateNER:
248
  n1 = flip_category(orig_cs_entity)
249
  n1["e"] = prefix + n1["e"]
250
  n2 = flip_category(consolidated_entity)
251
- ret_obj["e"] = n2["e"] + "/" + n1["e"]
 
252
  return ret_obj
253
  else:
254
  #if we come here consolidated is same as cs prediction. So we try to either use ci or the second cs prediction if ci is out of domain
@@ -262,6 +263,7 @@ class AggregateNER:
262
  n1["e"] = prefix + n1["e"]
263
  n2 = flip_category(orig_ci_entity)
264
  n2["e"] = prefix + n2["e"]
 
265
  ret_obj["e"] = n1["e"] + "/" + n2["e"]
266
  return ret_obj
267
  else:
@@ -287,7 +289,7 @@ class AggregateNER:
287
  else:
288
  return flip_category(results[server_index]["ner"][run_index])
289
  else:
290
- #here cs and ci are same. So use two cs predictions if meaningful
291
  if (len(results[server_index]["orig_cs_prediction_details"][pivot_index]['cs_distribution']) >= 2):
292
  ret_arr = self.get_predictions_above_threshold(results[server_index]["orig_cs_prediction_details"][pivot_index])
293
  orig_cs_second_entity = results[server_index]["orig_cs_prediction_details"][pivot_index]['cs_distribution'][1]
@@ -302,7 +304,8 @@ class AggregateNER:
302
  n1["e"] = prefix + n1["e"]
303
  n2 = flip_category(orig_cs_entity)
304
  n2["e"] = prefix + n2["e"]
305
- ret_obj["e"] = n2["e"] + "/" + n1["e"]
 
306
  return ret_obj
307
  else:
308
  return flip_category(results[server_index]["ner"][run_index])
 
248
  n1 = flip_category(orig_cs_entity)
249
  n1["e"] = prefix + n1["e"]
250
  n2 = flip_category(consolidated_entity)
251
+ print("consolidated != orig cs. P1 case. Emit orig cs first")
252
+ ret_obj["e"] = n1["e"] + "/" + n2["e"] #emit orig cs first
253
  return ret_obj
254
  else:
255
  #if we come here consolidated is same as cs prediction. So we try to either use ci or the second cs prediction if ci is out of domain
 
263
  n1["e"] = prefix + n1["e"]
264
  n2 = flip_category(orig_ci_entity)
265
  n2["e"] = prefix + n2["e"]
266
+ print("consolidated == orig cs. P2 case. Emit orig cs first. Then ci")
267
  ret_obj["e"] = n1["e"] + "/" + n2["e"]
268
  return ret_obj
269
  else:
 
289
  else:
290
  return flip_category(results[server_index]["ner"][run_index])
291
  else:
292
+ #here cs and ci are same. So use two consecutive cs predictions if meaningful
293
  if (len(results[server_index]["orig_cs_prediction_details"][pivot_index]['cs_distribution']) >= 2):
294
  ret_arr = self.get_predictions_above_threshold(results[server_index]["orig_cs_prediction_details"][pivot_index])
295
  orig_cs_second_entity = results[server_index]["orig_cs_prediction_details"][pivot_index]['cs_distribution'][1]
 
304
  n1["e"] = prefix + n1["e"]
305
  n2 = flip_category(orig_cs_entity)
306
  n2["e"] = prefix + n2["e"]
307
+ print("consolidated == orig cs. P3 case. Emit orig cs first. Then ci")
308
+ ret_obj["e"] = n2["e"] + "/" + n1["e"] #when using single server twice, best to keep cs first
309
  return ret_obj
310
  else:
311
  return flip_category(results[server_index]["ner"][run_index])