Spaces:

ajitrajasekharan
/

self-supervised-ner-biomedical

Runtime error

ajit committed on Feb 5, 2022

Commit

db6839f

•

1 Parent(s): 026e9d4

CS predictions unconditionally override CI predictions for non-ensemble

use case. TBD: re-examine this for ensembling too. In general, if CS
predictions are very confident, then CI is subservient to them. CS
predictions will not be confident when there is true ambiguity in that
position for a model trained for that domain. So overriding CS with CI
needs to be done only when CS allows for it by not being so
confident. This needs to be factored into the ensemble case too.

Files changed (1) hide show

aggregate_server_json.py +6 -3

aggregate_server_json.py CHANGED Viewed

@@ -248,7 +248,8 @@ class AggregateNER:
                     n1 = flip_category(orig_cs_entity)
                     n1["e"] = prefix +  n1["e"]
                     n2 = flip_category(consolidated_entity)
-                    ret_obj["e"] = n2["e"] + "/" + n1["e"]
                     return ret_obj
                 else:
                     #if we come here consolidated is same as cs prediction. So we try to either use ci or the second cs prediction if ci is out of domain
@@ -262,6 +263,7 @@ class AggregateNER:
                             n1["e"] = prefix +  n1["e"]
                             n2 = flip_category(orig_ci_entity)
                             n2["e"] = prefix +  n2["e"]
                             ret_obj["e"] = n1["e"] + "/" + n2["e"]
                             return ret_obj
                         else:
@@ -287,7 +289,7 @@ class AggregateNER:
                             else:
                                 return flip_category(results[server_index]["ner"][run_index])
                     else:
-                        #here cs and ci are same. So use two cs predictions if meaningful
                         if (len(results[server_index]["orig_cs_prediction_details"][pivot_index]['cs_distribution']) >= 2):
                             ret_arr = self.get_predictions_above_threshold(results[server_index]["orig_cs_prediction_details"][pivot_index])
                             orig_cs_second_entity = results[server_index]["orig_cs_prediction_details"][pivot_index]['cs_distribution'][1]
@@ -302,7 +304,8 @@ class AggregateNER:
                                 n1["e"] = prefix +  n1["e"]
                                 n2 = flip_category(orig_cs_entity)
                                 n2["e"] = prefix +  n2["e"]
-                                ret_obj["e"] = n2["e"] + "/" + n1["e"]
                                 return ret_obj
                             else:
                                 return flip_category(results[server_index]["ner"][run_index])

                     n1 = flip_category(orig_cs_entity)
                     n1["e"] = prefix +  n1["e"]
                     n2 = flip_category(consolidated_entity)
+                    print("consolidated != orig cs. P1 case. Emit orig cs first")
+                    ret_obj["e"] = n1["e"] + "/" + n2["e"] #emit orig cs first
                     return ret_obj
                 else:
                     #if we come here consolidated is same as cs prediction. So we try to either use ci or the second cs prediction if ci is out of domain
                             n1["e"] = prefix +  n1["e"]
                             n2 = flip_category(orig_ci_entity)
                             n2["e"] = prefix +  n2["e"]
+                            print("consolidated == orig cs. P2 case. Emit orig cs first. Then ci")
                             ret_obj["e"] = n1["e"] + "/" + n2["e"]
                             return ret_obj
                         else:
                             else:
                                 return flip_category(results[server_index]["ner"][run_index])
                     else:
+                        #here cs and ci are same. So use two consecutive cs predictions if meaningful
                         if (len(results[server_index]["orig_cs_prediction_details"][pivot_index]['cs_distribution']) >= 2):
                             ret_arr = self.get_predictions_above_threshold(results[server_index]["orig_cs_prediction_details"][pivot_index])
                             orig_cs_second_entity = results[server_index]["orig_cs_prediction_details"][pivot_index]['cs_distribution'][1]
                                 n1["e"] = prefix +  n1["e"]
                                 n2 = flip_category(orig_cs_entity)
                                 n2["e"] = prefix +  n2["e"]
+                                print("consolidated == orig cs. P3 case. Emit orig cs first. Then ci")
+                                ret_obj["e"] = n2["e"] + "/" + n1["e"] #when using single server twice, best to keep cs first
                                 return ret_obj
                             else:
                                 return flip_category(results[server_index]["ner"][run_index])