impresso-project
/

ner-stacked-bert-multilingual

@@ -14,6 +14,124 @@
   "hidden_size": 512,
   "initializer_range": 0.02,
   "intermediate_size": 2048,
   "layer_norm_eps": 1e-12,
   "max_position_embeddings": 512,
   "model_type": "stacked_bert",

   "hidden_size": 512,
   "initializer_range": 0.02,
   "intermediate_size": 2048,
+  "label_map": {
+    "NE-COARSE-LIT": {
+      "B-loc": 8,
+      "B-org": 0,
+      "B-pers": 7,
+      "B-prod": 4,
+      "B-time": 5,
+      "I-loc": 1,
+      "I-org": 2,
+      "I-pers": 9,
+      "I-prod": 10,
+      "I-time": 6,
+      "O": 3
+    },
+    "NE-COARSE-METO": {
+      "B-loc": 3,
+      "B-org": 0,
+      "B-time": 5,
+      "I-loc": 4,
+      "I-org": 2,
+      "O": 1
+    },
+    "NE-FINE-COMP": {
+      "B-comp.demonym": 8,
+      "B-comp.function": 5,
+      "B-comp.name": 1,
+      "B-comp.qualifier": 9,
+      "B-comp.title": 2,
+      "I-comp.demonym": 7,
+      "I-comp.function": 3,
+      "I-comp.name": 0,
+      "I-comp.qualifier": 10,
+      "I-comp.title": 4,
+      "O": 6
+    },
+    "NE-FINE-LIT": {
+      "B-loc.add.elec": 32,
+      "B-loc.add.phys": 5,
+      "B-loc.adm.nat": 34,
+      "B-loc.adm.reg": 39,
+      "B-loc.adm.sup": 12,
+      "B-loc.adm.town": 33,
+      "B-loc.fac": 36,
+      "B-loc.oro": 19,
+      "B-loc.phys.geo": 13,
+      "B-loc.phys.hydro": 28,
+      "B-loc.unk": 4,
+      "B-org.adm": 3,
+      "B-org.ent": 24,
+      "B-org.ent.pressagency": 37,
+      "B-pers.coll": 9,
+      "B-pers.ind": 0,
+      "B-pers.ind.articleauthor": 20,
+      "B-prod.doctr": 2,
+      "B-prod.media": 10,
+      "B-time.date.abs": 23,
+      "I-loc.add.elec": 22,
+      "I-loc.add.phys": 6,
+      "I-loc.adm.nat": 11,
+      "I-loc.adm.reg": 35,
+      "I-loc.adm.sup": 15,
+      "I-loc.adm.town": 8,
+      "I-loc.fac": 27,
+      "I-loc.oro": 21,
+      "I-loc.phys.geo": 25,
+      "I-loc.phys.hydro": 17,
+      "I-loc.unk": 40,
+      "I-org.adm": 29,
+      "I-org.ent": 1,
+      "I-org.ent.pressagency": 14,
+      "I-pers.coll": 26,
+      "I-pers.ind": 16,
+      "I-pers.ind.articleauthor": 31,
+      "I-prod.doctr": 30,
+      "I-prod.media": 38,
+      "I-time.date.abs": 7,
+      "O": 18
+    },
+    "NE-FINE-METO": {
+      "B-loc.adm.town": 6,
+      "B-loc.fac": 3,
+      "B-loc.oro": 5,
+      "B-org.adm": 1,
+      "B-org.ent": 7,
+      "B-time.date.abs": 9,
+      "I-loc.fac": 8,
+      "I-org.adm": 2,
+      "I-org.ent": 0,
+      "O": 4
+    },
+    "NE-NESTED": {
+      "B-loc.adm.nat": 13,
+      "B-loc.adm.reg": 15,
+      "B-loc.adm.sup": 10,
+      "B-loc.adm.town": 9,
+      "B-loc.fac": 18,
+      "B-loc.oro": 17,
+      "B-loc.phys.geo": 11,
+      "B-loc.phys.hydro": 1,
+      "B-org.adm": 4,
+      "B-org.ent": 20,
+      "B-pers.coll": 7,
+      "B-pers.ind": 2,
+      "B-prod.media": 23,
+      "I-loc.adm.nat": 8,
+      "I-loc.adm.reg": 14,
+      "I-loc.adm.town": 6,
+      "I-loc.fac": 0,
+      "I-loc.oro": 19,
+      "I-loc.phys.geo": 21,
+      "I-loc.phys.hydro": 22,
+      "I-org.adm": 5,
+      "I-org.ent": 3,
+      "I-pers.ind": 12,
+      "I-prod.media": 24,
+      "O": 16
+    }
+  },
   "layer_norm_eps": 1e-12,
   "max_position_embeddings": 512,
   "model_type": "stacked_bert",

configuration_stacked.py CHANGED Viewed

@@ -23,6 +23,7 @@ class ImpressoConfig(PretrainedConfig):
         use_cache=True,
         classifier_dropout=None,
         pretrained_config=None,
         **kwargs,
     ):
         super().__init__(pad_token_id=pad_token_id, **kwargs)
@@ -43,6 +44,7 @@ class ImpressoConfig(PretrainedConfig):
         self.use_cache = use_cache
         self.classifier_dropout = classifier_dropout
         self.pretrained_config = pretrained_config
 # Register the configuration with the transformers library

         use_cache=True,
         classifier_dropout=None,
         pretrained_config=None,
+        label_map=None,
         **kwargs,
     ):
         super().__init__(pad_token_id=pad_token_id, **kwargs)
         self.use_cache = use_cache
         self.classifier_dropout = classifier_dropout
         self.pretrained_config = pretrained_config
+        self.label_map = label_map
 # Register the configuration with the transformers library

modeling_stacked.py CHANGED Viewed

@@ -23,14 +23,7 @@ class ExtendedMultitaskModelForTokenClassification(PreTrainedModel):
     def __init__(self, config):
         super().__init__(config)
-        print("Current folder path:", os.path.dirname(os.path.abspath(__file__)))
-        # Get the directory of the current script
-        current_dir = os.path.dirname(os.path.abspath(__file__))
-        # Construct the full path to label_map.json
-        label_map_path = os.path.join(current_dir, "label_map.json")
-        label_map = json.load(open(label_map_path, "r"))
-        self.num_token_labels_dict = get_info(label_map)
         self.config = config
         self.bert = AutoModel.from_pretrained(

     def __init__(self, config):
         super().__init__(config)
+        self.num_token_labels_dict = get_info(config.label_map)
         self.config = config
         self.bert = AutoModel.from_pretrained(