emanuelaboros
committed on
Commit
•
43f6405
1
Parent(s):
153888c
Upload model
Browse files
- config.json +118 -0
- configuration_stacked.py +2 -0
- modeling_stacked.py +1 -8
config.json
CHANGED
@@ -14,6 +14,124 @@
|
|
14 |
"hidden_size": 512,
|
15 |
"initializer_range": 0.02,
|
16 |
"intermediate_size": 2048,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
"layer_norm_eps": 1e-12,
|
18 |
"max_position_embeddings": 512,
|
19 |
"model_type": "stacked_bert",
|
|
|
14 |
"hidden_size": 512,
|
15 |
"initializer_range": 0.02,
|
16 |
"intermediate_size": 2048,
|
17 |
+
"label_map": {
|
18 |
+
"NE-COARSE-LIT": {
|
19 |
+
"B-loc": 8,
|
20 |
+
"B-org": 0,
|
21 |
+
"B-pers": 7,
|
22 |
+
"B-prod": 4,
|
23 |
+
"B-time": 5,
|
24 |
+
"I-loc": 1,
|
25 |
+
"I-org": 2,
|
26 |
+
"I-pers": 9,
|
27 |
+
"I-prod": 10,
|
28 |
+
"I-time": 6,
|
29 |
+
"O": 3
|
30 |
+
},
|
31 |
+
"NE-COARSE-METO": {
|
32 |
+
"B-loc": 3,
|
33 |
+
"B-org": 0,
|
34 |
+
"B-time": 5,
|
35 |
+
"I-loc": 4,
|
36 |
+
"I-org": 2,
|
37 |
+
"O": 1
|
38 |
+
},
|
39 |
+
"NE-FINE-COMP": {
|
40 |
+
"B-comp.demonym": 8,
|
41 |
+
"B-comp.function": 5,
|
42 |
+
"B-comp.name": 1,
|
43 |
+
"B-comp.qualifier": 9,
|
44 |
+
"B-comp.title": 2,
|
45 |
+
"I-comp.demonym": 7,
|
46 |
+
"I-comp.function": 3,
|
47 |
+
"I-comp.name": 0,
|
48 |
+
"I-comp.qualifier": 10,
|
49 |
+
"I-comp.title": 4,
|
50 |
+
"O": 6
|
51 |
+
},
|
52 |
+
"NE-FINE-LIT": {
|
53 |
+
"B-loc.add.elec": 32,
|
54 |
+
"B-loc.add.phys": 5,
|
55 |
+
"B-loc.adm.nat": 34,
|
56 |
+
"B-loc.adm.reg": 39,
|
57 |
+
"B-loc.adm.sup": 12,
|
58 |
+
"B-loc.adm.town": 33,
|
59 |
+
"B-loc.fac": 36,
|
60 |
+
"B-loc.oro": 19,
|
61 |
+
"B-loc.phys.geo": 13,
|
62 |
+
"B-loc.phys.hydro": 28,
|
63 |
+
"B-loc.unk": 4,
|
64 |
+
"B-org.adm": 3,
|
65 |
+
"B-org.ent": 24,
|
66 |
+
"B-org.ent.pressagency": 37,
|
67 |
+
"B-pers.coll": 9,
|
68 |
+
"B-pers.ind": 0,
|
69 |
+
"B-pers.ind.articleauthor": 20,
|
70 |
+
"B-prod.doctr": 2,
|
71 |
+
"B-prod.media": 10,
|
72 |
+
"B-time.date.abs": 23,
|
73 |
+
"I-loc.add.elec": 22,
|
74 |
+
"I-loc.add.phys": 6,
|
75 |
+
"I-loc.adm.nat": 11,
|
76 |
+
"I-loc.adm.reg": 35,
|
77 |
+
"I-loc.adm.sup": 15,
|
78 |
+
"I-loc.adm.town": 8,
|
79 |
+
"I-loc.fac": 27,
|
80 |
+
"I-loc.oro": 21,
|
81 |
+
"I-loc.phys.geo": 25,
|
82 |
+
"I-loc.phys.hydro": 17,
|
83 |
+
"I-loc.unk": 40,
|
84 |
+
"I-org.adm": 29,
|
85 |
+
"I-org.ent": 1,
|
86 |
+
"I-org.ent.pressagency": 14,
|
87 |
+
"I-pers.coll": 26,
|
88 |
+
"I-pers.ind": 16,
|
89 |
+
"I-pers.ind.articleauthor": 31,
|
90 |
+
"I-prod.doctr": 30,
|
91 |
+
"I-prod.media": 38,
|
92 |
+
"I-time.date.abs": 7,
|
93 |
+
"O": 18
|
94 |
+
},
|
95 |
+
"NE-FINE-METO": {
|
96 |
+
"B-loc.adm.town": 6,
|
97 |
+
"B-loc.fac": 3,
|
98 |
+
"B-loc.oro": 5,
|
99 |
+
"B-org.adm": 1,
|
100 |
+
"B-org.ent": 7,
|
101 |
+
"B-time.date.abs": 9,
|
102 |
+
"I-loc.fac": 8,
|
103 |
+
"I-org.adm": 2,
|
104 |
+
"I-org.ent": 0,
|
105 |
+
"O": 4
|
106 |
+
},
|
107 |
+
"NE-NESTED": {
|
108 |
+
"B-loc.adm.nat": 13,
|
109 |
+
"B-loc.adm.reg": 15,
|
110 |
+
"B-loc.adm.sup": 10,
|
111 |
+
"B-loc.adm.town": 9,
|
112 |
+
"B-loc.fac": 18,
|
113 |
+
"B-loc.oro": 17,
|
114 |
+
"B-loc.phys.geo": 11,
|
115 |
+
"B-loc.phys.hydro": 1,
|
116 |
+
"B-org.adm": 4,
|
117 |
+
"B-org.ent": 20,
|
118 |
+
"B-pers.coll": 7,
|
119 |
+
"B-pers.ind": 2,
|
120 |
+
"B-prod.media": 23,
|
121 |
+
"I-loc.adm.nat": 8,
|
122 |
+
"I-loc.adm.reg": 14,
|
123 |
+
"I-loc.adm.town": 6,
|
124 |
+
"I-loc.fac": 0,
|
125 |
+
"I-loc.oro": 19,
|
126 |
+
"I-loc.phys.geo": 21,
|
127 |
+
"I-loc.phys.hydro": 22,
|
128 |
+
"I-org.adm": 5,
|
129 |
+
"I-org.ent": 3,
|
130 |
+
"I-pers.ind": 12,
|
131 |
+
"I-prod.media": 24,
|
132 |
+
"O": 16
|
133 |
+
}
|
134 |
+
},
|
135 |
"layer_norm_eps": 1e-12,
|
136 |
"max_position_embeddings": 512,
|
137 |
"model_type": "stacked_bert",
|
configuration_stacked.py
CHANGED
@@ -23,6 +23,7 @@ class ImpressoConfig(PretrainedConfig):
|
|
23 |
use_cache=True,
|
24 |
classifier_dropout=None,
|
25 |
pretrained_config=None,
|
|
|
26 |
**kwargs,
|
27 |
):
|
28 |
super().__init__(pad_token_id=pad_token_id, **kwargs)
|
@@ -43,6 +44,7 @@ class ImpressoConfig(PretrainedConfig):
|
|
43 |
self.use_cache = use_cache
|
44 |
self.classifier_dropout = classifier_dropout
|
45 |
self.pretrained_config = pretrained_config
|
|
|
46 |
|
47 |
|
48 |
# Register the configuration with the transformers library
|
|
|
23 |
use_cache=True,
|
24 |
classifier_dropout=None,
|
25 |
pretrained_config=None,
|
26 |
+
label_map=None,
|
27 |
**kwargs,
|
28 |
):
|
29 |
super().__init__(pad_token_id=pad_token_id, **kwargs)
|
|
|
44 |
self.use_cache = use_cache
|
45 |
self.classifier_dropout = classifier_dropout
|
46 |
self.pretrained_config = pretrained_config
|
47 |
+
self.label_map = label_map
|
48 |
|
49 |
|
50 |
# Register the configuration with the transformers library
|
modeling_stacked.py
CHANGED
@@ -23,14 +23,7 @@ class ExtendedMultitaskModelForTokenClassification(PreTrainedModel):
|
|
23 |
|
24 |
def __init__(self, config):
|
25 |
super().__init__(config)
|
26 |
-
|
27 |
-
# Get the directory of the current script
|
28 |
-
current_dir = os.path.dirname(os.path.abspath(__file__))
|
29 |
-
# Construct the full path to label_map.json
|
30 |
-
label_map_path = os.path.join(current_dir, "label_map.json")
|
31 |
-
|
32 |
-
label_map = json.load(open(label_map_path, "r"))
|
33 |
-
self.num_token_labels_dict = get_info(label_map)
|
34 |
self.config = config
|
35 |
|
36 |
self.bert = AutoModel.from_pretrained(
|
|
|
23 |
|
24 |
def __init__(self, config):
|
25 |
super().__init__(config)
|
26 |
+
self.num_token_labels_dict = get_info(config.label_map)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
self.config = config
|
28 |
|
29 |
self.bert = AutoModel.from_pretrained(
|