T-Almeida committed on
Commit
b5ce06f
·
verified ·
1 Parent(s): 9c114bf

Upload model

Browse files
config.json CHANGED
@@ -48,7 +48,6 @@
48
  "num_attention_heads": 12,
49
  "num_hidden_layers": 12,
50
  "number_of_layer_per_head": 1,
51
- "p_augmentation": 0.5,
52
  "pad_token_id": 1,
53
  "percentage_tags": 0.25,
54
  "position_embedding_type": "absolute",
 
48
  "num_attention_heads": 12,
49
  "num_hidden_layers": 12,
50
  "number_of_layer_per_head": 1,
 
51
  "pad_token_id": 1,
52
  "percentage_tags": 0.25,
53
  "position_embedding_type": "absolute",
configuration_multiheadcrf.py CHANGED
@@ -13,7 +13,6 @@ class MultiHeadCRFConfig(PretrainedConfig):
13
  augmentation = "random",
14
  context_size = 64,
15
  percentage_tags = 0.2,
16
- p_augmentation = 0.5,
17
  aug_prob = 0.5,
18
  crf_reduction = "mean",
19
  freeze = False,
@@ -26,7 +25,6 @@ class MultiHeadCRFConfig(PretrainedConfig):
26
  self.augmentation = augmentation
27
  self.context_size = context_size
28
  self.percentage_tags = percentage_tags
29
- self.p_augmentation = p_augmentation
30
  self.aug_prob = aug_prob,
31
  self.crf_reduction = crf_reduction
32
  self.freeze=freeze
 
13
  augmentation = "random",
14
  context_size = 64,
15
  percentage_tags = 0.2,
 
16
  aug_prob = 0.5,
17
  crf_reduction = "mean",
18
  freeze = False,
 
25
  self.augmentation = augmentation
26
  self.context_size = context_size
27
  self.percentage_tags = percentage_tags
 
28
  self.aug_prob = aug_prob,
29
  self.crf_reduction = crf_reduction
30
  self.freeze=freeze
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d7dfd5e5406355dfa24dfe1d97009f715cbed98713e9d0f3a06c41d0c45ed666
3
- size 508095632
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37dd4fda285a360ba9dae043072640394db34629a40097e48bc182a061002f3d
3
+ size 508096224
modeling_multiheadcrf.py CHANGED
@@ -12,9 +12,10 @@ NUM_PER_LAYER = 16
12
 
13
  class RobertaMultiHeadCRFModel(PreTrainedModel):
14
  config_class = MultiHeadCRFConfig
15
- transformer_backbone_class = RobertaModel
 
16
  _keys_to_ignore_on_load_unexpected = [r"pooler"]
17
-
18
  def __init__(self, config):
19
  super().__init__(config)
20
  self.num_labels = config.num_labels
@@ -24,7 +25,10 @@ class RobertaMultiHeadCRFModel(PreTrainedModel):
24
  self.heads = config.classes #expected an array of classes we are predicting
25
 
26
  # this can be BERT ROBERTA and other BERT-variants
27
- self.bert = self.transformer_backbone_class(config, add_pooling_layer=False)
 
 
 
28
  #AutoModel(config, add_pooling_layer=False)
29
  #AutoModel.from_pretrained(config._name_or_path, config=config, add_pooling_layer=False)
30
  self.dropout = nn.Dropout(config.hidden_dropout_prob)
@@ -43,20 +47,23 @@ class RobertaMultiHeadCRFModel(PreTrainedModel):
43
  self.manage_freezing()
44
 
45
  def training_mode(self):
 
46
  # for some reason these layers are not being correctly init
47
  # probably related with the lifecycle of the hf .from_pretrained method
48
- self.dense.reset_parameters()
49
- self.classifier.reset_parameters()
50
- self.crf.reset_parameters()
51
- self.crf.mask_impossible_transitions()
 
 
52
 
53
  def manage_freezing(self):
54
- for _, param in self.bert.embeddings.named_parameters():
55
  param.requires_grad = False
56
 
57
  num_encoders_to_freeze = self.config.num_frozen_encoder
58
  if num_encoders_to_freeze > 0:
59
- for _, param in islice(self.bert.encoder.named_parameters(), num_encoders_to_freeze*NUM_PER_LAYER):
60
  param.requires_grad = False
61
 
62
 
@@ -75,7 +82,7 @@ class RobertaMultiHeadCRFModel(PreTrainedModel):
75
  # Default `model.config.use_return_dict` is `True`
76
  return_dict = return_dict if return_dict is not None else self.config.use_return_dict
77
 
78
- outputs = self.bert(input_ids,
79
  attention_mask=attention_mask,
80
  token_type_ids=token_type_ids,
81
  position_ids=position_ids,
@@ -119,7 +126,8 @@ class RobertaMultiHeadCRFModel(PreTrainedModel):
119
 
120
  class BertMultiHeadCRFModel(RobertaMultiHeadCRFModel):
121
  config_class = MultiHeadCRFConfig
122
- transformer_backbone_class = BertModel
 
123
  _keys_to_ignore_on_load_unexpected = [r"pooler"]
124
 
125
  # Taken from https://github.com/kmkurn/pytorch-crf/blob/master/torchcrf/__init__.py and fixed the uint8 warning
 
12
 
13
  class RobertaMultiHeadCRFModel(PreTrainedModel):
14
  config_class = MultiHeadCRFConfig
15
+ transformers_backbone_name = "roberta"
16
+ transformers_backbone_class = RobertaModel
17
  _keys_to_ignore_on_load_unexpected = [r"pooler"]
18
+
19
  def __init__(self, config):
20
  super().__init__(config)
21
  self.num_labels = config.num_labels
 
25
  self.heads = config.classes #expected an array of classes we are predicting
26
 
27
  # this can be BERT ROBERTA and other BERT-variants
28
+ # THIS IS BC HF needs to have "roberta" for roberta models and "bert" for BERT models as the variable name so that I can load
29
+ # check https://github.com/huggingface/transformers/blob/b487096b02307cd6e0f132b676cdcc7255fe8e74/src/transformers/models/roberta/modeling_roberta.py#L1170C16-L1170C20
30
+ setattr(self, self.transformers_backbone_name, self.transformers_backbone_class(config, add_pooling_layer=False))
31
+ #self.roberta = self.transformer_backbone_class(config, add_pooling_layer=False)
32
  #AutoModel(config, add_pooling_layer=False)
33
  #AutoModel.from_pretrained(config._name_or_path, config=config, add_pooling_layer=False)
34
  self.dropout = nn.Dropout(config.hidden_dropout_prob)
 
47
  self.manage_freezing()
48
 
49
  def training_mode(self):
50
+
51
  # for some reason these layers are not being correctly init
52
  # probably related with the lifecycle of the hf .from_pretrained method
53
+ for ent in self.heads:
54
+ for i in range(self.number_of_layer_per_head):
55
+ getattr(self, f"{ent}_dense_{i}").reset_parameters()
56
+ getattr(self, f"{ent}_classifier").reset_parameters()
57
+ getattr(self, f"{ent}_crf").reset_parameters()
58
+ getattr(self, f"{ent}_crf").mask_impossible_transitions()
59
 
60
  def manage_freezing(self):
61
+ for _, param in getattr(self, self.transformers_backbone_name).embeddings.named_parameters():
62
  param.requires_grad = False
63
 
64
  num_encoders_to_freeze = self.config.num_frozen_encoder
65
  if num_encoders_to_freeze > 0:
66
+ for _, param in islice(getattr(self, self.transformers_backbone_name).encoder.named_parameters(), num_encoders_to_freeze*NUM_PER_LAYER):
67
  param.requires_grad = False
68
 
69
 
 
82
  # Default `model.config.use_return_dict` is `True`
83
  return_dict = return_dict if return_dict is not None else self.config.use_return_dict
84
 
85
+ outputs = getattr(self, self.transformers_backbone_name)(input_ids,
86
  attention_mask=attention_mask,
87
  token_type_ids=token_type_ids,
88
  position_ids=position_ids,
 
126
 
127
  class BertMultiHeadCRFModel(RobertaMultiHeadCRFModel):
128
  config_class = MultiHeadCRFConfig
129
+ transformers_backbone_name = "bert"
130
+ transformers_backbone_class = BertModel
131
  _keys_to_ignore_on_load_unexpected = [r"pooler"]
132
 
133
  # Taken from https://github.com/kmkurn/pytorch-crf/blob/master/torchcrf/__init__.py and fixed the uint8 warning