emanuelaboros committed
Commit e5b1d73
1 Parent(s): df1a05b

Initial commit including model and configuration

Files changed (3):
  1. config.json +1 -1
  2. modeling_stacked.py +143 -0
  3. push_to_hf.py +1 -1
config.json CHANGED
@@ -6,7 +6,7 @@
   "attention_probs_dropout_prob": 0.1,
   "auto_map": {
     "AutoConfig": "configuration_stacked.ImpressoConfig",
-    "AutoModelForTokenClassification": "models.ExtendedMultitaskModelForTokenClassification"
+    "AutoModelForTokenClassification": "modeling_stacked.ExtendedMultitaskModelForTokenClassification"
   },
   "classifier_dropout": null,
   "hidden_act": "gelu",
modeling_stacked.py ADDED
@@ -0,0 +1,143 @@
+from transformers.modeling_outputs import TokenClassifierOutput
+import torch
+import torch.nn as nn
+from transformers import PreTrainedModel, AutoModel
+from torch.nn import CrossEntropyLoss
+from typing import Optional, Tuple, Union
+import logging, json, os
+
+from .configuration_stacked import ImpressoConfig
+
+logger = logging.getLogger(__name__)
+
+
+def get_info(label_map):
+    num_token_labels_dict = {task: len(labels) for task, labels in label_map.items()}
+    return num_token_labels_dict
+
+
+class ExtendedMultitaskModelForTokenClassification(PreTrainedModel):
+
+    config_class = ImpressoConfig
+    _keys_to_ignore_on_load_missing = [r"position_ids"]
+
+    def __init__(self, config):
+        super().__init__(config)
+        # Resolve label_map.json relative to this file
+        current_dir = os.path.dirname(os.path.abspath(__file__))
+        label_map_path = os.path.join(current_dir, "label_map.json")
+        logger.info("Loading label map from %s", label_map_path)
+
+        with open(label_map_path, "r") as f:
+            label_map = json.load(f)
+        self.num_token_labels_dict = get_info(label_map)
+        self.config = config
+
+        self.bert = AutoModel.from_pretrained(
+            config.pretrained_config["_name_or_path"], config=config.pretrained_config
+        )
+        if "classifier_dropout" not in config.__dict__:
+            classifier_dropout = 0.1
+        else:
+            classifier_dropout = (
+                config.classifier_dropout
+                if config.classifier_dropout is not None
+                else config.hidden_dropout_prob
+            )
+        self.dropout = nn.Dropout(classifier_dropout)
+
+        # Additional transformer layers
+        self.transformer_encoder = nn.TransformerEncoder(
+            nn.TransformerEncoderLayer(
+                d_model=config.hidden_size, nhead=config.num_attention_heads
+            ),
+            num_layers=2,
+        )
+
+        # For token classification, create a classifier head per task
+        self.token_classifiers = nn.ModuleDict(
+            {
+                task: nn.Linear(config.hidden_size, num_labels)
+                for task, num_labels in self.num_token_labels_dict.items()
+            }
+        )
+
+        # Initialize weights and apply final processing
+        self.post_init()
+
+    def forward(
+        self,
+        input_ids: Optional[torch.Tensor] = None,
+        attention_mask: Optional[torch.Tensor] = None,
+        token_type_ids: Optional[torch.Tensor] = None,
+        position_ids: Optional[torch.Tensor] = None,
+        head_mask: Optional[torch.Tensor] = None,
+        inputs_embeds: Optional[torch.Tensor] = None,
+        labels: Optional[torch.Tensor] = None,
+        token_labels: Optional[dict] = None,
+        output_attentions: Optional[bool] = None,
+        output_hidden_states: Optional[bool] = None,
+        return_dict: Optional[bool] = None,
+    ) -> Union[Tuple[torch.Tensor], TokenClassifierOutput]:
+        r"""
+        token_labels (`dict` of `torch.LongTensor` of shape `(batch_size, seq_length)`, *optional*):
+            Labels for computing the token classification loss. Keys should match the tasks.
+        """
+        return_dict = (
+            return_dict if return_dict is not None else self.config.use_return_dict
+        )
+
+        bert_kwargs = {
+            "input_ids": input_ids,
+            "attention_mask": attention_mask,
+            "token_type_ids": token_type_ids,
+            "position_ids": position_ids,
+            "head_mask": head_mask,
+            "inputs_embeds": inputs_embeds,
+            "output_attentions": output_attentions,
+            "output_hidden_states": output_hidden_states,
+            "return_dict": return_dict,
+        }
+
+        if any(
+            keyword in self.config.name_or_path.lower()
+            for keyword in ["llama", "deberta"]
+        ):
+            bert_kwargs.pop("token_type_ids")
+            bert_kwargs.pop("head_mask")
+
+        outputs = self.bert(**bert_kwargs)
+
+        # Token-level hidden states from the backbone
+        token_output = outputs[0]
+        token_output = self.dropout(token_output)
+
+        # Pass through the additional encoder (seq-first layout, hence the transposes)
+        token_output = self.transformer_encoder(token_output.transpose(0, 1)).transpose(
+            0, 1
+        )
+
+        # Collect the logits and compute the loss for each task
+        task_logits = {}
+        total_loss = 0
+        for task, classifier in self.token_classifiers.items():
+            logits = classifier(token_output)
+            task_logits[task] = logits
+            if token_labels and task in token_labels:
+                loss_fct = CrossEntropyLoss()
+                loss = loss_fct(
+                    logits.view(-1, self.num_token_labels_dict[task]),
+                    token_labels[task].view(-1),
+                )
+                total_loss += loss
+
+        if not return_dict:
+            output = (task_logits,) + outputs[2:]
+            return ((total_loss,) + output) if total_loss != 0 else output
+
+        return TokenClassifierOutput(
+            loss=total_loss,
+            logits=task_logits,
+            hidden_states=outputs.hidden_states,
+            attentions=outputs.attentions,
+        )
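
For reference, a hedged usage sketch of the new multitask forward: `token_labels` is a dict keyed by task (keys must match label_map.json), and `logits` comes back as a dict of per-task tensors. The repo id, input sentence, and all-zero dummy labels below are placeholders:

import torch
from transformers import AutoTokenizer, AutoModelForTokenClassification

repo_id = "<user>/<repo>"  # placeholder
tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForTokenClassification.from_pretrained(repo_id, trust_remote_code=True)

enc = tokenizer("Impresso processes historical newspapers.", return_tensors="pt")
seq_len = enc["input_ids"].shape[1]
# One LongTensor of shape (batch_size, seq_length) per task; dummy labels for illustration
token_labels = {
    task: torch.zeros(1, seq_len, dtype=torch.long)
    for task in model.num_token_labels_dict
}
out = model(**enc, token_labels=token_labels)
print(out.loss, {task: logits.shape for task, logits in out.logits.items()})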
push_to_hf.py CHANGED
@@ -11,7 +11,7 @@ from huggingface_hub import HfApi, Repository
 
 # import json
 from .configuration_stacked import ImpressoConfig
-from .models import ExtendedMultitaskModelForTokenClassification
+from .modeling_stacked import ExtendedMultitaskModelForTokenClassification
 import subprocess
 
 