Rename flaubert2_model.py to jargon_model.py
flaubert2_model.py → jargon_model.py
RENAMED
@@ -1,42 +1,44 @@
-#from transformers import RobertaModel, RobertaConfig, RobertaForMaskedLM, RobertaLMHead
-#from linformer import LinformerTransformerEncoder, LinformerTransformerEncoderLayer, LinformerTransformerEncoderFS, LinformerTransformerEncoderLayerFS
-#import linformer
-from .linformer import LinformerTransformerEncoderLayer
-from .flaubert2_configuration import Flaubert2Config
-from transformers.models.roberta.modeling_roberta import RobertaEncoder, RobertaConfig, RobertaModel, RobertaLMHead, RobertaForMaskedLM, RobertaEmbeddings, RobertaForTokenClassification, RobertaForSequenceClassification
-import torch.nn as nn
 import math
-import torch.nn.functional as F
-from torch.nn import LayerNorm
-import torch
 from typing import List, Optional, Tuple, Union
 
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch.nn import LayerNorm
 from fairseq.models.roberta import (
     RobertaModel as RobertModel,
     RobertaEncoder as RobertaEncoderFS
 )
-
+from transformers.models.roberta.modeling_roberta import (
+    RobertaEncoder,
+    RobertaConfig,
+    RobertaModel,
+    RobertaLMHead,
+    RobertaForMaskedLM,
+    RobertaEmbeddings,
+    RobertaForTokenClassification,
+    RobertaForSequenceClassification
+)
 from transformers.modeling_outputs import (
     MaskedLMOutput,
     BaseModelOutputWithPastAndCrossAttentions,
     BaseModelOutputWithPoolingAndCrossAttentions,
 )
 
+from .linformer import LinformerTransformerEncoderLayer
+from .jargon_configuration import JargonConfig
+
 
-class Flaubert2ModelForSequenceClassification(RobertaForSequenceClassification):
+class JargonForSequenceClassification(RobertaForSequenceClassification):
 
-    config_class = Flaubert2Config
-    auto_map = {"test": "test3"}
+    config_class = JargonConfig
 
     def __init__(self, config, **kwargs):
-        base_model_prefix = "flaubert2"
+        base_model_prefix = "jargon"
 
         super().__init__(config, **kwargs)
 
-
-        self.roberta = Flaubert2Model(config, add_pooling_layer=False)
-        #self.encoder = LinformerTransformerEncoder(config)
-        #self.encoder = LinformerTransformerEncoder(config)
+        self.roberta = JargonModel(config, add_pooling_layer=False)
         self.sbo_head = self.build_sbo_head(config)
 
     def build_sbo_head(self, config):
@@ -50,19 +52,16 @@ class Flaubert2ModelForSequenceClassification(RobertaForSequenceClassification)
         )
 
 
-class Flaubert2ModelForTokenClassification(RobertaForTokenClassification):
+class JargonForTokenClassification(RobertaForTokenClassification):
 
-    config_class = Flaubert2Config
+    config_class = JargonConfig
 
     def __init__(self, config, **kwargs):
-        base_model_prefix = "flaubert2"
+        base_model_prefix = "jargon"
 
         super().__init__(config, **kwargs)
 
-
-        self.roberta = Flaubert2Model(config, add_pooling_layer=False)
-        #self.encoder = LinformerTransformerEncoder(config)
-        #self.encoder = LinformerTransformerEncoder(config)
+        self.roberta = JargonModel(config, add_pooling_layer=False)
         self.sbo_head = self.build_sbo_head(config)
 
     def build_sbo_head(self, config):
@@ -76,19 +75,16 @@ class Flaubert2ModelForTokenClassification(RobertaForTokenClassification):
         )
 
 
-class Flaubert2ModelForMaskedLM(RobertaForMaskedLM):
+class JargonForMaskedLM(RobertaForMaskedLM):
 
-    config_class = Flaubert2Config
+    config_class = JargonConfig
 
     def __init__(self, config, **kwargs):
-        base_model_prefix = "flaubert2"
+        base_model_prefix = "jargon"
 
         super().__init__(config, **kwargs)
 
-
-        self.roberta = Flaubert2Model(config, add_pooling_layer=False)
-        #self.encoder = LinformerTransformerEncoder(config)
-        #self.encoder = LinformerTransformerEncoder(config)
+        self.roberta = JargonModel(config, add_pooling_layer=False)
         self.sbo_head = self.build_sbo_head(config)
 
     def build_sbo_head(self, config):
@@ -101,20 +97,16 @@ class Flaubert2ModelForMaskedLM(RobertaForMaskedLM):
             )
         )
 
-class Flaubert2ModelForMaskedLMFS(RobertaForMaskedLM):
+
+class JargonForMaskedLMFS(RobertaForMaskedLM):
 
     def __init__(self, config, dictionary, **kwargs):
-        config_class = Flaubert2Config
-        base_model_prefix = "flaubert2"
+        config_class = JargonConfig
+        base_model_prefix = "jargon"
 
         super().__init__(config, **kwargs)
 
-
-        #self.roberta = Flaubert2ModelFS(config, dictionary, add_pooling_layer=False)
-        self.roberta =FlaubertEncoder(config, dictionary)
-        #self.encoder =
-        #self.encoder = LinformerTransformerEncoder(config)
-        #self.sbo_head = self.build_sbo_head(config)
+        self.roberta = FlaubertEncoder(config, dictionary)
 
     def build_sbo_head(self, config):
         return SBOHead(
@@ -127,12 +119,11 @@ class Flaubert2ModelForMaskedLMFS(RobertaForMaskedLM):
         )
 
 
-
-class Flaubert2Embeddings(RobertaEmbeddings):
+class JargonEmbeddings(RobertaEmbeddings):
 
     def __init__(self, config, **kwargs):
-        config_class = Flaubert2Config
-        base_model_prefix = "flaubert2"
+        config_class = JargonConfig
+        base_model_prefix = "jargon"
         super().__init__(config, **kwargs)
 
     def forward(
@@ -168,16 +159,14 @@ class Flaubert2Embeddings(RobertaEmbeddings):
         token_type_embeddings = self.token_type_embeddings(token_type_ids)
 
         embeddings = inputs_embeds + token_type_embeddings
-        #if self.position_embedding_type == "absolute":
         position_embeddings = self.position_embeddings(position_ids)
-        #else:
 
         embeddings += position_embeddings
-        #embeddings = self.LayerNorm(embeddings)
         embeddings = self.dropout(embeddings)
         return embeddings
 
-class Flaubert2Encoder(RobertaEncoder):
+
+class JargonEncoder(RobertaEncoder):
 
     def __init__(self, args):
         compress_layer = None
@@ -258,15 +247,15 @@ class Flaubert2Encoder(RobertaEncoder):
         return lm_out
 
 
-class Flaubert2Model(RobertaModel):
-    config_class = Flaubert2Config
+class JargonModel(RobertaModel):
+    config_class = JargonConfig
     def __init__(self, config, **kwargs):
-
-        base_model_prefix = "flaubert2"
+        config_class = JargonConfig
+        base_model_prefix = "jargon"
 
         super().__init__(config, **kwargs)
-        self.embeddings = Flaubert2Embeddings(config)
-        self.encoder = Flaubert2Encoder(config)
+        self.embeddings = JargonEmbeddings(config)
+        self.encoder = JargonEncoder(config)
     # Copied from modeling_roberta.py
     # Add transpose of embeddings as implemented in fairseq
     def forward(
@@ -406,6 +395,7 @@ class Flaubert2Model(RobertaModel):
             cross_attentions=encoder_outputs.cross_attentions,
         )
 
+
 class SBOLayer(nn.Module):
 
     def __init__(self, input_size, hidden_size, activation, export):
@@ -417,6 +407,7 @@ class SBOLayer(nn.Module):
     def forward(self, x):
         return self.norm(self.activ(self.layer(x)))
 
+
class SBONetwork(nn.Module):
 
     def __init__(self, input_size, hidden_size, activation, export):
@@ -514,6 +505,7 @@ def get_activation_fn(activation):
     else:
         raise RuntimeError("--activation-fn {} not supported".format(activation))
 
+
 def create_position_ids_from_input_ids(input_ids, padding_idx, past_key_values_length=0):
     """
     Replace non-padding symbols with their position numbers. Position numbers begin at padding_idx+1. Padding symbols