dingzx97 committed
Commit e03b2ec
1 Parent(s): c301640
config.json CHANGED
@@ -9,15 +9,17 @@
     "AutoModelForMaskedLM": "modeling_lddbert.LddBertForMaskedLM",
     "AutoModelForSequenceClassification": "modeling_lddbert.LddBertForSequenceClassification"
   },
+  "cnn_kernel_size": 5,
   "dim": 768,
   "dropout": 0.1,
   "hidden_dim": 3072,
   "initializer_range": 0.02,
   "max_position_embeddings": 512,
   "model_type": "lddbert",
-  "n_gru_layers": 1,
+  "n_cnn_layers": 6,
+  "n_gru_layers": 6,
   "n_heads": 12,
-  "n_layers": 12,
+  "n_layers": 6,
   "pad_token_id": 0,
   "qa_dropout": 0.1,
   "seq_classif_dropout": 0.2,
configuration_lddbert.py CHANGED
@@ -87,7 +87,7 @@ class LddBertConfig(PretrainedConfig):
 
     def __init__(
         self,
-        n_layers=12,
+        n_layers=6,
         n_heads=12,
         dim=768,
         hidden_dim=4*768,
@@ -102,7 +102,9 @@ class LddBertConfig(PretrainedConfig):
         attention_dropout=0.1,
         qa_dropout=0.1,
         seq_classif_dropout=0.2,
-        n_gru_layers=1,
+        n_gru_layers=6,
+        n_cnn_layers=6,
+        cnn_kernel_size=5,
         **kwargs
     ):
         self.vocab_size = vocab_size
@@ -110,6 +112,8 @@ class LddBertConfig(PretrainedConfig):
         self.sinusoidal_pos_embds = sinusoidal_pos_embds
         self.n_layers = n_layers
         self.n_gru_layers = n_gru_layers
+        self.n_cnn_layers = n_cnn_layers
+        self.cnn_kernel_size = cnn_kernel_size
         self.n_heads = n_heads
         self.dim = dim
         self.hidden_dim = hidden_dim
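
For clarity, a small sketch of what the updated constructor defaults imply, assuming configuration_lddbert.py is on the import path and the remaining (unshown) defaults are unchanged:

```python
# Hypothetical local usage; the import assumes the file sits next to this script.
from configuration_lddbert import LddBertConfig

config = LddBertConfig()  # picks up the new defaults from this commit
assert (config.n_layers, config.n_gru_layers) == (6, 6)
assert (config.n_cnn_layers, config.cnn_kernel_size) == (6, 5)

# Values can still be overridden per instance, e.g. a lighter CNN stack:
light = LddBertConfig(n_cnn_layers=2, cnn_kernel_size=3)
```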
modeling_lddbert.py CHANGED
@@ -378,9 +378,15 @@ LDDBERT_INPUTS_DOCSTRING = DISTILBERT_INPUTS_DOCSTRING
 class LddBertModel(LddBertPreTrainedModel):
     def __init__(self, config: PretrainedConfig):
         super().__init__(config)
+        assert config.cnn_kernel_size % 2 == 1
 
         self.embeddings = Embeddings(config)  # Embeddings
         self.transformer = Transformer(config)  # Encoder
+        self.gru = nn.GRU(config.dim, config.dim // 2, config.n_gru_layers, batch_first=True, bidirectional=True)
+        self.cnn = nn.Sequential(*(
+            nn.Conv1d(config.max_position_embeddings, config.max_position_embeddings, config.cnn_kernel_size, padding=(config.cnn_kernel_size - 1) // 2)
+            for _ in range(config.n_cnn_layers)
+        ))
 
         # Initialize weights and apply final processing
         self.post_init()
@@ -494,7 +500,7 @@ class LddBertModel(LddBertPreTrainedModel):
             token_type_ids=token_type_ids,
         )  # (bs, seq_length, dim)
 
-        return self.transformer(
+        bert_output = self.transformer(
            x=inputs_embeds,
            attn_mask=attention_mask,
            head_mask=head_mask,
@@ -503,6 +509,22 @@ class LddBertModel(LddBertPreTrainedModel):
            return_dict=return_dict,
        )
 
+        gru_output, _ = self.gru(bert_output[0])
+
+        cnn_output = self.cnn(bert_output[0])
+
+        output = gru_output + cnn_output
+        if not return_dict:
+            return (output,) + bert_output[1:]
+
+        return BaseModelOutput(
+            last_hidden_state=output,
+            hidden_states=bert_output.hidden_states,
+            attentions=bert_output.attentions,
+        )
+
+
+
 
 @add_start_docstrings(
     """LddBert Model with a `masked language modeling` head on top.""",
@@ -622,15 +644,12 @@ class LddBertForSequenceClassification(LddBertPreTrainedModel):
         self.num_labels = config.num_labels
         self.config = config
 
-        assert config.dim % 2 == 0
-
-        self.activation = get_activation(config.activation)
 
         self.lddbert = LddBertModel(config)
-        self.gru = nn.GRU(config.dim, 32, config.n_gru_layers, batch_first=True, bidirectional=True)
-        self.layer_norm = nn.LayerNorm(config.dim + 32*2*config.n_gru_layers, eps=1e-12)
+        self.pre_classifier = nn.Linear(config.dim, config.dim)
+        self.activation = get_activation(config.activation)
         self.dropout = nn.Dropout(config.seq_classif_dropout)
-        self.classifier = nn.Linear(config.dim + 32*2*config.n_gru_layers, config.num_labels)
+        self.classifier = nn.Linear(config.dim, config.num_labels)
 
         # Initialize weights and apply final processing
         self.post_init()
@@ -693,15 +712,11 @@ class LddBertForSequenceClassification(LddBertPreTrainedModel):
         )
         hidden_state = lddbert_output[0]  # (bs, seq_len, dim)
 
-        cls_output = hidden_state[:, 0]  # (bs, dim)
-        gru_output, _ = self.gru(hidden_state[:, 1:])  # (bs, seq_len, dim)
-        gru_output = gru_output[:, -1]
-
-        concat_output = torch.cat((cls_output, gru_output), dim=-1)
-        concat_output = self.activation(concat_output)  # (bs, dim)
-        concat_output = self.layer_norm(concat_output)  # (bs, dim)
-        concat_output = self.dropout(concat_output)  # (bs, dim)
-        logits = self.classifier(concat_output)  # (bs, num_labels)
+        pooled_output = hidden_state[:, 0]  # (bs, dim)
+        pooled_output = self.pre_classifier(pooled_output)  # (bs, dim)
+        pooled_output = self.activation(pooled_output)  # (bs, dim)
+        pooled_output = self.dropout(pooled_output)  # (bs, dim)
+        logits = self.classifier(pooled_output)  # (bs, num_labels)
 
         loss = None
         if labels is not None:
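
In short, the recurrent branch moves from the classification head into LddBertModel itself and gains a CNN branch: the transformer output feeds a bidirectional GRU (hidden size dim // 2, so its output width is again dim) and a stack of Conv1d layers that treat the sequence positions as channels and convolve along the hidden dimension with "same" padding; the two are summed to form the new last_hidden_state. The classification head reverts to DistilBERT-style CLS pooling (pre_classifier → activation → dropout → classifier). Below is a standalone shape sketch of the new encoder tail, assuming the committed defaults (dim=768, max_position_embeddings=512, 6 GRU/CNN layers, kernel size 5) and a batch padded to the full sequence length, which the Conv1d channel count requires:

```python
import torch
from torch import nn

bs, seq_len, dim = 2, 512, 768                    # seq_len must equal max_position_embeddings

hidden = torch.randn(bs, seq_len, dim)            # stand-in for the transformer output

gru = nn.GRU(dim, dim // 2, num_layers=6, batch_first=True, bidirectional=True)
cnn = nn.Sequential(*(
    nn.Conv1d(seq_len, seq_len, kernel_size=5, padding=2)  # "same" padding: (5 - 1) // 2
    for _ in range(6)
))

gru_out, _ = gru(hidden)                          # (bs, seq_len, 2 * dim//2) == (bs, seq_len, dim)
cnn_out = cnn(hidden)                             # sequence axis as channels; hidden dim preserved
fused = gru_out + cnn_out                         # (bs, seq_len, dim): the new last_hidden_state
```

Setting the GRU hidden size to dim // 2 is what lets the bidirectional output be summed elementwise with the Conv1d output without any projection.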
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0bfcefa51ca80f85ff8c117271997b66f92e10f8638237d14f908605703c7419
-size 438121645
+oid sha256:151f439844ff10c523e93c90fbce4a543ab1bcce6f660822748eae4bd2e9c94c
+size 363280885
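
The new checkpoint is smaller (438 MB → 363 MB), consistent with halving the transformer depth even after adding the GRU and CNN parameters. A minimal loading sketch, again with a placeholder repo id and trusting the repo's custom modeling code:

```python
import torch
from transformers import AutoModelForSequenceClassification

# Placeholder repo id; trust_remote_code=True pulls in modeling_lddbert.py so the
# custom LddBertForSequenceClassification defined above is used.
model = AutoModelForSequenceClassification.from_pretrained(
    "dingzx97/lddbert", trust_remote_code=True
)

# The Conv1d branch fixes its channel count to max_position_embeddings, so inputs
# should be padded to the full 512-token length.
input_ids = torch.zeros((1, 512), dtype=torch.long)   # dummy ids (pad_token_id=0) for a shape check
attention_mask = torch.ones_like(input_ids)
with torch.no_grad():
    logits = model(input_ids=input_ids, attention_mask=attention_mask).logits
print(logits.shape)  # (1, num_labels)
```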