qilowoq committed on
Commit 9548beb
1 Parent(s): 3ffd2f8

Upload RobertaForMaskedLMV2

Files changed (3)
  1. AbLang_roberta_model.py +41 -0
  2. config.json +6 -3
  3. pytorch_model.bin +2 -2
AbLang_roberta_model.py ADDED
@@ -0,0 +1,41 @@
+ from transformers.models.roberta.modeling_roberta import RobertaEmbeddings, RobertaModel, RobertaForMaskedLM
+ from typing import Optional
+ import torch
+
+ class RobertaEmbeddingsV2(RobertaEmbeddings):
+     def __init__(self, config):
+         super().__init__(config)
+         self.pad_token_id = config.pad_token_id
+         self.position_embeddings = torch.nn.Embedding(config.max_position_embeddings, config.hidden_size, padding_idx=0) # here padding_idx is always 0
+
+     def forward(
+         self,
+         input_ids: torch.LongTensor,
+         token_type_ids: Optional[torch.LongTensor] = None,
+         position_ids: Optional[torch.LongTensor] = None,
+         inputs_embeds: Optional[torch.FloatTensor] = None,
+         past_key_values_length: int = 0,
+     ) -> torch.Tensor:
+         inputs_embeds = self.word_embeddings(input_ids)
+         position_ids = self.create_position_ids_from_input_ids(input_ids)
+         position_embeddings = self.position_embeddings(position_ids)
+         embeddings = inputs_embeds + position_embeddings
+         return self.dropout(self.LayerNorm(embeddings))
+
+     def create_position_ids_from_input_ids(self, input_ids: torch.LongTensor) -> torch.Tensor:
+         mask = input_ids.ne(self.pad_token_id).int()
+         return torch.cumsum(mask, dim=1).long() * mask
+
+
+ class RobertaModelV2(RobertaModel):
+     def __init__(self, config, add_pooling_layer=False):
+         super().__init__(config, add_pooling_layer=add_pooling_layer)
+         self.embeddings = RobertaEmbeddingsV2(config)
+
+
+ class RobertaForMaskedLMV2(RobertaForMaskedLM):
+     def __init__(self, config):
+         super().__init__(config)
+         self.roberta = RobertaModelV2(config, add_pooling_layer=False)
+
+
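For reference, a minimal sketch (not part of the commit) of what the overridden create_position_ids_from_input_ids computes: real tokens are numbered from 1 by a cumulative sum over the non-padding mask, and padding tokens get position 0, which matches padding_idx=0 in the custom position_embeddings. The pad_token_id of 21 comes from config.json below; the toy input_ids are made up for illustration.

import torch

pad_token_id = 21  # from config.json in this commit
input_ids = torch.tensor([
    [5, 7, 9, 21, 21],   # 3 real tokens, right-padded
    [3, 4, 6, 8, 10],    # no padding
])

# Same computation as RobertaEmbeddingsV2.create_position_ids_from_input_ids
mask = input_ids.ne(pad_token_id).int()
position_ids = torch.cumsum(mask, dim=1).long() * mask
print(position_ids)
# tensor([[1, 2, 3, 0, 0],
#         [1, 2, 3, 4, 5]])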
config.json CHANGED
@@ -1,13 +1,15 @@
  {
    "add_pooling_layer": false,
    "architectures": [
-     "BertModelV2"
+     "RobertaForMaskedLMV2"
    ],
    "attention_probs_dropout_prob": 0.1,
    "auto_map": {
-     "AutoModel": "AbLang_bert_model.BertModelV2"
+     "AutoModelForMaskedLM": "AbLang_roberta_model.RobertaForMaskedLMV2"
    },
+   "bos_token_id": 0,
    "classifier_dropout": null,
+   "eos_token_id": 2,
    "hidden_act": "gelu",
    "hidden_dropout_prob": 0.1,
    "hidden_size": 768,
@@ -15,11 +17,12 @@
    "intermediate_size": 3072,
    "layer_norm_eps": 1e-12,
    "max_position_embeddings": 160,
-   "model_type": "bert",
+   "model_type": "roberta",
    "num_attention_heads": 12,
    "num_hidden_layers": 12,
    "pad_token_id": 21,
    "position_embedding_type": "absolute",
+   "tie_word_embeddings": false,
    "torch_dtype": "float32",
    "transformers_version": "4.28.1",
    "type_vocab_size": 2,
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:c9d5458446b8f723995df81e9b24b7a4635285fcb33d0d787a7e308bb16c75ea
- size 343223341
+ oid sha256:508c7ea07c28cf327ae680d5b7d1ce72def49c8099991bbbe40997a772055dd7
+ size 343306045