Iseratho committed on
Commit d4efd8e
1 Parent(s): 56e517a
Files changed (2)
  1. config.json +4 -0
  2. pretrained_word2vec.py +26 -0
config.json CHANGED
@@ -2,6 +2,10 @@
   "architectures": [
     "PretrainedWord2VecHFModel"
   ],
+  "auto_map": {
+    "AutoConfig": "pretrained_word2vec.PretrainedWord2VecHFConfig",
+    "AutoModel": "pretrained_word2vec.PretrainedWord2VecHFModel"
+  },
   "hidden_size": 50,
   "model_type": "glove",
   "num_words": 400001,
pretrained_word2vec.py ADDED
@@ -0,0 +1,26 @@
+from transformers import PreTrainedModel, PretrainedConfig
+from torch import nn
+import torch
+
+class PretrainedWord2VecHFConfig(PretrainedConfig):
+    model_type = "glove"
+
+    def __init__(self, num_words=400001, vector_size=50, **kwargs):
+        self.num_words = num_words
+        self.vector_size = vector_size
+        self.hidden_size = self.vector_size  # Required for sBERT
+        super().__init__(**kwargs)
+
+class PretrainedWord2VecHFModel(PreTrainedModel):
+    config_class = PretrainedWord2VecHFConfig
+
+    def __init__(self, config):
+        super().__init__(config)
+        self.embeddings = nn.Embedding(config.num_words, config.vector_size)
+
+    def set_embeddings(self, embeddings):
+        self.embeddings = nn.Embedding.from_pretrained(torch.tensor(embeddings))
+
+    def forward(self, input_ids, **kwargs):
+        x = self.embeddings(torch.tensor(input_ids))
+        return x
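The new module wraps a fixed word-embedding table in the PreTrainedModel interface so it can be served like any other Hub model. A minimal usage sketch, assuming a NumPy matrix of GloVe vectors with shape (400001, 50) is already at hand (glove_vectors below is an illustrative stand-in, not part of the commit):

import numpy as np
import torch
from pretrained_word2vec import PretrainedWord2VecHFConfig, PretrainedWord2VecHFModel

# Illustrative stand-in for a real (400001, 50) GloVe matrix.
glove_vectors = np.random.rand(400001, 50).astype(np.float32)

config = PretrainedWord2VecHFConfig(num_words=400001, vector_size=50)
model = PretrainedWord2VecHFModel(config)
model.set_embeddings(glove_vectors)  # swaps the random nn.Embedding for the GloVe weights

# forward() wraps input_ids with torch.tensor(), so a plain list of token ids works here.
input_ids = [[1, 2, 3, 4]]
with torch.no_grad():
    vectors = model(input_ids)
print(vectors.shape)  # torch.Size([1, 4, 50])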