xiaotinghe committed on
Commit
32bd338
·
1 Parent(s): 4fc9846

Upload model

Browse files
Files changed (4) hide show
  1. config.json +36 -0
  2. configuration.py +55 -0
  3. embedding_model.py +29 -0
  4. pytorch_model.bin +3 -0
config.json ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "buffer-embedding-002",
3
+ "apply_residual_connection_post_layernorm": false,
4
+ "architectures": [
5
+ "DualModel"
6
+ ],
7
+ "attention_dropout": 0.0,
8
+ "attention_softmax_in_fp32": true,
9
+ "auto_map": {
10
+ "AutoModel": "embedding_model.DualModel"
11
+ },
12
+ "bias_dropout_fusion": true,
13
+ "bos_token_id": 1,
14
+ "eos_token_id": 2,
15
+ "hidden_dropout": 0.0,
16
+ "hidden_size": 1536,
17
+ "initializer_range": 0.02,
18
+ "layer_norm_epsilon": 1e-05,
19
+ "masked_softmax_fusion": true,
20
+ "model_type": "bloom",
21
+ "n_head": 16,
22
+ "n_inner": null,
23
+ "n_layer": 24,
24
+ "offset_alibi": 100,
25
+ "pad_token_id": 3,
26
+ "pretraining_tp": 1,
27
+ "seq_length": 2048,
28
+ "skip_bias_add": true,
29
+ "skip_bias_add_qkv": false,
30
+ "slow_but_exact": false,
31
+ "torch_dtype": "float16",
32
+ "transformers_version": "4.31.0",
33
+ "unk_token_id": 0,
34
+ "use_cache": true,
35
+ "vocab_size": 46145
36
+ }
configuration.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers.utils import logging
2
+ from transformers.configuration_utils import PretrainedConfig
3
+
4
+
5
+ logger = logging.get_logger(__name__)
6
+
7
+ INTERNLM_PRETRAINED_CONFIG_ARCHIVE_MAP = {}
8
+
9
+
10
class BufferEmbeddingConfig(PretrainedConfig):
    """Configuration holder for the buffer-embedding model.

    The constructor stores the architecture hyper-parameters as attributes
    and hands the special-token ids, together with any remaining keyword
    arguments, to ``PretrainedConfig.__init__``.

    ``attribute_map`` exposes ``n_layer`` and ``n_head`` under the aliases
    ``num_hidden_layers`` and ``num_attention_heads``.

    Args:
        vocab_size: Stored as ``self.vocab_size``.
        hidden_size: Stored as ``self.hidden_size`` unless the legacy
            ``n_embed`` keyword is supplied, in which case ``n_embed`` wins.
        n_layer: Stored as ``self.n_layer``.
        n_head: Stored as ``self.n_head``.
        layer_norm_epsilon: Stored as ``self.layer_norm_epsilon``.
        initializer_range: Stored as ``self.initializer_range``.
        use_cache: Stored as ``self.use_cache``.
        bos_token_id: Stored on the instance and forwarded to the base class.
        eos_token_id: Stored on the instance and forwarded to the base class.
        apply_residual_connection_post_layernorm: Stored under the same name.
        hidden_dropout: Stored as ``self.hidden_dropout``.
        attention_dropout: Stored as ``self.attention_dropout``.
        pretraining_tp: TP rank used when training with megatron.
        slow_but_exact: Stored as ``self.slow_but_exact``.
        **kwargs: ``n_embed`` is consumed here; everything else is passed on
            to ``PretrainedConfig.__init__``.
    """

    model_type = "buffer_embedding"
    _auto_class = "AutoConfig"
    keys_to_ignore_at_inference = ["past_key_values"]
    attribute_map = {
        "num_hidden_layers": "n_layer",
        "num_attention_heads": "n_head",
    }

    def __init__(
        self,
        vocab_size=250880,
        hidden_size=64,
        n_layer=2,
        n_head=8,
        layer_norm_epsilon=1e-5,
        initializer_range=0.02,
        use_cache=True,
        bos_token_id=1,
        eos_token_id=2,
        apply_residual_connection_post_layernorm=False,
        hidden_dropout=0.0,
        attention_dropout=0.0,
        pretraining_tp=1,  # TP rank used when training with megatron
        slow_but_exact=False,
        **kwargs,
    ):
        self.vocab_size = vocab_size

        # Older configs spell the width ``n_embed``; when present it takes
        # precedence over ``hidden_size`` and must not reach the base class.
        legacy_width = kwargs.pop("n_embed", None)
        if legacy_width is not None:
            hidden_size = legacy_width
        self.hidden_size = hidden_size

        # Architecture / initialisation settings.
        self.n_layer = n_layer
        self.n_head = n_head
        self.layer_norm_epsilon = layer_norm_epsilon
        self.initializer_range = initializer_range
        self.use_cache = use_cache
        self.pretraining_tp = pretraining_tp
        self.apply_residual_connection_post_layernorm = (
            apply_residual_connection_post_layernorm
        )
        self.hidden_dropout = hidden_dropout
        self.attention_dropout = attention_dropout

        # Special tokens and the remaining flag.
        self.bos_token_id = bos_token_id
        self.eos_token_id = eos_token_id
        self.slow_but_exact = slow_but_exact

        super().__init__(
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            **kwargs,
        )
55
+
embedding_model.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn.functional as F
3
+ from torch import nn
4
+ from transformers import BloomForCausalLM, PreTrainedModel
5
+ from .configuration import BufferEmbeddingConfig
6
+
7
+
8
class DualModel(PreTrainedModel):
    """Text-embedding model: a BLOOM backbone plus a two-layer projection head.

    ``forward`` mean-pools the backbone's last hidden state over the
    non-masked positions, passes the pooled vector through
    ``Linear -> Tanh -> Linear`` and returns it L2-normalised.

    Changes relative to the original implementation:

    * The head widths follow ``config.hidden_size`` instead of a hard-coded
      1536, so the head always matches the backbone width.
    * A caller-supplied ``attention_mask`` is honoured; previously it was
      silently overwritten.
    * The padding id comes from ``config.pad_token_id`` and falls back to the
      previously hard-coded value 3 only when the config carries none.
    * Rows that contain no unmasked token no longer divide by zero.
    """

    config_class = BufferEmbeddingConfig
    _auto_class = "AutoModel"

    # Padding id the original code hard-coded; used only as a fallback.
    _FALLBACK_PAD_TOKEN_ID = 3

    def __init__(self, config):
        """Build the backbone and the projection head from ``config``."""
        super().__init__(config)
        # NOTE(review): the original line carried a commented-out
        # ``.from_pretrained('Langboat/bloom-800m-zh')`` - presumably the
        # checkpoint the backbone was initialised from; confirm.
        self.model = BloomForCausalLM(config)
        width = config.hidden_size
        self.classifier = nn.Linear(width, width)
        self.hidden = nn.Sequential(nn.Linear(width, width), nn.Tanh())

    def forward(
        self,
        input_ids,
        token_type_ids=None,
        position_ids_ids=None,
        attention_mask=None,
        labels=None,
    ):
        """Return one unit-length embedding per input sequence.

        Args:
            input_ids: Token ids, indexed as (batch_size, max_len).
            token_type_ids: Accepted for call-signature compatibility; unused.
            position_ids_ids: Accepted for call-signature compatibility;
                unused. The misspelled name is kept so existing keyword
                callers keep working.
            attention_mask: Optional mask with the same leading shape as
                ``input_ids``; non-zero marks positions to keep. When omitted
                it is derived from ``input_ids`` by comparing against the
                padding id.
            labels: Accepted for call-signature compatibility; unused.

        Returns:
            Tensor of shape (batch_size, hidden_size), L2-normalised along
            the last dimension.
        """
        if attention_mask is None:
            pad_id = getattr(self.config, "pad_token_id", None)
            if pad_id is None:
                pad_id = self._FALLBACK_PAD_TOKEN_ID
            attention_mask = torch.ne(input_ids, pad_id)  # batch_size, max_len
        else:
            # Keep the boolean dtype the original code always fed the backbone.
            attention_mask = attention_mask.to(torch.bool)

        outputs = self.model(
            input_ids,
            attention_mask=attention_mask,
            output_hidden_states=True,
        )

        # Masked mean over the sequence axis of the final hidden state.
        last_hidden = outputs.hidden_states[-1]
        summed = (last_hidden * attention_mask.unsqueeze(-1)).sum(1)
        # clamp keeps an all-masked row from producing 0/0 = NaN.
        token_counts = attention_mask.sum(1).clamp(min=1).unsqueeze(-1)
        embedding = summed / token_counts

        embedding = self.classifier(self.hidden(embedding))
        return F.normalize(embedding, p=2, dim=-1)
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d52d56062dce41743e6a21f04e7e725a82ef7eff0a3edc01e610cc2ddd9619f
3
+ size 1652985845