danieldk-explosion committed on
Commit
8cf416b
1 Parent(s): 20c7724

Add refined-web-model (falcon) test model

config.json ADDED
@@ -0,0 +1,36 @@
+ {
+   "_name_or_path": "tiiuae/falcon-7b",
+   "alibi": false,
+   "apply_residual_connection_post_layernorm": false,
+   "architectures": [
+     "RWForCausalLM"
+   ],
+   "attention_dropout": 0.0,
+   "auto_map": {
+     "AutoConfig": "configuration_RW.RWConfig",
+     "AutoModel": "modelling_RW.RWModel",
+     "AutoModelForCausalLM": "modelling_RW.RWForCausalLM",
+     "AutoModelForQuestionAnswering": "modelling_RW.RWForQuestionAnswering",
+     "AutoModelForSequenceClassification": "modelling_RW.RWForSequenceClassification",
+     "AutoModelForTokenClassification": "modelling_RW.RWForTokenClassification"
+   },
+   "bias": false,
+   "bos_token_id": 11,
+   "eos_token_id": 11,
+   "hidden_dropout": 0.0,
+   "hidden_size": 32,
+   "initializer_range": 0.02,
+   "intermediate_size": 37,
+   "layer_norm_epsilon": 1e-05,
+   "max_position_embeddings": 512,
+   "model_type": "RefinedWebModel",
+   "multi_query": true,
+   "n_head": 4,
+   "n_layer": 5,
+   "parallel_attn": true,
+   "torch_dtype": "float32",
+   "transformers_version": "4.28.1",
+   "type_vocab_size": 16,
+   "use_cache": true,
+   "vocab_size": 1024
+ }
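
As context for how this tiny config is meant to be used, here is a minimal loading sketch with transformers. The repository id below is a placeholder (the actual id is not stated in this commit), and trust_remote_code=True is required because "auto_map" points at the custom configuration_RW/modelling_RW classes shipped alongside the config.

from transformers import AutoConfig, AutoModelForCausalLM

# Placeholder repository id; substitute the actual location of this test model.
repo_id = "org/refined-web-model-test"

# trust_remote_code=True lets transformers import configuration_RW.py and
# modelling_RW.py from the repository, as declared in "auto_map" above.
config = AutoConfig.from_pretrained(repo_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(repo_id, trust_remote_code=True)

print(config.model_type)  # "RefinedWebModel"
print(config.n_layer, config.n_head, config.hidden_size)  # 5 4 32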
configuration_RW.py ADDED
@@ -0,0 +1,79 @@
+ # coding=utf-8
+ # Copyright 2022 the Big Science Workshop and HuggingFace Inc. team. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ """ Bloom configuration"""
+ from transformers.configuration_utils import PretrainedConfig
+ from transformers.utils import logging
+
+
+ logger = logging.get_logger(__name__)
+
+
+ class RWConfig(PretrainedConfig):
+     model_type = "RefinedWebModel"
+     keys_to_ignore_at_inference = ["past_key_values"]
+     attribute_map = {
+         "num_hidden_layers": "n_layer",
+         "num_attention_heads": "n_head",
+     }
+
+     def __init__(
+         self,
+         vocab_size=250880,
+         hidden_size=64,
+         n_layer=2,
+         n_head=8,
+         layer_norm_epsilon=1e-5,
+         initializer_range=0.02,
+         use_cache=True,
+         bos_token_id=1,
+         eos_token_id=2,
+         apply_residual_connection_post_layernorm=False,
+         hidden_dropout=0.0,
+         attention_dropout=0.0,
+         multi_query=False,
+         alibi=False,
+         bias=False,
+         parallel_attn=False,
+         **kwargs,
+     ):
+         self.vocab_size = vocab_size
+         # Backward compatibility with n_embed kwarg
+         n_embed = kwargs.pop("n_embed", None)
+         self.hidden_size = hidden_size if n_embed is None else n_embed
+         self.n_layer = n_layer
+         self.n_head = n_head
+         self.layer_norm_epsilon = layer_norm_epsilon
+         self.initializer_range = initializer_range
+         self.use_cache = use_cache
+         self.apply_residual_connection_post_layernorm = apply_residual_connection_post_layernorm
+         self.hidden_dropout = hidden_dropout
+         self.attention_dropout = attention_dropout
+
+         self.bos_token_id = bos_token_id
+         self.eos_token_id = eos_token_id
+         self.multi_query = multi_query
+         self.alibi = alibi
+         self.bias = bias
+         self.parallel_attn = parallel_attn
+
+         super().__init__(bos_token_id=bos_token_id, eos_token_id=eos_token_id, **kwargs)
+
+     @property
+     def head_dim(self):
+         return self.hidden_size // self.n_head
+
+     @property
+     def rotary(self):
+         return not self.alibi
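
A small usage sketch of RWConfig itself, assuming configuration_RW.py from this commit has been downloaded and is importable locally; the constructor arguments mirror the tiny test dimensions in config.json above.

# Assumes configuration_RW.py from this commit is on the Python path.
from configuration_RW import RWConfig

# Mirror the tiny test dimensions from config.json.
config = RWConfig(
    vocab_size=1024,
    hidden_size=32,
    n_layer=5,
    n_head=4,
    multi_query=True,
    parallel_attn=True,
)

print(config.head_dim)             # 32 // 4 == 8
print(config.rotary)               # True, because alibi defaults to False
print(config.num_attention_heads)  # 4, resolved to n_head via attribute_map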
generation_config.json ADDED
@@ -0,0 +1,6 @@
+ {
+   "_from_model_config": true,
+   "bos_token_id": 11,
+   "eos_token_id": 11,
+   "transformers_version": "4.28.1"
+ }
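
For completeness, a hedged sketch of reading this generation config directly with GenerationConfig.from_pretrained; the repository id is again a placeholder.

from transformers import GenerationConfig

# Placeholder repository id; this call would parse the JSON file above.
gen_config = GenerationConfig.from_pretrained("org/refined-web-model-test")
print(gen_config.bos_token_id, gen_config.eos_token_id)  # 11 11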
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7cdb9ea212f8e0ed2beb7daa39f49daccbf801ef021c1695f4084f24b6425ea7
+ size 358036
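
The file above is a Git LFS pointer, not the weights themselves; the actual 358036-byte checkpoint is stored by LFS under the listed sha256. A small sketch of verifying a downloaded copy against that digest (the local path is an assumption):

import hashlib
import os

# Hypothetical local path to the downloaded (LFS-resolved) weight file.
path = "pytorch_model.bin"

sha = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        sha.update(chunk)

# Both values should match the LFS pointer: the oid and the size.
print(sha.hexdigest())
print(os.path.getsize(path))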