hugohrban committed on
Commit 2029a8c
1 Parent(s): e7d79a5

Upload config

Files changed (2)
  1. config.json +3 -11
  2. configuration_progen.py +87 -0
config.json CHANGED
@@ -1,10 +1,9 @@
 {
-  "_name_or_path": "progen2-small",
   "activation_function": "gelu_new",
-  "architectures": [
-    "ProGenForCausalLM"
-  ],
   "attn_pdrop": 0.0,
+  "auto_map": {
+    "AutoConfig": "configuration_progen.ProGenConfig"
+  },
   "bos_token_id": 1,
   "embd_pdrop": 0.0,
   "eos_token_id": 2,
@@ -20,13 +19,6 @@
   "resid_pdrop": 0.0,
   "rotary_dim": 32,
   "scale_attn_weights": true,
-  "summary_activation": null,
-  "summary_first_dropout": 0.1,
-  "summary_proj_to_labels": true,
-  "summary_type": "cls_index",
-  "summary_use_proj": true,
-  "tokenizer_class": "GPT2Tokenizer",
-  "torch_dtype": "float32",
   "transformers_version": "4.40.0",
   "use_cache": true,
   "vocab_size_emb": 32,
configuration_progen.py ADDED
@@ -0,0 +1,87 @@
+ # coding=utf-8
+ # Copyright 2021 The EleutherAI and HuggingFace Teams. All rights reserved.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ # Modified configuration implementation based on https://github.com/huggingface/transformers/blob/main/src/transformers/models/gptj/configuration_gptj.py
+
+ from transformers.configuration_utils import PretrainedConfig
+ from transformers.utils import logging
+
+ logger = logging.get_logger(__name__)
+
+
+ class ProGenConfig(PretrainedConfig):
+     model_type = "progen"
+
+     def __init__(
+         self,
+         vocab_size_emb=32,
+         vocab_size_lm_head=32,
+         n_positions=1024,
+         n_embd=1024,
+         n_layer=12,
+         n_head=16,
+         rotary_dim=32,
+         n_inner=None,
+         activation_function="gelu_new",
+         resid_pdrop=0.0,
+         embd_pdrop=0.0,
+         attn_pdrop=0.0,
+         layer_norm_epsilon=1e-5,
+         initializer_range=0.02,
+         scale_attn_weights=True,
+         gradient_checkpointing=False,
+         use_cache=True,
+         bos_token_id=1,
+         eos_token_id=2,
+         **kwargs
+     ):
+         super().__init__(bos_token_id=bos_token_id, eos_token_id=eos_token_id, **kwargs)
+
+         self.vocab_size_emb = vocab_size_emb
+         self.vocab_size_lm_head = vocab_size_lm_head
+         self.n_positions = n_positions  # context window size
+         self.n_embd = n_embd
+         self.n_layer = n_layer
+         self.n_head = n_head
+         self.n_inner = n_inner
+         self.rotary_dim = rotary_dim
+         self.activation_function = activation_function
+         self.resid_pdrop = resid_pdrop
+         self.embd_pdrop = embd_pdrop
+         self.attn_pdrop = attn_pdrop
+         self.layer_norm_epsilon = layer_norm_epsilon
+         self.initializer_range = initializer_range
+         self.gradient_checkpointing = gradient_checkpointing
+         self.scale_attn_weights = scale_attn_weights
+         self.use_cache = use_cache
+
+         self.bos_token_id = bos_token_id
+         self.eos_token_id = eos_token_id
+
+     @property
+     def max_position_embeddings(self):
+         return self.n_positions
+
+     @property
+     def hidden_size(self):
+         return self.n_embd
+
+     @property
+     def num_attention_heads(self):
+         return self.n_head
+
+     @property
+     def num_hidden_layers(self):
+         return self.n_layer
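
For reference, a minimal sketch of instantiating the class directly and exercising the alias properties it defines; the values printed are just the defaults from the signature above, not tied to any particular checkpoint:

from configuration_progen import ProGenConfig

# Uses the defaults from __init__; any field can be overridden as a keyword argument.
config = ProGenConfig(n_layer=12, n_head=16, n_embd=1024)

print(config.hidden_size)               # 1024, alias for n_embd
print(config.num_hidden_layers)         # 12, alias for n_layer
print(config.num_attention_heads)       # 16, alias for n_head
print(config.max_position_embeddings)   # 1024, alias for n_positions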