ainz committed on
Commit c887597 · verified · 1 Parent(s): f02cb8f

Upload configuration_tiny_recursive.py with huggingface_hub

Files changed (1)
  1. configuration_tiny_recursive.py +47 -0
configuration_tiny_recursive.py ADDED
@@ -0,0 +1,47 @@
+
+from transformers import PretrainedConfig
+
+class TinyRecursiveConfig(PretrainedConfig):
+    model_type = "tiny_recursive"
+
+    def __init__(
+        self,
+        vocab_size=50257,
+        n_positions=1024,
+        n_embd=512,
+        n_head=8,
+        n_physical_layers=2,
+        n_loops=6,
+        activation_function="gelu_new",
+        resid_pdrop=0.1,
+        embd_pdrop=0.1,
+        attn_pdrop=0.1,
+        layer_norm_epsilon=1e-5,
+        scale_attn_weights=True,
+        scale_attn_by_inverse_layer_idx=False,
+        reorder_and_upcast_attn=False,
+        **kwargs,
+    ):
+        super().__init__(**kwargs)
+        # Standard config
+        self.vocab_size = vocab_size
+        self.n_positions = n_positions
+        self.n_embd = n_embd
+        self.n_head = n_head
+        self.n_physical_layers = n_physical_layers
+        self.n_loops = n_loops
+        self.activation_function = activation_function
+        self.resid_pdrop = resid_pdrop
+        self.embd_pdrop = embd_pdrop
+        self.attn_pdrop = attn_pdrop
+        self.layer_norm_epsilon = layer_norm_epsilon
+        self.scale_attn_weights = scale_attn_weights
+        self.scale_attn_by_inverse_layer_idx = scale_attn_by_inverse_layer_idx
+        self.reorder_and_upcast_attn = reorder_and_upcast_attn
+
+        # CRITICAL FIXES FOR COMPATIBILITY
+        self.max_position_embeddings = n_positions
+        self.hidden_size = n_embd
+        self.num_attention_heads = n_head
+        self.num_hidden_layers = n_physical_layers
+        self.n_inner = None
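
For reference, a minimal usage sketch (not part of the commit) showing how this config can be instantiated and round-tripped through the standard PretrainedConfig serialization. It assumes configuration_tiny_recursive.py is importable from the working directory; the values in the comments follow from the defaults and compatibility aliases defined above, and the "./tiny_recursive" path is just a placeholder.

from configuration_tiny_recursive import TinyRecursiveConfig

# Override a few defaults; everything else keeps the values defined above.
config = TinyRecursiveConfig(n_embd=256, n_head=4, n_loops=4)

print(config.model_type)         # "tiny_recursive"
print(config.hidden_size)        # 256, aliased from n_embd
print(config.num_hidden_layers)  # 2, aliased from n_physical_layers

# Standard PretrainedConfig save/load round trip.
config.save_pretrained("./tiny_recursive")  # writes ./tiny_recursive/config.json
reloaded = TinyRecursiveConfig.from_pretrained("./tiny_recursive")
assert reloaded.n_loops == 4

The aliases set at the end of __init__ (max_position_embeddings, hidden_size, num_attention_heads, num_hidden_layers) let generic Transformers utilities that expect those canonical attribute names work with this config without further changes.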