prelington committed on
Commit
806b578
·
verified ·
1 Parent(s): c264cbb

Create model_config.py

Browse files
Files changed (1) hide show
  1. model_config.py +25 -0
model_config.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dataclasses import dataclass
2
+ from transformers import PretrainedConfig
3
+
4
@dataclass
class OrcaleSeekConfig:
    """Default hyperparameters for the "orcaleseek" model.

    Every annotated attribute is a dataclass field and can be overridden
    by keyword at construction time.  ``to_hf_config`` turns the values
    into a ``transformers.PretrainedConfig``.

    NOTE(review): the field names and defaults look like they follow the
    GPT-2 configuration naming -- confirm against the model code.
    """

    model_type: str = "orcaleseek"

    # Model dimensions.
    vocab_size: int = 50257
    n_embd: int = 768
    n_layer: int = 12
    n_head: int = 12
    n_inner: int = 3072
    activation_function: str = "gelu_new"

    # Dropout probabilities.
    resid_pdrop: float = 0.1
    embd_pdrop: float = 0.1
    attn_pdrop: float = 0.1

    layer_norm_epsilon: float = 1e-5
    initializer_range: float = 0.02
    scale_attn_weights: bool = True
    use_cache: bool = True

    # Special token ids; both default to the same id.
    bos_token_id: int = 50256
    eos_token_id: int = 50256

    # Un-annotated on purpose: this is a plain class attribute, NOT a
    # dataclass field, so it is not an ``__init__`` parameter and is not
    # stored in the instance ``__dict__``.  ``_hf_kwargs`` adds it back
    # explicitly so it is not lost on export.
    architectures = ["OrcaleSeekForCausalLM"]

    def _hf_kwargs(self) -> dict:
        """Return the keyword arguments handed to ``PretrainedConfig``.

        Starts from a shallow copy of the instance attributes and adds
        ``architectures`` when the instance does not carry its own value.
        The class-level list is copied so the exported config never
        shares (and cannot mutate) the class default.
        """
        kwargs = dict(vars(self))
        kwargs.setdefault("architectures", list(self.architectures))
        return kwargs

    def to_hf_config(self) -> "PretrainedConfig":
        """Build a ``PretrainedConfig`` from this object's values.

        Returns:
            A ``PretrainedConfig`` constructed with every dataclass field
            plus ``architectures`` as keyword arguments.
        """
        return PretrainedConfig(**self._hf_kwargs())