Moreno La Quatra committed on
Commit
9afde18
1 Parent(s): cee4f67

training config v1 model

Browse files
Files changed (1) hide show
  1. v1_config.json +13 -0
v1_config.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "batch_size": 64,
3
+ "mask_probability": 0.15,
4
+ "num_layers": 6,
5
+ "num_heads": 8,
6
+ "d_model": 128,
7
+ "d_ff": 256,
8
+ "p_dropout": 0.1,
9
+ "max_seq_len": 128,
10
+ "vocab_size": 25000,
11
+ "learning_rate": 3e-4,
12
+ "grad_clip_value": 1
13
+ }