ehartford committed on
Commit
b6d16c3
1 Parent(s): 9e25ebb

Update ds_config.json

Browse files
Files changed (1) hide show
  1. ds_config.json +54 -0
ds_config.json CHANGED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "zero_optimization": {
3
+ "stage": 3,
4
+ "overlap_comm": true,
5
+ "contiguous_gradients": true,
6
+ "sub_group_size": 0,
7
+ "reduce_bucket_size": "auto",
8
+ "stage3_prefetch_bucket_size": "auto",
9
+ "stage3_param_persistence_threshold": "auto",
10
+ "stage3_max_live_parameters": 0,
11
+ "stage3_max_reuse_distance": 0,
12
+ "stage3_gather_16bit_weights_on_model_save": true
13
+ },
14
+ "bf16": {
15
+ "enabled": true,
16
+ "auto_cast": false,
17
+ "loss_scale": 0,
18
+ "initial_scale_power": 32,
19
+ "loss_scale_window": 1000,
20
+ "hysteresis": 2,
21
+ "min_loss_scale": 1
22
+ },
23
+ "optimizer": {
24
+ "type": "AdamW",
25
+ "params": {
26
+ "lr": "auto",
27
+ "betas": [
28
+ 0.9,
29
+ 0.999
30
+ ],
31
+ "eps": 1e-8,
32
+ "weight_decay": 0
33
+ }
34
+ },
35
+ "scheduler": {
36
+ "type": "OneCycle",
37
+ "params": {
38
+ "cycle_first_step_size": 1000,
39
+ "cycle_first_stair_count": 500,
40
+ "cycle_second_step_size": 1000,
41
+ "cycle_second_stair_count": 500,
42
+ "decay_step_size": 1000,
43
+ "cycle_min_lr": 0,
44
+ "cycle_max_lr": 1e-5,
45
+ "decay_lr_rate": 0.001,
46
+ "cycle_min_mom": 0.85,
47
+ "cycle_max_mom": 0.99,
48
+ "decay_mom_rate": 0.0
49
+ }
50
+ },
51
+ "train_batch_size": "auto",
52
+ "train_micro_batch_size_per_gpu": "auto",
53
+ "wall_clock_breakdown": false
54
+ }