winglian committed on
Commit
4dbef09
1 Parent(s): 6dfdd2d

update ds_config

Browse files
Files changed (1) hide show
  1. ds_config.json +19 -0
ds_config.json CHANGED
@@ -10,6 +10,15 @@
10
  "hysteresis": 2,
11
  "min_loss_scale": 1
12
  },
 
 
 
 
 
 
 
 
 
13
  "scheduler": {
14
  "type": "OneCycle",
15
  "params": {
@@ -19,12 +28,22 @@
19
  },
20
  "zero_optimization": {
21
  "stage": 2,
 
 
 
 
 
 
 
 
22
  "overlap_comm": true,
23
  "allgather_partitions": true,
24
  "allgather_bucket_size": 5e8,
25
  "contiguous_gradients": true,
26
  "reduce_bucket_size": "auto",
27
  "reduce_scatter": true,
 
 
28
  "stage3_gather_16bit_weights_on_model_save": true
29
  },
30
  "gradient_accumulation_steps": "auto",
 
10
  "hysteresis": 2,
11
  "min_loss_scale": 1
12
  },
13
+ "optimizer": {
14
+ "type": "Adam",
15
+ "params": {
16
+ "lr": "auto",
17
+ "betas": "auto",
18
+ "eps": "auto",
19
+ "weight_decay": "auto"
20
+ }
21
+ },
22
  "scheduler": {
23
  "type": "OneCycle",
24
  "params": {
 
28
  },
29
  "zero_optimization": {
30
  "stage": 2,
31
+ "offload_optimizer": {
32
+ "device": "cpu",
33
+ "pin_memory": true
34
+ },
35
+ "offload_param": {
36
+ "device": "cpu",
37
+ "pin_memory": true
38
+ },
39
  "overlap_comm": true,
40
  "allgather_partitions": true,
41
  "allgather_bucket_size": 5e8,
42
  "contiguous_gradients": true,
43
  "reduce_bucket_size": "auto",
44
  "reduce_scatter": true,
45
+ "stage3_max_live_parameters": 0,
46
+ "stage3_max_reuse_distance": 0,
47
  "stage3_gather_16bit_weights_on_model_save": true
48
  },
49
  "gradient_accumulation_steps": "auto",