Teknium committed
Commit
d3193be
1 Parent(s): 2e71ff0

Fix Deepspeed Zero3 Config (#791)


* Update zero3.json

Remove CPU offload by default (it slows training down horribly; reducing the batch size is the better trade-off) and change the LR scheduler to one that properly decays. A sketch for re-enabling offload is included below for reference.

* Update zero3.json

fix something
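
For reference, the offload settings removed by this commit can be added back if a model does not fit in GPU memory even with a smaller batch size. Below is a minimal, abridged sketch of the zero_optimization block with the standard DeepSpeed ZeRO-3 offload keys restored (same values as the old config; the remaining zero_optimization keys are unchanged):

  "zero_optimization": {
    "stage": 3,
    "offload_optimizer": {
      "device": "cpu",
      "pin_memory": true
    },
    "offload_param": {
      "device": "cpu",
      "pin_memory": true
    },
    "overlap_comm": true,
    "contiguous_gradients": true,
    "sub_group_size": 0
  }

Expect noticeably slower steps with offload enabled, since parameters and optimizer states are paged between CPU and GPU memory.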

Files changed (1)
  1. deepspeed/zero3.json +3 -10
deepspeed/zero3.json CHANGED
@@ -1,14 +1,6 @@
 {
   "zero_optimization": {
     "stage": 3,
-    "offload_optimizer": {
-      "device": "cpu",
-      "pin_memory": true
-    },
-    "offload_param": {
-      "device": "cpu",
-      "pin_memory": true
-    },
     "overlap_comm": true,
     "contiguous_gradients": true,
     "sub_group_size": 0,
@@ -41,12 +33,13 @@
     }
   },
   "scheduler": {
-    "type": "WarmupLR",
+    "type": "WarmupDecayLR",
     "params": {
       "warmup_min_lr": "auto",
       "warmup_max_lr": "auto",
       "warmup_num_steps": "auto",
-      "warmup_type": "linear"
+      "warmup_type": "linear",
+      "total_num_steps": "auto"
     }
   },
   "gradient_accumulation_steps": "auto",