inarikami committed on
Commit
a843499
1 Parent(s): e3169ef

Create new file

Browse files
Files changed (1) hide show
  1. 4-A100-config.json +66 -0
4-A100-config.json ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+
3
+
4
+ "pipe-parallel-size": 1,
5
+ "model-parallel-size": 1,
6
+
7
+
8
+ "scaled-upper-triang-masked-softmax-fusion": false,
9
+ "bias-gelu-fusion": false,
10
+
11
+ "fp16": {
12
+ "enabled": "auto",
13
+ "auto_cast": false,
14
+ "loss_scale": 0,
15
+ "initial_scale_power": 32,
16
+ "loss_scale_window": 1000,
17
+ "hysteresis": 2,
18
+ "min_loss_scale": 1
19
+ },
20
+
21
+ "tensorboard": {
22
+ "enabled": true,
23
+ "output_path": "output/ds_logs/",
24
+ "job_name": "train_bert"
25
+ },
26
+
27
+ "optimizer": {
28
+ "type": "AdamW",
29
+ "params": {
30
+ "lr": "auto",
31
+ "betas": "auto",
32
+ "eps": "auto",
33
+ "weight_decay": "auto"
34
+ }
35
+ },
36
+
37
+ "scheduler": {
38
+ "type": "WarmupLR",
39
+ "params": {
40
+ "warmup_min_lr": "auto",
41
+ "warmup_max_lr": "auto",
42
+ "warmup_num_steps": "auto"
43
+ }
44
+ },
45
+
46
+ "zero_optimization": {
47
+ "stage": 2,
48
+ "offload_optimizer": {
49
+ "device": "cpu",
50
+ "pin_memory": true
51
+ },
52
+ "allgather_partitions": true,
53
+ "allgather_bucket_size": 2e8,
54
+ "overlap_comm": true,
55
+ "reduce_scatter": true,
56
+ "reduce_bucket_size": 2e8,
57
+ "contiguous_gradients": true
58
+ },
59
+
60
+ "gradient_accumulation_steps": "auto",
61
+ "gradient_clipping": "auto",
62
+ "steps_per_print": 2000,
63
+ "train_batch_size": "auto",
64
+ "train_micro_batch_size_per_gpu": "auto",
65
+ "wall_clock_breakdown": false
66
+ }