Training in progress, step 1000

Files changed (7) hide show

ds_config.json CHANGED Viewed

@@ -18,14 +18,16 @@
         }
     },
-    "scheduler": {
-        "type": "WarmupLR",
-        "params": {
-            "warmup_min_lr": "auto",
-            "warmup_max_lr": "auto",
-            "warmup_num_steps": "auto"
-        }
-    },
     "zero_optimization": {
         "stage": 2,

         }
     },
+   "scheduler": {
+         "type": "WarmupDecayLR",
+         "params": {
+             "last_batch_iteration": -1,
+             "total_num_steps": "auto",
+             "warmup_min_lr": "auto",
+             "warmup_max_lr": "auto",
+             "warmup_num_steps": "auto"
+         }
+     },
     "zero_optimization": {
         "stage": 2,

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0c0f5ee3669a1e41af8da9db9fe9ae9130dc32a13e3ae28cd41aef89e280450d
 size 483536061

 version https://git-lfs.github.com/spec/v1
+oid sha256:66c65726b9d7262b98529730a55b1d37e92aafedea5486722aba0d97e4216171
 size 483536061

run.sh CHANGED Viewed

@@ -11,7 +11,7 @@ deepspeed run_speech_recognition_seq2seq_streaming-farsipal.py \
 	--torch_compile="True" \
 	--torch_compile_mode="reduce-overhead" \
 	--torch_compile_mode="max-autotune" \
-	--max_steps="20000" \
 	--output_dir="./" \
 	--per_device_train_batch_size="32" \
 	--gradient_accumulation_steps="1" \

 	--torch_compile="True" \
 	--torch_compile_mode="reduce-overhead" \
 	--torch_compile_mode="max-autotune" \
+	--max_steps="5000" \
 	--output_dir="./" \
 	--per_device_train_batch_size="32" \
 	--gradient_accumulation_steps="1" \

runs/Dec20_12-59-04_129-146-176-120/events.out.tfevents.1671542011.129-146-176-120.680663.0 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:20b6c09a11d60f3398e4b98c0406379babb3f2ead03f08f83b1353de1038c4a3
-size 43878

 version https://git-lfs.github.com/spec/v1
+oid sha256:43fcfbc982169b9c6eb2693bf7b43e6d0e4479fe2231d88d1b01991bc0ac2779
+size 48588

runs/Dec20_17-12-10_129-146-176-120/1671557236.120787/events.out.tfevents.1671557236.129-146-176-120.1173831.1 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:c58b0f30d76344d37ea230440b9eb5c6772543e5e75da63ce4f21659a56709f9
+size 5893

runs/Dec20_17-12-10_129-146-176-120/events.out.tfevents.1671557236.129-146-176-120.1173831.0 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:4a3ff8c5d37804ce31968452140d31507b018a6068f914438dcace86df6f795d
+size 10887

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1d501de5f42040e906b7cddad2305bbf2fffbf7817fcdc2277b3a505956e81b7
-size 4667

 version https://git-lfs.github.com/spec/v1
+oid sha256:787167c636f3a936e53d4a21eb438c4ffab814c944cd2c0ae1aba0081a1e1f43
+size 4731