marinone94 committed on
Commit 4621ac3 • 1 Parent(s): 216b1e3

Training in progress, step 500

Files changed (42)
  1. .ipynb_checkpoints/run-checkpoint.sh +3 -3
  2. checkpoint-200/scaler.pt +0 -3
  3. checkpoint-200/trainer_state.json +0 -172
  4. checkpoint-250/optimizer.pt +0 -3
  5. checkpoint-250/trainer_state.json +0 -211
  6. checkpoint-350/config.json +0 -107
  7. checkpoint-350/preprocessor_config.json +0 -9
  8. checkpoint-350/pytorch_model.bin +0 -3
  9. checkpoint-350/rng_state.pth +0 -3
  10. checkpoint-350/scheduler.pt +0 -3
  11. checkpoint-350/training_args.bin +0 -3
  12. {checkpoint-200 β†’ checkpoint-400}/config.json +0 -0
  13. {checkpoint-300 β†’ checkpoint-400}/optimizer.pt +1 -1
  14. {checkpoint-200 β†’ checkpoint-400}/preprocessor_config.json +0 -0
  15. {checkpoint-250 β†’ checkpoint-400}/pytorch_model.bin +1 -1
  16. {checkpoint-250 β†’ checkpoint-400}/rng_state.pth +1 -1
  17. {checkpoint-300 β†’ checkpoint-400}/scaler.pt +1 -1
  18. {checkpoint-200 β†’ checkpoint-400}/scheduler.pt +1 -1
  19. {checkpoint-350 β†’ checkpoint-400}/trainer_state.json +42 -3
  20. {checkpoint-200 β†’ checkpoint-400}/training_args.bin +0 -0
  21. {checkpoint-250 β†’ checkpoint-450}/config.json +0 -0
  22. {checkpoint-350 β†’ checkpoint-450}/optimizer.pt +1 -1
  23. {checkpoint-250 β†’ checkpoint-450}/preprocessor_config.json +0 -0
  24. {checkpoint-200 β†’ checkpoint-450}/pytorch_model.bin +1 -1
  25. {checkpoint-300 β†’ checkpoint-450}/rng_state.pth +1 -1
  26. {checkpoint-250 β†’ checkpoint-450}/scaler.pt +1 -1
  27. {checkpoint-300 β†’ checkpoint-450}/scheduler.pt +1 -1
  28. {checkpoint-300 β†’ checkpoint-450}/trainer_state.json +120 -3
  29. {checkpoint-250 β†’ checkpoint-450}/training_args.bin +0 -0
  30. {checkpoint-300 β†’ checkpoint-500}/config.json +0 -0
  31. {checkpoint-200 β†’ checkpoint-500}/optimizer.pt +2 -2
  32. {checkpoint-300 β†’ checkpoint-500}/preprocessor_config.json +0 -0
  33. {checkpoint-300 β†’ checkpoint-500}/pytorch_model.bin +1 -1
  34. {checkpoint-200 β†’ checkpoint-500}/rng_state.pth +2 -2
  35. {checkpoint-350 β†’ checkpoint-500}/scaler.pt +1 -1
  36. {checkpoint-250 β†’ checkpoint-500}/scheduler.pt +1 -1
  37. checkpoint-500/trainer_state.json +55 -0
  38. {checkpoint-300 β†’ checkpoint-500}/training_args.bin +1 -1
  39. pytorch_model.bin +1 -1
  40. run.sh +3 -3
  41. special_tokens_map.json +1 -1
  42. training_args.bin +1 -1
.ipynb_checkpoints/run-checkpoint.sh CHANGED
@@ -14,9 +14,9 @@ python run_speech_recognition_ctc.py \
14
  --evaluation_strategy="steps" \
15
  --text_column_name="sentence" \
16
  --chars_to_ignore , ? . ! \- \; \: \" “ % ‘ ” � — ’ … – \
17
- --save_steps="50" \
18
- --eval_steps="50" \
19
- --logging_steps="10" \
20
  --layerdrop="0.0" \
21
  --activation_dropout="0.1" \
22
  --save_total_limit="3" \
 
14
  --evaluation_strategy="steps" \
15
  --text_column_name="sentence" \
16
  --chars_to_ignore , ? . ! \- \; \: \" “ % ‘ ” � — ’ … – \
17
+ --save_steps="500" \
18
+ --eval_steps="500" \
19
+ --logging_steps="100" \
20
  --layerdrop="0.0" \
21
  --activation_dropout="0.1" \
22
  --save_total_limit="3" \
checkpoint-200/scaler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:456d3f8c3511ae0b0f0b3bf14cf84027d3dd6e2dd5258c9c8a92b9132d6ccfef
3
- size 559
 
 
 
 
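The deleted and renamed binary files in this commit are tracked with Git LFS, so each diff above only touches a three-line pointer stub: the spec version, the sha256 object id, and the object size in bytes. As a rough, purely illustrative sketch (not part of this repository's code), a clone made without fetching LFS objects (for example with GIT_LFS_SKIP_SMUDGE=1) leaves these paths containing the pointer text, which can be inspected like this; the path used below is just one of the files listed above.

from pathlib import Path

def read_lfs_pointer(path):
    # Parse a Git LFS pointer file ("version ...", "oid sha256:...", "size ...")
    # into a dict keyed by the first word of each line.
    fields = {}
    for line in Path(path).read_text().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields

# Hypothetical usage; assumes the repo was cloned without smudging LFS objects.
pointer = read_lfs_pointer("checkpoint-500/pytorch_model.bin")
print(pointer.get("oid"), pointer.get("size"))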
checkpoint-200/trainer_state.json DELETED
@@ -1,172 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 0.580130529369108,
5
- "global_step": 200,
6
- "is_hyper_param_search": false,
7
- "is_local_process_zero": true,
8
- "is_world_process_zero": true,
9
- "log_history": [
10
- {
11
- "epoch": 0.03,
12
- "learning_rate": 3.75e-05,
13
- "loss": 12.1562,
14
- "step": 10
15
- },
16
- {
17
- "epoch": 0.06,
18
- "learning_rate": 7.125e-05,
19
- "loss": 8.7679,
20
- "step": 20
21
- },
22
- {
23
- "epoch": 0.09,
24
- "learning_rate": 7.398952095808383e-05,
25
- "loss": 5.3683,
26
- "step": 30
27
- },
28
- {
29
- "epoch": 0.12,
30
- "learning_rate": 7.286676646706586e-05,
31
- "loss": 4.3219,
32
- "step": 40
33
- },
34
- {
35
- "epoch": 0.15,
36
- "learning_rate": 7.17440119760479e-05,
37
- "loss": 3.7182,
38
- "step": 50
39
- },
40
- {
41
- "epoch": 0.15,
42
- "eval_loss": 3.836604595184326,
43
- "eval_runtime": 133.4846,
44
- "eval_samples_per_second": 34.611,
45
- "eval_steps_per_second": 4.33,
46
- "eval_wer": 1.0,
47
- "step": 50
48
- },
49
- {
50
- "epoch": 0.17,
51
- "learning_rate": 7.062125748502993e-05,
52
- "loss": 3.478,
53
- "step": 60
54
- },
55
- {
56
- "epoch": 0.2,
57
- "learning_rate": 6.949850299401197e-05,
58
- "loss": 3.4492,
59
- "step": 70
60
- },
61
- {
62
- "epoch": 0.23,
63
- "learning_rate": 6.837574850299401e-05,
64
- "loss": 3.3928,
65
- "step": 80
66
- },
67
- {
68
- "epoch": 0.26,
69
- "learning_rate": 6.725299401197604e-05,
70
- "loss": 3.3183,
71
- "step": 90
72
- },
73
- {
74
- "epoch": 0.29,
75
- "learning_rate": 6.613023952095809e-05,
76
- "loss": 3.2075,
77
- "step": 100
78
- },
79
- {
80
- "epoch": 0.29,
81
- "eval_loss": 3.258362293243408,
82
- "eval_runtime": 126.6078,
83
- "eval_samples_per_second": 36.491,
84
- "eval_steps_per_second": 4.565,
85
- "eval_wer": 1.0,
86
- "step": 100
87
- },
88
- {
89
- "epoch": 0.32,
90
- "learning_rate": 6.500748502994012e-05,
91
- "loss": 3.14,
92
- "step": 110
93
- },
94
- {
95
- "epoch": 0.35,
96
- "learning_rate": 6.388473053892215e-05,
97
- "loss": 3.1281,
98
- "step": 120
99
- },
100
- {
101
- "epoch": 0.38,
102
- "learning_rate": 6.276197604790418e-05,
103
- "loss": 3.0987,
104
- "step": 130
105
- },
106
- {
107
- "epoch": 0.41,
108
- "learning_rate": 6.163922155688622e-05,
109
- "loss": 3.1003,
110
- "step": 140
111
- },
112
- {
113
- "epoch": 0.44,
114
- "learning_rate": 6.0516467065868256e-05,
115
- "loss": 3.0922,
116
- "step": 150
117
- },
118
- {
119
- "epoch": 0.44,
120
- "eval_loss": 3.127869129180908,
121
- "eval_runtime": 126.3837,
122
- "eval_samples_per_second": 36.555,
123
- "eval_steps_per_second": 4.573,
124
- "eval_wer": 1.0,
125
- "step": 150
126
- },
127
- {
128
- "epoch": 0.46,
129
- "learning_rate": 5.9393712574850293e-05,
130
- "loss": 3.0588,
131
- "step": 160
132
- },
133
- {
134
- "epoch": 0.49,
135
- "learning_rate": 5.827095808383233e-05,
136
- "loss": 3.0477,
137
- "step": 170
138
- },
139
- {
140
- "epoch": 0.52,
141
- "learning_rate": 5.714820359281436e-05,
142
- "loss": 3.045,
143
- "step": 180
144
- },
145
- {
146
- "epoch": 0.55,
147
- "learning_rate": 5.602544910179641e-05,
148
- "loss": 3.0439,
149
- "step": 190
150
- },
151
- {
152
- "epoch": 0.58,
153
- "learning_rate": 5.490269461077844e-05,
154
- "loss": 3.0846,
155
- "step": 200
156
- },
157
- {
158
- "epoch": 0.58,
159
- "eval_loss": 3.079519271850586,
160
- "eval_runtime": 125.7215,
161
- "eval_samples_per_second": 36.748,
162
- "eval_steps_per_second": 4.597,
163
- "eval_wer": 1.0,
164
- "step": 200
165
- }
166
- ],
167
- "max_steps": 688,
168
- "num_train_epochs": 2,
169
- "total_flos": 5.906333355279667e+17,
170
- "trial_name": null,
171
- "trial_params": null
172
- }
 
 
 
 
checkpoint-250/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:25575d34d06948a00999a341b6ef425486d94285aac957915fb8f09abecc3531
3
- size 2490361937
 
 
 
 
checkpoint-250/trainer_state.json DELETED
@@ -1,211 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 0.7251631617113851,
5
- "global_step": 250,
6
- "is_hyper_param_search": false,
7
- "is_local_process_zero": true,
8
- "is_world_process_zero": true,
9
- "log_history": [
10
- {
11
- "epoch": 0.03,
12
- "learning_rate": 3.75e-05,
13
- "loss": 12.1562,
14
- "step": 10
15
- },
16
- {
17
- "epoch": 0.06,
18
- "learning_rate": 7.125e-05,
19
- "loss": 8.7679,
20
- "step": 20
21
- },
22
- {
23
- "epoch": 0.09,
24
- "learning_rate": 7.398952095808383e-05,
25
- "loss": 5.3683,
26
- "step": 30
27
- },
28
- {
29
- "epoch": 0.12,
30
- "learning_rate": 7.286676646706586e-05,
31
- "loss": 4.3219,
32
- "step": 40
33
- },
34
- {
35
- "epoch": 0.15,
36
- "learning_rate": 7.17440119760479e-05,
37
- "loss": 3.7182,
38
- "step": 50
39
- },
40
- {
41
- "epoch": 0.15,
42
- "eval_loss": 3.836604595184326,
43
- "eval_runtime": 133.4846,
44
- "eval_samples_per_second": 34.611,
45
- "eval_steps_per_second": 4.33,
46
- "eval_wer": 1.0,
47
- "step": 50
48
- },
49
- {
50
- "epoch": 0.17,
51
- "learning_rate": 7.062125748502993e-05,
52
- "loss": 3.478,
53
- "step": 60
54
- },
55
- {
56
- "epoch": 0.2,
57
- "learning_rate": 6.949850299401197e-05,
58
- "loss": 3.4492,
59
- "step": 70
60
- },
61
- {
62
- "epoch": 0.23,
63
- "learning_rate": 6.837574850299401e-05,
64
- "loss": 3.3928,
65
- "step": 80
66
- },
67
- {
68
- "epoch": 0.26,
69
- "learning_rate": 6.725299401197604e-05,
70
- "loss": 3.3183,
71
- "step": 90
72
- },
73
- {
74
- "epoch": 0.29,
75
- "learning_rate": 6.613023952095809e-05,
76
- "loss": 3.2075,
77
- "step": 100
78
- },
79
- {
80
- "epoch": 0.29,
81
- "eval_loss": 3.258362293243408,
82
- "eval_runtime": 126.6078,
83
- "eval_samples_per_second": 36.491,
84
- "eval_steps_per_second": 4.565,
85
- "eval_wer": 1.0,
86
- "step": 100
87
- },
88
- {
89
- "epoch": 0.32,
90
- "learning_rate": 6.500748502994012e-05,
91
- "loss": 3.14,
92
- "step": 110
93
- },
94
- {
95
- "epoch": 0.35,
96
- "learning_rate": 6.388473053892215e-05,
97
- "loss": 3.1281,
98
- "step": 120
99
- },
100
- {
101
- "epoch": 0.38,
102
- "learning_rate": 6.276197604790418e-05,
103
- "loss": 3.0987,
104
- "step": 130
105
- },
106
- {
107
- "epoch": 0.41,
108
- "learning_rate": 6.163922155688622e-05,
109
- "loss": 3.1003,
110
- "step": 140
111
- },
112
- {
113
- "epoch": 0.44,
114
- "learning_rate": 6.0516467065868256e-05,
115
- "loss": 3.0922,
116
- "step": 150
117
- },
118
- {
119
- "epoch": 0.44,
120
- "eval_loss": 3.127869129180908,
121
- "eval_runtime": 126.3837,
122
- "eval_samples_per_second": 36.555,
123
- "eval_steps_per_second": 4.573,
124
- "eval_wer": 1.0,
125
- "step": 150
126
- },
127
- {
128
- "epoch": 0.46,
129
- "learning_rate": 5.9393712574850293e-05,
130
- "loss": 3.0588,
131
- "step": 160
132
- },
133
- {
134
- "epoch": 0.49,
135
- "learning_rate": 5.827095808383233e-05,
136
- "loss": 3.0477,
137
- "step": 170
138
- },
139
- {
140
- "epoch": 0.52,
141
- "learning_rate": 5.714820359281436e-05,
142
- "loss": 3.045,
143
- "step": 180
144
- },
145
- {
146
- "epoch": 0.55,
147
- "learning_rate": 5.602544910179641e-05,
148
- "loss": 3.0439,
149
- "step": 190
150
- },
151
- {
152
- "epoch": 0.58,
153
- "learning_rate": 5.490269461077844e-05,
154
- "loss": 3.0846,
155
- "step": 200
156
- },
157
- {
158
- "epoch": 0.58,
159
- "eval_loss": 3.079519271850586,
160
- "eval_runtime": 125.7215,
161
- "eval_samples_per_second": 36.748,
162
- "eval_steps_per_second": 4.597,
163
- "eval_wer": 1.0,
164
- "step": 200
165
- },
166
- {
167
- "epoch": 0.61,
168
- "learning_rate": 5.3779940119760477e-05,
169
- "loss": 3.0512,
170
- "step": 210
171
- },
172
- {
173
- "epoch": 0.64,
174
- "learning_rate": 5.265718562874251e-05,
175
- "loss": 3.0143,
176
- "step": 220
177
- },
178
- {
179
- "epoch": 0.67,
180
- "learning_rate": 5.1534431137724546e-05,
181
- "loss": 3.0387,
182
- "step": 230
183
- },
184
- {
185
- "epoch": 0.7,
186
- "learning_rate": 5.0411676646706584e-05,
187
- "loss": 3.0311,
188
- "step": 240
189
- },
190
- {
191
- "epoch": 0.73,
192
- "learning_rate": 4.9288922155688615e-05,
193
- "loss": 3.0417,
194
- "step": 250
195
- },
196
- {
197
- "epoch": 0.73,
198
- "eval_loss": 3.069390058517456,
199
- "eval_runtime": 125.7339,
200
- "eval_samples_per_second": 36.744,
201
- "eval_steps_per_second": 4.597,
202
- "eval_wer": 1.0,
203
- "step": 250
204
- }
205
- ],
206
- "max_steps": 688,
207
- "num_train_epochs": 2,
208
- "total_flos": 7.370807236652851e+17,
209
- "trial_name": null,
210
- "trial_params": null
211
- }
 
 
 
 
checkpoint-350/config.json DELETED
@@ -1,107 +0,0 @@
1
- {
2
- "_name_or_path": "facebook/wav2vec2-xls-r-300m",
3
- "activation_dropout": 0.1,
4
- "adapter_kernel_size": 3,
5
- "adapter_stride": 2,
6
- "add_adapter": false,
7
- "apply_spec_augment": true,
8
- "architectures": [
9
- "Wav2Vec2ForCTC"
10
- ],
11
- "attention_dropout": 0.0,
12
- "bos_token_id": 1,
13
- "classifier_proj_size": 256,
14
- "codevector_dim": 768,
15
- "contrastive_logits_temperature": 0.1,
16
- "conv_bias": true,
17
- "conv_dim": [
18
- 512,
19
- 512,
20
- 512,
21
- 512,
22
- 512,
23
- 512,
24
- 512
25
- ],
26
- "conv_kernel": [
27
- 10,
28
- 3,
29
- 3,
30
- 3,
31
- 3,
32
- 2,
33
- 2
34
- ],
35
- "conv_stride": [
36
- 5,
37
- 2,
38
- 2,
39
- 2,
40
- 2,
41
- 2,
42
- 2
43
- ],
44
- "ctc_loss_reduction": "mean",
45
- "ctc_zero_infinity": false,
46
- "diversity_loss_weight": 0.1,
47
- "do_stable_layer_norm": true,
48
- "eos_token_id": 2,
49
- "feat_extract_activation": "gelu",
50
- "feat_extract_dropout": 0.0,
51
- "feat_extract_norm": "layer",
52
- "feat_proj_dropout": 0.0,
53
- "feat_quantizer_dropout": 0.0,
54
- "final_dropout": 0.0,
55
- "hidden_act": "gelu",
56
- "hidden_dropout": 0.0,
57
- "hidden_size": 1024,
58
- "initializer_range": 0.02,
59
- "intermediate_size": 4096,
60
- "layer_norm_eps": 1e-05,
61
- "layerdrop": 0.0,
62
- "mask_feature_length": 64,
63
- "mask_feature_min_masks": 0,
64
- "mask_feature_prob": 0.25,
65
- "mask_time_length": 10,
66
- "mask_time_min_masks": 2,
67
- "mask_time_prob": 0.75,
68
- "model_type": "wav2vec2",
69
- "num_adapter_layers": 3,
70
- "num_attention_heads": 16,
71
- "num_codevector_groups": 2,
72
- "num_codevectors_per_group": 320,
73
- "num_conv_pos_embedding_groups": 16,
74
- "num_conv_pos_embeddings": 128,
75
- "num_feat_extract_layers": 7,
76
- "num_hidden_layers": 24,
77
- "num_negatives": 100,
78
- "output_hidden_size": 1024,
79
- "pad_token_id": 34,
80
- "proj_codevector_dim": 768,
81
- "tdnn_dilation": [
82
- 1,
83
- 2,
84
- 3,
85
- 1,
86
- 1
87
- ],
88
- "tdnn_dim": [
89
- 512,
90
- 512,
91
- 512,
92
- 512,
93
- 1500
94
- ],
95
- "tdnn_kernel": [
96
- 5,
97
- 3,
98
- 3,
99
- 1,
100
- 1
101
- ],
102
- "torch_dtype": "float32",
103
- "transformers_version": "4.16.0.dev0",
104
- "use_weighted_layer_sum": false,
105
- "vocab_size": 37,
106
- "xvector_output_dim": 512
107
- }
 
 
 
 
checkpoint-350/preprocessor_config.json DELETED
@@ -1,9 +0,0 @@
1
- {
2
- "do_normalize": true,
3
- "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
- "feature_size": 1,
5
- "padding_side": "right",
6
- "padding_value": 0,
7
- "return_attention_mask": true,
8
- "sampling_rate": 16000
9
- }
 
 
 
 
checkpoint-350/pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f0d76767012f4da5dc0d5f53230af28368c23a384f4a2b862a3afb09aea2dc6d
3
- size 1262075377
 
 
 
 
checkpoint-350/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:4671c6ef20752fdeca41dd978833212c15422e660369baad61a24c693eba960d
3
- size 14567
 
 
 
 
checkpoint-350/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:68cb16be7c67302d24fc36708cbe6b5ff6ca823143d0ed4ccd59b12de9852185
3
- size 623
 
 
 
 
checkpoint-350/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:77806a16ec6ef209c8a5c1c085159a0bc8e45c23f2d143f2c13e01527f13b5b2
3
- size 2991
 
 
 
 
{checkpoint-200 β†’ checkpoint-400}/config.json RENAMED
File without changes
{checkpoint-300 β†’ checkpoint-400}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0fbc6f4dd07d8d67fd707ed0307012427b86b156035171248cffd740587b141d
3
  size 2490362385
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16fdeeb953ce80d82d1ba249d99ae68b45e80c943e36a80b8a6517c275a7b594
3
  size 2490362385
{checkpoint-200 β†’ checkpoint-400}/preprocessor_config.json RENAMED
File without changes
{checkpoint-250 β†’ checkpoint-400}/pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:23de3d5e56e89844f6d19c6036e4e2b363f725b01482cc092d77475cb79971ed
3
  size 1262075377
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65431ecd0a10e000abb091aa8e3064f6e344c3e7771a07e792b6c0470ee8092c
3
  size 1262075377
{checkpoint-250 β†’ checkpoint-400}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b3ca012043e7838ae908c34417eeb6fac92f1f9b5c9fe391cf53fd8efcf0ad26
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a426a909abceadbfb6682c295322fafea2b96fc77d190014718f2a843f386bc4
3
  size 14567
{checkpoint-300 β†’ checkpoint-400}/scaler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d459085befd098f94535d03ce7c7b44c036f922e66bbf06dd7d1df1f2271db85
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3828e6a0e34e076fa271e3dd1c08f47dc96711f380d9585d89de05befff54169
3
  size 559
{checkpoint-200 β†’ checkpoint-400}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:efd672a046efb6eb5df47d5237a07689c59887dac098586f96bf610f5cf17f77
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b73abf21b39473fffe8d985c5918328870ddc7f68ec8e4b77b7e7ea44506a1b1
3
  size 623
{checkpoint-350 β†’ checkpoint-400}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.0174039158810733,
5
- "global_step": 350,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -279,11 +279,50 @@
279
  "eval_steps_per_second": 3.875,
280
  "eval_wer": 1.0,
281
  "step": 350
282
  }
283
  ],
284
  "max_steps": 688,
285
  "num_train_epochs": 2,
286
- "total_flos": 1.0437262031798554e+18,
287
  "trial_name": null,
288
  "trial_params": null
289
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.1624365482233503,
5
+ "global_step": 400,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
279
  "eval_steps_per_second": 3.875,
280
  "eval_wer": 1.0,
281
  "step": 350
282
+ },
283
+ {
284
+ "epoch": 1.05,
285
+ "learning_rate": 3.693862275449102e-05,
286
+ "loss": 2.9665,
287
+ "step": 360
288
+ },
289
+ {
290
+ "epoch": 1.08,
291
+ "learning_rate": 3.581586826347305e-05,
292
+ "loss": 2.9641,
293
+ "step": 370
294
+ },
295
+ {
296
+ "epoch": 1.1,
297
+ "learning_rate": 3.469311377245509e-05,
298
+ "loss": 2.9484,
299
+ "step": 380
300
+ },
301
+ {
302
+ "epoch": 1.13,
303
+ "learning_rate": 3.3570359281437126e-05,
304
+ "loss": 2.9494,
305
+ "step": 390
306
+ },
307
+ {
308
+ "epoch": 1.16,
309
+ "learning_rate": 3.244760479041916e-05,
310
+ "loss": 2.9698,
311
+ "step": 400
312
+ },
313
+ {
314
+ "epoch": 1.16,
315
+ "eval_loss": 2.989494562149048,
316
+ "eval_runtime": 150.6903,
317
+ "eval_samples_per_second": 30.659,
318
+ "eval_steps_per_second": 3.836,
319
+ "eval_wer": 1.0,
320
+ "step": 400
321
  }
322
  ],
323
  "max_steps": 688,
324
  "num_train_epochs": 2,
325
+ "total_flos": 1.1900340345446784e+18,
326
  "trial_name": null,
327
  "trial_params": null
328
  }
{checkpoint-200 β†’ checkpoint-400}/training_args.bin RENAMED
File without changes
{checkpoint-250 β†’ checkpoint-450}/config.json RENAMED
File without changes
{checkpoint-350 β†’ checkpoint-450}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:646667c125c12f737d1ffee17d6371268d6575f3796018a92eb4102f5f1fff0b
3
  size 2490362385
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d20a2e7a06e4c6fe1ca1763518fe26e3bd509e54e5c6ce336b1cdd4ad352fc8
3
  size 2490362385
{checkpoint-250 β†’ checkpoint-450}/preprocessor_config.json RENAMED
File without changes
{checkpoint-200 β†’ checkpoint-450}/pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:81a809773f4c41661a588636b358c9e5380d7596cf519d3864f59d078d6b5d56
3
  size 1262075377
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64a67aef87c3664c6c90fef5a16264df1a1e9ec09017448f7dd3962f7e7ed3cd
3
  size 1262075377
{checkpoint-300 β†’ checkpoint-450}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c4e6a8eb3cbe35497679345db620fc16f097b4fd6a7597d0d1e70c31e40c4556
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:171f5d4a1db63b5ecb87951d275cbcef882e72c7875f0439f386ec2a0cc474fc
3
  size 14503
{checkpoint-250 β†’ checkpoint-450}/scaler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d4b33a302d3359c12eb2b6ea50d6c9c2f406dda2633a8f61a78ad84ec0805e1f
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b437a2168639a7226a6616cf299b21e2671732a153df5f941f17a2c38a20459
3
  size 559
{checkpoint-300 β†’ checkpoint-450}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f7af1e86e64792f71c1d8769a03e0e23b66ad421c106349a338e419060870b38
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89db00b13c6183184d8d5a3b77b2b0c26843c6c480825e0951a03deff712e541
3
  size 623
{checkpoint-300 β†’ checkpoint-450}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.8701957940536621,
5
- "global_step": 300,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -240,11 +240,128 @@
240
  "eval_steps_per_second": 4.369,
241
  "eval_wer": 1.0,
242
  "step": 300
243
  }
244
  ],
245
  "max_steps": 688,
246
  "num_train_epochs": 2,
247
- "total_flos": 8.840514496994611e+17,
248
  "trial_name": null,
249
  "trial_params": null
250
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.3074691805656273,
5
+ "global_step": 450,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
240
  "eval_steps_per_second": 4.369,
241
  "eval_wer": 1.0,
242
  "step": 300
243
+ },
244
+ {
245
+ "epoch": 0.9,
246
+ "learning_rate": 4.255239520958083e-05,
247
+ "loss": 3.0052,
248
+ "step": 310
249
+ },
250
+ {
251
+ "epoch": 0.93,
252
+ "learning_rate": 4.142964071856287e-05,
253
+ "loss": 2.9826,
254
+ "step": 320
255
+ },
256
+ {
257
+ "epoch": 0.96,
258
+ "learning_rate": 4.030688622754491e-05,
259
+ "loss": 2.9747,
260
+ "step": 330
261
+ },
262
+ {
263
+ "epoch": 0.99,
264
+ "learning_rate": 3.918413173652694e-05,
265
+ "loss": 2.9617,
266
+ "step": 340
267
+ },
268
+ {
269
+ "epoch": 1.02,
270
+ "learning_rate": 3.806137724550898e-05,
271
+ "loss": 3.2053,
272
+ "step": 350
273
+ },
274
+ {
275
+ "epoch": 1.02,
276
+ "eval_loss": 2.984886407852173,
277
+ "eval_runtime": 149.1508,
278
+ "eval_samples_per_second": 30.975,
279
+ "eval_steps_per_second": 3.875,
280
+ "eval_wer": 1.0,
281
+ "step": 350
282
+ },
283
+ {
284
+ "epoch": 1.05,
285
+ "learning_rate": 3.693862275449102e-05,
286
+ "loss": 2.9665,
287
+ "step": 360
288
+ },
289
+ {
290
+ "epoch": 1.08,
291
+ "learning_rate": 3.581586826347305e-05,
292
+ "loss": 2.9641,
293
+ "step": 370
294
+ },
295
+ {
296
+ "epoch": 1.1,
297
+ "learning_rate": 3.469311377245509e-05,
298
+ "loss": 2.9484,
299
+ "step": 380
300
+ },
301
+ {
302
+ "epoch": 1.13,
303
+ "learning_rate": 3.3570359281437126e-05,
304
+ "loss": 2.9494,
305
+ "step": 390
306
+ },
307
+ {
308
+ "epoch": 1.16,
309
+ "learning_rate": 3.244760479041916e-05,
310
+ "loss": 2.9698,
311
+ "step": 400
312
+ },
313
+ {
314
+ "epoch": 1.16,
315
+ "eval_loss": 2.989494562149048,
316
+ "eval_runtime": 150.6903,
317
+ "eval_samples_per_second": 30.659,
318
+ "eval_steps_per_second": 3.836,
319
+ "eval_wer": 1.0,
320
+ "step": 400
321
+ },
322
+ {
323
+ "epoch": 1.19,
324
+ "learning_rate": 3.1324850299401195e-05,
325
+ "loss": 2.9664,
326
+ "step": 410
327
+ },
328
+ {
329
+ "epoch": 1.22,
330
+ "learning_rate": 3.020209580838323e-05,
331
+ "loss": 2.9494,
332
+ "step": 420
333
+ },
334
+ {
335
+ "epoch": 1.25,
336
+ "learning_rate": 2.9079341317365265e-05,
337
+ "loss": 2.935,
338
+ "step": 430
339
+ },
340
+ {
341
+ "epoch": 1.28,
342
+ "learning_rate": 2.7956586826347306e-05,
343
+ "loss": 2.9397,
344
+ "step": 440
345
+ },
346
+ {
347
+ "epoch": 1.31,
348
+ "learning_rate": 2.683383233532934e-05,
349
+ "loss": 2.9485,
350
+ "step": 450
351
+ },
352
+ {
353
+ "epoch": 1.31,
354
+ "eval_loss": 2.9584460258483887,
355
+ "eval_runtime": 140.6358,
356
+ "eval_samples_per_second": 32.851,
357
+ "eval_steps_per_second": 4.11,
358
+ "eval_wer": 1.0,
359
+ "step": 450
360
  }
361
  ],
362
  "max_steps": 688,
363
  "num_train_epochs": 2,
364
+ "total_flos": 1.336010418574825e+18,
365
  "trial_name": null,
366
  "trial_params": null
367
  }
{checkpoint-250 β†’ checkpoint-450}/training_args.bin RENAMED
File without changes
{checkpoint-300 β†’ checkpoint-500}/config.json RENAMED
File without changes
{checkpoint-200 β†’ checkpoint-500}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b1202a5092b4eef4129f21d94d892672eedd0f405c7b97384c527938ad263ff2
3
- size 2490361937
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2dbd398c0993452243cb7df79d58793489c5b41d1a1000e547261b81959e2f45
3
+ size 2490362385
{checkpoint-300 β†’ checkpoint-500}/preprocessor_config.json RENAMED
File without changes
{checkpoint-300 β†’ checkpoint-500}/pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8edb565b658376cb9cf7a44bbc1bfea5a26ebcd17d3f36739a8535e85a13dcee
3
  size 1262075377
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed159635bd947770842e25d915d8157fd198d2f06d5db2476c6663627e3beee7
3
  size 1262075377
{checkpoint-200 β†’ checkpoint-500}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aac8c2cbd9ad36e5da5c9bde6c85c2a957009b424972b91ca2f61d198a65abaf
3
- size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32b82ac75b07e68dc3bc90e76f55a339f64dce724d87a9ae3c69ee46df441867
3
+ size 14503
{checkpoint-350 β†’ checkpoint-500}/scaler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:11a38190a29b2e515a160c453a45f3b2acc23e2c8c2240009e053ed0dbf017f0
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c8bceadebe118ff459b01a775a8e9b38a6b8302c162d022f78d3646163e6486
3
  size 559
{checkpoint-250 β†’ checkpoint-500}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d0c28659ec00a57e0df47b2243fcc9567217741ac741e245ef295e398e80890
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15019bcd1c8fc8b1bd39c46d9e1196c2fa76648918a0024eb84229f57debcf7e
3
  size 623
checkpoint-500/trainer_state.json ADDED
@@ -0,0 +1,55 @@
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.4525018129079044,
5
+ "global_step": 500,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.29,
12
+ "learning_rate": 6.613023952095809e-05,
13
+ "loss": 5.1206,
14
+ "step": 100
15
+ },
16
+ {
17
+ "epoch": 0.58,
18
+ "learning_rate": 5.490269461077844e-05,
19
+ "loss": 3.0901,
20
+ "step": 200
21
+ },
22
+ {
23
+ "epoch": 0.87,
24
+ "learning_rate": 4.36751497005988e-05,
25
+ "loss": 3.0224,
26
+ "step": 300
27
+ },
28
+ {
29
+ "epoch": 1.16,
30
+ "learning_rate": 3.244760479041916e-05,
31
+ "loss": 2.9922,
32
+ "step": 400
33
+ },
34
+ {
35
+ "epoch": 1.45,
36
+ "learning_rate": 2.1220059880239517e-05,
37
+ "loss": 2.9357,
38
+ "step": 500
39
+ },
40
+ {
41
+ "epoch": 1.45,
42
+ "eval_loss": 2.9458744525909424,
43
+ "eval_runtime": 138.8724,
44
+ "eval_samples_per_second": 33.268,
45
+ "eval_steps_per_second": 4.162,
46
+ "eval_wer": 1.0,
47
+ "step": 500
48
+ }
49
+ ],
50
+ "max_steps": 688,
51
+ "num_train_epochs": 2,
52
+ "total_flos": 1.4827194756605722e+18,
53
+ "trial_name": null,
54
+ "trial_params": null
55
+ }
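The new checkpoint-500/trainer_state.json keeps the running log_history shown above: a training-loss entry every 100 steps and an evaluation entry at step 500, matching the new logging and eval intervals. As a hedged sketch of how that log could be inspected once the checkpoint is available locally (the path assumes a local clone of this repo), the JSON can be read directly:

import json

with open("checkpoint-500/trainer_state.json") as f:
    state = json.load(f)

# Training entries carry a "loss" key; evaluation entries carry "eval_loss"/"eval_wer".
train_log = [(e["step"], e["loss"]) for e in state["log_history"] if "loss" in e]
eval_log = [(e["step"], e["eval_loss"], e["eval_wer"]) for e in state["log_history"] if "eval_loss" in e]

print("last train loss:", train_log[-1])
print("last eval (step, loss, wer):", eval_log[-1])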
{checkpoint-300 β†’ checkpoint-500}/training_args.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:77806a16ec6ef209c8a5c1c085159a0bc8e45c23f2d143f2c13e01527f13b5b2
3
  size 2991
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07c8daeeea1ded5d5b75ab1c6033b9bed25c9ac1f192a365842399932683cfcc
3
  size 2991
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f0d76767012f4da5dc0d5f53230af28368c23a384f4a2b862a3afb09aea2dc6d
3
  size 1262075377
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed159635bd947770842e25d915d8157fd198d2f06d5db2476c6663627e3beee7
3
  size 1262075377
run.sh CHANGED
@@ -14,9 +14,9 @@ python run_speech_recognition_ctc.py \
14
  --evaluation_strategy="steps" \
15
  --text_column_name="sentence" \
16
  --chars_to_ignore , ? . ! \- \; \: \" “ % ‘ ” � — ’ … – \
17
- --save_steps="50" \
18
- --eval_steps="50" \
19
- --logging_steps="10" \
20
  --layerdrop="0.0" \
21
  --activation_dropout="0.1" \
22
  --save_total_limit="3" \
 
14
  --evaluation_strategy="steps" \
15
  --text_column_name="sentence" \
16
  --chars_to_ignore , ? . ! \- \; \: \" “ % ‘ ” � — ’ … – \
17
+ --save_steps="500" \
18
+ --eval_steps="500" \
19
+ --logging_steps="100" \
20
  --layerdrop="0.0" \
21
  --activation_dropout="0.1" \
22
  --save_total_limit="3" \
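The run.sh change raises the checkpoint/eval interval from 50 to 500 steps and the logging interval from 10 to 100, which is why only checkpoint-400/450/500 survive under save_total_limit=3. These flags are passed through run_speech_recognition_ctc.py to the Hugging Face Trainer; as a rough sketch (the output_dir and any omitted arguments here are placeholders, not copied from the actual script), the cadence corresponds to TrainingArguments like:

from transformers import TrainingArguments

# Sketch of the checkpointing/evaluation cadence set in run.sh; learning rate,
# batch sizes and the rest of the script's arguments are intentionally omitted.
training_args = TrainingArguments(
    output_dir="./",              # placeholder
    evaluation_strategy="steps",
    save_steps=500,
    eval_steps=500,
    logging_steps=100,
    save_total_limit=3,
)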
special_tokens_map.json CHANGED
@@ -1 +1 @@
1
- {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
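The special_tokens_map.json diff appends another <s>/</s> pair to additional_special_tokens, growing the list from six to eight entries; the same pair appears to be re-added each time the tokenizer is saved during training. A small, purely illustrative check for this duplication (assuming the file is read from a local clone) could be:

import json

with open("special_tokens_map.json") as f:
    smap = json.load(f)

# Count the (repeated) additional special tokens and list the distinct ones.
extra = [tok["content"] for tok in smap.get("additional_special_tokens", [])]
print(len(extra), "additional special tokens:", sorted(set(extra)))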
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:77806a16ec6ef209c8a5c1c085159a0bc8e45c23f2d143f2c13e01527f13b5b2
3
  size 2991
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07c8daeeea1ded5d5b75ab1c6033b9bed25c9ac1f192a365842399932683cfcc
3
  size 2991