soaring0616 committed on Apr 13

Commit

37dc5ca

•

1 Parent(s): 7f36220

Upload folder using huggingface_hub

Browse files

Files changed (36) hide show

checkpoint-1000/config.json +52 -0
checkpoint-1000/generation_config.json +18 -0
checkpoint-1000/model.safetensors +3 -0
checkpoint-1000/optimizer.pt +3 -0
checkpoint-1000/preprocessor_config.json +14 -0
checkpoint-1000/rng_state.pth +3 -0
checkpoint-1000/scheduler.pt +3 -0
checkpoint-1000/trainer_state.json +310 -0
checkpoint-1000/training_args.bin +3 -0
checkpoint-2000/config.json +52 -0
checkpoint-2000/generation_config.json +18 -0
checkpoint-2000/model.safetensors +3 -0
checkpoint-2000/optimizer.pt +3 -0
checkpoint-2000/preprocessor_config.json +14 -0
checkpoint-2000/rng_state.pth +3 -0
checkpoint-2000/scheduler.pt +3 -0
checkpoint-2000/trainer_state.json +599 -0
checkpoint-2000/training_args.bin +3 -0
checkpoint-3000/config.json +52 -0
checkpoint-3000/generation_config.json +18 -0
checkpoint-3000/model.safetensors +3 -0
checkpoint-3000/optimizer.pt +3 -0
checkpoint-3000/preprocessor_config.json +14 -0
checkpoint-3000/rng_state.pth +3 -0
checkpoint-3000/scheduler.pt +3 -0
checkpoint-3000/trainer_state.json +888 -0
checkpoint-3000/training_args.bin +3 -0
checkpoint-4000/config.json +52 -0
checkpoint-4000/generation_config.json +18 -0
checkpoint-4000/model.safetensors +3 -0
checkpoint-4000/optimizer.pt +3 -0
checkpoint-4000/preprocessor_config.json +14 -0
checkpoint-4000/rng_state.pth +3 -0
checkpoint-4000/scheduler.pt +3 -0
checkpoint-4000/trainer_state.json +1177 -0
checkpoint-4000/training_args.bin +3 -0

checkpoint-1000/config.json ADDED Viewed

	@@ -0,0 +1,52 @@

+{
+  "_name_or_path": "soaring0616/whisper-small-vietnamese",
+  "activation_dropout": 0.0,
+  "activation_function": "gelu",
+  "apply_spec_augment": false,
+  "architectures": [
+    "WhisperForConditionalGeneration"
+  ],
+  "attention_dropout": 0.0,
+  "begin_suppress_tokens": [
+    220,
+    50257
+  ],
+  "bos_token_id": 50257,
+  "classifier_proj_size": 256,
+  "d_model": 768,
+  "decoder_attention_heads": 12,
+  "decoder_ffn_dim": 3072,
+  "decoder_layerdrop": 0.0,
+  "decoder_layers": 12,
+  "decoder_start_token_id": 50258,
+  "dropout": 0.0,
+  "encoder_attention_heads": 12,
+  "encoder_ffn_dim": 3072,
+  "encoder_layerdrop": 0.0,
+  "encoder_layers": 12,
+  "eos_token_id": 50257,
+  "forced_decoder_ids": null,
+  "init_std": 0.02,
+  "is_encoder_decoder": true,
+  "mask_feature_length": 10,
+  "mask_feature_min_masks": 0,
+  "mask_feature_prob": 0.0,
+  "mask_time_length": 10,
+  "mask_time_min_masks": 2,
+  "mask_time_prob": 0.05,
+  "max_length": 448,
+  "max_source_positions": 1500,
+  "max_target_positions": 448,
+  "median_filter_width": 7,
+  "model_type": "whisper",
+  "num_hidden_layers": 12,
+  "num_mel_bins": 80,
+  "pad_token_id": 50257,
+  "scale_embedding": false,
+  "suppress_tokens": [],
+  "torch_dtype": "float32",
+  "transformers_version": "4.39.1",
+  "use_cache": true,
+  "use_weighted_layer_sum": false,
+  "vocab_size": 51865
+}

checkpoint-1000/generation_config.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+  "begin_suppress_tokens": [
+    220,
+    50257
+  ],
+  "bos_token_id": 50257,
+  "decoder_start_token_id": 50258,
+  "eos_token_id": 50257,
+  "lang_to_id": {
+    "<|id|>": 50275,
+    "<|vi|>": 50278
+  },
+  "language": "indonesian",
+  "max_length": 448,
+  "pad_token_id": 50257,
+  "suppress_tokens": [],
+  "transformers_version": "4.39.1"
+}

checkpoint-1000/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:409b131b2b6b89b1a619cce45c0ef4d23c303ee2d6a154a7fb195e6dc9acffca
+size 966995080

checkpoint-1000/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6b5908c0d8823cef594cd13852f55721e5f728e301254489c2122a9941926448
+size 1925064044

checkpoint-1000/preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+  "chunk_length": 30,
+  "feature_extractor_type": "WhisperFeatureExtractor",
+  "feature_size": 80,
+  "hop_length": 160,
+  "n_fft": 400,
+  "n_samples": 480000,
+  "nb_max_frames": 3000,
+  "padding_side": "right",
+  "padding_value": 0.0,
+  "processor_class": "WhisperProcessor",
+  "return_attention_mask": false,
+  "sampling_rate": 16000
+}

checkpoint-1000/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c29334f857fb39218b3eb2c67acb056e10392cedb3f61bdf7b6329defc27b559
+size 14244

checkpoint-1000/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b05e09481e16be6920637a3fff45223880ebd6057d17611017bd0d9e08ae606b
+size 1064

checkpoint-1000/trainer_state.json ADDED Viewed

	@@ -0,0 +1,310 @@

+{
+  "best_metric": 17.814782082887575,
+  "best_model_checkpoint": "./whisper-small-in_from_vi\\checkpoint-1000",
+  "epoch": 1.9305019305019306,
+  "eval_steps": 1000,
+  "global_step": 1000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.05,
+      "grad_norm": 218.94273376464844,
+      "learning_rate": 3.8e-07,
+      "loss": 5.6914,
+      "step": 25
+    },
+    {
+      "epoch": 0.1,
+      "grad_norm": 35.336517333984375,
+      "learning_rate": 8.8e-07,
+      "loss": 2.5237,
+      "step": 50
+    },
+    {
+      "epoch": 0.14,
+      "grad_norm": 14.206599235534668,
+      "learning_rate": 1.3800000000000001e-06,
+      "loss": 0.8446,
+      "step": 75
+    },
+    {
+      "epoch": 0.19,
+      "grad_norm": 12.359480857849121,
+      "learning_rate": 1.8800000000000002e-06,
+      "loss": 0.579,
+      "step": 100
+    },
+    {
+      "epoch": 0.24,
+      "grad_norm": 8.259902954101562,
+      "learning_rate": 2.38e-06,
+      "loss": 0.5636,
+      "step": 125
+    },
+    {
+      "epoch": 0.29,
+      "grad_norm": 11.162034034729004,
+      "learning_rate": 2.88e-06,
+      "loss": 0.5191,
+      "step": 150
+    },
+    {
+      "epoch": 0.34,
+      "grad_norm": 8.917764663696289,
+      "learning_rate": 3.3800000000000007e-06,
+      "loss": 0.4879,
+      "step": 175
+    },
+    {
+      "epoch": 0.39,
+      "grad_norm": 8.865296363830566,
+      "learning_rate": 3.88e-06,
+      "loss": 0.4333,
+      "step": 200
+    },
+    {
+      "epoch": 0.43,
+      "grad_norm": 8.579310417175293,
+      "learning_rate": 4.38e-06,
+      "loss": 0.4679,
+      "step": 225
+    },
+    {
+      "epoch": 0.48,
+      "grad_norm": 8.361818313598633,
+      "learning_rate": 4.880000000000001e-06,
+      "loss": 0.3948,
+      "step": 250
+    },
+    {
+      "epoch": 0.53,
+      "grad_norm": 6.773808479309082,
+      "learning_rate": 5.380000000000001e-06,
+      "loss": 0.4284,
+      "step": 275
+    },
+    {
+      "epoch": 0.58,
+      "grad_norm": 7.8365678787231445,
+      "learning_rate": 5.8800000000000005e-06,
+      "loss": 0.3829,
+      "step": 300
+    },
+    {
+      "epoch": 0.63,
+      "grad_norm": 10.097782135009766,
+      "learning_rate": 6.380000000000001e-06,
+      "loss": 0.4091,
+      "step": 325
+    },
+    {
+      "epoch": 0.68,
+      "grad_norm": 8.221065521240234,
+      "learning_rate": 6.88e-06,
+      "loss": 0.4067,
+      "step": 350
+    },
+    {
+      "epoch": 0.72,
+      "grad_norm": 7.281307697296143,
+      "learning_rate": 7.3800000000000005e-06,
+      "loss": 0.3501,
+      "step": 375
+    },
+    {
+      "epoch": 0.77,
+      "grad_norm": 7.796460151672363,
+      "learning_rate": 7.88e-06,
+      "loss": 0.3754,
+      "step": 400
+    },
+    {
+      "epoch": 0.82,
+      "grad_norm": 6.829456329345703,
+      "learning_rate": 8.380000000000001e-06,
+      "loss": 0.3789,
+      "step": 425
+    },
+    {
+      "epoch": 0.87,
+      "grad_norm": 7.42793607711792,
+      "learning_rate": 8.880000000000001e-06,
+      "loss": 0.3709,
+      "step": 450
+    },
+    {
+      "epoch": 0.92,
+      "grad_norm": 6.301506519317627,
+      "learning_rate": 9.38e-06,
+      "loss": 0.355,
+      "step": 475
+    },
+    {
+      "epoch": 0.97,
+      "grad_norm": 7.611047744750977,
+      "learning_rate": 9.88e-06,
+      "loss": 0.3347,
+      "step": 500
+    },
+    {
+      "epoch": 1.01,
+      "grad_norm": 6.571840286254883,
+      "learning_rate": 9.945714285714286e-06,
+      "loss": 0.3231,
+      "step": 525
+    },
+    {
+      "epoch": 1.06,
+      "grad_norm": 6.700900554656982,
+      "learning_rate": 9.874285714285715e-06,
+      "loss": 0.2356,
+      "step": 550
+    },
+    {
+      "epoch": 1.11,
+      "grad_norm": 6.217418670654297,
+      "learning_rate": 9.802857142857144e-06,
+      "loss": 0.2177,
+      "step": 575
+    },
+    {
+      "epoch": 1.16,
+      "grad_norm": 5.774504661560059,
+      "learning_rate": 9.731428571428573e-06,
+      "loss": 0.2103,
+      "step": 600
+    },
+    {
+      "epoch": 1.21,
+      "grad_norm": 5.770651340484619,
+      "learning_rate": 9.66e-06,
+      "loss": 0.2529,
+      "step": 625
+    },
+    {
+      "epoch": 1.25,
+      "grad_norm": 5.579860687255859,
+      "learning_rate": 9.58857142857143e-06,
+      "loss": 0.2435,
+      "step": 650
+    },
+    {
+      "epoch": 1.3,
+      "grad_norm": 5.943060398101807,
+      "learning_rate": 9.517142857142859e-06,
+      "loss": 0.2064,
+      "step": 675
+    },
+    {
+      "epoch": 1.35,
+      "grad_norm": 7.721786975860596,
+      "learning_rate": 9.445714285714288e-06,
+      "loss": 0.2273,
+      "step": 700
+    },
+    {
+      "epoch": 1.4,
+      "grad_norm": 6.976935386657715,
+      "learning_rate": 9.374285714285715e-06,
+      "loss": 0.2373,
+      "step": 725
+    },
+    {
+      "epoch": 1.45,
+      "grad_norm": 6.749694347381592,
+      "learning_rate": 9.302857142857144e-06,
+      "loss": 0.2306,
+      "step": 750
+    },
+    {
+      "epoch": 1.5,
+      "grad_norm": 5.087153434753418,
+      "learning_rate": 9.231428571428573e-06,
+      "loss": 0.2292,
+      "step": 775
+    },
+    {
+      "epoch": 1.54,
+      "grad_norm": 6.032232761383057,
+      "learning_rate": 9.16e-06,
+      "loss": 0.2096,
+      "step": 800
+    },
+    {
+      "epoch": 1.59,
+      "grad_norm": 5.749306678771973,
+      "learning_rate": 9.08857142857143e-06,
+      "loss": 0.2217,
+      "step": 825
+    },
+    {
+      "epoch": 1.64,
+      "grad_norm": 7.6291184425354,
+      "learning_rate": 9.017142857142858e-06,
+      "loss": 0.2023,
+      "step": 850
+    },
+    {
+      "epoch": 1.69,
+      "grad_norm": 4.76064395904541,
+      "learning_rate": 8.945714285714286e-06,
+      "loss": 0.2099,
+      "step": 875
+    },
+    {
+      "epoch": 1.74,
+      "grad_norm": 5.0226898193359375,
+      "learning_rate": 8.874285714285715e-06,
+      "loss": 0.2017,
+      "step": 900
+    },
+    {
+      "epoch": 1.79,
+      "grad_norm": 5.6098313331604,
+      "learning_rate": 8.802857142857144e-06,
+      "loss": 0.2126,
+      "step": 925
+    },
+    {
+      "epoch": 1.83,
+      "grad_norm": 5.58658504486084,
+      "learning_rate": 8.731428571428571e-06,
+      "loss": 0.2209,
+      "step": 950
+    },
+    {
+      "epoch": 1.88,
+      "grad_norm": 7.067436218261719,
+      "learning_rate": 8.66e-06,
+      "loss": 0.225,
+      "step": 975
+    },
+    {
+      "epoch": 1.93,
+      "grad_norm": 4.616860389709473,
+      "learning_rate": 8.588571428571429e-06,
+      "loss": 0.2059,
+      "step": 1000
+    },
+    {
+      "epoch": 1.93,
+      "eval_loss": 0.25656187534332275,
+      "eval_runtime": 2791.9455,
+      "eval_samples_per_second": 1.296,
+      "eval_steps_per_second": 0.162,
+      "eval_wer": 17.814782082887575,
+      "step": 1000
+    }
+  ],
+  "logging_steps": 25,
+  "max_steps": 4000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 8,
+  "save_steps": 1000,
+  "total_flos": 4.61332620951552e+18,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-1000/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d97a7d674ba5458016027e8fcbeac271e6fe45c463b2f89dfb01782b9ae4928d
+size 5112

checkpoint-2000/config.json ADDED Viewed

	@@ -0,0 +1,52 @@

+{
+  "_name_or_path": "soaring0616/whisper-small-vietnamese",
+  "activation_dropout": 0.0,
+  "activation_function": "gelu",
+  "apply_spec_augment": false,
+  "architectures": [
+    "WhisperForConditionalGeneration"
+  ],
+  "attention_dropout": 0.0,
+  "begin_suppress_tokens": [
+    220,
+    50257
+  ],
+  "bos_token_id": 50257,
+  "classifier_proj_size": 256,
+  "d_model": 768,
+  "decoder_attention_heads": 12,
+  "decoder_ffn_dim": 3072,
+  "decoder_layerdrop": 0.0,
+  "decoder_layers": 12,
+  "decoder_start_token_id": 50258,
+  "dropout": 0.0,
+  "encoder_attention_heads": 12,
+  "encoder_ffn_dim": 3072,
+  "encoder_layerdrop": 0.0,
+  "encoder_layers": 12,
+  "eos_token_id": 50257,
+  "forced_decoder_ids": null,
+  "init_std": 0.02,
+  "is_encoder_decoder": true,
+  "mask_feature_length": 10,
+  "mask_feature_min_masks": 0,
+  "mask_feature_prob": 0.0,
+  "mask_time_length": 10,
+  "mask_time_min_masks": 2,
+  "mask_time_prob": 0.05,
+  "max_length": 448,
+  "max_source_positions": 1500,
+  "max_target_positions": 448,
+  "median_filter_width": 7,
+  "model_type": "whisper",
+  "num_hidden_layers": 12,
+  "num_mel_bins": 80,
+  "pad_token_id": 50257,
+  "scale_embedding": false,
+  "suppress_tokens": [],
+  "torch_dtype": "float32",
+  "transformers_version": "4.39.1",
+  "use_cache": true,
+  "use_weighted_layer_sum": false,
+  "vocab_size": 51865
+}

checkpoint-2000/generation_config.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+  "begin_suppress_tokens": [
+    220,
+    50257
+  ],
+  "bos_token_id": 50257,
+  "decoder_start_token_id": 50258,
+  "eos_token_id": 50257,
+  "lang_to_id": {
+    "<|id|>": 50275,
+    "<|vi|>": 50278
+  },
+  "language": "indonesian",
+  "max_length": 448,
+  "pad_token_id": 50257,
+  "suppress_tokens": [],
+  "transformers_version": "4.39.1"
+}

checkpoint-2000/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9bdf8dd5e50b4d747fd632fcb05a6eb5f0aa1e71789bd2279a8601856f952cd0
+size 966995080

checkpoint-2000/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0d731de9f880a4937bbca96b4b05942ee9956648112b9950c1e0e271edf92c7b
+size 1925064044

checkpoint-2000/preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+  "chunk_length": 30,
+  "feature_extractor_type": "WhisperFeatureExtractor",
+  "feature_size": 80,
+  "hop_length": 160,
+  "n_fft": 400,
+  "n_samples": 480000,
+  "nb_max_frames": 3000,
+  "padding_side": "right",
+  "padding_value": 0.0,
+  "processor_class": "WhisperProcessor",
+  "return_attention_mask": false,
+  "sampling_rate": 16000
+}

checkpoint-2000/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f7a343a22814950dc78f0eee3ac380f9abc6236ec1a214b3aa30607b846d41fb
+size 14244

checkpoint-2000/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a943107e4f3199cccea374c098af12bfc7ca97eb99b7cc85cdbea246fbef66a9
+size 1064

checkpoint-2000/trainer_state.json ADDED Viewed

	@@ -0,0 +1,599 @@

+{
+  "best_metric": 17.470580026978,
+  "best_model_checkpoint": "./whisper-small-in_from_vi\\checkpoint-2000",
+  "epoch": 3.861003861003861,
+  "eval_steps": 1000,
+  "global_step": 2000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.05,
+      "grad_norm": 218.94273376464844,
+      "learning_rate": 3.8e-07,
+      "loss": 5.6914,
+      "step": 25
+    },
+    {
+      "epoch": 0.1,
+      "grad_norm": 35.336517333984375,
+      "learning_rate": 8.8e-07,
+      "loss": 2.5237,
+      "step": 50
+    },
+    {
+      "epoch": 0.14,
+      "grad_norm": 14.206599235534668,
+      "learning_rate": 1.3800000000000001e-06,
+      "loss": 0.8446,
+      "step": 75
+    },
+    {
+      "epoch": 0.19,
+      "grad_norm": 12.359480857849121,
+      "learning_rate": 1.8800000000000002e-06,
+      "loss": 0.579,
+      "step": 100
+    },
+    {
+      "epoch": 0.24,
+      "grad_norm": 8.259902954101562,
+      "learning_rate": 2.38e-06,
+      "loss": 0.5636,
+      "step": 125
+    },
+    {
+      "epoch": 0.29,
+      "grad_norm": 11.162034034729004,
+      "learning_rate": 2.88e-06,
+      "loss": 0.5191,
+      "step": 150
+    },
+    {
+      "epoch": 0.34,
+      "grad_norm": 8.917764663696289,
+      "learning_rate": 3.3800000000000007e-06,
+      "loss": 0.4879,
+      "step": 175
+    },
+    {
+      "epoch": 0.39,
+      "grad_norm": 8.865296363830566,
+      "learning_rate": 3.88e-06,
+      "loss": 0.4333,
+      "step": 200
+    },
+    {
+      "epoch": 0.43,
+      "grad_norm": 8.579310417175293,
+      "learning_rate": 4.38e-06,
+      "loss": 0.4679,
+      "step": 225
+    },
+    {
+      "epoch": 0.48,
+      "grad_norm": 8.361818313598633,
+      "learning_rate": 4.880000000000001e-06,
+      "loss": 0.3948,
+      "step": 250
+    },
+    {
+      "epoch": 0.53,
+      "grad_norm": 6.773808479309082,
+      "learning_rate": 5.380000000000001e-06,
+      "loss": 0.4284,
+      "step": 275
+    },
+    {
+      "epoch": 0.58,
+      "grad_norm": 7.8365678787231445,
+      "learning_rate": 5.8800000000000005e-06,
+      "loss": 0.3829,
+      "step": 300
+    },
+    {
+      "epoch": 0.63,
+      "grad_norm": 10.097782135009766,
+      "learning_rate": 6.380000000000001e-06,
+      "loss": 0.4091,
+      "step": 325
+    },
+    {
+      "epoch": 0.68,
+      "grad_norm": 8.221065521240234,
+      "learning_rate": 6.88e-06,
+      "loss": 0.4067,
+      "step": 350
+    },
+    {
+      "epoch": 0.72,
+      "grad_norm": 7.281307697296143,
+      "learning_rate": 7.3800000000000005e-06,
+      "loss": 0.3501,
+      "step": 375
+    },
+    {
+      "epoch": 0.77,
+      "grad_norm": 7.796460151672363,
+      "learning_rate": 7.88e-06,
+      "loss": 0.3754,
+      "step": 400
+    },
+    {
+      "epoch": 0.82,
+      "grad_norm": 6.829456329345703,
+      "learning_rate": 8.380000000000001e-06,
+      "loss": 0.3789,
+      "step": 425
+    },
+    {
+      "epoch": 0.87,
+      "grad_norm": 7.42793607711792,
+      "learning_rate": 8.880000000000001e-06,
+      "loss": 0.3709,
+      "step": 450
+    },
+    {
+      "epoch": 0.92,
+      "grad_norm": 6.301506519317627,
+      "learning_rate": 9.38e-06,
+      "loss": 0.355,
+      "step": 475
+    },
+    {
+      "epoch": 0.97,
+      "grad_norm": 7.611047744750977,
+      "learning_rate": 9.88e-06,
+      "loss": 0.3347,
+      "step": 500
+    },
+    {
+      "epoch": 1.01,
+      "grad_norm": 6.571840286254883,
+      "learning_rate": 9.945714285714286e-06,
+      "loss": 0.3231,
+      "step": 525
+    },
+    {
+      "epoch": 1.06,
+      "grad_norm": 6.700900554656982,
+      "learning_rate": 9.874285714285715e-06,
+      "loss": 0.2356,
+      "step": 550
+    },
+    {
+      "epoch": 1.11,
+      "grad_norm": 6.217418670654297,
+      "learning_rate": 9.802857142857144e-06,
+      "loss": 0.2177,
+      "step": 575
+    },
+    {
+      "epoch": 1.16,
+      "grad_norm": 5.774504661560059,
+      "learning_rate": 9.731428571428573e-06,
+      "loss": 0.2103,
+      "step": 600
+    },
+    {
+      "epoch": 1.21,
+      "grad_norm": 5.770651340484619,
+      "learning_rate": 9.66e-06,
+      "loss": 0.2529,
+      "step": 625
+    },
+    {
+      "epoch": 1.25,
+      "grad_norm": 5.579860687255859,
+      "learning_rate": 9.58857142857143e-06,
+      "loss": 0.2435,
+      "step": 650
+    },
+    {
+      "epoch": 1.3,
+      "grad_norm": 5.943060398101807,
+      "learning_rate": 9.517142857142859e-06,
+      "loss": 0.2064,
+      "step": 675
+    },
+    {
+      "epoch": 1.35,
+      "grad_norm": 7.721786975860596,
+      "learning_rate": 9.445714285714288e-06,
+      "loss": 0.2273,
+      "step": 700
+    },
+    {
+      "epoch": 1.4,
+      "grad_norm": 6.976935386657715,
+      "learning_rate": 9.374285714285715e-06,
+      "loss": 0.2373,
+      "step": 725
+    },
+    {
+      "epoch": 1.45,
+      "grad_norm": 6.749694347381592,
+      "learning_rate": 9.302857142857144e-06,
+      "loss": 0.2306,
+      "step": 750
+    },
+    {
+      "epoch": 1.5,
+      "grad_norm": 5.087153434753418,
+      "learning_rate": 9.231428571428573e-06,
+      "loss": 0.2292,
+      "step": 775
+    },
+    {
+      "epoch": 1.54,
+      "grad_norm": 6.032232761383057,
+      "learning_rate": 9.16e-06,
+      "loss": 0.2096,
+      "step": 800
+    },
+    {
+      "epoch": 1.59,
+      "grad_norm": 5.749306678771973,
+      "learning_rate": 9.08857142857143e-06,
+      "loss": 0.2217,
+      "step": 825
+    },
+    {
+      "epoch": 1.64,
+      "grad_norm": 7.6291184425354,
+      "learning_rate": 9.017142857142858e-06,
+      "loss": 0.2023,
+      "step": 850
+    },
+    {
+      "epoch": 1.69,
+      "grad_norm": 4.76064395904541,
+      "learning_rate": 8.945714285714286e-06,
+      "loss": 0.2099,
+      "step": 875
+    },
+    {
+      "epoch": 1.74,
+      "grad_norm": 5.0226898193359375,
+      "learning_rate": 8.874285714285715e-06,
+      "loss": 0.2017,
+      "step": 900
+    },
+    {
+      "epoch": 1.79,
+      "grad_norm": 5.6098313331604,
+      "learning_rate": 8.802857142857144e-06,
+      "loss": 0.2126,
+      "step": 925
+    },
+    {
+      "epoch": 1.83,
+      "grad_norm": 5.58658504486084,
+      "learning_rate": 8.731428571428571e-06,
+      "loss": 0.2209,
+      "step": 950
+    },
+    {
+      "epoch": 1.88,
+      "grad_norm": 7.067436218261719,
+      "learning_rate": 8.66e-06,
+      "loss": 0.225,
+      "step": 975
+    },
+    {
+      "epoch": 1.93,
+      "grad_norm": 4.616860389709473,
+      "learning_rate": 8.588571428571429e-06,
+      "loss": 0.2059,
+      "step": 1000
+    },
+    {
+      "epoch": 1.93,
+      "eval_loss": 0.25656187534332275,
+      "eval_runtime": 2791.9455,
+      "eval_samples_per_second": 1.296,
+      "eval_steps_per_second": 0.162,
+      "eval_wer": 17.814782082887575,
+      "step": 1000
+    },
+    {
+      "epoch": 1.98,
+      "grad_norm": 4.517664909362793,
+      "learning_rate": 8.517142857142858e-06,
+      "loss": 0.2132,
+      "step": 1025
+    },
+    {
+      "epoch": 2.03,
+      "grad_norm": 3.016611099243164,
+      "learning_rate": 8.445714285714285e-06,
+      "loss": 0.1498,
+      "step": 1050
+    },
+    {
+      "epoch": 2.08,
+      "grad_norm": 4.563220977783203,
+      "learning_rate": 8.374285714285714e-06,
+      "loss": 0.0848,
+      "step": 1075
+    },
+    {
+      "epoch": 2.12,
+      "grad_norm": 4.658745288848877,
+      "learning_rate": 8.302857142857143e-06,
+      "loss": 0.0877,
+      "step": 1100
+    },
+    {
+      "epoch": 2.17,
+      "grad_norm": 3.2172155380249023,
+      "learning_rate": 8.231428571428572e-06,
+      "loss": 0.0858,
+      "step": 1125
+    },
+    {
+      "epoch": 2.22,
+      "grad_norm": 3.33174991607666,
+      "learning_rate": 8.16e-06,
+      "loss": 0.08,
+      "step": 1150
+    },
+    {
+      "epoch": 2.27,
+      "grad_norm": 4.132905960083008,
+      "learning_rate": 8.088571428571429e-06,
+      "loss": 0.0945,
+      "step": 1175
+    },
+    {
+      "epoch": 2.32,
+      "grad_norm": 3.5062320232391357,
+      "learning_rate": 8.017142857142858e-06,
+      "loss": 0.0849,
+      "step": 1200
+    },
+    {
+      "epoch": 2.36,
+      "grad_norm": 2.746927261352539,
+      "learning_rate": 7.945714285714287e-06,
+      "loss": 0.0814,
+      "step": 1225
+    },
+    {
+      "epoch": 2.41,
+      "grad_norm": 2.8374855518341064,
+      "learning_rate": 7.874285714285716e-06,
+      "loss": 0.0898,
+      "step": 1250
+    },
+    {
+      "epoch": 2.46,
+      "grad_norm": 2.8154654502868652,
+      "learning_rate": 7.802857142857143e-06,
+      "loss": 0.091,
+      "step": 1275
+    },
+    {
+      "epoch": 2.51,
+      "grad_norm": 3.5242350101470947,
+      "learning_rate": 7.731428571428572e-06,
+      "loss": 0.085,
+      "step": 1300
+    },
+    {
+      "epoch": 2.56,
+      "grad_norm": 3.3596408367156982,
+      "learning_rate": 7.660000000000001e-06,
+      "loss": 0.093,
+      "step": 1325
+    },
+    {
+      "epoch": 2.61,
+      "grad_norm": 3.4869582653045654,
+      "learning_rate": 7.588571428571429e-06,
+      "loss": 0.0836,
+      "step": 1350
+    },
+    {
+      "epoch": 2.65,
+      "grad_norm": 2.8120055198669434,
+      "learning_rate": 7.5171428571428575e-06,
+      "loss": 0.0769,
+      "step": 1375
+    },
+    {
+      "epoch": 2.7,
+      "grad_norm": 3.5809738636016846,
+      "learning_rate": 7.445714285714286e-06,
+      "loss": 0.0916,
+      "step": 1400
+    },
+    {
+      "epoch": 2.75,
+      "grad_norm": 2.8839168548583984,
+      "learning_rate": 7.374285714285715e-06,
+      "loss": 0.09,
+      "step": 1425
+    },
+    {
+      "epoch": 2.8,
+      "grad_norm": 4.721149921417236,
+      "learning_rate": 7.302857142857144e-06,
+      "loss": 0.0915,
+      "step": 1450
+    },
+    {
+      "epoch": 2.85,
+      "grad_norm": 3.5091800689697266,
+      "learning_rate": 7.231428571428573e-06,
+      "loss": 0.0794,
+      "step": 1475
+    },
+    {
+      "epoch": 2.9,
+      "grad_norm": 3.820974349975586,
+      "learning_rate": 7.16e-06,
+      "loss": 0.0934,
+      "step": 1500
+    },
+    {
+      "epoch": 2.94,
+      "grad_norm": 4.247288227081299,
+      "learning_rate": 7.088571428571429e-06,
+      "loss": 0.0822,
+      "step": 1525
+    },
+    {
+      "epoch": 2.99,
+      "grad_norm": 3.3485350608825684,
+      "learning_rate": 7.017142857142858e-06,
+      "loss": 0.0971,
+      "step": 1550
+    },
+    {
+      "epoch": 3.04,
+      "grad_norm": 1.4603108167648315,
+      "learning_rate": 6.945714285714287e-06,
+      "loss": 0.0352,
+      "step": 1575
+    },
+    {
+      "epoch": 3.09,
+      "grad_norm": 1.7609468698501587,
+      "learning_rate": 6.874285714285714e-06,
+      "loss": 0.0402,
+      "step": 1600
+    },
+    {
+      "epoch": 3.14,
+      "grad_norm": 2.6159989833831787,
+      "learning_rate": 6.8028571428571434e-06,
+      "loss": 0.0317,
+      "step": 1625
+    },
+    {
+      "epoch": 3.19,
+      "grad_norm": 1.857428789138794,
+      "learning_rate": 6.7314285714285724e-06,
+      "loss": 0.039,
+      "step": 1650
+    },
+    {
+      "epoch": 3.23,
+      "grad_norm": 3.3788986206054688,
+      "learning_rate": 6.660000000000001e-06,
+      "loss": 0.0347,
+      "step": 1675
+    },
+    {
+      "epoch": 3.28,
+      "grad_norm": 1.8846665620803833,
+      "learning_rate": 6.588571428571429e-06,
+      "loss": 0.0321,
+      "step": 1700
+    },
+    {
+      "epoch": 3.33,
+      "grad_norm": 1.2387264966964722,
+      "learning_rate": 6.517142857142858e-06,
+      "loss": 0.0327,
+      "step": 1725
+    },
+    {
+      "epoch": 3.38,
+      "grad_norm": 1.4381897449493408,
+      "learning_rate": 6.445714285714286e-06,
+      "loss": 0.0354,
+      "step": 1750
+    },
+    {
+      "epoch": 3.43,
+      "grad_norm": 2.2566747665405273,
+      "learning_rate": 6.374285714285715e-06,
+      "loss": 0.0368,
+      "step": 1775
+    },
+    {
+      "epoch": 3.47,
+      "grad_norm": 1.6017179489135742,
+      "learning_rate": 6.302857142857144e-06,
+      "loss": 0.0351,
+      "step": 1800
+    },
+    {
+      "epoch": 3.52,
+      "grad_norm": 3.2970995903015137,
+      "learning_rate": 6.231428571428571e-06,
+      "loss": 0.0332,
+      "step": 1825
+    },
+    {
+      "epoch": 3.57,
+      "grad_norm": 2.8597681522369385,
+      "learning_rate": 6.16e-06,
+      "loss": 0.0355,
+      "step": 1850
+    },
+    {
+      "epoch": 3.62,
+      "grad_norm": 2.8335232734680176,
+      "learning_rate": 6.088571428571429e-06,
+      "loss": 0.0338,
+      "step": 1875
+    },
+    {
+      "epoch": 3.67,
+      "grad_norm": 3.405618667602539,
+      "learning_rate": 6.017142857142858e-06,
+      "loss": 0.0376,
+      "step": 1900
+    },
+    {
+      "epoch": 3.72,
+      "grad_norm": 2.4519917964935303,
+      "learning_rate": 5.945714285714286e-06,
+      "loss": 0.0359,
+      "step": 1925
+    },
+    {
+      "epoch": 3.76,
+      "grad_norm": 3.2067463397979736,
+      "learning_rate": 5.874285714285715e-06,
+      "loss": 0.029,
+      "step": 1950
+    },
+    {
+      "epoch": 3.81,
+      "grad_norm": 2.5410799980163574,
+      "learning_rate": 5.802857142857144e-06,
+      "loss": 0.0313,
+      "step": 1975
+    },
+    {
+      "epoch": 3.86,
+      "grad_norm": 1.2326685190200806,
+      "learning_rate": 5.731428571428572e-06,
+      "loss": 0.0379,
+      "step": 2000
+    },
+    {
+      "epoch": 3.86,
+      "eval_loss": 0.28133508563041687,
+      "eval_runtime": 3847.5516,
+      "eval_samples_per_second": 0.94,
+      "eval_steps_per_second": 0.118,
+      "eval_wer": 17.470580026978,
+      "step": 2000
+    }
+  ],
+  "logging_steps": 25,
+  "max_steps": 4000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 8,
+  "save_steps": 1000,
+  "total_flos": 9.22261222342656e+18,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-2000/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d97a7d674ba5458016027e8fcbeac271e6fe45c463b2f89dfb01782b9ae4928d
+size 5112

checkpoint-3000/config.json ADDED Viewed

	@@ -0,0 +1,52 @@

+{
+  "_name_or_path": "soaring0616/whisper-small-vietnamese",
+  "activation_dropout": 0.0,
+  "activation_function": "gelu",
+  "apply_spec_augment": false,
+  "architectures": [
+    "WhisperForConditionalGeneration"
+  ],
+  "attention_dropout": 0.0,
+  "begin_suppress_tokens": [
+    220,
+    50257
+  ],
+  "bos_token_id": 50257,
+  "classifier_proj_size": 256,
+  "d_model": 768,
+  "decoder_attention_heads": 12,
+  "decoder_ffn_dim": 3072,
+  "decoder_layerdrop": 0.0,
+  "decoder_layers": 12,
+  "decoder_start_token_id": 50258,
+  "dropout": 0.0,
+  "encoder_attention_heads": 12,
+  "encoder_ffn_dim": 3072,
+  "encoder_layerdrop": 0.0,
+  "encoder_layers": 12,
+  "eos_token_id": 50257,
+  "forced_decoder_ids": null,
+  "init_std": 0.02,
+  "is_encoder_decoder": true,
+  "mask_feature_length": 10,
+  "mask_feature_min_masks": 0,
+  "mask_feature_prob": 0.0,
+  "mask_time_length": 10,
+  "mask_time_min_masks": 2,
+  "mask_time_prob": 0.05,
+  "max_length": 448,
+  "max_source_positions": 1500,
+  "max_target_positions": 448,
+  "median_filter_width": 7,
+  "model_type": "whisper",
+  "num_hidden_layers": 12,
+  "num_mel_bins": 80,
+  "pad_token_id": 50257,
+  "scale_embedding": false,
+  "suppress_tokens": [],
+  "torch_dtype": "float32",
+  "transformers_version": "4.39.1",
+  "use_cache": true,
+  "use_weighted_layer_sum": false,
+  "vocab_size": 51865
+}

checkpoint-3000/generation_config.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+  "begin_suppress_tokens": [
+    220,
+    50257
+  ],
+  "bos_token_id": 50257,
+  "decoder_start_token_id": 50258,
+  "eos_token_id": 50257,
+  "lang_to_id": {
+    "<|id|>": 50275,
+    "<|vi|>": 50278
+  },
+  "language": "indonesian",
+  "max_length": 448,
+  "pad_token_id": 50257,
+  "suppress_tokens": [],
+  "transformers_version": "4.39.1"
+}

checkpoint-3000/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2b2ec79a801b50df28d9dcf71f3ccb414ede7323b8ad5deca0727f6df7a6ce20
+size 966995080

checkpoint-3000/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:11c4ea31347a7fcc610d51ca4993379c662430065ecdba232a62dee37bef2242
+size 1925064044

checkpoint-3000/preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+  "chunk_length": 30,
+  "feature_extractor_type": "WhisperFeatureExtractor",
+  "feature_size": 80,
+  "hop_length": 160,
+  "n_fft": 400,
+  "n_samples": 480000,
+  "nb_max_frames": 3000,
+  "padding_side": "right",
+  "padding_value": 0.0,
+  "processor_class": "WhisperProcessor",
+  "return_attention_mask": false,
+  "sampling_rate": 16000
+}

checkpoint-3000/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f95c8c9f4042f7d715cc8da1a8d12e4c9b1216536db0a30c619f6cf232f2504e
+size 14244

checkpoint-3000/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5e8c76b5c6475a914c815628b47a0a7cb6f2916cc52428671ba7c3d32633cb5e
+size 1064

checkpoint-3000/trainer_state.json ADDED Viewed

	@@ -0,0 +1,888 @@

+{
+  "best_metric": 17.18219452067538,
+  "best_model_checkpoint": "./whisper-small-in_from_vi\\checkpoint-3000",
+  "epoch": 5.7915057915057915,
+  "eval_steps": 1000,
+  "global_step": 3000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.05,
+      "grad_norm": 218.94273376464844,
+      "learning_rate": 3.8e-07,
+      "loss": 5.6914,
+      "step": 25
+    },
+    {
+      "epoch": 0.1,
+      "grad_norm": 35.336517333984375,
+      "learning_rate": 8.8e-07,
+      "loss": 2.5237,
+      "step": 50
+    },
+    {
+      "epoch": 0.14,
+      "grad_norm": 14.206599235534668,
+      "learning_rate": 1.3800000000000001e-06,
+      "loss": 0.8446,
+      "step": 75
+    },
+    {
+      "epoch": 0.19,
+      "grad_norm": 12.359480857849121,
+      "learning_rate": 1.8800000000000002e-06,
+      "loss": 0.579,
+      "step": 100
+    },
+    {
+      "epoch": 0.24,
+      "grad_norm": 8.259902954101562,
+      "learning_rate": 2.38e-06,
+      "loss": 0.5636,
+      "step": 125
+    },
+    {
+      "epoch": 0.29,
+      "grad_norm": 11.162034034729004,
+      "learning_rate": 2.88e-06,
+      "loss": 0.5191,
+      "step": 150
+    },
+    {
+      "epoch": 0.34,
+      "grad_norm": 8.917764663696289,
+      "learning_rate": 3.3800000000000007e-06,
+      "loss": 0.4879,
+      "step": 175
+    },
+    {
+      "epoch": 0.39,
+      "grad_norm": 8.865296363830566,
+      "learning_rate": 3.88e-06,
+      "loss": 0.4333,
+      "step": 200
+    },
+    {
+      "epoch": 0.43,
+      "grad_norm": 8.579310417175293,
+      "learning_rate": 4.38e-06,
+      "loss": 0.4679,
+      "step": 225
+    },
+    {
+      "epoch": 0.48,
+      "grad_norm": 8.361818313598633,
+      "learning_rate": 4.880000000000001e-06,
+      "loss": 0.3948,
+      "step": 250
+    },
+    {
+      "epoch": 0.53,
+      "grad_norm": 6.773808479309082,
+      "learning_rate": 5.380000000000001e-06,
+      "loss": 0.4284,
+      "step": 275
+    },
+    {
+      "epoch": 0.58,
+      "grad_norm": 7.8365678787231445,
+      "learning_rate": 5.8800000000000005e-06,
+      "loss": 0.3829,
+      "step": 300
+    },
+    {
+      "epoch": 0.63,
+      "grad_norm": 10.097782135009766,
+      "learning_rate": 6.380000000000001e-06,
+      "loss": 0.4091,
+      "step": 325
+    },
+    {
+      "epoch": 0.68,
+      "grad_norm": 8.221065521240234,
+      "learning_rate": 6.88e-06,
+      "loss": 0.4067,
+      "step": 350
+    },
+    {
+      "epoch": 0.72,
+      "grad_norm": 7.281307697296143,
+      "learning_rate": 7.3800000000000005e-06,
+      "loss": 0.3501,
+      "step": 375
+    },
+    {
+      "epoch": 0.77,
+      "grad_norm": 7.796460151672363,
+      "learning_rate": 7.88e-06,
+      "loss": 0.3754,
+      "step": 400
+    },
+    {
+      "epoch": 0.82,
+      "grad_norm": 6.829456329345703,
+      "learning_rate": 8.380000000000001e-06,
+      "loss": 0.3789,
+      "step": 425
+    },
+    {
+      "epoch": 0.87,
+      "grad_norm": 7.42793607711792,
+      "learning_rate": 8.880000000000001e-06,
+      "loss": 0.3709,
+      "step": 450
+    },
+    {
+      "epoch": 0.92,
+      "grad_norm": 6.301506519317627,
+      "learning_rate": 9.38e-06,
+      "loss": 0.355,
+      "step": 475
+    },
+    {
+      "epoch": 0.97,
+      "grad_norm": 7.611047744750977,
+      "learning_rate": 9.88e-06,
+      "loss": 0.3347,
+      "step": 500
+    },
+    {
+      "epoch": 1.01,
+      "grad_norm": 6.571840286254883,
+      "learning_rate": 9.945714285714286e-06,
+      "loss": 0.3231,
+      "step": 525
+    },
+    {
+      "epoch": 1.06,
+      "grad_norm": 6.700900554656982,
+      "learning_rate": 9.874285714285715e-06,
+      "loss": 0.2356,
+      "step": 550
+    },
+    {
+      "epoch": 1.11,
+      "grad_norm": 6.217418670654297,
+      "learning_rate": 9.802857142857144e-06,
+      "loss": 0.2177,
+      "step": 575
+    },
+    {
+      "epoch": 1.16,
+      "grad_norm": 5.774504661560059,
+      "learning_rate": 9.731428571428573e-06,
+      "loss": 0.2103,
+      "step": 600
+    },
+    {
+      "epoch": 1.21,
+      "grad_norm": 5.770651340484619,
+      "learning_rate": 9.66e-06,
+      "loss": 0.2529,
+      "step": 625
+    },
+    {
+      "epoch": 1.25,
+      "grad_norm": 5.579860687255859,
+      "learning_rate": 9.58857142857143e-06,
+      "loss": 0.2435,
+      "step": 650
+    },
+    {
+      "epoch": 1.3,
+      "grad_norm": 5.943060398101807,
+      "learning_rate": 9.517142857142859e-06,
+      "loss": 0.2064,
+      "step": 675
+    },
+    {
+      "epoch": 1.35,
+      "grad_norm": 7.721786975860596,
+      "learning_rate": 9.445714285714288e-06,
+      "loss": 0.2273,
+      "step": 700
+    },
+    {
+      "epoch": 1.4,
+      "grad_norm": 6.976935386657715,
+      "learning_rate": 9.374285714285715e-06,
+      "loss": 0.2373,
+      "step": 725
+    },
+    {
+      "epoch": 1.45,
+      "grad_norm": 6.749694347381592,
+      "learning_rate": 9.302857142857144e-06,
+      "loss": 0.2306,
+      "step": 750
+    },
+    {
+      "epoch": 1.5,
+      "grad_norm": 5.087153434753418,
+      "learning_rate": 9.231428571428573e-06,
+      "loss": 0.2292,
+      "step": 775
+    },
+    {
+      "epoch": 1.54,
+      "grad_norm": 6.032232761383057,
+      "learning_rate": 9.16e-06,
+      "loss": 0.2096,
+      "step": 800
+    },
+    {
+      "epoch": 1.59,
+      "grad_norm": 5.749306678771973,
+      "learning_rate": 9.08857142857143e-06,
+      "loss": 0.2217,
+      "step": 825
+    },
+    {
+      "epoch": 1.64,
+      "grad_norm": 7.6291184425354,
+      "learning_rate": 9.017142857142858e-06,
+      "loss": 0.2023,
+      "step": 850
+    },
+    {
+      "epoch": 1.69,
+      "grad_norm": 4.76064395904541,
+      "learning_rate": 8.945714285714286e-06,
+      "loss": 0.2099,
+      "step": 875
+    },
+    {
+      "epoch": 1.74,
+      "grad_norm": 5.0226898193359375,
+      "learning_rate": 8.874285714285715e-06,
+      "loss": 0.2017,
+      "step": 900
+    },
+    {
+      "epoch": 1.79,
+      "grad_norm": 5.6098313331604,
+      "learning_rate": 8.802857142857144e-06,
+      "loss": 0.2126,
+      "step": 925
+    },
+    {
+      "epoch": 1.83,
+      "grad_norm": 5.58658504486084,
+      "learning_rate": 8.731428571428571e-06,
+      "loss": 0.2209,
+      "step": 950
+    },
+    {
+      "epoch": 1.88,
+      "grad_norm": 7.067436218261719,
+      "learning_rate": 8.66e-06,
+      "loss": 0.225,
+      "step": 975
+    },
+    {
+      "epoch": 1.93,
+      "grad_norm": 4.616860389709473,
+      "learning_rate": 8.588571428571429e-06,
+      "loss": 0.2059,
+      "step": 1000
+    },
+    {
+      "epoch": 1.93,
+      "eval_loss": 0.25656187534332275,
+      "eval_runtime": 2791.9455,
+      "eval_samples_per_second": 1.296,
+      "eval_steps_per_second": 0.162,
+      "eval_wer": 17.814782082887575,
+      "step": 1000
+    },
+    {
+      "epoch": 1.98,
+      "grad_norm": 4.517664909362793,
+      "learning_rate": 8.517142857142858e-06,
+      "loss": 0.2132,
+      "step": 1025
+    },
+    {
+      "epoch": 2.03,
+      "grad_norm": 3.016611099243164,
+      "learning_rate": 8.445714285714285e-06,
+      "loss": 0.1498,
+      "step": 1050
+    },
+    {
+      "epoch": 2.08,
+      "grad_norm": 4.563220977783203,
+      "learning_rate": 8.374285714285714e-06,
+      "loss": 0.0848,
+      "step": 1075
+    },
+    {
+      "epoch": 2.12,
+      "grad_norm": 4.658745288848877,
+      "learning_rate": 8.302857142857143e-06,
+      "loss": 0.0877,
+      "step": 1100
+    },
+    {
+      "epoch": 2.17,
+      "grad_norm": 3.2172155380249023,
+      "learning_rate": 8.231428571428572e-06,
+      "loss": 0.0858,
+      "step": 1125
+    },
+    {
+      "epoch": 2.22,
+      "grad_norm": 3.33174991607666,
+      "learning_rate": 8.16e-06,
+      "loss": 0.08,
+      "step": 1150
+    },
+    {
+      "epoch": 2.27,
+      "grad_norm": 4.132905960083008,
+      "learning_rate": 8.088571428571429e-06,
+      "loss": 0.0945,
+      "step": 1175
+    },
+    {
+      "epoch": 2.32,
+      "grad_norm": 3.5062320232391357,
+      "learning_rate": 8.017142857142858e-06,
+      "loss": 0.0849,
+      "step": 1200
+    },
+    {
+      "epoch": 2.36,
+      "grad_norm": 2.746927261352539,
+      "learning_rate": 7.945714285714287e-06,
+      "loss": 0.0814,
+      "step": 1225
+    },
+    {
+      "epoch": 2.41,
+      "grad_norm": 2.8374855518341064,
+      "learning_rate": 7.874285714285716e-06,
+      "loss": 0.0898,
+      "step": 1250
+    },
+    {
+      "epoch": 2.46,
+      "grad_norm": 2.8154654502868652,
+      "learning_rate": 7.802857142857143e-06,
+      "loss": 0.091,
+      "step": 1275
+    },
+    {
+      "epoch": 2.51,
+      "grad_norm": 3.5242350101470947,
+      "learning_rate": 7.731428571428572e-06,
+      "loss": 0.085,
+      "step": 1300
+    },
+    {
+      "epoch": 2.56,
+      "grad_norm": 3.3596408367156982,
+      "learning_rate": 7.660000000000001e-06,
+      "loss": 0.093,
+      "step": 1325
+    },
+    {
+      "epoch": 2.61,
+      "grad_norm": 3.4869582653045654,
+      "learning_rate": 7.588571428571429e-06,
+      "loss": 0.0836,
+      "step": 1350
+    },
+    {
+      "epoch": 2.65,
+      "grad_norm": 2.8120055198669434,
+      "learning_rate": 7.5171428571428575e-06,
+      "loss": 0.0769,
+      "step": 1375
+    },
+    {
+      "epoch": 2.7,
+      "grad_norm": 3.5809738636016846,
+      "learning_rate": 7.445714285714286e-06,
+      "loss": 0.0916,
+      "step": 1400
+    },
+    {
+      "epoch": 2.75,
+      "grad_norm": 2.8839168548583984,
+      "learning_rate": 7.374285714285715e-06,
+      "loss": 0.09,
+      "step": 1425
+    },
+    {
+      "epoch": 2.8,
+      "grad_norm": 4.721149921417236,
+      "learning_rate": 7.302857142857144e-06,
+      "loss": 0.0915,
+      "step": 1450
+    },
+    {
+      "epoch": 2.85,
+      "grad_norm": 3.5091800689697266,
+      "learning_rate": 7.231428571428573e-06,
+      "loss": 0.0794,
+      "step": 1475
+    },
+    {
+      "epoch": 2.9,
+      "grad_norm": 3.820974349975586,
+      "learning_rate": 7.16e-06,
+      "loss": 0.0934,
+      "step": 1500
+    },
+    {
+      "epoch": 2.94,
+      "grad_norm": 4.247288227081299,
+      "learning_rate": 7.088571428571429e-06,
+      "loss": 0.0822,
+      "step": 1525
+    },
+    {
+      "epoch": 2.99,
+      "grad_norm": 3.3485350608825684,
+      "learning_rate": 7.017142857142858e-06,
+      "loss": 0.0971,
+      "step": 1550
+    },
+    {
+      "epoch": 3.04,
+      "grad_norm": 1.4603108167648315,
+      "learning_rate": 6.945714285714287e-06,
+      "loss": 0.0352,
+      "step": 1575
+    },
+    {
+      "epoch": 3.09,
+      "grad_norm": 1.7609468698501587,
+      "learning_rate": 6.874285714285714e-06,
+      "loss": 0.0402,
+      "step": 1600
+    },
+    {
+      "epoch": 3.14,
+      "grad_norm": 2.6159989833831787,
+      "learning_rate": 6.8028571428571434e-06,
+      "loss": 0.0317,
+      "step": 1625
+    },
+    {
+      "epoch": 3.19,
+      "grad_norm": 1.857428789138794,
+      "learning_rate": 6.7314285714285724e-06,
+      "loss": 0.039,
+      "step": 1650
+    },
+    {
+      "epoch": 3.23,
+      "grad_norm": 3.3788986206054688,
+      "learning_rate": 6.660000000000001e-06,
+      "loss": 0.0347,
+      "step": 1675
+    },
+    {
+      "epoch": 3.28,
+      "grad_norm": 1.8846665620803833,
+      "learning_rate": 6.588571428571429e-06,
+      "loss": 0.0321,
+      "step": 1700
+    },
+    {
+      "epoch": 3.33,
+      "grad_norm": 1.2387264966964722,
+      "learning_rate": 6.517142857142858e-06,
+      "loss": 0.0327,
+      "step": 1725
+    },
+    {
+      "epoch": 3.38,
+      "grad_norm": 1.4381897449493408,
+      "learning_rate": 6.445714285714286e-06,
+      "loss": 0.0354,
+      "step": 1750
+    },
+    {
+      "epoch": 3.43,
+      "grad_norm": 2.2566747665405273,
+      "learning_rate": 6.374285714285715e-06,
+      "loss": 0.0368,
+      "step": 1775
+    },
+    {
+      "epoch": 3.47,
+      "grad_norm": 1.6017179489135742,
+      "learning_rate": 6.302857142857144e-06,
+      "loss": 0.0351,
+      "step": 1800
+    },
+    {
+      "epoch": 3.52,
+      "grad_norm": 3.2970995903015137,
+      "learning_rate": 6.231428571428571e-06,
+      "loss": 0.0332,
+      "step": 1825
+    },
+    {
+      "epoch": 3.57,
+      "grad_norm": 2.8597681522369385,
+      "learning_rate": 6.16e-06,
+      "loss": 0.0355,
+      "step": 1850
+    },
+    {
+      "epoch": 3.62,
+      "grad_norm": 2.8335232734680176,
+      "learning_rate": 6.088571428571429e-06,
+      "loss": 0.0338,
+      "step": 1875
+    },
+    {
+      "epoch": 3.67,
+      "grad_norm": 3.405618667602539,
+      "learning_rate": 6.017142857142858e-06,
+      "loss": 0.0376,
+      "step": 1900
+    },
+    {
+      "epoch": 3.72,
+      "grad_norm": 2.4519917964935303,
+      "learning_rate": 5.945714285714286e-06,
+      "loss": 0.0359,
+      "step": 1925
+    },
+    {
+      "epoch": 3.76,
+      "grad_norm": 3.2067463397979736,
+      "learning_rate": 5.874285714285715e-06,
+      "loss": 0.029,
+      "step": 1950
+    },
+    {
+      "epoch": 3.81,
+      "grad_norm": 2.5410799980163574,
+      "learning_rate": 5.802857142857144e-06,
+      "loss": 0.0313,
+      "step": 1975
+    },
+    {
+      "epoch": 3.86,
+      "grad_norm": 1.2326685190200806,
+      "learning_rate": 5.731428571428572e-06,
+      "loss": 0.0379,
+      "step": 2000
+    },
+    {
+      "epoch": 3.86,
+      "eval_loss": 0.28133508563041687,
+      "eval_runtime": 3847.5516,
+      "eval_samples_per_second": 0.94,
+      "eval_steps_per_second": 0.118,
+      "eval_wer": 17.470580026978,
+      "step": 2000
+    },
+    {
+      "epoch": 3.91,
+      "grad_norm": 2.7244298458099365,
+      "learning_rate": 5.66e-06,
+      "loss": 0.0356,
+      "step": 2025
+    },
+    {
+      "epoch": 3.96,
+      "grad_norm": 2.446558952331543,
+      "learning_rate": 5.588571428571429e-06,
+      "loss": 0.0273,
+      "step": 2050
+    },
+    {
+      "epoch": 4.01,
+      "grad_norm": 0.7038655281066895,
+      "learning_rate": 5.517142857142857e-06,
+      "loss": 0.0298,
+      "step": 2075
+    },
+    {
+      "epoch": 4.05,
+      "grad_norm": 0.8899651765823364,
+      "learning_rate": 5.445714285714286e-06,
+      "loss": 0.0162,
+      "step": 2100
+    },
+    {
+      "epoch": 4.1,
+      "grad_norm": 2.154207944869995,
+      "learning_rate": 5.374285714285715e-06,
+      "loss": 0.0143,
+      "step": 2125
+    },
+    {
+      "epoch": 4.15,
+      "grad_norm": 1.203642725944519,
+      "learning_rate": 5.3028571428571425e-06,
+      "loss": 0.0142,
+      "step": 2150
+    },
+    {
+      "epoch": 4.2,
+      "grad_norm": 0.45312538743019104,
+      "learning_rate": 5.2314285714285716e-06,
+      "loss": 0.0133,
+      "step": 2175
+    },
+    {
+      "epoch": 4.25,
+      "grad_norm": 1.146144151687622,
+      "learning_rate": 5.1600000000000006e-06,
+      "loss": 0.0142,
+      "step": 2200
+    },
+    {
+      "epoch": 4.3,
+      "grad_norm": 2.2396395206451416,
+      "learning_rate": 5.08857142857143e-06,
+      "loss": 0.0163,
+      "step": 2225
+    },
+    {
+      "epoch": 4.34,
+      "grad_norm": 0.6650336384773254,
+      "learning_rate": 5.017142857142857e-06,
+      "loss": 0.0119,
+      "step": 2250
+    },
+    {
+      "epoch": 4.39,
+      "grad_norm": 0.5963819026947021,
+      "learning_rate": 4.945714285714286e-06,
+      "loss": 0.0173,
+      "step": 2275
+    },
+    {
+      "epoch": 4.44,
+      "grad_norm": 1.5120543241500854,
+      "learning_rate": 4.874285714285715e-06,
+      "loss": 0.0148,
+      "step": 2300
+    },
+    {
+      "epoch": 4.49,
+      "grad_norm": 0.7102583050727844,
+      "learning_rate": 4.802857142857143e-06,
+      "loss": 0.0121,
+      "step": 2325
+    },
+    {
+      "epoch": 4.54,
+      "grad_norm": 1.2483290433883667,
+      "learning_rate": 4.731428571428572e-06,
+      "loss": 0.0169,
+      "step": 2350
+    },
+    {
+      "epoch": 4.58,
+      "grad_norm": 1.8999782800674438,
+      "learning_rate": 4.66e-06,
+      "loss": 0.013,
+      "step": 2375
+    },
+    {
+      "epoch": 4.63,
+      "grad_norm": 0.893521249294281,
+      "learning_rate": 4.588571428571429e-06,
+      "loss": 0.0149,
+      "step": 2400
+    },
+    {
+      "epoch": 4.68,
+      "grad_norm": 1.4212769269943237,
+      "learning_rate": 4.5171428571428575e-06,
+      "loss": 0.0132,
+      "step": 2425
+    },
+    {
+      "epoch": 4.73,
+      "grad_norm": 0.5899932980537415,
+      "learning_rate": 4.445714285714286e-06,
+      "loss": 0.018,
+      "step": 2450
+    },
+    {
+      "epoch": 4.78,
+      "grad_norm": 0.5150299668312073,
+      "learning_rate": 4.374285714285715e-06,
+      "loss": 0.012,
+      "step": 2475
+    },
+    {
+      "epoch": 4.83,
+      "grad_norm": 1.5570229291915894,
+      "learning_rate": 4.302857142857143e-06,
+      "loss": 0.0121,
+      "step": 2500
+    },
+    {
+      "epoch": 4.87,
+      "grad_norm": 1.727569818496704,
+      "learning_rate": 4.231428571428572e-06,
+      "loss": 0.0117,
+      "step": 2525
+    },
+    {
+      "epoch": 4.92,
+      "grad_norm": 0.8942755460739136,
+      "learning_rate": 4.16e-06,
+      "loss": 0.0125,
+      "step": 2550
+    },
+    {
+      "epoch": 4.97,
+      "grad_norm": 2.9615914821624756,
+      "learning_rate": 4.088571428571429e-06,
+      "loss": 0.0129,
+      "step": 2575
+    },
+    {
+      "epoch": 5.02,
+      "grad_norm": 0.27788445353507996,
+      "learning_rate": 4.017142857142857e-06,
+      "loss": 0.0101,
+      "step": 2600
+    },
+    {
+      "epoch": 5.07,
+      "grad_norm": 0.5583022832870483,
+      "learning_rate": 3.945714285714286e-06,
+      "loss": 0.007,
+      "step": 2625
+    },
+    {
+      "epoch": 5.12,
+      "grad_norm": 0.392986536026001,
+      "learning_rate": 3.874285714285715e-06,
+      "loss": 0.0065,
+      "step": 2650
+    },
+    {
+      "epoch": 5.16,
+      "grad_norm": 0.7827091813087463,
+      "learning_rate": 3.802857142857143e-06,
+      "loss": 0.0054,
+      "step": 2675
+    },
+    {
+      "epoch": 5.21,
+      "grad_norm": 0.5473342537879944,
+      "learning_rate": 3.731428571428572e-06,
+      "loss": 0.0068,
+      "step": 2700
+    },
+    {
+      "epoch": 5.26,
+      "grad_norm": 0.545080840587616,
+      "learning_rate": 3.66e-06,
+      "loss": 0.0055,
+      "step": 2725
+    },
+    {
+      "epoch": 5.31,
+      "grad_norm": 0.44736358523368835,
+      "learning_rate": 3.588571428571429e-06,
+      "loss": 0.0085,
+      "step": 2750
+    },
+    {
+      "epoch": 5.36,
+      "grad_norm": 0.7332315444946289,
+      "learning_rate": 3.5171428571428573e-06,
+      "loss": 0.0071,
+      "step": 2775
+    },
+    {
+      "epoch": 5.41,
+      "grad_norm": 0.6373933553695679,
+      "learning_rate": 3.4457142857142863e-06,
+      "loss": 0.0058,
+      "step": 2800
+    },
+    {
+      "epoch": 5.45,
+      "grad_norm": 0.47440409660339355,
+      "learning_rate": 3.3742857142857145e-06,
+      "loss": 0.0056,
+      "step": 2825
+    },
+    {
+      "epoch": 5.5,
+      "grad_norm": 0.23941631615161896,
+      "learning_rate": 3.302857142857143e-06,
+      "loss": 0.0046,
+      "step": 2850
+    },
+    {
+      "epoch": 5.55,
+      "grad_norm": 0.5772097110748291,
+      "learning_rate": 3.2314285714285716e-06,
+      "loss": 0.0055,
+      "step": 2875
+    },
+    {
+      "epoch": 5.6,
+      "grad_norm": 0.22144152224063873,
+      "learning_rate": 3.1600000000000002e-06,
+      "loss": 0.0048,
+      "step": 2900
+    },
+    {
+      "epoch": 5.65,
+      "grad_norm": 0.28436514735221863,
+      "learning_rate": 3.0885714285714284e-06,
+      "loss": 0.0052,
+      "step": 2925
+    },
+    {
+      "epoch": 5.69,
+      "grad_norm": 0.2404721975326538,
+      "learning_rate": 3.0171428571428574e-06,
+      "loss": 0.0056,
+      "step": 2950
+    },
+    {
+      "epoch": 5.74,
+      "grad_norm": 0.49395838379859924,
+      "learning_rate": 2.945714285714286e-06,
+      "loss": 0.005,
+      "step": 2975
+    },
+    {
+      "epoch": 5.79,
+      "grad_norm": 0.32905882596969604,
+      "learning_rate": 2.8742857142857146e-06,
+      "loss": 0.006,
+      "step": 3000
+    },
+    {
+      "epoch": 5.79,
+      "eval_loss": 0.32524359226226807,
+      "eval_runtime": 2778.5628,
+      "eval_samples_per_second": 1.302,
+      "eval_steps_per_second": 0.163,
+      "eval_wer": 17.18219452067538,
+      "step": 3000
+    }
+  ],
+  "logging_steps": 25,
+  "max_steps": 4000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 8,
+  "save_steps": 1000,
+  "total_flos": 1.38318982373376e+19,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-3000/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d97a7d674ba5458016027e8fcbeac271e6fe45c463b2f89dfb01782b9ae4928d
+size 5112

checkpoint-4000/config.json ADDED Viewed

	@@ -0,0 +1,52 @@

+{
+  "_name_or_path": "soaring0616/whisper-small-vietnamese",
+  "activation_dropout": 0.0,
+  "activation_function": "gelu",
+  "apply_spec_augment": false,
+  "architectures": [
+    "WhisperForConditionalGeneration"
+  ],
+  "attention_dropout": 0.0,
+  "begin_suppress_tokens": [
+    220,
+    50257
+  ],
+  "bos_token_id": 50257,
+  "classifier_proj_size": 256,
+  "d_model": 768,
+  "decoder_attention_heads": 12,
+  "decoder_ffn_dim": 3072,
+  "decoder_layerdrop": 0.0,
+  "decoder_layers": 12,
+  "decoder_start_token_id": 50258,
+  "dropout": 0.0,
+  "encoder_attention_heads": 12,
+  "encoder_ffn_dim": 3072,
+  "encoder_layerdrop": 0.0,
+  "encoder_layers": 12,
+  "eos_token_id": 50257,
+  "forced_decoder_ids": null,
+  "init_std": 0.02,
+  "is_encoder_decoder": true,
+  "mask_feature_length": 10,
+  "mask_feature_min_masks": 0,
+  "mask_feature_prob": 0.0,
+  "mask_time_length": 10,
+  "mask_time_min_masks": 2,
+  "mask_time_prob": 0.05,
+  "max_length": 448,
+  "max_source_positions": 1500,
+  "max_target_positions": 448,
+  "median_filter_width": 7,
+  "model_type": "whisper",
+  "num_hidden_layers": 12,
+  "num_mel_bins": 80,
+  "pad_token_id": 50257,
+  "scale_embedding": false,
+  "suppress_tokens": [],
+  "torch_dtype": "float32",
+  "transformers_version": "4.39.1",
+  "use_cache": true,
+  "use_weighted_layer_sum": false,
+  "vocab_size": 51865
+}

checkpoint-4000/generation_config.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+  "begin_suppress_tokens": [
+    220,
+    50257
+  ],
+  "bos_token_id": 50257,
+  "decoder_start_token_id": 50258,
+  "eos_token_id": 50257,
+  "lang_to_id": {
+    "<|id|>": 50275,
+    "<|vi|>": 50278
+  },
+  "language": "indonesian",
+  "max_length": 448,
+  "pad_token_id": 50257,
+  "suppress_tokens": [],
+  "transformers_version": "4.39.1"
+}

checkpoint-4000/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cc40f62bc219565ea4fe7f38a6a349c133b6dfc2a5c89c40c0abf2be56c48320
+size 966995080

checkpoint-4000/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ff92fd5980e080d2c3378e5b65d7be86c9462f60ffe5c96be0656923426c8567
+size 1925064044

checkpoint-4000/preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+  "chunk_length": 30,
+  "feature_extractor_type": "WhisperFeatureExtractor",
+  "feature_size": 80,
+  "hop_length": 160,
+  "n_fft": 400,
+  "n_samples": 480000,
+  "nb_max_frames": 3000,
+  "padding_side": "right",
+  "padding_value": 0.0,
+  "processor_class": "WhisperProcessor",
+  "return_attention_mask": false,
+  "sampling_rate": 16000
+}

checkpoint-4000/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8b05194b2f99c0ed6f16f3ed9d04212c1494104bdaea1339f2285ef60727128a
+size 14244

checkpoint-4000/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2855e8906b1a9cae8fe1084d27dc3bbefc0b436cc2ae5fffa97d518a5d5d732d
+size 1064

checkpoint-4000/trainer_state.json ADDED Viewed

	@@ -0,0 +1,1177 @@

+{
+  "best_metric": 17.051955904925812,
+  "best_model_checkpoint": "./whisper-small-in_from_vi\\checkpoint-4000",
+  "epoch": 7.722007722007722,
+  "eval_steps": 1000,
+  "global_step": 4000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.05,
+      "grad_norm": 218.94273376464844,
+      "learning_rate": 3.8e-07,
+      "loss": 5.6914,
+      "step": 25
+    },
+    {
+      "epoch": 0.1,
+      "grad_norm": 35.336517333984375,
+      "learning_rate": 8.8e-07,
+      "loss": 2.5237,
+      "step": 50
+    },
+    {
+      "epoch": 0.14,
+      "grad_norm": 14.206599235534668,
+      "learning_rate": 1.3800000000000001e-06,
+      "loss": 0.8446,
+      "step": 75
+    },
+    {
+      "epoch": 0.19,
+      "grad_norm": 12.359480857849121,
+      "learning_rate": 1.8800000000000002e-06,
+      "loss": 0.579,
+      "step": 100
+    },
+    {
+      "epoch": 0.24,
+      "grad_norm": 8.259902954101562,
+      "learning_rate": 2.38e-06,
+      "loss": 0.5636,
+      "step": 125
+    },
+    {
+      "epoch": 0.29,
+      "grad_norm": 11.162034034729004,
+      "learning_rate": 2.88e-06,
+      "loss": 0.5191,
+      "step": 150
+    },
+    {
+      "epoch": 0.34,
+      "grad_norm": 8.917764663696289,
+      "learning_rate": 3.3800000000000007e-06,
+      "loss": 0.4879,
+      "step": 175
+    },
+    {
+      "epoch": 0.39,
+      "grad_norm": 8.865296363830566,
+      "learning_rate": 3.88e-06,
+      "loss": 0.4333,
+      "step": 200
+    },
+    {
+      "epoch": 0.43,
+      "grad_norm": 8.579310417175293,
+      "learning_rate": 4.38e-06,
+      "loss": 0.4679,
+      "step": 225
+    },
+    {
+      "epoch": 0.48,
+      "grad_norm": 8.361818313598633,
+      "learning_rate": 4.880000000000001e-06,
+      "loss": 0.3948,
+      "step": 250
+    },
+    {
+      "epoch": 0.53,
+      "grad_norm": 6.773808479309082,
+      "learning_rate": 5.380000000000001e-06,
+      "loss": 0.4284,
+      "step": 275
+    },
+    {
+      "epoch": 0.58,
+      "grad_norm": 7.8365678787231445,
+      "learning_rate": 5.8800000000000005e-06,
+      "loss": 0.3829,
+      "step": 300
+    },
+    {
+      "epoch": 0.63,
+      "grad_norm": 10.097782135009766,
+      "learning_rate": 6.380000000000001e-06,
+      "loss": 0.4091,
+      "step": 325
+    },
+    {
+      "epoch": 0.68,
+      "grad_norm": 8.221065521240234,
+      "learning_rate": 6.88e-06,
+      "loss": 0.4067,
+      "step": 350
+    },
+    {
+      "epoch": 0.72,
+      "grad_norm": 7.281307697296143,
+      "learning_rate": 7.3800000000000005e-06,
+      "loss": 0.3501,
+      "step": 375
+    },
+    {
+      "epoch": 0.77,
+      "grad_norm": 7.796460151672363,
+      "learning_rate": 7.88e-06,
+      "loss": 0.3754,
+      "step": 400
+    },
+    {
+      "epoch": 0.82,
+      "grad_norm": 6.829456329345703,
+      "learning_rate": 8.380000000000001e-06,
+      "loss": 0.3789,
+      "step": 425
+    },
+    {
+      "epoch": 0.87,
+      "grad_norm": 7.42793607711792,
+      "learning_rate": 8.880000000000001e-06,
+      "loss": 0.3709,
+      "step": 450
+    },
+    {
+      "epoch": 0.92,
+      "grad_norm": 6.301506519317627,
+      "learning_rate": 9.38e-06,
+      "loss": 0.355,
+      "step": 475
+    },
+    {
+      "epoch": 0.97,
+      "grad_norm": 7.611047744750977,
+      "learning_rate": 9.88e-06,
+      "loss": 0.3347,
+      "step": 500
+    },
+    {
+      "epoch": 1.01,
+      "grad_norm": 6.571840286254883,
+      "learning_rate": 9.945714285714286e-06,
+      "loss": 0.3231,
+      "step": 525
+    },
+    {
+      "epoch": 1.06,
+      "grad_norm": 6.700900554656982,
+      "learning_rate": 9.874285714285715e-06,
+      "loss": 0.2356,
+      "step": 550
+    },
+    {
+      "epoch": 1.11,
+      "grad_norm": 6.217418670654297,
+      "learning_rate": 9.802857142857144e-06,
+      "loss": 0.2177,
+      "step": 575
+    },
+    {
+      "epoch": 1.16,
+      "grad_norm": 5.774504661560059,
+      "learning_rate": 9.731428571428573e-06,
+      "loss": 0.2103,
+      "step": 600
+    },
+    {
+      "epoch": 1.21,
+      "grad_norm": 5.770651340484619,
+      "learning_rate": 9.66e-06,
+      "loss": 0.2529,
+      "step": 625
+    },
+    {
+      "epoch": 1.25,
+      "grad_norm": 5.579860687255859,
+      "learning_rate": 9.58857142857143e-06,
+      "loss": 0.2435,
+      "step": 650
+    },
+    {
+      "epoch": 1.3,
+      "grad_norm": 5.943060398101807,
+      "learning_rate": 9.517142857142859e-06,
+      "loss": 0.2064,
+      "step": 675
+    },
+    {
+      "epoch": 1.35,
+      "grad_norm": 7.721786975860596,
+      "learning_rate": 9.445714285714288e-06,
+      "loss": 0.2273,
+      "step": 700
+    },
+    {
+      "epoch": 1.4,
+      "grad_norm": 6.976935386657715,
+      "learning_rate": 9.374285714285715e-06,
+      "loss": 0.2373,
+      "step": 725
+    },
+    {
+      "epoch": 1.45,
+      "grad_norm": 6.749694347381592,
+      "learning_rate": 9.302857142857144e-06,
+      "loss": 0.2306,
+      "step": 750
+    },
+    {
+      "epoch": 1.5,
+      "grad_norm": 5.087153434753418,
+      "learning_rate": 9.231428571428573e-06,
+      "loss": 0.2292,
+      "step": 775
+    },
+    {
+      "epoch": 1.54,
+      "grad_norm": 6.032232761383057,
+      "learning_rate": 9.16e-06,
+      "loss": 0.2096,
+      "step": 800
+    },
+    {
+      "epoch": 1.59,
+      "grad_norm": 5.749306678771973,
+      "learning_rate": 9.08857142857143e-06,
+      "loss": 0.2217,
+      "step": 825
+    },
+    {
+      "epoch": 1.64,
+      "grad_norm": 7.6291184425354,
+      "learning_rate": 9.017142857142858e-06,
+      "loss": 0.2023,
+      "step": 850
+    },
+    {
+      "epoch": 1.69,
+      "grad_norm": 4.76064395904541,
+      "learning_rate": 8.945714285714286e-06,
+      "loss": 0.2099,
+      "step": 875
+    },
+    {
+      "epoch": 1.74,
+      "grad_norm": 5.0226898193359375,
+      "learning_rate": 8.874285714285715e-06,
+      "loss": 0.2017,
+      "step": 900
+    },
+    {
+      "epoch": 1.79,
+      "grad_norm": 5.6098313331604,
+      "learning_rate": 8.802857142857144e-06,
+      "loss": 0.2126,
+      "step": 925
+    },
+    {
+      "epoch": 1.83,
+      "grad_norm": 5.58658504486084,
+      "learning_rate": 8.731428571428571e-06,
+      "loss": 0.2209,
+      "step": 950
+    },
+    {
+      "epoch": 1.88,
+      "grad_norm": 7.067436218261719,
+      "learning_rate": 8.66e-06,
+      "loss": 0.225,
+      "step": 975
+    },
+    {
+      "epoch": 1.93,
+      "grad_norm": 4.616860389709473,
+      "learning_rate": 8.588571428571429e-06,
+      "loss": 0.2059,
+      "step": 1000
+    },
+    {
+      "epoch": 1.93,
+      "eval_loss": 0.25656187534332275,
+      "eval_runtime": 2791.9455,
+      "eval_samples_per_second": 1.296,
+      "eval_steps_per_second": 0.162,
+      "eval_wer": 17.814782082887575,
+      "step": 1000
+    },
+    {
+      "epoch": 1.98,
+      "grad_norm": 4.517664909362793,
+      "learning_rate": 8.517142857142858e-06,
+      "loss": 0.2132,
+      "step": 1025
+    },
+    {
+      "epoch": 2.03,
+      "grad_norm": 3.016611099243164,
+      "learning_rate": 8.445714285714285e-06,
+      "loss": 0.1498,
+      "step": 1050
+    },
+    {
+      "epoch": 2.08,
+      "grad_norm": 4.563220977783203,
+      "learning_rate": 8.374285714285714e-06,
+      "loss": 0.0848,
+      "step": 1075
+    },
+    {
+      "epoch": 2.12,
+      "grad_norm": 4.658745288848877,
+      "learning_rate": 8.302857142857143e-06,
+      "loss": 0.0877,
+      "step": 1100
+    },
+    {
+      "epoch": 2.17,
+      "grad_norm": 3.2172155380249023,
+      "learning_rate": 8.231428571428572e-06,
+      "loss": 0.0858,
+      "step": 1125
+    },
+    {
+      "epoch": 2.22,
+      "grad_norm": 3.33174991607666,
+      "learning_rate": 8.16e-06,
+      "loss": 0.08,
+      "step": 1150
+    },
+    {
+      "epoch": 2.27,
+      "grad_norm": 4.132905960083008,
+      "learning_rate": 8.088571428571429e-06,
+      "loss": 0.0945,
+      "step": 1175
+    },
+    {
+      "epoch": 2.32,
+      "grad_norm": 3.5062320232391357,
+      "learning_rate": 8.017142857142858e-06,
+      "loss": 0.0849,
+      "step": 1200
+    },
+    {
+      "epoch": 2.36,
+      "grad_norm": 2.746927261352539,
+      "learning_rate": 7.945714285714287e-06,
+      "loss": 0.0814,
+      "step": 1225
+    },
+    {
+      "epoch": 2.41,
+      "grad_norm": 2.8374855518341064,
+      "learning_rate": 7.874285714285716e-06,
+      "loss": 0.0898,
+      "step": 1250
+    },
+    {
+      "epoch": 2.46,
+      "grad_norm": 2.8154654502868652,
+      "learning_rate": 7.802857142857143e-06,
+      "loss": 0.091,
+      "step": 1275
+    },
+    {
+      "epoch": 2.51,
+      "grad_norm": 3.5242350101470947,
+      "learning_rate": 7.731428571428572e-06,
+      "loss": 0.085,
+      "step": 1300
+    },
+    {
+      "epoch": 2.56,
+      "grad_norm": 3.3596408367156982,
+      "learning_rate": 7.660000000000001e-06,
+      "loss": 0.093,
+      "step": 1325
+    },
+    {
+      "epoch": 2.61,
+      "grad_norm": 3.4869582653045654,
+      "learning_rate": 7.588571428571429e-06,
+      "loss": 0.0836,
+      "step": 1350
+    },
+    {
+      "epoch": 2.65,
+      "grad_norm": 2.8120055198669434,
+      "learning_rate": 7.5171428571428575e-06,
+      "loss": 0.0769,
+      "step": 1375
+    },
+    {
+      "epoch": 2.7,
+      "grad_norm": 3.5809738636016846,
+      "learning_rate": 7.445714285714286e-06,
+      "loss": 0.0916,
+      "step": 1400
+    },
+    {
+      "epoch": 2.75,
+      "grad_norm": 2.8839168548583984,
+      "learning_rate": 7.374285714285715e-06,
+      "loss": 0.09,
+      "step": 1425
+    },
+    {
+      "epoch": 2.8,
+      "grad_norm": 4.721149921417236,
+      "learning_rate": 7.302857142857144e-06,
+      "loss": 0.0915,
+      "step": 1450
+    },
+    {
+      "epoch": 2.85,
+      "grad_norm": 3.5091800689697266,
+      "learning_rate": 7.231428571428573e-06,
+      "loss": 0.0794,
+      "step": 1475
+    },
+    {
+      "epoch": 2.9,
+      "grad_norm": 3.820974349975586,
+      "learning_rate": 7.16e-06,
+      "loss": 0.0934,
+      "step": 1500
+    },
+    {
+      "epoch": 2.94,
+      "grad_norm": 4.247288227081299,
+      "learning_rate": 7.088571428571429e-06,
+      "loss": 0.0822,
+      "step": 1525
+    },
+    {
+      "epoch": 2.99,
+      "grad_norm": 3.3485350608825684,
+      "learning_rate": 7.017142857142858e-06,
+      "loss": 0.0971,
+      "step": 1550
+    },
+    {
+      "epoch": 3.04,
+      "grad_norm": 1.4603108167648315,
+      "learning_rate": 6.945714285714287e-06,
+      "loss": 0.0352,
+      "step": 1575
+    },
+    {
+      "epoch": 3.09,
+      "grad_norm": 1.7609468698501587,
+      "learning_rate": 6.874285714285714e-06,
+      "loss": 0.0402,
+      "step": 1600
+    },
+    {
+      "epoch": 3.14,
+      "grad_norm": 2.6159989833831787,
+      "learning_rate": 6.8028571428571434e-06,
+      "loss": 0.0317,
+      "step": 1625
+    },
+    {
+      "epoch": 3.19,
+      "grad_norm": 1.857428789138794,
+      "learning_rate": 6.7314285714285724e-06,
+      "loss": 0.039,
+      "step": 1650
+    },
+    {
+      "epoch": 3.23,
+      "grad_norm": 3.3788986206054688,
+      "learning_rate": 6.660000000000001e-06,
+      "loss": 0.0347,
+      "step": 1675
+    },
+    {
+      "epoch": 3.28,
+      "grad_norm": 1.8846665620803833,
+      "learning_rate": 6.588571428571429e-06,
+      "loss": 0.0321,
+      "step": 1700
+    },
+    {
+      "epoch": 3.33,
+      "grad_norm": 1.2387264966964722,
+      "learning_rate": 6.517142857142858e-06,
+      "loss": 0.0327,
+      "step": 1725
+    },
+    {
+      "epoch": 3.38,
+      "grad_norm": 1.4381897449493408,
+      "learning_rate": 6.445714285714286e-06,
+      "loss": 0.0354,
+      "step": 1750
+    },
+    {
+      "epoch": 3.43,
+      "grad_norm": 2.2566747665405273,
+      "learning_rate": 6.374285714285715e-06,
+      "loss": 0.0368,
+      "step": 1775
+    },
+    {
+      "epoch": 3.47,
+      "grad_norm": 1.6017179489135742,
+      "learning_rate": 6.302857142857144e-06,
+      "loss": 0.0351,
+      "step": 1800
+    },
+    {
+      "epoch": 3.52,
+      "grad_norm": 3.2970995903015137,
+      "learning_rate": 6.231428571428571e-06,
+      "loss": 0.0332,
+      "step": 1825
+    },
+    {
+      "epoch": 3.57,
+      "grad_norm": 2.8597681522369385,
+      "learning_rate": 6.16e-06,
+      "loss": 0.0355,
+      "step": 1850
+    },
+    {
+      "epoch": 3.62,
+      "grad_norm": 2.8335232734680176,
+      "learning_rate": 6.088571428571429e-06,
+      "loss": 0.0338,
+      "step": 1875
+    },
+    {
+      "epoch": 3.67,
+      "grad_norm": 3.405618667602539,
+      "learning_rate": 6.017142857142858e-06,
+      "loss": 0.0376,
+      "step": 1900
+    },
+    {
+      "epoch": 3.72,
+      "grad_norm": 2.4519917964935303,
+      "learning_rate": 5.945714285714286e-06,
+      "loss": 0.0359,
+      "step": 1925
+    },
+    {
+      "epoch": 3.76,
+      "grad_norm": 3.2067463397979736,
+      "learning_rate": 5.874285714285715e-06,
+      "loss": 0.029,
+      "step": 1950
+    },
+    {
+      "epoch": 3.81,
+      "grad_norm": 2.5410799980163574,
+      "learning_rate": 5.802857142857144e-06,
+      "loss": 0.0313,
+      "step": 1975
+    },
+    {
+      "epoch": 3.86,
+      "grad_norm": 1.2326685190200806,
+      "learning_rate": 5.731428571428572e-06,
+      "loss": 0.0379,
+      "step": 2000
+    },
+    {
+      "epoch": 3.86,
+      "eval_loss": 0.28133508563041687,
+      "eval_runtime": 3847.5516,
+      "eval_samples_per_second": 0.94,
+      "eval_steps_per_second": 0.118,
+      "eval_wer": 17.470580026978,
+      "step": 2000
+    },
+    {
+      "epoch": 3.91,
+      "grad_norm": 2.7244298458099365,
+      "learning_rate": 5.66e-06,
+      "loss": 0.0356,
+      "step": 2025
+    },
+    {
+      "epoch": 3.96,
+      "grad_norm": 2.446558952331543,
+      "learning_rate": 5.588571428571429e-06,
+      "loss": 0.0273,
+      "step": 2050
+    },
+    {
+      "epoch": 4.01,
+      "grad_norm": 0.7038655281066895,
+      "learning_rate": 5.517142857142857e-06,
+      "loss": 0.0298,
+      "step": 2075
+    },
+    {
+      "epoch": 4.05,
+      "grad_norm": 0.8899651765823364,
+      "learning_rate": 5.445714285714286e-06,
+      "loss": 0.0162,
+      "step": 2100
+    },
+    {
+      "epoch": 4.1,
+      "grad_norm": 2.154207944869995,
+      "learning_rate": 5.374285714285715e-06,
+      "loss": 0.0143,
+      "step": 2125
+    },
+    {
+      "epoch": 4.15,
+      "grad_norm": 1.203642725944519,
+      "learning_rate": 5.3028571428571425e-06,
+      "loss": 0.0142,
+      "step": 2150
+    },
+    {
+      "epoch": 4.2,
+      "grad_norm": 0.45312538743019104,
+      "learning_rate": 5.2314285714285716e-06,
+      "loss": 0.0133,
+      "step": 2175
+    },
+    {
+      "epoch": 4.25,
+      "grad_norm": 1.146144151687622,
+      "learning_rate": 5.1600000000000006e-06,
+      "loss": 0.0142,
+      "step": 2200
+    },
+    {
+      "epoch": 4.3,
+      "grad_norm": 2.2396395206451416,
+      "learning_rate": 5.08857142857143e-06,
+      "loss": 0.0163,
+      "step": 2225
+    },
+    {
+      "epoch": 4.34,
+      "grad_norm": 0.6650336384773254,
+      "learning_rate": 5.017142857142857e-06,
+      "loss": 0.0119,
+      "step": 2250
+    },
+    {
+      "epoch": 4.39,
+      "grad_norm": 0.5963819026947021,
+      "learning_rate": 4.945714285714286e-06,
+      "loss": 0.0173,
+      "step": 2275
+    },
+    {
+      "epoch": 4.44,
+      "grad_norm": 1.5120543241500854,
+      "learning_rate": 4.874285714285715e-06,
+      "loss": 0.0148,
+      "step": 2300
+    },
+    {
+      "epoch": 4.49,
+      "grad_norm": 0.7102583050727844,
+      "learning_rate": 4.802857142857143e-06,
+      "loss": 0.0121,
+      "step": 2325
+    },
+    {
+      "epoch": 4.54,
+      "grad_norm": 1.2483290433883667,
+      "learning_rate": 4.731428571428572e-06,
+      "loss": 0.0169,
+      "step": 2350
+    },
+    {
+      "epoch": 4.58,
+      "grad_norm": 1.8999782800674438,
+      "learning_rate": 4.66e-06,
+      "loss": 0.013,
+      "step": 2375
+    },
+    {
+      "epoch": 4.63,
+      "grad_norm": 0.893521249294281,
+      "learning_rate": 4.588571428571429e-06,
+      "loss": 0.0149,
+      "step": 2400
+    },
+    {
+      "epoch": 4.68,
+      "grad_norm": 1.4212769269943237,
+      "learning_rate": 4.5171428571428575e-06,
+      "loss": 0.0132,
+      "step": 2425
+    },
+    {
+      "epoch": 4.73,
+      "grad_norm": 0.5899932980537415,
+      "learning_rate": 4.445714285714286e-06,
+      "loss": 0.018,
+      "step": 2450
+    },
+    {
+      "epoch": 4.78,
+      "grad_norm": 0.5150299668312073,
+      "learning_rate": 4.374285714285715e-06,
+      "loss": 0.012,
+      "step": 2475
+    },
+    {
+      "epoch": 4.83,
+      "grad_norm": 1.5570229291915894,
+      "learning_rate": 4.302857142857143e-06,
+      "loss": 0.0121,
+      "step": 2500
+    },
+    {
+      "epoch": 4.87,
+      "grad_norm": 1.727569818496704,
+      "learning_rate": 4.231428571428572e-06,
+      "loss": 0.0117,
+      "step": 2525
+    },
+    {
+      "epoch": 4.92,
+      "grad_norm": 0.8942755460739136,
+      "learning_rate": 4.16e-06,
+      "loss": 0.0125,
+      "step": 2550
+    },
+    {
+      "epoch": 4.97,
+      "grad_norm": 2.9615914821624756,
+      "learning_rate": 4.088571428571429e-06,
+      "loss": 0.0129,
+      "step": 2575
+    },
+    {
+      "epoch": 5.02,
+      "grad_norm": 0.27788445353507996,
+      "learning_rate": 4.017142857142857e-06,
+      "loss": 0.0101,
+      "step": 2600
+    },
+    {
+      "epoch": 5.07,
+      "grad_norm": 0.5583022832870483,
+      "learning_rate": 3.945714285714286e-06,
+      "loss": 0.007,
+      "step": 2625
+    },
+    {
+      "epoch": 5.12,
+      "grad_norm": 0.392986536026001,
+      "learning_rate": 3.874285714285715e-06,
+      "loss": 0.0065,
+      "step": 2650
+    },
+    {
+      "epoch": 5.16,
+      "grad_norm": 0.7827091813087463,
+      "learning_rate": 3.802857142857143e-06,
+      "loss": 0.0054,
+      "step": 2675
+    },
+    {
+      "epoch": 5.21,
+      "grad_norm": 0.5473342537879944,
+      "learning_rate": 3.731428571428572e-06,
+      "loss": 0.0068,
+      "step": 2700
+    },
+    {
+      "epoch": 5.26,
+      "grad_norm": 0.545080840587616,
+      "learning_rate": 3.66e-06,
+      "loss": 0.0055,
+      "step": 2725
+    },
+    {
+      "epoch": 5.31,
+      "grad_norm": 0.44736358523368835,
+      "learning_rate": 3.588571428571429e-06,
+      "loss": 0.0085,
+      "step": 2750
+    },
+    {
+      "epoch": 5.36,
+      "grad_norm": 0.7332315444946289,
+      "learning_rate": 3.5171428571428573e-06,
+      "loss": 0.0071,
+      "step": 2775
+    },
+    {
+      "epoch": 5.41,
+      "grad_norm": 0.6373933553695679,
+      "learning_rate": 3.4457142857142863e-06,
+      "loss": 0.0058,
+      "step": 2800
+    },
+    {
+      "epoch": 5.45,
+      "grad_norm": 0.47440409660339355,
+      "learning_rate": 3.3742857142857145e-06,
+      "loss": 0.0056,
+      "step": 2825
+    },
+    {
+      "epoch": 5.5,
+      "grad_norm": 0.23941631615161896,
+      "learning_rate": 3.302857142857143e-06,
+      "loss": 0.0046,
+      "step": 2850
+    },
+    {
+      "epoch": 5.55,
+      "grad_norm": 0.5772097110748291,
+      "learning_rate": 3.2314285714285716e-06,
+      "loss": 0.0055,
+      "step": 2875
+    },
+    {
+      "epoch": 5.6,
+      "grad_norm": 0.22144152224063873,
+      "learning_rate": 3.1600000000000002e-06,
+      "loss": 0.0048,
+      "step": 2900
+    },
+    {
+      "epoch": 5.65,
+      "grad_norm": 0.28436514735221863,
+      "learning_rate": 3.0885714285714284e-06,
+      "loss": 0.0052,
+      "step": 2925
+    },
+    {
+      "epoch": 5.69,
+      "grad_norm": 0.2404721975326538,
+      "learning_rate": 3.0171428571428574e-06,
+      "loss": 0.0056,
+      "step": 2950
+    },
+    {
+      "epoch": 5.74,
+      "grad_norm": 0.49395838379859924,
+      "learning_rate": 2.945714285714286e-06,
+      "loss": 0.005,
+      "step": 2975
+    },
+    {
+      "epoch": 5.79,
+      "grad_norm": 0.32905882596969604,
+      "learning_rate": 2.8742857142857146e-06,
+      "loss": 0.006,
+      "step": 3000
+    },
+    {
+      "epoch": 5.79,
+      "eval_loss": 0.32524359226226807,
+      "eval_runtime": 2778.5628,
+      "eval_samples_per_second": 1.302,
+      "eval_steps_per_second": 0.163,
+      "eval_wer": 17.18219452067538,
+      "step": 3000
+    },
+    {
+      "epoch": 5.84,
+      "grad_norm": 0.21157915890216827,
+      "learning_rate": 2.802857142857143e-06,
+      "loss": 0.0053,
+      "step": 3025
+    },
+    {
+      "epoch": 5.89,
+      "grad_norm": 1.544253945350647,
+      "learning_rate": 2.7314285714285714e-06,
+      "loss": 0.0048,
+      "step": 3050
+    },
+    {
+      "epoch": 5.94,
+      "grad_norm": 1.376765251159668,
+      "learning_rate": 2.6600000000000004e-06,
+      "loss": 0.0059,
+      "step": 3075
+    },
+    {
+      "epoch": 5.98,
+      "grad_norm": 0.9934395551681519,
+      "learning_rate": 2.5885714285714285e-06,
+      "loss": 0.0063,
+      "step": 3100
+    },
+    {
+      "epoch": 6.03,
+      "grad_norm": 0.12821950018405914,
+      "learning_rate": 2.5171428571428575e-06,
+      "loss": 0.0071,
+      "step": 3125
+    },
+    {
+      "epoch": 6.08,
+      "grad_norm": 0.1806168109178543,
+      "learning_rate": 2.445714285714286e-06,
+      "loss": 0.0033,
+      "step": 3150
+    },
+    {
+      "epoch": 6.13,
+      "grad_norm": 0.17940457165241241,
+      "learning_rate": 2.3742857142857147e-06,
+      "loss": 0.0036,
+      "step": 3175
+    },
+    {
+      "epoch": 6.18,
+      "grad_norm": 0.16006788611412048,
+      "learning_rate": 2.302857142857143e-06,
+      "loss": 0.0034,
+      "step": 3200
+    },
+    {
+      "epoch": 6.23,
+      "grad_norm": 0.14261183142662048,
+      "learning_rate": 2.2314285714285715e-06,
+      "loss": 0.003,
+      "step": 3225
+    },
+    {
+      "epoch": 6.27,
+      "grad_norm": 0.17255297303199768,
+      "learning_rate": 2.16e-06,
+      "loss": 0.003,
+      "step": 3250
+    },
+    {
+      "epoch": 6.32,
+      "grad_norm": 0.17864187061786652,
+      "learning_rate": 2.0885714285714287e-06,
+      "loss": 0.0045,
+      "step": 3275
+    },
+    {
+      "epoch": 6.37,
+      "grad_norm": 0.1522483378648758,
+      "learning_rate": 2.0171428571428573e-06,
+      "loss": 0.0032,
+      "step": 3300
+    },
+    {
+      "epoch": 6.42,
+      "grad_norm": 0.137981116771698,
+      "learning_rate": 1.945714285714286e-06,
+      "loss": 0.0037,
+      "step": 3325
+    },
+    {
+      "epoch": 6.47,
+      "grad_norm": 0.14591948688030243,
+      "learning_rate": 1.8742857142857142e-06,
+      "loss": 0.0033,
+      "step": 3350
+    },
+    {
+      "epoch": 6.52,
+      "grad_norm": 0.18055008351802826,
+      "learning_rate": 1.8028571428571432e-06,
+      "loss": 0.0031,
+      "step": 3375
+    },
+    {
+      "epoch": 6.56,
+      "grad_norm": 0.5128812789916992,
+      "learning_rate": 1.7314285714285716e-06,
+      "loss": 0.0033,
+      "step": 3400
+    },
+    {
+      "epoch": 6.61,
+      "grad_norm": 0.3612448275089264,
+      "learning_rate": 1.6600000000000002e-06,
+      "loss": 0.0035,
+      "step": 3425
+    },
+    {
+      "epoch": 6.66,
+      "grad_norm": 0.2181573212146759,
+      "learning_rate": 1.5885714285714288e-06,
+      "loss": 0.0031,
+      "step": 3450
+    },
+    {
+      "epoch": 6.71,
+      "grad_norm": 0.14946699142456055,
+      "learning_rate": 1.5171428571428574e-06,
+      "loss": 0.003,
+      "step": 3475
+    },
+    {
+      "epoch": 6.76,
+      "grad_norm": 0.1762605756521225,
+      "learning_rate": 1.4457142857142858e-06,
+      "loss": 0.0029,
+      "step": 3500
+    },
+    {
+      "epoch": 6.81,
+      "grad_norm": 0.15365581214427948,
+      "learning_rate": 1.3742857142857143e-06,
+      "loss": 0.0029,
+      "step": 3525
+    },
+    {
+      "epoch": 6.85,
+      "grad_norm": 0.16057215631008148,
+      "learning_rate": 1.302857142857143e-06,
+      "loss": 0.0029,
+      "step": 3550
+    },
+    {
+      "epoch": 6.9,
+      "grad_norm": 0.16833922266960144,
+      "learning_rate": 1.2314285714285715e-06,
+      "loss": 0.0028,
+      "step": 3575
+    },
+    {
+      "epoch": 6.95,
+      "grad_norm": 0.12415596097707748,
+      "learning_rate": 1.1600000000000001e-06,
+      "loss": 0.0034,
+      "step": 3600
+    },
+    {
+      "epoch": 7.0,
+      "grad_norm": 0.1122882068157196,
+      "learning_rate": 1.0885714285714287e-06,
+      "loss": 0.0039,
+      "step": 3625
+    },
+    {
+      "epoch": 7.05,
+      "grad_norm": 0.1392960548400879,
+      "learning_rate": 1.0171428571428573e-06,
+      "loss": 0.0025,
+      "step": 3650
+    },
+    {
+      "epoch": 7.09,
+      "grad_norm": 0.12869219481945038,
+      "learning_rate": 9.457142857142858e-07,
+      "loss": 0.0024,
+      "step": 3675
+    },
+    {
+      "epoch": 7.14,
+      "grad_norm": 0.11602462083101273,
+      "learning_rate": 8.742857142857144e-07,
+      "loss": 0.0025,
+      "step": 3700
+    },
+    {
+      "epoch": 7.19,
+      "grad_norm": 0.14960308372974396,
+      "learning_rate": 8.028571428571429e-07,
+      "loss": 0.0025,
+      "step": 3725
+    },
+    {
+      "epoch": 7.24,
+      "grad_norm": 0.1800779402256012,
+      "learning_rate": 7.314285714285715e-07,
+      "loss": 0.0025,
+      "step": 3750
+    },
+    {
+      "epoch": 7.29,
+      "grad_norm": 0.09803508967161179,
+      "learning_rate": 6.6e-07,
+      "loss": 0.0026,
+      "step": 3775
+    },
+    {
+      "epoch": 7.34,
+      "grad_norm": 0.1335836946964264,
+      "learning_rate": 5.885714285714286e-07,
+      "loss": 0.0029,
+      "step": 3800
+    },
+    {
+      "epoch": 7.38,
+      "grad_norm": 0.13514183461666107,
+      "learning_rate": 5.171428571428572e-07,
+      "loss": 0.0025,
+      "step": 3825
+    },
+    {
+      "epoch": 7.43,
+      "grad_norm": 0.13820233941078186,
+      "learning_rate": 4.457142857142858e-07,
+      "loss": 0.0025,
+      "step": 3850
+    },
+    {
+      "epoch": 7.48,
+      "grad_norm": 0.12157788872718811,
+      "learning_rate": 3.7428571428571434e-07,
+      "loss": 0.0027,
+      "step": 3875
+    },
+    {
+      "epoch": 7.53,
+      "grad_norm": 0.13221661746501923,
+      "learning_rate": 3.028571428571429e-07,
+      "loss": 0.0024,
+      "step": 3900
+    },
+    {
+      "epoch": 7.58,
+      "grad_norm": 0.1175406202673912,
+      "learning_rate": 2.3142857142857144e-07,
+      "loss": 0.0025,
+      "step": 3925
+    },
+    {
+      "epoch": 7.63,
+      "grad_norm": 0.14412976801395416,
+      "learning_rate": 1.6e-07,
+      "loss": 0.0029,
+      "step": 3950
+    },
+    {
+      "epoch": 7.67,
+      "grad_norm": 0.13637402653694153,
+      "learning_rate": 8.857142857142858e-08,
+      "loss": 0.0024,
+      "step": 3975
+    },
+    {
+      "epoch": 7.72,
+      "grad_norm": 0.1199258491396904,
+      "learning_rate": 1.7142857142857143e-08,
+      "loss": 0.0024,
+      "step": 4000
+    },
+    {
+      "epoch": 7.72,
+      "eval_loss": 0.33461278676986694,
+      "eval_runtime": 2683.6147,
+      "eval_samples_per_second": 1.348,
+      "eval_steps_per_second": 0.169,
+      "eval_wer": 17.051955904925812,
+      "step": 4000
+    }
+  ],
+  "logging_steps": 25,
+  "max_steps": 4000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 8,
+  "save_steps": 1000,
+  "total_flos": 1.844118425124864e+19,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-4000/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d97a7d674ba5458016027e8fcbeac271e6fe45c463b2f89dfb01782b9ae4928d
+size 5112