skylord commited on
Commit
3d31777
1 Parent(s): 7e3ace4

Add model files

Browse files
all_results.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
- "epoch": 33.0,
3
- "init_mem_cpu_alloc_delta": 65159,
4
  "init_mem_cpu_peaked_delta": 18306,
5
  "init_mem_gpu_alloc_delta": 1261972480,
6
  "init_mem_gpu_peaked_delta": 0,
7
- "total_flos": 4.638603148454345e+19,
8
  "train_mem_cpu_alloc_delta": 0,
9
  "train_mem_cpu_peaked_delta": 0,
10
- "train_mem_gpu_alloc_delta": 6365176832,
11
  "train_mem_gpu_peaked_delta": 0,
12
- "train_runtime": 3655.2719,
13
  "train_samples": 7434,
14
- "train_samples_per_second": 2.095
15
  }
1
  {
2
+ "epoch": 40.0,
3
+ "init_mem_cpu_alloc_delta": 64929,
4
  "init_mem_cpu_peaked_delta": 18306,
5
  "init_mem_gpu_alloc_delta": 1261972480,
6
  "init_mem_gpu_peaked_delta": 0,
7
+ "total_flos": 5.602791262432019e+19,
8
  "train_mem_cpu_alloc_delta": 0,
9
  "train_mem_cpu_peaked_delta": 0,
10
+ "train_mem_gpu_alloc_delta": 6367137280,
11
  "train_mem_gpu_peaked_delta": 0,
12
+ "train_runtime": 9410.6193,
13
  "train_samples": 7434,
14
+ "train_samples_per_second": 0.986
15
  }
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "/workspace/data2/el/wav2vec2-large-xlsr-greek/checkpoint-7656",
3
  "activation_dropout": 0.0,
4
  "apply_spec_augment": true,
5
  "architectures": [
1
  {
2
+ "_name_or_path": "/workspace/data2/el/wav2vec2-large-xlsr-greek/checkpoint-9280",
3
  "activation_dropout": 0.0,
4
  "apply_spec_augment": true,
5
  "architectures": [
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a303ff6f524b384a16a252f54365fe10e1a6f856be9b4e837b29419acc431958
3
  size 1262151127
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17f7a6c0b25909b375e4ee72d8cc71e87ddde508311325b2e8562d4bb69c7c91
3
  size 1262151127
train_results.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
- "epoch": 33.0,
3
- "init_mem_cpu_alloc_delta": 65159,
4
  "init_mem_cpu_peaked_delta": 18306,
5
  "init_mem_gpu_alloc_delta": 1261972480,
6
  "init_mem_gpu_peaked_delta": 0,
7
- "total_flos": 4.638603148454345e+19,
8
  "train_mem_cpu_alloc_delta": 0,
9
  "train_mem_cpu_peaked_delta": 0,
10
- "train_mem_gpu_alloc_delta": 6365176832,
11
  "train_mem_gpu_peaked_delta": 0,
12
- "train_runtime": 3655.2719,
13
  "train_samples": 7434,
14
- "train_samples_per_second": 2.095
15
  }
1
  {
2
+ "epoch": 40.0,
3
+ "init_mem_cpu_alloc_delta": 64929,
4
  "init_mem_cpu_peaked_delta": 18306,
5
  "init_mem_gpu_alloc_delta": 1261972480,
6
  "init_mem_gpu_peaked_delta": 0,
7
+ "total_flos": 5.602791262432019e+19,
8
  "train_mem_cpu_alloc_delta": 0,
9
  "train_mem_cpu_peaked_delta": 0,
10
+ "train_mem_gpu_alloc_delta": 6367137280,
11
  "train_mem_gpu_peaked_delta": 0,
12
+ "train_runtime": 9410.6193,
13
  "train_samples": 7434,
14
+ "train_samples_per_second": 0.986
15
  }
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 0.42152013727667303,
3
- "best_model_checkpoint": "/workspace/data2/el/wav2vec2-large-xlsr-greek/checkpoint-7656",
4
- "epoch": 32.99784946236559,
5
- "global_step": 7656,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -4926,16 +4926,1050 @@
4926
  "step": 7656
4927
  },
4928
  {
4929
- "epoch": 33.0,
4930
- "step": 7656,
4931
- "total_flos": 4.638603148454345e+19,
4932
- "train_runtime": 3655.2719,
4933
- "train_samples_per_second": 2.095
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4934
  }
4935
  ],
4936
- "max_steps": 7656,
4937
- "num_train_epochs": 33,
4938
- "total_flos": 4.638603148454345e+19,
4939
  "trial_name": null,
4940
  "trial_params": null
4941
  }
1
  {
2
+ "best_metric": 0.3336529726456041,
3
+ "best_model_checkpoint": "/workspace/data2/el/wav2vec2-large-xlsr-greek/checkpoint-9280",
4
+ "epoch": 39.99784946236559,
5
+ "global_step": 9280,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
4926
  "step": 7656
4927
  },
4928
  {
4929
+ "epoch": 33.02,
4930
+ "learning_rate": 5.248380129589633e-05,
4931
+ "loss": 0.0542,
4932
+ "step": 7660
4933
+ },
4934
+ {
4935
+ "epoch": 33.06,
4936
+ "learning_rate": 5.2159827213822885e-05,
4937
+ "loss": 0.0352,
4938
+ "step": 7670
4939
+ },
4940
+ {
4941
+ "epoch": 33.1,
4942
+ "learning_rate": 5.183585313174945e-05,
4943
+ "loss": 0.0387,
4944
+ "step": 7680
4945
+ },
4946
+ {
4947
+ "epoch": 33.15,
4948
+ "learning_rate": 5.151187904967602e-05,
4949
+ "loss": 0.065,
4950
+ "step": 7690
4951
+ },
4952
+ {
4953
+ "epoch": 33.19,
4954
+ "learning_rate": 5.118790496760259e-05,
4955
+ "loss": 0.0542,
4956
+ "step": 7700
4957
+ },
4958
+ {
4959
+ "epoch": 33.23,
4960
+ "learning_rate": 5.086393088552915e-05,
4961
+ "loss": 0.0498,
4962
+ "step": 7710
4963
+ },
4964
+ {
4965
+ "epoch": 33.28,
4966
+ "learning_rate": 5.053995680345572e-05,
4967
+ "loss": 0.0771,
4968
+ "step": 7720
4969
+ },
4970
+ {
4971
+ "epoch": 33.32,
4972
+ "learning_rate": 5.021598272138229e-05,
4973
+ "loss": 0.0614,
4974
+ "step": 7730
4975
+ },
4976
+ {
4977
+ "epoch": 33.36,
4978
+ "learning_rate": 4.9892008639308855e-05,
4979
+ "loss": 0.0633,
4980
+ "step": 7740
4981
+ },
4982
+ {
4983
+ "epoch": 33.4,
4984
+ "learning_rate": 4.956803455723542e-05,
4985
+ "loss": 0.0574,
4986
+ "step": 7750
4987
+ },
4988
+ {
4989
+ "epoch": 33.45,
4990
+ "learning_rate": 4.924406047516198e-05,
4991
+ "loss": 0.038,
4992
+ "step": 7760
4993
+ },
4994
+ {
4995
+ "epoch": 33.49,
4996
+ "learning_rate": 4.8920086393088544e-05,
4997
+ "loss": 0.0588,
4998
+ "step": 7770
4999
+ },
5000
+ {
5001
+ "epoch": 33.53,
5002
+ "learning_rate": 4.8596112311015116e-05,
5003
+ "loss": 0.0484,
5004
+ "step": 7780
5005
+ },
5006
+ {
5007
+ "epoch": 33.58,
5008
+ "learning_rate": 4.827213822894168e-05,
5009
+ "loss": 0.0645,
5010
+ "step": 7790
5011
+ },
5012
+ {
5013
+ "epoch": 33.62,
5014
+ "learning_rate": 4.7948164146868247e-05,
5015
+ "loss": 0.0545,
5016
+ "step": 7800
5017
+ },
5018
+ {
5019
+ "epoch": 33.66,
5020
+ "learning_rate": 4.762419006479481e-05,
5021
+ "loss": 0.0617,
5022
+ "step": 7810
5023
+ },
5024
+ {
5025
+ "epoch": 33.71,
5026
+ "learning_rate": 4.7300215982721384e-05,
5027
+ "loss": 0.0899,
5028
+ "step": 7820
5029
+ },
5030
+ {
5031
+ "epoch": 33.75,
5032
+ "learning_rate": 4.697624190064795e-05,
5033
+ "loss": 0.0613,
5034
+ "step": 7830
5035
+ },
5036
+ {
5037
+ "epoch": 33.79,
5038
+ "learning_rate": 4.665226781857451e-05,
5039
+ "loss": 0.065,
5040
+ "step": 7840
5041
+ },
5042
+ {
5043
+ "epoch": 33.83,
5044
+ "learning_rate": 4.632829373650107e-05,
5045
+ "loss": 0.0541,
5046
+ "step": 7850
5047
+ },
5048
+ {
5049
+ "epoch": 33.88,
5050
+ "learning_rate": 4.600431965442764e-05,
5051
+ "loss": 0.0532,
5052
+ "step": 7860
5053
+ },
5054
+ {
5055
+ "epoch": 33.92,
5056
+ "learning_rate": 4.568034557235421e-05,
5057
+ "loss": 0.0704,
5058
+ "step": 7870
5059
+ },
5060
+ {
5061
+ "epoch": 33.96,
5062
+ "learning_rate": 4.5356371490280775e-05,
5063
+ "loss": 0.0565,
5064
+ "step": 7880
5065
+ },
5066
+ {
5067
+ "epoch": 34.0,
5068
+ "eval_loss": 0.3123264014720917,
5069
+ "eval_runtime": 273.8278,
5070
+ "eval_samples_per_second": 11.116,
5071
+ "eval_wer": 0.3369839507418997,
5072
+ "step": 7888
5073
+ },
5074
+ {
5075
+ "epoch": 34.01,
5076
+ "learning_rate": 4.503239740820734e-05,
5077
+ "loss": 0.0576,
5078
+ "step": 7890
5079
+ },
5080
+ {
5081
+ "epoch": 34.05,
5082
+ "learning_rate": 4.47084233261339e-05,
5083
+ "loss": 0.0737,
5084
+ "step": 7900
5085
+ },
5086
+ {
5087
+ "epoch": 34.09,
5088
+ "learning_rate": 4.438444924406048e-05,
5089
+ "loss": 0.0541,
5090
+ "step": 7910
5091
+ },
5092
+ {
5093
+ "epoch": 34.14,
5094
+ "learning_rate": 4.4060475161987036e-05,
5095
+ "loss": 0.0416,
5096
+ "step": 7920
5097
+ },
5098
+ {
5099
+ "epoch": 34.18,
5100
+ "learning_rate": 4.37365010799136e-05,
5101
+ "loss": 0.0541,
5102
+ "step": 7930
5103
+ },
5104
+ {
5105
+ "epoch": 34.22,
5106
+ "learning_rate": 4.3412526997840166e-05,
5107
+ "loss": 0.059,
5108
+ "step": 7940
5109
+ },
5110
+ {
5111
+ "epoch": 34.27,
5112
+ "learning_rate": 4.308855291576674e-05,
5113
+ "loss": 0.0538,
5114
+ "step": 7950
5115
+ },
5116
+ {
5117
+ "epoch": 34.31,
5118
+ "learning_rate": 4.2764578833693303e-05,
5119
+ "loss": 0.0795,
5120
+ "step": 7960
5121
+ },
5122
+ {
5123
+ "epoch": 34.35,
5124
+ "learning_rate": 4.244060475161987e-05,
5125
+ "loss": 0.0618,
5126
+ "step": 7970
5127
+ },
5128
+ {
5129
+ "epoch": 34.4,
5130
+ "learning_rate": 4.2116630669546434e-05,
5131
+ "loss": 0.0667,
5132
+ "step": 7980
5133
+ },
5134
+ {
5135
+ "epoch": 34.44,
5136
+ "learning_rate": 4.179265658747299e-05,
5137
+ "loss": 0.0501,
5138
+ "step": 7990
5139
+ },
5140
+ {
5141
+ "epoch": 34.48,
5142
+ "learning_rate": 4.1468682505399564e-05,
5143
+ "loss": 0.0564,
5144
+ "step": 8000
5145
+ },
5146
+ {
5147
+ "epoch": 34.52,
5148
+ "learning_rate": 4.114470842332613e-05,
5149
+ "loss": 0.0507,
5150
+ "step": 8010
5151
+ },
5152
+ {
5153
+ "epoch": 34.57,
5154
+ "learning_rate": 4.0820734341252695e-05,
5155
+ "loss": 0.0785,
5156
+ "step": 8020
5157
+ },
5158
+ {
5159
+ "epoch": 34.61,
5160
+ "learning_rate": 4.049676025917926e-05,
5161
+ "loss": 0.0697,
5162
+ "step": 8030
5163
+ },
5164
+ {
5165
+ "epoch": 34.65,
5166
+ "learning_rate": 4.017278617710583e-05,
5167
+ "loss": 0.0675,
5168
+ "step": 8040
5169
+ },
5170
+ {
5171
+ "epoch": 34.7,
5172
+ "learning_rate": 3.98488120950324e-05,
5173
+ "loss": 0.0516,
5174
+ "step": 8050
5175
+ },
5176
+ {
5177
+ "epoch": 34.74,
5178
+ "learning_rate": 3.952483801295896e-05,
5179
+ "loss": 0.0556,
5180
+ "step": 8060
5181
+ },
5182
+ {
5183
+ "epoch": 34.78,
5184
+ "learning_rate": 3.920086393088552e-05,
5185
+ "loss": 0.0739,
5186
+ "step": 8070
5187
+ },
5188
+ {
5189
+ "epoch": 34.83,
5190
+ "learning_rate": 3.887688984881209e-05,
5191
+ "loss": 0.0584,
5192
+ "step": 8080
5193
+ },
5194
+ {
5195
+ "epoch": 34.87,
5196
+ "learning_rate": 3.855291576673866e-05,
5197
+ "loss": 0.0827,
5198
+ "step": 8090
5199
+ },
5200
+ {
5201
+ "epoch": 34.91,
5202
+ "learning_rate": 3.822894168466522e-05,
5203
+ "loss": 0.0504,
5204
+ "step": 8100
5205
+ },
5206
+ {
5207
+ "epoch": 34.95,
5208
+ "learning_rate": 3.790496760259179e-05,
5209
+ "loss": 0.0521,
5210
+ "step": 8110
5211
+ },
5212
+ {
5213
+ "epoch": 35.0,
5214
+ "learning_rate": 3.7580993520518354e-05,
5215
+ "loss": 0.0568,
5216
+ "step": 8120
5217
+ },
5218
+ {
5219
+ "epoch": 35.0,
5220
+ "eval_loss": 0.30912891030311584,
5221
+ "eval_runtime": 273.0561,
5222
+ "eval_samples_per_second": 11.148,
5223
+ "eval_wer": 0.3364287877258504,
5224
+ "step": 8120
5225
+ },
5226
+ {
5227
+ "epoch": 35.04,
5228
+ "learning_rate": 3.725701943844492e-05,
5229
+ "loss": 0.0615,
5230
+ "step": 8130
5231
+ },
5232
+ {
5233
+ "epoch": 35.09,
5234
+ "learning_rate": 3.693304535637149e-05,
5235
+ "loss": 0.0569,
5236
+ "step": 8140
5237
+ },
5238
+ {
5239
+ "epoch": 35.13,
5240
+ "learning_rate": 3.660907127429805e-05,
5241
+ "loss": 0.0747,
5242
+ "step": 8150
5243
+ },
5244
+ {
5245
+ "epoch": 35.17,
5246
+ "learning_rate": 3.628509719222462e-05,
5247
+ "loss": 0.0527,
5248
+ "step": 8160
5249
+ },
5250
+ {
5251
+ "epoch": 35.22,
5252
+ "learning_rate": 3.5961123110151187e-05,
5253
+ "loss": 0.0622,
5254
+ "step": 8170
5255
+ },
5256
+ {
5257
+ "epoch": 35.26,
5258
+ "learning_rate": 3.563714902807775e-05,
5259
+ "loss": 0.0689,
5260
+ "step": 8180
5261
+ },
5262
+ {
5263
+ "epoch": 35.3,
5264
+ "learning_rate": 3.531317494600432e-05,
5265
+ "loss": 0.0658,
5266
+ "step": 8190
5267
+ },
5268
+ {
5269
+ "epoch": 35.34,
5270
+ "learning_rate": 3.498920086393088e-05,
5271
+ "loss": 0.0411,
5272
+ "step": 8200
5273
+ },
5274
+ {
5275
+ "epoch": 35.39,
5276
+ "learning_rate": 3.466522678185745e-05,
5277
+ "loss": 0.0732,
5278
+ "step": 8210
5279
+ },
5280
+ {
5281
+ "epoch": 35.43,
5282
+ "learning_rate": 3.434125269978401e-05,
5283
+ "loss": 0.0568,
5284
+ "step": 8220
5285
+ },
5286
+ {
5287
+ "epoch": 35.47,
5288
+ "learning_rate": 3.401727861771058e-05,
5289
+ "loss": 0.0512,
5290
+ "step": 8230
5291
+ },
5292
+ {
5293
+ "epoch": 35.52,
5294
+ "learning_rate": 3.369330453563714e-05,
5295
+ "loss": 0.0373,
5296
+ "step": 8240
5297
+ },
5298
+ {
5299
+ "epoch": 35.56,
5300
+ "learning_rate": 3.3369330453563715e-05,
5301
+ "loss": 0.0667,
5302
+ "step": 8250
5303
+ },
5304
+ {
5305
+ "epoch": 35.6,
5306
+ "learning_rate": 3.3045356371490274e-05,
5307
+ "loss": 0.0433,
5308
+ "step": 8260
5309
+ },
5310
+ {
5311
+ "epoch": 35.65,
5312
+ "learning_rate": 3.2721382289416845e-05,
5313
+ "loss": 0.0595,
5314
+ "step": 8270
5315
+ },
5316
+ {
5317
+ "epoch": 35.69,
5318
+ "learning_rate": 3.239740820734341e-05,
5319
+ "loss": 0.0569,
5320
+ "step": 8280
5321
+ },
5322
+ {
5323
+ "epoch": 35.73,
5324
+ "learning_rate": 3.2073434125269976e-05,
5325
+ "loss": 0.0612,
5326
+ "step": 8290
5327
+ },
5328
+ {
5329
+ "epoch": 35.77,
5330
+ "learning_rate": 3.174946004319654e-05,
5331
+ "loss": 0.0681,
5332
+ "step": 8300
5333
+ },
5334
+ {
5335
+ "epoch": 35.82,
5336
+ "learning_rate": 3.1425485961123106e-05,
5337
+ "loss": 0.0579,
5338
+ "step": 8310
5339
+ },
5340
+ {
5341
+ "epoch": 35.86,
5342
+ "learning_rate": 3.110151187904967e-05,
5343
+ "loss": 0.0397,
5344
+ "step": 8320
5345
+ },
5346
+ {
5347
+ "epoch": 35.9,
5348
+ "learning_rate": 3.0777537796976244e-05,
5349
+ "loss": 0.0604,
5350
+ "step": 8330
5351
+ },
5352
+ {
5353
+ "epoch": 35.95,
5354
+ "learning_rate": 3.0453563714902805e-05,
5355
+ "loss": 0.0669,
5356
+ "step": 8340
5357
+ },
5358
+ {
5359
+ "epoch": 35.99,
5360
+ "learning_rate": 3.012958963282937e-05,
5361
+ "loss": 0.0616,
5362
+ "step": 8350
5363
+ },
5364
+ {
5365
+ "epoch": 36.0,
5366
+ "eval_loss": 0.31008386611938477,
5367
+ "eval_runtime": 276.5254,
5368
+ "eval_samples_per_second": 11.008,
5369
+ "eval_wer": 0.3399616432825275,
5370
+ "step": 8352
5371
+ },
5372
+ {
5373
+ "epoch": 36.03,
5374
+ "learning_rate": 2.9805615550755936e-05,
5375
+ "loss": 0.0434,
5376
+ "step": 8360
5377
+ },
5378
+ {
5379
+ "epoch": 36.08,
5380
+ "learning_rate": 2.94816414686825e-05,
5381
+ "loss": 0.0598,
5382
+ "step": 8370
5383
+ },
5384
+ {
5385
+ "epoch": 36.12,
5386
+ "learning_rate": 2.915766738660907e-05,
5387
+ "loss": 0.0565,
5388
+ "step": 8380
5389
+ },
5390
+ {
5391
+ "epoch": 36.16,
5392
+ "learning_rate": 2.8833693304535635e-05,
5393
+ "loss": 0.056,
5394
+ "step": 8390
5395
+ },
5396
+ {
5397
+ "epoch": 36.21,
5398
+ "learning_rate": 2.85097192224622e-05,
5399
+ "loss": 0.0563,
5400
+ "step": 8400
5401
+ },
5402
+ {
5403
+ "epoch": 36.25,
5404
+ "learning_rate": 2.8185745140388765e-05,
5405
+ "loss": 0.064,
5406
+ "step": 8410
5407
+ },
5408
+ {
5409
+ "epoch": 36.29,
5410
+ "learning_rate": 2.7861771058315334e-05,
5411
+ "loss": 0.0466,
5412
+ "step": 8420
5413
+ },
5414
+ {
5415
+ "epoch": 36.34,
5416
+ "learning_rate": 2.75377969762419e-05,
5417
+ "loss": 0.0445,
5418
+ "step": 8430
5419
+ },
5420
+ {
5421
+ "epoch": 36.38,
5422
+ "learning_rate": 2.7213822894168468e-05,
5423
+ "loss": 0.0504,
5424
+ "step": 8440
5425
+ },
5426
+ {
5427
+ "epoch": 36.42,
5428
+ "learning_rate": 2.688984881209503e-05,
5429
+ "loss": 0.0715,
5430
+ "step": 8450
5431
+ },
5432
+ {
5433
+ "epoch": 36.46,
5434
+ "learning_rate": 2.6565874730021595e-05,
5435
+ "loss": 0.0488,
5436
+ "step": 8460
5437
+ },
5438
+ {
5439
+ "epoch": 36.51,
5440
+ "learning_rate": 2.6241900647948163e-05,
5441
+ "loss": 0.0493,
5442
+ "step": 8470
5443
+ },
5444
+ {
5445
+ "epoch": 36.55,
5446
+ "learning_rate": 2.5917926565874725e-05,
5447
+ "loss": 0.0395,
5448
+ "step": 8480
5449
+ },
5450
+ {
5451
+ "epoch": 36.59,
5452
+ "learning_rate": 2.5593952483801294e-05,
5453
+ "loss": 0.0683,
5454
+ "step": 8490
5455
+ },
5456
+ {
5457
+ "epoch": 36.64,
5458
+ "learning_rate": 2.526997840172786e-05,
5459
+ "loss": 0.0584,
5460
+ "step": 8500
5461
+ },
5462
+ {
5463
+ "epoch": 36.68,
5464
+ "learning_rate": 2.4946004319654428e-05,
5465
+ "loss": 0.07,
5466
+ "step": 8510
5467
+ },
5468
+ {
5469
+ "epoch": 36.72,
5470
+ "learning_rate": 2.462203023758099e-05,
5471
+ "loss": 0.0448,
5472
+ "step": 8520
5473
+ },
5474
+ {
5475
+ "epoch": 36.77,
5476
+ "learning_rate": 2.4298056155507558e-05,
5477
+ "loss": 0.0443,
5478
+ "step": 8530
5479
+ },
5480
+ {
5481
+ "epoch": 36.81,
5482
+ "learning_rate": 2.3974082073434123e-05,
5483
+ "loss": 0.0586,
5484
+ "step": 8540
5485
+ },
5486
+ {
5487
+ "epoch": 36.85,
5488
+ "learning_rate": 2.3650107991360692e-05,
5489
+ "loss": 0.069,
5490
+ "step": 8550
5491
+ },
5492
+ {
5493
+ "epoch": 36.89,
5494
+ "learning_rate": 2.3326133909287254e-05,
5495
+ "loss": 0.0621,
5496
+ "step": 8560
5497
+ },
5498
+ {
5499
+ "epoch": 36.94,
5500
+ "learning_rate": 2.300215982721382e-05,
5501
+ "loss": 0.0617,
5502
+ "step": 8570
5503
+ },
5504
+ {
5505
+ "epoch": 36.98,
5506
+ "learning_rate": 2.2678185745140387e-05,
5507
+ "loss": 0.0589,
5508
+ "step": 8580
5509
+ },
5510
+ {
5511
+ "epoch": 37.0,
5512
+ "eval_loss": 0.3041793406009674,
5513
+ "eval_runtime": 277.9516,
5514
+ "eval_samples_per_second": 10.952,
5515
+ "eval_wer": 0.3337034420106995,
5516
+ "step": 8584
5517
+ },
5518
+ {
5519
+ "epoch": 37.03,
5520
+ "learning_rate": 2.235421166306695e-05,
5521
+ "loss": 0.0452,
5522
+ "step": 8590
5523
+ },
5524
+ {
5525
+ "epoch": 37.07,
5526
+ "learning_rate": 2.2030237580993518e-05,
5527
+ "loss": 0.0503,
5528
+ "step": 8600
5529
+ },
5530
+ {
5531
+ "epoch": 37.11,
5532
+ "learning_rate": 2.1706263498920083e-05,
5533
+ "loss": 0.0464,
5534
+ "step": 8610
5535
+ },
5536
+ {
5537
+ "epoch": 37.15,
5538
+ "learning_rate": 2.1382289416846652e-05,
5539
+ "loss": 0.0423,
5540
+ "step": 8620
5541
+ },
5542
+ {
5543
+ "epoch": 37.2,
5544
+ "learning_rate": 2.1058315334773217e-05,
5545
+ "loss": 0.0625,
5546
+ "step": 8630
5547
+ },
5548
+ {
5549
+ "epoch": 37.24,
5550
+ "learning_rate": 2.0734341252699782e-05,
5551
+ "loss": 0.0497,
5552
+ "step": 8640
5553
+ },
5554
+ {
5555
+ "epoch": 37.28,
5556
+ "learning_rate": 2.0410367170626347e-05,
5557
+ "loss": 0.0427,
5558
+ "step": 8650
5559
+ },
5560
+ {
5561
+ "epoch": 37.33,
5562
+ "learning_rate": 2.0086393088552916e-05,
5563
+ "loss": 0.0562,
5564
+ "step": 8660
5565
+ },
5566
+ {
5567
+ "epoch": 37.37,
5568
+ "learning_rate": 1.976241900647948e-05,
5569
+ "loss": 0.0371,
5570
+ "step": 8670
5571
+ },
5572
+ {
5573
+ "epoch": 37.41,
5574
+ "learning_rate": 1.9438444924406046e-05,
5575
+ "loss": 0.0661,
5576
+ "step": 8680
5577
+ },
5578
+ {
5579
+ "epoch": 37.46,
5580
+ "learning_rate": 1.911447084233261e-05,
5581
+ "loss": 0.0556,
5582
+ "step": 8690
5583
+ },
5584
+ {
5585
+ "epoch": 37.5,
5586
+ "learning_rate": 1.8790496760259177e-05,
5587
+ "loss": 0.0711,
5588
+ "step": 8700
5589
+ },
5590
+ {
5591
+ "epoch": 37.54,
5592
+ "learning_rate": 1.8466522678185745e-05,
5593
+ "loss": 0.0568,
5594
+ "step": 8710
5595
+ },
5596
+ {
5597
+ "epoch": 37.58,
5598
+ "learning_rate": 1.814254859611231e-05,
5599
+ "loss": 0.0389,
5600
+ "step": 8720
5601
+ },
5602
+ {
5603
+ "epoch": 37.63,
5604
+ "learning_rate": 1.7818574514038876e-05,
5605
+ "loss": 0.0668,
5606
+ "step": 8730
5607
+ },
5608
+ {
5609
+ "epoch": 37.67,
5610
+ "learning_rate": 1.749460043196544e-05,
5611
+ "loss": 0.0694,
5612
+ "step": 8740
5613
+ },
5614
+ {
5615
+ "epoch": 37.71,
5616
+ "learning_rate": 1.7170626349892006e-05,
5617
+ "loss": 0.0704,
5618
+ "step": 8750
5619
+ },
5620
+ {
5621
+ "epoch": 37.76,
5622
+ "learning_rate": 1.684665226781857e-05,
5623
+ "loss": 0.0373,
5624
+ "step": 8760
5625
+ },
5626
+ {
5627
+ "epoch": 37.8,
5628
+ "learning_rate": 1.6522678185745137e-05,
5629
+ "loss": 0.0595,
5630
+ "step": 8770
5631
+ },
5632
+ {
5633
+ "epoch": 37.84,
5634
+ "learning_rate": 1.6198704103671705e-05,
5635
+ "loss": 0.0489,
5636
+ "step": 8780
5637
+ },
5638
+ {
5639
+ "epoch": 37.89,
5640
+ "learning_rate": 1.587473002159827e-05,
5641
+ "loss": 0.0448,
5642
+ "step": 8790
5643
+ },
5644
+ {
5645
+ "epoch": 37.93,
5646
+ "learning_rate": 1.5550755939524836e-05,
5647
+ "loss": 0.0524,
5648
+ "step": 8800
5649
+ },
5650
+ {
5651
+ "epoch": 37.97,
5652
+ "learning_rate": 1.5226781857451403e-05,
5653
+ "loss": 0.0548,
5654
+ "step": 8810
5655
+ },
5656
+ {
5657
+ "epoch": 38.0,
5658
+ "eval_loss": 0.3078618049621582,
5659
+ "eval_runtime": 282.4379,
5660
+ "eval_samples_per_second": 10.778,
5661
+ "eval_wer": 0.3337539113757949,
5662
+ "step": 8816
5663
+ },
5664
+ {
5665
+ "epoch": 38.02,
5666
+ "learning_rate": 1.4902807775377968e-05,
5667
+ "loss": 0.0635,
5668
+ "step": 8820
5669
+ },
5670
+ {
5671
+ "epoch": 38.06,
5672
+ "learning_rate": 1.4578833693304535e-05,
5673
+ "loss": 0.0457,
5674
+ "step": 8830
5675
+ },
5676
+ {
5677
+ "epoch": 38.1,
5678
+ "learning_rate": 1.42548596112311e-05,
5679
+ "loss": 0.0474,
5680
+ "step": 8840
5681
+ },
5682
+ {
5683
+ "epoch": 38.15,
5684
+ "learning_rate": 1.3930885529157667e-05,
5685
+ "loss": 0.0921,
5686
+ "step": 8850
5687
+ },
5688
+ {
5689
+ "epoch": 38.19,
5690
+ "learning_rate": 1.3606911447084234e-05,
5691
+ "loss": 0.0377,
5692
+ "step": 8860
5693
+ },
5694
+ {
5695
+ "epoch": 38.23,
5696
+ "learning_rate": 1.3282937365010797e-05,
5697
+ "loss": 0.0458,
5698
+ "step": 8870
5699
+ },
5700
+ {
5701
+ "epoch": 38.28,
5702
+ "learning_rate": 1.2958963282937363e-05,
5703
+ "loss": 0.0417,
5704
+ "step": 8880
5705
+ },
5706
+ {
5707
+ "epoch": 38.32,
5708
+ "learning_rate": 1.263498920086393e-05,
5709
+ "loss": 0.0498,
5710
+ "step": 8890
5711
+ },
5712
+ {
5713
+ "epoch": 38.36,
5714
+ "learning_rate": 1.2311015118790495e-05,
5715
+ "loss": 0.0307,
5716
+ "step": 8900
5717
+ },
5718
+ {
5719
+ "epoch": 38.4,
5720
+ "learning_rate": 1.1987041036717062e-05,
5721
+ "loss": 0.0378,
5722
+ "step": 8910
5723
+ },
5724
+ {
5725
+ "epoch": 38.45,
5726
+ "learning_rate": 1.1663066954643627e-05,
5727
+ "loss": 0.0489,
5728
+ "step": 8920
5729
+ },
5730
+ {
5731
+ "epoch": 38.49,
5732
+ "learning_rate": 1.1339092872570194e-05,
5733
+ "loss": 0.0526,
5734
+ "step": 8930
5735
+ },
5736
+ {
5737
+ "epoch": 38.53,
5738
+ "learning_rate": 1.1015118790496759e-05,
5739
+ "loss": 0.0483,
5740
+ "step": 8940
5741
+ },
5742
+ {
5743
+ "epoch": 38.58,
5744
+ "learning_rate": 1.0691144708423326e-05,
5745
+ "loss": 0.0721,
5746
+ "step": 8950
5747
+ },
5748
+ {
5749
+ "epoch": 38.62,
5750
+ "learning_rate": 1.0367170626349891e-05,
5751
+ "loss": 0.052,
5752
+ "step": 8960
5753
+ },
5754
+ {
5755
+ "epoch": 38.66,
5756
+ "learning_rate": 1.0043196544276458e-05,
5757
+ "loss": 0.0487,
5758
+ "step": 8970
5759
+ },
5760
+ {
5761
+ "epoch": 38.71,
5762
+ "learning_rate": 9.719222462203023e-06,
5763
+ "loss": 0.0369,
5764
+ "step": 8980
5765
+ },
5766
+ {
5767
+ "epoch": 38.75,
5768
+ "learning_rate": 9.395248380129588e-06,
5769
+ "loss": 0.0776,
5770
+ "step": 8990
5771
+ },
5772
+ {
5773
+ "epoch": 38.79,
5774
+ "learning_rate": 9.071274298056155e-06,
5775
+ "loss": 0.0454,
5776
+ "step": 9000
5777
+ },
5778
+ {
5779
+ "epoch": 38.83,
5780
+ "learning_rate": 8.74730021598272e-06,
5781
+ "loss": 0.0452,
5782
+ "step": 9010
5783
+ },
5784
+ {
5785
+ "epoch": 38.88,
5786
+ "learning_rate": 8.423326133909286e-06,
5787
+ "loss": 0.0601,
5788
+ "step": 9020
5789
+ },
5790
+ {
5791
+ "epoch": 38.92,
5792
+ "learning_rate": 8.099352051835853e-06,
5793
+ "loss": 0.0488,
5794
+ "step": 9030
5795
+ },
5796
+ {
5797
+ "epoch": 38.96,
5798
+ "learning_rate": 7.775377969762418e-06,
5799
+ "loss": 0.0614,
5800
+ "step": 9040
5801
+ },
5802
+ {
5803
+ "epoch": 39.0,
5804
+ "eval_loss": 0.31897786259651184,
5805
+ "eval_runtime": 287.468,
5806
+ "eval_samples_per_second": 10.589,
5807
+ "eval_wer": 0.3349651761380842,
5808
+ "step": 9048
5809
+ },
5810
+ {
5811
+ "epoch": 39.01,
5812
+ "learning_rate": 7.451403887688984e-06,
5813
+ "loss": 0.0615,
5814
+ "step": 9050
5815
+ },
5816
+ {
5817
+ "epoch": 39.05,
5818
+ "learning_rate": 7.12742980561555e-06,
5819
+ "loss": 0.0412,
5820
+ "step": 9060
5821
+ },
5822
+ {
5823
+ "epoch": 39.09,
5824
+ "learning_rate": 6.803455723542117e-06,
5825
+ "loss": 0.0496,
5826
+ "step": 9070
5827
+ },
5828
+ {
5829
+ "epoch": 39.14,
5830
+ "learning_rate": 6.479481641468681e-06,
5831
+ "loss": 0.0524,
5832
+ "step": 9080
5833
+ },
5834
+ {
5835
+ "epoch": 39.18,
5836
+ "learning_rate": 6.155507559395247e-06,
5837
+ "loss": 0.0485,
5838
+ "step": 9090
5839
+ },
5840
+ {
5841
+ "epoch": 39.22,
5842
+ "learning_rate": 5.831533477321813e-06,
5843
+ "loss": 0.0646,
5844
+ "step": 9100
5845
+ },
5846
+ {
5847
+ "epoch": 39.27,
5848
+ "learning_rate": 5.5075593952483795e-06,
5849
+ "loss": 0.0602,
5850
+ "step": 9110
5851
+ },
5852
+ {
5853
+ "epoch": 39.31,
5854
+ "learning_rate": 5.1835853131749455e-06,
5855
+ "loss": 0.0623,
5856
+ "step": 9120
5857
+ },
5858
+ {
5859
+ "epoch": 39.35,
5860
+ "learning_rate": 4.859611231101512e-06,
5861
+ "loss": 0.0399,
5862
+ "step": 9130
5863
+ },
5864
+ {
5865
+ "epoch": 39.4,
5866
+ "learning_rate": 4.535637149028078e-06,
5867
+ "loss": 0.0571,
5868
+ "step": 9140
5869
+ },
5870
+ {
5871
+ "epoch": 39.44,
5872
+ "learning_rate": 4.211663066954643e-06,
5873
+ "loss": 0.0668,
5874
+ "step": 9150
5875
+ },
5876
+ {
5877
+ "epoch": 39.48,
5878
+ "learning_rate": 3.887688984881209e-06,
5879
+ "loss": 0.0374,
5880
+ "step": 9160
5881
+ },
5882
+ {
5883
+ "epoch": 39.52,
5884
+ "learning_rate": 3.563714902807775e-06,
5885
+ "loss": 0.0424,
5886
+ "step": 9170
5887
+ },
5888
+ {
5889
+ "epoch": 39.57,
5890
+ "learning_rate": 3.2397408207343406e-06,
5891
+ "loss": 0.047,
5892
+ "step": 9180
5893
+ },
5894
+ {
5895
+ "epoch": 39.61,
5896
+ "learning_rate": 2.9157667386609067e-06,
5897
+ "loss": 0.0562,
5898
+ "step": 9190
5899
+ },
5900
+ {
5901
+ "epoch": 39.65,
5902
+ "learning_rate": 2.5917926565874728e-06,
5903
+ "loss": 0.0789,
5904
+ "step": 9200
5905
+ },
5906
+ {
5907
+ "epoch": 39.7,
5908
+ "learning_rate": 2.267818574514039e-06,
5909
+ "loss": 0.0485,
5910
+ "step": 9210
5911
+ },
5912
+ {
5913
+ "epoch": 39.74,
5914
+ "learning_rate": 1.9438444924406045e-06,
5915
+ "loss": 0.0513,
5916
+ "step": 9220
5917
+ },
5918
+ {
5919
+ "epoch": 39.78,
5920
+ "learning_rate": 1.6198704103671703e-06,
5921
+ "loss": 0.0518,
5922
+ "step": 9230
5923
+ },
5924
+ {
5925
+ "epoch": 39.83,
5926
+ "learning_rate": 1.2958963282937364e-06,
5927
+ "loss": 0.0334,
5928
+ "step": 9240
5929
+ },
5930
+ {
5931
+ "epoch": 39.87,
5932
+ "learning_rate": 9.719222462203022e-07,
5933
+ "loss": 0.0257,
5934
+ "step": 9250
5935
+ },
5936
+ {
5937
+ "epoch": 39.91,
5938
+ "learning_rate": 6.479481641468682e-07,
5939
+ "loss": 0.055,
5940
+ "step": 9260
5941
+ },
5942
+ {
5943
+ "epoch": 39.95,
5944
+ "learning_rate": 3.239740820734341e-07,
5945
+ "loss": 0.0444,
5946
+ "step": 9270
5947
+ },
5948
+ {
5949
+ "epoch": 40.0,
5950
+ "learning_rate": 0.0,
5951
+ "loss": 0.0732,
5952
+ "step": 9280
5953
+ },
5954
+ {
5955
+ "epoch": 40.0,
5956
+ "eval_loss": 0.31733959913253784,
5957
+ "eval_runtime": 284.3146,
5958
+ "eval_samples_per_second": 10.706,
5959
+ "eval_wer": 0.3336529726456041,
5960
+ "step": 9280
5961
+ },
5962
+ {
5963
+ "epoch": 40.0,
5964
+ "step": 9280,
5965
+ "total_flos": 5.602791262432019e+19,
5966
+ "train_runtime": 9410.6193,
5967
+ "train_samples_per_second": 0.986
5968
  }
5969
  ],
5970
+ "max_steps": 9280,
5971
+ "num_train_epochs": 40,
5972
+ "total_flos": 5.602791262432019e+19,
5973
  "trial_name": null,
5974
  "trial_params": null
5975
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cb557f92a18c44747d9bda5647e3c4c26918f83d098cf2176b9933f5b2902c2d
3
  size 2351
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfe3500aa78573c5ffcb7645c8b988c8aff05806fb8cbd1ecaaf65db5f3c2601
3
  size 2351