mprzibilla committed on
Commit
6966377
1 Parent(s): 2093d43

Training in progress, step 6220

Browse files
config.json CHANGED
@@ -110,7 +110,7 @@
110
  1
111
  ],
112
  "torch_dtype": "float32",
113
- "transformers_version": "4.18.0",
114
  "use_weighted_layer_sum": false,
115
  "vocab_size": 28,
116
  "xvector_output_dim": 512
 
110
  1
111
  ],
112
  "torch_dtype": "float32",
113
+ "transformers_version": "4.23.1",
114
  "use_weighted_layer_sum": false,
115
  "vocab_size": 28,
116
  "xvector_output_dim": 512
last-checkpoint/config.json CHANGED
@@ -110,7 +110,7 @@
110
  1
111
  ],
112
  "torch_dtype": "float32",
113
- "transformers_version": "4.18.0",
114
  "use_weighted_layer_sum": false,
115
  "vocab_size": 28,
116
  "xvector_output_dim": 512
 
110
  1
111
  ],
112
  "torch_dtype": "float32",
113
+ "transformers_version": "4.23.1",
114
  "use_weighted_layer_sum": false,
115
  "vocab_size": 28,
116
  "xvector_output_dim": 512
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f3333ee8e53af6b1f85bcd67c929a51ec25d553108f47034bd343e26211f8a32
3
- size 174303
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:927f431de7ce4e1ea9c341f8117ca3748190de20455628b650b57805a47bfbe1
3
+ size 721661957
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1f32a40766506810aeb69e5aff37fb21a6951556747267b8b3d151d03dea2616
3
- size 377644503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f3e0c86203048b55a478f0d5d6f563a4886d3d79134deab79ef29ea87f55197
3
+ size 377646433
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:370432457d1899c6918618be0ebb3c1ad372c7a0deee55d5ebd556abf838be44
3
- size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ad1f76f42c6b6db74eff6108567b57feed75b7f920793f46fb271d321687bec
3
+ size 14703
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3f68c7cae533afea841f7981dbe73911683ef32c6ec89d7710d32dbdc3c08462
3
- size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cda56571df27698319b374c2d8c5a88b0f34877e85fb00c92caccf17f01645df
3
+ size 557
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0cff7900d8bc1e4ddaeb333afa964d4173b26154ba7e42e952ecc04b0838218a
3
- size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34adf423566f00eb1ba5621161b4351c08313bd50e3c016789c25bb5f425ae85
3
+ size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,176 +1,32 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 250.0,
5
- "global_step": 124500,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 25.0,
12
- "learning_rate": 9.474360600295921e-05,
13
- "loss": 19.6049,
14
- "step": 12450
15
  },
16
  {
17
- "epoch": 25.0,
18
- "eval_cer": 0.9922077922077922,
19
- "eval_loss": 3.1665115356445312,
20
- "eval_runtime": 1.0509,
21
- "eval_samples_per_second": 199.825,
22
- "eval_steps_per_second": 25.692,
23
  "eval_wer": 1.0,
24
- "step": 12450
25
- },
26
- {
27
- "epoch": 50.0,
28
- "learning_rate": 8.422067216233354e-05,
29
- "loss": 2.746,
30
- "step": 24900
31
- },
32
- {
33
- "epoch": 50.0,
34
- "eval_cer": 0.9489177489177489,
35
- "eval_loss": 3.065709352493286,
36
- "eval_runtime": 1.0247,
37
- "eval_samples_per_second": 204.941,
38
- "eval_steps_per_second": 26.35,
39
- "eval_wer": 1.0,
40
- "step": 24900
41
- },
42
- {
43
- "epoch": 75.0,
44
- "learning_rate": 7.36994292961319e-05,
45
- "loss": 2.5773,
46
- "step": 37350
47
- },
48
- {
49
- "epoch": 75.0,
50
- "eval_cer": 0.929004329004329,
51
- "eval_loss": 3.0779831409454346,
52
- "eval_runtime": 1.0289,
53
- "eval_samples_per_second": 204.093,
54
- "eval_steps_per_second": 26.241,
55
- "eval_wer": 1.0,
56
- "step": 37350
57
- },
58
- {
59
- "epoch": 100.0,
60
- "learning_rate": 6.317734094271824e-05,
61
- "loss": 2.511,
62
- "step": 49800
63
- },
64
- {
65
- "epoch": 100.0,
66
- "eval_cer": 0.9333333333333333,
67
- "eval_loss": 3.0956246852874756,
68
- "eval_runtime": 1.0323,
69
- "eval_samples_per_second": 203.432,
70
- "eval_steps_per_second": 26.155,
71
- "eval_wer": 1.0,
72
- "step": 49800
73
- },
74
- {
75
- "epoch": 125.0,
76
- "learning_rate": 5.265609807651659e-05,
77
- "loss": 2.4727,
78
- "step": 62250
79
- },
80
- {
81
- "epoch": 125.0,
82
- "eval_cer": 0.9341991341991343,
83
- "eval_loss": 3.10306978225708,
84
- "eval_runtime": 1.0322,
85
- "eval_samples_per_second": 203.454,
86
- "eval_steps_per_second": 26.158,
87
- "eval_wer": 1.0,
88
- "step": 62250
89
- },
90
- {
91
- "epoch": 150.0,
92
- "learning_rate": 4.213400972310294e-05,
93
- "loss": 2.449,
94
- "step": 74700
95
- },
96
- {
97
- "epoch": 150.0,
98
- "eval_cer": 0.9316017316017317,
99
- "eval_loss": 3.1064517498016357,
100
- "eval_runtime": 1.0551,
101
- "eval_samples_per_second": 199.032,
102
- "eval_steps_per_second": 25.59,
103
- "eval_wer": 1.0,
104
- "step": 74700
105
- },
106
- {
107
- "epoch": 175.0,
108
- "learning_rate": 3.161276685690129e-05,
109
- "loss": 2.4326,
110
- "step": 87150
111
- },
112
- {
113
- "epoch": 175.0,
114
- "eval_cer": 0.9333333333333333,
115
- "eval_loss": 3.1083292961120605,
116
- "eval_runtime": 1.0455,
117
- "eval_samples_per_second": 200.864,
118
- "eval_steps_per_second": 25.825,
119
- "eval_wer": 1.0,
120
- "step": 87150
121
- },
122
- {
123
- "epoch": 200.0,
124
- "learning_rate": 2.1090678503487633e-05,
125
- "loss": 2.4226,
126
- "step": 99600
127
- },
128
- {
129
- "epoch": 200.0,
130
- "eval_cer": 0.9341991341991343,
131
- "eval_loss": 3.1126484870910645,
132
- "eval_runtime": 1.0516,
133
- "eval_samples_per_second": 199.703,
134
- "eval_steps_per_second": 25.676,
135
- "eval_wer": 1.0,
136
- "step": 99600
137
- },
138
- {
139
- "epoch": 225.0,
140
- "learning_rate": 1.056859015007398e-05,
141
- "loss": 2.4151,
142
- "step": 112050
143
- },
144
- {
145
- "epoch": 225.0,
146
- "eval_cer": 0.9341991341991343,
147
- "eval_loss": 3.1124825477600098,
148
- "eval_runtime": 1.0358,
149
- "eval_samples_per_second": 202.734,
150
- "eval_steps_per_second": 26.066,
151
- "eval_wer": 1.0,
152
- "step": 112050
153
- },
154
- {
155
- "epoch": 250.0,
156
- "learning_rate": 4.7347283872331434e-08,
157
- "loss": 2.4121,
158
- "step": 124500
159
- },
160
- {
161
- "epoch": 250.0,
162
- "eval_cer": 0.935064935064935,
163
- "eval_loss": 3.11251163482666,
164
- "eval_runtime": 0.9891,
165
- "eval_samples_per_second": 212.307,
166
- "eval_steps_per_second": 27.297,
167
- "eval_wer": 1.0,
168
- "step": 124500
169
  }
170
  ],
171
- "max_steps": 124500,
172
- "num_train_epochs": 250,
173
- "total_flos": 1.1522414746503514e+20,
174
  "trial_name": null,
175
  "trial_params": null
176
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 10.0,
5
+ "global_step": 6220,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 10.0,
12
+ "learning_rate": 9.475038077508885e-05,
13
+ "loss": 4.7101,
14
+ "step": 6220
15
  },
16
  {
17
+ "epoch": 10.0,
18
+ "eval_cer": 0.7567099567099567,
19
+ "eval_loss": 5.277076244354248,
20
+ "eval_runtime": 7.9435,
21
+ "eval_samples_per_second": 26.437,
22
+ "eval_steps_per_second": 3.399,
23
  "eval_wer": 1.0,
24
+ "step": 6220
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  }
26
  ],
27
+ "max_steps": 62200,
28
+ "num_train_epochs": 100,
29
+ "total_flos": 4.6105604043819725e+18,
30
  "trial_name": null,
31
  "trial_params": null
32
  }
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:94a3a3e4c9721bb45336b089b084f2a9f706b1e615467edde5761ec2a859776b
3
- size 3119
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7620f2dba0add0f24529ee099e5753c390b45d9c00db8a63639e26ef90afc6f
3
+ size 3451
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1f32a40766506810aeb69e5aff37fb21a6951556747267b8b3d151d03dea2616
3
- size 377644503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f3e0c86203048b55a478f0d5d6f563a4886d3d79134deab79ef29ea87f55197
3
+ size 377646433
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:94a3a3e4c9721bb45336b089b084f2a9f706b1e615467edde5761ec2a859776b
3
- size 3119
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7620f2dba0add0f24529ee099e5753c390b45d9c00db8a63639e26ef90afc6f
3
+ size 3451