ivanlau commited on
Commit
fe0cd44
1 Parent(s): db1ddb6

End of training

Browse files
Files changed (4) hide show
  1. all_results.json +12 -12
  2. eval_results.json +7 -7
  3. train_results.json +6 -6
  4. trainer_state.json +91 -154
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
- "epoch": 1.0,
3
- "eval_loss": 40.69682312011719,
4
- "eval_runtime": 26.0441,
5
- "eval_samples": 553,
6
- "eval_samples_per_second": 21.233,
7
- "eval_steps_per_second": 21.233,
8
- "eval_wer": 1.0,
9
- "train_loss": 109.71464342948718,
10
- "train_runtime": 1380.1966,
11
- "train_samples": 3119,
12
- "train_samples_per_second": 2.26,
13
- "train_steps_per_second": 0.141
14
  }
 
1
  {
2
+ "epoch": 10.0,
3
+ "eval_loss": 2.67260479927063,
4
+ "eval_runtime": 163.5078,
5
+ "eval_samples": 3659,
6
+ "eval_samples_per_second": 22.378,
7
+ "eval_steps_per_second": 1.401,
8
+ "eval_wer": 0.9814612868047983,
9
+ "train_loss": 14.438921352032104,
10
+ "train_runtime": 7245.592,
11
+ "train_samples": 11686,
12
+ "train_samples_per_second": 16.128,
13
+ "train_steps_per_second": 0.253
14
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 1.0,
3
- "eval_loss": 40.69682312011719,
4
- "eval_runtime": 26.0441,
5
- "eval_samples": 553,
6
- "eval_samples_per_second": 21.233,
7
- "eval_steps_per_second": 21.233,
8
- "eval_wer": 1.0
9
  }
 
1
  {
2
+ "epoch": 10.0,
3
+ "eval_loss": 2.67260479927063,
4
+ "eval_runtime": 163.5078,
5
+ "eval_samples": 3659,
6
+ "eval_samples_per_second": 22.378,
7
+ "eval_steps_per_second": 1.401,
8
+ "eval_wer": 0.9814612868047983
9
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 1.0,
3
- "train_loss": 109.71464342948718,
4
- "train_runtime": 1380.1966,
5
- "train_samples": 3119,
6
- "train_samples_per_second": 2.26,
7
- "train_steps_per_second": 0.141
8
  }
 
1
  {
2
+ "epoch": 10.0,
3
+ "train_loss": 14.438921352032104,
4
+ "train_runtime": 7245.592,
5
+ "train_samples": 11686,
6
+ "train_samples_per_second": 16.128,
7
+ "train_steps_per_second": 0.253
8
  }
trainer_state.json CHANGED
@@ -1,196 +1,133 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.0,
5
- "global_step": 195,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.05,
12
- "eval_loss": 239.00135803222656,
13
- "eval_runtime": 28.5979,
14
- "eval_samples_per_second": 19.337,
15
- "eval_steps_per_second": 19.337,
16
- "eval_wer": 1.0,
17
- "step": 10
18
- },
19
- {
20
- "epoch": 0.1,
21
- "eval_loss": 235.82070922851562,
22
- "eval_runtime": 27.9371,
23
- "eval_samples_per_second": 19.794,
24
- "eval_steps_per_second": 19.794,
25
- "eval_wer": 1.0,
26
- "step": 20
27
- },
28
- {
29
- "epoch": 0.15,
30
- "eval_loss": 226.90093994140625,
31
- "eval_runtime": 27.8094,
32
- "eval_samples_per_second": 19.885,
33
- "eval_steps_per_second": 19.885,
34
- "eval_wer": 1.0,
35
- "step": 30
36
- },
37
- {
38
- "epoch": 0.21,
39
- "eval_loss": 198.07687377929688,
40
- "eval_runtime": 27.9651,
41
- "eval_samples_per_second": 19.775,
42
- "eval_steps_per_second": 19.775,
43
- "eval_wer": 1.0,
44
- "step": 40
45
- },
46
- {
47
- "epoch": 0.26,
48
- "eval_loss": 166.6727752685547,
49
- "eval_runtime": 28.4525,
50
- "eval_samples_per_second": 19.436,
51
- "eval_steps_per_second": 19.436,
52
- "eval_wer": 1.0,
53
- "step": 50
54
- },
55
- {
56
- "epoch": 0.31,
57
- "eval_loss": 149.14447021484375,
58
- "eval_runtime": 27.248,
59
- "eval_samples_per_second": 20.295,
60
- "eval_steps_per_second": 20.295,
61
  "eval_wer": 1.0,
62
- "step": 60
63
  },
64
  {
65
- "epoch": 0.36,
66
- "eval_loss": 138.44029235839844,
67
- "eval_runtime": 26.1313,
68
- "eval_samples_per_second": 21.162,
69
- "eval_steps_per_second": 21.162,
70
  "eval_wer": 1.0,
71
- "step": 70
72
  },
73
  {
74
- "epoch": 0.41,
75
- "eval_loss": 131.72488403320312,
76
- "eval_runtime": 28.1944,
77
- "eval_samples_per_second": 19.614,
78
- "eval_steps_per_second": 19.614,
79
- "eval_wer": 1.0,
80
- "step": 80
81
  },
82
  {
83
- "epoch": 0.46,
84
- "eval_loss": 125.558349609375,
85
- "eval_runtime": 26.1125,
86
- "eval_samples_per_second": 21.178,
87
- "eval_steps_per_second": 21.178,
88
  "eval_wer": 1.0,
89
- "step": 90
90
  },
91
  {
92
- "epoch": 0.51,
93
- "eval_loss": 119.75148010253906,
94
- "eval_runtime": 27.1597,
95
- "eval_samples_per_second": 20.361,
96
- "eval_steps_per_second": 20.361,
97
- "eval_wer": 1.0,
98
- "step": 100
99
  },
100
  {
101
- "epoch": 0.56,
102
- "eval_loss": 113.72832489013672,
103
- "eval_runtime": 26.3538,
104
- "eval_samples_per_second": 20.984,
105
- "eval_steps_per_second": 20.984,
106
- "eval_wer": 1.0,
107
- "step": 110
108
  },
109
  {
110
- "epoch": 0.62,
111
- "eval_loss": 107.24547576904297,
112
- "eval_runtime": 26.997,
113
- "eval_samples_per_second": 20.484,
114
- "eval_steps_per_second": 20.484,
115
- "eval_wer": 1.0,
116
- "step": 120
117
  },
118
  {
119
- "epoch": 0.67,
120
- "eval_loss": 100.21720886230469,
121
- "eval_runtime": 27.4511,
122
- "eval_samples_per_second": 20.145,
123
- "eval_steps_per_second": 20.145,
124
- "eval_wer": 1.0,
125
- "step": 130
126
  },
127
  {
128
- "epoch": 0.72,
129
- "eval_loss": 92.55851745605469,
130
- "eval_runtime": 27.8487,
131
- "eval_samples_per_second": 19.857,
132
- "eval_steps_per_second": 19.857,
133
- "eval_wer": 1.0,
134
- "step": 140
135
  },
136
  {
137
- "epoch": 0.77,
138
- "eval_loss": 84.25730895996094,
139
- "eval_runtime": 26.3189,
140
- "eval_samples_per_second": 21.012,
141
- "eval_steps_per_second": 21.012,
142
- "eval_wer": 1.0,
143
- "step": 150
144
  },
145
  {
146
- "epoch": 0.82,
147
- "eval_loss": 75.29525756835938,
148
- "eval_runtime": 26.194,
149
- "eval_samples_per_second": 21.112,
150
- "eval_steps_per_second": 21.112,
151
- "eval_wer": 1.0,
152
- "step": 160
153
  },
154
  {
155
- "epoch": 0.87,
156
- "eval_loss": 65.69525909423828,
157
- "eval_runtime": 26.5696,
158
- "eval_samples_per_second": 20.813,
159
- "eval_steps_per_second": 20.813,
160
- "eval_wer": 1.0,
161
- "step": 170
162
  },
163
  {
164
- "epoch": 0.92,
165
- "eval_loss": 55.75440216064453,
166
- "eval_runtime": 27.1022,
167
- "eval_samples_per_second": 20.404,
168
- "eval_steps_per_second": 20.404,
169
- "eval_wer": 1.0,
170
- "step": 180
171
  },
172
  {
173
- "epoch": 0.97,
174
- "eval_loss": 45.729740142822266,
175
- "eval_runtime": 27.0279,
176
- "eval_samples_per_second": 20.46,
177
- "eval_steps_per_second": 20.46,
178
- "eval_wer": 1.0,
179
- "step": 190
180
- },
181
- {
182
- "epoch": 1.0,
183
- "step": 195,
184
- "total_flos": 2.4781601494187277e+17,
185
- "train_loss": 109.71464342948718,
186
- "train_runtime": 1380.1966,
187
- "train_samples_per_second": 2.26,
188
- "train_steps_per_second": 0.141
189
  }
190
  ],
191
- "max_steps": 195,
192
- "num_train_epochs": 1,
193
- "total_flos": 2.4781601494187277e+17,
194
  "trial_name": null,
195
  "trial_params": null
196
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 10.0,
5
+ "global_step": 1830,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 1.0,
12
+ "eval_loss": 47.84418869018555,
13
+ "eval_runtime": 186.8063,
14
+ "eval_samples_per_second": 19.587,
15
+ "eval_steps_per_second": 1.226,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  "eval_wer": 1.0,
17
+ "step": 183
18
  },
19
  {
20
+ "epoch": 2.0,
21
+ "eval_loss": 6.310945510864258,
22
+ "eval_runtime": 163.2985,
23
+ "eval_samples_per_second": 22.407,
24
+ "eval_steps_per_second": 1.402,
25
  "eval_wer": 1.0,
26
+ "step": 366
27
  },
28
  {
29
+ "epoch": 2.73,
30
+ "learning_rate": 0.00029699999999999996,
31
+ "loss": 41.8902,
32
+ "step": 500
 
 
 
33
  },
34
  {
35
+ "epoch": 3.0,
36
+ "eval_loss": 6.239192008972168,
37
+ "eval_runtime": 163.0201,
38
+ "eval_samples_per_second": 22.445,
39
+ "eval_steps_per_second": 1.405,
40
  "eval_wer": 1.0,
41
+ "step": 549
42
  },
43
  {
44
+ "epoch": 4.0,
45
+ "eval_loss": 5.973925590515137,
46
+ "eval_runtime": 163.0214,
47
+ "eval_samples_per_second": 22.445,
48
+ "eval_steps_per_second": 1.405,
49
+ "eval_wer": 1.1123227917121048,
50
+ "step": 732
51
  },
52
  {
53
+ "epoch": 5.0,
54
+ "eval_loss": 4.901411056518555,
55
+ "eval_runtime": 162.8715,
56
+ "eval_samples_per_second": 22.466,
57
+ "eval_steps_per_second": 1.406,
58
+ "eval_wer": 1.9473827699018538,
59
+ "step": 915
60
  },
61
  {
62
+ "epoch": 5.46,
63
+ "learning_rate": 0.00018834586466165413,
64
+ "loss": 5.5817,
65
+ "step": 1000
 
 
 
66
  },
67
  {
68
+ "epoch": 6.0,
69
+ "eval_loss": 3.9892334938049316,
70
+ "eval_runtime": 163.2053,
71
+ "eval_samples_per_second": 22.42,
72
+ "eval_steps_per_second": 1.403,
73
+ "eval_wer": 1.0188113413304254,
74
+ "step": 1098
75
  },
76
  {
77
+ "epoch": 7.0,
78
+ "eval_loss": 3.5080456733703613,
79
+ "eval_runtime": 162.7362,
80
+ "eval_samples_per_second": 22.484,
81
+ "eval_steps_per_second": 1.407,
82
+ "eval_wer": 1.0103598691384952,
83
+ "step": 1281
84
  },
85
  {
86
+ "epoch": 8.0,
87
+ "eval_loss": 3.0796852111816406,
88
+ "eval_runtime": 164.2666,
89
+ "eval_samples_per_second": 22.275,
90
+ "eval_steps_per_second": 1.394,
91
+ "eval_wer": 0.9904580152671756,
92
+ "step": 1464
93
  },
94
  {
95
+ "epoch": 8.2,
96
+ "learning_rate": 7.556390977443609e-05,
97
+ "loss": 3.5579,
98
+ "step": 1500
 
 
 
99
  },
100
  {
101
+ "epoch": 9.0,
102
+ "eval_loss": 2.8110806941986084,
103
+ "eval_runtime": 168.541,
104
+ "eval_samples_per_second": 21.71,
105
+ "eval_steps_per_second": 1.359,
106
+ "eval_wer": 0.9836423118865867,
107
+ "step": 1647
108
  },
109
  {
110
+ "epoch": 10.0,
111
+ "eval_loss": 2.67260479927063,
112
+ "eval_runtime": 165.7448,
113
+ "eval_samples_per_second": 22.076,
114
+ "eval_steps_per_second": 1.382,
115
+ "eval_wer": 0.9814612868047983,
116
+ "step": 1830
117
  },
118
  {
119
+ "epoch": 10.0,
120
+ "step": 1830,
121
+ "total_flos": 1.3577354977100892e+19,
122
+ "train_loss": 14.438921352032104,
123
+ "train_runtime": 7245.592,
124
+ "train_samples_per_second": 16.128,
125
+ "train_steps_per_second": 0.253
 
 
 
 
 
 
 
 
 
126
  }
127
  ],
128
+ "max_steps": 1830,
129
+ "num_train_epochs": 10,
130
+ "total_flos": 1.3577354977100892e+19,
131
  "trial_name": null,
132
  "trial_params": null
133
  }