infinitejoy committed on
Commit
85ab499
1 Parent(s): 1485f51

End of training

Browse files
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
- "epoch": 10.0,
3
- "eval_loss": 0.7654190063476562,
4
- "eval_runtime": 69.7723,
5
  "eval_samples": 1716,
6
- "eval_samples_per_second": 24.594,
7
- "eval_steps_per_second": 0.774,
8
- "eval_wer": 0.5185841828658266,
9
- "train_loss": 3.4940547976286513,
10
- "train_runtime": 2130.2547,
11
  "train_samples": 3670,
12
- "train_samples_per_second": 17.228,
13
- "train_steps_per_second": 0.54
14
  }
1
  {
2
+ "epoch": 20.0,
3
+ "eval_loss": 0.152541384100914,
4
+ "eval_runtime": 76.3888,
5
  "eval_samples": 1716,
6
+ "eval_samples_per_second": 22.464,
7
+ "eval_steps_per_second": 0.707,
8
+ "eval_wer": 0.1541822326347116,
9
+ "train_loss": 2.2170696955141813,
10
+ "train_runtime": 4486.5584,
11
  "train_samples": 3670,
12
+ "train_samples_per_second": 16.36,
13
+ "train_steps_per_second": 0.513
14
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 10.0,
3
- "eval_loss": 0.7654190063476562,
4
- "eval_runtime": 69.7723,
5
  "eval_samples": 1716,
6
- "eval_samples_per_second": 24.594,
7
- "eval_steps_per_second": 0.774,
8
- "eval_wer": 0.5185841828658266
9
  }
1
  {
2
+ "epoch": 20.0,
3
+ "eval_loss": 0.152541384100914,
4
+ "eval_runtime": 76.3888,
5
  "eval_samples": 1716,
6
+ "eval_samples_per_second": 22.464,
7
+ "eval_steps_per_second": 0.707,
8
+ "eval_wer": 0.1541822326347116
9
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3ed4af3a1e58d193e74018cc964b495ecd7ad4a72a0c7ea8340250d57af9f9d8
3
  size 1262104049
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b17f616f0e9670a0bbf474af55df0e678019f3343c3a61b304db347bd89fb820
3
  size 1262104049
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 10.0,
3
- "train_loss": 3.4940547976286513,
4
- "train_runtime": 2130.2547,
5
  "train_samples": 3670,
6
- "train_samples_per_second": 17.228,
7
- "train_steps_per_second": 0.54
8
  }
1
  {
2
+ "epoch": 20.0,
3
+ "train_loss": 2.2170696955141813,
4
+ "train_runtime": 4486.5584,
5
  "train_samples": 3670,
6
+ "train_samples_per_second": 16.36,
7
+ "train_steps_per_second": 0.513
8
  }
trainer_state.json CHANGED
@@ -1,109 +1,199 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 10.0,
5
- "global_step": 1150,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.87,
12
- "learning_rate": 1.68e-05,
13
- "loss": 11.4306,
14
  "step": 100
15
  },
16
  {
17
  "epoch": 1.74,
18
- "learning_rate": 3.4299999999999993e-05,
19
- "loss": 4.3407,
20
  "step": 200
21
  },
22
  {
23
  "epoch": 2.61,
24
- "learning_rate": 5.179999999999999e-05,
25
- "loss": 3.2465,
26
  "step": 300
27
  },
28
  {
29
  "epoch": 3.48,
30
- "learning_rate": 6.929999999999999e-05,
31
- "loss": 3.0269,
32
  "step": 400
33
  },
34
  {
35
- "epoch": 3.48,
36
- "eval_loss": 2.9771385192871094,
37
- "eval_runtime": 71.2409,
38
- "eval_samples_per_second": 24.087,
39
- "eval_steps_per_second": 0.758,
40
- "eval_wer": 1.0,
41
- "step": 400
42
  },
43
  {
44
  "epoch": 4.35,
45
- "learning_rate": 6.104e-05,
46
- "loss": 2.9973,
 
 
 
47
  "step": 500
48
  },
49
  {
50
  "epoch": 5.22,
51
- "learning_rate": 5.1706666666666664e-05,
52
- "loss": 2.9206,
53
  "step": 600
54
  },
55
  {
56
  "epoch": 6.09,
57
- "learning_rate": 4.237333333333332e-05,
58
- "loss": 2.8692,
59
  "step": 700
60
  },
61
  {
62
  "epoch": 6.96,
63
- "learning_rate": 3.3039999999999995e-05,
64
- "loss": 2.7902,
65
- "step": 800
66
- },
67
- {
68
- "epoch": 6.96,
69
- "eval_loss": 2.4356915950775146,
70
- "eval_runtime": 71.3351,
71
- "eval_samples_per_second": 24.055,
72
- "eval_steps_per_second": 0.757,
73
- "eval_wer": 0.9988602545431521,
74
  "step": 800
75
  },
76
  {
77
  "epoch": 7.83,
78
- "learning_rate": 2.3706666666666664e-05,
79
- "loss": 2.3184,
80
  "step": 900
81
  },
82
  {
83
  "epoch": 8.7,
84
- "learning_rate": 1.4373333333333332e-05,
85
- "loss": 1.8236,
 
 
 
 
 
 
 
 
 
86
  "step": 1000
87
  },
88
  {
89
  "epoch": 9.57,
90
- "learning_rate": 5.039999999999999e-06,
91
- "loss": 1.6258,
92
  "step": 1100
93
  },
94
  {
95
- "epoch": 10.0,
96
- "step": 1150,
97
- "total_flos": 5.422401103281132e+18,
98
- "train_loss": 3.4940547976286513,
99
- "train_runtime": 2130.2547,
100
- "train_samples_per_second": 17.228,
101
- "train_steps_per_second": 0.54
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
  }
103
  ],
104
- "max_steps": 1150,
105
- "num_train_epochs": 10,
106
- "total_flos": 5.422401103281132e+18,
107
  "trial_name": null,
108
  "trial_params": null
109
  }
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 20.0,
5
+ "global_step": 2300,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.87,
12
+ "learning_rate": 1.3439999999999998e-05,
13
+ "loss": 12.0308,
14
  "step": 100
15
  },
16
  {
17
  "epoch": 1.74,
18
+ "learning_rate": 2.7439999999999998e-05,
19
+ "loss": 4.6075,
20
  "step": 200
21
  },
22
  {
23
  "epoch": 2.61,
24
+ "learning_rate": 4.1439999999999996e-05,
25
+ "loss": 3.4071,
26
  "step": 300
27
  },
28
  {
29
  "epoch": 3.48,
30
+ "learning_rate": 5.544e-05,
31
+ "loss": 3.0602,
32
  "step": 400
33
  },
34
  {
35
+ "epoch": 4.35,
36
+ "learning_rate": 6.944e-05,
37
+ "loss": 3.0067,
38
+ "step": 500
 
 
 
39
  },
40
  {
41
  "epoch": 4.35,
42
+ "eval_loss": 2.9632411003112793,
43
+ "eval_runtime": 75.847,
44
+ "eval_samples_per_second": 22.624,
45
+ "eval_steps_per_second": 0.712,
46
+ "eval_wer": 1.0,
47
  "step": 500
48
  },
49
  {
50
  "epoch": 5.22,
51
+ "learning_rate": 6.626666666666666e-05,
52
+ "loss": 2.9665,
53
  "step": 600
54
  },
55
  {
56
  "epoch": 6.09,
57
+ "learning_rate": 6.237777777777777e-05,
58
+ "loss": 2.8916,
59
  "step": 700
60
  },
61
  {
62
  "epoch": 6.96,
63
+ "learning_rate": 5.8488888888888886e-05,
64
+ "loss": 2.7907,
 
 
 
 
 
 
 
 
 
65
  "step": 800
66
  },
67
  {
68
  "epoch": 7.83,
69
+ "learning_rate": 5.46e-05,
70
+ "loss": 2.0776,
71
  "step": 900
72
  },
73
  {
74
  "epoch": 8.7,
75
+ "learning_rate": 5.0711111111111105e-05,
76
+ "loss": 1.4939,
77
+ "step": 1000
78
+ },
79
+ {
80
+ "epoch": 8.7,
81
+ "eval_loss": 0.5004750490188599,
82
+ "eval_runtime": 75.9315,
83
+ "eval_samples_per_second": 22.599,
84
+ "eval_steps_per_second": 0.711,
85
+ "eval_wer": 0.41569049578927375,
86
  "step": 1000
87
  },
88
  {
89
  "epoch": 9.57,
90
+ "learning_rate": 4.682222222222222e-05,
91
+ "loss": 1.2841,
92
  "step": 1100
93
  },
94
  {
95
+ "epoch": 10.43,
96
+ "learning_rate": 4.2933333333333324e-05,
97
+ "loss": 1.1469,
98
+ "step": 1200
99
+ },
100
+ {
101
+ "epoch": 11.3,
102
+ "learning_rate": 3.9044444444444444e-05,
103
+ "loss": 1.0776,
104
+ "step": 1300
105
+ },
106
+ {
107
+ "epoch": 12.17,
108
+ "learning_rate": 3.515555555555555e-05,
109
+ "loss": 1.0147,
110
+ "step": 1400
111
+ },
112
+ {
113
+ "epoch": 13.04,
114
+ "learning_rate": 3.126666666666666e-05,
115
+ "loss": 0.9982,
116
+ "step": 1500
117
+ },
118
+ {
119
+ "epoch": 13.04,
120
+ "eval_loss": 0.19668780267238617,
121
+ "eval_runtime": 75.4326,
122
+ "eval_samples_per_second": 22.749,
123
+ "eval_steps_per_second": 0.716,
124
+ "eval_wer": 0.185651871082125,
125
+ "step": 1500
126
+ },
127
+ {
128
+ "epoch": 13.91,
129
+ "learning_rate": 2.7377777777777776e-05,
130
+ "loss": 0.9517,
131
+ "step": 1600
132
+ },
133
+ {
134
+ "epoch": 14.78,
135
+ "learning_rate": 2.3488888888888886e-05,
136
+ "loss": 0.9291,
137
+ "step": 1700
138
+ },
139
+ {
140
+ "epoch": 15.65,
141
+ "learning_rate": 1.96e-05,
142
+ "loss": 0.8937,
143
+ "step": 1800
144
+ },
145
+ {
146
+ "epoch": 16.52,
147
+ "learning_rate": 1.5711111111111108e-05,
148
+ "loss": 0.8918,
149
+ "step": 1900
150
+ },
151
+ {
152
+ "epoch": 17.39,
153
+ "learning_rate": 1.1822222222222221e-05,
154
+ "loss": 0.8726,
155
+ "step": 2000
156
+ },
157
+ {
158
+ "epoch": 17.39,
159
+ "eval_loss": 0.1586812287569046,
160
+ "eval_runtime": 75.8623,
161
+ "eval_samples_per_second": 22.62,
162
+ "eval_steps_per_second": 0.712,
163
+ "eval_wer": 0.1563984043563604,
164
+ "step": 2000
165
+ },
166
+ {
167
+ "epoch": 18.26,
168
+ "learning_rate": 7.933333333333332e-06,
169
+ "loss": 0.8785,
170
+ "step": 2100
171
+ },
172
+ {
173
+ "epoch": 19.13,
174
+ "learning_rate": 4.044444444444444e-06,
175
+ "loss": 0.8559,
176
+ "step": 2200
177
+ },
178
+ {
179
+ "epoch": 20.0,
180
+ "learning_rate": 1.5555555555555554e-07,
181
+ "loss": 0.8653,
182
+ "step": 2300
183
+ },
184
+ {
185
+ "epoch": 20.0,
186
+ "step": 2300,
187
+ "total_flos": 1.0844366081729495e+19,
188
+ "train_loss": 2.2170696955141813,
189
+ "train_runtime": 4486.5584,
190
+ "train_samples_per_second": 16.36,
191
+ "train_steps_per_second": 0.513
192
  }
193
  ],
194
+ "max_steps": 2300,
195
+ "num_train_epochs": 20,
196
+ "total_flos": 1.0844366081729495e+19,
197
  "trial_name": null,
198
  "trial_params": null
199
  }