13048909972 committed on
Commit
92c68c2
1 Parent(s): 540cc1d

End of training

Browse files
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
  "epoch": 15.0,
3
- "eval_loss": 0.37937504053115845,
4
- "eval_runtime": 67.5206,
5
  "eval_samples": 1647,
6
- "eval_samples_per_second": 24.393,
7
- "eval_steps_per_second": 3.051,
8
- "eval_wer": 0.342048820345215,
9
- "train_loss": 1.0777230644809361,
10
- "train_runtime": 3812.1297,
11
  "train_samples": 3478,
12
- "train_samples_per_second": 13.685,
13
- "train_steps_per_second": 0.429
14
  }
 
1
  {
2
  "epoch": 15.0,
3
+ "eval_loss": 0.37925440073013306,
4
+ "eval_runtime": 67.0208,
5
  "eval_samples": 1647,
6
+ "eval_samples_per_second": 24.574,
7
+ "eval_steps_per_second": 3.074,
8
+ "eval_wer": 0.34950464712491064,
9
+ "train_loss": 1.0876544129957846,
10
+ "train_runtime": 3803.1822,
11
  "train_samples": 3478,
12
+ "train_samples_per_second": 13.717,
13
+ "train_steps_per_second": 0.43
14
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 15.0,
3
- "eval_loss": 0.37937504053115845,
4
- "eval_runtime": 67.5206,
5
  "eval_samples": 1647,
6
- "eval_samples_per_second": 24.393,
7
- "eval_steps_per_second": 3.051,
8
- "eval_wer": 0.342048820345215
9
  }
 
1
  {
2
  "epoch": 15.0,
3
+ "eval_loss": 0.37925440073013306,
4
+ "eval_runtime": 67.0208,
5
  "eval_samples": 1647,
6
+ "eval_samples_per_second": 24.574,
7
+ "eval_steps_per_second": 3.074,
8
+ "eval_wer": 0.34950464712491064
9
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a364cb1fa5839ec73cef456a2b868c16845a58e01cb3e6b0257b65d652456d89
3
  size 1262091761
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:676cf1b13df5d5b133df645585b7f4f4d765e89d9dd3a7b22128124e2bca242e
3
  size 1262091761
runs/Dec09_09-01-28_cuda11-renwei-docker/events.out.tfevents.1639012200.cuda11-renwei-docker.296.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f095fadee554b40e27ba1bf9c3df16892f87f6c1f50b14bafa0ad3cc60ef0294
3
- size 6148
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6b7ebacf789cae7f1d5a8832e6ac259f2729cd2c6948d9a29d627fe6405d8ec
3
+ size 10789
runs/Dec09_09-01-28_cuda11-renwei-docker/events.out.tfevents.1639016073.cuda11-renwei-docker.296.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a082e79574a9b019342c37d54b8ca2ff29dba17c918dbb5ddcb882b051425643
3
+ size 358
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 15.0,
3
- "train_loss": 1.0777230644809361,
4
- "train_runtime": 3812.1297,
5
  "train_samples": 3478,
6
- "train_samples_per_second": 13.685,
7
- "train_steps_per_second": 0.429
8
  }
 
1
  {
2
  "epoch": 15.0,
3
+ "train_loss": 1.0876544129957846,
4
+ "train_runtime": 3803.1822,
5
  "train_samples": 3478,
6
+ "train_samples_per_second": 13.717,
7
+ "train_steps_per_second": 0.43
8
  }
trainer_state.json CHANGED
@@ -9,174 +9,174 @@
9
  "log_history": [
10
  {
11
  "epoch": 0.92,
12
- "eval_loss": 3.5880308151245117,
13
- "eval_runtime": 70.0803,
14
- "eval_samples_per_second": 23.502,
15
- "eval_steps_per_second": 2.939,
16
  "eval_wer": 1.0,
17
  "step": 100
18
  },
19
  {
20
  "epoch": 1.83,
21
- "eval_loss": 3.012378454208374,
22
- "eval_runtime": 67.4593,
23
- "eval_samples_per_second": 24.415,
24
- "eval_steps_per_second": 3.054,
25
  "eval_wer": 0.9998978653865795,
26
  "step": 200
27
  },
28
  {
29
  "epoch": 2.75,
30
- "eval_loss": 0.8977468609809875,
31
- "eval_runtime": 68.3285,
32
- "eval_samples_per_second": 24.104,
33
- "eval_steps_per_second": 3.015,
34
- "eval_wer": 0.7639669083852517,
35
  "step": 300
36
  },
37
  {
38
  "epoch": 3.67,
39
- "eval_loss": 0.5706909894943237,
40
- "eval_runtime": 67.9906,
41
- "eval_samples_per_second": 24.224,
42
- "eval_steps_per_second": 3.03,
43
- "eval_wer": 0.6114799305484628,
44
  "step": 400
45
  },
46
  {
47
  "epoch": 4.59,
48
- "learning_rate": 0.0002988,
49
- "loss": 3.1774,
50
  "step": 500
51
  },
52
  {
53
  "epoch": 4.59,
54
- "eval_loss": 0.5086258053779602,
55
- "eval_runtime": 68.5993,
56
- "eval_samples_per_second": 24.009,
57
- "eval_steps_per_second": 3.003,
58
- "eval_wer": 0.5162904708405679,
59
  "step": 500
60
  },
61
  {
62
  "epoch": 5.5,
63
- "eval_loss": 0.4572145342826843,
64
- "eval_runtime": 67.7684,
65
- "eval_samples_per_second": 24.303,
66
- "eval_steps_per_second": 3.04,
67
- "eval_wer": 0.4830967214789092,
68
  "step": 600
69
  },
70
  {
71
  "epoch": 6.42,
72
- "eval_loss": 0.4250616133213043,
73
- "eval_runtime": 68.0474,
74
- "eval_samples_per_second": 24.204,
75
- "eval_steps_per_second": 3.027,
76
- "eval_wer": 0.45143499131855785,
77
  "step": 700
78
  },
79
  {
80
  "epoch": 7.34,
81
- "eval_loss": 0.3982156217098236,
82
- "eval_runtime": 67.5727,
83
- "eval_samples_per_second": 24.374,
84
- "eval_steps_per_second": 3.049,
85
- "eval_wer": 0.4190583188642631,
86
  "step": 800
87
  },
88
  {
89
  "epoch": 8.26,
90
- "eval_loss": 0.41418781876564026,
91
- "eval_runtime": 67.8213,
92
- "eval_samples_per_second": 24.284,
93
- "eval_steps_per_second": 3.037,
94
- "eval_wer": 0.41068328056378306,
95
  "step": 900
96
  },
97
  {
98
  "epoch": 9.17,
99
- "learning_rate": 0.00016837004405286342,
100
- "loss": 0.2241,
101
  "step": 1000
102
  },
103
  {
104
  "epoch": 9.17,
105
- "eval_loss": 0.3958446979522705,
106
- "eval_runtime": 67.1127,
107
- "eval_samples_per_second": 24.541,
108
- "eval_steps_per_second": 3.069,
109
- "eval_wer": 0.39638443468491474,
110
  "step": 1000
111
  },
112
  {
113
  "epoch": 10.09,
114
- "eval_loss": 0.40702584385871887,
115
- "eval_runtime": 66.7245,
116
- "eval_samples_per_second": 24.684,
117
- "eval_steps_per_second": 3.087,
118
- "eval_wer": 0.38800939638443466,
119
  "step": 1100
120
  },
121
  {
122
  "epoch": 11.01,
123
- "eval_loss": 0.3864419162273407,
124
- "eval_runtime": 67.8823,
125
- "eval_samples_per_second": 24.263,
126
- "eval_steps_per_second": 3.035,
127
- "eval_wer": 0.3799407619242161,
128
  "step": 1200
129
  },
130
  {
131
  "epoch": 11.93,
132
- "eval_loss": 0.39367178082466125,
133
- "eval_runtime": 66.9603,
134
- "eval_samples_per_second": 24.597,
135
- "eval_steps_per_second": 3.076,
136
- "eval_wer": 0.367786742927178,
137
  "step": 1300
138
  },
139
  {
140
  "epoch": 12.84,
141
- "eval_loss": 0.39223712682724,
142
- "eval_runtime": 66.9432,
143
- "eval_samples_per_second": 24.603,
144
- "eval_steps_per_second": 3.077,
145
- "eval_wer": 0.3560412623838219,
146
  "step": 1400
147
  },
148
  {
149
  "epoch": 13.76,
150
- "learning_rate": 3.621145374449339e-05,
151
- "loss": 0.102,
152
  "step": 1500
153
  },
154
  {
155
  "epoch": 13.76,
156
- "eval_loss": 0.39097627997398376,
157
- "eval_runtime": 64.6816,
158
- "eval_samples_per_second": 25.463,
159
- "eval_steps_per_second": 3.185,
160
- "eval_wer": 0.34664487794913695,
161
  "step": 1500
162
  },
163
  {
164
  "epoch": 14.68,
165
- "eval_loss": 0.3765156865119934,
166
- "eval_runtime": 70.3564,
167
- "eval_samples_per_second": 23.409,
168
- "eval_steps_per_second": 2.928,
169
- "eval_wer": 0.3411296088244306,
170
  "step": 1600
171
  },
172
  {
173
  "epoch": 15.0,
174
  "step": 1635,
175
  "total_flos": 6.429866751096261e+18,
176
- "train_loss": 1.0777230644809361,
177
- "train_runtime": 3812.1297,
178
- "train_samples_per_second": 13.685,
179
- "train_steps_per_second": 0.429
180
  }
181
  ],
182
  "max_steps": 1635,
 
9
  "log_history": [
10
  {
11
  "epoch": 0.92,
12
+ "eval_loss": 3.6067986488342285,
13
+ "eval_runtime": 67.1306,
14
+ "eval_samples_per_second": 24.534,
15
+ "eval_steps_per_second": 3.069,
16
  "eval_wer": 1.0,
17
  "step": 100
18
  },
19
  {
20
  "epoch": 1.83,
21
+ "eval_loss": 3.041459321975708,
22
+ "eval_runtime": 67.0963,
23
+ "eval_samples_per_second": 24.547,
24
+ "eval_steps_per_second": 3.07,
25
  "eval_wer": 0.9998978653865795,
26
  "step": 200
27
  },
28
  {
29
  "epoch": 2.75,
30
+ "eval_loss": 0.8838828206062317,
31
+ "eval_runtime": 66.9473,
32
+ "eval_samples_per_second": 24.601,
33
+ "eval_steps_per_second": 3.077,
34
+ "eval_wer": 0.7398631396180165,
35
  "step": 300
36
  },
37
  {
38
  "epoch": 3.67,
39
+ "eval_loss": 0.5810018181800842,
40
+ "eval_runtime": 65.6828,
41
+ "eval_samples_per_second": 25.075,
42
+ "eval_steps_per_second": 3.136,
43
+ "eval_wer": 0.5892145848227964,
44
  "step": 400
45
  },
46
  {
47
  "epoch": 4.59,
48
+ "learning_rate": 0.0002982,
49
+ "loss": 3.2096,
50
  "step": 500
51
  },
52
  {
53
  "epoch": 4.59,
54
+ "eval_loss": 0.5231282711029053,
55
+ "eval_runtime": 67.353,
56
+ "eval_samples_per_second": 24.453,
57
+ "eval_steps_per_second": 3.059,
58
+ "eval_wer": 0.5485650086814421,
59
  "step": 500
60
  },
61
  {
62
  "epoch": 5.5,
63
+ "eval_loss": 0.449034720659256,
64
+ "eval_runtime": 66.057,
65
+ "eval_samples_per_second": 24.933,
66
+ "eval_steps_per_second": 3.119,
67
+ "eval_wer": 0.5066898171790419,
68
  "step": 600
69
  },
70
  {
71
  "epoch": 6.42,
72
+ "eval_loss": 0.4039786159992218,
73
+ "eval_runtime": 65.8832,
74
+ "eval_samples_per_second": 24.999,
75
+ "eval_steps_per_second": 3.127,
76
+ "eval_wer": 0.4405065876825656,
77
  "step": 700
78
  },
79
  {
80
  "epoch": 7.34,
81
+ "eval_loss": 0.4107377827167511,
82
+ "eval_runtime": 65.8157,
83
+ "eval_samples_per_second": 25.024,
84
+ "eval_steps_per_second": 3.13,
85
+ "eval_wer": 0.43080379940761926,
86
  "step": 800
87
  },
88
  {
89
  "epoch": 8.26,
90
+ "eval_loss": 0.4097311794757843,
91
+ "eval_runtime": 65.3892,
92
+ "eval_samples_per_second": 25.188,
93
+ "eval_steps_per_second": 3.15,
94
+ "eval_wer": 0.4143601266469206,
95
  "step": 900
96
  },
97
  {
98
  "epoch": 9.17,
99
+ "learning_rate": 0.00016863436123348017,
100
+ "loss": 0.224,
101
  "step": 1000
102
  },
103
  {
104
  "epoch": 9.17,
105
+ "eval_loss": 0.3750542402267456,
106
+ "eval_runtime": 65.3056,
107
+ "eval_samples_per_second": 25.22,
108
+ "eval_steps_per_second": 3.154,
109
+ "eval_wer": 0.3914819732407313,
110
  "step": 1000
111
  },
112
  {
113
  "epoch": 10.09,
114
+ "eval_loss": 0.4037274420261383,
115
+ "eval_runtime": 66.4673,
116
+ "eval_samples_per_second": 24.779,
117
+ "eval_steps_per_second": 3.099,
118
+ "eval_wer": 0.3872944540904913,
119
  "step": 1100
120
  },
121
  {
122
  "epoch": 11.01,
123
+ "eval_loss": 0.39948078989982605,
124
+ "eval_runtime": 65.7895,
125
+ "eval_samples_per_second": 25.034,
126
+ "eval_steps_per_second": 3.131,
127
+ "eval_wer": 0.37943008885711366,
128
  "step": 1200
129
  },
130
  {
131
  "epoch": 11.93,
132
+ "eval_loss": 0.380903959274292,
133
+ "eval_runtime": 66.0515,
134
+ "eval_samples_per_second": 24.935,
135
+ "eval_steps_per_second": 3.119,
136
+ "eval_wer": 0.36737820447349606,
137
  "step": 1300
138
  },
139
  {
140
  "epoch": 12.84,
141
+ "eval_loss": 0.3864530920982361,
142
+ "eval_runtime": 65.226,
143
+ "eval_samples_per_second": 25.251,
144
+ "eval_steps_per_second": 3.158,
145
+ "eval_wer": 0.3573690123582882,
146
  "step": 1400
147
  },
148
  {
149
  "epoch": 13.76,
150
+ "learning_rate": 3.647577092511013e-05,
151
+ "loss": 0.1027,
152
  "step": 1500
153
  },
154
  {
155
  "epoch": 13.76,
156
+ "eval_loss": 0.38098737597465515,
157
+ "eval_runtime": 66.3137,
158
+ "eval_samples_per_second": 24.837,
159
+ "eval_steps_per_second": 3.106,
160
+ "eval_wer": 0.3534878970483097,
161
  "step": 1500
162
  },
163
  {
164
  "epoch": 14.68,
165
+ "eval_loss": 0.37903106212615967,
166
+ "eval_runtime": 65.9553,
167
+ "eval_samples_per_second": 24.971,
168
+ "eval_steps_per_second": 3.123,
169
+ "eval_wer": 0.35021958941885406,
170
  "step": 1600
171
  },
172
  {
173
  "epoch": 15.0,
174
  "step": 1635,
175
  "total_flos": 6.429866751096261e+18,
176
+ "train_loss": 1.0876544129957846,
177
+ "train_runtime": 3803.1822,
178
+ "train_samples_per_second": 13.717,
179
+ "train_steps_per_second": 0.43
180
  }
181
  ],
182
  "max_steps": 1635,