WillHeld commited on
Commit
308f4fa
1 Parent(s): 6bb02f2

End of training

Browse files
all_results.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 99.98,
3
+ "eval_exact_match": 0.7570469798657719,
4
+ "eval_loss": 0.10638269782066345,
5
+ "eval_runtime": 97.9358,
6
+ "eval_samples": 2235,
7
+ "eval_samples_per_second": 22.821,
8
+ "eval_steps_per_second": 1.43,
9
+ "predict_exact_match": 0.7731418148654811,
10
+ "predict_loss": 0.09263205528259277,
11
+ "predict_runtime": 184.8274,
12
+ "predict_samples": 4386,
13
+ "predict_samples_per_second": 23.73,
14
+ "predict_steps_per_second": 1.488,
15
+ "train_loss": 0.23219555624326071,
16
+ "train_runtime": 13657.0516,
17
+ "train_samples": 15667,
18
+ "train_samples_per_second": 112.469,
19
+ "train_steps_per_second": 0.22
20
+ }
eval_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 99.98,
3
+ "eval_exact_match": 0.7570469798657719,
4
+ "eval_loss": 0.10638269782066345,
5
+ "eval_runtime": 97.9358,
6
+ "eval_samples": 2235,
7
+ "eval_samples_per_second": 22.821,
8
+ "eval_steps_per_second": 1.43
9
+ }
generated_predictions.txt ADDED
The diff for this file is too large to render. See raw diff
 
predict_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "predict_exact_match": 0.7731418148654811,
3
+ "predict_loss": 0.09263205528259277,
4
+ "predict_runtime": 184.8274,
5
+ "predict_samples": 4386,
6
+ "predict_samples_per_second": 23.73,
7
+ "predict_steps_per_second": 1.488
8
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 99.98,
3
+ "train_loss": 0.23219555624326071,
4
+ "train_runtime": 13657.0516,
5
+ "train_samples": 15667,
6
+ "train_samples_per_second": 112.469,
7
+ "train_steps_per_second": 0.22
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,250 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.10638269782066345,
3
+ "best_model_checkpoint": "/data/wheld3/mt5-small-pointer-mtop/checkpoint-1400",
4
+ "epoch": 99.9795918367347,
5
+ "global_step": 3000,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 6.65,
12
+ "learning_rate": 0.0009333333333333333,
13
+ "loss": 2.0944,
14
+ "step": 200
15
+ },
16
+ {
17
+ "epoch": 6.65,
18
+ "eval_exact_match": 0.0026845637583892616,
19
+ "eval_loss": 0.6548439264297485,
20
+ "eval_runtime": 44.8886,
21
+ "eval_samples_per_second": 49.79,
22
+ "eval_steps_per_second": 3.119,
23
+ "step": 200
24
+ },
25
+ {
26
+ "epoch": 13.33,
27
+ "learning_rate": 0.0008666666666666667,
28
+ "loss": 0.5307,
29
+ "step": 400
30
+ },
31
+ {
32
+ "epoch": 13.33,
33
+ "eval_exact_match": 0.2778523489932886,
34
+ "eval_loss": 0.24562813341617584,
35
+ "eval_runtime": 45.1956,
36
+ "eval_samples_per_second": 49.452,
37
+ "eval_steps_per_second": 3.098,
38
+ "step": 400
39
+ },
40
+ {
41
+ "epoch": 19.98,
42
+ "learning_rate": 0.0008,
43
+ "loss": 0.2388,
44
+ "step": 600
45
+ },
46
+ {
47
+ "epoch": 19.98,
48
+ "eval_exact_match": 0.4559284116331096,
49
+ "eval_loss": 0.14859849214553833,
50
+ "eval_runtime": 45.6558,
51
+ "eval_samples_per_second": 48.953,
52
+ "eval_steps_per_second": 3.066,
53
+ "step": 600
54
+ },
55
+ {
56
+ "epoch": 26.65,
57
+ "learning_rate": 0.0007333333333333333,
58
+ "loss": 0.1459,
59
+ "step": 800
60
+ },
61
+ {
62
+ "epoch": 26.65,
63
+ "eval_exact_match": 0.5042505592841163,
64
+ "eval_loss": 0.11904484033584595,
65
+ "eval_runtime": 54.895,
66
+ "eval_samples_per_second": 40.714,
67
+ "eval_steps_per_second": 2.55,
68
+ "step": 800
69
+ },
70
+ {
71
+ "epoch": 33.33,
72
+ "learning_rate": 0.0006666666666666666,
73
+ "loss": 0.1011,
74
+ "step": 1000
75
+ },
76
+ {
77
+ "epoch": 33.33,
78
+ "eval_exact_match": 0.5230425055928412,
79
+ "eval_loss": 0.11172914505004883,
80
+ "eval_runtime": 53.9217,
81
+ "eval_samples_per_second": 41.449,
82
+ "eval_steps_per_second": 2.596,
83
+ "step": 1000
84
+ },
85
+ {
86
+ "epoch": 39.98,
87
+ "learning_rate": 0.0006,
88
+ "loss": 0.0774,
89
+ "step": 1200
90
+ },
91
+ {
92
+ "epoch": 39.98,
93
+ "eval_exact_match": 0.5373601789709173,
94
+ "eval_loss": 0.10843723267316818,
95
+ "eval_runtime": 54.3522,
96
+ "eval_samples_per_second": 41.121,
97
+ "eval_steps_per_second": 2.576,
98
+ "step": 1200
99
+ },
100
+ {
101
+ "epoch": 46.65,
102
+ "learning_rate": 0.0005333333333333334,
103
+ "loss": 0.0598,
104
+ "step": 1400
105
+ },
106
+ {
107
+ "epoch": 46.65,
108
+ "eval_exact_match": 0.5404921700223714,
109
+ "eval_loss": 0.10638269782066345,
110
+ "eval_runtime": 44.8128,
111
+ "eval_samples_per_second": 49.874,
112
+ "eval_steps_per_second": 3.124,
113
+ "step": 1400
114
+ },
115
+ {
116
+ "epoch": 53.33,
117
+ "learning_rate": 0.00046666666666666666,
118
+ "loss": 0.0478,
119
+ "step": 1600
120
+ },
121
+ {
122
+ "epoch": 53.33,
123
+ "eval_exact_match": 0.545413870246085,
124
+ "eval_loss": 0.11469161510467529,
125
+ "eval_runtime": 45.8513,
126
+ "eval_samples_per_second": 48.745,
127
+ "eval_steps_per_second": 3.053,
128
+ "step": 1600
129
+ },
130
+ {
131
+ "epoch": 59.98,
132
+ "learning_rate": 0.0004,
133
+ "loss": 0.0397,
134
+ "step": 1800
135
+ },
136
+ {
137
+ "epoch": 59.98,
138
+ "eval_exact_match": 0.5472035794183445,
139
+ "eval_loss": 0.11389175802469254,
140
+ "eval_runtime": 44.8546,
141
+ "eval_samples_per_second": 49.828,
142
+ "eval_steps_per_second": 3.121,
143
+ "step": 1800
144
+ },
145
+ {
146
+ "epoch": 66.65,
147
+ "learning_rate": 0.0003333333333333333,
148
+ "loss": 0.0337,
149
+ "step": 2000
150
+ },
151
+ {
152
+ "epoch": 66.65,
153
+ "eval_exact_match": 0.5480984340044742,
154
+ "eval_loss": 0.117930106818676,
155
+ "eval_runtime": 47.7426,
156
+ "eval_samples_per_second": 46.814,
157
+ "eval_steps_per_second": 2.932,
158
+ "step": 2000
159
+ },
160
+ {
161
+ "epoch": 73.33,
162
+ "learning_rate": 0.0002666666666666667,
163
+ "loss": 0.0286,
164
+ "step": 2200
165
+ },
166
+ {
167
+ "epoch": 73.33,
168
+ "eval_exact_match": 0.5498881431767337,
169
+ "eval_loss": 0.12433456629514694,
170
+ "eval_runtime": 45.3848,
171
+ "eval_samples_per_second": 49.246,
172
+ "eval_steps_per_second": 3.085,
173
+ "step": 2200
174
+ },
175
+ {
176
+ "epoch": 79.98,
177
+ "learning_rate": 0.0002,
178
+ "loss": 0.0251,
179
+ "step": 2400
180
+ },
181
+ {
182
+ "epoch": 79.98,
183
+ "eval_exact_match": 0.5480984340044742,
184
+ "eval_loss": 0.12593261897563934,
185
+ "eval_runtime": 45.7566,
186
+ "eval_samples_per_second": 48.845,
187
+ "eval_steps_per_second": 3.06,
188
+ "step": 2400
189
+ },
190
+ {
191
+ "epoch": 86.65,
192
+ "learning_rate": 0.00013333333333333334,
193
+ "loss": 0.0218,
194
+ "step": 2600
195
+ },
196
+ {
197
+ "epoch": 86.65,
198
+ "eval_exact_match": 0.5503355704697986,
199
+ "eval_loss": 0.12759922444820404,
200
+ "eval_runtime": 45.5648,
201
+ "eval_samples_per_second": 49.051,
202
+ "eval_steps_per_second": 3.073,
203
+ "step": 2600
204
+ },
205
+ {
206
+ "epoch": 93.33,
207
+ "learning_rate": 6.666666666666667e-05,
208
+ "loss": 0.0197,
209
+ "step": 2800
210
+ },
211
+ {
212
+ "epoch": 93.33,
213
+ "eval_exact_match": 0.5503355704697986,
214
+ "eval_loss": 0.13092631101608276,
215
+ "eval_runtime": 45.4393,
216
+ "eval_samples_per_second": 49.186,
217
+ "eval_steps_per_second": 3.081,
218
+ "step": 2800
219
+ },
220
+ {
221
+ "epoch": 99.98,
222
+ "learning_rate": 0.0,
223
+ "loss": 0.0184,
224
+ "step": 3000
225
+ },
226
+ {
227
+ "epoch": 99.98,
228
+ "eval_exact_match": 0.5503355704697986,
229
+ "eval_loss": 0.1317095011472702,
230
+ "eval_runtime": 45.2758,
231
+ "eval_samples_per_second": 49.364,
232
+ "eval_steps_per_second": 3.092,
233
+ "step": 3000
234
+ },
235
+ {
236
+ "epoch": 99.98,
237
+ "step": 3000,
238
+ "total_flos": 3.831174875254272e+16,
239
+ "train_loss": 0.23219555624326071,
240
+ "train_runtime": 13657.0516,
241
+ "train_samples_per_second": 112.469,
242
+ "train_steps_per_second": 0.22
243
+ }
244
+ ],
245
+ "max_steps": 3000,
246
+ "num_train_epochs": 100,
247
+ "total_flos": 3.831174875254272e+16,
248
+ "trial_name": null,
249
+ "trial_params": null
250
+ }