gokuls commited on
Commit
d5918ca
1 Parent(s): 797915d

End of training

Browse files
all_results.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 15.0,
3
+ "eval_accuracy": 0.14942141332434558,
4
+ "eval_loss": 6.609232425689697,
5
+ "eval_runtime": 0.5708,
6
+ "eval_samples": 479,
7
+ "eval_samples_per_second": 839.189,
8
+ "eval_steps_per_second": 14.016,
9
+ "perplexity": 741.9133264941478,
10
+ "train_loss": 7.326953015673104,
11
+ "train_runtime": 5761.7187,
12
+ "train_samples": 228639,
13
+ "train_samples_per_second": 595.236,
14
+ "train_steps_per_second": 9.302
15
+ }
eval_results.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 15.0,
3
+ "eval_accuracy": 0.14942141332434558,
4
+ "eval_loss": 6.609232425689697,
5
+ "eval_runtime": 0.5708,
6
+ "eval_samples": 479,
7
+ "eval_samples_per_second": 839.189,
8
+ "eval_steps_per_second": 14.016,
9
+ "perplexity": 741.9133264941478
10
+ }
logs/events.out.tfevents.1675181784.serv-3333.204260.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c4e396b16cf55986819870ccfb69c18818a5144b9f54a2eff9dd818964d9bb1
3
+ size 369
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 15.0,
3
+ "train_loss": 7.326953015673104,
4
+ "train_runtime": 5761.7187,
5
+ "train_samples": 228639,
6
+ "train_samples_per_second": 595.236,
7
+ "train_steps_per_second": 9.302
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,250 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 6.556983947753906,
3
+ "best_model_checkpoint": "distilbert_add_pre-training-dim-96/checkpoint-50022",
4
+ "epoch": 15.0,
5
+ "global_step": 53595,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 1.0,
12
+ "learning_rate": 4.6756706234227496e-05,
13
+ "loss": 14.685,
14
+ "step": 3573
15
+ },
16
+ {
17
+ "epoch": 1.0,
18
+ "eval_accuracy": 0.12398869012625627,
19
+ "eval_loss": 9.392213821411133,
20
+ "eval_runtime": 0.565,
21
+ "eval_samples_per_second": 847.75,
22
+ "eval_steps_per_second": 14.159,
23
+ "step": 3573
24
+ },
25
+ {
26
+ "epoch": 2.0,
27
+ "learning_rate": 4.341807645574353e-05,
28
+ "loss": 8.0255,
29
+ "step": 7146
30
+ },
31
+ {
32
+ "epoch": 2.0,
33
+ "eval_accuracy": 0.1315243012683803,
34
+ "eval_loss": 7.151001453399658,
35
+ "eval_runtime": 0.5512,
36
+ "eval_samples_per_second": 869.004,
37
+ "eval_steps_per_second": 14.514,
38
+ "step": 7146
39
+ },
40
+ {
41
+ "epoch": 3.0,
42
+ "learning_rate": 4.008038134405085e-05,
43
+ "loss": 7.0152,
44
+ "step": 10719
45
+ },
46
+ {
47
+ "epoch": 3.0,
48
+ "eval_accuracy": 0.14824699404427463,
49
+ "eval_loss": 6.786097049713135,
50
+ "eval_runtime": 0.5553,
51
+ "eval_samples_per_second": 862.599,
52
+ "eval_steps_per_second": 14.407,
53
+ "step": 10719
54
+ },
55
+ {
56
+ "epoch": 4.0,
57
+ "learning_rate": 3.674175156556688e-05,
58
+ "loss": 6.8127,
59
+ "step": 14292
60
+ },
61
+ {
62
+ "epoch": 4.0,
63
+ "eval_accuracy": 0.1492738182022094,
64
+ "eval_loss": 6.705262660980225,
65
+ "eval_runtime": 0.5553,
66
+ "eval_samples_per_second": 862.591,
67
+ "eval_steps_per_second": 14.407,
68
+ "step": 14292
69
+ },
70
+ {
71
+ "epoch": 5.0,
72
+ "learning_rate": 3.3404056453874196e-05,
73
+ "loss": 6.74,
74
+ "step": 17865
75
+ },
76
+ {
77
+ "epoch": 5.0,
78
+ "eval_accuracy": 0.1474488649134637,
79
+ "eval_loss": 6.669492244720459,
80
+ "eval_runtime": 0.5614,
81
+ "eval_samples_per_second": 853.267,
82
+ "eval_steps_per_second": 14.251,
83
+ "step": 17865
84
+ },
85
+ {
86
+ "epoch": 6.0,
87
+ "learning_rate": 3.0065426675390224e-05,
88
+ "loss": 6.7067,
89
+ "step": 21438
90
+ },
91
+ {
92
+ "epoch": 6.0,
93
+ "eval_accuracy": 0.1490975747320925,
94
+ "eval_loss": 6.643059253692627,
95
+ "eval_runtime": 0.5554,
96
+ "eval_samples_per_second": 862.513,
97
+ "eval_steps_per_second": 14.405,
98
+ "step": 21438
99
+ },
100
+ {
101
+ "epoch": 7.0,
102
+ "learning_rate": 2.6726796896906253e-05,
103
+ "loss": 6.6871,
104
+ "step": 25011
105
+ },
106
+ {
107
+ "epoch": 7.0,
108
+ "eval_accuracy": 0.14833291057185535,
109
+ "eval_loss": 6.620449542999268,
110
+ "eval_runtime": 0.5532,
111
+ "eval_samples_per_second": 865.795,
112
+ "eval_steps_per_second": 14.46,
113
+ "step": 25011
114
+ },
115
+ {
116
+ "epoch": 8.0,
117
+ "learning_rate": 2.338910178521357e-05,
118
+ "loss": 6.6748,
119
+ "step": 28584
120
+ },
121
+ {
122
+ "epoch": 8.0,
123
+ "eval_accuracy": 0.1472751167407656,
124
+ "eval_loss": 6.625016212463379,
125
+ "eval_runtime": 0.5524,
126
+ "eval_samples_per_second": 867.158,
127
+ "eval_steps_per_second": 14.483,
128
+ "step": 28584
129
+ },
130
+ {
131
+ "epoch": 9.0,
132
+ "learning_rate": 2.005140667352089e-05,
133
+ "loss": 6.6649,
134
+ "step": 32157
135
+ },
136
+ {
137
+ "epoch": 9.0,
138
+ "eval_accuracy": 0.14858899133836267,
139
+ "eval_loss": 6.610751152038574,
140
+ "eval_runtime": 0.5557,
141
+ "eval_samples_per_second": 862.053,
142
+ "eval_steps_per_second": 14.398,
143
+ "step": 32157
144
+ },
145
+ {
146
+ "epoch": 10.0,
147
+ "learning_rate": 1.671371156182821e-05,
148
+ "loss": 6.6596,
149
+ "step": 35730
150
+ },
151
+ {
152
+ "epoch": 10.0,
153
+ "eval_accuracy": 0.14966520045947385,
154
+ "eval_loss": 6.613977909088135,
155
+ "eval_runtime": 0.5538,
156
+ "eval_samples_per_second": 864.918,
157
+ "eval_steps_per_second": 14.445,
158
+ "step": 35730
159
+ },
160
+ {
161
+ "epoch": 11.0,
162
+ "learning_rate": 1.3375081783344236e-05,
163
+ "loss": 6.6536,
164
+ "step": 39303
165
+ },
166
+ {
167
+ "epoch": 11.0,
168
+ "eval_accuracy": 0.14933370739234114,
169
+ "eval_loss": 6.60673189163208,
170
+ "eval_runtime": 0.5553,
171
+ "eval_samples_per_second": 862.613,
172
+ "eval_steps_per_second": 14.407,
173
+ "step": 39303
174
+ },
175
+ {
176
+ "epoch": 12.0,
177
+ "learning_rate": 1.0037386671651557e-05,
178
+ "loss": 6.6483,
179
+ "step": 42876
180
+ },
181
+ {
182
+ "epoch": 12.0,
183
+ "eval_accuracy": 0.14892078326657765,
184
+ "eval_loss": 6.613958835601807,
185
+ "eval_runtime": 0.5595,
186
+ "eval_samples_per_second": 856.133,
187
+ "eval_steps_per_second": 14.299,
188
+ "step": 42876
189
+ },
190
+ {
191
+ "epoch": 13.0,
192
+ "learning_rate": 6.699691559958874e-06,
193
+ "loss": 6.6463,
194
+ "step": 46449
195
+ },
196
+ {
197
+ "epoch": 13.0,
198
+ "eval_accuracy": 0.14841714254033375,
199
+ "eval_loss": 6.609643459320068,
200
+ "eval_runtime": 0.5507,
201
+ "eval_samples_per_second": 869.79,
202
+ "eval_steps_per_second": 14.527,
203
+ "step": 46449
204
+ },
205
+ {
206
+ "epoch": 14.0,
207
+ "learning_rate": 3.3619964482661936e-06,
208
+ "loss": 6.6434,
209
+ "step": 50022
210
+ },
211
+ {
212
+ "epoch": 14.0,
213
+ "eval_accuracy": 0.15260783760924726,
214
+ "eval_loss": 6.556983947753906,
215
+ "eval_runtime": 0.5531,
216
+ "eval_samples_per_second": 866.025,
217
+ "eval_steps_per_second": 14.464,
218
+ "step": 50022
219
+ },
220
+ {
221
+ "epoch": 15.0,
222
+ "learning_rate": 2.336666978222264e-08,
223
+ "loss": 6.6414,
224
+ "step": 53595
225
+ },
226
+ {
227
+ "epoch": 15.0,
228
+ "eval_accuracy": 0.15261607518586057,
229
+ "eval_loss": 6.583581924438477,
230
+ "eval_runtime": 0.5553,
231
+ "eval_samples_per_second": 862.526,
232
+ "eval_steps_per_second": 14.405,
233
+ "step": 53595
234
+ },
235
+ {
236
+ "epoch": 15.0,
237
+ "step": 53595,
238
+ "total_flos": 3.972393508798464e+16,
239
+ "train_loss": 7.326953015673104,
240
+ "train_runtime": 5761.7187,
241
+ "train_samples_per_second": 595.236,
242
+ "train_steps_per_second": 9.302
243
+ }
244
+ ],
245
+ "max_steps": 53595,
246
+ "num_train_epochs": 15,
247
+ "total_flos": 3.972393508798464e+16,
248
+ "trial_name": null,
249
+ "trial_params": null
250
+ }