flozi00 commited on
Commit
fc88da5
1 Parent(s): 2c7ea35

End of training

Browse files
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "train_loss": 9.778687784283113e-05,
4
- "train_runtime": 1221.5492,
5
- "train_samples": 147488,
6
- "train_samples_per_second": 120.738,
7
- "train_steps_per_second": 1.887
8
  }
1
  {
2
  "epoch": 1.0,
3
+ "train_loss": 8.709332525719078e-05,
4
+ "train_runtime": 189.6957,
5
+ "train_samples": 21538,
6
+ "train_samples_per_second": 113.54,
7
+ "train_steps_per_second": 1.777
8
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:940f1b23272e881ef6760e16562085c4ae9c6f88f2983ee47516ba3f7250fb47
3
  size 891650871
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6557f7e7707f045eb67eed0b29690171c23fbd92515f5215c978de1af11d2103
3
  size 891650871
runs/Nov20_12-49-20_DESKTOP-FPB11SM/1637409132.2384772/events.out.tfevents.1637409132.DESKTOP-FPB11SM.16720.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8b0073ebaa6cf7df2ef49a5c5e2f4f0c77ec8dca7435eae5dc6298db4062bd0
3
+ size 4740
runs/Nov20_12-49-20_DESKTOP-FPB11SM/events.out.tfevents.1637409132.DESKTOP-FPB11SM.16720.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca52f556bf2f36f778151ac76720ec3814439cd807c9b2fb7742ef47a0521b44
3
+ size 5326
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "train_loss": 9.778687784283113e-05,
4
- "train_runtime": 1221.5492,
5
- "train_samples": 147488,
6
- "train_samples_per_second": 120.738,
7
- "train_steps_per_second": 1.887
8
  }
1
  {
2
  "epoch": 1.0,
3
+ "train_loss": 8.709332525719078e-05,
4
+ "train_runtime": 189.6957,
5
+ "train_samples": 21538,
6
+ "train_samples_per_second": 113.54,
7
+ "train_steps_per_second": 1.777
8
  }
trainer_state.json CHANGED
@@ -2,300 +2,60 @@
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
  "epoch": 1.0,
5
- "global_step": 2305,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
- {
11
- "epoch": 0.02,
12
- "learning_rate": 4.8915401301518446e-05,
13
- "loss": 0.001,
14
- "step": 50
15
- },
16
- {
17
- "epoch": 0.04,
18
- "learning_rate": 4.7830802603036875e-05,
19
- "loss": 0.0003,
20
- "step": 100
21
- },
22
- {
23
- "epoch": 0.07,
24
- "learning_rate": 4.674620390455532e-05,
25
- "loss": 0.0006,
26
- "step": 150
27
- },
28
- {
29
- "epoch": 0.09,
30
- "learning_rate": 4.5661605206073755e-05,
31
- "loss": 0.0001,
32
- "step": 200
33
- },
34
- {
35
- "epoch": 0.11,
36
- "learning_rate": 4.45770065075922e-05,
37
- "loss": 0.0003,
38
- "step": 250
39
- },
40
- {
41
- "epoch": 0.13,
42
- "learning_rate": 4.349240780911063e-05,
43
- "loss": 0.0003,
44
- "step": 300
45
- },
46
  {
47
  "epoch": 0.15,
48
- "learning_rate": 4.240780911062907e-05,
49
- "loss": 0.0,
50
- "step": 350
51
- },
52
- {
53
- "epoch": 0.17,
54
- "learning_rate": 4.132321041214751e-05,
55
- "loss": 0.0,
56
- "step": 400
57
- },
58
- {
59
- "epoch": 0.2,
60
- "learning_rate": 4.0238611713665944e-05,
61
- "loss": 0.0,
62
- "step": 450
63
- },
64
- {
65
- "epoch": 0.22,
66
- "learning_rate": 3.915401301518438e-05,
67
- "loss": 0.0,
68
- "step": 500
69
- },
70
- {
71
- "epoch": 0.24,
72
- "learning_rate": 3.8069414316702824e-05,
73
- "loss": 0.0,
74
- "step": 550
75
- },
76
- {
77
- "epoch": 0.26,
78
- "learning_rate": 3.698481561822126e-05,
79
- "loss": 0.0,
80
- "step": 600
81
- },
82
- {
83
- "epoch": 0.28,
84
- "learning_rate": 3.59002169197397e-05,
85
- "loss": 0.0,
86
- "step": 650
87
  },
88
  {
89
  "epoch": 0.3,
90
- "learning_rate": 3.481561822125813e-05,
91
  "loss": 0.0002,
92
- "step": 700
93
- },
94
- {
95
- "epoch": 0.33,
96
- "learning_rate": 3.3731019522776576e-05,
97
- "loss": 0.0,
98
- "step": 750
99
- },
100
- {
101
- "epoch": 0.35,
102
- "learning_rate": 3.264642082429501e-05,
103
- "loss": 0.0001,
104
- "step": 800
105
- },
106
- {
107
- "epoch": 0.37,
108
- "learning_rate": 3.156182212581345e-05,
109
- "loss": 0.0,
110
- "step": 850
111
- },
112
- {
113
- "epoch": 0.39,
114
- "learning_rate": 3.0477223427331893e-05,
115
- "loss": 0.0,
116
- "step": 900
117
- },
118
- {
119
- "epoch": 0.41,
120
- "learning_rate": 2.9392624728850326e-05,
121
- "loss": 0.0,
122
- "step": 950
123
- },
124
- {
125
- "epoch": 0.43,
126
- "learning_rate": 2.8308026030368766e-05,
127
- "loss": 0.0,
128
- "step": 1000
129
- },
130
- {
131
- "epoch": 0.46,
132
- "learning_rate": 2.7223427331887202e-05,
133
- "loss": 0.0,
134
- "step": 1050
135
- },
136
- {
137
- "epoch": 0.48,
138
- "learning_rate": 2.6138828633405642e-05,
139
- "loss": 0.0,
140
- "step": 1100
141
- },
142
- {
143
- "epoch": 0.5,
144
- "learning_rate": 2.505422993492408e-05,
145
- "loss": 0.0,
146
- "step": 1150
147
- },
148
- {
149
- "epoch": 0.52,
150
- "learning_rate": 2.3969631236442515e-05,
151
- "loss": 0.0,
152
- "step": 1200
153
- },
154
- {
155
- "epoch": 0.54,
156
- "learning_rate": 2.2885032537960955e-05,
157
- "loss": 0.0,
158
- "step": 1250
159
  },
160
  {
161
- "epoch": 0.56,
162
- "learning_rate": 2.1800433839479395e-05,
163
  "loss": 0.0,
164
- "step": 1300
165
  },
166
  {
167
  "epoch": 0.59,
168
- "learning_rate": 2.0715835140997834e-05,
169
- "loss": 0.0,
170
- "step": 1350
171
- },
172
- {
173
- "epoch": 0.61,
174
- "learning_rate": 1.963123644251627e-05,
175
- "loss": 0.0002,
176
- "step": 1400
177
- },
178
- {
179
- "epoch": 0.63,
180
- "learning_rate": 1.8546637744034707e-05,
181
- "loss": 0.0002,
182
- "step": 1450
183
- },
184
- {
185
- "epoch": 0.65,
186
- "learning_rate": 1.7462039045553147e-05,
187
- "loss": 0.0,
188
- "step": 1500
189
- },
190
- {
191
- "epoch": 0.67,
192
- "learning_rate": 1.6377440347071584e-05,
193
  "loss": 0.0001,
194
- "step": 1550
195
- },
196
- {
197
- "epoch": 0.69,
198
- "learning_rate": 1.5292841648590023e-05,
199
- "loss": 0.0,
200
- "step": 1600
201
- },
202
- {
203
- "epoch": 0.72,
204
- "learning_rate": 1.420824295010846e-05,
205
- "loss": 0.0,
206
- "step": 1650
207
  },
208
  {
209
  "epoch": 0.74,
210
- "learning_rate": 1.3123644251626898e-05,
211
- "loss": 0.0004,
212
- "step": 1700
213
- },
214
- {
215
- "epoch": 0.76,
216
- "learning_rate": 1.2039045553145336e-05,
217
- "loss": 0.0001,
218
- "step": 1750
219
- },
220
- {
221
- "epoch": 0.78,
222
- "learning_rate": 1.0954446854663774e-05,
223
- "loss": 0.0,
224
- "step": 1800
225
- },
226
- {
227
- "epoch": 0.8,
228
- "learning_rate": 9.869848156182213e-06,
229
- "loss": 0.0001,
230
- "step": 1850
231
- },
232
- {
233
- "epoch": 0.82,
234
- "learning_rate": 8.78524945770065e-06,
235
  "loss": 0.0,
236
- "step": 1900
237
- },
238
- {
239
- "epoch": 0.85,
240
- "learning_rate": 7.70065075921909e-06,
241
- "loss": 0.0,
242
- "step": 1950
243
- },
244
- {
245
- "epoch": 0.87,
246
- "learning_rate": 6.616052060737528e-06,
247
- "loss": 0.0,
248
- "step": 2000
249
  },
250
  {
251
  "epoch": 0.89,
252
- "learning_rate": 5.531453362255966e-06,
253
- "loss": 0.0,
254
- "step": 2050
255
- },
256
- {
257
- "epoch": 0.91,
258
- "learning_rate": 4.446854663774403e-06,
259
- "loss": 0.0,
260
- "step": 2100
261
- },
262
- {
263
- "epoch": 0.93,
264
- "learning_rate": 3.362255965292842e-06,
265
- "loss": 0.0,
266
- "step": 2150
267
- },
268
- {
269
- "epoch": 0.95,
270
- "learning_rate": 2.27765726681128e-06,
271
- "loss": 0.0001,
272
- "step": 2200
273
- },
274
- {
275
- "epoch": 0.98,
276
- "learning_rate": 1.193058568329718e-06,
277
- "loss": 0.0,
278
- "step": 2250
279
- },
280
- {
281
- "epoch": 1.0,
282
- "learning_rate": 1.0845986984815619e-07,
283
  "loss": 0.0,
284
- "step": 2300
285
  },
286
  {
287
  "epoch": 1.0,
288
- "step": 2305,
289
- "total_flos": 1.828765247127552e+16,
290
- "train_loss": 9.778687784283113e-05,
291
- "train_runtime": 1221.5492,
292
- "train_samples_per_second": 120.738,
293
- "train_steps_per_second": 1.887
294
  }
295
  ],
296
- "max_steps": 2305,
297
  "num_train_epochs": 1,
298
- "total_flos": 1.828765247127552e+16,
299
  "trial_name": null,
300
  "trial_params": null
301
  }
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
  "epoch": 1.0,
5
+ "global_step": 337,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  {
11
  "epoch": 0.15,
12
+ "learning_rate": 4.258160237388724e-05,
13
+ "loss": 0.0002,
14
+ "step": 50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  },
16
  {
17
  "epoch": 0.3,
18
+ "learning_rate": 3.516320474777448e-05,
19
  "loss": 0.0002,
20
+ "step": 100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  },
22
  {
23
+ "epoch": 0.45,
24
+ "learning_rate": 2.774480712166172e-05,
25
  "loss": 0.0,
26
+ "step": 150
27
  },
28
  {
29
  "epoch": 0.59,
30
+ "learning_rate": 2.0326409495548962e-05,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  "loss": 0.0001,
32
+ "step": 200
 
 
 
 
 
 
 
 
 
 
 
 
33
  },
34
  {
35
  "epoch": 0.74,
36
+ "learning_rate": 1.29080118694362e-05,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  "loss": 0.0,
38
+ "step": 250
 
 
 
 
 
 
 
 
 
 
 
 
39
  },
40
  {
41
  "epoch": 0.89,
42
+ "learning_rate": 5.489614243323442e-06,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  "loss": 0.0,
44
+ "step": 300
45
  },
46
  {
47
  "epoch": 1.0,
48
+ "step": 337,
49
+ "total_flos": 2801087359488000.0,
50
+ "train_loss": 8.709332525719078e-05,
51
+ "train_runtime": 189.6957,
52
+ "train_samples_per_second": 113.54,
53
+ "train_steps_per_second": 1.777
54
  }
55
  ],
56
+ "max_steps": 337,
57
  "num_train_epochs": 1,
58
+ "total_flos": 2801087359488000.0,
59
  "trial_name": null,
60
  "trial_params": null
61
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:de8bf22c818a83c8a830b1d4ce820d7be4b116abfa1e76aceafa60c7dd2d8c03
3
  size 2991
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb7add8ff853cd2132d39756e4551133929cae92cc5bcd1c7db0103a7b3d00e1
3
  size 2991