mgfrantz committed on
Commit
18cc8c4
•
1 Parent(s): 71dc3ac

commit files to HF hub

Browse files
Files changed (34) hide show
  1. .gitignore +1 -0
  2. checkpoint-10000/config.json +1 -1
  3. checkpoint-10000/optimizer.pt +2 -2
  4. checkpoint-10000/pytorch_model.bin +1 -1
  5. checkpoint-10000/rng_state.pth +1 -1
  6. checkpoint-10000/scaler.pt +1 -1
  7. checkpoint-10000/scheduler.pt +1 -1
  8. checkpoint-10000/trainer_state.json +605 -125
  9. checkpoint-10000/training_args.bin +1 -1
  10. checkpoint-2000/config.json +1 -1
  11. checkpoint-2000/optimizer.pt +2 -2
  12. checkpoint-2000/pytorch_model.bin +1 -1
  13. checkpoint-2000/rng_state.pth +1 -1
  14. checkpoint-2000/scaler.pt +1 -1
  15. checkpoint-2000/scheduler.pt +1 -1
  16. checkpoint-2000/trainer_state.json +124 -28
  17. checkpoint-2000/training_args.bin +1 -1
  18. config.json +1 -1
  19. pytorch_model.bin +1 -1
  20. runs/Jul03_17-16-15_8a98c40ff775/events.out.tfevents.1656868588.8a98c40ff775.71.0 +0 -3
  21. runs/Jul03_21-34-55_8a98c40ff775/events.out.tfevents.1656884100.8a98c40ff775.71.4 +0 -3
  22. runs/Jul03_23-15-45_8a98c40ff775/1656890147.0297482/events.out.tfevents.1656890147.8a98c40ff775.71.7 +0 -3
  23. runs/Jul03_23-16-08_8a98c40ff775/1656890170.9030218/events.out.tfevents.1656890170.8a98c40ff775.71.9 +0 -3
  24. runs/Jul03_23-16-08_8a98c40ff775/events.out.tfevents.1656890170.8a98c40ff775.71.8 +0 -3
  25. runs/Jul03_23-26-33_8a98c40ff775/1656890797.636419/events.out.tfevents.1656890797.8a98c40ff775.5349.1 +0 -3
  26. runs/Jul03_23-26-33_8a98c40ff775/events.out.tfevents.1656890797.8a98c40ff775.5349.0 +0 -3
  27. runs/Jul04_00-33-28_8a98c40ff775/1656894810.6894863/events.out.tfevents.1656894810.8a98c40ff775.8083.1 +0 -3
  28. runs/{Jul03_17-16-15_8a98c40ff775/1656868588.8381069/events.out.tfevents.1656868588.8a98c40ff775.71.1 → Jul26_21-03-46_1fe91f198d71/1658869435.620103/events.out.tfevents.1658869435.1fe91f198d71.71.1} +1 -1
  29. runs/{Jul03_19-51-55_8a98c40ff775/events.out.tfevents.1656877916.8a98c40ff775.71.2 → Jul26_21-03-46_1fe91f198d71/events.out.tfevents.1658869435.1fe91f198d71.71.0} +2 -2
  30. runs/{Jul03_21-34-55_8a98c40ff775/1656884100.2687025/events.out.tfevents.1656884100.8a98c40ff775.71.5 → Jul26_21-10-23_1fe91f198d71/1658869825.093272/events.out.tfevents.1658869825.1fe91f198d71.71.3} +1 -1
  31. runs/{Jul03_23-15-45_8a98c40ff775/events.out.tfevents.1656890147.8a98c40ff775.71.6 → Jul26_21-10-23_1fe91f198d71/events.out.tfevents.1658869825.1fe91f198d71.71.2} +1 -1
  32. runs/{Jul03_19-51-55_8a98c40ff775/1656877916.4434352/events.out.tfevents.1656877916.8a98c40ff775.71.3 → Jul26_21-13-22_1fe91f198d71/1658870005.9725268/events.out.tfevents.1658870005.1fe91f198d71.1213.1} +1 -1
  33. runs/{Jul04_00-33-28_8a98c40ff775/events.out.tfevents.1656894810.8a98c40ff775.8083.0 → Jul26_21-13-22_1fe91f198d71/events.out.tfevents.1658870005.1fe91f198d71.1213.0} +1 -1
  34. training_args.bin +1 -1
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ checkpoint*
checkpoint-10000/config.json CHANGED
@@ -5,7 +5,7 @@
5
  ],
6
  "attention_probs_dropout_prob": 0.1,
7
  "hidden_act": "gelu",
8
- "hidden_dropout_prob": 0.1,
9
  "hidden_size": 768,
10
  "id2label": {
11
  "0": "LABEL_0",
 
5
  ],
6
  "attention_probs_dropout_prob": 0.1,
7
  "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.2,
9
  "hidden_size": 768,
10
  "id2label": {
11
  "0": "LABEL_0",
checkpoint-10000/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a71038f71294ce4bea3278ad481be4f7813e2bdc40702821a0c36c8fbf2b3bf0
3
- size 1475512665
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f720256e4e70820d89c9e06666f88dbcad39f323aa192001b302a880db22b488
3
+ size 1475512793
checkpoint-10000/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ec9015c37d875002cc65e6ee1eea456f1901cfec172c12336855961046aa649d
3
  size 737770027
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a23605ea589e2551eb0df2eb5509e159ed31156dfca15b2eeba8eb53c93f51f
3
  size 737770027
checkpoint-10000/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cc7d4a3cc28f7bedaf263d0aaa9d44d8e8dbbb9318d984a9f15d4b64025950ca
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a41b1f8f5d265afac85d7d1219ee0f67a2407025613329d1f86629642de0409
3
  size 14503
checkpoint-10000/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0efbbbbf577f80c6508c5320fd27b2a182a4c5a083ba7b8c862c224c4c39e5ea
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cdbc1a65543b77f14ca91d846aa90008d71866cbb1b49f2d46285e1f600dd308
3
  size 559
checkpoint-10000/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4417a71fdc6b2b190384e77f425c104230eb51d1e5affb60e61f3daefae553d3
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63309151ebd54520ac76370f164e851299d6d6a0b59b77db141c9a804e76fd13
3
  size 623
checkpoint-10000/trainer_state.json CHANGED
@@ -1,296 +1,776 @@
1
  {
2
- "best_metric": 0.6859038472175598,
3
- "best_model_checkpoint": "deberta_v3_finetuned_predicting_effective_arguments/checkpoint-2000",
4
  "epoch": 4.835287009063444,
5
  "global_step": 10000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  {
11
  "epoch": 0.24,
12
- "learning_rate": 9.518375241779499e-06,
13
- "loss": 0.8315,
14
  "step": 500
15
  },
16
  {
17
  "epoch": 0.24,
18
- "eval_loss": 0.7444673180580139,
19
- "eval_runtime": 29.7252,
20
- "eval_samples_per_second": 123.33,
21
- "eval_steps_per_second": 30.849,
22
  "step": 500
23
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  {
25
  "epoch": 0.48,
26
- "learning_rate": 9.034816247582205e-06,
27
- "loss": 0.7232,
28
  "step": 1000
29
  },
30
  {
31
  "epoch": 0.48,
32
- "eval_loss": 0.7441162467002869,
33
- "eval_runtime": 29.4226,
34
- "eval_samples_per_second": 124.598,
35
- "eval_steps_per_second": 31.167,
36
  "step": 1000
37
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  {
39
  "epoch": 0.73,
40
- "learning_rate": 8.552224371373308e-06,
41
- "loss": 0.7269,
42
  "step": 1500
43
  },
44
  {
45
  "epoch": 0.73,
46
- "eval_loss": 0.7453668117523193,
47
- "eval_runtime": 29.5754,
48
- "eval_samples_per_second": 123.955,
49
- "eval_steps_per_second": 31.006,
50
  "step": 1500
51
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  {
53
  "epoch": 0.97,
54
- "learning_rate": 8.068665377176016e-06,
55
- "loss": 0.7185,
56
  "step": 2000
57
  },
58
  {
59
  "epoch": 0.97,
60
- "eval_loss": 0.6859038472175598,
61
- "eval_runtime": 29.569,
62
- "eval_samples_per_second": 123.981,
63
- "eval_steps_per_second": 31.012,
64
  "step": 2000
65
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  {
67
  "epoch": 1.21,
68
- "learning_rate": 7.585106382978724e-06,
69
- "loss": 0.6611,
70
  "step": 2500
71
  },
72
  {
73
  "epoch": 1.21,
74
- "eval_loss": 0.6883463263511658,
75
- "eval_runtime": 29.5377,
76
- "eval_samples_per_second": 124.113,
77
- "eval_steps_per_second": 31.045,
78
  "step": 2500
79
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  {
81
  "epoch": 1.45,
82
- "learning_rate": 7.101547388781432e-06,
83
- "loss": 0.6516,
84
  "step": 3000
85
  },
86
  {
87
  "epoch": 1.45,
88
- "eval_loss": 0.7088809013366699,
89
- "eval_runtime": 29.1585,
90
- "eval_samples_per_second": 125.727,
91
- "eval_steps_per_second": 31.449,
92
  "step": 3000
93
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  {
95
  "epoch": 1.69,
96
- "learning_rate": 6.617988394584139e-06,
97
- "loss": 0.6386,
98
  "step": 3500
99
  },
100
  {
101
  "epoch": 1.69,
102
- "eval_loss": 0.6939310431480408,
103
- "eval_runtime": 29.2346,
104
- "eval_samples_per_second": 125.399,
105
- "eval_steps_per_second": 31.367,
106
  "step": 3500
107
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
  {
109
  "epoch": 1.93,
110
- "learning_rate": 6.134429400386848e-06,
111
- "loss": 0.6435,
112
  "step": 4000
113
  },
114
  {
115
  "epoch": 1.93,
116
- "eval_loss": 0.6942200064659119,
117
- "eval_runtime": 30.0324,
118
- "eval_samples_per_second": 122.068,
119
- "eval_steps_per_second": 30.534,
120
  "step": 4000
121
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
  {
123
  "epoch": 2.18,
124
- "learning_rate": 5.650870406189556e-06,
125
- "loss": 0.5823,
126
  "step": 4500
127
  },
128
  {
129
  "epoch": 2.18,
130
- "eval_loss": 0.771172821521759,
131
- "eval_runtime": 29.4927,
132
- "eval_samples_per_second": 124.302,
133
- "eval_steps_per_second": 31.092,
134
  "step": 4500
135
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
  {
137
  "epoch": 2.42,
138
- "learning_rate": 5.167311411992263e-06,
139
- "loss": 0.5652,
140
  "step": 5000
141
  },
142
  {
143
  "epoch": 2.42,
144
- "eval_loss": 0.7315438985824585,
145
- "eval_runtime": 28.9938,
146
- "eval_samples_per_second": 126.441,
147
- "eval_steps_per_second": 31.627,
148
  "step": 5000
149
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
150
  {
151
  "epoch": 2.66,
152
- "learning_rate": 4.683752417794971e-06,
153
- "loss": 0.5701,
154
  "step": 5500
155
  },
156
  {
157
  "epoch": 2.66,
158
- "eval_loss": 0.7429590225219727,
159
- "eval_runtime": 29.2482,
160
- "eval_samples_per_second": 125.341,
161
- "eval_steps_per_second": 31.352,
162
  "step": 5500
163
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
164
  {
165
  "epoch": 2.9,
166
- "learning_rate": 4.20019342359768e-06,
167
- "loss": 0.5583,
168
  "step": 6000
169
  },
170
  {
171
  "epoch": 2.9,
172
- "eval_loss": 0.74286949634552,
173
- "eval_runtime": 28.9549,
174
- "eval_samples_per_second": 126.611,
175
- "eval_steps_per_second": 31.67,
176
  "step": 6000
177
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
178
  {
179
  "epoch": 3.14,
180
- "learning_rate": 3.7176015473887818e-06,
181
- "loss": 0.5145,
182
  "step": 6500
183
  },
184
  {
185
  "epoch": 3.14,
186
- "eval_loss": 0.8086823225021362,
187
- "eval_runtime": 29.0547,
188
- "eval_samples_per_second": 126.176,
189
- "eval_steps_per_second": 31.561,
190
  "step": 6500
191
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
192
  {
193
  "epoch": 3.38,
194
- "learning_rate": 3.23404255319149e-06,
195
- "loss": 0.4795,
196
  "step": 7000
197
  },
198
  {
199
  "epoch": 3.38,
200
- "eval_loss": 0.8026116490364075,
201
- "eval_runtime": 28.8514,
202
- "eval_samples_per_second": 127.065,
203
- "eval_steps_per_second": 31.784,
204
  "step": 7000
205
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
206
  {
207
  "epoch": 3.63,
208
- "learning_rate": 2.7504835589941973e-06,
209
- "loss": 0.4784,
210
  "step": 7500
211
  },
212
  {
213
  "epoch": 3.63,
214
- "eval_loss": 0.7970021963119507,
215
- "eval_runtime": 28.8445,
216
- "eval_samples_per_second": 127.095,
217
- "eval_steps_per_second": 31.791,
218
  "step": 7500
219
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
220
  {
221
  "epoch": 3.87,
222
- "learning_rate": 2.2669245647969055e-06,
223
- "loss": 0.4793,
224
  "step": 8000
225
  },
226
  {
227
  "epoch": 3.87,
228
- "eval_loss": 0.7971013784408569,
229
- "eval_runtime": 29.079,
230
- "eval_samples_per_second": 126.07,
231
- "eval_steps_per_second": 31.535,
232
  "step": 8000
233
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
234
  {
235
  "epoch": 4.11,
236
- "learning_rate": 1.784332688588008e-06,
237
- "loss": 0.4544,
238
  "step": 8500
239
  },
240
  {
241
  "epoch": 4.11,
242
- "eval_loss": 0.8546438813209534,
243
- "eval_runtime": 29.1238,
244
- "eval_samples_per_second": 125.877,
245
- "eval_steps_per_second": 31.486,
246
  "step": 8500
247
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
248
  {
249
  "epoch": 4.35,
250
- "learning_rate": 1.3007736943907157e-06,
251
- "loss": 0.4115,
252
  "step": 9000
253
  },
254
  {
255
  "epoch": 4.35,
256
- "eval_loss": 0.8430052995681763,
257
- "eval_runtime": 29.3455,
258
- "eval_samples_per_second": 124.926,
259
- "eval_steps_per_second": 31.248,
260
  "step": 9000
261
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
262
  {
263
  "epoch": 4.59,
264
- "learning_rate": 8.172147001934237e-07,
265
- "loss": 0.4192,
266
  "step": 9500
267
  },
268
  {
269
  "epoch": 4.59,
270
- "eval_loss": 0.8553691506385803,
271
- "eval_runtime": 30.0888,
272
- "eval_samples_per_second": 121.839,
273
- "eval_steps_per_second": 30.476,
274
  "step": 9500
275
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
276
  {
277
  "epoch": 4.84,
278
- "learning_rate": 3.3365570599613157e-07,
279
- "loss": 0.4101,
280
  "step": 10000
281
  },
282
  {
283
  "epoch": 4.84,
284
- "eval_loss": 0.863072395324707,
285
- "eval_runtime": 29.1167,
286
- "eval_samples_per_second": 125.907,
287
- "eval_steps_per_second": 31.494,
288
  "step": 10000
289
  }
290
  ],
291
- "max_steps": 10340,
292
- "num_train_epochs": 5,
293
- "total_flos": 9148295382909930.0,
294
  "trial_name": null,
295
  "trial_params": null
296
  }
 
1
  {
2
+ "best_metric": 0.727655291557312,
3
+ "best_model_checkpoint": "deberta_v3_finetuned_predicting_effective_arguments/checkpoint-10000",
4
  "epoch": 4.835287009063444,
5
  "global_step": 10000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
+ {
11
+ "epoch": 0.05,
12
+ "learning_rate": 2e-07,
13
+ "loss": 1.0361,
14
+ "step": 100
15
+ },
16
+ {
17
+ "epoch": 0.1,
18
+ "learning_rate": 3.98e-07,
19
+ "loss": 0.9751,
20
+ "step": 200
21
+ },
22
+ {
23
+ "epoch": 0.15,
24
+ "learning_rate": 5.979999999999999e-07,
25
+ "loss": 0.9807,
26
+ "step": 300
27
+ },
28
+ {
29
+ "epoch": 0.19,
30
+ "learning_rate": 7.94e-07,
31
+ "loss": 0.9513,
32
+ "step": 400
33
+ },
34
  {
35
  "epoch": 0.24,
36
+ "learning_rate": 9.94e-07,
37
+ "loss": 0.9237,
38
  "step": 500
39
  },
40
  {
41
  "epoch": 0.24,
42
+ "eval_loss": 0.9464540481567383,
43
+ "eval_runtime": 37.5769,
44
+ "eval_samples_per_second": 97.587,
45
+ "eval_steps_per_second": 24.403,
46
  "step": 500
47
  },
48
+ {
49
+ "epoch": 0.29,
50
+ "learning_rate": 9.955915919781764e-07,
51
+ "loss": 0.9367,
52
+ "step": 600
53
+ },
54
+ {
55
+ "epoch": 0.34,
56
+ "learning_rate": 9.81900265076038e-07,
57
+ "loss": 0.8993,
58
+ "step": 700
59
+ },
60
+ {
61
+ "epoch": 0.39,
62
+ "learning_rate": 9.594490241150311e-07,
63
+ "loss": 0.8586,
64
+ "step": 800
65
+ },
66
+ {
67
+ "epoch": 0.44,
68
+ "learning_rate": 9.282057505552949e-07,
69
+ "loss": 0.854,
70
+ "step": 900
71
+ },
72
  {
73
  "epoch": 0.48,
74
+ "learning_rate": 8.889381125453379e-07,
75
+ "loss": 0.8642,
76
  "step": 1000
77
  },
78
  {
79
  "epoch": 0.48,
80
+ "eval_loss": 0.8270628452301025,
81
+ "eval_runtime": 28.2214,
82
+ "eval_samples_per_second": 129.937,
83
+ "eval_steps_per_second": 32.493,
84
  "step": 1000
85
  },
86
+ {
87
+ "epoch": 0.53,
88
+ "learning_rate": 8.423819662432867e-07,
89
+ "loss": 0.8314,
90
+ "step": 1100
91
+ },
92
+ {
93
+ "epoch": 0.58,
94
+ "learning_rate": 7.894097508558568e-07,
95
+ "loss": 0.8317,
96
+ "step": 1200
97
+ },
98
+ {
99
+ "epoch": 0.63,
100
+ "learning_rate": 7.310141395581585e-07,
101
+ "loss": 0.7939,
102
+ "step": 1300
103
+ },
104
+ {
105
+ "epoch": 0.68,
106
+ "learning_rate": 6.682894372882701e-07,
107
+ "loss": 0.803,
108
+ "step": 1400
109
+ },
110
  {
111
  "epoch": 0.73,
112
+ "learning_rate": 6.024110740127264e-07,
113
+ "loss": 0.8029,
114
  "step": 1500
115
  },
116
  {
117
  "epoch": 0.73,
118
+ "eval_loss": 0.7635419368743896,
119
+ "eval_runtime": 28.2809,
120
+ "eval_samples_per_second": 129.663,
121
+ "eval_steps_per_second": 32.425,
122
  "step": 1500
123
  },
124
+ {
125
+ "epoch": 0.77,
126
+ "learning_rate": 5.346135777490083e-07,
127
+ "loss": 0.7959,
128
+ "step": 1600
129
+ },
130
+ {
131
+ "epoch": 0.82,
132
+ "learning_rate": 4.6616744011972247e-07,
133
+ "loss": 0.7708,
134
+ "step": 1700
135
+ },
136
+ {
137
+ "epoch": 0.87,
138
+ "learning_rate": 3.9835530796656867e-07,
139
+ "loss": 0.7636,
140
+ "step": 1800
141
+ },
142
+ {
143
+ "epoch": 0.92,
144
+ "learning_rate": 3.3244794718149894e-07,
145
+ "loss": 0.7864,
146
+ "step": 1900
147
+ },
148
  {
149
  "epoch": 0.97,
150
+ "learning_rate": 2.696804291810131e-07,
151
+ "loss": 0.8005,
152
  "step": 2000
153
  },
154
  {
155
  "epoch": 0.97,
156
+ "eval_loss": 0.7537589073181152,
157
+ "eval_runtime": 28.4034,
158
+ "eval_samples_per_second": 129.104,
159
+ "eval_steps_per_second": 32.285,
160
  "step": 2000
161
  },
162
+ {
163
+ "epoch": 1.02,
164
+ "learning_rate": 2.1122898627730768e-07,
165
+ "loss": 0.7924,
166
+ "step": 2100
167
+ },
168
+ {
169
+ "epoch": 1.06,
170
+ "learning_rate": 1.5818896966520534e-07,
171
+ "loss": 0.8072,
172
+ "step": 2200
173
+ },
174
+ {
175
+ "epoch": 1.11,
176
+ "learning_rate": 1.1155432308130763e-07,
177
+ "loss": 0.7997,
178
+ "step": 2300
179
+ },
180
+ {
181
+ "epoch": 1.16,
182
+ "learning_rate": 7.219895678887305e-08,
183
+ "loss": 0.7931,
184
+ "step": 2400
185
+ },
186
  {
187
  "epoch": 1.21,
188
+ "learning_rate": 4.0860370930748286e-08,
189
+ "loss": 0.7734,
190
  "step": 2500
191
  },
192
  {
193
  "epoch": 1.21,
194
+ "eval_loss": 0.7621562480926514,
195
+ "eval_runtime": 28.9226,
196
+ "eval_samples_per_second": 126.786,
197
+ "eval_steps_per_second": 31.705,
198
  "step": 2500
199
  },
200
+ {
201
+ "epoch": 1.26,
202
+ "learning_rate": 1.812583514064109e-08,
203
+ "loss": 0.7487,
204
+ "step": 2600
205
+ },
206
+ {
207
+ "epoch": 1.31,
208
+ "learning_rate": 4.421383399995704e-09,
209
+ "loss": 0.7911,
210
+ "step": 2700
211
+ },
212
+ {
213
+ "epoch": 1.35,
214
+ "learning_rate": 9.999961696285757e-07,
215
+ "loss": 0.7794,
216
+ "step": 2800
217
+ },
218
+ {
219
+ "epoch": 1.4,
220
+ "learning_rate": 9.950440411813353e-07,
221
+ "loss": 0.7922,
222
+ "step": 2900
223
+ },
224
  {
225
  "epoch": 1.45,
226
+ "learning_rate": 9.810025090182484e-07,
227
+ "loss": 0.7842,
228
  "step": 3000
229
  },
230
  {
231
  "epoch": 1.45,
232
+ "eval_loss": 0.748653769493103,
233
+ "eval_runtime": 27.9531,
234
+ "eval_samples_per_second": 131.184,
235
+ "eval_steps_per_second": 32.805,
236
  "step": 3000
237
  },
238
+ {
239
+ "epoch": 1.5,
240
+ "learning_rate": 9.581263257645987e-07,
241
+ "loss": 0.7731,
242
+ "step": 3100
243
+ },
244
+ {
245
+ "epoch": 1.55,
246
+ "learning_rate": 9.264785067160785e-07,
247
+ "loss": 0.7849,
248
+ "step": 3200
249
+ },
250
+ {
251
+ "epoch": 1.6,
252
+ "learning_rate": 8.868386909145071e-07,
253
+ "loss": 0.7745,
254
+ "step": 3300
255
+ },
256
+ {
257
+ "epoch": 1.64,
258
+ "learning_rate": 8.399497089461603e-07,
259
+ "loss": 0.7611,
260
+ "step": 3400
261
+ },
262
  {
263
  "epoch": 1.69,
264
+ "learning_rate": 7.872511787610445e-07,
265
+ "loss": 0.7543,
266
  "step": 3500
267
  },
268
  {
269
  "epoch": 1.69,
270
+ "eval_loss": 0.7955728769302368,
271
+ "eval_runtime": 28.0818,
272
+ "eval_samples_per_second": 130.583,
273
+ "eval_steps_per_second": 32.655,
274
  "step": 3500
275
  },
276
+ {
277
+ "epoch": 1.74,
278
+ "learning_rate": 7.286677103241362e-07,
279
+ "loss": 0.7793,
280
+ "step": 3600
281
+ },
282
+ {
283
+ "epoch": 1.79,
284
+ "learning_rate": 6.657991218409446e-07,
285
+ "loss": 0.7359,
286
+ "step": 3700
287
+ },
288
+ {
289
+ "epoch": 1.84,
290
+ "learning_rate": 5.998235396346351e-07,
291
+ "loss": 0.7602,
292
+ "step": 3800
293
+ },
294
+ {
295
+ "epoch": 1.89,
296
+ "learning_rate": 5.319773135574745e-07,
297
+ "loss": 0.7664,
298
+ "step": 3900
299
+ },
300
  {
301
  "epoch": 1.93,
302
+ "learning_rate": 4.635318484047261e-07,
303
+ "loss": 0.7883,
304
  "step": 4000
305
  },
306
  {
307
  "epoch": 1.93,
308
+ "eval_loss": 0.7467952370643616,
309
+ "eval_runtime": 28.6183,
310
+ "eval_samples_per_second": 128.135,
311
+ "eval_steps_per_second": 32.042,
312
  "step": 4000
313
  },
314
+ {
315
+ "epoch": 1.98,
316
+ "learning_rate": 3.9576977841621134e-07,
317
+ "loss": 0.7493,
318
+ "step": 4100
319
+ },
320
+ {
321
+ "epoch": 2.03,
322
+ "learning_rate": 3.299609313436215e-07,
323
+ "loss": 0.7312,
324
+ "step": 4200
325
+ },
326
+ {
327
+ "epoch": 2.08,
328
+ "learning_rate": 2.6733853250510007e-07,
329
+ "loss": 0.7387,
330
+ "step": 4300
331
+ },
332
+ {
333
+ "epoch": 2.13,
334
+ "learning_rate": 2.0907609475136373e-07,
335
+ "loss": 0.7492,
336
+ "step": 4400
337
+ },
338
  {
339
  "epoch": 2.18,
340
+ "learning_rate": 1.562654274139964e-07,
341
+ "loss": 0.7515,
342
  "step": 4500
343
  },
344
  {
345
  "epoch": 2.18,
346
+ "eval_loss": 0.7437878847122192,
347
+ "eval_runtime": 27.6555,
348
+ "eval_samples_per_second": 132.595,
349
+ "eval_steps_per_second": 33.158,
350
  "step": 4500
351
  },
352
+ {
353
+ "epoch": 2.22,
354
+ "learning_rate": 1.0989617633737314e-07,
355
+ "loss": 0.7218,
356
+ "step": 4600
357
+ },
358
+ {
359
+ "epoch": 2.27,
360
+ "learning_rate": 7.083727840391568e-08,
361
+ "loss": 0.7554,
362
+ "step": 4700
363
+ },
364
+ {
365
+ "epoch": 2.32,
366
+ "learning_rate": 4.00889953873248e-08,
367
+ "loss": 0.768,
368
+ "step": 4800
369
+ },
370
+ {
371
+ "epoch": 2.37,
372
+ "learning_rate": 1.7607339075303706e-08,
373
+ "loss": 0.725,
374
+ "step": 4900
375
+ },
376
  {
377
  "epoch": 2.42,
378
+ "learning_rate": 4.165483173136253e-09,
379
+ "loss": 0.716,
380
  "step": 5000
381
  },
382
  {
383
  "epoch": 2.42,
384
+ "eval_loss": 0.7440274953842163,
385
+ "eval_runtime": 28.4825,
386
+ "eval_samples_per_second": 128.746,
387
+ "eval_steps_per_second": 32.195,
388
  "step": 5000
389
  },
390
+ {
391
+ "epoch": 2.47,
392
+ "learning_rate": 9.999846785729901e-07,
393
+ "loss": 0.7534,
394
+ "step": 5100
395
+ },
396
+ {
397
+ "epoch": 2.51,
398
+ "learning_rate": 9.947653741800156e-07,
399
+ "loss": 0.7644,
400
+ "step": 5200
401
+ },
402
+ {
403
+ "epoch": 2.56,
404
+ "learning_rate": 9.80274410836591e-07,
405
+ "loss": 0.7631,
406
+ "step": 5300
407
+ },
408
+ {
409
+ "epoch": 2.61,
410
+ "learning_rate": 9.567833420446291e-07,
411
+ "loss": 0.7624,
412
+ "step": 5400
413
+ },
414
  {
415
  "epoch": 2.66,
416
+ "learning_rate": 9.24732378840687e-07,
417
+ "loss": 0.743,
418
  "step": 5500
419
  },
420
  {
421
  "epoch": 2.66,
422
+ "eval_loss": 0.7312911152839661,
423
+ "eval_runtime": 28.3334,
424
+ "eval_samples_per_second": 129.423,
425
+ "eval_steps_per_second": 32.365,
426
  "step": 5500
427
  },
428
+ {
429
+ "epoch": 2.71,
430
+ "learning_rate": 8.847221404583831e-07,
431
+ "loss": 0.7522,
432
+ "step": 5600
433
+ },
434
+ {
435
+ "epoch": 2.76,
436
+ "learning_rate": 8.380074821716343e-07,
437
+ "loss": 0.7367,
438
+ "step": 5700
439
+ },
440
+ {
441
+ "epoch": 2.8,
442
+ "learning_rate": 7.845215744177952e-07,
443
+ "loss": 0.7557,
444
+ "step": 5800
445
+ },
446
+ {
447
+ "epoch": 2.85,
448
+ "learning_rate": 7.257038727678722e-07,
449
+ "loss": 0.7329,
450
+ "step": 5900
451
+ },
452
  {
453
  "epoch": 2.9,
454
+ "learning_rate": 6.626565919248464e-07,
455
+ "loss": 0.726,
456
  "step": 6000
457
  },
458
  {
459
  "epoch": 2.9,
460
+ "eval_loss": 0.7325854301452637,
461
+ "eval_runtime": 27.7597,
462
+ "eval_samples_per_second": 132.098,
463
+ "eval_steps_per_second": 33.034,
464
  "step": 6000
465
  },
466
+ {
467
+ "epoch": 2.95,
468
+ "learning_rate": 5.965612068185233e-07,
469
+ "loss": 0.7804,
470
+ "step": 6100
471
+ },
472
+ {
473
+ "epoch": 3.0,
474
+ "learning_rate": 5.286563123483964e-07,
475
+ "loss": 0.7319,
476
+ "step": 6200
477
+ },
478
+ {
479
+ "epoch": 3.05,
480
+ "learning_rate": 4.602144127264582e-07,
481
+ "loss": 0.732,
482
+ "step": 6300
483
+ },
484
+ {
485
+ "epoch": 3.09,
486
+ "learning_rate": 3.925180753762438e-07,
487
+ "loss": 0.7665,
488
+ "step": 6400
489
+ },
490
  {
491
  "epoch": 3.14,
492
+ "learning_rate": 3.268358962522407e-07,
493
+ "loss": 0.7499,
494
  "step": 6500
495
  },
496
  {
497
  "epoch": 3.14,
498
+ "eval_loss": 0.7382772564888,
499
+ "eval_runtime": 27.6004,
500
+ "eval_samples_per_second": 132.86,
501
+ "eval_steps_per_second": 33.224,
502
  "step": 6500
503
  },
504
+ {
505
+ "epoch": 3.19,
506
+ "learning_rate": 2.64398726977746e-07,
507
+ "loss": 0.739,
508
+ "step": 6600
509
+ },
510
+ {
511
+ "epoch": 3.24,
512
+ "learning_rate": 2.063766092928542e-07,
513
+ "loss": 0.7201,
514
+ "step": 6700
515
+ },
516
+ {
517
+ "epoch": 3.29,
518
+ "learning_rate": 1.5385684904965596e-07,
519
+ "loss": 0.7013,
520
+ "step": 6800
521
+ },
522
+ {
523
+ "epoch": 3.34,
524
+ "learning_rate": 1.0782364063714005e-07,
525
+ "loss": 0.7234,
526
+ "step": 6900
527
+ },
528
  {
529
  "epoch": 3.38,
530
+ "learning_rate": 6.913962366403719e-08,
531
+ "loss": 0.7377,
532
  "step": 7000
533
  },
534
  {
535
  "epoch": 3.38,
536
+ "eval_loss": 0.7334151864051819,
537
+ "eval_runtime": 27.6353,
538
+ "eval_samples_per_second": 132.693,
539
+ "eval_steps_per_second": 33.182,
540
  "step": 7000
541
  },
542
+ {
543
+ "epoch": 3.43,
544
+ "learning_rate": 3.852971751825379e-08,
545
+ "loss": 0.7272,
546
+ "step": 7100
547
+ },
548
+ {
549
+ "epoch": 3.48,
550
+ "learning_rate": 1.6567536735284103e-08,
551
+ "loss": 0.7323,
552
+ "step": 7200
553
+ },
554
+ {
555
+ "epoch": 3.53,
556
+ "learning_rate": 3.664641744888397e-09,
557
+ "loss": 0.6981,
558
+ "step": 7300
559
+ },
560
+ {
561
+ "epoch": 3.58,
562
+ "learning_rate": 9.99937173568261e-07,
563
+ "loss": 0.6917,
564
+ "step": 7400
565
+ },
566
  {
567
  "epoch": 3.63,
568
+ "learning_rate": 9.94170412962655e-07,
569
+ "loss": 0.7459,
570
  "step": 7500
571
  },
572
  {
573
  "epoch": 3.63,
574
+ "eval_loss": 0.7427003979682922,
575
+ "eval_runtime": 27.4459,
576
+ "eval_samples_per_second": 133.608,
577
+ "eval_steps_per_second": 33.411,
578
  "step": 7500
579
  },
580
+ {
581
+ "epoch": 3.67,
582
+ "learning_rate": 9.79143142686205e-07,
583
+ "loss": 0.7406,
584
+ "step": 7600
585
+ },
586
+ {
587
+ "epoch": 3.72,
588
+ "learning_rate": 9.551369663680862e-07,
589
+ "loss": 0.7304,
590
+ "step": 7700
591
+ },
592
+ {
593
+ "epoch": 3.77,
594
+ "learning_rate": 9.226017479056962e-07,
595
+ "loss": 0.7278,
596
+ "step": 7800
597
+ },
598
+ {
599
+ "epoch": 3.82,
600
+ "learning_rate": 8.821471812372215e-07,
601
+ "loss": 0.7273,
602
+ "step": 7900
603
+ },
604
  {
605
  "epoch": 3.87,
606
+ "learning_rate": 8.345313649781101e-07,
607
+ "loss": 0.7186,
608
  "step": 8000
609
  },
610
  {
611
  "epoch": 3.87,
612
+ "eval_loss": 0.7306970953941345,
613
+ "eval_runtime": 27.6504,
614
+ "eval_samples_per_second": 132.62,
615
+ "eval_steps_per_second": 33.164,
616
  "step": 8000
617
  },
618
+ {
619
+ "epoch": 3.92,
620
+ "learning_rate": 7.806465960271379e-07,
621
+ "loss": 0.6956,
622
+ "step": 8100
623
+ },
624
+ {
625
+ "epoch": 3.96,
626
+ "learning_rate": 7.215026483628598e-07,
627
+ "loss": 0.7794,
628
+ "step": 8200
629
+ },
630
+ {
631
+ "epoch": 4.01,
632
+ "learning_rate": 6.582078503775666e-07,
633
+ "loss": 0.7304,
634
+ "step": 8300
635
+ },
636
+ {
637
+ "epoch": 4.06,
638
+ "learning_rate": 5.919483153501647e-07,
639
+ "loss": 0.7477,
640
+ "step": 8400
641
+ },
642
  {
643
  "epoch": 4.11,
644
+ "learning_rate": 5.239657142686591e-07,
645
+ "loss": 0.723,
646
  "step": 8500
647
  },
648
  {
649
  "epoch": 4.11,
650
+ "eval_loss": 0.7324436902999878,
651
+ "eval_runtime": 27.6456,
652
+ "eval_samples_per_second": 132.643,
653
+ "eval_steps_per_second": 33.17,
654
  "step": 8500
655
  },
656
+ {
657
+ "epoch": 4.16,
658
+ "learning_rate": 4.555340075285952e-07,
659
+ "loss": 0.7081,
660
+ "step": 8600
661
+ },
662
+ {
663
+ "epoch": 4.21,
664
+ "learning_rate": 3.8793557154393083e-07,
665
+ "loss": 0.717,
666
+ "step": 8700
667
+ },
668
+ {
669
+ "epoch": 4.26,
670
+ "learning_rate": 3.2243716764587667e-07,
671
+ "loss": 0.7229,
672
+ "step": 8800
673
+ },
674
+ {
675
+ "epoch": 4.3,
676
+ "learning_rate": 2.602662036006858e-07,
677
+ "loss": 0.713,
678
+ "step": 8900
679
+ },
680
  {
681
  "epoch": 4.35,
682
+ "learning_rate": 2.025877325938261e-07,
683
+ "loss": 0.7275,
684
  "step": 9000
685
  },
686
  {
687
  "epoch": 4.35,
688
+ "eval_loss": 0.7325617074966431,
689
+ "eval_runtime": 27.8844,
690
+ "eval_samples_per_second": 131.507,
691
+ "eval_steps_per_second": 32.886,
692
  "step": 9000
693
  },
694
+ {
695
+ "epoch": 4.4,
696
+ "learning_rate": 1.504826207082261e-07,
697
+ "loss": 0.693,
698
+ "step": 9100
699
+ },
700
+ {
701
+ "epoch": 4.45,
702
+ "learning_rate": 1.049272920272809e-07,
703
+ "loss": 0.689,
704
+ "step": 9200
705
+ },
706
+ {
707
+ "epoch": 4.5,
708
+ "learning_rate": 6.677543092937843e-08,
709
+ "loss": 0.7001,
710
+ "step": 9300
711
+ },
712
+ {
713
+ "epoch": 4.55,
714
+ "learning_rate": 3.674198446393789e-08,
715
+ "loss": 0.7398,
716
+ "step": 9400
717
+ },
718
  {
719
  "epoch": 4.59,
720
+ "learning_rate": 1.538976459652064e-08,
721
+ "loss": 0.7371,
722
  "step": 9500
723
  },
724
  {
725
  "epoch": 4.59,
726
+ "eval_loss": 0.7348757982254028,
727
+ "eval_runtime": 27.6591,
728
+ "eval_samples_per_second": 132.579,
729
+ "eval_steps_per_second": 33.154,
730
  "step": 9500
731
  },
732
+ {
733
+ "epoch": 4.64,
734
+ "learning_rate": 3.1189013903395455e-09,
735
+ "loss": 0.7164,
736
+ "step": 9600
737
+ },
738
+ {
739
+ "epoch": 4.69,
740
+ "learning_rate": 9.998406552336684e-07,
741
+ "loss": 0.7045,
742
+ "step": 9700
743
+ },
744
+ {
745
+ "epoch": 4.74,
746
+ "learning_rate": 9.934334446448806e-07,
747
+ "loss": 0.7095,
748
+ "step": 9800
749
+ },
750
+ {
751
+ "epoch": 4.79,
752
+ "learning_rate": 9.777795348078374e-07,
753
+ "loss": 0.7265,
754
+ "step": 9900
755
+ },
756
  {
757
  "epoch": 4.84,
758
+ "learning_rate": 9.531722722678393e-07,
759
+ "loss": 0.7267,
760
  "step": 10000
761
  },
762
  {
763
  "epoch": 4.84,
764
+ "eval_loss": 0.727655291557312,
765
+ "eval_runtime": 27.3264,
766
+ "eval_samples_per_second": 134.193,
767
+ "eval_steps_per_second": 33.557,
768
  "step": 10000
769
  }
770
  ],
771
+ "max_steps": 16544,
772
+ "num_train_epochs": 8,
773
+ "total_flos": 9147892997166300.0,
774
  "trial_name": null,
775
  "trial_params": null
776
  }
checkpoint-10000/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4c733ade4e8726b7d2050d20982a2b640dde2dd40f5221ce54a078013d218d18
3
  size 3375
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ffb2d40861e346c298ecb026926b1cd6fcf3c2ff8a5a38a5b78b121f9949508
3
  size 3375
checkpoint-2000/config.json CHANGED
@@ -5,7 +5,7 @@
5
  ],
6
  "attention_probs_dropout_prob": 0.1,
7
  "hidden_act": "gelu",
8
- "hidden_dropout_prob": 0.1,
9
  "hidden_size": 768,
10
  "id2label": {
11
  "0": "LABEL_0",
 
5
  ],
6
  "attention_probs_dropout_prob": 0.1,
7
  "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.2,
9
  "hidden_size": 768,
10
  "id2label": {
11
  "0": "LABEL_0",
checkpoint-2000/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:473042aa6b50e90eeafe35a0eb6c00fdf7e8130b0c124e8e6bae633a7aaeb83e
3
- size 1475512665
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:626d2c0f57e0d16c0069652656de4057825c16712166a707211c8fe154203bc8
3
+ size 1475512793
checkpoint-2000/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:152f4de359aa4e010c15c54e93a34fb1197afb81c06c241d2c177d699fd0641c
3
  size 737770027
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ab0240bd69843fd06b929f55bc2e760f01226d976e947792a27a2de9cf7783d
3
  size 737770027
checkpoint-2000/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e7cb5f5ba70a378637f312cfcaa024819078650cf5a9b71109c9eef84768726b
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3be3a7e37abbb30b18f1d72e9e82d80209a1ffafba792a578625451aef21e954
3
  size 14503
checkpoint-2000/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c9630290c2855c60ed3fcbed4eeefb978189ee21116d8c9e5613dc0cf8dde2d3
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4eccc082304cdb1b781a00d1171288ffa36d40ca690860809f81bcac4e20adae
3
  size 559
checkpoint-2000/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:33176767ded0ce16da286202ede558cb1cf892fa297e9e9cca486a46978b6600
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3abeb5aa5be592899ee5dc8748f9e3bf735434f8a9f384cf814f8b27ee9adb78
3
  size 623
checkpoint-2000/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.6859038472175598,
3
  "best_model_checkpoint": "deberta_v3_finetuned_predicting_effective_arguments/checkpoint-2000",
4
  "epoch": 0.9667673716012085,
5
  "global_step": 2000,
@@ -7,66 +7,162 @@
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  {
11
  "epoch": 0.24,
12
- "learning_rate": 9.518375241779499e-06,
13
- "loss": 0.8315,
14
  "step": 500
15
  },
16
  {
17
  "epoch": 0.24,
18
- "eval_loss": 0.7444673180580139,
19
- "eval_runtime": 29.7252,
20
- "eval_samples_per_second": 123.33,
21
- "eval_steps_per_second": 30.849,
22
  "step": 500
23
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  {
25
  "epoch": 0.48,
26
- "learning_rate": 9.034816247582205e-06,
27
- "loss": 0.7232,
28
  "step": 1000
29
  },
30
  {
31
  "epoch": 0.48,
32
- "eval_loss": 0.7441162467002869,
33
- "eval_runtime": 29.4226,
34
- "eval_samples_per_second": 124.598,
35
- "eval_steps_per_second": 31.167,
36
  "step": 1000
37
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  {
39
  "epoch": 0.73,
40
- "learning_rate": 8.552224371373308e-06,
41
- "loss": 0.7269,
42
  "step": 1500
43
  },
44
  {
45
  "epoch": 0.73,
46
- "eval_loss": 0.7453668117523193,
47
- "eval_runtime": 29.5754,
48
- "eval_samples_per_second": 123.955,
49
- "eval_steps_per_second": 31.006,
50
  "step": 1500
51
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  {
53
  "epoch": 0.97,
54
- "learning_rate": 8.068665377176016e-06,
55
- "loss": 0.7185,
56
  "step": 2000
57
  },
58
  {
59
  "epoch": 0.97,
60
- "eval_loss": 0.6859038472175598,
61
- "eval_runtime": 29.569,
62
- "eval_samples_per_second": 123.981,
63
- "eval_steps_per_second": 31.012,
64
  "step": 2000
65
  }
66
  ],
67
- "max_steps": 10340,
68
- "num_train_epochs": 5,
69
- "total_flos": 1822810502059560.0,
70
  "trial_name": null,
71
  "trial_params": null
72
  }
 
1
  {
2
+ "best_metric": 0.7537589073181152,
3
  "best_model_checkpoint": "deberta_v3_finetuned_predicting_effective_arguments/checkpoint-2000",
4
  "epoch": 0.9667673716012085,
5
  "global_step": 2000,
 
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
+ {
11
+ "epoch": 0.05,
12
+ "learning_rate": 2e-07,
13
+ "loss": 1.0361,
14
+ "step": 100
15
+ },
16
+ {
17
+ "epoch": 0.1,
18
+ "learning_rate": 3.98e-07,
19
+ "loss": 0.9751,
20
+ "step": 200
21
+ },
22
+ {
23
+ "epoch": 0.15,
24
+ "learning_rate": 5.979999999999999e-07,
25
+ "loss": 0.9807,
26
+ "step": 300
27
+ },
28
+ {
29
+ "epoch": 0.19,
30
+ "learning_rate": 7.94e-07,
31
+ "loss": 0.9513,
32
+ "step": 400
33
+ },
34
  {
35
  "epoch": 0.24,
36
+ "learning_rate": 9.94e-07,
37
+ "loss": 0.9237,
38
  "step": 500
39
  },
40
  {
41
  "epoch": 0.24,
42
+ "eval_loss": 0.9464540481567383,
43
+ "eval_runtime": 37.5769,
44
+ "eval_samples_per_second": 97.587,
45
+ "eval_steps_per_second": 24.403,
46
  "step": 500
47
  },
48
+ {
49
+ "epoch": 0.29,
50
+ "learning_rate": 9.955915919781764e-07,
51
+ "loss": 0.9367,
52
+ "step": 600
53
+ },
54
+ {
55
+ "epoch": 0.34,
56
+ "learning_rate": 9.81900265076038e-07,
57
+ "loss": 0.8993,
58
+ "step": 700
59
+ },
60
+ {
61
+ "epoch": 0.39,
62
+ "learning_rate": 9.594490241150311e-07,
63
+ "loss": 0.8586,
64
+ "step": 800
65
+ },
66
+ {
67
+ "epoch": 0.44,
68
+ "learning_rate": 9.282057505552949e-07,
69
+ "loss": 0.854,
70
+ "step": 900
71
+ },
72
  {
73
  "epoch": 0.48,
74
+ "learning_rate": 8.889381125453379e-07,
75
+ "loss": 0.8642,
76
  "step": 1000
77
  },
78
  {
79
  "epoch": 0.48,
80
+ "eval_loss": 0.8270628452301025,
81
+ "eval_runtime": 28.2214,
82
+ "eval_samples_per_second": 129.937,
83
+ "eval_steps_per_second": 32.493,
84
  "step": 1000
85
  },
86
+ {
87
+ "epoch": 0.53,
88
+ "learning_rate": 8.423819662432867e-07,
89
+ "loss": 0.8314,
90
+ "step": 1100
91
+ },
92
+ {
93
+ "epoch": 0.58,
94
+ "learning_rate": 7.894097508558568e-07,
95
+ "loss": 0.8317,
96
+ "step": 1200
97
+ },
98
+ {
99
+ "epoch": 0.63,
100
+ "learning_rate": 7.310141395581585e-07,
101
+ "loss": 0.7939,
102
+ "step": 1300
103
+ },
104
+ {
105
+ "epoch": 0.68,
106
+ "learning_rate": 6.682894372882701e-07,
107
+ "loss": 0.803,
108
+ "step": 1400
109
+ },
110
  {
111
  "epoch": 0.73,
112
+ "learning_rate": 6.024110740127264e-07,
113
+ "loss": 0.8029,
114
  "step": 1500
115
  },
116
  {
117
  "epoch": 0.73,
118
+ "eval_loss": 0.7635419368743896,
119
+ "eval_runtime": 28.2809,
120
+ "eval_samples_per_second": 129.663,
121
+ "eval_steps_per_second": 32.425,
122
  "step": 1500
123
  },
124
+ {
125
+ "epoch": 0.77,
126
+ "learning_rate": 5.346135777490083e-07,
127
+ "loss": 0.7959,
128
+ "step": 1600
129
+ },
130
+ {
131
+ "epoch": 0.82,
132
+ "learning_rate": 4.6616744011972247e-07,
133
+ "loss": 0.7708,
134
+ "step": 1700
135
+ },
136
+ {
137
+ "epoch": 0.87,
138
+ "learning_rate": 3.9835530796656867e-07,
139
+ "loss": 0.7636,
140
+ "step": 1800
141
+ },
142
+ {
143
+ "epoch": 0.92,
144
+ "learning_rate": 3.3244794718149894e-07,
145
+ "loss": 0.7864,
146
+ "step": 1900
147
+ },
148
  {
149
  "epoch": 0.97,
150
+ "learning_rate": 2.696804291810131e-07,
151
+ "loss": 0.8005,
152
  "step": 2000
153
  },
154
  {
155
  "epoch": 0.97,
156
+ "eval_loss": 0.7537589073181152,
157
+ "eval_runtime": 28.4034,
158
+ "eval_samples_per_second": 129.104,
159
+ "eval_steps_per_second": 32.285,
160
  "step": 2000
161
  }
162
  ],
163
+ "max_steps": 16544,
164
+ "num_train_epochs": 8,
165
+ "total_flos": 1828884830909760.0,
166
  "trial_name": null,
167
  "trial_params": null
168
  }
checkpoint-2000/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4c733ade4e8726b7d2050d20982a2b640dde2dd40f5221ce54a078013d218d18
3
  size 3375
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ffb2d40861e346c298ecb026926b1cd6fcf3c2ff8a5a38a5b78b121f9949508
3
  size 3375
config.json CHANGED
@@ -5,7 +5,7 @@
5
  ],
6
  "attention_probs_dropout_prob": 0.1,
7
  "hidden_act": "gelu",
8
- "hidden_dropout_prob": 0.1,
9
  "hidden_size": 768,
10
  "id2label": {
11
  "0": "LABEL_0",
 
5
  ],
6
  "attention_probs_dropout_prob": 0.1,
7
  "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.2,
9
  "hidden_size": 768,
10
  "id2label": {
11
  "0": "LABEL_0",
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:152f4de359aa4e010c15c54e93a34fb1197afb81c06c241d2c177d699fd0641c
3
  size 737770027
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:183b92732073ecb3e37d10ffc34996ac6fe70d5a5295f1d211ad7775260a3e4f
3
  size 737770027
runs/Jul03_17-16-15_8a98c40ff775/events.out.tfevents.1656868588.8a98c40ff775.71.0 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:1b3617a801c60d083a94daa04872ab8100ae1aa4bc27f09980a749aaf121cb98
3
- size 12918
 
 
 
 
runs/Jul03_21-34-55_8a98c40ff775/events.out.tfevents.1656884100.8a98c40ff775.71.4 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:b5e60884f2f24cd30d861b6d76f2432d12a76a66f3a0189fb8ddbabac92b6b68
3
- size 9495
 
 
 
 
runs/Jul03_23-15-45_8a98c40ff775/1656890147.0297482/events.out.tfevents.1656890147.8a98c40ff775.71.7 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:7841a1cc1f026e1d8f7e27ebfc6c2345433308cf68d12ab48b2256e8b26519a2
3
- size 5435
 
 
 
 
runs/Jul03_23-16-08_8a98c40ff775/1656890170.9030218/events.out.tfevents.1656890170.8a98c40ff775.71.9 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:5dbfc67c518b8e9b6453cf23fcecb24d018090efab32201f1068779e87121f75
3
- size 5435
 
 
 
 
runs/Jul03_23-16-08_8a98c40ff775/events.out.tfevents.1656890170.8a98c40ff775.71.8 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:aeaeddd48689ec141326ec77032494af4461bb353420d795c77f5dd6b7bae92d
3
- size 4005
 
 
 
 
runs/Jul03_23-26-33_8a98c40ff775/1656890797.636419/events.out.tfevents.1656890797.8a98c40ff775.5349.1 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:83ee3cfe3d4c4e5adfe96c2d421fe9902f2b13d7880adfdaa4f942184344c537
3
- size 5435
 
 
 
 
runs/Jul03_23-26-33_8a98c40ff775/events.out.tfevents.1656890797.8a98c40ff775.5349.0 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:15a8e80f13acce54563ebb8cb7c22816065730ffe17ebaba086b1299d54ba061
3
- size 7001
 
 
 
 
runs/Jul04_00-33-28_8a98c40ff775/1656894810.6894863/events.out.tfevents.1656894810.8a98c40ff775.8083.1 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:0f8db2b966d521d474da361acfa1a1d7fdbf1c646c12217f96f06b8c168f5766
3
- size 5435
 
 
 
 
runs/{Jul03_17-16-15_8a98c40ff775/1656868588.8381069/events.out.tfevents.1656868588.8a98c40ff775.71.1 β†’ Jul26_21-03-46_1fe91f198d71/1658869435.620103/events.out.tfevents.1658869435.1fe91f198d71.71.1} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0a91a894342ce694eb4bb9fcf877374867217236e883fbbdd94ac75ace1877e2
3
  size 5435
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6485b7a2dffeb1975234009c45dcfd6419d71586d0e5c9b414f400258d1ce9b
3
  size 5435
runs/{Jul03_19-51-55_8a98c40ff775/events.out.tfevents.1656877916.8a98c40ff775.71.2 β†’ Jul26_21-03-46_1fe91f198d71/events.out.tfevents.1658869435.1fe91f198d71.71.0} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:35827bfa6445ea5a01d5ced03834737282e3b1f671fbd102582adee699417f07
3
- size 9494
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4f66a750ddef0868289783b7c625cada735943a5b2c56c08ec75354fbdd384c
3
+ size 6742
runs/{Jul03_21-34-55_8a98c40ff775/1656884100.2687025/events.out.tfevents.1656884100.8a98c40ff775.71.5 β†’ Jul26_21-10-23_1fe91f198d71/1658869825.093272/events.out.tfevents.1658869825.1fe91f198d71.71.3} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:99f49eb3bde319c3f5ef9f807d1151facc106a9fb0c084815bae84a77df2fa45
3
  size 5435
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3507bdbfe9d3f6eb69447dba21acc822c993a5cb10a358dc39ed139bc7ca44ae
3
  size 5435
runs/{Jul03_23-15-45_8a98c40ff775/events.out.tfevents.1656890147.8a98c40ff775.71.6 β†’ Jul26_21-10-23_1fe91f198d71/events.out.tfevents.1658869825.1fe91f198d71.71.2} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:463d132c8dfcd42fb09abc4cc79c8623cbb23d16ee57b85cea3f2563e3d34fdb
3
  size 4100
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f04c78aa68e1a98a7ea678d1ec309091ec845b61e79e79a4e85923665acc8c20
3
  size 4100
runs/{Jul03_19-51-55_8a98c40ff775/1656877916.4434352/events.out.tfevents.1656877916.8a98c40ff775.71.3 β†’ Jul26_21-13-22_1fe91f198d71/1658870005.9725268/events.out.tfevents.1658870005.1fe91f198d71.1213.1} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b3c99694ed2ab4d1146b576083af4bcb952ce499269cd1acf5266484e68c6e29
3
  size 5435
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8514910d27122ee8e76b361c1ee7444303a7c1520614670837bc4fab14114a2
3
  size 5435
runs/{Jul04_00-33-28_8a98c40ff775/events.out.tfevents.1656894810.8a98c40ff775.8083.0 β†’ Jul26_21-13-22_1fe91f198d71/events.out.tfevents.1658870005.1fe91f198d71.1213.0} RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f961be96500c82d6a3b7e8868f1848795efdd257c198b4d4325150fd6d274217
3
  size 39221
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb9bbc6749502c5c71b9d1df2d6d1835de4ccc9b642bed7b8318b2d995d27a47
3
  size 39221
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4c733ade4e8726b7d2050d20982a2b640dde2dd40f5221ce54a078013d218d18
3
  size 3375
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ffb2d40861e346c298ecb026926b1cd6fcf3c2ff8a5a38a5b78b121f9949508
3
  size 3375