ShengdingHu commited on
Commit
759a22e
1 Parent(s): aaa0872

Training in progress, step 200

Browse files
all_results.json CHANGED
@@ -1,18 +1,18 @@
1
  {
2
  "epoch": 3.0,
3
- "eval_accuracy": 96.7,
4
- "eval_average_metrics": 96.7,
5
- "eval_loss": 0.044097092002630234,
6
- "eval_runtime": 2.8834,
7
- "eval_samples_per_second": 346.813,
8
  "test_accuracy": 93.34862385321101,
9
  "test_average_metrics": 93.34862385321101,
10
- "test_loss": 0.0836009681224823,
11
- "test_runtime": 2.7323,
12
- "test_samples_per_second": 319.145,
13
- "train_loss": 0.3596552710935294,
14
- "train_runtime": 685.371,
15
  "train_samples": 66349,
16
- "train_samples_per_second": 290.422,
17
- "train_steps_per_second": 2.906
18
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "eval_accuracy": 97.5,
4
+ "eval_average_metrics": 97.5,
5
+ "eval_loss": 0.03714486584067345,
6
+ "eval_runtime": 5.7314,
7
+ "eval_samples_per_second": 174.477,
8
  "test_accuracy": 93.34862385321101,
9
  "test_average_metrics": 93.34862385321101,
10
+ "test_loss": 0.07569558918476105,
11
+ "test_runtime": 5.6934,
12
+ "test_samples_per_second": 153.159,
13
+ "train_loss": 0.058686515289424354,
14
+ "train_runtime": 1966.0928,
15
  "train_samples": 66349,
16
+ "train_samples_per_second": 101.24,
17
+ "train_steps_per_second": 3.165
18
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 3.0,
3
- "eval_accuracy": 96.7,
4
- "eval_average_metrics": 96.7,
5
- "eval_loss": 0.044097092002630234,
6
- "eval_runtime": 2.8834,
7
- "eval_samples_per_second": 346.813
8
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "eval_accuracy": 97.5,
4
+ "eval_average_metrics": 97.5,
5
+ "eval_loss": 0.03714486584067345,
6
+ "eval_runtime": 5.7314,
7
+ "eval_samples_per_second": 174.477
8
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7b9e962d197234e245c55a2ee612e5cc71e922fffd99120ed0ff83826fd6db29
3
- size 7551621
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fcddae80c7ed01b1852363b0b7d392786aacd68ed7ff8f3067ff8498b338c0bc
3
+ size 2631685
runs/Feb01_01-17-38_node2/events.out.tfevents.1643649573.node2 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5a942af01c3847061f09c4292bb816e9b6a4da1858d1cb9c43c6076250f61078
3
- size 15843
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e132aaf8f5d06a9ea3156129b23be0ad6ebde851ffcf6504f1b7a70d7ffca35
3
+ size 16519
runs/Feb01_01-17-38_node2/events.out.tfevents.1643651545.node2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6241c004a09a25989d4af4e90c33d228131bac5698bb6d24c840c9100742464d
3
+ size 684
runs/Feb02_15-44-43_node1/1643787954.8036234/events.out.tfevents.1643787954.node1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79b5e5b1aa85a1172ed0231111a5f3861a5885155c426002dc6453f10689c3f4
3
+ size 5011
runs/Feb02_15-44-43_node1/events.out.tfevents.1643787954.node1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eead2938783f064d4f4a1dbd0e9f906ae1b32a7eba3fed4ee4dff7ef16ce1e16
3
+ size 4299
test_results.json CHANGED
@@ -2,7 +2,7 @@
2
  "epoch": 3.0,
3
  "test_accuracy": 93.34862385321101,
4
  "test_average_metrics": 93.34862385321101,
5
- "test_loss": 0.0836009681224823,
6
- "test_runtime": 2.7323,
7
- "test_samples_per_second": 319.145
8
  }
 
2
  "epoch": 3.0,
3
  "test_accuracy": 93.34862385321101,
4
  "test_average_metrics": 93.34862385321101,
5
+ "test_loss": 0.07569558918476105,
6
+ "test_runtime": 5.6934,
7
+ "test_samples_per_second": 153.159
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 3.0,
3
- "train_loss": 0.3596552710935294,
4
- "train_runtime": 685.371,
5
  "train_samples": 66349,
6
- "train_samples_per_second": 290.422,
7
- "train_steps_per_second": 2.906
8
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "train_loss": 0.058686515289424354,
4
+ "train_runtime": 1966.0928,
5
  "train_samples": 66349,
6
+ "train_samples_per_second": 101.24,
7
+ "train_steps_per_second": 3.165
8
  }
trainer_state.json CHANGED
@@ -1,70 +1,376 @@
1
  {
2
- "best_metric": 96.7,
3
- "best_model_checkpoint": "outputs/bitfit/t5-base/sst2/checkpoint-500",
4
  "epoch": 3.0,
5
- "global_step": 1992,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.75,
12
- "learning_rate": 0.0003,
13
- "loss": 1.2354,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  "step": 500
15
  },
16
  {
17
- "epoch": 0.75,
 
 
 
 
 
 
 
 
 
18
  "eval_accuracy": 96.7,
19
  "eval_average_metrics": 96.7,
20
- "eval_loss": 0.044097092002630234,
21
- "eval_runtime": 2.8987,
22
- "eval_samples_per_second": 344.985,
23
- "step": 500
24
  },
25
  {
26
- "epoch": 1.51,
27
- "learning_rate": 0.00019946380697050936,
28
- "loss": 0.0704,
29
  "step": 1000
30
  },
31
  {
32
- "epoch": 1.51,
33
- "eval_accuracy": 96.5,
34
- "eval_average_metrics": 96.5,
35
- "eval_loss": 0.042973704636096954,
36
- "eval_runtime": 2.8885,
37
- "eval_samples_per_second": 346.198,
38
  "step": 1000
39
  },
40
  {
41
- "epoch": 2.26,
42
- "learning_rate": 9.892761394101876e-05,
43
- "loss": 0.0651,
44
- "step": 1500
 
 
 
 
 
 
 
 
 
 
 
 
45
  },
46
  {
47
- "epoch": 2.26,
48
- "eval_accuracy": 96.5,
49
- "eval_average_metrics": 96.5,
50
- "eval_loss": 0.04074199125170708,
51
- "eval_runtime": 2.8768,
52
- "eval_samples_per_second": 347.613,
53
  "step": 1500
54
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  {
56
  "epoch": 3.0,
57
- "step": 1992,
58
- "total_flos": 1.5337491316024032e+16,
59
- "train_loss": 0.3596552710935294,
60
- "train_runtime": 685.371,
61
- "train_samples_per_second": 290.422,
62
- "train_steps_per_second": 2.906
63
  }
64
  ],
65
- "max_steps": 1992,
66
  "num_train_epochs": 3,
67
- "total_flos": 1.5337491316024032e+16,
68
  "trial_name": null,
69
  "trial_params": null
70
  }
 
1
  {
2
+ "best_metric": 97.5,
3
+ "best_model_checkpoint": "outputs/bitfit/t5-base/sst2/checkpoint-3000",
4
  "epoch": 3.0,
5
+ "global_step": 6222,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 0.1,
12
+ "eval_accuracy": 97.3,
13
+ "eval_average_metrics": 97.3,
14
+ "eval_loss": 0.04369654133915901,
15
+ "eval_runtime": 5.1897,
16
+ "eval_samples_per_second": 192.69,
17
+ "step": 200
18
+ },
19
+ {
20
+ "epoch": 0.19,
21
+ "eval_accuracy": 97.3,
22
+ "eval_average_metrics": 97.3,
23
+ "eval_loss": 0.042765092104673386,
24
+ "eval_runtime": 5.6341,
25
+ "eval_samples_per_second": 177.492,
26
+ "step": 400
27
+ },
28
+ {
29
+ "epoch": 0.24,
30
+ "learning_rate": 0.00027589199614271937,
31
+ "loss": 0.1281,
32
  "step": 500
33
  },
34
  {
35
+ "epoch": 0.29,
36
+ "eval_accuracy": 97.2,
37
+ "eval_average_metrics": 97.2,
38
+ "eval_loss": 0.037586040794849396,
39
+ "eval_runtime": 6.2353,
40
+ "eval_samples_per_second": 160.377,
41
+ "step": 600
42
+ },
43
+ {
44
+ "epoch": 0.39,
45
  "eval_accuracy": 96.7,
46
  "eval_average_metrics": 96.7,
47
+ "eval_loss": 0.04588627442717552,
48
+ "eval_runtime": 5.966,
49
+ "eval_samples_per_second": 167.617,
50
+ "step": 800
51
  },
52
  {
53
+ "epoch": 0.48,
54
+ "learning_rate": 0.0002517839922854387,
55
+ "loss": 0.0649,
56
  "step": 1000
57
  },
58
  {
59
+ "epoch": 0.48,
60
+ "eval_accuracy": 97.1,
61
+ "eval_average_metrics": 97.1,
62
+ "eval_loss": 0.040396977216005325,
63
+ "eval_runtime": 5.3919,
64
+ "eval_samples_per_second": 185.463,
65
  "step": 1000
66
  },
67
  {
68
+ "epoch": 0.58,
69
+ "eval_accuracy": 97.2,
70
+ "eval_average_metrics": 97.2,
71
+ "eval_loss": 0.03829416632652283,
72
+ "eval_runtime": 5.3561,
73
+ "eval_samples_per_second": 186.702,
74
+ "step": 1200
75
+ },
76
+ {
77
+ "epoch": 0.68,
78
+ "eval_accuracy": 97.3,
79
+ "eval_average_metrics": 97.3,
80
+ "eval_loss": 0.039076462388038635,
81
+ "eval_runtime": 5.9241,
82
+ "eval_samples_per_second": 168.803,
83
+ "step": 1400
84
  },
85
  {
86
+ "epoch": 0.72,
87
+ "learning_rate": 0.00022767598842815813,
88
+ "loss": 0.0578,
 
 
 
89
  "step": 1500
90
  },
91
+ {
92
+ "epoch": 0.77,
93
+ "eval_accuracy": 97.1,
94
+ "eval_average_metrics": 97.1,
95
+ "eval_loss": 0.04230912774801254,
96
+ "eval_runtime": 5.153,
97
+ "eval_samples_per_second": 194.06,
98
+ "step": 1600
99
+ },
100
+ {
101
+ "epoch": 0.87,
102
+ "eval_accuracy": 97.3,
103
+ "eval_average_metrics": 97.3,
104
+ "eval_loss": 0.03823951631784439,
105
+ "eval_runtime": 5.1279,
106
+ "eval_samples_per_second": 195.011,
107
+ "step": 1800
108
+ },
109
+ {
110
+ "epoch": 0.96,
111
+ "learning_rate": 0.00020356798457087753,
112
+ "loss": 0.0582,
113
+ "step": 2000
114
+ },
115
+ {
116
+ "epoch": 0.96,
117
+ "eval_accuracy": 97.1,
118
+ "eval_average_metrics": 97.1,
119
+ "eval_loss": 0.0395108200609684,
120
+ "eval_runtime": 5.8042,
121
+ "eval_samples_per_second": 172.29,
122
+ "step": 2000
123
+ },
124
+ {
125
+ "epoch": 1.06,
126
+ "eval_accuracy": 97.1,
127
+ "eval_average_metrics": 97.1,
128
+ "eval_loss": 0.04088559374213219,
129
+ "eval_runtime": 5.4894,
130
+ "eval_samples_per_second": 182.168,
131
+ "step": 2200
132
+ },
133
+ {
134
+ "epoch": 1.16,
135
+ "eval_accuracy": 97.2,
136
+ "eval_average_metrics": 97.2,
137
+ "eval_loss": 0.038739945739507675,
138
+ "eval_runtime": 4.6293,
139
+ "eval_samples_per_second": 216.018,
140
+ "step": 2400
141
+ },
142
+ {
143
+ "epoch": 1.21,
144
+ "learning_rate": 0.0001794599807135969,
145
+ "loss": 0.054,
146
+ "step": 2500
147
+ },
148
+ {
149
+ "epoch": 1.25,
150
+ "eval_accuracy": 97.3,
151
+ "eval_average_metrics": 97.3,
152
+ "eval_loss": 0.04564524069428444,
153
+ "eval_runtime": 5.2048,
154
+ "eval_samples_per_second": 192.132,
155
+ "step": 2600
156
+ },
157
+ {
158
+ "epoch": 1.35,
159
+ "eval_accuracy": 97.3,
160
+ "eval_average_metrics": 97.3,
161
+ "eval_loss": 0.03781759738922119,
162
+ "eval_runtime": 5.6194,
163
+ "eval_samples_per_second": 177.956,
164
+ "step": 2800
165
+ },
166
+ {
167
+ "epoch": 1.45,
168
+ "learning_rate": 0.00015535197685631627,
169
+ "loss": 0.0533,
170
+ "step": 3000
171
+ },
172
+ {
173
+ "epoch": 1.45,
174
+ "eval_accuracy": 97.5,
175
+ "eval_average_metrics": 97.5,
176
+ "eval_loss": 0.03714486584067345,
177
+ "eval_runtime": 5.328,
178
+ "eval_samples_per_second": 187.688,
179
+ "step": 3000
180
+ },
181
+ {
182
+ "epoch": 1.54,
183
+ "eval_accuracy": 97.5,
184
+ "eval_average_metrics": 97.5,
185
+ "eval_loss": 0.0382937453687191,
186
+ "eval_runtime": 4.0581,
187
+ "eval_samples_per_second": 246.419,
188
+ "step": 3200
189
+ },
190
+ {
191
+ "epoch": 1.64,
192
+ "eval_accuracy": 97.5,
193
+ "eval_average_metrics": 97.5,
194
+ "eval_loss": 0.037162039428949356,
195
+ "eval_runtime": 5.3134,
196
+ "eval_samples_per_second": 188.202,
197
+ "step": 3400
198
+ },
199
+ {
200
+ "epoch": 1.69,
201
+ "learning_rate": 0.00013124397299903566,
202
+ "loss": 0.0539,
203
+ "step": 3500
204
+ },
205
+ {
206
+ "epoch": 1.74,
207
+ "eval_accuracy": 97.5,
208
+ "eval_average_metrics": 97.5,
209
+ "eval_loss": 0.03954707458615303,
210
+ "eval_runtime": 5.9646,
211
+ "eval_samples_per_second": 167.655,
212
+ "step": 3600
213
+ },
214
+ {
215
+ "epoch": 1.83,
216
+ "eval_accuracy": 97.2,
217
+ "eval_average_metrics": 97.2,
218
+ "eval_loss": 0.037706729024648666,
219
+ "eval_runtime": 4.5321,
220
+ "eval_samples_per_second": 220.649,
221
+ "step": 3800
222
+ },
223
+ {
224
+ "epoch": 1.93,
225
+ "learning_rate": 0.00010713596914175504,
226
+ "loss": 0.0531,
227
+ "step": 4000
228
+ },
229
+ {
230
+ "epoch": 1.93,
231
+ "eval_accuracy": 97.2,
232
+ "eval_average_metrics": 97.2,
233
+ "eval_loss": 0.03779396042227745,
234
+ "eval_runtime": 6.6618,
235
+ "eval_samples_per_second": 150.108,
236
+ "step": 4000
237
+ },
238
+ {
239
+ "epoch": 2.03,
240
+ "eval_accuracy": 97.3,
241
+ "eval_average_metrics": 97.3,
242
+ "eval_loss": 0.04000015929341316,
243
+ "eval_runtime": 6.7004,
244
+ "eval_samples_per_second": 149.245,
245
+ "step": 4200
246
+ },
247
+ {
248
+ "epoch": 2.12,
249
+ "eval_accuracy": 97.1,
250
+ "eval_average_metrics": 97.1,
251
+ "eval_loss": 0.0396127812564373,
252
+ "eval_runtime": 5.9828,
253
+ "eval_samples_per_second": 167.145,
254
+ "step": 4400
255
+ },
256
+ {
257
+ "epoch": 2.17,
258
+ "learning_rate": 8.302796528447444e-05,
259
+ "loss": 0.049,
260
+ "step": 4500
261
+ },
262
+ {
263
+ "epoch": 2.22,
264
+ "eval_accuracy": 97.2,
265
+ "eval_average_metrics": 97.2,
266
+ "eval_loss": 0.0382530614733696,
267
+ "eval_runtime": 5.4179,
268
+ "eval_samples_per_second": 184.574,
269
+ "step": 4600
270
+ },
271
+ {
272
+ "epoch": 2.31,
273
+ "eval_accuracy": 97.0,
274
+ "eval_average_metrics": 97.0,
275
+ "eval_loss": 0.04122977331280708,
276
+ "eval_runtime": 4.9766,
277
+ "eval_samples_per_second": 200.94,
278
+ "step": 4800
279
+ },
280
+ {
281
+ "epoch": 2.41,
282
+ "learning_rate": 5.891996142719383e-05,
283
+ "loss": 0.0484,
284
+ "step": 5000
285
+ },
286
+ {
287
+ "epoch": 2.41,
288
+ "eval_accuracy": 97.3,
289
+ "eval_average_metrics": 97.3,
290
+ "eval_loss": 0.03976716473698616,
291
+ "eval_runtime": 5.1121,
292
+ "eval_samples_per_second": 195.615,
293
+ "step": 5000
294
+ },
295
+ {
296
+ "epoch": 2.51,
297
+ "eval_accuracy": 97.2,
298
+ "eval_average_metrics": 97.2,
299
+ "eval_loss": 0.03887654095888138,
300
+ "eval_runtime": 5.3028,
301
+ "eval_samples_per_second": 188.578,
302
+ "step": 5200
303
+ },
304
+ {
305
+ "epoch": 2.6,
306
+ "eval_accuracy": 97.3,
307
+ "eval_average_metrics": 97.3,
308
+ "eval_loss": 0.03931749612092972,
309
+ "eval_runtime": 6.019,
310
+ "eval_samples_per_second": 166.14,
311
+ "step": 5400
312
+ },
313
+ {
314
+ "epoch": 2.65,
315
+ "learning_rate": 3.481195756991321e-05,
316
+ "loss": 0.0478,
317
+ "step": 5500
318
+ },
319
+ {
320
+ "epoch": 2.7,
321
+ "eval_accuracy": 97.1,
322
+ "eval_average_metrics": 97.1,
323
+ "eval_loss": 0.03971054032444954,
324
+ "eval_runtime": 5.7951,
325
+ "eval_samples_per_second": 172.558,
326
+ "step": 5600
327
+ },
328
+ {
329
+ "epoch": 2.8,
330
+ "eval_accuracy": 97.1,
331
+ "eval_average_metrics": 97.1,
332
+ "eval_loss": 0.04016176983714104,
333
+ "eval_runtime": 5.5419,
334
+ "eval_samples_per_second": 180.443,
335
+ "step": 5800
336
+ },
337
+ {
338
+ "epoch": 2.89,
339
+ "learning_rate": 1.0703953712632592e-05,
340
+ "loss": 0.0444,
341
+ "step": 6000
342
+ },
343
+ {
344
+ "epoch": 2.89,
345
+ "eval_accuracy": 97.0,
346
+ "eval_average_metrics": 97.0,
347
+ "eval_loss": 0.04050704091787338,
348
+ "eval_runtime": 6.1769,
349
+ "eval_samples_per_second": 161.894,
350
+ "step": 6000
351
+ },
352
+ {
353
+ "epoch": 2.99,
354
+ "eval_accuracy": 97.2,
355
+ "eval_average_metrics": 97.2,
356
+ "eval_loss": 0.04039543867111206,
357
+ "eval_runtime": 5.4148,
358
+ "eval_samples_per_second": 184.68,
359
+ "step": 6200
360
+ },
361
  {
362
  "epoch": 3.0,
363
+ "step": 6222,
364
+ "total_flos": 1.3280955617597184e+16,
365
+ "train_loss": 0.058686515289424354,
366
+ "train_runtime": 1966.0928,
367
+ "train_samples_per_second": 101.24,
368
+ "train_steps_per_second": 3.165
369
  }
370
  ],
371
+ "max_steps": 6222,
372
  "num_train_epochs": 3,
373
+ "total_flos": 1.3280955617597184e+16,
374
  "trial_name": null,
375
  "trial_params": null
376
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f376179840c9389819429f3133a6999a78148775cfff0e7518608ef9c3d59b09
3
  size 3183
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d084e8b52f5c7cc4a6a29f9f14decebc1ce43459b020a9c05c1c22bd9831401a
3
  size 3183
training_config.json CHANGED
@@ -1 +1 @@
1
- {"bottleneck_dim": 24, "dataset_config_name": ["en"], "delta_type": "adapter", "do_eval": true, "do_test": true, "do_train": true, "eval_dataset_config_name": ["en"], "eval_dataset_name": "sst2", "eval_steps": 200, "evaluation_strategy": "steps", "greater_is_better": true, "learning_rate": 0.0003, "load_best_model_at_end": true, "max_source_length": 128, "metric_for_best_model": "average_metrics", "model_name_or_path": "../../../../plm_cache/t5-base", "num_train_epochs": 3, "output_dir": "outputs/bitfit/t5-base/sst2", "overwrite_output_dir": true, "per_device_eval_batch_size": 32, "per_device_train_batch_size": 32, "predict_with_generate": true, "push_to_hub": true, "save_steps": 200, "save_strategy": "steps", "save_total_limit": 1, "seed": 42, "split_validation_test": true, "task_name": "sst2", "test_dataset_config_name": ["en"], "test_dataset_name": "sst2", "tokenizer_name": "../../../../plm_cache/t5-base", "unfrozen_modules": ["deltas", "layer_norm", "final_layer_norm"], "warmup_steps": 0}
 
1
+ {"dataset_config_name": ["en"], "delta_type": "lora", "do_eval": true, "do_test": true, "do_train": true, "eval_dataset_config_name": ["en"], "eval_dataset_name": "sst2", "eval_steps": 200, "evaluation_strategy": "steps", "greater_is_better": true, "learning_rate": 0.0003, "load_best_model_at_end": true, "lora_r": 8, "max_source_length": 128, "metric_for_best_model": "average_metrics", "model_name_or_path": "../../../../plm_cache/t5-base", "num_train_epochs": 3, "output_dir": "outputs/bitfit/t5-base/sst2", "overwrite_output_dir": true, "per_device_eval_batch_size": 32, "per_device_train_batch_size": 32, "predict_with_generate": true, "push_to_hub": true, "save_steps": 200, "save_strategy": "steps", "save_total_limit": 1, "seed": 42, "split_validation_test": true, "task_name": "sst2", "test_dataset_config_name": ["en"], "test_dataset_name": "sst2", "tokenizer_name": "../../../../plm_cache/t5-base", "unfrozen_modules": ["deltas", "layer_norm", "final_layer_norm"], "warmup_steps": 0}