ShengdingHu commited on
Commit
208df58
1 Parent(s): c7ab553

Training in progress, step 100

Browse files
all_results.json CHANGED
@@ -1,18 +1,18 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_accuracy": 71.47335423197492,
4
- "eval_average_metrics": 71.47335423197492,
5
- "eval_loss": 0.2988086938858032,
6
- "eval_runtime": 0.8126,
7
- "eval_samples_per_second": 392.589,
8
- "test_accuracy": 69.27899686520375,
9
- "test_average_metrics": 69.27899686520375,
10
- "test_loss": 0.2875460088253021,
11
- "test_runtime": 0.8027,
12
- "test_samples_per_second": 397.401,
13
- "train_loss": 0.2150040188957663,
14
- "train_runtime": 491.7183,
15
  "train_samples": 5428,
16
- "train_samples_per_second": 220.777,
17
- "train_steps_per_second": 6.915
18
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_accuracy": 70.84639498432603,
4
+ "eval_average_metrics": 70.84639498432603,
5
+ "eval_loss": 0.4690648913383484,
6
+ "eval_runtime": 1.8144,
7
+ "eval_samples_per_second": 175.82,
8
+ "test_accuracy": 67.7115987460815,
9
+ "test_average_metrics": 67.7115987460815,
10
+ "test_loss": 0.48792609572410583,
11
+ "test_runtime": 1.6468,
12
+ "test_samples_per_second": 193.706,
13
+ "train_loss": 0.0949506963000578,
14
+ "train_runtime": 1454.9221,
15
  "train_samples": 5428,
16
+ "train_samples_per_second": 74.616,
17
+ "train_steps_per_second": 2.337
18
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_accuracy": 71.47335423197492,
4
- "eval_average_metrics": 71.47335423197492,
5
- "eval_loss": 0.2988086938858032,
6
- "eval_runtime": 0.8126,
7
- "eval_samples_per_second": 392.589
8
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_accuracy": 70.84639498432603,
4
+ "eval_average_metrics": 70.84639498432603,
5
+ "eval_loss": 0.4690648913383484,
6
+ "eval_runtime": 1.8144,
7
+ "eval_samples_per_second": 175.82
8
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:966e269ab94880e5284fd75c374ebef3c43b7a3f2b868d2022d66e30f55a0efd
3
- size 7551621
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aff2614350b293dad2d6ae82715e397ab9ceb21b4b77949ae04ebcdf52c8b07f
3
+ size 2631685
runs/Feb01_06-49-55_node1/events.out.tfevents.1643669464.node1 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f31934e5a4b92c0f2192e2f975422c0b2c4f343f28522ecd9f507ca2d1bccbe0
3
- size 15567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee5277355201115aac2d88d1d183052efcef7b11c5793719e2cd59bb4ab1afd4
3
+ size 16565
runs/Feb01_06-49-55_node1/events.out.tfevents.1643670922.node1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9db180bef957074b16990a2dab72b84c35acd54b7f3860acbe0817769f2318c
3
+ size 684
runs/Feb02_18-14-58_node1/1643796963.6869369/events.out.tfevents.1643796963.node1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b0a783c92cd57a8e8ffd4eb423e89d000c6183b6f122f1fa5858810206e6221
3
+ size 5038
runs/Feb02_18-14-58_node1/events.out.tfevents.1643796963.node1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8222c4d151f5adce7080499b7d80f8ae972dcf3aa0fd23fd723ec4e8c39b248
3
+ size 4327
test_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 20.0,
3
- "test_accuracy": 69.27899686520375,
4
- "test_average_metrics": 69.27899686520375,
5
- "test_loss": 0.2875460088253021,
6
- "test_runtime": 0.8027,
7
- "test_samples_per_second": 397.401
8
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "test_accuracy": 67.7115987460815,
4
+ "test_average_metrics": 67.7115987460815,
5
+ "test_loss": 0.48792609572410583,
6
+ "test_runtime": 1.6468,
7
+ "test_samples_per_second": 193.706
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 20.0,
3
- "train_loss": 0.2150040188957663,
4
- "train_runtime": 491.7183,
5
  "train_samples": 5428,
6
- "train_samples_per_second": 220.777,
7
- "train_steps_per_second": 6.915
8
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "train_loss": 0.0949506963000578,
4
+ "train_runtime": 1454.9221,
5
  "train_samples": 5428,
6
+ "train_samples_per_second": 74.616,
7
+ "train_steps_per_second": 2.337
8
  }
trainer_state.json CHANGED
@@ -1,214 +1,367 @@
1
  {
2
- "best_metric": 71.47335423197492,
3
- "best_model_checkpoint": "outputs/bitfit/t5-base/superglue-wic/checkpoint-2000",
4
  "epoch": 20.0,
5
  "global_step": 3400,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
 
 
 
 
 
 
 
 
 
10
  {
11
  "epoch": 1.18,
12
- "eval_accuracy": 63.63636363636363,
13
- "eval_average_metrics": 63.63636363636363,
14
- "eval_loss": 0.27316540479660034,
15
- "eval_runtime": 0.8596,
16
- "eval_samples_per_second": 371.101,
17
  "step": 200
18
  },
 
 
 
 
 
 
 
 
 
19
  {
20
  "epoch": 2.35,
21
- "eval_accuracy": 66.77115987460816,
22
- "eval_average_metrics": 66.77115987460816,
23
- "eval_loss": 0.27486181259155273,
24
- "eval_runtime": 0.7485,
25
- "eval_samples_per_second": 426.175,
26
  "step": 400
27
  },
28
  {
29
  "epoch": 2.94,
30
  "learning_rate": 0.0002558823529411764,
31
- "loss": 0.3481,
 
 
 
 
 
 
 
 
 
32
  "step": 500
33
  },
34
  {
35
  "epoch": 3.53,
36
- "eval_accuracy": 66.77115987460816,
37
- "eval_average_metrics": 66.77115987460816,
38
- "eval_loss": 0.299835741519928,
39
- "eval_runtime": 0.7521,
40
- "eval_samples_per_second": 424.118,
41
  "step": 600
42
  },
 
 
 
 
 
 
 
 
 
43
  {
44
  "epoch": 4.71,
45
- "eval_accuracy": 68.96551724137932,
46
- "eval_average_metrics": 68.96551724137932,
47
- "eval_loss": 0.30409565567970276,
48
- "eval_runtime": 0.8111,
49
- "eval_samples_per_second": 393.309,
50
  "step": 800
51
  },
 
 
 
 
 
 
 
 
 
52
  {
53
  "epoch": 5.88,
54
  "learning_rate": 0.00021176470588235295,
55
- "loss": 0.2116,
56
  "step": 1000
57
  },
58
  {
59
  "epoch": 5.88,
60
- "eval_accuracy": 68.3385579937304,
61
- "eval_average_metrics": 68.3385579937304,
62
- "eval_loss": 0.3004938066005707,
63
- "eval_runtime": 0.837,
64
- "eval_samples_per_second": 381.134,
65
  "step": 1000
66
  },
 
 
 
 
 
 
 
 
 
67
  {
68
  "epoch": 7.06,
69
- "eval_accuracy": 66.4576802507837,
70
- "eval_average_metrics": 66.4576802507837,
71
- "eval_loss": 0.3050723671913147,
72
- "eval_runtime": 0.8243,
73
- "eval_samples_per_second": 386.998,
74
  "step": 1200
75
  },
 
 
 
 
 
 
 
 
 
76
  {
77
  "epoch": 8.24,
78
- "eval_accuracy": 68.65203761755487,
79
- "eval_average_metrics": 68.65203761755487,
80
- "eval_loss": 0.306122362613678,
81
- "eval_runtime": 0.7807,
82
- "eval_samples_per_second": 408.604,
83
  "step": 1400
84
  },
85
  {
86
  "epoch": 8.82,
87
  "learning_rate": 0.0001676470588235294,
88
- "loss": 0.2002,
 
 
 
 
 
 
 
 
 
89
  "step": 1500
90
  },
91
  {
92
  "epoch": 9.41,
93
- "eval_accuracy": 69.59247648902821,
94
- "eval_average_metrics": 69.59247648902821,
95
- "eval_loss": 0.3002206087112427,
96
- "eval_runtime": 0.7938,
97
- "eval_samples_per_second": 401.848,
98
  "step": 1600
99
  },
 
 
 
 
 
 
 
 
 
100
  {
101
  "epoch": 10.59,
102
- "eval_accuracy": 69.90595611285266,
103
- "eval_average_metrics": 69.90595611285266,
104
- "eval_loss": 0.3040717840194702,
105
- "eval_runtime": 0.7501,
106
- "eval_samples_per_second": 425.262,
107
  "step": 1800
108
  },
 
 
 
 
 
 
 
 
 
109
  {
110
  "epoch": 11.76,
111
  "learning_rate": 0.00012352941176470587,
112
- "loss": 0.191,
113
  "step": 2000
114
  },
115
  {
116
  "epoch": 11.76,
117
- "eval_accuracy": 71.47335423197492,
118
- "eval_average_metrics": 71.47335423197492,
119
- "eval_loss": 0.2988086938858032,
120
- "eval_runtime": 0.8123,
121
- "eval_samples_per_second": 392.721,
122
  "step": 2000
123
  },
 
 
 
 
 
 
 
 
 
124
  {
125
  "epoch": 12.94,
126
- "eval_accuracy": 69.59247648902821,
127
- "eval_average_metrics": 69.59247648902821,
128
- "eval_loss": 0.3100716471672058,
129
- "eval_runtime": 0.782,
130
- "eval_samples_per_second": 407.937,
131
  "step": 2200
132
  },
133
  {
134
- "epoch": 14.12,
135
  "eval_accuracy": 69.27899686520375,
136
  "eval_average_metrics": 69.27899686520375,
137
- "eval_loss": 0.3199401795864105,
138
- "eval_runtime": 0.7323,
139
- "eval_samples_per_second": 435.642,
 
 
 
 
 
 
 
 
 
140
  "step": 2400
141
  },
142
  {
143
  "epoch": 14.71,
144
  "learning_rate": 7.941176470588235e-05,
145
- "loss": 0.1875,
146
  "step": 2500
147
  },
148
  {
149
- "epoch": 15.29,
150
  "eval_accuracy": 68.3385579937304,
151
  "eval_average_metrics": 68.3385579937304,
152
- "eval_loss": 0.316545695066452,
153
- "eval_runtime": 0.8139,
154
- "eval_samples_per_second": 391.955,
 
 
 
 
 
 
 
 
 
155
  "step": 2600
156
  },
157
  {
158
- "epoch": 16.47,
159
  "eval_accuracy": 68.96551724137932,
160
  "eval_average_metrics": 68.96551724137932,
161
- "eval_loss": 0.3149982690811157,
162
- "eval_runtime": 0.9277,
163
- "eval_samples_per_second": 343.845,
 
 
 
 
 
 
 
 
 
164
  "step": 2800
165
  },
 
 
 
 
 
 
 
 
 
166
  {
167
  "epoch": 17.65,
168
  "learning_rate": 3.529411764705882e-05,
169
- "loss": 0.1808,
170
  "step": 3000
171
  },
172
  {
173
  "epoch": 17.65,
174
  "eval_accuracy": 69.27899686520375,
175
  "eval_average_metrics": 69.27899686520375,
176
- "eval_loss": 0.3163623511791229,
177
- "eval_runtime": 0.7966,
178
- "eval_samples_per_second": 400.465,
179
  "step": 3000
180
  },
 
 
 
 
 
 
 
 
 
181
  {
182
  "epoch": 18.82,
183
- "eval_accuracy": 69.59247648902821,
184
- "eval_average_metrics": 69.59247648902821,
185
- "eval_loss": 0.3151997923851013,
186
- "eval_runtime": 0.7837,
187
- "eval_samples_per_second": 407.019,
188
  "step": 3200
189
  },
 
 
 
 
 
 
 
 
 
190
  {
191
  "epoch": 20.0,
192
- "eval_accuracy": 69.27899686520375,
193
- "eval_average_metrics": 69.27899686520375,
194
- "eval_loss": 0.3175739645957947,
195
- "eval_runtime": 0.7847,
196
- "eval_samples_per_second": 406.504,
197
  "step": 3400
198
  },
199
  {
200
  "epoch": 20.0,
201
  "step": 3400,
202
- "total_flos": 7171341817136256.0,
203
- "train_loss": 0.2150040188957663,
204
- "train_runtime": 491.7183,
205
- "train_samples_per_second": 220.777,
206
- "train_steps_per_second": 6.915
207
  }
208
  ],
209
  "max_steps": 3400,
210
  "num_train_epochs": 20,
211
- "total_flos": 7171341817136256.0,
212
  "trial_name": null,
213
  "trial_params": null
214
  }
 
1
  {
2
+ "best_metric": 70.84639498432603,
3
+ "best_model_checkpoint": "outputs/bitfit/t5-base/superglue-wic/checkpoint-1700",
4
  "epoch": 20.0,
5
  "global_step": 3400,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
+ {
11
+ "epoch": 0.59,
12
+ "eval_accuracy": 64.57680250783699,
13
+ "eval_average_metrics": 64.57680250783699,
14
+ "eval_loss": 0.2697466313838959,
15
+ "eval_runtime": 1.3478,
16
+ "eval_samples_per_second": 236.682,
17
+ "step": 100
18
+ },
19
  {
20
  "epoch": 1.18,
21
+ "eval_accuracy": 65.51724137931035,
22
+ "eval_average_metrics": 65.51724137931035,
23
+ "eval_loss": 0.3369597792625427,
24
+ "eval_runtime": 1.5122,
25
+ "eval_samples_per_second": 210.954,
26
  "step": 200
27
  },
28
+ {
29
+ "epoch": 1.76,
30
+ "eval_accuracy": 67.39811912225704,
31
+ "eval_average_metrics": 67.39811912225704,
32
+ "eval_loss": 0.29254111647605896,
33
+ "eval_runtime": 1.6714,
34
+ "eval_samples_per_second": 190.86,
35
+ "step": 300
36
+ },
37
  {
38
  "epoch": 2.35,
39
+ "eval_accuracy": 65.51724137931035,
40
+ "eval_average_metrics": 65.51724137931035,
41
+ "eval_loss": 0.30159813165664673,
42
+ "eval_runtime": 1.399,
43
+ "eval_samples_per_second": 228.019,
44
  "step": 400
45
  },
46
  {
47
  "epoch": 2.94,
48
  "learning_rate": 0.0002558823529411764,
49
+ "loss": 0.2549,
50
+ "step": 500
51
+ },
52
+ {
53
+ "epoch": 2.94,
54
+ "eval_accuracy": 69.90595611285266,
55
+ "eval_average_metrics": 69.90595611285266,
56
+ "eval_loss": 0.30272728204727173,
57
+ "eval_runtime": 1.7009,
58
+ "eval_samples_per_second": 187.551,
59
  "step": 500
60
  },
61
  {
62
  "epoch": 3.53,
63
+ "eval_accuracy": 67.39811912225704,
64
+ "eval_average_metrics": 67.39811912225704,
65
+ "eval_loss": 0.3448639512062073,
66
+ "eval_runtime": 1.6869,
67
+ "eval_samples_per_second": 189.1,
68
  "step": 600
69
  },
70
+ {
71
+ "epoch": 4.12,
72
+ "eval_accuracy": 69.27899686520375,
73
+ "eval_average_metrics": 69.27899686520375,
74
+ "eval_loss": 0.37638065218925476,
75
+ "eval_runtime": 1.3564,
76
+ "eval_samples_per_second": 235.188,
77
+ "step": 700
78
+ },
79
  {
80
  "epoch": 4.71,
81
+ "eval_accuracy": 68.02507836990596,
82
+ "eval_average_metrics": 68.02507836990596,
83
+ "eval_loss": 0.3278528153896332,
84
+ "eval_runtime": 1.436,
85
+ "eval_samples_per_second": 222.137,
86
  "step": 800
87
  },
88
+ {
89
+ "epoch": 5.29,
90
+ "eval_accuracy": 65.8307210031348,
91
+ "eval_average_metrics": 65.8307210031348,
92
+ "eval_loss": 0.37711796164512634,
93
+ "eval_runtime": 1.7367,
94
+ "eval_samples_per_second": 183.683,
95
+ "step": 900
96
+ },
97
  {
98
  "epoch": 5.88,
99
  "learning_rate": 0.00021176470588235295,
100
+ "loss": 0.1531,
101
  "step": 1000
102
  },
103
  {
104
  "epoch": 5.88,
105
+ "eval_accuracy": 68.96551724137932,
106
+ "eval_average_metrics": 68.96551724137932,
107
+ "eval_loss": 0.37187883257865906,
108
+ "eval_runtime": 1.5128,
109
+ "eval_samples_per_second": 210.871,
110
  "step": 1000
111
  },
112
+ {
113
+ "epoch": 6.47,
114
+ "eval_accuracy": 66.14420062695925,
115
+ "eval_average_metrics": 66.14420062695925,
116
+ "eval_loss": 0.4265880882740021,
117
+ "eval_runtime": 1.6439,
118
+ "eval_samples_per_second": 194.053,
119
+ "step": 1100
120
+ },
121
  {
122
  "epoch": 7.06,
123
+ "eval_accuracy": 67.0846394984326,
124
+ "eval_average_metrics": 67.0846394984326,
125
+ "eval_loss": 0.4190385937690735,
126
+ "eval_runtime": 1.694,
127
+ "eval_samples_per_second": 188.316,
128
  "step": 1200
129
  },
130
+ {
131
+ "epoch": 7.65,
132
+ "eval_accuracy": 67.7115987460815,
133
+ "eval_average_metrics": 67.7115987460815,
134
+ "eval_loss": 0.4279927611351013,
135
+ "eval_runtime": 1.6705,
136
+ "eval_samples_per_second": 190.965,
137
+ "step": 1300
138
+ },
139
  {
140
  "epoch": 8.24,
141
+ "eval_accuracy": 68.96551724137932,
142
+ "eval_average_metrics": 68.96551724137932,
143
+ "eval_loss": 0.4631330370903015,
144
+ "eval_runtime": 1.6505,
145
+ "eval_samples_per_second": 193.272,
146
  "step": 1400
147
  },
148
  {
149
  "epoch": 8.82,
150
  "learning_rate": 0.0001676470588235294,
151
+ "loss": 0.0995,
152
+ "step": 1500
153
+ },
154
+ {
155
+ "epoch": 8.82,
156
+ "eval_accuracy": 67.7115987460815,
157
+ "eval_average_metrics": 67.7115987460815,
158
+ "eval_loss": 0.46376463770866394,
159
+ "eval_runtime": 1.4266,
160
+ "eval_samples_per_second": 223.612,
161
  "step": 1500
162
  },
163
  {
164
  "epoch": 9.41,
165
+ "eval_accuracy": 68.96551724137932,
166
+ "eval_average_metrics": 68.96551724137932,
167
+ "eval_loss": 0.509791374206543,
168
+ "eval_runtime": 1.5279,
169
+ "eval_samples_per_second": 208.785,
170
  "step": 1600
171
  },
172
+ {
173
+ "epoch": 10.0,
174
+ "eval_accuracy": 70.84639498432603,
175
+ "eval_average_metrics": 70.84639498432603,
176
+ "eval_loss": 0.4690648913383484,
177
+ "eval_runtime": 1.3636,
178
+ "eval_samples_per_second": 233.934,
179
+ "step": 1700
180
+ },
181
  {
182
  "epoch": 10.59,
183
+ "eval_accuracy": 69.27899686520375,
184
+ "eval_average_metrics": 69.27899686520375,
185
+ "eval_loss": 0.5513712763786316,
186
+ "eval_runtime": 1.6761,
187
+ "eval_samples_per_second": 190.323,
188
  "step": 1800
189
  },
190
+ {
191
+ "epoch": 11.18,
192
+ "eval_accuracy": 68.65203761755487,
193
+ "eval_average_metrics": 68.65203761755487,
194
+ "eval_loss": 0.5640075206756592,
195
+ "eval_runtime": 1.498,
196
+ "eval_samples_per_second": 212.948,
197
+ "step": 1900
198
+ },
199
  {
200
  "epoch": 11.76,
201
  "learning_rate": 0.00012352941176470587,
202
+ "loss": 0.0627,
203
  "step": 2000
204
  },
205
  {
206
  "epoch": 11.76,
207
+ "eval_accuracy": 68.3385579937304,
208
+ "eval_average_metrics": 68.3385579937304,
209
+ "eval_loss": 0.5400956273078918,
210
+ "eval_runtime": 1.7505,
211
+ "eval_samples_per_second": 182.237,
212
  "step": 2000
213
  },
214
+ {
215
+ "epoch": 12.35,
216
+ "eval_accuracy": 66.77115987460816,
217
+ "eval_average_metrics": 66.77115987460816,
218
+ "eval_loss": 0.6046501994132996,
219
+ "eval_runtime": 1.6979,
220
+ "eval_samples_per_second": 187.882,
221
+ "step": 2100
222
+ },
223
  {
224
  "epoch": 12.94,
225
+ "eval_accuracy": 70.53291536050156,
226
+ "eval_average_metrics": 70.53291536050156,
227
+ "eval_loss": 0.6463834047317505,
228
+ "eval_runtime": 1.3771,
229
+ "eval_samples_per_second": 231.652,
230
  "step": 2200
231
  },
232
  {
233
+ "epoch": 13.53,
234
  "eval_accuracy": 69.27899686520375,
235
  "eval_average_metrics": 69.27899686520375,
236
+ "eval_loss": 0.6423152089118958,
237
+ "eval_runtime": 1.6672,
238
+ "eval_samples_per_second": 191.343,
239
+ "step": 2300
240
+ },
241
+ {
242
+ "epoch": 14.12,
243
+ "eval_accuracy": 68.96551724137932,
244
+ "eval_average_metrics": 68.96551724137932,
245
+ "eval_loss": 0.6988397240638733,
246
+ "eval_runtime": 1.6913,
247
+ "eval_samples_per_second": 188.613,
248
  "step": 2400
249
  },
250
  {
251
  "epoch": 14.71,
252
  "learning_rate": 7.941176470588235e-05,
253
+ "loss": 0.037,
254
  "step": 2500
255
  },
256
  {
257
+ "epoch": 14.71,
258
  "eval_accuracy": 68.3385579937304,
259
  "eval_average_metrics": 68.3385579937304,
260
+ "eval_loss": 0.7036728858947754,
261
+ "eval_runtime": 1.7114,
262
+ "eval_samples_per_second": 186.402,
263
+ "step": 2500
264
+ },
265
+ {
266
+ "epoch": 15.29,
267
+ "eval_accuracy": 68.65203761755487,
268
+ "eval_average_metrics": 68.65203761755487,
269
+ "eval_loss": 0.6903794407844543,
270
+ "eval_runtime": 1.6091,
271
+ "eval_samples_per_second": 198.248,
272
  "step": 2600
273
  },
274
  {
275
+ "epoch": 15.88,
276
  "eval_accuracy": 68.96551724137932,
277
  "eval_average_metrics": 68.96551724137932,
278
+ "eval_loss": 0.7062700390815735,
279
+ "eval_runtime": 1.7338,
280
+ "eval_samples_per_second": 183.989,
281
+ "step": 2700
282
+ },
283
+ {
284
+ "epoch": 16.47,
285
+ "eval_accuracy": 70.21943573667711,
286
+ "eval_average_metrics": 70.21943573667711,
287
+ "eval_loss": 0.7178221940994263,
288
+ "eval_runtime": 1.3196,
289
+ "eval_samples_per_second": 241.74,
290
  "step": 2800
291
  },
292
+ {
293
+ "epoch": 17.06,
294
+ "eval_accuracy": 68.96551724137932,
295
+ "eval_average_metrics": 68.96551724137932,
296
+ "eval_loss": 0.7901595234870911,
297
+ "eval_runtime": 1.6693,
298
+ "eval_samples_per_second": 191.103,
299
+ "step": 2900
300
+ },
301
  {
302
  "epoch": 17.65,
303
  "learning_rate": 3.529411764705882e-05,
304
+ "loss": 0.0252,
305
  "step": 3000
306
  },
307
  {
308
  "epoch": 17.65,
309
  "eval_accuracy": 69.27899686520375,
310
  "eval_average_metrics": 69.27899686520375,
311
+ "eval_loss": 0.8428929448127747,
312
+ "eval_runtime": 1.6724,
313
+ "eval_samples_per_second": 190.74,
314
  "step": 3000
315
  },
316
+ {
317
+ "epoch": 18.24,
318
+ "eval_accuracy": 68.96551724137932,
319
+ "eval_average_metrics": 68.96551724137932,
320
+ "eval_loss": 0.824032187461853,
321
+ "eval_runtime": 1.1559,
322
+ "eval_samples_per_second": 275.979,
323
+ "step": 3100
324
+ },
325
  {
326
  "epoch": 18.82,
327
+ "eval_accuracy": 69.27899686520375,
328
+ "eval_average_metrics": 69.27899686520375,
329
+ "eval_loss": 0.8500473499298096,
330
+ "eval_runtime": 1.7687,
331
+ "eval_samples_per_second": 180.362,
332
  "step": 3200
333
  },
334
+ {
335
+ "epoch": 19.41,
336
+ "eval_accuracy": 68.3385579937304,
337
+ "eval_average_metrics": 68.3385579937304,
338
+ "eval_loss": 0.8623896241188049,
339
+ "eval_runtime": 1.7067,
340
+ "eval_samples_per_second": 186.905,
341
+ "step": 3300
342
+ },
343
  {
344
  "epoch": 20.0,
345
+ "eval_accuracy": 68.3385579937304,
346
+ "eval_average_metrics": 68.3385579937304,
347
+ "eval_loss": 0.8675826787948608,
348
+ "eval_runtime": 1.7047,
349
+ "eval_samples_per_second": 187.128,
350
  "step": 3400
351
  },
352
  {
353
  "epoch": 20.0,
354
  "step": 3400,
355
+ "total_flos": 7227776409587712.0,
356
+ "train_loss": 0.0949506963000578,
357
+ "train_runtime": 1454.9221,
358
+ "train_samples_per_second": 74.616,
359
+ "train_steps_per_second": 2.337
360
  }
361
  ],
362
  "max_steps": 3400,
363
  "num_train_epochs": 20,
364
+ "total_flos": 7227776409587712.0,
365
  "trial_name": null,
366
  "trial_params": null
367
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:39961be8916a7a4027a7bea7ee448ceffcf6b80b1a505293bcee68d6fe5fbe36
3
  size 3183
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49357d69ca929ce39aa55fa0c5e58329d29fbab1145e7ac0e58a4d4056492e11
3
  size 3183
training_config.json CHANGED
@@ -1 +1 @@
1
- {"bottleneck_dim": 24, "dataset_config_name": ["en"], "delta_type": "adapter", "do_eval": true, "do_test": true, "do_train": true, "eval_dataset_config_name": ["en"], "eval_dataset_name": "superglue-wic", "eval_steps": 100, "evaluation_strategy": "steps", "greater_is_better": true, "learning_rate": 0.0003, "load_best_model_at_end": true, "max_source_length": 256, "metric_for_best_model": "average_metrics", "model_name_or_path": "../../../../plm_cache/t5-base", "num_train_epochs": 20, "output_dir": "outputs/bitfit/t5-base/superglue-wic", "overwrite_output_dir": true, "per_device_eval_batch_size": 32, "per_device_train_batch_size": 32, "predict_with_generate": true, "push_to_hub": true, "save_steps": 100, "save_strategy": "steps", "save_total_limit": 1, "seed": 42, "split_validation_test": true, "task_name": "superglue-wic", "test_dataset_config_name": ["en"], "test_dataset_name": "superglue-wic", "tokenizer_name": "../../../../plm_cache/t5-base", "unfrozen_modules": ["deltas", "layer_norm", "final_layer_norm"], "warmup_steps": 0}
 
1
+ {"dataset_config_name": ["en"], "delta_type": "lora", "do_eval": true, "do_test": true, "do_train": true, "eval_dataset_config_name": ["en"], "eval_dataset_name": "superglue-wic", "eval_steps": 100, "evaluation_strategy": "steps", "greater_is_better": true, "learning_rate": 0.0003, "load_best_model_at_end": true, "lora_r": 8, "max_source_length": 256, "metric_for_best_model": "average_metrics", "model_name_or_path": "../../../../plm_cache/t5-base", "num_train_epochs": 20, "output_dir": "outputs/bitfit/t5-base/superglue-wic", "overwrite_output_dir": true, "per_device_eval_batch_size": 32, "per_device_train_batch_size": 32, "predict_with_generate": true, "push_to_hub": true, "save_steps": 100, "save_strategy": "steps", "save_total_limit": 1, "seed": 42, "split_validation_test": true, "task_name": "superglue-wic", "test_dataset_config_name": ["en"], "test_dataset_name": "superglue-wic", "tokenizer_name": "../../../../plm_cache/t5-base", "unfrozen_modules": ["deltas", "layer_norm", "final_layer_norm"], "warmup_steps": 0}