ShengdingHu commited on
Commit
a01a840
1 Parent(s): 6015cfb

Training in progress, step 200

Browse files
all_results.json CHANGED
@@ -1,18 +1,18 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_average_metrics": 52.359840871035765,
4
- "eval_loss": 0.2282242476940155,
5
- "eval_matthews_correlation": 52.359840871035765,
6
- "eval_runtime": 0.9075,
7
- "eval_samples_per_second": 574.107,
8
- "test_average_metrics": 63.24519496421077,
9
- "test_loss": 0.18458954989910126,
10
- "test_matthews_correlation": 63.24519496421077,
11
- "test_runtime": 0.8502,
12
- "test_samples_per_second": 614.001,
13
  "train_loss": 0.5282898814179177,
14
- "train_runtime": 424.1274,
15
  "train_samples": 8551,
16
- "train_samples_per_second": 403.228,
17
- "train_steps_per_second": 4.055
18
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_average_metrics": 52.55846445665645,
4
+ "eval_loss": 0.22525694966316223,
5
+ "eval_matthews_correlation": 52.55846445665645,
6
+ "eval_runtime": 1.2083,
7
+ "eval_samples_per_second": 431.173,
8
+ "test_average_metrics": 63.756766360647745,
9
+ "test_loss": 0.18106061220169067,
10
+ "test_matthews_correlation": 63.756766360647745,
11
+ "test_runtime": 1.2266,
12
+ "test_samples_per_second": 425.574,
13
  "train_loss": 0.5282898814179177,
14
+ "train_runtime": 422.8779,
15
  "train_samples": 8551,
16
+ "train_samples_per_second": 404.419,
17
+ "train_steps_per_second": 4.067
18
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_average_metrics": 52.359840871035765,
4
- "eval_loss": 0.2282242476940155,
5
- "eval_matthews_correlation": 52.359840871035765,
6
- "eval_runtime": 0.9075,
7
- "eval_samples_per_second": 574.107
8
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_average_metrics": 52.55846445665645,
4
+ "eval_loss": 0.22525694966316223,
5
+ "eval_matthews_correlation": 52.55846445665645,
6
+ "eval_runtime": 1.2083,
7
+ "eval_samples_per_second": 431.173
8
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c5c97f3e6f26418395f43684b519f6fbbfe61587fdf0b7f909b63153c847e99b
3
- size 1084131
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3bbc6bb60a0e9a5a611381f99725d8ff5f079fda737349a7ec175298f1b464e
3
+ size 7551621
runs/Feb01_00-35-21_node1/1643647072.4568229/events.out.tfevents.1643647072.node1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2eec98c8506b77fac2f6f93648af3514b1d856bc587d91098be10bbd438852d8
3
+ size 5011
runs/Feb01_00-35-21_node1/events.out.tfevents.1643647072.node1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4fb0a5519639038b689386956ca41fc8f5f1bec052201aaec72142a139e237ed
3
+ size 4304
runs/Jan31_20-22-53_node1/events.out.tfevents.1643631902.node1 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:875ddb215692219fe42ebcabf199c8821b5675ca060a7f4893ee0e578eb4c486
3
- size 7113
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7b0be464bdd99e7e876ca5647ad2c7836c421cbd20a648d6a2648b1b17b8627
3
+ size 7801
runs/Jan31_20-22-53_node1/events.out.tfevents.1643632326.node1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67469b42186d9407b3095485ef49514c1f0f317abf04186e198a20bb130ee140
3
+ size 708
test_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 20.0,
3
- "test_average_metrics": 63.24519496421077,
4
- "test_loss": 0.18458954989910126,
5
- "test_matthews_correlation": 63.24519496421077,
6
- "test_runtime": 0.8502,
7
- "test_samples_per_second": 614.001
8
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "test_average_metrics": 63.756766360647745,
4
+ "test_loss": 0.18106061220169067,
5
+ "test_matthews_correlation": 63.756766360647745,
6
+ "test_runtime": 1.2266,
7
+ "test_samples_per_second": 425.574
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 20.0,
3
  "train_loss": 0.5282898814179177,
4
- "train_runtime": 424.1274,
5
  "train_samples": 8551,
6
- "train_samples_per_second": 403.228,
7
- "train_steps_per_second": 4.055
8
  }
 
1
  {
2
  "epoch": 20.0,
3
  "train_loss": 0.5282898814179177,
4
+ "train_runtime": 422.8779,
5
  "train_samples": 8551,
6
+ "train_samples_per_second": 404.419,
7
+ "train_steps_per_second": 4.067
8
  }
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 52.359840871035765,
3
- "best_model_checkpoint": "outputs/bitfit/t5-base/cola/checkpoint-1290",
4
  "epoch": 20.0,
5
  "global_step": 1720,
6
  "is_hyper_param_search": false,
@@ -8,49 +8,22 @@
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 1.0,
12
- "eval_average_metrics": 0.0,
13
- "eval_loss": 4.8104567527771,
14
- "eval_matthews_correlation": 0.0,
15
- "eval_runtime": 0.763,
16
- "eval_samples_per_second": 682.828,
17
- "step": 86
18
  },
19
  {
20
- "epoch": 2.0,
21
- "eval_average_metrics": 8.869450752831899,
22
- "eval_loss": 0.27582260966300964,
23
- "eval_matthews_correlation": 8.869450752831899,
24
- "eval_runtime": 0.8417,
25
- "eval_samples_per_second": 619.016,
26
- "step": 172
27
- },
28
- {
29
- "epoch": 3.0,
30
- "eval_average_metrics": 43.97683870357735,
31
- "eval_loss": 0.21786993741989136,
32
- "eval_matthews_correlation": 43.97683870357735,
33
- "eval_runtime": 0.8315,
34
- "eval_samples_per_second": 626.599,
35
- "step": 258
36
- },
37
- {
38
- "epoch": 4.0,
39
- "eval_average_metrics": 46.78131759250157,
40
- "eval_loss": 0.22430144250392914,
41
- "eval_matthews_correlation": 46.78131759250157,
42
- "eval_runtime": 0.8615,
43
- "eval_samples_per_second": 604.782,
44
- "step": 344
45
- },
46
- {
47
- "epoch": 5.0,
48
- "eval_average_metrics": 49.53306157776009,
49
- "eval_loss": 0.21720413863658905,
50
- "eval_matthews_correlation": 49.53306157776009,
51
- "eval_runtime": 0.8515,
52
- "eval_samples_per_second": 611.84,
53
- "step": 430
54
  },
55
  {
56
  "epoch": 5.81,
@@ -59,58 +32,22 @@
59
  "step": 500
60
  },
61
  {
62
- "epoch": 6.0,
63
- "eval_average_metrics": 48.16295515684713,
64
- "eval_loss": 0.22913119196891785,
65
- "eval_matthews_correlation": 48.16295515684713,
66
- "eval_runtime": 0.8751,
67
- "eval_samples_per_second": 595.357,
68
- "step": 516
69
- },
70
- {
71
- "epoch": 7.0,
72
- "eval_average_metrics": 47.85938832793908,
73
- "eval_loss": 0.23619267344474792,
74
- "eval_matthews_correlation": 47.85938832793908,
75
- "eval_runtime": 0.8864,
76
- "eval_samples_per_second": 587.749,
77
- "step": 602
78
- },
79
- {
80
- "epoch": 8.0,
81
- "eval_average_metrics": 49.724093419139464,
82
- "eval_loss": 0.2390584498643875,
83
- "eval_matthews_correlation": 49.724093419139464,
84
- "eval_runtime": 0.8668,
85
- "eval_samples_per_second": 601.078,
86
- "step": 688
87
- },
88
- {
89
- "epoch": 9.0,
90
- "eval_average_metrics": 48.86384938654592,
91
- "eval_loss": 0.22121600806713104,
92
- "eval_matthews_correlation": 48.86384938654592,
93
- "eval_runtime": 0.8908,
94
- "eval_samples_per_second": 584.879,
95
- "step": 774
96
- },
97
- {
98
- "epoch": 10.0,
99
- "eval_average_metrics": 48.08237616875362,
100
- "eval_loss": 0.2251322716474533,
101
- "eval_matthews_correlation": 48.08237616875362,
102
- "eval_runtime": 0.8537,
103
- "eval_samples_per_second": 610.303,
104
- "step": 860
105
  },
106
  {
107
- "epoch": 11.0,
108
- "eval_average_metrics": 47.083822335710934,
109
- "eval_loss": 0.23612065613269806,
110
- "eval_matthews_correlation": 47.083822335710934,
111
- "eval_runtime": 0.852,
112
- "eval_samples_per_second": 611.521,
113
- "step": 946
114
  },
115
  {
116
  "epoch": 11.63,
@@ -119,58 +56,31 @@
119
  "step": 1000
120
  },
121
  {
122
- "epoch": 12.0,
123
- "eval_average_metrics": 49.76615268960096,
124
- "eval_loss": 0.2268751710653305,
125
- "eval_matthews_correlation": 49.76615268960096,
126
- "eval_runtime": 0.8867,
127
- "eval_samples_per_second": 587.54,
128
- "step": 1032
129
- },
130
- {
131
- "epoch": 13.0,
132
- "eval_average_metrics": 50.45736339822007,
133
- "eval_loss": 0.22725746035575867,
134
- "eval_matthews_correlation": 50.45736339822007,
135
- "eval_runtime": 0.848,
136
- "eval_samples_per_second": 614.383,
137
- "step": 1118
138
  },
139
  {
140
- "epoch": 14.0,
141
  "eval_average_metrics": 51.11915736464516,
142
- "eval_loss": 0.22515951097011566,
143
  "eval_matthews_correlation": 51.11915736464516,
144
- "eval_runtime": 0.8495,
145
- "eval_samples_per_second": 613.291,
146
- "step": 1204
147
  },
148
  {
149
- "epoch": 15.0,
150
- "eval_average_metrics": 52.359840871035765,
151
- "eval_loss": 0.2282242476940155,
152
- "eval_matthews_correlation": 52.359840871035765,
153
- "eval_runtime": 0.9084,
154
- "eval_samples_per_second": 573.516,
155
- "step": 1290
156
- },
157
- {
158
- "epoch": 16.0,
159
- "eval_average_metrics": 50.78390137665081,
160
- "eval_loss": 0.24213995039463043,
161
- "eval_matthews_correlation": 50.78390137665081,
162
- "eval_runtime": 0.7976,
163
- "eval_samples_per_second": 653.201,
164
- "step": 1376
165
- },
166
- {
167
- "epoch": 17.0,
168
- "eval_average_metrics": 51.452415683714314,
169
- "eval_loss": 0.22793160378932953,
170
- "eval_matthews_correlation": 51.452415683714314,
171
- "eval_runtime": 0.8964,
172
- "eval_samples_per_second": 581.215,
173
- "step": 1462
174
  },
175
  {
176
  "epoch": 17.44,
@@ -179,40 +89,22 @@
179
  "step": 1500
180
  },
181
  {
182
- "epoch": 18.0,
183
- "eval_average_metrics": 51.452415683714314,
184
- "eval_loss": 0.23017793893814087,
185
- "eval_matthews_correlation": 51.452415683714314,
186
- "eval_runtime": 0.8935,
187
- "eval_samples_per_second": 583.099,
188
- "step": 1548
189
- },
190
- {
191
- "epoch": 19.0,
192
- "eval_average_metrics": 50.9850664467699,
193
- "eval_loss": 0.2308470457792282,
194
- "eval_matthews_correlation": 50.9850664467699,
195
- "eval_runtime": 0.8691,
196
- "eval_samples_per_second": 599.475,
197
- "step": 1634
198
- },
199
- {
200
- "epoch": 20.0,
201
- "eval_average_metrics": 50.9850664467699,
202
- "eval_loss": 0.22891275584697723,
203
- "eval_matthews_correlation": 50.9850664467699,
204
- "eval_runtime": 0.8366,
205
- "eval_samples_per_second": 622.759,
206
- "step": 1720
207
  },
208
  {
209
  "epoch": 20.0,
210
  "step": 1720,
211
  "total_flos": 6646762904679216.0,
212
  "train_loss": 0.5282898814179177,
213
- "train_runtime": 424.1274,
214
- "train_samples_per_second": 403.228,
215
- "train_steps_per_second": 4.055
216
  }
217
  ],
218
  "max_steps": 1720,
 
1
  {
2
+ "best_metric": 52.55846445665645,
3
+ "best_model_checkpoint": "outputs/bitfit/t5-base/cola/checkpoint-800",
4
  "epoch": 20.0,
5
  "global_step": 1720,
6
  "is_hyper_param_search": false,
 
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 2.33,
12
+ "eval_average_metrics": 21.053609210794303,
13
+ "eval_loss": 0.2652588486671448,
14
+ "eval_matthews_correlation": 21.053609210794303,
15
+ "eval_runtime": 0.8287,
16
+ "eval_samples_per_second": 628.717,
17
+ "step": 200
18
  },
19
  {
20
+ "epoch": 4.65,
21
+ "eval_average_metrics": 44.5285953164197,
22
+ "eval_loss": 0.23253390192985535,
23
+ "eval_matthews_correlation": 44.5285953164197,
24
+ "eval_runtime": 0.8258,
25
+ "eval_samples_per_second": 630.924,
26
+ "step": 400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  },
28
  {
29
  "epoch": 5.81,
 
32
  "step": 500
33
  },
34
  {
35
+ "epoch": 6.98,
36
+ "eval_average_metrics": 47.511676688521085,
37
+ "eval_loss": 0.25159838795661926,
38
+ "eval_matthews_correlation": 47.511676688521085,
39
+ "eval_runtime": 0.8388,
40
+ "eval_samples_per_second": 621.09,
41
+ "step": 600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  },
43
  {
44
+ "epoch": 9.3,
45
+ "eval_average_metrics": 52.55846445665645,
46
+ "eval_loss": 0.22525694966316223,
47
+ "eval_matthews_correlation": 52.55846445665645,
48
+ "eval_runtime": 1.5205,
49
+ "eval_samples_per_second": 342.658,
50
+ "step": 800
51
  },
52
  {
53
  "epoch": 11.63,
 
56
  "step": 1000
57
  },
58
  {
59
+ "epoch": 11.63,
60
+ "eval_average_metrics": 50.296653045050874,
61
+ "eval_loss": 0.23001490533351898,
62
+ "eval_matthews_correlation": 50.296653045050874,
63
+ "eval_runtime": 1.2989,
64
+ "eval_samples_per_second": 401.107,
65
+ "step": 1000
 
 
 
 
 
 
 
 
 
66
  },
67
  {
68
+ "epoch": 13.95,
69
  "eval_average_metrics": 51.11915736464516,
70
+ "eval_loss": 0.22642329335212708,
71
  "eval_matthews_correlation": 51.11915736464516,
72
+ "eval_runtime": 1.2231,
73
+ "eval_samples_per_second": 425.968,
74
+ "step": 1200
75
  },
76
  {
77
+ "epoch": 16.28,
78
+ "eval_average_metrics": 50.87277150375961,
79
+ "eval_loss": 0.22626039385795593,
80
+ "eval_matthews_correlation": 50.87277150375961,
81
+ "eval_runtime": 1.1916,
82
+ "eval_samples_per_second": 437.245,
83
+ "step": 1400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  },
85
  {
86
  "epoch": 17.44,
 
89
  "step": 1500
90
  },
91
  {
92
+ "epoch": 18.6,
93
+ "eval_average_metrics": 52.035569173764515,
94
+ "eval_loss": 0.22766831517219543,
95
+ "eval_matthews_correlation": 52.035569173764515,
96
+ "eval_runtime": 1.2214,
97
+ "eval_samples_per_second": 426.575,
98
+ "step": 1600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  },
100
  {
101
  "epoch": 20.0,
102
  "step": 1720,
103
  "total_flos": 6646762904679216.0,
104
  "train_loss": 0.5282898814179177,
105
+ "train_runtime": 422.8779,
106
+ "train_samples_per_second": 404.419,
107
+ "train_steps_per_second": 4.067
108
  }
109
  ],
110
  "max_steps": 1720,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5a646a3fb66d1560b545d08f84e93ab16cd3153ff19e20982a22b554a34ec127
3
  size 3183
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d45981c3869ac88ceb617a2173cfab542cb6a12ef612e4821407250b68788864
3
  size 3183
training_config.json CHANGED
@@ -1 +1 @@
1
- {"dataset_config_name": ["en"], "delta_type": "bitfit", "do_eval": true, "do_test": true, "do_train": true, "eval_dataset_config_name": ["en"], "eval_dataset_name": "cola", "eval_steps": 200, "evaluation_strategy": "steps", "greater_is_better": true, "learning_rate": 0.0003, "load_best_model_at_end": true, "max_source_length": 128, "metric_for_best_model": "average_metrics", "model_name_or_path": "../../../../plm_cache/t5-base", "num_train_epochs": 20, "output_dir": "outputs/bitfit/t5-base/cola", "overwrite_output_dir": true, "per_device_eval_batch_size": 100, "per_device_train_batch_size": 100, "predict_with_generate": true, "push_to_hub": true, "save_steps": 200, "save_strategy": "steps", "save_total_limit": 1, "seed": 42, "split_validation_test": true, "task_name": "cola", "test_dataset_config_name": ["en"], "test_dataset_name": "cola", "tokenizer_name": "../../../../plm_cache/t5-base", "warmup_steps": 500}
 
1
+ {"bottleneck_dim": 24, "dataset_config_name": ["en"], "delta_type": "adapter", "do_eval": true, "do_test": true, "do_train": true, "eval_dataset_config_name": ["en"], "eval_dataset_name": "cola", "eval_steps": 200, "evaluation_strategy": "steps", "greater_is_better": true, "learning_rate": 0.0003, "load_best_model_at_end": true, "max_source_length": 128, "metric_for_best_model": "average_metrics", "model_name_or_path": "../../../../plm_cache/t5-base", "num_train_epochs": 20, "output_dir": "outputs/bitfit/t5-base/cola", "overwrite_output_dir": true, "per_device_eval_batch_size": 100, "per_device_train_batch_size": 100, "predict_with_generate": true, "push_to_hub": true, "save_steps": 200, "save_strategy": "steps", "save_total_limit": 1, "seed": 42, "split_validation_test": true, "task_name": "cola", "test_dataset_config_name": ["en"], "test_dataset_name": "cola", "tokenizer_name": "../../../../plm_cache/t5-base", "unfrozen_modules": ["deltas", "layer_norm", "final_layer_norm"], "warmup_steps": 500}