ChrisZeng committed on
Commit
9bcf651
1 Parent(s): c30db38

Training in progress, epoch 9

Browse files
checkpoint-1467/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8016f04445628708c33bf649e49067d8ba01bd9c9f7fb7ec23bf16e84302415b
3
- size 2681490814
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27ff37e113e06eff1db241a9c3d07e590a00e217cfcbb8577767c193b7941dd1
3
+ size 2681485310
checkpoint-1467/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7633807fb9e36e6b1a07c28414b0d317873a57fcff768c36a39a20807d0f3b4f
3
  size 1340743917
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa59b62075ac20c4333b546c0b025606a618273ec6b78f95fe165738d3a0f2af
3
  size 1340743917
checkpoint-1467/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:219e774be4c8030937df1faf8420e42aff6b143155dd664192f9426967cb7e7d
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e36b0f788cfae254f69b987374e8eb7dea9d5f493b9011eb51e4135cc476bfda
3
  size 14503
checkpoint-1467/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:604e5dda890598891dfe18e4a7a828eac1f5718acc7a3382899d2cbac39d14db
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96405c1f878387bb6d62cce12f08fc3133dfd93159844cc7dba9f524f0c58020
3
  size 623
checkpoint-1467/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.2981628179550171,
3
- "best_model_checkpoint": "outputs/electra-nli-efl-tweeteval/checkpoint-978",
4
  "epoch": 8.997323135755257,
5
  "global_step": 1467,
6
  "is_hyper_param_search": false,
@@ -9,152 +9,152 @@
9
  "log_history": [
10
  {
11
  "epoch": 1.0,
12
- "learning_rate": 9.499999999999999e-07,
13
- "loss": 0.4384,
14
  "step": 163
15
  },
16
  {
17
  "epoch": 1.0,
18
- "eval_accuracy": 0.7444061962134251,
19
- "eval_f1": 0.7308261375858633,
20
- "eval_loss": 0.39615127444267273,
21
- "eval_runtime": 9.0599,
22
- "eval_samples_per_second": 256.514,
23
- "eval_steps_per_second": 32.119,
24
  "step": 163
25
  },
26
  {
27
  "epoch": 2.0,
28
- "learning_rate": 9e-07,
29
- "loss": 0.3447,
30
  "step": 326
31
  },
32
  {
33
  "epoch": 2.0,
34
- "eval_accuracy": 0.76592082616179,
35
- "eval_f1": 0.7552159046464709,
36
- "eval_loss": 0.3409559428691864,
37
- "eval_runtime": 9.4378,
38
- "eval_samples_per_second": 246.244,
39
- "eval_steps_per_second": 30.833,
40
  "step": 326
41
  },
42
  {
43
  "epoch": 3.0,
44
- "learning_rate": 8.499999999999999e-07,
45
- "loss": 0.3057,
46
  "step": 489
47
  },
48
  {
49
  "epoch": 3.0,
50
- "eval_accuracy": 0.7749569707401033,
51
- "eval_f1": 0.768808341108185,
52
- "eval_loss": 0.32338443398475647,
53
- "eval_runtime": 9.0418,
54
- "eval_samples_per_second": 257.028,
55
- "eval_steps_per_second": 32.184,
56
  "step": 489
57
  },
58
  {
59
  "epoch": 4.0,
60
- "learning_rate": 8e-07,
61
- "loss": 0.287,
62
  "step": 652
63
  },
64
  {
65
  "epoch": 4.0,
66
- "eval_accuracy": 0.7857142857142857,
67
- "eval_f1": 0.7778970154753132,
68
- "eval_loss": 0.3068828284740448,
69
- "eval_runtime": 9.3662,
70
- "eval_samples_per_second": 248.128,
71
- "eval_steps_per_second": 31.069,
72
  "step": 652
73
  },
74
  {
75
  "epoch": 5.0,
76
- "learning_rate": 7.5e-07,
77
- "loss": 0.2742,
78
  "step": 815
79
  },
80
  {
81
  "epoch": 5.0,
82
- "eval_accuracy": 0.7887263339070568,
83
- "eval_f1": 0.7821763089027973,
84
- "eval_loss": 0.30303624272346497,
85
- "eval_runtime": 8.908,
86
- "eval_samples_per_second": 260.89,
87
- "eval_steps_per_second": 32.667,
88
  "step": 815
89
  },
90
  {
91
  "epoch": 6.0,
92
- "learning_rate": 7e-07,
93
- "loss": 0.2676,
94
  "step": 978
95
  },
96
  {
97
  "epoch": 6.0,
98
- "eval_accuracy": 0.7938898450946644,
99
- "eval_f1": 0.7850614050415754,
100
- "eval_loss": 0.2981628179550171,
101
- "eval_runtime": 9.0189,
102
- "eval_samples_per_second": 257.681,
103
- "eval_steps_per_second": 32.266,
104
  "step": 978
105
  },
106
  {
107
  "epoch": 7.0,
108
- "learning_rate": 6.5e-07,
109
- "loss": 0.2585,
110
  "step": 1141
111
  },
112
  {
113
  "epoch": 7.0,
114
- "eval_accuracy": 0.7908777969018933,
115
- "eval_f1": 0.7821955847641968,
116
- "eval_loss": 0.3001907467842102,
117
- "eval_runtime": 8.8872,
118
- "eval_samples_per_second": 261.499,
119
- "eval_steps_per_second": 32.744,
120
  "step": 1141
121
  },
122
  {
123
  "epoch": 8.0,
124
- "learning_rate": 6e-07,
125
- "loss": 0.2526,
126
  "step": 1304
127
  },
128
  {
129
  "epoch": 8.0,
130
- "eval_accuracy": 0.7943201376936316,
131
- "eval_f1": 0.7876461988304093,
132
- "eval_loss": 0.30516260862350464,
133
- "eval_runtime": 9.0042,
134
- "eval_samples_per_second": 258.103,
135
- "eval_steps_per_second": 32.318,
136
  "step": 1304
137
  },
138
  {
139
  "epoch": 9.0,
140
- "learning_rate": 5.5e-07,
141
- "loss": 0.2479,
142
  "step": 1467
143
  },
144
  {
145
  "epoch": 9.0,
146
- "eval_accuracy": 0.7938898450946644,
147
- "eval_f1": 0.784665589216992,
148
- "eval_loss": 0.2996860444545746,
149
- "eval_runtime": 9.1157,
150
- "eval_samples_per_second": 254.944,
151
- "eval_steps_per_second": 31.923,
152
  "step": 1467
153
  }
154
  ],
155
- "max_steps": 3260,
156
- "num_train_epochs": 20,
157
- "total_flos": 2.7397418508888e+16,
158
  "trial_name": null,
159
  "trial_params": null
160
  }
 
1
  {
2
+ "best_metric": 0.29900965094566345,
3
+ "best_model_checkpoint": "outputs/electra-nli-efl-tweeteval/checkpoint-1467",
4
  "epoch": 8.997323135755257,
5
  "global_step": 1467,
6
  "is_hyper_param_search": false,
 
9
  "log_history": [
10
  {
11
  "epoch": 1.0,
12
+ "learning_rate": 9e-07,
13
+ "loss": 0.439,
14
  "step": 163
15
  },
16
  {
17
  "epoch": 1.0,
18
+ "eval_accuracy": 0.745697074010327,
19
+ "eval_f1": 0.732240056847258,
20
+ "eval_loss": 0.3982622027397156,
21
+ "eval_runtime": 8.9237,
22
+ "eval_samples_per_second": 260.431,
23
+ "eval_steps_per_second": 32.61,
24
  "step": 163
25
  },
26
  {
27
  "epoch": 2.0,
28
+ "learning_rate": 8e-07,
29
+ "loss": 0.3465,
30
  "step": 326
31
  },
32
  {
33
  "epoch": 2.0,
34
+ "eval_accuracy": 0.7620481927710844,
35
+ "eval_f1": 0.750740067157349,
36
+ "eval_loss": 0.3448249399662018,
37
+ "eval_runtime": 9.0059,
38
+ "eval_samples_per_second": 258.053,
39
+ "eval_steps_per_second": 32.312,
40
  "step": 326
41
  },
42
  {
43
  "epoch": 3.0,
44
+ "learning_rate": 7e-07,
45
+ "loss": 0.3089,
46
  "step": 489
47
  },
48
  {
49
  "epoch": 3.0,
50
+ "eval_accuracy": 0.7693631669535284,
51
+ "eval_f1": 0.7633651185887134,
52
+ "eval_loss": 0.3303545117378235,
53
+ "eval_runtime": 9.3737,
54
+ "eval_samples_per_second": 247.927,
55
+ "eval_steps_per_second": 31.044,
56
  "step": 489
57
  },
58
  {
59
  "epoch": 4.0,
60
+ "learning_rate": 6e-07,
61
+ "loss": 0.2916,
62
  "step": 652
63
  },
64
  {
65
  "epoch": 4.0,
66
+ "eval_accuracy": 0.7839931153184165,
67
+ "eval_f1": 0.7736843738845695,
68
+ "eval_loss": 0.30892524123191833,
69
+ "eval_runtime": 9.0737,
70
+ "eval_samples_per_second": 256.125,
71
+ "eval_steps_per_second": 32.071,
72
  "step": 652
73
  },
74
  {
75
  "epoch": 5.0,
76
+ "learning_rate": 5e-07,
77
+ "loss": 0.2826,
78
  "step": 815
79
  },
80
  {
81
  "epoch": 5.0,
82
+ "eval_accuracy": 0.7839931153184165,
83
+ "eval_f1": 0.7744514636272033,
84
+ "eval_loss": 0.3055116832256317,
85
+ "eval_runtime": 9.1262,
86
+ "eval_samples_per_second": 254.651,
87
+ "eval_steps_per_second": 31.886,
88
  "step": 815
89
  },
90
  {
91
  "epoch": 6.0,
92
+ "learning_rate": 4e-07,
93
+ "loss": 0.2747,
94
  "step": 978
95
  },
96
  {
97
  "epoch": 6.0,
98
+ "eval_accuracy": 0.7857142857142857,
99
+ "eval_f1": 0.7771340101366444,
100
+ "eval_loss": 0.30407363176345825,
101
+ "eval_runtime": 9.1656,
102
+ "eval_samples_per_second": 253.557,
103
+ "eval_steps_per_second": 31.749,
104
  "step": 978
105
  },
106
  {
107
  "epoch": 7.0,
108
+ "learning_rate": 3e-07,
109
+ "loss": 0.2678,
110
  "step": 1141
111
  },
112
  {
113
  "epoch": 7.0,
114
+ "eval_accuracy": 0.7865748709122203,
115
+ "eval_f1": 0.7768983029852594,
116
+ "eval_loss": 0.3031200170516968,
117
+ "eval_runtime": 9.6529,
118
+ "eval_samples_per_second": 240.756,
119
+ "eval_steps_per_second": 30.146,
120
  "step": 1141
121
  },
122
  {
123
  "epoch": 8.0,
124
+ "learning_rate": 2e-07,
125
+ "loss": 0.2641,
126
  "step": 1304
127
  },
128
  {
129
  "epoch": 8.0,
130
+ "eval_accuracy": 0.7874354561101549,
131
+ "eval_f1": 0.7772050769924619,
132
+ "eval_loss": 0.3010457754135132,
133
+ "eval_runtime": 9.1465,
134
+ "eval_samples_per_second": 254.087,
135
+ "eval_steps_per_second": 31.816,
136
  "step": 1304
137
  },
138
  {
139
  "epoch": 9.0,
140
+ "learning_rate": 1e-07,
141
+ "loss": 0.2631,
142
  "step": 1467
143
  },
144
  {
145
  "epoch": 9.0,
146
+ "eval_accuracy": 0.7917383820998278,
147
+ "eval_f1": 0.7830140707046156,
148
+ "eval_loss": 0.29900965094566345,
149
+ "eval_runtime": 9.9577,
150
+ "eval_samples_per_second": 233.387,
151
+ "eval_steps_per_second": 29.224,
152
  "step": 1467
153
  }
154
  ],
155
+ "max_steps": 1630,
156
+ "num_train_epochs": 10,
157
+ "total_flos": 2.739683604981792e+16,
158
  "trial_name": null,
159
  "trial_params": null
160
  }
checkpoint-1467/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:50dde5eeea306f54118173d342686475ad9209b6c2cac103f7b114d5f582dc36
3
  size 3119
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd48e93c542d5f8f840918341d20dc98e6dbd60ec7052cf9f5610075d1655eaf
3
  size 3119
checkpoint-1630/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1f10ac8aa6914a68e479931a864f18ba10f4c2c3365275928cdcaec362c14888
3
- size 2681490814
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58949addab61a91ba7273887e0d81fb845296dc8d00ee4a1d5f426047707bd55
3
+ size 2681485310
checkpoint-1630/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f3f456ad6fff0711c705ef4a02fde64a2461f640a2ebaf6d8128cf037da6c194
3
  size 1340743917
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a94b56e1da42f00703387b7c987adc79cef5d650801e77d14a802f4e92301a9c
3
  size 1340743917
checkpoint-1630/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:19de93fa7ce45dde7d1a60c4c97b9e4ec495777424b4e2a14fb06079d8444157
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d895ad69969472b320a4216795557386f8649e29cb20bf0cdadbc0088844bdcb
3
  size 14503
checkpoint-1630/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c924e180775c2b50e4679ec9d5d5d33fb3eed968a176cbb01236411034beab21
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c3e7a1b845fb89890dfefde14b31ecc3084465b9c0621dd07e0fdee41ef59fc
3
  size 623
checkpoint-1630/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 0.2981628179550171,
3
- "best_model_checkpoint": "outputs/electra-nli-efl-tweeteval/checkpoint-978",
4
  "epoch": 9.997323135755257,
5
  "global_step": 1630,
6
  "is_hyper_param_search": false,
@@ -9,168 +9,168 @@
9
  "log_history": [
10
  {
11
  "epoch": 1.0,
12
- "learning_rate": 9.499999999999999e-07,
13
- "loss": 0.4384,
14
  "step": 163
15
  },
16
  {
17
  "epoch": 1.0,
18
- "eval_accuracy": 0.7444061962134251,
19
- "eval_f1": 0.7308261375858633,
20
- "eval_loss": 0.39615127444267273,
21
- "eval_runtime": 9.0599,
22
- "eval_samples_per_second": 256.514,
23
- "eval_steps_per_second": 32.119,
24
  "step": 163
25
  },
26
  {
27
  "epoch": 2.0,
28
- "learning_rate": 9e-07,
29
- "loss": 0.3447,
30
  "step": 326
31
  },
32
  {
33
  "epoch": 2.0,
34
- "eval_accuracy": 0.76592082616179,
35
- "eval_f1": 0.7552159046464709,
36
- "eval_loss": 0.3409559428691864,
37
- "eval_runtime": 9.4378,
38
- "eval_samples_per_second": 246.244,
39
- "eval_steps_per_second": 30.833,
40
  "step": 326
41
  },
42
  {
43
  "epoch": 3.0,
44
- "learning_rate": 8.499999999999999e-07,
45
- "loss": 0.3057,
46
  "step": 489
47
  },
48
  {
49
  "epoch": 3.0,
50
- "eval_accuracy": 0.7749569707401033,
51
- "eval_f1": 0.768808341108185,
52
- "eval_loss": 0.32338443398475647,
53
- "eval_runtime": 9.0418,
54
- "eval_samples_per_second": 257.028,
55
- "eval_steps_per_second": 32.184,
56
  "step": 489
57
  },
58
  {
59
  "epoch": 4.0,
60
- "learning_rate": 8e-07,
61
- "loss": 0.287,
62
  "step": 652
63
  },
64
  {
65
  "epoch": 4.0,
66
- "eval_accuracy": 0.7857142857142857,
67
- "eval_f1": 0.7778970154753132,
68
- "eval_loss": 0.3068828284740448,
69
- "eval_runtime": 9.3662,
70
- "eval_samples_per_second": 248.128,
71
- "eval_steps_per_second": 31.069,
72
  "step": 652
73
  },
74
  {
75
  "epoch": 5.0,
76
- "learning_rate": 7.5e-07,
77
- "loss": 0.2742,
78
  "step": 815
79
  },
80
  {
81
  "epoch": 5.0,
82
- "eval_accuracy": 0.7887263339070568,
83
- "eval_f1": 0.7821763089027973,
84
- "eval_loss": 0.30303624272346497,
85
- "eval_runtime": 8.908,
86
- "eval_samples_per_second": 260.89,
87
- "eval_steps_per_second": 32.667,
88
  "step": 815
89
  },
90
  {
91
  "epoch": 6.0,
92
- "learning_rate": 7e-07,
93
- "loss": 0.2676,
94
  "step": 978
95
  },
96
  {
97
  "epoch": 6.0,
98
- "eval_accuracy": 0.7938898450946644,
99
- "eval_f1": 0.7850614050415754,
100
- "eval_loss": 0.2981628179550171,
101
- "eval_runtime": 9.0189,
102
- "eval_samples_per_second": 257.681,
103
- "eval_steps_per_second": 32.266,
104
  "step": 978
105
  },
106
  {
107
  "epoch": 7.0,
108
- "learning_rate": 6.5e-07,
109
- "loss": 0.2585,
110
  "step": 1141
111
  },
112
  {
113
  "epoch": 7.0,
114
- "eval_accuracy": 0.7908777969018933,
115
- "eval_f1": 0.7821955847641968,
116
- "eval_loss": 0.3001907467842102,
117
- "eval_runtime": 8.8872,
118
- "eval_samples_per_second": 261.499,
119
- "eval_steps_per_second": 32.744,
120
  "step": 1141
121
  },
122
  {
123
  "epoch": 8.0,
124
- "learning_rate": 6e-07,
125
- "loss": 0.2526,
126
  "step": 1304
127
  },
128
  {
129
  "epoch": 8.0,
130
- "eval_accuracy": 0.7943201376936316,
131
- "eval_f1": 0.7876461988304093,
132
- "eval_loss": 0.30516260862350464,
133
- "eval_runtime": 9.0042,
134
- "eval_samples_per_second": 258.103,
135
- "eval_steps_per_second": 32.318,
136
  "step": 1304
137
  },
138
  {
139
  "epoch": 9.0,
140
- "learning_rate": 5.5e-07,
141
- "loss": 0.2479,
142
  "step": 1467
143
  },
144
  {
145
  "epoch": 9.0,
146
- "eval_accuracy": 0.7938898450946644,
147
- "eval_f1": 0.784665589216992,
148
- "eval_loss": 0.2996860444545746,
149
- "eval_runtime": 9.1157,
150
- "eval_samples_per_second": 254.944,
151
- "eval_steps_per_second": 31.923,
152
  "step": 1467
153
  },
154
  {
155
  "epoch": 10.0,
156
- "learning_rate": 5e-07,
157
- "loss": 0.2451,
158
  "step": 1630
159
  },
160
  {
161
  "epoch": 10.0,
162
- "eval_accuracy": 0.7956110154905336,
163
- "eval_f1": 0.787314593871832,
164
- "eval_loss": 0.3013566732406616,
165
- "eval_runtime": 9.3541,
166
- "eval_samples_per_second": 248.446,
167
- "eval_steps_per_second": 31.109,
168
  "step": 1630
169
  }
170
  ],
171
- "max_steps": 3260,
172
- "num_train_epochs": 20,
173
- "total_flos": 3.0441815576382144e+16,
174
  "trial_name": null,
175
  "trial_params": null
176
  }
 
1
  {
2
+ "best_metric": 0.29900965094566345,
3
+ "best_model_checkpoint": "outputs/electra-nli-efl-tweeteval/checkpoint-1467",
4
  "epoch": 9.997323135755257,
5
  "global_step": 1630,
6
  "is_hyper_param_search": false,
 
9
  "log_history": [
10
  {
11
  "epoch": 1.0,
12
+ "learning_rate": 9e-07,
13
+ "loss": 0.439,
14
  "step": 163
15
  },
16
  {
17
  "epoch": 1.0,
18
+ "eval_accuracy": 0.745697074010327,
19
+ "eval_f1": 0.732240056847258,
20
+ "eval_loss": 0.3982622027397156,
21
+ "eval_runtime": 8.9237,
22
+ "eval_samples_per_second": 260.431,
23
+ "eval_steps_per_second": 32.61,
24
  "step": 163
25
  },
26
  {
27
  "epoch": 2.0,
28
+ "learning_rate": 8e-07,
29
+ "loss": 0.3465,
30
  "step": 326
31
  },
32
  {
33
  "epoch": 2.0,
34
+ "eval_accuracy": 0.7620481927710844,
35
+ "eval_f1": 0.750740067157349,
36
+ "eval_loss": 0.3448249399662018,
37
+ "eval_runtime": 9.0059,
38
+ "eval_samples_per_second": 258.053,
39
+ "eval_steps_per_second": 32.312,
40
  "step": 326
41
  },
42
  {
43
  "epoch": 3.0,
44
+ "learning_rate": 7e-07,
45
+ "loss": 0.3089,
46
  "step": 489
47
  },
48
  {
49
  "epoch": 3.0,
50
+ "eval_accuracy": 0.7693631669535284,
51
+ "eval_f1": 0.7633651185887134,
52
+ "eval_loss": 0.3303545117378235,
53
+ "eval_runtime": 9.3737,
54
+ "eval_samples_per_second": 247.927,
55
+ "eval_steps_per_second": 31.044,
56
  "step": 489
57
  },
58
  {
59
  "epoch": 4.0,
60
+ "learning_rate": 6e-07,
61
+ "loss": 0.2916,
62
  "step": 652
63
  },
64
  {
65
  "epoch": 4.0,
66
+ "eval_accuracy": 0.7839931153184165,
67
+ "eval_f1": 0.7736843738845695,
68
+ "eval_loss": 0.30892524123191833,
69
+ "eval_runtime": 9.0737,
70
+ "eval_samples_per_second": 256.125,
71
+ "eval_steps_per_second": 32.071,
72
  "step": 652
73
  },
74
  {
75
  "epoch": 5.0,
76
+ "learning_rate": 5e-07,
77
+ "loss": 0.2826,
78
  "step": 815
79
  },
80
  {
81
  "epoch": 5.0,
82
+ "eval_accuracy": 0.7839931153184165,
83
+ "eval_f1": 0.7744514636272033,
84
+ "eval_loss": 0.3055116832256317,
85
+ "eval_runtime": 9.1262,
86
+ "eval_samples_per_second": 254.651,
87
+ "eval_steps_per_second": 31.886,
88
  "step": 815
89
  },
90
  {
91
  "epoch": 6.0,
92
+ "learning_rate": 4e-07,
93
+ "loss": 0.2747,
94
  "step": 978
95
  },
96
  {
97
  "epoch": 6.0,
98
+ "eval_accuracy": 0.7857142857142857,
99
+ "eval_f1": 0.7771340101366444,
100
+ "eval_loss": 0.30407363176345825,
101
+ "eval_runtime": 9.1656,
102
+ "eval_samples_per_second": 253.557,
103
+ "eval_steps_per_second": 31.749,
104
  "step": 978
105
  },
106
  {
107
  "epoch": 7.0,
108
+ "learning_rate": 3e-07,
109
+ "loss": 0.2678,
110
  "step": 1141
111
  },
112
  {
113
  "epoch": 7.0,
114
+ "eval_accuracy": 0.7865748709122203,
115
+ "eval_f1": 0.7768983029852594,
116
+ "eval_loss": 0.3031200170516968,
117
+ "eval_runtime": 9.6529,
118
+ "eval_samples_per_second": 240.756,
119
+ "eval_steps_per_second": 30.146,
120
  "step": 1141
121
  },
122
  {
123
  "epoch": 8.0,
124
+ "learning_rate": 2e-07,
125
+ "loss": 0.2641,
126
  "step": 1304
127
  },
128
  {
129
  "epoch": 8.0,
130
+ "eval_accuracy": 0.7874354561101549,
131
+ "eval_f1": 0.7772050769924619,
132
+ "eval_loss": 0.3010457754135132,
133
+ "eval_runtime": 9.1465,
134
+ "eval_samples_per_second": 254.087,
135
+ "eval_steps_per_second": 31.816,
136
  "step": 1304
137
  },
138
  {
139
  "epoch": 9.0,
140
+ "learning_rate": 1e-07,
141
+ "loss": 0.2631,
142
  "step": 1467
143
  },
144
  {
145
  "epoch": 9.0,
146
+ "eval_accuracy": 0.7917383820998278,
147
+ "eval_f1": 0.7830140707046156,
148
+ "eval_loss": 0.29900965094566345,
149
+ "eval_runtime": 9.9577,
150
+ "eval_samples_per_second": 233.387,
151
+ "eval_steps_per_second": 29.224,
152
  "step": 1467
153
  },
154
  {
155
  "epoch": 10.0,
156
+ "learning_rate": 0.0,
157
+ "loss": 0.2615,
158
  "step": 1630
159
  },
160
  {
161
  "epoch": 10.0,
162
+ "eval_accuracy": 0.7895869191049913,
163
+ "eval_f1": 0.7804941506557442,
164
+ "eval_loss": 0.3004284203052521,
165
+ "eval_runtime": 9.1779,
166
+ "eval_samples_per_second": 253.217,
167
+ "eval_steps_per_second": 31.707,
168
  "step": 1630
169
  }
170
  ],
171
+ "max_steps": 1630,
172
+ "num_train_epochs": 10,
173
+ "total_flos": 3.0430166394980544e+16,
174
  "trial_name": null,
175
  "trial_params": null
176
  }
checkpoint-1630/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:50dde5eeea306f54118173d342686475ad9209b6c2cac103f7b114d5f582dc36
3
  size 3119
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd48e93c542d5f8f840918341d20dc98e6dbd60ec7052cf9f5610075d1655eaf
3
  size 3119
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a424c985030a874a9e4bfc84349addc7b4ac2efb4d1b6b44956f1f85a1ff96e1
3
  size 1340743917
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a94b56e1da42f00703387b7c987adc79cef5d650801e77d14a802f4e92301a9c
3
  size 1340743917