gokuls committed on
Commit
cf102cf
1 Parent(s): 0698d1c

End of training

Browse files
README.md CHANGED
@@ -1,4 +1,6 @@
1
  ---
 
 
2
  license: apache-2.0
3
  tags:
4
  - generated_from_trainer
@@ -14,7 +16,7 @@ model-index:
14
  name: Text Classification
15
  type: text-classification
16
  dataset:
17
- name: glue
18
  type: glue
19
  config: mrpc
20
  split: validation
@@ -22,10 +24,10 @@ model-index:
22
  metrics:
23
  - name: Accuracy
24
  type: accuracy
25
- value: 0.7034313725490197
26
  - name: F1
27
  type: f1
28
- value: 0.8032520325203252
29
  ---
30
 
31
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -33,12 +35,12 @@ should probably proofread and complete it, then remove this comment. -->
33
 
34
  # mobilebert_sa_GLUE_Experiment_mrpc
35
 
36
- This model is a fine-tuned version of [google/mobilebert-uncased](https://huggingface.co/google/mobilebert-uncased) on the glue dataset.
37
  It achieves the following results on the evaluation set:
38
- - Loss: 0.9829
39
- - Accuracy: 0.7034
40
- - F1: 0.8033
41
- - Combined Score: 0.7533
42
 
43
  ## Model description
44
 
 
1
  ---
2
+ language:
3
+ - en
4
  license: apache-2.0
5
  tags:
6
  - generated_from_trainer
 
16
  name: Text Classification
17
  type: text-classification
18
  dataset:
19
+ name: GLUE MRPC
20
  type: glue
21
  config: mrpc
22
  split: validation
 
24
  metrics:
25
  - name: Accuracy
26
  type: accuracy
27
+ value: 0.6838235294117647
28
  - name: F1
29
  type: f1
30
+ value: 0.8122270742358079
31
  ---
32
 
33
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
35
 
36
  # mobilebert_sa_GLUE_Experiment_mrpc
37
 
38
+ This model is a fine-tuned version of [google/mobilebert-uncased](https://huggingface.co/google/mobilebert-uncased) on the GLUE MRPC dataset.
39
  It achieves the following results on the evaluation set:
40
+ - Loss: 0.6145
41
+ - Accuracy: 0.6838
42
+ - F1: 0.8122
43
+ - Combined Score: 0.7480
44
 
45
  ## Model description
46
 
all_results.json CHANGED
@@ -1,16 +1,16 @@
1
  {
2
- "epoch": 16.0,
3
- "eval_accuracy": 0.6617647058823529,
4
- "eval_combined_score": 0.7139332003988036,
5
- "eval_f1": 0.7661016949152543,
6
- "eval_loss": 0.6120356917381287,
7
- "eval_runtime": 0.4331,
8
  "eval_samples": 408,
9
- "eval_samples_per_second": 942.046,
10
- "eval_steps_per_second": 4.618,
11
- "train_loss": 0.555461339155833,
12
- "train_runtime": 276.9705,
13
  "train_samples": 3668,
14
- "train_samples_per_second": 662.164,
15
- "train_steps_per_second": 2.708
16
  }
 
1
  {
2
+ "epoch": 10.0,
3
+ "eval_accuracy": 0.6838235294117647,
4
+ "eval_combined_score": 0.7480253018237863,
5
+ "eval_f1": 0.8122270742358079,
6
+ "eval_loss": 0.614494800567627,
7
+ "eval_runtime": 0.7302,
8
  "eval_samples": 408,
9
+ "eval_samples_per_second": 558.746,
10
+ "eval_steps_per_second": 5.478,
11
+ "train_loss": 0.5173478488264412,
12
+ "train_runtime": 338.752,
13
  "train_samples": 3668,
14
+ "train_samples_per_second": 541.399,
15
+ "train_steps_per_second": 4.28
16
  }
eval_results.json CHANGED
@@ -1,11 +1,11 @@
1
  {
2
- "epoch": 16.0,
3
- "eval_accuracy": 0.6617647058823529,
4
- "eval_combined_score": 0.7139332003988036,
5
- "eval_f1": 0.7661016949152543,
6
- "eval_loss": 0.6120356917381287,
7
- "eval_runtime": 0.4331,
8
  "eval_samples": 408,
9
- "eval_samples_per_second": 942.046,
10
- "eval_steps_per_second": 4.618
11
  }
 
1
  {
2
+ "epoch": 10.0,
3
+ "eval_accuracy": 0.6838235294117647,
4
+ "eval_combined_score": 0.7480253018237863,
5
+ "eval_f1": 0.8122270742358079,
6
+ "eval_loss": 0.614494800567627,
7
+ "eval_runtime": 0.7302,
8
  "eval_samples": 408,
9
+ "eval_samples_per_second": 558.746,
10
+ "eval_steps_per_second": 5.478
11
  }
logs/events.out.tfevents.1674619910.garda.1933772.5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57035a7fbfa5c7c7cf6f89b1ff21360913b2ea3bb30d5bbcea8efb074d9bc888
3
+ size 467
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 16.0,
3
- "train_loss": 0.555461339155833,
4
- "train_runtime": 276.9705,
5
  "train_samples": 3668,
6
- "train_samples_per_second": 662.164,
7
- "train_steps_per_second": 2.708
8
  }
 
1
  {
2
+ "epoch": 10.0,
3
+ "train_loss": 0.5173478488264412,
4
+ "train_runtime": 338.752,
5
  "train_samples": 3668,
6
+ "train_samples_per_second": 541.399,
7
+ "train_steps_per_second": 4.28
8
  }
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 0.6120356917381287,
3
- "best_model_checkpoint": "mobilebert_sa_GLUE_Experiment_mrpc/checkpoint-165",
4
- "epoch": 16.0,
5
- "global_step": 240,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -10,288 +10,186 @@
10
  {
11
  "epoch": 1.0,
12
  "learning_rate": 4.9e-05,
13
- "loss": 0.6462,
14
- "step": 15
15
  },
16
  {
17
  "epoch": 1.0,
18
  "eval_accuracy": 0.6838235294117647,
19
  "eval_combined_score": 0.7480253018237863,
20
  "eval_f1": 0.8122270742358079,
21
- "eval_loss": 0.6247462034225464,
22
- "eval_runtime": 0.416,
23
- "eval_samples_per_second": 980.655,
24
- "eval_steps_per_second": 4.807,
25
- "step": 15
26
  },
27
  {
28
  "epoch": 2.0,
29
  "learning_rate": 4.8e-05,
30
- "loss": 0.6292,
31
- "step": 30
32
  },
33
  {
34
  "epoch": 2.0,
35
  "eval_accuracy": 0.6838235294117647,
36
  "eval_combined_score": 0.7480253018237863,
37
  "eval_f1": 0.8122270742358079,
38
- "eval_loss": 0.6235859990119934,
39
- "eval_runtime": 0.4143,
40
- "eval_samples_per_second": 984.679,
41
- "eval_steps_per_second": 4.827,
42
- "step": 30
43
  },
44
  {
45
  "epoch": 3.0,
46
  "learning_rate": 4.7e-05,
47
- "loss": 0.6312,
48
- "step": 45
49
  },
50
  {
51
  "epoch": 3.0,
52
  "eval_accuracy": 0.6838235294117647,
53
  "eval_combined_score": 0.7480253018237863,
54
  "eval_f1": 0.8122270742358079,
55
- "eval_loss": 0.6266204118728638,
56
- "eval_runtime": 0.4139,
57
- "eval_samples_per_second": 985.824,
58
- "eval_steps_per_second": 4.832,
59
- "step": 45
60
  },
61
  {
62
  "epoch": 4.0,
63
  "learning_rate": 4.600000000000001e-05,
64
  "loss": 0.6291,
65
- "step": 60
66
  },
67
  {
68
  "epoch": 4.0,
69
  "eval_accuracy": 0.6838235294117647,
70
  "eval_combined_score": 0.7480253018237863,
71
  "eval_f1": 0.8122270742358079,
72
- "eval_loss": 0.6229846477508545,
73
- "eval_runtime": 0.4134,
74
- "eval_samples_per_second": 987.054,
75
- "eval_steps_per_second": 4.839,
76
- "step": 60
77
  },
78
  {
79
  "epoch": 5.0,
80
  "learning_rate": 4.5e-05,
81
- "loss": 0.6306,
82
- "step": 75
83
  },
84
  {
85
  "epoch": 5.0,
86
  "eval_accuracy": 0.6838235294117647,
87
  "eval_combined_score": 0.7480253018237863,
88
  "eval_f1": 0.8122270742358079,
89
- "eval_loss": 0.622560977935791,
90
- "eval_runtime": 0.4291,
91
- "eval_samples_per_second": 950.747,
92
- "eval_steps_per_second": 4.661,
93
- "step": 75
94
  },
95
  {
96
  "epoch": 6.0,
97
  "learning_rate": 4.4000000000000006e-05,
98
- "loss": 0.6265,
99
- "step": 90
100
  },
101
  {
102
  "epoch": 6.0,
103
- "eval_accuracy": 0.6838235294117647,
104
- "eval_combined_score": 0.7480253018237863,
105
- "eval_f1": 0.8122270742358079,
106
- "eval_loss": 0.6228667497634888,
107
- "eval_runtime": 0.4149,
108
- "eval_samples_per_second": 983.438,
109
- "eval_steps_per_second": 4.821,
110
- "step": 90
111
  },
112
  {
113
  "epoch": 7.0,
114
  "learning_rate": 4.3e-05,
115
- "loss": 0.6287,
116
- "step": 105
117
  },
118
  {
119
  "epoch": 7.0,
120
- "eval_accuracy": 0.6838235294117647,
121
- "eval_combined_score": 0.7480253018237863,
122
- "eval_f1": 0.8122270742358079,
123
- "eval_loss": 0.6208460330963135,
124
- "eval_runtime": 0.5134,
125
- "eval_samples_per_second": 794.775,
126
- "eval_steps_per_second": 3.896,
127
- "step": 105
128
  },
129
  {
130
  "epoch": 8.0,
131
  "learning_rate": 4.2e-05,
132
- "loss": 0.6215,
133
- "step": 120
134
  },
135
  {
136
  "epoch": 8.0,
137
- "eval_accuracy": 0.6838235294117647,
138
- "eval_combined_score": 0.7480253018237863,
139
- "eval_f1": 0.8122270742358079,
140
- "eval_loss": 0.621772289276123,
141
- "eval_runtime": 0.4654,
142
- "eval_samples_per_second": 876.685,
143
- "eval_steps_per_second": 4.297,
144
- "step": 120
145
  },
146
  {
147
  "epoch": 9.0,
148
- "learning_rate": 4.1e-05,
149
- "loss": 0.6117,
150
- "step": 135
151
  },
152
  {
153
  "epoch": 9.0,
154
- "eval_accuracy": 0.7009803921568627,
155
- "eval_combined_score": 0.7591728308089702,
156
- "eval_f1": 0.8173652694610778,
157
- "eval_loss": 0.6204394698143005,
158
- "eval_runtime": 0.4684,
159
- "eval_samples_per_second": 871.136,
160
- "eval_steps_per_second": 4.27,
161
- "step": 135
162
  },
163
  {
164
  "epoch": 10.0,
165
- "learning_rate": 4e-05,
166
- "loss": 0.592,
167
- "step": 150
168
  },
169
  {
170
  "epoch": 10.0,
171
- "eval_accuracy": 0.7009803921568627,
172
- "eval_combined_score": 0.7588986044868398,
173
- "eval_f1": 0.8168168168168168,
174
- "eval_loss": 0.6202014088630676,
175
- "eval_runtime": 0.4641,
176
- "eval_samples_per_second": 879.144,
177
- "eval_steps_per_second": 4.31,
178
- "step": 150
179
- },
180
- {
181
- "epoch": 11.0,
182
- "learning_rate": 3.9000000000000006e-05,
183
- "loss": 0.5599,
184
- "step": 165
185
- },
186
- {
187
- "epoch": 11.0,
188
- "eval_accuracy": 0.6617647058823529,
189
- "eval_combined_score": 0.7139332003988036,
190
- "eval_f1": 0.7661016949152543,
191
- "eval_loss": 0.6120356917381287,
192
- "eval_runtime": 0.4633,
193
- "eval_samples_per_second": 880.609,
194
- "eval_steps_per_second": 4.317,
195
- "step": 165
196
- },
197
- {
198
- "epoch": 12.0,
199
- "learning_rate": 3.8e-05,
200
- "loss": 0.5079,
201
- "step": 180
202
  },
203
  {
204
- "epoch": 12.0,
205
- "eval_accuracy": 0.696078431372549,
206
- "eval_combined_score": 0.7453902090637579,
207
- "eval_f1": 0.7947019867549668,
208
- "eval_loss": 0.6299110651016235,
209
- "eval_runtime": 0.4656,
210
- "eval_samples_per_second": 876.31,
211
- "eval_steps_per_second": 4.296,
212
- "step": 180
213
- },
214
- {
215
- "epoch": 13.0,
216
- "learning_rate": 3.7e-05,
217
- "loss": 0.4459,
218
- "step": 195
219
- },
220
- {
221
- "epoch": 13.0,
222
- "eval_accuracy": 0.7009803921568627,
223
- "eval_combined_score": 0.7501612487100103,
224
- "eval_f1": 0.799342105263158,
225
- "eval_loss": 0.767034649848938,
226
- "eval_runtime": 0.4718,
227
- "eval_samples_per_second": 864.746,
228
- "eval_steps_per_second": 4.239,
229
- "step": 195
230
- },
231
- {
232
- "epoch": 14.0,
233
- "learning_rate": 3.606666666666667e-05,
234
- "loss": 0.4334,
235
- "step": 210
236
- },
237
- {
238
- "epoch": 14.0,
239
- "eval_accuracy": 0.7083333333333334,
240
- "eval_combined_score": 0.7592703349282297,
241
- "eval_f1": 0.810207336523126,
242
- "eval_loss": 0.8543146848678589,
243
- "eval_runtime": 0.4957,
244
- "eval_samples_per_second": 823.012,
245
- "eval_steps_per_second": 4.034,
246
- "step": 210
247
- },
248
- {
249
- "epoch": 15.0,
250
- "learning_rate": 3.513333333333334e-05,
251
- "loss": 0.3764,
252
- "step": 225
253
- },
254
- {
255
- "epoch": 15.0,
256
- "eval_accuracy": 0.6666666666666666,
257
- "eval_combined_score": 0.7131919905771495,
258
- "eval_f1": 0.7597173144876325,
259
- "eval_loss": 0.7447585463523865,
260
- "eval_runtime": 0.502,
261
- "eval_samples_per_second": 812.78,
262
- "eval_steps_per_second": 3.984,
263
- "step": 225
264
- },
265
- {
266
- "epoch": 16.0,
267
- "learning_rate": 3.4133333333333334e-05,
268
- "loss": 0.3172,
269
- "step": 240
270
- },
271
- {
272
- "epoch": 16.0,
273
- "eval_accuracy": 0.7058823529411765,
274
- "eval_combined_score": 0.7529411764705882,
275
- "eval_f1": 0.7999999999999999,
276
- "eval_loss": 0.8614088296890259,
277
- "eval_runtime": 0.5042,
278
- "eval_samples_per_second": 809.272,
279
- "eval_steps_per_second": 3.967,
280
- "step": 240
281
- },
282
- {
283
- "epoch": 16.0,
284
- "step": 240,
285
- "total_flos": 1863795957825536.0,
286
- "train_loss": 0.555461339155833,
287
- "train_runtime": 276.9705,
288
- "train_samples_per_second": 662.164,
289
- "train_steps_per_second": 2.708
290
  }
291
  ],
292
- "max_steps": 750,
293
  "num_train_epochs": 50,
294
- "total_flos": 1863795957825536.0,
295
  "trial_name": null,
296
  "trial_params": null
297
  }
 
1
  {
2
+ "best_metric": 0.614494800567627,
3
+ "best_model_checkpoint": "mobilebert_sa_GLUE_Experiment_mrpc/checkpoint-145",
4
+ "epoch": 10.0,
5
+ "global_step": 290,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
10
  {
11
  "epoch": 1.0,
12
  "learning_rate": 4.9e-05,
13
+ "loss": 0.6377,
14
+ "step": 29
15
  },
16
  {
17
  "epoch": 1.0,
18
  "eval_accuracy": 0.6838235294117647,
19
  "eval_combined_score": 0.7480253018237863,
20
  "eval_f1": 0.8122270742358079,
21
+ "eval_loss": 0.6240395903587341,
22
+ "eval_runtime": 0.7115,
23
+ "eval_samples_per_second": 573.413,
24
+ "eval_steps_per_second": 5.622,
25
+ "step": 29
26
  },
27
  {
28
  "epoch": 2.0,
29
  "learning_rate": 4.8e-05,
30
+ "loss": 0.6309,
31
+ "step": 58
32
  },
33
  {
34
  "epoch": 2.0,
35
  "eval_accuracy": 0.6838235294117647,
36
  "eval_combined_score": 0.7480253018237863,
37
  "eval_f1": 0.8122270742358079,
38
+ "eval_loss": 0.6235603094100952,
39
+ "eval_runtime": 0.7114,
40
+ "eval_samples_per_second": 573.506,
41
+ "eval_steps_per_second": 5.623,
42
+ "step": 58
43
  },
44
  {
45
  "epoch": 3.0,
46
  "learning_rate": 4.7e-05,
47
+ "loss": 0.6306,
48
+ "step": 87
49
  },
50
  {
51
  "epoch": 3.0,
52
  "eval_accuracy": 0.6838235294117647,
53
  "eval_combined_score": 0.7480253018237863,
54
  "eval_f1": 0.8122270742358079,
55
+ "eval_loss": 0.6232509016990662,
56
+ "eval_runtime": 0.716,
57
+ "eval_samples_per_second": 569.871,
58
+ "eval_steps_per_second": 5.587,
59
+ "step": 87
60
  },
61
  {
62
  "epoch": 4.0,
63
  "learning_rate": 4.600000000000001e-05,
64
  "loss": 0.6291,
65
+ "step": 116
66
  },
67
  {
68
  "epoch": 4.0,
69
  "eval_accuracy": 0.6838235294117647,
70
  "eval_combined_score": 0.7480253018237863,
71
  "eval_f1": 0.8122270742358079,
72
+ "eval_loss": 0.6225568056106567,
73
+ "eval_runtime": 0.7145,
74
+ "eval_samples_per_second": 571.018,
75
+ "eval_steps_per_second": 5.598,
76
+ "step": 116
77
  },
78
  {
79
  "epoch": 5.0,
80
  "learning_rate": 4.5e-05,
81
+ "loss": 0.6222,
82
+ "step": 145
83
  },
84
  {
85
  "epoch": 5.0,
86
  "eval_accuracy": 0.6838235294117647,
87
  "eval_combined_score": 0.7480253018237863,
88
  "eval_f1": 0.8122270742358079,
89
+ "eval_loss": 0.614494800567627,
90
+ "eval_runtime": 0.7135,
91
+ "eval_samples_per_second": 571.84,
92
+ "eval_steps_per_second": 5.606,
93
+ "step": 145
94
  },
95
  {
96
  "epoch": 6.0,
97
  "learning_rate": 4.4000000000000006e-05,
98
+ "loss": 0.5736,
99
+ "step": 174
100
  },
101
  {
102
  "epoch": 6.0,
103
+ "eval_accuracy": 0.7009803921568627,
104
+ "eval_combined_score": 0.7474496555378909,
105
+ "eval_f1": 0.793918918918919,
106
+ "eval_loss": 0.620771586894989,
107
+ "eval_runtime": 0.7131,
108
+ "eval_samples_per_second": 572.122,
109
+ "eval_steps_per_second": 5.609,
110
+ "step": 174
111
  },
112
  {
113
  "epoch": 7.0,
114
  "learning_rate": 4.3e-05,
115
+ "loss": 0.488,
116
+ "step": 203
117
  },
118
  {
119
  "epoch": 7.0,
120
+ "eval_accuracy": 0.6936274509803921,
121
+ "eval_combined_score": 0.7365844485942525,
122
+ "eval_f1": 0.7795414462081129,
123
+ "eval_loss": 0.6414217948913574,
124
+ "eval_runtime": 0.7129,
125
+ "eval_samples_per_second": 572.297,
126
+ "eval_steps_per_second": 5.611,
127
+ "step": 203
128
  },
129
  {
130
  "epoch": 8.0,
131
  "learning_rate": 4.2e-05,
132
+ "loss": 0.3939,
133
+ "step": 232
134
  },
135
  {
136
  "epoch": 8.0,
137
+ "eval_accuracy": 0.7279411764705882,
138
+ "eval_combined_score": 0.7700619587936697,
139
+ "eval_f1": 0.8121827411167513,
140
+ "eval_loss": 0.7659339308738708,
141
+ "eval_runtime": 0.7166,
142
+ "eval_samples_per_second": 569.346,
143
+ "eval_steps_per_second": 5.582,
144
+ "step": 232
145
  },
146
  {
147
  "epoch": 9.0,
148
+ "learning_rate": 4.103448275862069e-05,
149
+ "loss": 0.3038,
150
+ "step": 261
151
  },
152
  {
153
  "epoch": 9.0,
154
+ "eval_accuracy": 0.7083333333333334,
155
+ "eval_combined_score": 0.7554933665008292,
156
+ "eval_f1": 0.8026533996683249,
157
+ "eval_loss": 0.8875442147254944,
158
+ "eval_runtime": 0.7168,
159
+ "eval_samples_per_second": 569.19,
160
+ "eval_steps_per_second": 5.58,
161
+ "step": 261
162
  },
163
  {
164
  "epoch": 10.0,
165
+ "learning_rate": 4.003448275862069e-05,
166
+ "loss": 0.2636,
167
+ "step": 290
168
  },
169
  {
170
  "epoch": 10.0,
171
+ "eval_accuracy": 0.7034313725490197,
172
+ "eval_combined_score": 0.7533417025346725,
173
+ "eval_f1": 0.8032520325203252,
174
+ "eval_loss": 0.9829146265983582,
175
+ "eval_runtime": 0.7178,
176
+ "eval_samples_per_second": 568.4,
177
+ "eval_steps_per_second": 5.573,
178
+ "step": 290
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
179
  },
180
  {
181
+ "epoch": 10.0,
182
+ "step": 290,
183
+ "total_flos": 1150074298040320.0,
184
+ "train_loss": 0.5173478488264412,
185
+ "train_runtime": 338.752,
186
+ "train_samples_per_second": 541.399,
187
+ "train_steps_per_second": 4.28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
188
  }
189
  ],
190
+ "max_steps": 1450,
191
  "num_train_epochs": 50,
192
+ "total_flos": 1150074298040320.0,
193
  "trial_name": null,
194
  "trial_params": null
195
  }