gokuls committed on
Commit
c0a0130
1 Parent(s): c4eefba

End of training

Browse files
README.md CHANGED
@@ -1,4 +1,6 @@
1
  ---
 
 
2
  license: apache-2.0
3
  tags:
4
  - generated_from_trainer
@@ -13,7 +15,7 @@ model-index:
13
  name: Text Classification
14
  type: text-classification
15
  dataset:
16
- name: glue
17
  type: glue
18
  config: cola
19
  split: validation
@@ -21,7 +23,7 @@ model-index:
21
  metrics:
22
  - name: Matthews Correlation
23
  type: matthews_correlation
24
- value: 0.08118499547243287
25
  ---
26
 
27
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -29,10 +31,10 @@ should probably proofread and complete it, then remove this comment. -->
29
 
30
  # mobilebert_sa_GLUE_Experiment_cola
31
 
32
- This model is a fine-tuned version of [google/mobilebert-uncased](https://huggingface.co/google/mobilebert-uncased) on the glue dataset.
33
  It achieves the following results on the evaluation set:
34
- - Loss: 0.6915
35
- - Matthews Correlation: 0.0812
36
 
37
  ## Model description
38
 
 
1
  ---
2
+ language:
3
+ - en
4
  license: apache-2.0
5
  tags:
6
  - generated_from_trainer
 
15
  name: Text Classification
16
  type: text-classification
17
  dataset:
18
+ name: GLUE COLA
19
  type: glue
20
  config: cola
21
  split: validation
 
23
  metrics:
24
  - name: Matthews Correlation
25
  type: matthews_correlation
26
+ value: 0.0
27
  ---
28
 
29
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
31
 
32
  # mobilebert_sa_GLUE_Experiment_cola
33
 
34
+ This model is a fine-tuned version of [google/mobilebert-uncased](https://huggingface.co/google/mobilebert-uncased) on the GLUE COLA dataset.
35
  It achieves the following results on the evaluation set:
36
+ - Loss: 0.6153
37
+ - Matthews Correlation: 0.0
38
 
39
  ## Model description
40
 
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
- "epoch": 12.0,
3
- "eval_loss": 0.6131083965301514,
4
  "eval_matthews_correlation": 0.0,
5
- "eval_runtime": 1.1058,
6
  "eval_samples": 1043,
7
- "eval_samples_per_second": 943.228,
8
- "eval_steps_per_second": 4.522,
9
- "train_loss": 0.5614397408915501,
10
- "train_runtime": 434.3841,
11
  "train_samples": 8551,
12
- "train_samples_per_second": 984.267,
13
- "train_steps_per_second": 3.914
14
  }
 
1
  {
2
+ "epoch": 9.0,
3
+ "eval_loss": 0.61527419090271,
4
  "eval_matthews_correlation": 0.0,
5
+ "eval_runtime": 1.7447,
6
  "eval_samples": 1043,
7
+ "eval_samples_per_second": 597.816,
8
+ "eval_steps_per_second": 5.159,
9
+ "train_loss": 0.543749636876247,
10
+ "train_runtime": 541.9676,
11
  "train_samples": 8551,
12
+ "train_samples_per_second": 788.885,
13
+ "train_steps_per_second": 6.181
14
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 12.0,
3
- "eval_loss": 0.6131083965301514,
4
  "eval_matthews_correlation": 0.0,
5
- "eval_runtime": 1.1058,
6
  "eval_samples": 1043,
7
- "eval_samples_per_second": 943.228,
8
- "eval_steps_per_second": 4.522
9
  }
 
1
  {
2
+ "epoch": 9.0,
3
+ "eval_loss": 0.61527419090271,
4
  "eval_matthews_correlation": 0.0,
5
+ "eval_runtime": 1.7447,
6
  "eval_samples": 1043,
7
+ "eval_samples_per_second": 597.816,
8
+ "eval_steps_per_second": 5.159
9
  }
logs/events.out.tfevents.1674619494.garda.1933772.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45d64c11e0c13780753cf00e730d891e2abbd3c61c178a0a9caad941b25ab798
3
+ size 375
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 12.0,
3
- "train_loss": 0.5614397408915501,
4
- "train_runtime": 434.3841,
5
  "train_samples": 8551,
6
- "train_samples_per_second": 984.267,
7
- "train_steps_per_second": 3.914
8
  }
 
1
  {
2
+ "epoch": 9.0,
3
+ "train_loss": 0.543749636876247,
4
+ "train_runtime": 541.9676,
5
  "train_samples": 8551,
6
+ "train_samples_per_second": 788.885,
7
+ "train_steps_per_second": 6.181
8
  }
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 0.6131083965301514,
3
- "best_model_checkpoint": "mobilebert_sa_GLUE_Experiment_cola/checkpoint-238",
4
- "epoch": 12.0,
5
- "global_step": 408,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -10,196 +10,151 @@
10
  {
11
  "epoch": 1.0,
12
  "learning_rate": 4.9e-05,
13
- "loss": 0.6197,
14
- "step": 34
15
  },
16
  {
17
  "epoch": 1.0,
18
- "eval_loss": 0.6238651871681213,
19
  "eval_matthews_correlation": 0.0,
20
- "eval_runtime": 1.0726,
21
- "eval_samples_per_second": 972.413,
22
- "eval_steps_per_second": 4.662,
23
- "step": 34
24
  },
25
  {
26
  "epoch": 2.0,
27
  "learning_rate": 4.8e-05,
28
  "loss": 0.6078,
29
- "step": 68
30
  },
31
  {
32
  "epoch": 2.0,
33
- "eval_loss": 0.6178815364837646,
34
  "eval_matthews_correlation": 0.0,
35
- "eval_runtime": 1.0582,
36
- "eval_samples_per_second": 985.593,
37
- "eval_steps_per_second": 4.725,
38
- "step": 68
39
  },
40
  {
41
  "epoch": 3.0,
42
  "learning_rate": 4.7e-05,
43
- "loss": 0.6064,
44
- "step": 102
45
  },
46
  {
47
  "epoch": 3.0,
48
- "eval_loss": 0.6179934144020081,
49
  "eval_matthews_correlation": 0.0,
50
- "eval_runtime": 1.3387,
51
- "eval_samples_per_second": 779.113,
52
- "eval_steps_per_second": 3.735,
53
- "step": 102
54
  },
55
  {
56
  "epoch": 4.0,
57
  "learning_rate": 4.600000000000001e-05,
58
- "loss": 0.6073,
59
- "step": 136
60
  },
61
  {
62
  "epoch": 4.0,
63
- "eval_loss": 0.6175711750984192,
64
  "eval_matthews_correlation": 0.0,
65
- "eval_runtime": 1.1189,
66
- "eval_samples_per_second": 932.188,
67
- "eval_steps_per_second": 4.469,
68
- "step": 136
69
  },
70
  {
71
  "epoch": 5.0,
72
  "learning_rate": 4.5e-05,
73
- "loss": 0.6069,
74
- "step": 170
75
  },
76
  {
77
  "epoch": 5.0,
78
- "eval_loss": 0.6172661781311035,
79
- "eval_matthews_correlation": 0.0,
80
- "eval_runtime": 1.1205,
81
- "eval_samples_per_second": 930.859,
82
- "eval_steps_per_second": 4.462,
83
- "step": 170
84
  },
85
  {
86
  "epoch": 6.0,
87
  "learning_rate": 4.4000000000000006e-05,
88
- "loss": 0.6043,
89
- "step": 204
90
  },
91
  {
92
  "epoch": 6.0,
93
- "eval_loss": 0.6166184544563293,
94
- "eval_matthews_correlation": 0.0,
95
- "eval_runtime": 1.2344,
96
- "eval_samples_per_second": 844.955,
97
- "eval_steps_per_second": 4.051,
98
- "step": 204
99
  },
100
  {
101
  "epoch": 7.0,
102
  "learning_rate": 4.3e-05,
103
- "loss": 0.6004,
104
- "step": 238
105
  },
106
  {
107
  "epoch": 7.0,
108
- "eval_loss": 0.6131083965301514,
109
- "eval_matthews_correlation": 0.0,
110
- "eval_runtime": 1.1421,
111
- "eval_samples_per_second": 913.231,
112
- "eval_steps_per_second": 4.378,
113
- "step": 238
114
  },
115
  {
116
  "epoch": 8.0,
117
  "learning_rate": 4.2e-05,
118
- "loss": 0.5842,
119
- "step": 272
120
  },
121
  {
122
  "epoch": 8.0,
123
- "eval_loss": 0.6240708231925964,
124
- "eval_matthews_correlation": 0.0951039122870703,
125
- "eval_runtime": 1.1486,
126
- "eval_samples_per_second": 908.065,
127
- "eval_steps_per_second": 4.353,
128
- "step": 272
129
  },
130
  {
131
  "epoch": 9.0,
132
  "learning_rate": 4.1e-05,
133
- "loss": 0.5192,
134
- "step": 306
 
 
 
 
 
 
 
 
 
135
  },
136
  {
137
  "epoch": 9.0,
138
- "eval_loss": 0.6361746191978455,
139
- "eval_matthews_correlation": 0.059760920069176514,
140
- "eval_runtime": 1.2007,
141
- "eval_samples_per_second": 868.661,
142
- "eval_steps_per_second": 4.164,
143
- "step": 306
144
- },
145
- {
146
- "epoch": 10.0,
147
- "learning_rate": 4e-05,
148
- "loss": 0.4884,
149
- "step": 340
150
- },
151
- {
152
- "epoch": 10.0,
153
- "eval_loss": 0.7009902596473694,
154
- "eval_matthews_correlation": 0.08008155523655092,
155
- "eval_runtime": 1.1475,
156
- "eval_samples_per_second": 908.97,
157
- "eval_steps_per_second": 4.357,
158
- "step": 340
159
- },
160
- {
161
- "epoch": 11.0,
162
- "learning_rate": 3.9000000000000006e-05,
163
- "loss": 0.4559,
164
- "step": 374
165
- },
166
- {
167
- "epoch": 11.0,
168
- "eval_loss": 0.6731011867523193,
169
- "eval_matthews_correlation": 0.09051190856095573,
170
- "eval_runtime": 1.1134,
171
- "eval_samples_per_second": 936.763,
172
- "eval_steps_per_second": 4.491,
173
- "step": 374
174
- },
175
- {
176
- "epoch": 12.0,
177
- "learning_rate": 3.8e-05,
178
- "loss": 0.4367,
179
- "step": 408
180
- },
181
- {
182
- "epoch": 12.0,
183
- "eval_loss": 0.6893478035926819,
184
- "eval_matthews_correlation": 0.09007205990892461,
185
- "eval_runtime": 1.2098,
186
- "eval_samples_per_second": 862.13,
187
- "eval_steps_per_second": 4.133,
188
- "step": 408
189
- },
190
- {
191
- "epoch": 12.0,
192
- "step": 408,
193
- "total_flos": 3258721140473856.0,
194
- "train_loss": 0.5614397408915501,
195
- "train_runtime": 434.3841,
196
- "train_samples_per_second": 984.267,
197
- "train_steps_per_second": 3.914
198
  }
199
  ],
200
- "max_steps": 1700,
201
  "num_train_epochs": 50,
202
- "total_flos": 3258721140473856.0,
203
  "trial_name": null,
204
  "trial_params": null
205
  }
 
1
  {
2
+ "best_metric": 0.61527419090271,
3
+ "best_model_checkpoint": "mobilebert_sa_GLUE_Experiment_cola/checkpoint-268",
4
+ "epoch": 9.0,
5
+ "global_step": 603,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
10
  {
11
  "epoch": 1.0,
12
  "learning_rate": 4.9e-05,
13
+ "loss": 0.6122,
14
+ "step": 67
15
  },
16
  {
17
  "epoch": 1.0,
18
+ "eval_loss": 0.6183971762657166,
19
  "eval_matthews_correlation": 0.0,
20
+ "eval_runtime": 1.7411,
21
+ "eval_samples_per_second": 599.045,
22
+ "eval_steps_per_second": 5.169,
23
+ "step": 67
24
  },
25
  {
26
  "epoch": 2.0,
27
  "learning_rate": 4.8e-05,
28
  "loss": 0.6078,
29
+ "step": 134
30
  },
31
  {
32
  "epoch": 2.0,
33
+ "eval_loss": 0.6179646253585815,
34
  "eval_matthews_correlation": 0.0,
35
+ "eval_runtime": 1.7343,
36
+ "eval_samples_per_second": 601.399,
37
+ "eval_steps_per_second": 5.189,
38
+ "step": 134
39
  },
40
  {
41
  "epoch": 3.0,
42
  "learning_rate": 4.7e-05,
43
+ "loss": 0.607,
44
+ "step": 201
45
  },
46
  {
47
  "epoch": 3.0,
48
+ "eval_loss": 0.6185427904129028,
49
  "eval_matthews_correlation": 0.0,
50
+ "eval_runtime": 1.7316,
51
+ "eval_samples_per_second": 602.316,
52
+ "eval_steps_per_second": 5.197,
53
+ "step": 201
54
  },
55
  {
56
  "epoch": 4.0,
57
  "learning_rate": 4.600000000000001e-05,
58
+ "loss": 0.6052,
59
+ "step": 268
60
  },
61
  {
62
  "epoch": 4.0,
63
+ "eval_loss": 0.61527419090271,
64
  "eval_matthews_correlation": 0.0,
65
+ "eval_runtime": 1.7366,
66
+ "eval_samples_per_second": 600.599,
67
+ "eval_steps_per_second": 5.183,
68
+ "step": 268
69
  },
70
  {
71
  "epoch": 5.0,
72
  "learning_rate": 4.5e-05,
73
+ "loss": 0.5822,
74
+ "step": 335
75
  },
76
  {
77
  "epoch": 5.0,
78
+ "eval_loss": 0.6292034983634949,
79
+ "eval_matthews_correlation": 0.05057353484097579,
80
+ "eval_runtime": 1.7382,
81
+ "eval_samples_per_second": 600.051,
82
+ "eval_steps_per_second": 5.178,
83
+ "step": 335
84
  },
85
  {
86
  "epoch": 6.0,
87
  "learning_rate": 4.4000000000000006e-05,
88
+ "loss": 0.5193,
89
+ "step": 402
90
  },
91
  {
92
  "epoch": 6.0,
93
+ "eval_loss": 0.6421763896942139,
94
+ "eval_matthews_correlation": 0.074290264269209,
95
+ "eval_runtime": 1.7355,
96
+ "eval_samples_per_second": 600.967,
97
+ "eval_steps_per_second": 5.186,
98
+ "step": 402
99
  },
100
  {
101
  "epoch": 7.0,
102
  "learning_rate": 4.3e-05,
103
+ "loss": 0.4783,
104
+ "step": 469
105
  },
106
  {
107
  "epoch": 7.0,
108
+ "eval_loss": 0.7020387649536133,
109
+ "eval_matthews_correlation": 0.06292685160191117,
110
+ "eval_runtime": 1.7327,
111
+ "eval_samples_per_second": 601.938,
112
+ "eval_steps_per_second": 5.194,
113
+ "step": 469
114
  },
115
  {
116
  "epoch": 8.0,
117
  "learning_rate": 4.2e-05,
118
+ "loss": 0.4504,
119
+ "step": 536
120
  },
121
  {
122
  "epoch": 8.0,
123
+ "eval_loss": 0.7421594858169556,
124
+ "eval_matthews_correlation": 0.08342146557730178,
125
+ "eval_runtime": 1.7355,
126
+ "eval_samples_per_second": 600.992,
127
+ "eval_steps_per_second": 5.186,
128
+ "step": 536
129
  },
130
  {
131
  "epoch": 9.0,
132
  "learning_rate": 4.1e-05,
133
+ "loss": 0.4315,
134
+ "step": 603
135
+ },
136
+ {
137
+ "epoch": 9.0,
138
+ "eval_loss": 0.6915299296379089,
139
+ "eval_matthews_correlation": 0.08118499547243287,
140
+ "eval_runtime": 1.743,
141
+ "eval_samples_per_second": 598.381,
142
+ "eval_steps_per_second": 5.163,
143
+ "step": 603
144
  },
145
  {
146
  "epoch": 9.0,
147
+ "step": 603,
148
+ "total_flos": 2412992519995392.0,
149
+ "train_loss": 0.543749636876247,
150
+ "train_runtime": 541.9676,
151
+ "train_samples_per_second": 788.885,
152
+ "train_steps_per_second": 6.181
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
  }
154
  ],
155
+ "max_steps": 3350,
156
  "num_train_epochs": 50,
157
+ "total_flos": 2412992519995392.0,
158
  "trial_name": null,
159
  "trial_params": null
160
  }