gokulsrinivasagan committed on
Commit
c939ca3
·
verified ·
1 Parent(s): b95f615

End of training

Browse files
README.md CHANGED
@@ -1,13 +1,28 @@
1
  ---
2
  library_name: transformers
 
 
3
  base_model: gokulsrinivasagan/bert_base_lda_20_v1
4
  tags:
5
  - generated_from_trainer
 
 
6
  metrics:
7
  - accuracy
8
  model-index:
9
  - name: bert_base_lda_20_v1_wnli
10
- results: []
 
 
 
 
 
 
 
 
 
 
 
11
  ---
12
 
13
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -15,10 +30,10 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # bert_base_lda_20_v1_wnli
17
 
18
- This model is a fine-tuned version of [gokulsrinivasagan/bert_base_lda_20_v1](https://huggingface.co/gokulsrinivasagan/bert_base_lda_20_v1) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
- - Loss: 0.6939
21
- - Accuracy: 0.5070
22
 
23
  ## Model description
24
 
 
1
  ---
2
  library_name: transformers
3
+ language:
4
+ - en
5
  base_model: gokulsrinivasagan/bert_base_lda_20_v1
6
  tags:
7
  - generated_from_trainer
8
+ datasets:
9
+ - glue
10
  metrics:
11
  - accuracy
12
  model-index:
13
  - name: bert_base_lda_20_v1_wnli
14
+ results:
15
+ - task:
16
+ name: Text Classification
17
+ type: text-classification
18
+ dataset:
19
+ name: GLUE WNLI
20
+ type: glue
21
+ args: wnli
22
+ metrics:
23
+ - name: Accuracy
24
+ type: accuracy
25
+ value: 0.5633802816901409
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
30
 
31
  # bert_base_lda_20_v1_wnli
32
 
33
+ This model is a fine-tuned version of [gokulsrinivasagan/bert_base_lda_20_v1](https://huggingface.co/gokulsrinivasagan/bert_base_lda_20_v1) on the GLUE WNLI dataset.
34
  It achieves the following results on the evaluation set:
35
+ - Loss: 0.6815
36
+ - Accuracy: 0.5634
37
 
38
  ## Model description
39
 
all_results.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
- "epoch": 7.0,
3
  "eval_accuracy": 0.5633802816901409,
4
- "eval_loss": 0.6882702708244324,
5
- "eval_runtime": 0.0556,
6
  "eval_samples": 71,
7
- "eval_samples_per_second": 1277.692,
8
- "eval_steps_per_second": 17.996,
9
- "total_flos": 584764320537600.0,
10
- "train_loss": 1.2976507572900682,
11
- "train_runtime": 23.7039,
12
  "train_samples": 635,
13
- "train_samples_per_second": 1339.444,
14
- "train_steps_per_second": 6.328
15
  }
 
1
  {
2
+ "epoch": 13.0,
3
  "eval_accuracy": 0.5633802816901409,
4
+ "eval_loss": 0.6814756989479065,
5
+ "eval_runtime": 0.0587,
6
  "eval_samples": 71,
7
+ "eval_samples_per_second": 1208.91,
8
+ "eval_steps_per_second": 17.027,
9
+ "total_flos": 1085990880998400.0,
10
+ "train_loss": 0.7166132437877166,
11
+ "train_runtime": 48.6385,
12
  "train_samples": 635,
13
+ "train_samples_per_second": 652.775,
14
+ "train_steps_per_second": 3.084
15
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 7.0,
3
  "eval_accuracy": 0.5633802816901409,
4
- "eval_loss": 0.6882702708244324,
5
- "eval_runtime": 0.0556,
6
  "eval_samples": 71,
7
- "eval_samples_per_second": 1277.692,
8
- "eval_steps_per_second": 17.996
9
  }
 
1
  {
2
+ "epoch": 13.0,
3
  "eval_accuracy": 0.5633802816901409,
4
+ "eval_loss": 0.6814756989479065,
5
+ "eval_runtime": 0.0587,
6
  "eval_samples": 71,
7
+ "eval_samples_per_second": 1208.91,
8
+ "eval_steps_per_second": 17.027
9
  }
logs/events.out.tfevents.1733317888.ki-g0008.1207389.15 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3dcc198a717dff21e1695472e120727d91f981b69d0448f62da19b1b01ad053f
3
+ size 405
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 7.0,
3
- "total_flos": 584764320537600.0,
4
- "train_loss": 1.2976507572900682,
5
- "train_runtime": 23.7039,
6
  "train_samples": 635,
7
- "train_samples_per_second": 1339.444,
8
- "train_steps_per_second": 6.328
9
  }
 
1
  {
2
+ "epoch": 13.0,
3
+ "total_flos": 1085990880998400.0,
4
+ "train_loss": 0.7166132437877166,
5
+ "train_runtime": 48.6385,
6
  "train_samples": 635,
7
+ "train_samples_per_second": 652.775,
8
+ "train_steps_per_second": 3.084
9
  }
trainer_state.json CHANGED
@@ -1,133 +1,229 @@
1
  {
2
- "best_metric": 0.6882702708244324,
3
- "best_model_checkpoint": "bert_base_lda_20_v1_wnli/checkpoint-6",
4
- "epoch": 7.0,
5
  "eval_steps": 500,
6
- "global_step": 21,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "grad_norm": 16.016019821166992,
14
- "learning_rate": 0.00098,
15
- "loss": 1.2108,
16
  "step": 3
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_accuracy": 0.5633802816901409,
21
- "eval_loss": 1.61472749710083,
22
- "eval_runtime": 0.0531,
23
- "eval_samples_per_second": 1337.217,
24
- "eval_steps_per_second": 18.834,
25
  "step": 3
26
  },
27
  {
28
  "epoch": 2.0,
29
- "grad_norm": 4.4702959060668945,
30
- "learning_rate": 0.00096,
31
- "loss": 1.164,
32
  "step": 6
33
  },
34
  {
35
  "epoch": 2.0,
36
- "eval_accuracy": 0.5633802816901409,
37
- "eval_loss": 0.6882702708244324,
38
- "eval_runtime": 0.0528,
39
- "eval_samples_per_second": 1345.768,
40
- "eval_steps_per_second": 18.954,
41
  "step": 6
42
  },
43
  {
44
  "epoch": 3.0,
45
- "grad_norm": 13.5480375289917,
46
- "learning_rate": 0.00094,
47
- "loss": 1.7783,
48
  "step": 9
49
  },
50
  {
51
  "epoch": 3.0,
52
  "eval_accuracy": 0.5633802816901409,
53
- "eval_loss": 1.376787781715393,
54
- "eval_runtime": 0.053,
55
- "eval_samples_per_second": 1339.708,
56
- "eval_steps_per_second": 18.869,
57
  "step": 9
58
  },
59
  {
60
  "epoch": 4.0,
61
- "grad_norm": 21.326387405395508,
62
- "learning_rate": 0.00092,
63
- "loss": 2.2468,
64
  "step": 12
65
  },
66
  {
67
  "epoch": 4.0,
68
- "eval_accuracy": 0.43661971830985913,
69
- "eval_loss": 1.2927902936935425,
70
- "eval_runtime": 0.0556,
71
- "eval_samples_per_second": 1278.076,
72
- "eval_steps_per_second": 18.001,
73
  "step": 12
74
  },
75
  {
76
  "epoch": 5.0,
77
- "grad_norm": 5.3779616355896,
78
- "learning_rate": 0.0009000000000000001,
79
- "loss": 1.1044,
80
  "step": 15
81
  },
82
  {
83
  "epoch": 5.0,
84
- "eval_accuracy": 0.43661971830985913,
85
- "eval_loss": 1.021649718284607,
86
- "eval_runtime": 0.0526,
87
- "eval_samples_per_second": 1349.604,
88
- "eval_steps_per_second": 19.009,
89
  "step": 15
90
  },
91
  {
92
  "epoch": 6.0,
93
- "grad_norm": 0.6860456466674805,
94
- "learning_rate": 0.00088,
95
- "loss": 0.7894,
96
  "step": 18
97
  },
98
  {
99
  "epoch": 6.0,
100
  "eval_accuracy": 0.5633802816901409,
101
- "eval_loss": 0.7181998491287231,
102
- "eval_runtime": 0.0531,
103
- "eval_samples_per_second": 1337.217,
104
- "eval_steps_per_second": 18.834,
105
  "step": 18
106
  },
107
  {
108
  "epoch": 7.0,
109
- "grad_norm": 380.8271789550781,
110
- "learning_rate": 0.00086,
111
- "loss": 0.7898,
112
  "step": 21
113
  },
114
  {
115
  "epoch": 7.0,
116
- "eval_accuracy": 0.5633802816901409,
117
- "eval_loss": 0.7561619281768799,
118
- "eval_runtime": 0.0528,
119
- "eval_samples_per_second": 1344.522,
120
- "eval_steps_per_second": 18.937,
121
  "step": 21
122
  },
123
  {
124
- "epoch": 7.0,
125
- "step": 21,
126
- "total_flos": 584764320537600.0,
127
- "train_loss": 1.2976507572900682,
128
- "train_runtime": 23.7039,
129
- "train_samples_per_second": 1339.444,
130
- "train_steps_per_second": 6.328
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
  }
132
  ],
133
  "logging_steps": 1,
@@ -156,7 +252,7 @@
156
  "attributes": {}
157
  }
158
  },
159
- "total_flos": 584764320537600.0,
160
  "train_batch_size": 256,
161
  "trial_name": null,
162
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.6814756989479065,
3
+ "best_model_checkpoint": "bert_base_lda_20_v1_wnli/checkpoint-24",
4
+ "epoch": 13.0,
5
  "eval_steps": 500,
6
+ "global_step": 39,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 2.379216432571411,
14
+ "learning_rate": 4.9e-05,
15
+ "loss": 0.8433,
16
  "step": 3
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_accuracy": 0.43661971830985913,
21
+ "eval_loss": 0.733467161655426,
22
+ "eval_runtime": 0.0534,
23
+ "eval_samples_per_second": 1329.611,
24
+ "eval_steps_per_second": 18.727,
25
  "step": 3
26
  },
27
  {
28
  "epoch": 2.0,
29
+ "grad_norm": 5.9057841300964355,
30
+ "learning_rate": 4.8e-05,
31
+ "loss": 0.7419,
32
  "step": 6
33
  },
34
  {
35
  "epoch": 2.0,
36
+ "eval_accuracy": 0.43661971830985913,
37
+ "eval_loss": 0.7789943218231201,
38
+ "eval_runtime": 0.0547,
39
+ "eval_samples_per_second": 1298.937,
40
+ "eval_steps_per_second": 18.295,
41
  "step": 6
42
  },
43
  {
44
  "epoch": 3.0,
45
+ "grad_norm": 3.7641913890838623,
46
+ "learning_rate": 4.7e-05,
47
+ "loss": 0.7237,
48
  "step": 9
49
  },
50
  {
51
  "epoch": 3.0,
52
  "eval_accuracy": 0.5633802816901409,
53
+ "eval_loss": 0.6985859870910645,
54
+ "eval_runtime": 0.0566,
55
+ "eval_samples_per_second": 1254.04,
56
+ "eval_steps_per_second": 17.663,
57
  "step": 9
58
  },
59
  {
60
  "epoch": 4.0,
61
+ "grad_norm": 1.4714839458465576,
62
+ "learning_rate": 4.600000000000001e-05,
63
+ "loss": 0.7156,
64
  "step": 12
65
  },
66
  {
67
  "epoch": 4.0,
68
+ "eval_accuracy": 0.4507042253521127,
69
+ "eval_loss": 0.7405645847320557,
70
+ "eval_runtime": 0.0626,
71
+ "eval_samples_per_second": 1134.494,
72
+ "eval_steps_per_second": 15.979,
73
  "step": 12
74
  },
75
  {
76
  "epoch": 5.0,
77
+ "grad_norm": 0.7439093589782715,
78
+ "learning_rate": 4.5e-05,
79
+ "loss": 0.7143,
80
  "step": 15
81
  },
82
  {
83
  "epoch": 5.0,
84
+ "eval_accuracy": 0.5633802816901409,
85
+ "eval_loss": 0.6858494281768799,
86
+ "eval_runtime": 0.0533,
87
+ "eval_samples_per_second": 1330.936,
88
+ "eval_steps_per_second": 18.746,
89
  "step": 15
90
  },
91
  {
92
  "epoch": 6.0,
93
+ "grad_norm": 0.680374264717102,
94
+ "learning_rate": 4.4000000000000006e-05,
95
+ "loss": 0.6989,
96
  "step": 18
97
  },
98
  {
99
  "epoch": 6.0,
100
  "eval_accuracy": 0.5633802816901409,
101
+ "eval_loss": 0.6855193376541138,
102
+ "eval_runtime": 0.0533,
103
+ "eval_samples_per_second": 1332.037,
104
+ "eval_steps_per_second": 18.761,
105
  "step": 18
106
  },
107
  {
108
  "epoch": 7.0,
109
+ "grad_norm": 0.9396877884864807,
110
+ "learning_rate": 4.3e-05,
111
+ "loss": 0.6949,
112
  "step": 21
113
  },
114
  {
115
  "epoch": 7.0,
116
+ "eval_accuracy": 0.5070422535211268,
117
+ "eval_loss": 0.693166971206665,
118
+ "eval_runtime": 0.0529,
119
+ "eval_samples_per_second": 1343.406,
120
+ "eval_steps_per_second": 18.921,
121
  "step": 21
122
  },
123
  {
124
+ "epoch": 8.0,
125
+ "grad_norm": 2.6080543994903564,
126
+ "learning_rate": 4.2e-05,
127
+ "loss": 0.6949,
128
+ "step": 24
129
+ },
130
+ {
131
+ "epoch": 8.0,
132
+ "eval_accuracy": 0.5633802816901409,
133
+ "eval_loss": 0.6814756989479065,
134
+ "eval_runtime": 0.0551,
135
+ "eval_samples_per_second": 1288.171,
136
+ "eval_steps_per_second": 18.143,
137
+ "step": 24
138
+ },
139
+ {
140
+ "epoch": 9.0,
141
+ "grad_norm": 0.7388402223587036,
142
+ "learning_rate": 4.1e-05,
143
+ "loss": 0.6963,
144
+ "step": 27
145
+ },
146
+ {
147
+ "epoch": 9.0,
148
+ "eval_accuracy": 0.49295774647887325,
149
+ "eval_loss": 0.696027934551239,
150
+ "eval_runtime": 0.0535,
151
+ "eval_samples_per_second": 1327.027,
152
+ "eval_steps_per_second": 18.691,
153
+ "step": 27
154
+ },
155
+ {
156
+ "epoch": 10.0,
157
+ "grad_norm": 0.8557111620903015,
158
+ "learning_rate": 4e-05,
159
+ "loss": 0.6932,
160
+ "step": 30
161
+ },
162
+ {
163
+ "epoch": 10.0,
164
+ "eval_accuracy": 0.5352112676056338,
165
+ "eval_loss": 0.6830435991287231,
166
+ "eval_runtime": 0.0551,
167
+ "eval_samples_per_second": 1288.116,
168
+ "eval_steps_per_second": 18.142,
169
+ "step": 30
170
+ },
171
+ {
172
+ "epoch": 11.0,
173
+ "grad_norm": 1.1222957372665405,
174
+ "learning_rate": 3.9000000000000006e-05,
175
+ "loss": 0.6989,
176
+ "step": 33
177
+ },
178
+ {
179
+ "epoch": 11.0,
180
+ "eval_accuracy": 0.5352112676056338,
181
+ "eval_loss": 0.6837586760520935,
182
+ "eval_runtime": 0.0534,
183
+ "eval_samples_per_second": 1330.448,
184
+ "eval_steps_per_second": 18.739,
185
+ "step": 33
186
+ },
187
+ {
188
+ "epoch": 12.0,
189
+ "grad_norm": 1.9935102462768555,
190
+ "learning_rate": 3.8e-05,
191
+ "loss": 0.6969,
192
+ "step": 36
193
+ },
194
+ {
195
+ "epoch": 12.0,
196
+ "eval_accuracy": 0.49295774647887325,
197
+ "eval_loss": 0.7047203779220581,
198
+ "eval_runtime": 0.0537,
199
+ "eval_samples_per_second": 1321.152,
200
+ "eval_steps_per_second": 18.608,
201
+ "step": 36
202
+ },
203
+ {
204
+ "epoch": 13.0,
205
+ "grad_norm": 1.0254613161087036,
206
+ "learning_rate": 3.7e-05,
207
+ "loss": 0.7032,
208
+ "step": 39
209
+ },
210
+ {
211
+ "epoch": 13.0,
212
+ "eval_accuracy": 0.5070422535211268,
213
+ "eval_loss": 0.6938820481300354,
214
+ "eval_runtime": 0.0538,
215
+ "eval_samples_per_second": 1319.104,
216
+ "eval_steps_per_second": 18.579,
217
+ "step": 39
218
+ },
219
+ {
220
+ "epoch": 13.0,
221
+ "step": 39,
222
+ "total_flos": 1085990880998400.0,
223
+ "train_loss": 0.7166132437877166,
224
+ "train_runtime": 48.6385,
225
+ "train_samples_per_second": 652.775,
226
+ "train_steps_per_second": 3.084
227
  }
228
  ],
229
  "logging_steps": 1,
 
252
  "attributes": {}
253
  }
254
  },
255
+ "total_flos": 1085990880998400.0,
256
  "train_batch_size": 256,
257
  "trial_name": null,
258
  "trial_params": null