gokuls committed on
Commit
ce2df6c
1 Parent(s): 6d176db

End of training

Browse files
README.md CHANGED
@@ -1,4 +1,6 @@
1
  ---
 
 
2
  tags:
3
  - generated_from_trainer
4
  datasets:
@@ -13,7 +15,7 @@ model-index:
13
  name: Text Classification
14
  type: text-classification
15
  dataset:
16
- name: glue
17
  type: glue
18
  config: mrpc
19
  split: validation
@@ -21,10 +23,10 @@ model-index:
21
  metrics:
22
  - name: Accuracy
23
  type: accuracy
24
- value: 0.5735294117647058
25
  - name: F1
26
  type: f1
27
- value: 0.65748031496063
28
  ---
29
 
30
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -32,12 +34,12 @@ should probably proofread and complete it, then remove this comment. -->
32
 
33
  # hBERTv1_new_pretrain_w_init__mrpc
34
 
35
- This model is a fine-tuned version of [gokuls/bert_12_layer_model_v1_complete_training_new_wt_init](https://huggingface.co/gokuls/bert_12_layer_model_v1_complete_training_new_wt_init) on the glue dataset.
36
  It achieves the following results on the evaluation set:
37
- - Loss: 1.3076
38
- - Accuracy: 0.5735
39
- - F1: 0.6575
40
- - Combined Score: 0.6155
41
 
42
  ## Model description
43
 
 
1
  ---
2
+ language:
3
+ - en
4
  tags:
5
  - generated_from_trainer
6
  datasets:
 
15
  name: Text Classification
16
  type: text-classification
17
  dataset:
18
+ name: GLUE MRPC
19
  type: glue
20
  config: mrpc
21
  split: validation
 
23
  metrics:
24
  - name: Accuracy
25
  type: accuracy
26
+ value: 0.6862745098039216
27
  - name: F1
28
  type: f1
29
+ value: 0.7894736842105262
30
  ---
31
 
32
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
34
 
35
  # hBERTv1_new_pretrain_w_init__mrpc
36
 
37
+ This model is a fine-tuned version of [gokuls/bert_12_layer_model_v1_complete_training_new_wt_init](https://huggingface.co/gokuls/bert_12_layer_model_v1_complete_training_new_wt_init) on the GLUE MRPC dataset.
38
  It achieves the following results on the evaluation set:
39
+ - Loss: 0.6082
40
+ - Accuracy: 0.6863
41
+ - F1: 0.7895
42
+ - Combined Score: 0.7379
43
 
44
  ## Model description
45
 
all_results.json CHANGED
@@ -1,16 +1,16 @@
1
  {
2
  "epoch": 9.0,
3
- "eval_accuracy": 0.6838235294117647,
4
- "eval_combined_score": 0.7480253018237863,
5
- "eval_f1": 0.8122270742358079,
6
- "eval_loss": 0.6237123012542725,
7
- "eval_runtime": 0.7659,
8
  "eval_samples": 408,
9
- "eval_samples_per_second": 532.701,
10
- "eval_steps_per_second": 5.223,
11
- "train_loss": 0.9404267789760312,
12
- "train_runtime": 225.9356,
13
  "train_samples": 3668,
14
- "train_samples_per_second": 811.736,
15
- "train_steps_per_second": 6.418
16
  }
 
1
  {
2
  "epoch": 9.0,
3
+ "eval_accuracy": 0.6862745098039216,
4
+ "eval_combined_score": 0.737874097007224,
5
+ "eval_f1": 0.7894736842105262,
6
+ "eval_loss": 0.6082243323326111,
7
+ "eval_runtime": 0.748,
8
  "eval_samples": 408,
9
+ "eval_samples_per_second": 545.43,
10
+ "eval_steps_per_second": 5.347,
11
+ "train_loss": 0.46000806216535894,
12
+ "train_runtime": 225.7005,
13
  "train_samples": 3668,
14
+ "train_samples_per_second": 812.581,
15
+ "train_steps_per_second": 6.424
16
  }
eval_results.json CHANGED
@@ -1,11 +1,11 @@
1
  {
2
  "epoch": 9.0,
3
- "eval_accuracy": 0.6838235294117647,
4
- "eval_combined_score": 0.7480253018237863,
5
- "eval_f1": 0.8122270742358079,
6
- "eval_loss": 0.6237123012542725,
7
- "eval_runtime": 0.7659,
8
  "eval_samples": 408,
9
- "eval_samples_per_second": 532.701,
10
- "eval_steps_per_second": 5.223
11
  }
 
1
  {
2
  "epoch": 9.0,
3
+ "eval_accuracy": 0.6862745098039216,
4
+ "eval_combined_score": 0.737874097007224,
5
+ "eval_f1": 0.7894736842105262,
6
+ "eval_loss": 0.6082243323326111,
7
+ "eval_runtime": 0.748,
8
  "eval_samples": 408,
9
+ "eval_samples_per_second": 545.43,
10
+ "eval_steps_per_second": 5.347
11
  }
logs/events.out.tfevents.1686033598.serv-3317.3377907.8 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52302955d2af1b6fc5043f428ac37b205e9677853456c337d771e77f98b25884
3
+ size 467
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 9.0,
3
- "train_loss": 0.9404267789760312,
4
- "train_runtime": 225.9356,
5
  "train_samples": 3668,
6
- "train_samples_per_second": 811.736,
7
- "train_steps_per_second": 6.418
8
  }
 
1
  {
2
  "epoch": 9.0,
3
+ "train_loss": 0.46000806216535894,
4
+ "train_runtime": 225.7005,
5
  "train_samples": 3668,
6
+ "train_samples_per_second": 812.581,
7
+ "train_steps_per_second": 6.424
8
  }
trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.6237123012542725,
3
  "best_model_checkpoint": "hBERTv1_new_pretrain_w_init__mrpc/checkpoint-116",
4
  "epoch": 9.0,
5
  "global_step": 261,
@@ -9,8 +9,8 @@
9
  "log_history": [
10
  {
11
  "epoch": 1.0,
12
- "learning_rate": 0.00049,
13
- "loss": 3.2785,
14
  "step": 29
15
  },
16
  {
@@ -18,16 +18,16 @@
18
  "eval_accuracy": 0.6838235294117647,
19
  "eval_combined_score": 0.7480253018237863,
20
  "eval_f1": 0.8122270742358079,
21
- "eval_loss": 0.6238367557525635,
22
- "eval_runtime": 0.7409,
23
- "eval_samples_per_second": 550.699,
24
- "eval_steps_per_second": 5.399,
25
  "step": 29
26
  },
27
  {
28
  "epoch": 2.0,
29
- "learning_rate": 0.00048,
30
- "loss": 0.7343,
31
  "step": 58
32
  },
33
  {
@@ -35,16 +35,16 @@
35
  "eval_accuracy": 0.6838235294117647,
36
  "eval_combined_score": 0.7480253018237863,
37
  "eval_f1": 0.8122270742358079,
38
- "eval_loss": 0.6785768866539001,
39
- "eval_runtime": 0.7398,
40
- "eval_samples_per_second": 551.485,
41
- "eval_steps_per_second": 5.407,
42
  "step": 58
43
  },
44
  {
45
  "epoch": 3.0,
46
- "learning_rate": 0.00047,
47
- "loss": 0.6377,
48
  "step": 87
49
  },
50
  {
@@ -52,122 +52,122 @@
52
  "eval_accuracy": 0.6838235294117647,
53
  "eval_combined_score": 0.7480253018237863,
54
  "eval_f1": 0.8122270742358079,
55
- "eval_loss": 0.62451171875,
56
- "eval_runtime": 0.7405,
57
- "eval_samples_per_second": 550.974,
58
- "eval_steps_per_second": 5.402,
59
  "step": 87
60
  },
61
  {
62
  "epoch": 4.0,
63
- "learning_rate": 0.00046,
64
- "loss": 0.6353,
65
  "step": 116
66
  },
67
  {
68
  "epoch": 4.0,
69
- "eval_accuracy": 0.6838235294117647,
70
- "eval_combined_score": 0.7480253018237863,
71
- "eval_f1": 0.8122270742358079,
72
- "eval_loss": 0.6237123012542725,
73
- "eval_runtime": 0.7399,
74
- "eval_samples_per_second": 551.451,
75
- "eval_steps_per_second": 5.406,
76
  "step": 116
77
  },
78
  {
79
  "epoch": 5.0,
80
- "learning_rate": 0.00045000000000000004,
81
- "loss": 0.6344,
82
  "step": 145
83
  },
84
  {
85
  "epoch": 5.0,
86
- "eval_accuracy": 0.6838235294117647,
87
- "eval_combined_score": 0.7480253018237863,
88
- "eval_f1": 0.8122270742358079,
89
- "eval_loss": 0.6243776679039001,
90
- "eval_runtime": 0.7419,
91
- "eval_samples_per_second": 549.914,
92
- "eval_steps_per_second": 5.391,
93
  "step": 145
94
  },
95
  {
96
  "epoch": 6.0,
97
- "learning_rate": 0.00044,
98
- "loss": 0.6314,
99
  "step": 174
100
  },
101
  {
102
  "epoch": 6.0,
103
- "eval_accuracy": 0.6838235294117647,
104
- "eval_combined_score": 0.7480253018237863,
105
- "eval_f1": 0.8122270742358079,
106
- "eval_loss": 0.6323529481887817,
107
- "eval_runtime": 0.7418,
108
- "eval_samples_per_second": 550.048,
109
- "eval_steps_per_second": 5.393,
110
  "step": 174
111
  },
112
  {
113
  "epoch": 7.0,
114
- "learning_rate": 0.00043,
115
- "loss": 0.6431,
116
  "step": 203
117
  },
118
  {
119
  "epoch": 7.0,
120
- "eval_accuracy": 0.6838235294117647,
121
- "eval_combined_score": 0.7480253018237863,
122
- "eval_f1": 0.8122270742358079,
123
- "eval_loss": 0.6401510834693909,
124
- "eval_runtime": 0.7429,
125
- "eval_samples_per_second": 549.206,
126
- "eval_steps_per_second": 5.384,
127
  "step": 203
128
  },
129
  {
130
  "epoch": 8.0,
131
- "learning_rate": 0.00042,
132
- "loss": 0.6347,
133
  "step": 232
134
  },
135
  {
136
  "epoch": 8.0,
137
- "eval_accuracy": 0.6838235294117647,
138
- "eval_combined_score": 0.7480253018237863,
139
- "eval_f1": 0.8122270742358079,
140
- "eval_loss": 0.6335784196853638,
141
- "eval_runtime": 0.7439,
142
- "eval_samples_per_second": 548.43,
143
- "eval_steps_per_second": 5.377,
144
  "step": 232
145
  },
146
  {
147
  "epoch": 9.0,
148
- "learning_rate": 0.00041,
149
- "loss": 0.6343,
150
  "step": 261
151
  },
152
  {
153
  "epoch": 9.0,
154
- "eval_accuracy": 0.6838235294117647,
155
- "eval_combined_score": 0.7480253018237863,
156
- "eval_f1": 0.8122270742358079,
157
- "eval_loss": 0.6257611513137817,
158
- "eval_runtime": 0.7412,
159
- "eval_samples_per_second": 550.441,
160
- "eval_steps_per_second": 5.396,
161
  "step": 261
162
  },
163
  {
164
  "epoch": 9.0,
165
  "step": 261,
166
  "total_flos": 4888970768941056.0,
167
- "train_loss": 0.9404267789760312,
168
- "train_runtime": 225.9356,
169
- "train_samples_per_second": 811.736,
170
- "train_steps_per_second": 6.418
171
  }
172
  ],
173
  "max_steps": 1450,
 
1
  {
2
+ "best_metric": 0.6082243323326111,
3
  "best_model_checkpoint": "hBERTv1_new_pretrain_w_init__mrpc/checkpoint-116",
4
  "epoch": 9.0,
5
  "global_step": 261,
 
9
  "log_history": [
10
  {
11
  "epoch": 1.0,
12
+ "learning_rate": 3.9200000000000004e-05,
13
+ "loss": 0.7111,
14
  "step": 29
15
  },
16
  {
 
18
  "eval_accuracy": 0.6838235294117647,
19
  "eval_combined_score": 0.7480253018237863,
20
  "eval_f1": 0.8122270742358079,
21
+ "eval_loss": 0.6563864350318909,
22
+ "eval_runtime": 0.7398,
23
+ "eval_samples_per_second": 551.531,
24
+ "eval_steps_per_second": 5.407,
25
  "step": 29
26
  },
27
  {
28
  "epoch": 2.0,
29
+ "learning_rate": 3.8400000000000005e-05,
30
+ "loss": 0.6641,
31
  "step": 58
32
  },
33
  {
 
35
  "eval_accuracy": 0.6838235294117647,
36
  "eval_combined_score": 0.7480253018237863,
37
  "eval_f1": 0.8122270742358079,
38
+ "eval_loss": 0.6160457730293274,
39
+ "eval_runtime": 0.7439,
40
+ "eval_samples_per_second": 548.435,
41
+ "eval_steps_per_second": 5.377,
42
  "step": 58
43
  },
44
  {
45
  "epoch": 3.0,
46
+ "learning_rate": 3.76e-05,
47
+ "loss": 0.6156,
48
  "step": 87
49
  },
50
  {
 
52
  "eval_accuracy": 0.6838235294117647,
53
  "eval_combined_score": 0.7480253018237863,
54
  "eval_f1": 0.8122270742358079,
55
+ "eval_loss": 0.6354035139083862,
56
+ "eval_runtime": 0.7456,
57
+ "eval_samples_per_second": 547.23,
58
+ "eval_steps_per_second": 5.365,
59
  "step": 87
60
  },
61
  {
62
  "epoch": 4.0,
63
+ "learning_rate": 3.680000000000001e-05,
64
+ "loss": 0.5817,
65
  "step": 116
66
  },
67
  {
68
  "epoch": 4.0,
69
+ "eval_accuracy": 0.6862745098039216,
70
+ "eval_combined_score": 0.737874097007224,
71
+ "eval_f1": 0.7894736842105262,
72
+ "eval_loss": 0.6082243323326111,
73
+ "eval_runtime": 0.7468,
74
+ "eval_samples_per_second": 546.334,
75
+ "eval_steps_per_second": 5.356,
76
  "step": 116
77
  },
78
  {
79
  "epoch": 5.0,
80
+ "learning_rate": 3.6e-05,
81
+ "loss": 0.5091,
82
  "step": 145
83
  },
84
  {
85
  "epoch": 5.0,
86
+ "eval_accuracy": 0.5073529411764706,
87
+ "eval_combined_score": 0.5115077958894401,
88
+ "eval_f1": 0.5156626506024096,
89
+ "eval_loss": 0.7811743021011353,
90
+ "eval_runtime": 0.7431,
91
+ "eval_samples_per_second": 549.025,
92
+ "eval_steps_per_second": 5.383,
93
  "step": 145
94
  },
95
  {
96
  "epoch": 6.0,
97
+ "learning_rate": 3.52e-05,
98
+ "loss": 0.3973,
99
  "step": 174
100
  },
101
  {
102
  "epoch": 6.0,
103
+ "eval_accuracy": 0.6544117647058824,
104
+ "eval_combined_score": 0.705444224321853,
105
+ "eval_f1": 0.7564766839378239,
106
+ "eval_loss": 0.7949338555335999,
107
+ "eval_runtime": 0.7416,
108
+ "eval_samples_per_second": 550.17,
109
+ "eval_steps_per_second": 5.394,
110
  "step": 174
111
  },
112
  {
113
  "epoch": 7.0,
114
+ "learning_rate": 3.44e-05,
115
+ "loss": 0.2966,
116
  "step": 203
117
  },
118
  {
119
  "epoch": 7.0,
120
+ "eval_accuracy": 0.6078431372549019,
121
+ "eval_combined_score": 0.6482795452811474,
122
+ "eval_f1": 0.688715953307393,
123
+ "eval_loss": 1.0388320684432983,
124
+ "eval_runtime": 0.7411,
125
+ "eval_samples_per_second": 550.501,
126
+ "eval_steps_per_second": 5.397,
127
  "step": 203
128
  },
129
  {
130
  "epoch": 8.0,
131
+ "learning_rate": 3.3600000000000004e-05,
132
+ "loss": 0.2024,
133
  "step": 232
134
  },
135
  {
136
  "epoch": 8.0,
137
+ "eval_accuracy": 0.6200980392156863,
138
+ "eval_combined_score": 0.6662642329659136,
139
+ "eval_f1": 0.7124304267161411,
140
+ "eval_loss": 1.0064674615859985,
141
+ "eval_runtime": 0.7424,
142
+ "eval_samples_per_second": 549.6,
143
+ "eval_steps_per_second": 5.388,
144
  "step": 232
145
  },
146
  {
147
  "epoch": 9.0,
148
+ "learning_rate": 3.28e-05,
149
+ "loss": 0.1621,
150
  "step": 261
151
  },
152
  {
153
  "epoch": 9.0,
154
+ "eval_accuracy": 0.5735294117647058,
155
+ "eval_combined_score": 0.6155048633626679,
156
+ "eval_f1": 0.65748031496063,
157
+ "eval_loss": 1.307615876197815,
158
+ "eval_runtime": 0.7454,
159
+ "eval_samples_per_second": 547.342,
160
+ "eval_steps_per_second": 5.366,
161
  "step": 261
162
  },
163
  {
164
  "epoch": 9.0,
165
  "step": 261,
166
  "total_flos": 4888970768941056.0,
167
+ "train_loss": 0.46000806216535894,
168
+ "train_runtime": 225.7005,
169
+ "train_samples_per_second": 812.581,
170
+ "train_steps_per_second": 6.424
171
  }
172
  ],
173
  "max_steps": 1450,