gokulsrinivasagan committed on
Commit
581b443
·
verified ·
1 Parent(s): 76a8aa4

End of training

Browse files
README.md CHANGED
@@ -1,14 +1,32 @@
1
  ---
2
  library_name: transformers
 
 
3
  base_model: gokulsrinivasagan/bert_tiny_lda_20_v1
4
  tags:
5
  - generated_from_trainer
 
 
6
  metrics:
7
  - accuracy
8
  - f1
9
  model-index:
10
  - name: bert_tiny_lda_20_v1_qqp
11
- results: []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  ---
13
 
14
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -16,12 +34,12 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  # bert_tiny_lda_20_v1_qqp
18
 
19
- This model is a fine-tuned version of [gokulsrinivasagan/bert_tiny_lda_20_v1](https://huggingface.co/gokulsrinivasagan/bert_tiny_lda_20_v1) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
- - Loss: 0.4276
22
- - Accuracy: 0.8535
23
- - F1: 0.7964
24
- - Combined Score: 0.8249
25
 
26
  ## Model description
27
 
 
1
  ---
2
  library_name: transformers
3
+ language:
4
+ - en
5
  base_model: gokulsrinivasagan/bert_tiny_lda_20_v1
6
  tags:
7
  - generated_from_trainer
8
+ datasets:
9
+ - glue
10
  metrics:
11
  - accuracy
12
  - f1
13
  model-index:
14
  - name: bert_tiny_lda_20_v1_qqp
15
+   results:
16
+   - task:
17
+       name: Text Classification
18
+       type: text-classification
19
+     dataset:
20
+       name: GLUE QQP
21
+       type: glue
22
+       args: qqp
23
+     metrics:
24
+     - name: Accuracy
25
+       type: accuracy
26
+       value: 0.8355181795696265
27
+     - name: F1
28
+       type: f1
29
+       value: 0.7821386450006552
30
  ---
31
 
32
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
34
 
35
  # bert_tiny_lda_20_v1_qqp
36
 
37
+ This model is a fine-tuned version of [gokulsrinivasagan/bert_tiny_lda_20_v1](https://huggingface.co/gokulsrinivasagan/bert_tiny_lda_20_v1) on the GLUE QQP dataset.
38
  It achieves the following results on the evaluation set:
39
+ - Loss: 0.3641
40
+ - Accuracy: 0.8355
41
+ - F1: 0.7821
42
+ - Combined Score: 0.8088
43
 
44
  ## Model description
45
 
all_results.json CHANGED
@@ -1,17 +1,17 @@
1
  {
2
  "epoch": 8.0,
3
- "eval_accuracy": 0.6318327974276527,
4
- "eval_combined_score": 0.3159163987138264,
5
- "eval_f1": 0.0,
6
- "eval_loss": 0.6569345593452454,
7
- "eval_runtime": 12.4853,
8
  "eval_samples": 40430,
9
- "eval_samples_per_second": 3238.216,
10
- "eval_steps_per_second": 12.655,
11
  "total_flos": 7.633075201391002e+16,
12
- "train_loss": 0.6589229814446258,
13
- "train_runtime": 1678.7379,
14
  "train_samples": 363846,
15
- "train_samples_per_second": 10836.891,
16
- "train_steps_per_second": 42.353
17
  }
 
1
  {
2
  "epoch": 8.0,
3
+ "eval_accuracy": 0.8355181795696265,
4
+ "eval_combined_score": 0.8088284122851408,
5
+ "eval_f1": 0.7821386450006552,
6
+ "eval_loss": 0.3640855848789215,
7
+ "eval_runtime": 12.4138,
8
  "eval_samples": 40430,
9
+ "eval_samples_per_second": 3256.856,
10
+ "eval_steps_per_second": 12.728,
11
  "total_flos": 7.633075201391002e+16,
12
+ "train_loss": 0.2928101691180308,
13
+ "train_runtime": 1671.3114,
14
  "train_samples": 363846,
15
+ "train_samples_per_second": 10885.045,
16
+ "train_steps_per_second": 42.541
17
  }
eval_results.json CHANGED
@@ -1,11 +1,11 @@
1
  {
2
  "epoch": 8.0,
3
- "eval_accuracy": 0.6318327974276527,
4
- "eval_combined_score": 0.3159163987138264,
5
- "eval_f1": 0.0,
6
- "eval_loss": 0.6569345593452454,
7
- "eval_runtime": 12.4853,
8
  "eval_samples": 40430,
9
- "eval_samples_per_second": 3238.216,
10
- "eval_steps_per_second": 12.655
11
  }
 
1
  {
2
  "epoch": 8.0,
3
+ "eval_accuracy": 0.8355181795696265,
4
+ "eval_combined_score": 0.8088284122851408,
5
+ "eval_f1": 0.7821386450006552,
6
+ "eval_loss": 0.3640855848789215,
7
+ "eval_runtime": 12.4138,
8
  "eval_samples": 40430,
9
+ "eval_samples_per_second": 3256.856,
10
+ "eval_steps_per_second": 12.728
11
  }
logs/events.out.tfevents.1733325674.ki-g0008.1207389.25 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee653cce571097c8c8c3e394a37c5e7072236dbeb5ff7b0a029cf2c555ccfde5
3
+ size 515
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 8.0,
3
  "total_flos": 7.633075201391002e+16,
4
- "train_loss": 0.6589229814446258,
5
- "train_runtime": 1678.7379,
6
  "train_samples": 363846,
7
- "train_samples_per_second": 10836.891,
8
- "train_steps_per_second": 42.353
9
  }
 
1
  {
2
  "epoch": 8.0,
3
  "total_flos": 7.633075201391002e+16,
4
+ "train_loss": 0.2928101691180308,
5
+ "train_runtime": 1671.3114,
6
  "train_samples": 363846,
7
+ "train_samples_per_second": 10885.045,
8
+ "train_steps_per_second": 42.541
9
  }
trainer_state.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "best_metric": 0.6569345593452454,
3
  "best_model_checkpoint": "bert_tiny_lda_20_v1_qqp/checkpoint-4266",
4
  "epoch": 8.0,
5
  "eval_steps": 500,
@@ -10,156 +10,156 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "grad_norm": 0.3419596254825592,
14
- "learning_rate": 0.00098,
15
- "loss": 0.6612,
16
  "step": 1422
17
  },
18
  {
19
  "epoch": 1.0,
20
- "eval_accuracy": 0.6318327974276527,
21
- "eval_combined_score": 0.3159163987138264,
22
- "eval_f1": 0.0,
23
- "eval_loss": 0.6581591367721558,
24
- "eval_runtime": 12.6846,
25
- "eval_samples_per_second": 3187.326,
26
- "eval_steps_per_second": 12.456,
27
  "step": 1422
28
  },
29
  {
30
  "epoch": 2.0,
31
- "grad_norm": 0.10207448154687881,
32
- "learning_rate": 0.00096,
33
- "loss": 0.659,
34
  "step": 2844
35
  },
36
  {
37
  "epoch": 2.0,
38
- "eval_accuracy": 0.6318327974276527,
39
- "eval_combined_score": 0.3159163987138264,
40
- "eval_f1": 0.0,
41
- "eval_loss": 0.6575562953948975,
42
- "eval_runtime": 12.7272,
43
- "eval_samples_per_second": 3176.652,
44
- "eval_steps_per_second": 12.414,
45
  "step": 2844
46
  },
47
  {
48
  "epoch": 3.0,
49
- "grad_norm": 0.05861695855855942,
50
- "learning_rate": 0.00094,
51
- "loss": 0.6587,
52
  "step": 4266
53
  },
54
  {
55
  "epoch": 3.0,
56
- "eval_accuracy": 0.6318327974276527,
57
- "eval_combined_score": 0.3159163987138264,
58
- "eval_f1": 0.0,
59
- "eval_loss": 0.6569345593452454,
60
- "eval_runtime": 12.6759,
61
- "eval_samples_per_second": 3189.518,
62
- "eval_steps_per_second": 12.465,
63
  "step": 4266
64
  },
65
  {
66
  "epoch": 4.0,
67
- "grad_norm": 0.08356954157352448,
68
- "learning_rate": 0.00092,
69
- "loss": 0.6585,
70
  "step": 5688
71
  },
72
  {
73
  "epoch": 4.0,
74
- "eval_accuracy": 0.6318327974276527,
75
- "eval_combined_score": 0.3159163987138264,
76
- "eval_f1": 0.0,
77
- "eval_loss": 0.6573521494865417,
78
- "eval_runtime": 12.632,
79
- "eval_samples_per_second": 3200.604,
80
- "eval_steps_per_second": 12.508,
81
  "step": 5688
82
  },
83
  {
84
  "epoch": 5.0,
85
- "grad_norm": 0.12464679032564163,
86
- "learning_rate": 0.0009000000000000001,
87
- "loss": 0.6585,
88
  "step": 7110
89
  },
90
  {
91
  "epoch": 5.0,
92
- "eval_accuracy": 0.6318327974276527,
93
- "eval_combined_score": 0.3159163987138264,
94
- "eval_f1": 0.0,
95
- "eval_loss": 0.6573521494865417,
96
- "eval_runtime": 12.6421,
97
- "eval_samples_per_second": 3198.044,
98
- "eval_steps_per_second": 12.498,
99
  "step": 7110
100
  },
101
  {
102
  "epoch": 6.0,
103
- "grad_norm": 0.03694356605410576,
104
- "learning_rate": 0.00088,
105
- "loss": 0.6585,
106
  "step": 8532
107
  },
108
  {
109
  "epoch": 6.0,
110
- "eval_accuracy": 0.6318327974276527,
111
- "eval_combined_score": 0.3159163987138264,
112
- "eval_f1": 0.0,
113
- "eval_loss": 0.658168613910675,
114
- "eval_runtime": 12.4501,
115
- "eval_samples_per_second": 3247.364,
116
- "eval_steps_per_second": 12.691,
117
  "step": 8532
118
  },
119
  {
120
  "epoch": 7.0,
121
- "grad_norm": 0.050063714385032654,
122
- "learning_rate": 0.00086,
123
- "loss": 0.6585,
124
  "step": 9954
125
  },
126
  {
127
  "epoch": 7.0,
128
- "eval_accuracy": 0.6318327974276527,
129
- "eval_combined_score": 0.3159163987138264,
130
- "eval_f1": 0.0,
131
- "eval_loss": 0.6573427319526672,
132
- "eval_runtime": 12.5094,
133
- "eval_samples_per_second": 3231.962,
134
- "eval_steps_per_second": 12.63,
135
  "step": 9954
136
  },
137
  {
138
  "epoch": 8.0,
139
- "grad_norm": 0.11186650395393372,
140
- "learning_rate": 0.00084,
141
- "loss": 0.6585,
142
  "step": 11376
143
  },
144
  {
145
  "epoch": 8.0,
146
- "eval_accuracy": 0.6318327974276527,
147
- "eval_combined_score": 0.3159163987138264,
148
- "eval_f1": 0.0,
149
- "eval_loss": 0.6573427319526672,
150
- "eval_runtime": 12.4754,
151
- "eval_samples_per_second": 3240.777,
152
- "eval_steps_per_second": 12.665,
153
  "step": 11376
154
  },
155
  {
156
  "epoch": 8.0,
157
  "step": 11376,
158
  "total_flos": 7.633075201391002e+16,
159
- "train_loss": 0.6589229814446258,
160
- "train_runtime": 1678.7379,
161
- "train_samples_per_second": 10836.891,
162
- "train_steps_per_second": 42.353
163
  }
164
  ],
165
  "logging_steps": 1,
 
1
  {
2
+ "best_metric": 0.3640855848789215,
3
  "best_model_checkpoint": "bert_tiny_lda_20_v1_qqp/checkpoint-4266",
4
  "epoch": 8.0,
5
  "eval_steps": 500,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "grad_norm": 2.2224717140197754,
14
+ "learning_rate": 4.9e-05,
15
+ "loss": 0.4896,
16
  "step": 1422
17
  },
18
  {
19
  "epoch": 1.0,
20
+ "eval_accuracy": 0.7902300272075191,
21
+ "eval_combined_score": 0.7359430605989925,
22
+ "eval_f1": 0.6816560939904658,
23
+ "eval_loss": 0.4397190511226654,
24
+ "eval_runtime": 12.5102,
25
+ "eval_samples_per_second": 3231.772,
26
+ "eval_steps_per_second": 12.63,
27
  "step": 1422
28
  },
29
  {
30
  "epoch": 2.0,
31
+ "grad_norm": 2.527693510055542,
32
+ "learning_rate": 4.8e-05,
33
+ "loss": 0.3891,
34
  "step": 2844
35
  },
36
  {
37
  "epoch": 2.0,
38
+ "eval_accuracy": 0.8246599060103883,
39
+ "eval_combined_score": 0.7960138699152457,
40
+ "eval_f1": 0.7673678338201031,
41
+ "eval_loss": 0.3805816173553467,
42
+ "eval_runtime": 12.45,
43
+ "eval_samples_per_second": 3247.397,
44
+ "eval_steps_per_second": 12.691,
45
  "step": 2844
46
  },
47
  {
48
  "epoch": 3.0,
49
+ "grad_norm": 2.5017569065093994,
50
+ "learning_rate": 4.7e-05,
51
+ "loss": 0.3332,
52
  "step": 4266
53
  },
54
  {
55
  "epoch": 3.0,
56
+ "eval_accuracy": 0.8355181795696265,
57
+ "eval_combined_score": 0.8088284122851408,
58
+ "eval_f1": 0.7821386450006552,
59
+ "eval_loss": 0.3640855848789215,
60
+ "eval_runtime": 12.5303,
61
+ "eval_samples_per_second": 3226.578,
62
+ "eval_steps_per_second": 12.609,
63
  "step": 4266
64
  },
65
  {
66
  "epoch": 4.0,
67
+ "grad_norm": 3.1053719520568848,
68
+ "learning_rate": 4.600000000000001e-05,
69
+ "loss": 0.29,
70
  "step": 5688
71
  },
72
  {
73
  "epoch": 4.0,
74
+ "eval_accuracy": 0.8448182043037349,
75
+ "eval_combined_score": 0.8157882062236504,
76
+ "eval_f1": 0.786758208143566,
77
+ "eval_loss": 0.3665860891342163,
78
+ "eval_runtime": 12.4885,
79
+ "eval_samples_per_second": 3237.387,
80
+ "eval_steps_per_second": 12.652,
81
  "step": 5688
82
  },
83
  {
84
  "epoch": 5.0,
85
+ "grad_norm": 2.464869976043701,
86
+ "learning_rate": 4.5e-05,
87
+ "loss": 0.2535,
88
  "step": 7110
89
  },
90
  {
91
  "epoch": 5.0,
92
+ "eval_accuracy": 0.8485035864457087,
93
+ "eval_combined_score": 0.8231024058991432,
94
+ "eval_f1": 0.7977012253525777,
95
+ "eval_loss": 0.37237754464149475,
96
+ "eval_runtime": 12.5228,
97
+ "eval_samples_per_second": 3228.523,
98
+ "eval_steps_per_second": 12.617,
99
  "step": 7110
100
  },
101
  {
102
  "epoch": 6.0,
103
+ "grad_norm": 2.264439821243286,
104
+ "learning_rate": 4.4000000000000006e-05,
105
+ "loss": 0.2212,
106
  "step": 8532
107
  },
108
  {
109
  "epoch": 6.0,
110
+ "eval_accuracy": 0.8517437546376453,
111
+ "eval_combined_score": 0.8279754900450264,
112
+ "eval_f1": 0.8042072254524074,
113
+ "eval_loss": 0.3715839982032776,
114
+ "eval_runtime": 12.4973,
115
+ "eval_samples_per_second": 3235.1,
116
+ "eval_steps_per_second": 12.643,
117
  "step": 8532
118
  },
119
  {
120
  "epoch": 7.0,
121
+ "grad_norm": 2.810699701309204,
122
+ "learning_rate": 4.3e-05,
123
+ "loss": 0.1947,
124
  "step": 9954
125
  },
126
  {
127
  "epoch": 7.0,
128
+ "eval_accuracy": 0.8528320554044027,
129
+ "eval_combined_score": 0.8289389634820179,
130
+ "eval_f1": 0.805045871559633,
131
+ "eval_loss": 0.4038587510585785,
132
+ "eval_runtime": 12.2445,
133
+ "eval_samples_per_second": 3301.882,
134
+ "eval_steps_per_second": 12.904,
135
  "step": 9954
136
  },
137
  {
138
  "epoch": 8.0,
139
+ "grad_norm": 2.704545259475708,
140
+ "learning_rate": 4.2e-05,
141
+ "loss": 0.1711,
142
  "step": 11376
143
  },
144
  {
145
  "epoch": 8.0,
146
+ "eval_accuracy": 0.8534504081127875,
147
+ "eval_combined_score": 0.8249385694575275,
148
+ "eval_f1": 0.7964267308022677,
149
+ "eval_loss": 0.4275510013103485,
150
+ "eval_runtime": 12.4169,
151
+ "eval_samples_per_second": 3256.041,
152
+ "eval_steps_per_second": 12.725,
153
  "step": 11376
154
  },
155
  {
156
  "epoch": 8.0,
157
  "step": 11376,
158
  "total_flos": 7.633075201391002e+16,
159
+ "train_loss": 0.2928101691180308,
160
+ "train_runtime": 1671.3114,
161
+ "train_samples_per_second": 10885.045,
162
+ "train_steps_per_second": 42.541
163
  }
164
  ],
165
  "logging_steps": 1,