gokuls committed on
Commit
2c00811
·
1 Parent(s): ffa1bc0

End of training

Browse files
README.md CHANGED
@@ -1,4 +1,6 @@
1
  ---
 
 
2
  tags:
3
  - generated_from_trainer
4
  datasets:
@@ -13,7 +15,7 @@ model-index:
13
  name: Text Classification
14
  type: text-classification
15
  dataset:
16
- name: glue
17
  type: glue
18
  config: mrpc
19
  split: validation
@@ -24,7 +26,7 @@ model-index:
24
  value: 0.6838235294117647
25
  - name: F1
26
  type: f1
27
- value: 0.7867768595041322
28
  ---
29
 
30
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -32,12 +34,12 @@ should probably proofread and complete it, then remove this comment. -->
32
 
33
  # hBERTv2_new_no_pretrain_mrpc
34
 
35
- This model is a fine-tuned version of [](https://huggingface.co/) on the glue dataset.
36
  It achieves the following results on the evaluation set:
37
- - Loss: 1.1249
38
  - Accuracy: 0.6838
39
- - F1: 0.7868
40
- - Combined Score: 0.7353
41
 
42
  ## Model description
43
 
 
1
  ---
2
+ language:
3
+ - en
4
  tags:
5
  - generated_from_trainer
6
  datasets:
 
15
  name: Text Classification
16
  type: text-classification
17
  dataset:
18
+ name: GLUE MRPC
19
  type: glue
20
  config: mrpc
21
  split: validation
 
26
  value: 0.6838235294117647
27
  - name: F1
28
  type: f1
29
+ value: 0.7895595432300163
30
  ---
31
 
32
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
34
 
35
  # hBERTv2_new_no_pretrain_mrpc
36
 
37
+ This model is a fine-tuned version of [](https://huggingface.co/) on the GLUE MRPC dataset.
38
  It achieves the following results on the evaluation set:
39
+ - Loss: 0.5914
40
  - Accuracy: 0.6838
41
+ - F1: 0.7896
42
+ - Combined Score: 0.7367
43
 
44
  ## Model description
45
 
all_results.json CHANGED
@@ -1,16 +1,16 @@
1
  {
2
- "epoch": 6.0,
3
  "eval_accuracy": 0.6838235294117647,
4
- "eval_combined_score": 0.7480253018237863,
5
- "eval_f1": 0.8122270742358079,
6
- "eval_loss": 0.6242262721061707,
7
- "eval_runtime": 0.6905,
8
  "eval_samples": 408,
9
- "eval_samples_per_second": 590.877,
10
- "eval_steps_per_second": 5.793,
11
- "train_loss": 0.7478585407651704,
12
- "train_runtime": 128.9129,
13
  "train_samples": 3668,
14
- "train_samples_per_second": 1422.666,
15
- "train_steps_per_second": 11.248
16
  }
 
1
  {
2
+ "epoch": 7.0,
3
  "eval_accuracy": 0.6838235294117647,
4
+ "eval_combined_score": 0.7366915363208906,
5
+ "eval_f1": 0.7895595432300163,
6
+ "eval_loss": 0.5914379358291626,
7
+ "eval_runtime": 1.8267,
8
  "eval_samples": 408,
9
+ "eval_samples_per_second": 223.355,
10
+ "eval_steps_per_second": 2.19,
11
+ "train_loss": 0.42316288548737324,
12
+ "train_runtime": 454.4346,
13
  "train_samples": 3668,
14
+ "train_samples_per_second": 403.578,
15
+ "train_steps_per_second": 3.191
16
  }
eval_results.json CHANGED
@@ -1,11 +1,11 @@
1
  {
2
- "epoch": 6.0,
3
  "eval_accuracy": 0.6838235294117647,
4
- "eval_combined_score": 0.7480253018237863,
5
- "eval_f1": 0.8122270742358079,
6
- "eval_loss": 0.6242262721061707,
7
- "eval_runtime": 0.6905,
8
  "eval_samples": 408,
9
- "eval_samples_per_second": 590.877,
10
- "eval_steps_per_second": 5.793
11
  }
 
1
  {
2
+ "epoch": 7.0,
3
  "eval_accuracy": 0.6838235294117647,
4
+ "eval_combined_score": 0.7366915363208906,
5
+ "eval_f1": 0.7895595432300163,
6
+ "eval_loss": 0.5914379358291626,
7
+ "eval_runtime": 1.8267,
8
  "eval_samples": 408,
9
+ "eval_samples_per_second": 223.355,
10
+ "eval_steps_per_second": 2.19
11
  }
logs/events.out.tfevents.1686749324.garda.2589545.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aaed7936e0110bc2b9b2f2339b944c8cff507c0e7c9d592aa1bbaf89194af206
3
+ size 467
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 6.0,
3
- "train_loss": 0.7478585407651704,
4
- "train_runtime": 128.9129,
5
  "train_samples": 3668,
6
- "train_samples_per_second": 1422.666,
7
- "train_steps_per_second": 11.248
8
  }
 
1
  {
2
+ "epoch": 7.0,
3
+ "train_loss": 0.42316288548737324,
4
+ "train_runtime": 454.4346,
5
  "train_samples": 3668,
6
+ "train_samples_per_second": 403.578,
7
+ "train_steps_per_second": 3.191
8
  }
trainer_state.json CHANGED
@@ -1,16 +1,16 @@
1
  {
2
- "best_metric": 0.6242262721061707,
3
- "best_model_checkpoint": "hBERTv2_new_no_pretrain_mrpc/checkpoint-29",
4
- "epoch": 6.0,
5
- "global_step": 174,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 1.0,
12
- "learning_rate": 0.0004906896551724138,
13
- "loss": 1.2542,
14
  "step": 29
15
  },
16
  {
@@ -18,110 +18,127 @@
18
  "eval_accuracy": 0.6838235294117647,
19
  "eval_combined_score": 0.7480253018237863,
20
  "eval_f1": 0.8122270742358079,
21
- "eval_loss": 0.6242262721061707,
22
- "eval_runtime": 0.6879,
23
- "eval_samples_per_second": 593.139,
24
- "eval_steps_per_second": 5.815,
25
  "step": 29
26
  },
27
  {
28
  "epoch": 2.0,
29
- "learning_rate": 0.0004806896551724138,
30
- "loss": 0.6656,
31
  "step": 58
32
  },
33
  {
34
  "epoch": 2.0,
35
  "eval_accuracy": 0.6838235294117647,
36
- "eval_combined_score": 0.7480253018237863,
37
- "eval_f1": 0.8122270742358079,
38
- "eval_loss": 0.6247109770774841,
39
- "eval_runtime": 0.6908,
40
- "eval_samples_per_second": 590.621,
41
- "eval_steps_per_second": 5.79,
42
  "step": 58
43
  },
44
  {
45
  "epoch": 3.0,
46
- "learning_rate": 0.0004706896551724138,
47
- "loss": 0.648,
48
  "step": 87
49
  },
50
  {
51
  "epoch": 3.0,
52
- "eval_accuracy": 0.6838235294117647,
53
- "eval_combined_score": 0.7480253018237863,
54
- "eval_f1": 0.8122270742358079,
55
- "eval_loss": 0.6308785080909729,
56
- "eval_runtime": 0.6926,
57
- "eval_samples_per_second": 589.073,
58
- "eval_steps_per_second": 5.775,
59
  "step": 87
60
  },
61
  {
62
  "epoch": 4.0,
63
- "learning_rate": 0.0004606896551724138,
64
- "loss": 0.6448,
65
  "step": 116
66
  },
67
  {
68
  "epoch": 4.0,
69
- "eval_accuracy": 0.6838235294117647,
70
- "eval_combined_score": 0.7480253018237863,
71
- "eval_f1": 0.8122270742358079,
72
- "eval_loss": 0.6373123526573181,
73
- "eval_runtime": 0.6915,
74
- "eval_samples_per_second": 589.995,
75
- "eval_steps_per_second": 5.784,
76
  "step": 116
77
  },
78
  {
79
  "epoch": 5.0,
80
- "learning_rate": 0.00045068965517241377,
81
- "loss": 0.6423,
82
  "step": 145
83
  },
84
  {
85
  "epoch": 5.0,
86
- "eval_accuracy": 0.6838235294117647,
87
- "eval_combined_score": 0.7480253018237863,
88
- "eval_f1": 0.8122270742358079,
89
- "eval_loss": 0.6278728246688843,
90
- "eval_runtime": 0.6904,
91
- "eval_samples_per_second": 591.002,
92
- "eval_steps_per_second": 5.794,
93
  "step": 145
94
  },
95
  {
96
  "epoch": 6.0,
97
- "learning_rate": 0.00044068965517241385,
98
- "loss": 0.6323,
99
  "step": 174
100
  },
101
  {
102
  "epoch": 6.0,
103
- "eval_accuracy": 0.6838235294117647,
104
- "eval_combined_score": 0.7480253018237863,
105
- "eval_f1": 0.8122270742358079,
106
- "eval_loss": 0.6351138949394226,
107
- "eval_runtime": 0.6918,
108
- "eval_samples_per_second": 589.757,
109
- "eval_steps_per_second": 5.782,
110
  "step": 174
111
  },
112
  {
113
- "epoch": 6.0,
114
- "step": 174,
115
- "total_flos": 3220632565186560.0,
116
- "train_loss": 0.7478585407651704,
117
- "train_runtime": 128.9129,
118
- "train_samples_per_second": 1422.666,
119
- "train_steps_per_second": 11.248
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
  }
121
  ],
122
  "max_steps": 1450,
123
  "num_train_epochs": 50,
124
- "total_flos": 3220632565186560.0,
125
  "trial_name": null,
126
  "trial_params": null
127
  }
 
1
  {
2
+ "best_metric": 0.5914379358291626,
3
+ "best_model_checkpoint": "hBERTv2_new_no_pretrain_mrpc/checkpoint-58",
4
+ "epoch": 7.0,
5
+ "global_step": 203,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 1.0,
12
+ "learning_rate": 3.9200000000000004e-05,
13
+ "loss": 0.6685,
14
  "step": 29
15
  },
16
  {
 
18
  "eval_accuracy": 0.6838235294117647,
19
  "eval_combined_score": 0.7480253018237863,
20
  "eval_f1": 0.8122270742358079,
21
+ "eval_loss": 0.6107444167137146,
22
+ "eval_runtime": 1.8389,
23
+ "eval_samples_per_second": 221.872,
24
+ "eval_steps_per_second": 2.175,
25
  "step": 29
26
  },
27
  {
28
  "epoch": 2.0,
29
+ "learning_rate": 3.8400000000000005e-05,
30
+ "loss": 0.6337,
31
  "step": 58
32
  },
33
  {
34
  "epoch": 2.0,
35
  "eval_accuracy": 0.6838235294117647,
36
+ "eval_combined_score": 0.7366915363208906,
37
+ "eval_f1": 0.7895595432300163,
38
+ "eval_loss": 0.5914379358291626,
39
+ "eval_runtime": 1.8463,
40
+ "eval_samples_per_second": 220.985,
41
+ "eval_steps_per_second": 2.167,
42
  "step": 58
43
  },
44
  {
45
  "epoch": 3.0,
46
+ "learning_rate": 3.76e-05,
47
+ "loss": 0.529,
48
  "step": 87
49
  },
50
  {
51
  "epoch": 3.0,
52
+ "eval_accuracy": 0.6642156862745098,
53
+ "eval_combined_score": 0.7173674746280421,
54
+ "eval_f1": 0.7705192629815745,
55
+ "eval_loss": 0.6385138034820557,
56
+ "eval_runtime": 1.8437,
57
+ "eval_samples_per_second": 221.293,
58
+ "eval_steps_per_second": 2.17,
59
  "step": 87
60
  },
61
  {
62
  "epoch": 4.0,
63
+ "learning_rate": 3.680000000000001e-05,
64
+ "loss": 0.4182,
65
  "step": 116
66
  },
67
  {
68
  "epoch": 4.0,
69
+ "eval_accuracy": 0.6985294117647058,
70
+ "eval_combined_score": 0.7518003635685653,
71
+ "eval_f1": 0.8050713153724247,
72
+ "eval_loss": 0.6618954539299011,
73
+ "eval_runtime": 1.8368,
74
+ "eval_samples_per_second": 222.12,
75
+ "eval_steps_per_second": 2.178,
76
  "step": 116
77
  },
78
  {
79
  "epoch": 5.0,
80
+ "learning_rate": 3.6e-05,
81
+ "loss": 0.3095,
82
  "step": 145
83
  },
84
  {
85
  "epoch": 5.0,
86
+ "eval_accuracy": 0.6470588235294118,
87
+ "eval_combined_score": 0.7019077901430844,
88
+ "eval_f1": 0.7567567567567568,
89
+ "eval_loss": 1.0039604902267456,
90
+ "eval_runtime": 1.8407,
91
+ "eval_samples_per_second": 221.66,
92
+ "eval_steps_per_second": 2.173,
93
  "step": 145
94
  },
95
  {
96
  "epoch": 6.0,
97
+ "learning_rate": 3.52e-05,
98
+ "loss": 0.2219,
99
  "step": 174
100
  },
101
  {
102
  "epoch": 6.0,
103
+ "eval_accuracy": 0.6225490196078431,
104
+ "eval_combined_score": 0.6659914909359972,
105
+ "eval_f1": 0.7094339622641511,
106
+ "eval_loss": 0.945849597454071,
107
+ "eval_runtime": 1.8381,
108
+ "eval_samples_per_second": 221.964,
109
+ "eval_steps_per_second": 2.176,
110
  "step": 174
111
  },
112
  {
113
+ "epoch": 7.0,
114
+ "learning_rate": 3.44e-05,
115
+ "loss": 0.1813,
116
+ "step": 203
117
+ },
118
+ {
119
+ "epoch": 7.0,
120
+ "eval_accuracy": 0.6838235294117647,
121
+ "eval_combined_score": 0.7353001944579485,
122
+ "eval_f1": 0.7867768595041322,
123
+ "eval_loss": 1.1248677968978882,
124
+ "eval_runtime": 1.8303,
125
+ "eval_samples_per_second": 222.913,
126
+ "eval_steps_per_second": 2.185,
127
+ "step": 203
128
+ },
129
+ {
130
+ "epoch": 7.0,
131
+ "step": 203,
132
+ "total_flos": 3757404659384320.0,
133
+ "train_loss": 0.42316288548737324,
134
+ "train_runtime": 454.4346,
135
+ "train_samples_per_second": 403.578,
136
+ "train_steps_per_second": 3.191
137
  }
138
  ],
139
  "max_steps": 1450,
140
  "num_train_epochs": 50,
141
+ "total_flos": 3757404659384320.0,
142
  "trial_name": null,
143
  "trial_params": null
144
  }