gokuls committed on
Commit
772aaff
1 Parent(s): 5613cdc

End of training

Browse files
README.md CHANGED
@@ -1,4 +1,6 @@
1
  ---
 
 
2
  license: apache-2.0
3
  tags:
4
  - generated_from_trainer
@@ -13,7 +15,7 @@ model-index:
13
  name: Text Classification
14
  type: text-classification
15
  dataset:
16
- name: glue
17
  type: glue
18
  config: qnli
19
  split: validation
@@ -21,7 +23,7 @@ model-index:
21
  metrics:
22
  - name: Accuracy
23
  type: accuracy
24
- value: 0.589602782354018
25
  ---
26
 
27
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -29,10 +31,10 @@ should probably proofread and complete it, then remove this comment. -->
29
 
30
  # mobilebert_sa_GLUE_Experiment_qnli
31
 
32
- This model is a fine-tuned version of [google/mobilebert-uncased](https://huggingface.co/google/mobilebert-uncased) on the glue dataset.
33
  It achieves the following results on the evaluation set:
34
- - Loss: 0.7055
35
- - Accuracy: 0.5896
36
 
37
  ## Model description
38
 
 
1
  ---
2
+ language:
3
+ - en
4
  license: apache-2.0
5
  tags:
6
  - generated_from_trainer
 
15
  name: Text Classification
16
  type: text-classification
17
  dataset:
18
+ name: GLUE QNLI
19
  type: glue
20
  config: qnli
21
  split: validation
 
23
  metrics:
24
  - name: Accuracy
25
  type: accuracy
26
+ value: 0.6093721398498994
27
  ---
28
 
29
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
31
 
32
  # mobilebert_sa_GLUE_Experiment_qnli
33
 
34
+ This model is a fine-tuned version of [google/mobilebert-uncased](https://huggingface.co/google/mobilebert-uncased) on the GLUE QNLI dataset.
35
  It achieves the following results on the evaluation set:
36
+ - Loss: 0.6487
37
+ - Accuracy: 0.6094
38
 
39
  ## Model description
40
 
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
  "epoch": 7.0,
3
- "eval_accuracy": 0.6086399414241259,
4
- "eval_loss": 0.652438759803772,
5
- "eval_runtime": 5.4307,
6
  "eval_samples": 5463,
7
- "eval_samples_per_second": 1005.942,
8
- "eval_steps_per_second": 4.051,
9
- "train_loss": 0.6110314930772948,
10
- "train_runtime": 2217.6487,
11
  "train_samples": 104743,
12
- "train_samples_per_second": 2361.578,
13
- "train_steps_per_second": 9.244
14
  }
 
1
  {
2
  "epoch": 7.0,
3
+ "eval_accuracy": 0.6093721398498994,
4
+ "eval_loss": 0.6487034559249878,
5
+ "eval_runtime": 8.8318,
6
  "eval_samples": 5463,
7
+ "eval_samples_per_second": 618.561,
8
+ "eval_steps_per_second": 4.869,
9
+ "train_loss": 0.603873611681752,
10
+ "train_runtime": 4016.5926,
11
  "train_samples": 104743,
12
+ "train_samples_per_second": 1303.879,
13
+ "train_steps_per_second": 10.195
14
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 7.0,
3
- "eval_accuracy": 0.6086399414241259,
4
- "eval_loss": 0.652438759803772,
5
- "eval_runtime": 5.4307,
6
  "eval_samples": 5463,
7
- "eval_samples_per_second": 1005.942,
8
- "eval_steps_per_second": 4.051
9
  }
 
1
  {
2
  "epoch": 7.0,
3
+ "eval_accuracy": 0.6093721398498994,
4
+ "eval_loss": 0.6487034559249878,
5
+ "eval_runtime": 8.8318,
6
  "eval_samples": 5463,
7
+ "eval_samples_per_second": 618.561,
8
+ "eval_steps_per_second": 4.869
9
  }
logs/events.out.tfevents.1674624040.garda.1933772.8 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22f9ee41b991603e11e8e06c11c3ea53371f180b56206617bb3f628c3f8ac999
3
+ size 363
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 7.0,
3
- "train_loss": 0.6110314930772948,
4
- "train_runtime": 2217.6487,
5
  "train_samples": 104743,
6
- "train_samples_per_second": 2361.578,
7
- "train_steps_per_second": 9.244
8
  }
 
1
  {
2
  "epoch": 7.0,
3
+ "train_loss": 0.603873611681752,
4
+ "train_runtime": 4016.5926,
5
  "train_samples": 104743,
6
+ "train_samples_per_second": 1303.879,
7
+ "train_steps_per_second": 10.195
8
  }
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 0.652438759803772,
3
- "best_model_checkpoint": "mobilebert_sa_GLUE_Experiment_qnli/checkpoint-820",
4
  "epoch": 7.0,
5
- "global_step": 2870,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -10,121 +10,121 @@
10
  {
11
  "epoch": 1.0,
12
  "learning_rate": 4.9e-05,
13
- "loss": 0.6824,
14
- "step": 410
15
  },
16
  {
17
  "epoch": 1.0,
18
- "eval_accuracy": 0.5996705107084019,
19
- "eval_loss": 0.6577625870704651,
20
- "eval_runtime": 5.6422,
21
- "eval_samples_per_second": 968.24,
22
- "eval_steps_per_second": 3.899,
23
- "step": 410
24
  },
25
  {
26
  "epoch": 2.0,
27
  "learning_rate": 4.8e-05,
28
- "loss": 0.6441,
29
- "step": 820
30
  },
31
  {
32
  "epoch": 2.0,
33
- "eval_accuracy": 0.6086399414241259,
34
- "eval_loss": 0.652438759803772,
35
- "eval_runtime": 5.5355,
36
- "eval_samples_per_second": 986.904,
37
- "eval_steps_per_second": 3.974,
38
- "step": 820
39
  },
40
  {
41
  "epoch": 3.0,
42
- "learning_rate": 4.7e-05,
43
- "loss": 0.6202,
44
- "step": 1230
45
  },
46
  {
47
  "epoch": 3.0,
48
- "eval_accuracy": 0.6071755445725792,
49
- "eval_loss": 0.6553815603256226,
50
- "eval_runtime": 5.4718,
51
- "eval_samples_per_second": 998.389,
52
- "eval_steps_per_second": 4.021,
53
- "step": 1230
54
  },
55
  {
56
  "epoch": 4.0,
57
- "learning_rate": 4.600000000000001e-05,
58
- "loss": 0.6009,
59
- "step": 1640
60
  },
61
  {
62
  "epoch": 4.0,
63
- "eval_accuracy": 0.6051619989017024,
64
- "eval_loss": 0.6619159579277039,
65
- "eval_runtime": 5.4488,
66
- "eval_samples_per_second": 1002.613,
67
- "eval_steps_per_second": 4.038,
68
- "step": 1640
69
  },
70
  {
71
  "epoch": 5.0,
72
- "learning_rate": 4.5e-05,
73
- "loss": 0.587,
74
- "step": 2050
75
  },
76
  {
77
  "epoch": 5.0,
78
- "eval_accuracy": 0.598572213069742,
79
- "eval_loss": 0.668364405632019,
80
- "eval_runtime": 5.5979,
81
- "eval_samples_per_second": 975.905,
82
- "eval_steps_per_second": 3.93,
83
- "step": 2050
84
  },
85
  {
86
  "epoch": 6.0,
87
- "learning_rate": 4.400243902439024e-05,
88
- "loss": 0.5755,
89
- "step": 2460
90
  },
91
  {
92
  "epoch": 6.0,
93
  "eval_accuracy": 0.5978400146439685,
94
- "eval_loss": 0.680844783782959,
95
- "eval_runtime": 5.501,
96
- "eval_samples_per_second": 993.091,
97
- "eval_steps_per_second": 3.999,
98
- "step": 2460
99
  },
100
  {
101
  "epoch": 7.0,
102
- "learning_rate": 4.3002439024390246e-05,
103
- "loss": 0.5671,
104
- "step": 2870
105
  },
106
  {
107
  "epoch": 7.0,
108
- "eval_accuracy": 0.5844773933736043,
109
- "eval_loss": 0.7068149447441101,
110
- "eval_runtime": 5.4741,
111
- "eval_samples_per_second": 997.977,
112
- "eval_steps_per_second": 4.019,
113
- "step": 2870
114
  },
115
  {
116
  "epoch": 7.0,
117
- "step": 2870,
118
- "total_flos": 2.328477811133645e+16,
119
- "train_loss": 0.6110314930772948,
120
- "train_runtime": 2217.6487,
121
- "train_samples_per_second": 2361.578,
122
- "train_steps_per_second": 9.244
123
  }
124
  ],
125
- "max_steps": 20500,
126
  "num_train_epochs": 50,
127
- "total_flos": 2.328477811133645e+16,
128
  "trial_name": null,
129
  "trial_params": null
130
  }
 
1
  {
2
+ "best_metric": 0.6487034559249878,
3
+ "best_model_checkpoint": "mobilebert_sa_GLUE_Experiment_qnli/checkpoint-1638",
4
  "epoch": 7.0,
5
+ "global_step": 5733,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
10
  {
11
  "epoch": 1.0,
12
  "learning_rate": 4.9e-05,
13
+ "loss": 0.6754,
14
+ "step": 819
15
  },
16
  {
17
  "epoch": 1.0,
18
+ "eval_accuracy": 0.6177924217462932,
19
+ "eval_loss": 0.6491163372993469,
20
+ "eval_runtime": 8.8255,
21
+ "eval_samples_per_second": 619.001,
22
+ "eval_steps_per_second": 4.872,
23
+ "step": 819
24
  },
25
  {
26
  "epoch": 2.0,
27
  "learning_rate": 4.8e-05,
28
+ "loss": 0.6369,
29
+ "step": 1638
30
  },
31
  {
32
  "epoch": 2.0,
33
+ "eval_accuracy": 0.6093721398498994,
34
+ "eval_loss": 0.6487034559249878,
35
+ "eval_runtime": 8.8229,
36
+ "eval_samples_per_second": 619.186,
37
+ "eval_steps_per_second": 4.874,
38
+ "step": 1638
39
  },
40
  {
41
  "epoch": 3.0,
42
+ "learning_rate": 4.7001221001221e-05,
43
+ "loss": 0.6125,
44
+ "step": 2457
45
  },
46
  {
47
  "epoch": 3.0,
48
+ "eval_accuracy": 0.6088229910305692,
49
+ "eval_loss": 0.6555132269859314,
50
+ "eval_runtime": 8.8047,
51
+ "eval_samples_per_second": 620.464,
52
+ "eval_steps_per_second": 4.884,
53
+ "step": 2457
54
  },
55
  {
56
  "epoch": 4.0,
57
+ "learning_rate": 4.6001221001221e-05,
58
+ "loss": 0.5942,
59
+ "step": 3276
60
  },
61
  {
62
  "epoch": 4.0,
63
+ "eval_accuracy": 0.6027823540179389,
64
+ "eval_loss": 0.6647323369979858,
65
+ "eval_runtime": 8.7921,
66
+ "eval_samples_per_second": 621.356,
67
+ "eval_steps_per_second": 4.891,
68
+ "step": 3276
69
  },
70
  {
71
  "epoch": 5.0,
72
+ "learning_rate": 4.5001221001221004e-05,
73
+ "loss": 0.5805,
74
+ "step": 4095
75
  },
76
  {
77
  "epoch": 5.0,
78
+ "eval_accuracy": 0.5934468240893283,
79
+ "eval_loss": 0.6735221147537231,
80
+ "eval_runtime": 8.6724,
81
+ "eval_samples_per_second": 629.928,
82
+ "eval_steps_per_second": 4.958,
83
+ "step": 4095
84
  },
85
  {
86
  "epoch": 6.0,
87
+ "learning_rate": 4.400244200244201e-05,
88
+ "loss": 0.5689,
89
+ "step": 4914
90
  },
91
  {
92
  "epoch": 6.0,
93
  "eval_accuracy": 0.5978400146439685,
94
+ "eval_loss": 0.6893225312232971,
95
+ "eval_runtime": 8.8418,
96
+ "eval_samples_per_second": 617.861,
97
+ "eval_steps_per_second": 4.863,
98
+ "step": 4914
99
  },
100
  {
101
  "epoch": 7.0,
102
+ "learning_rate": 4.3002442002442004e-05,
103
+ "loss": 0.5587,
104
+ "step": 5733
105
  },
106
  {
107
  "epoch": 7.0,
108
+ "eval_accuracy": 0.589602782354018,
109
+ "eval_loss": 0.7054734230041504,
110
+ "eval_runtime": 8.8696,
111
+ "eval_samples_per_second": 615.924,
112
+ "eval_steps_per_second": 4.848,
113
+ "step": 5733
114
  },
115
  {
116
  "epoch": 7.0,
117
+ "step": 5733,
118
+ "total_flos": 2.2988974586855424e+16,
119
+ "train_loss": 0.603873611681752,
120
+ "train_runtime": 4016.5926,
121
+ "train_samples_per_second": 1303.879,
122
+ "train_steps_per_second": 10.195
123
  }
124
  ],
125
+ "max_steps": 40950,
126
  "num_train_epochs": 50,
127
+ "total_flos": 2.2988974586855424e+16,
128
  "trial_name": null,
129
  "trial_params": null
130
  }