hafizhaaarama committed on
Commit
cc67a1a
·
verified ·
1 Parent(s): 8c5153f

Upload folder using huggingface_hub

Browse files
README.md CHANGED
@@ -16,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  This model is a fine-tuned version of [distilbert-base-uncased](https://huggingface.co/distilbert-base-uncased) on the None dataset.
18
  It achieves the following results on the evaluation set:
19
- - Loss: 0.0067
20
 
21
  ## Model description
22
 
@@ -47,9 +47,9 @@ The following hyperparameters were used during training:
47
 
48
  | Training Loss | Epoch | Step | Validation Loss |
49
  |:-------------:|:-----:|:----:|:---------------:|
50
- | 0.1142 | 1.0 | 65 | 0.0378 |
51
- | 0.0136 | 2.0 | 130 | 0.0085 |
52
- | 0.0112 | 3.0 | 195 | 0.0067 |
53
 
54
 
55
  ### Framework versions
 
16
 
17
  This model is a fine-tuned version of [distilbert-base-uncased](https://huggingface.co/distilbert-base-uncased) on the None dataset.
18
  It achieves the following results on the evaluation set:
19
+ - Loss: 0.0070
20
 
21
  ## Model description
22
 
 
47
 
48
  | Training Loss | Epoch | Step | Validation Loss |
49
  |:-------------:|:-----:|:----:|:---------------:|
50
+ | 0.1202 | 1.0 | 65 | 0.0436 |
51
+ | 0.0148 | 2.0 | 130 | 0.0088 |
52
+ | 0.011 | 3.0 | 195 | 0.0070 |
53
 
54
 
55
  ### Framework versions
checkpoint-130/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:41179063387536d50edb87d73a7a8e3e17ab0178c6928a137b1cc12b48169fd2
3
  size 265491420
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27861ce206204eaba3e064bc1234915aa98cf2923b75a7cba38c64f75460a81f
3
  size 265491420
checkpoint-130/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f751b1c6ca73faa11fb8d4501feb09b4dbf8ad477891bc64f993f267bb8c3e9b
3
  size 531042682
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:360b3ecc74ac89b61b1dbe47fd7e72dc35849c97292bb0c75f39106bf526ef29
3
  size 531042682
checkpoint-130/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "best_global_step": 130,
3
- "best_metric": 0.00848204642534256,
4
  "best_model_checkpoint": "./multitask_model/checkpoint-130",
5
  "epoch": 2.0,
6
  "eval_steps": 500,
@@ -11,109 +11,109 @@
11
  "log_history": [
12
  {
13
  "epoch": 0.15384615384615385,
14
- "grad_norm": 8.5421142578125,
15
  "learning_rate": 1.907692307692308e-05,
16
- "loss": 2.3934,
17
  "step": 10
18
  },
19
  {
20
  "epoch": 0.3076923076923077,
21
- "grad_norm": 7.595258712768555,
22
  "learning_rate": 1.8051282051282053e-05,
23
- "loss": 1.8013,
24
  "step": 20
25
  },
26
  {
27
  "epoch": 0.46153846153846156,
28
- "grad_norm": 6.965435028076172,
29
  "learning_rate": 1.7025641025641026e-05,
30
- "loss": 1.1059,
31
  "step": 30
32
  },
33
  {
34
  "epoch": 0.6153846153846154,
35
- "grad_norm": 4.976907730102539,
36
  "learning_rate": 1.6000000000000003e-05,
37
- "loss": 0.5064,
38
  "step": 40
39
  },
40
  {
41
  "epoch": 0.7692307692307693,
42
- "grad_norm": 2.873666286468506,
43
  "learning_rate": 1.4974358974358976e-05,
44
- "loss": 0.2466,
45
  "step": 50
46
  },
47
  {
48
  "epoch": 0.9230769230769231,
49
- "grad_norm": 1.5674619674682617,
50
  "learning_rate": 1.3948717948717949e-05,
51
- "loss": 0.1142,
52
  "step": 60
53
  },
54
  {
55
  "epoch": 1.0,
56
- "eval_loss": 0.037802498787641525,
57
- "eval_runtime": 28.1554,
58
- "eval_samples_per_second": 4.582,
59
- "eval_steps_per_second": 0.604,
60
  "step": 65
61
  },
62
  {
63
  "epoch": 1.0769230769230769,
64
- "grad_norm": 0.6672631502151489,
65
  "learning_rate": 1.2923076923076925e-05,
66
- "loss": 0.0621,
67
  "step": 70
68
  },
69
  {
70
  "epoch": 1.2307692307692308,
71
- "grad_norm": 0.35425031185150146,
72
  "learning_rate": 1.1897435897435898e-05,
73
- "loss": 0.0367,
74
  "step": 80
75
  },
76
  {
77
  "epoch": 1.3846153846153846,
78
- "grad_norm": 0.26084840297698975,
79
  "learning_rate": 1.0871794871794871e-05,
80
- "loss": 0.0251,
81
  "step": 90
82
  },
83
  {
84
  "epoch": 1.5384615384615383,
85
- "grad_norm": 0.1835661381483078,
86
  "learning_rate": 9.846153846153848e-06,
87
- "loss": 0.0874,
88
  "step": 100
89
  },
90
  {
91
  "epoch": 1.6923076923076923,
92
- "grad_norm": 0.19026648998260498,
93
  "learning_rate": 8.820512820512821e-06,
94
- "loss": 0.0171,
95
  "step": 110
96
  },
97
  {
98
  "epoch": 1.8461538461538463,
99
- "grad_norm": 0.23326274752616882,
100
  "learning_rate": 7.794871794871796e-06,
101
- "loss": 0.0151,
102
  "step": 120
103
  },
104
  {
105
  "epoch": 2.0,
106
- "grad_norm": 0.18477407097816467,
107
  "learning_rate": 6.76923076923077e-06,
108
- "loss": 0.0136,
109
  "step": 130
110
  },
111
  {
112
  "epoch": 2.0,
113
- "eval_loss": 0.00848204642534256,
114
- "eval_runtime": 28.1768,
115
- "eval_samples_per_second": 4.578,
116
- "eval_steps_per_second": 0.603,
117
  "step": 130
118
  }
119
  ],
 
1
  {
2
  "best_global_step": 130,
3
+ "best_metric": 0.008769548498094082,
4
  "best_model_checkpoint": "./multitask_model/checkpoint-130",
5
  "epoch": 2.0,
6
  "eval_steps": 500,
 
11
  "log_history": [
12
  {
13
  "epoch": 0.15384615384615385,
14
+ "grad_norm": 8.848017692565918,
15
  "learning_rate": 1.907692307692308e-05,
16
+ "loss": 2.3929,
17
  "step": 10
18
  },
19
  {
20
  "epoch": 0.3076923076923077,
21
+ "grad_norm": 6.566382884979248,
22
  "learning_rate": 1.8051282051282053e-05,
23
+ "loss": 1.8245,
24
  "step": 20
25
  },
26
  {
27
  "epoch": 0.46153846153846156,
28
+ "grad_norm": 6.213647365570068,
29
  "learning_rate": 1.7025641025641026e-05,
30
+ "loss": 1.153,
31
  "step": 30
32
  },
33
  {
34
  "epoch": 0.6153846153846154,
35
+ "grad_norm": 4.559297561645508,
36
  "learning_rate": 1.6000000000000003e-05,
37
+ "loss": 0.5311,
38
  "step": 40
39
  },
40
  {
41
  "epoch": 0.7692307692307693,
42
+ "grad_norm": 3.087829828262329,
43
  "learning_rate": 1.4974358974358976e-05,
44
+ "loss": 0.2819,
45
  "step": 50
46
  },
47
  {
48
  "epoch": 0.9230769230769231,
49
+ "grad_norm": 1.9372252225875854,
50
  "learning_rate": 1.3948717948717949e-05,
51
+ "loss": 0.1202,
52
  "step": 60
53
  },
54
  {
55
  "epoch": 1.0,
56
+ "eval_loss": 0.04360657185316086,
57
+ "eval_runtime": 27.4875,
58
+ "eval_samples_per_second": 4.693,
59
+ "eval_steps_per_second": 0.618,
60
  "step": 65
61
  },
62
  {
63
  "epoch": 1.0769230769230769,
64
+ "grad_norm": 1.4653043746948242,
65
  "learning_rate": 1.2923076923076925e-05,
66
+ "loss": 0.069,
67
  "step": 70
68
  },
69
  {
70
  "epoch": 1.2307692307692308,
71
+ "grad_norm": 0.5297175049781799,
72
  "learning_rate": 1.1897435897435898e-05,
73
+ "loss": 0.0403,
74
  "step": 80
75
  },
76
  {
77
  "epoch": 1.3846153846153846,
78
+ "grad_norm": 0.3341902196407318,
79
  "learning_rate": 1.0871794871794871e-05,
80
+ "loss": 0.0274,
81
  "step": 90
82
  },
83
  {
84
  "epoch": 1.5384615384615383,
85
+ "grad_norm": 0.22006157040596008,
86
  "learning_rate": 9.846153846153848e-06,
87
+ "loss": 0.0887,
88
  "step": 100
89
  },
90
  {
91
  "epoch": 1.6923076923076923,
92
+ "grad_norm": 0.24235276877880096,
93
  "learning_rate": 8.820512820512821e-06,
94
+ "loss": 0.0187,
95
  "step": 110
96
  },
97
  {
98
  "epoch": 1.8461538461538463,
99
+ "grad_norm": 0.19090554118156433,
100
  "learning_rate": 7.794871794871796e-06,
101
+ "loss": 0.0153,
102
  "step": 120
103
  },
104
  {
105
  "epoch": 2.0,
106
+ "grad_norm": 0.24662891030311584,
107
  "learning_rate": 6.76923076923077e-06,
108
+ "loss": 0.0148,
109
  "step": 130
110
  },
111
  {
112
  "epoch": 2.0,
113
+ "eval_loss": 0.008769548498094082,
114
+ "eval_runtime": 29.1744,
115
+ "eval_samples_per_second": 4.422,
116
+ "eval_steps_per_second": 0.583,
117
  "step": 130
118
  }
119
  ],
checkpoint-195/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:78d96cb9d2591450e563169c41caf67ee99edbf505fd55501b48553893451855
3
  size 265491420
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:467fe70f53d9c1de6ce1b477b9256bd448dc5808b44f6e0de868d8191a929768
3
  size 265491420
checkpoint-195/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fff7650b7a1c4345087acb8b65ff2fa80bc0c7af06e48dbca8bfb799d2170d26
3
  size 531042682
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5726bd848ac9e051330bcf9acbd3677b8e2242d416a21e08899de8c22110c311
3
  size 531042682
checkpoint-195/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "best_global_step": 195,
3
- "best_metric": 0.006701565347611904,
4
  "best_model_checkpoint": "./multitask_model/checkpoint-195",
5
  "epoch": 3.0,
6
  "eval_steps": 500,
@@ -11,159 +11,159 @@
11
  "log_history": [
12
  {
13
  "epoch": 0.15384615384615385,
14
- "grad_norm": 8.5421142578125,
15
  "learning_rate": 1.907692307692308e-05,
16
- "loss": 2.3934,
17
  "step": 10
18
  },
19
  {
20
  "epoch": 0.3076923076923077,
21
- "grad_norm": 7.595258712768555,
22
  "learning_rate": 1.8051282051282053e-05,
23
- "loss": 1.8013,
24
  "step": 20
25
  },
26
  {
27
  "epoch": 0.46153846153846156,
28
- "grad_norm": 6.965435028076172,
29
  "learning_rate": 1.7025641025641026e-05,
30
- "loss": 1.1059,
31
  "step": 30
32
  },
33
  {
34
  "epoch": 0.6153846153846154,
35
- "grad_norm": 4.976907730102539,
36
  "learning_rate": 1.6000000000000003e-05,
37
- "loss": 0.5064,
38
  "step": 40
39
  },
40
  {
41
  "epoch": 0.7692307692307693,
42
- "grad_norm": 2.873666286468506,
43
  "learning_rate": 1.4974358974358976e-05,
44
- "loss": 0.2466,
45
  "step": 50
46
  },
47
  {
48
  "epoch": 0.9230769230769231,
49
- "grad_norm": 1.5674619674682617,
50
  "learning_rate": 1.3948717948717949e-05,
51
- "loss": 0.1142,
52
  "step": 60
53
  },
54
  {
55
  "epoch": 1.0,
56
- "eval_loss": 0.037802498787641525,
57
- "eval_runtime": 28.1554,
58
- "eval_samples_per_second": 4.582,
59
- "eval_steps_per_second": 0.604,
60
  "step": 65
61
  },
62
  {
63
  "epoch": 1.0769230769230769,
64
- "grad_norm": 0.6672631502151489,
65
  "learning_rate": 1.2923076923076925e-05,
66
- "loss": 0.0621,
67
  "step": 70
68
  },
69
  {
70
  "epoch": 1.2307692307692308,
71
- "grad_norm": 0.35425031185150146,
72
  "learning_rate": 1.1897435897435898e-05,
73
- "loss": 0.0367,
74
  "step": 80
75
  },
76
  {
77
  "epoch": 1.3846153846153846,
78
- "grad_norm": 0.26084840297698975,
79
  "learning_rate": 1.0871794871794871e-05,
80
- "loss": 0.0251,
81
  "step": 90
82
  },
83
  {
84
  "epoch": 1.5384615384615383,
85
- "grad_norm": 0.1835661381483078,
86
  "learning_rate": 9.846153846153848e-06,
87
- "loss": 0.0874,
88
  "step": 100
89
  },
90
  {
91
  "epoch": 1.6923076923076923,
92
- "grad_norm": 0.19026648998260498,
93
  "learning_rate": 8.820512820512821e-06,
94
- "loss": 0.0171,
95
  "step": 110
96
  },
97
  {
98
  "epoch": 1.8461538461538463,
99
- "grad_norm": 0.23326274752616882,
100
  "learning_rate": 7.794871794871796e-06,
101
- "loss": 0.0151,
102
  "step": 120
103
  },
104
  {
105
  "epoch": 2.0,
106
- "grad_norm": 0.18477407097816467,
107
  "learning_rate": 6.76923076923077e-06,
108
- "loss": 0.0136,
109
  "step": 130
110
  },
111
  {
112
  "epoch": 2.0,
113
- "eval_loss": 0.00848204642534256,
114
- "eval_runtime": 28.1768,
115
- "eval_samples_per_second": 4.578,
116
- "eval_steps_per_second": 0.603,
117
  "step": 130
118
  },
119
  {
120
  "epoch": 2.1538461538461537,
121
- "grad_norm": 0.18010590970516205,
122
  "learning_rate": 5.743589743589743e-06,
123
- "loss": 0.0132,
124
  "step": 140
125
  },
126
  {
127
  "epoch": 2.3076923076923075,
128
- "grad_norm": 0.20541535317897797,
129
  "learning_rate": 4.717948717948718e-06,
130
  "loss": 0.0127,
131
  "step": 150
132
  },
133
  {
134
  "epoch": 2.4615384615384617,
135
- "grad_norm": 0.15995213389396667,
136
  "learning_rate": 3.692307692307693e-06,
137
  "loss": 0.0121,
138
  "step": 160
139
  },
140
  {
141
  "epoch": 2.6153846153846154,
142
- "grad_norm": 0.12473954260349274,
143
  "learning_rate": 2.666666666666667e-06,
144
- "loss": 0.0113,
145
  "step": 170
146
  },
147
  {
148
  "epoch": 2.769230769230769,
149
- "grad_norm": 0.14388339221477509,
150
  "learning_rate": 1.6410256410256412e-06,
151
- "loss": 0.0117,
152
  "step": 180
153
  },
154
  {
155
  "epoch": 2.9230769230769234,
156
- "grad_norm": 0.12231703847646713,
157
  "learning_rate": 6.153846153846155e-07,
158
- "loss": 0.0112,
159
  "step": 190
160
  },
161
  {
162
  "epoch": 3.0,
163
- "eval_loss": 0.006701565347611904,
164
- "eval_runtime": 27.6525,
165
- "eval_samples_per_second": 4.665,
166
- "eval_steps_per_second": 0.615,
167
  "step": 195
168
  }
169
  ],
 
1
  {
2
  "best_global_step": 195,
3
+ "best_metric": 0.006962933111935854,
4
  "best_model_checkpoint": "./multitask_model/checkpoint-195",
5
  "epoch": 3.0,
6
  "eval_steps": 500,
 
11
  "log_history": [
12
  {
13
  "epoch": 0.15384615384615385,
14
+ "grad_norm": 8.848017692565918,
15
  "learning_rate": 1.907692307692308e-05,
16
+ "loss": 2.3929,
17
  "step": 10
18
  },
19
  {
20
  "epoch": 0.3076923076923077,
21
+ "grad_norm": 6.566382884979248,
22
  "learning_rate": 1.8051282051282053e-05,
23
+ "loss": 1.8245,
24
  "step": 20
25
  },
26
  {
27
  "epoch": 0.46153846153846156,
28
+ "grad_norm": 6.213647365570068,
29
  "learning_rate": 1.7025641025641026e-05,
30
+ "loss": 1.153,
31
  "step": 30
32
  },
33
  {
34
  "epoch": 0.6153846153846154,
35
+ "grad_norm": 4.559297561645508,
36
  "learning_rate": 1.6000000000000003e-05,
37
+ "loss": 0.5311,
38
  "step": 40
39
  },
40
  {
41
  "epoch": 0.7692307692307693,
42
+ "grad_norm": 3.087829828262329,
43
  "learning_rate": 1.4974358974358976e-05,
44
+ "loss": 0.2819,
45
  "step": 50
46
  },
47
  {
48
  "epoch": 0.9230769230769231,
49
+ "grad_norm": 1.9372252225875854,
50
  "learning_rate": 1.3948717948717949e-05,
51
+ "loss": 0.1202,
52
  "step": 60
53
  },
54
  {
55
  "epoch": 1.0,
56
+ "eval_loss": 0.04360657185316086,
57
+ "eval_runtime": 27.4875,
58
+ "eval_samples_per_second": 4.693,
59
+ "eval_steps_per_second": 0.618,
60
  "step": 65
61
  },
62
  {
63
  "epoch": 1.0769230769230769,
64
+ "grad_norm": 1.4653043746948242,
65
  "learning_rate": 1.2923076923076925e-05,
66
+ "loss": 0.069,
67
  "step": 70
68
  },
69
  {
70
  "epoch": 1.2307692307692308,
71
+ "grad_norm": 0.5297175049781799,
72
  "learning_rate": 1.1897435897435898e-05,
73
+ "loss": 0.0403,
74
  "step": 80
75
  },
76
  {
77
  "epoch": 1.3846153846153846,
78
+ "grad_norm": 0.3341902196407318,
79
  "learning_rate": 1.0871794871794871e-05,
80
+ "loss": 0.0274,
81
  "step": 90
82
  },
83
  {
84
  "epoch": 1.5384615384615383,
85
+ "grad_norm": 0.22006157040596008,
86
  "learning_rate": 9.846153846153848e-06,
87
+ "loss": 0.0887,
88
  "step": 100
89
  },
90
  {
91
  "epoch": 1.6923076923076923,
92
+ "grad_norm": 0.24235276877880096,
93
  "learning_rate": 8.820512820512821e-06,
94
+ "loss": 0.0187,
95
  "step": 110
96
  },
97
  {
98
  "epoch": 1.8461538461538463,
99
+ "grad_norm": 0.19090554118156433,
100
  "learning_rate": 7.794871794871796e-06,
101
+ "loss": 0.0153,
102
  "step": 120
103
  },
104
  {
105
  "epoch": 2.0,
106
+ "grad_norm": 0.24662891030311584,
107
  "learning_rate": 6.76923076923077e-06,
108
+ "loss": 0.0148,
109
  "step": 130
110
  },
111
  {
112
  "epoch": 2.0,
113
+ "eval_loss": 0.008769548498094082,
114
+ "eval_runtime": 29.1744,
115
+ "eval_samples_per_second": 4.422,
116
+ "eval_steps_per_second": 0.583,
117
  "step": 130
118
  },
119
  {
120
  "epoch": 2.1538461538461537,
121
+ "grad_norm": 0.18622471392154694,
122
  "learning_rate": 5.743589743589743e-06,
123
+ "loss": 0.0133,
124
  "step": 140
125
  },
126
  {
127
  "epoch": 2.3076923076923075,
128
+ "grad_norm": 0.13862484693527222,
129
  "learning_rate": 4.717948717948718e-06,
130
  "loss": 0.0127,
131
  "step": 150
132
  },
133
  {
134
  "epoch": 2.4615384615384617,
135
+ "grad_norm": 0.13039974868297577,
136
  "learning_rate": 3.692307692307693e-06,
137
  "loss": 0.0121,
138
  "step": 160
139
  },
140
  {
141
  "epoch": 2.6153846153846154,
142
+ "grad_norm": 0.12112937867641449,
143
  "learning_rate": 2.666666666666667e-06,
144
+ "loss": 0.0117,
145
  "step": 170
146
  },
147
  {
148
  "epoch": 2.769230769230769,
149
+ "grad_norm": 0.14916230738162994,
150
  "learning_rate": 1.6410256410256412e-06,
151
+ "loss": 0.012,
152
  "step": 180
153
  },
154
  {
155
  "epoch": 2.9230769230769234,
156
+ "grad_norm": 0.1365734338760376,
157
  "learning_rate": 6.153846153846155e-07,
158
+ "loss": 0.011,
159
  "step": 190
160
  },
161
  {
162
  "epoch": 3.0,
163
+ "eval_loss": 0.006962933111935854,
164
+ "eval_runtime": 27.4605,
165
+ "eval_samples_per_second": 4.698,
166
+ "eval_steps_per_second": 0.619,
167
  "step": 195
168
  }
169
  ],
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:78d96cb9d2591450e563169c41caf67ee99edbf505fd55501b48553893451855
3
  size 265491420
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:467fe70f53d9c1de6ce1b477b9256bd448dc5808b44f6e0de868d8191a929768
3
  size 265491420
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4628c062b8c76b1cad90b1fe27c66ca7c2601c9f18c0aa5b426dce034d778243
3
  size 265519274
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42fd68e28d72f8c64407387bf9a4a3618147720c354a88928db89f6c051081ef
3
  size 265519274