mstatt committed on
Commit
e2d2ecb
·
1 Parent(s): 453220f

Upload 11 files

Browse files
Files changed (7) hide show
  1. config.json +1 -1
  2. optimizer.pt +1 -1
  3. pytorch_model.bin +1 -1
  4. rng_state.pth +1 -1
  5. scheduler.pt +1 -1
  6. trainer_state.json +132 -87
  7. training_args.bin +1 -1
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "Falconsai/offensive_speech_detection",
3
  "activation": "gelu",
4
  "architectures": [
5
  "DistilBertForSequenceClassification"
 
1
  {
2
+ "_name_or_path": "./results/checkpoint-11000",
3
  "activation": "gelu",
4
  "architectures": [
5
  "DistilBertForSequenceClassification"
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4df7121aec1c35c069337a74d42c53b4a5b4923ca88aca3f5b33a51e937ced73
3
  size 535701061
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38a118be7202aa4db1e3d37d22adcfc6b4e3be00af436b916228fadd4dc2e40c
3
  size 535701061
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bbc0d5fa6c859f571867e1c85354a56e06713c815ab1694e8dfcf40c64f92978
3
  size 267855533
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfcd56f2d823a9cdc2cfccb44e5e823d3200112622082e5cd94e93d019b9fc67
3
  size 267855533
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a6160847c51c08356e853c3bca23f598a8d8a42785e2458b0c5fc923e4f1dadf
3
  size 14511
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01da4f1236b1946afbe43e51bdf7717aeb209652b7d7314cda674a4ca1e36a16
3
  size 14511
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:51e06238497f705c88cffe1b96791159faf151e3e1acd4bc0004a4c3260ed549
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1cc86f59df564bec394c86356ab586d662aa80b1cf79a7016636c67697d6ef91
3
  size 627
trainer_state.json CHANGED
@@ -1,151 +1,196 @@
1
  {
2
- "best_metric": 0.01884845644235611,
3
- "best_model_checkpoint": "./results/checkpoint-4500",
4
- "epoch": 0.9439899307740718,
5
- "global_step": 4500,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.1,
12
- "learning_rate": 1.9300748199426614e-05,
13
- "loss": 0.0391,
14
  "step": 500
15
  },
16
  {
17
- "epoch": 0.1,
18
- "eval_accuracy": 0.990558615263572,
19
- "eval_loss": 0.04349859058856964,
20
- "eval_runtime": 154.2476,
21
- "eval_samples_per_second": 123.6,
22
- "eval_steps_per_second": 7.728,
23
  "step": 500
24
  },
25
  {
26
- "epoch": 0.21,
27
- "learning_rate": 1.860149639885323e-05,
28
- "loss": 0.0327,
29
  "step": 1000
30
  },
31
  {
32
- "epoch": 0.21,
33
- "eval_accuracy": 0.9903488067138736,
34
- "eval_loss": 0.04805811867117882,
35
- "eval_runtime": 154.05,
36
- "eval_samples_per_second": 123.759,
37
- "eval_steps_per_second": 7.738,
38
  "step": 1000
39
  },
40
  {
41
- "epoch": 0.31,
42
- "learning_rate": 1.790224459827984e-05,
43
- "loss": 0.0266,
44
  "step": 1500
45
  },
46
  {
47
- "epoch": 0.31,
48
- "eval_accuracy": 0.9911355887752425,
49
- "eval_loss": 0.0459710918366909,
50
- "eval_runtime": 154.4944,
51
- "eval_samples_per_second": 123.403,
52
- "eval_steps_per_second": 7.715,
53
  "step": 1500
54
  },
55
  {
56
- "epoch": 0.42,
57
- "learning_rate": 1.7202992797706454e-05,
58
- "loss": 0.035,
59
  "step": 2000
60
  },
61
  {
62
- "epoch": 0.42,
63
- "eval_accuracy": 0.9916601101494886,
64
- "eval_loss": 0.03351669758558273,
65
- "eval_runtime": 154.0214,
66
- "eval_samples_per_second": 123.782,
67
- "eval_steps_per_second": 7.739,
68
  "step": 2000
69
  },
70
  {
71
- "epoch": 0.52,
72
- "learning_rate": 1.650374099713307e-05,
73
- "loss": 0.0371,
74
  "step": 2500
75
  },
76
  {
77
- "epoch": 0.52,
78
- "eval_accuracy": 0.9930763178599528,
79
- "eval_loss": 0.025116313248872757,
80
- "eval_runtime": 153.2057,
81
- "eval_samples_per_second": 124.441,
82
- "eval_steps_per_second": 7.78,
83
  "step": 2500
84
  },
85
  {
86
- "epoch": 0.63,
87
- "learning_rate": 1.580448919655968e-05,
88
- "loss": 0.026,
89
  "step": 3000
90
  },
91
  {
92
- "epoch": 0.63,
93
- "eval_accuracy": 0.9938630999213218,
94
- "eval_loss": 0.027913494035601616,
95
- "eval_runtime": 153.7631,
96
- "eval_samples_per_second": 123.989,
97
- "eval_steps_per_second": 7.752,
98
  "step": 3000
99
  },
100
  {
101
- "epoch": 0.73,
102
- "learning_rate": 1.5105237395986297e-05,
103
- "loss": 0.0299,
104
  "step": 3500
105
  },
106
  {
107
- "epoch": 0.73,
108
- "eval_accuracy": 0.9934959349593496,
109
- "eval_loss": 0.027549268677830696,
110
- "eval_runtime": 153.8917,
111
- "eval_samples_per_second": 123.886,
112
- "eval_steps_per_second": 7.746,
113
  "step": 3500
114
  },
115
  {
116
- "epoch": 0.84,
117
- "learning_rate": 1.4405985595412911e-05,
118
- "loss": 0.0254,
119
  "step": 4000
120
  },
121
  {
122
- "epoch": 0.84,
123
- "eval_accuracy": 0.9945449777078416,
124
- "eval_loss": 0.024684084579348564,
125
- "eval_runtime": 153.4415,
126
- "eval_samples_per_second": 124.249,
127
- "eval_steps_per_second": 7.768,
128
  "step": 4000
129
  },
130
  {
131
- "epoch": 0.94,
132
- "learning_rate": 1.3706733794839521e-05,
133
- "loss": 0.0219,
134
  "step": 4500
135
  },
136
  {
137
- "epoch": 0.94,
138
- "eval_accuracy": 0.9954366640440598,
139
- "eval_loss": 0.01884845644235611,
140
- "eval_runtime": 153.901,
141
- "eval_samples_per_second": 123.878,
142
- "eval_steps_per_second": 7.745,
143
  "step": 4500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
  }
145
  ],
146
- "max_steps": 14301,
147
  "num_train_epochs": 3,
148
- "total_flos": 9537652703232000.0,
149
  "trial_name": null,
150
  "trial_params": null
151
  }
 
1
  {
2
+ "best_metric": 0.022033799439668655,
3
+ "best_model_checkpoint": "./results/checkpoint-6000",
4
+ "epoch": 0.687915615684476,
5
+ "global_step": 6000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 0.06,
12
+ "learning_rate": 1.9617824657953072e-05,
13
+ "loss": 0.058,
14
  "step": 500
15
  },
16
  {
17
+ "epoch": 0.06,
18
+ "eval_accuracy": 0.9880467809436451,
19
+ "eval_loss": 0.04749465361237526,
20
+ "eval_runtime": 369.9155,
21
+ "eval_samples_per_second": 94.308,
22
+ "eval_steps_per_second": 5.896,
23
  "step": 500
24
  },
25
  {
26
+ "epoch": 0.11,
27
+ "learning_rate": 1.923564931590614e-05,
28
+ "loss": 0.0513,
29
  "step": 1000
30
  },
31
  {
32
+ "epoch": 0.11,
33
+ "eval_accuracy": 0.9892220374935504,
34
+ "eval_loss": 0.044388506561517715,
35
+ "eval_runtime": 313.5583,
36
+ "eval_samples_per_second": 111.258,
37
+ "eval_steps_per_second": 6.956,
38
  "step": 1000
39
  },
40
  {
41
+ "epoch": 0.17,
42
+ "learning_rate": 1.885347397385921e-05,
43
+ "loss": 0.0413,
44
  "step": 1500
45
  },
46
  {
47
+ "epoch": 0.17,
48
+ "eval_accuracy": 0.9901393108983546,
49
+ "eval_loss": 0.044446878135204315,
50
+ "eval_runtime": 322.6458,
51
+ "eval_samples_per_second": 108.125,
52
+ "eval_steps_per_second": 6.76,
53
  "step": 1500
54
  },
55
  {
56
+ "epoch": 0.23,
57
+ "learning_rate": 1.847129863181228e-05,
58
+ "loss": 0.0364,
59
  "step": 2000
60
  },
61
  {
62
+ "epoch": 0.23,
63
+ "eval_accuracy": 0.9911712434787594,
64
+ "eval_loss": 0.037910301238298416,
65
+ "eval_runtime": 258.562,
66
+ "eval_samples_per_second": 134.923,
67
+ "eval_steps_per_second": 8.435,
68
  "step": 2000
69
  },
70
  {
71
+ "epoch": 0.29,
72
+ "learning_rate": 1.8089123289765345e-05,
73
+ "loss": 0.0389,
74
  "step": 2500
75
  },
76
  {
77
+ "epoch": 0.29,
78
+ "eval_accuracy": 0.9921171816774638,
79
+ "eval_loss": 0.03007333353161812,
80
+ "eval_runtime": 258.8834,
81
+ "eval_samples_per_second": 134.756,
82
+ "eval_steps_per_second": 8.425,
83
  "step": 2500
84
  },
85
  {
86
+ "epoch": 0.34,
87
+ "learning_rate": 1.7706947947718412e-05,
88
+ "loss": 0.0343,
89
  "step": 3000
90
  },
91
  {
92
+ "epoch": 0.34,
93
+ "eval_accuracy": 0.9923465000286648,
94
+ "eval_loss": 0.03364783525466919,
95
+ "eval_runtime": 258.3829,
96
+ "eval_samples_per_second": 135.017,
97
+ "eval_steps_per_second": 8.441,
98
  "step": 3000
99
  },
100
  {
101
+ "epoch": 0.4,
102
+ "learning_rate": 1.7324772605671482e-05,
103
+ "loss": 0.0351,
104
  "step": 3500
105
  },
106
  {
107
+ "epoch": 0.4,
108
+ "eval_accuracy": 0.9918591985323626,
109
+ "eval_loss": 0.030066516250371933,
110
+ "eval_runtime": 258.5585,
111
+ "eval_samples_per_second": 134.925,
112
+ "eval_steps_per_second": 8.435,
113
  "step": 3500
114
  },
115
  {
116
+ "epoch": 0.46,
117
+ "learning_rate": 1.6942597263624552e-05,
118
+ "loss": 0.0343,
119
  "step": 4000
120
  },
121
  {
122
+ "epoch": 0.46,
123
+ "eval_accuracy": 0.9925471535859657,
124
+ "eval_loss": 0.02801605314016342,
125
+ "eval_runtime": 308.2791,
126
+ "eval_samples_per_second": 113.164,
127
+ "eval_steps_per_second": 7.075,
128
  "step": 4000
129
  },
130
  {
131
+ "epoch": 0.52,
132
+ "learning_rate": 1.6560421921577622e-05,
133
+ "loss": 0.0328,
134
  "step": 4500
135
  },
136
  {
137
+ "epoch": 0.52,
138
+ "eval_accuracy": 0.9937224101358711,
139
+ "eval_loss": 0.022993654012680054,
140
+ "eval_runtime": 304.7606,
141
+ "eval_samples_per_second": 114.47,
142
+ "eval_steps_per_second": 7.156,
143
  "step": 4500
144
+ },
145
+ {
146
+ "epoch": 0.57,
147
+ "learning_rate": 1.617824657953069e-05,
148
+ "loss": 0.0322,
149
+ "step": 5000
150
+ },
151
+ {
152
+ "epoch": 0.57,
153
+ "eval_accuracy": 0.9930631198761681,
154
+ "eval_loss": 0.022875914350152016,
155
+ "eval_runtime": 509.8625,
156
+ "eval_samples_per_second": 68.422,
157
+ "eval_steps_per_second": 4.278,
158
+ "step": 5000
159
+ },
160
+ {
161
+ "epoch": 0.63,
162
+ "learning_rate": 1.579607123748376e-05,
163
+ "loss": 0.0275,
164
+ "step": 5500
165
+ },
166
+ {
167
+ "epoch": 0.63,
168
+ "eval_accuracy": 0.994353035601674,
169
+ "eval_loss": 0.024034755304455757,
170
+ "eval_runtime": 296.8109,
171
+ "eval_samples_per_second": 117.536,
172
+ "eval_steps_per_second": 7.348,
173
+ "step": 5500
174
+ },
175
+ {
176
+ "epoch": 0.69,
177
+ "learning_rate": 1.541389589543683e-05,
178
+ "loss": 0.0288,
179
+ "step": 6000
180
+ },
181
+ {
182
+ "epoch": 0.69,
183
+ "eval_accuracy": 0.9946396835406753,
184
+ "eval_loss": 0.022033799439668655,
185
+ "eval_runtime": 293.7762,
186
+ "eval_samples_per_second": 118.75,
187
+ "eval_steps_per_second": 7.424,
188
+ "step": 6000
189
  }
190
  ],
191
+ "max_steps": 26166,
192
  "num_train_epochs": 3,
193
+ "total_flos": 1.2716870270976e+16,
194
  "trial_name": null,
195
  "trial_params": null
196
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1317ed9b28a901fc802c67584193ee79f96fe31da7695bec1e79e610e483cf78
3
  size 3963
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb244d95b7b47fb97bc09ca9a88060acdd00da87d07494e09e71d9be17b7177a
3
  size 3963