4ytk3 committed on
Commit
6283d6b
1 Parent(s): 1da8412
README.md CHANGED
@@ -16,8 +16,8 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  This model is a fine-tuned version of [rinna/japanese-gpt2-small](https://huggingface.co/rinna/japanese-gpt2-small) on an unknown dataset.
18
  It achieves the following results on the evaluation set:
19
- - Loss: 4.5630
20
- - Accuracy: 0.5320
21
 
22
  ## Model description
23
 
@@ -37,8 +37,8 @@ More information needed
37
 
38
  The following hyperparameters were used during training:
39
  - learning_rate: 5e-05
40
- - train_batch_size: 1
41
- - eval_batch_size: 1
42
  - seed: 42
43
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
44
  - lr_scheduler_type: linear
 
16
 
17
  This model is a fine-tuned version of [rinna/japanese-gpt2-small](https://huggingface.co/rinna/japanese-gpt2-small) on an unknown dataset.
18
  It achieves the following results on the evaluation set:
19
+ - Loss: 4.1254
20
+ - Accuracy: 0.5704
21
 
22
  ## Model description
23
 
 
37
 
38
  The following hyperparameters were used during training:
39
  - learning_rate: 5e-05
40
+ - train_batch_size: 2
41
+ - eval_batch_size: 2
42
  - seed: 42
43
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
44
  - lr_scheduler_type: linear
all_results.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
  "epoch": 100.0,
3
- "eval_accuracy": 0.531983137829912,
4
- "eval_loss": 4.562988758087158,
5
- "eval_runtime": 1.506,
6
- "eval_samples": 32,
7
- "eval_samples_per_second": 21.249,
8
- "eval_steps_per_second": 21.249,
9
- "perplexity": 95.86958306210914,
10
- "train_loss": 0.4174059252133445,
11
- "train_runtime": 1920.7593,
12
- "train_samples": 126,
13
- "train_samples_per_second": 6.56,
14
- "train_steps_per_second": 6.56
15
  }
 
1
  {
2
  "epoch": 100.0,
3
+ "eval_accuracy": 0.5704086900172438,
4
+ "eval_loss": 4.125412940979004,
5
+ "eval_runtime": 4.3796,
6
+ "eval_samples": 89,
7
+ "eval_samples_per_second": 20.321,
8
+ "eval_steps_per_second": 10.275,
9
+ "perplexity": 61.89336227967192,
10
+ "train_loss": 0.605967927964706,
11
+ "train_runtime": 4572.1542,
12
+ "train_samples": 357,
13
+ "train_samples_per_second": 7.808,
14
+ "train_steps_per_second": 3.915
15
  }
eval_results.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
  "epoch": 100.0,
3
- "eval_accuracy": 0.531983137829912,
4
- "eval_loss": 4.562988758087158,
5
- "eval_runtime": 1.506,
6
- "eval_samples": 32,
7
- "eval_samples_per_second": 21.249,
8
- "eval_steps_per_second": 21.249,
9
- "perplexity": 95.86958306210914
10
  }
 
1
  {
2
  "epoch": 100.0,
3
+ "eval_accuracy": 0.5704086900172438,
4
+ "eval_loss": 4.125412940979004,
5
+ "eval_runtime": 4.3796,
6
+ "eval_samples": 89,
7
+ "eval_samples_per_second": 20.321,
8
+ "eval_steps_per_second": 10.275,
9
+ "perplexity": 61.89336227967192
10
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fb42704c5960fbfa8a3998f2850cfbfa22257aaca3595bbf070714b4c5901e22
3
  size 454312509
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b1ecfdfe8f31a00765cae3c0c1dbebc82db67f6db00a2323b5c37dafd213617
3
  size 454312509
tokenizer_config.json CHANGED
@@ -13,6 +13,5 @@
13
  "sp_model_kwargs": {},
14
  "special_tokens_map_file": "C:\\Users\\Nakasone Natsuki/.cache\\huggingface\\hub\\models--rinna--japanese-gpt2-small\\snapshots\\d35a68cf1fea74b71708ce898b351471b5c698ce\\special_tokens_map.json",
15
  "tokenizer_class": "T5Tokenizer",
16
- "unk_token": "<unk>",
17
- "use_fast": false
18
  }
 
13
  "sp_model_kwargs": {},
14
  "special_tokens_map_file": "C:\\Users\\Nakasone Natsuki/.cache\\huggingface\\hub\\models--rinna--japanese-gpt2-small\\snapshots\\d35a68cf1fea74b71708ce898b351471b5c698ce\\special_tokens_map.json",
15
  "tokenizer_class": "T5Tokenizer",
16
+ "unk_token": "<unk>"
 
17
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 100.0,
3
- "train_loss": 0.4174059252133445,
4
- "train_runtime": 1920.7593,
5
- "train_samples": 126,
6
- "train_samples_per_second": 6.56,
7
- "train_steps_per_second": 6.56
8
  }
 
1
  {
2
  "epoch": 100.0,
3
+ "train_loss": 0.605967927964706,
4
+ "train_runtime": 4572.1542,
5
+ "train_samples": 357,
6
+ "train_samples_per_second": 7.808,
7
+ "train_steps_per_second": 3.915
8
  }
trainer_state.json CHANGED
@@ -2,174 +2,234 @@
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
  "epoch": 100.0,
5
- "global_step": 12600,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 3.97,
12
- "learning_rate": 4.801587301587302e-05,
13
- "loss": 3.3806,
14
  "step": 500
15
  },
16
  {
17
- "epoch": 7.94,
18
- "learning_rate": 4.603174603174603e-05,
19
- "loss": 2.0351,
20
  "step": 1000
21
  },
22
  {
23
- "epoch": 11.9,
24
- "learning_rate": 4.404761904761905e-05,
25
- "loss": 1.3335,
26
  "step": 1500
27
  },
28
  {
29
- "epoch": 15.87,
30
- "learning_rate": 4.2063492063492065e-05,
31
- "loss": 0.9008,
32
  "step": 2000
33
  },
34
  {
35
- "epoch": 19.84,
36
- "learning_rate": 4.007936507936508e-05,
37
- "loss": 0.641,
38
  "step": 2500
39
  },
40
  {
41
- "epoch": 23.81,
42
- "learning_rate": 3.809523809523809e-05,
43
- "loss": 0.475,
44
  "step": 3000
45
  },
46
  {
47
- "epoch": 27.78,
48
- "learning_rate": 3.611111111111111e-05,
49
- "loss": 0.3548,
50
  "step": 3500
51
  },
52
  {
53
- "epoch": 31.75,
54
- "learning_rate": 3.412698412698413e-05,
55
- "loss": 0.2571,
56
  "step": 4000
57
  },
58
  {
59
- "epoch": 35.71,
60
- "learning_rate": 3.2142857142857144e-05,
61
- "loss": 0.1845,
62
  "step": 4500
63
  },
64
  {
65
- "epoch": 39.68,
66
- "learning_rate": 3.0158730158730158e-05,
67
- "loss": 0.1332,
68
  "step": 5000
69
  },
70
  {
71
- "epoch": 43.65,
72
- "learning_rate": 2.8174603174603175e-05,
73
- "loss": 0.1001,
74
  "step": 5500
75
  },
76
  {
77
- "epoch": 47.62,
78
- "learning_rate": 2.6190476190476192e-05,
79
- "loss": 0.0823,
80
  "step": 6000
81
  },
82
  {
83
- "epoch": 51.59,
84
- "learning_rate": 2.4206349206349206e-05,
85
- "loss": 0.0696,
86
  "step": 6500
87
  },
88
  {
89
- "epoch": 55.56,
90
- "learning_rate": 2.2222222222222223e-05,
91
- "loss": 0.0626,
92
  "step": 7000
93
  },
94
  {
95
- "epoch": 59.52,
96
- "learning_rate": 2.023809523809524e-05,
97
- "loss": 0.0564,
98
  "step": 7500
99
  },
100
  {
101
- "epoch": 63.49,
102
- "learning_rate": 1.8253968253968254e-05,
103
- "loss": 0.0532,
104
  "step": 8000
105
  },
106
  {
107
- "epoch": 67.46,
108
- "learning_rate": 1.626984126984127e-05,
109
- "loss": 0.0495,
110
  "step": 8500
111
  },
112
  {
113
- "epoch": 71.43,
114
- "learning_rate": 1.4285714285714285e-05,
115
- "loss": 0.048,
116
  "step": 9000
117
  },
118
  {
119
- "epoch": 75.4,
120
- "learning_rate": 1.2301587301587301e-05,
121
- "loss": 0.0454,
122
  "step": 9500
123
  },
124
  {
125
- "epoch": 79.37,
126
- "learning_rate": 1.0317460317460318e-05,
127
- "loss": 0.0438,
128
  "step": 10000
129
  },
130
  {
131
- "epoch": 83.33,
132
- "learning_rate": 8.333333333333334e-06,
133
- "loss": 0.0427,
134
  "step": 10500
135
  },
136
  {
137
- "epoch": 87.3,
138
- "learning_rate": 6.349206349206349e-06,
139
- "loss": 0.041,
140
  "step": 11000
141
  },
142
  {
143
- "epoch": 91.27,
144
- "learning_rate": 4.365079365079365e-06,
145
- "loss": 0.0409,
146
  "step": 11500
147
  },
148
  {
149
- "epoch": 95.24,
150
- "learning_rate": 2.3809523809523808e-06,
151
- "loss": 0.0401,
152
  "step": 12000
153
  },
154
  {
155
- "epoch": 99.21,
156
- "learning_rate": 3.9682539682539683e-07,
157
- "loss": 0.0395,
158
  "step": 12500
159
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
  {
161
  "epoch": 100.0,
162
- "step": 12600,
163
- "total_flos": 6584559206400000.0,
164
- "train_loss": 0.4174059252133445,
165
- "train_runtime": 1920.7593,
166
- "train_samples_per_second": 6.56,
167
- "train_steps_per_second": 6.56
168
  }
169
  ],
170
- "max_steps": 12600,
171
  "num_train_epochs": 100,
172
- "total_flos": 6584559206400000.0,
173
  "trial_name": null,
174
  "trial_params": null
175
  }
 
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
  "epoch": 100.0,
5
+ "global_step": 17900,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 2.79,
12
+ "learning_rate": 4.860335195530727e-05,
13
+ "loss": 3.59,
14
  "step": 500
15
  },
16
  {
17
+ "epoch": 5.59,
18
+ "learning_rate": 4.7206703910614525e-05,
19
+ "loss": 2.6596,
20
  "step": 1000
21
  },
22
  {
23
+ "epoch": 8.38,
24
+ "learning_rate": 4.581005586592179e-05,
25
+ "loss": 2.1604,
26
  "step": 1500
27
  },
28
  {
29
+ "epoch": 11.17,
30
+ "learning_rate": 4.4413407821229054e-05,
31
+ "loss": 1.7776,
32
  "step": 2000
33
  },
34
  {
35
+ "epoch": 13.97,
36
+ "learning_rate": 4.301675977653631e-05,
37
+ "loss": 1.4733,
38
  "step": 2500
39
  },
40
  {
41
+ "epoch": 16.76,
42
+ "learning_rate": 4.1620111731843576e-05,
43
+ "loss": 1.2269,
44
  "step": 3000
45
  },
46
  {
47
+ "epoch": 19.55,
48
+ "learning_rate": 4.022346368715084e-05,
49
+ "loss": 1.0393,
50
  "step": 3500
51
  },
52
  {
53
+ "epoch": 22.35,
54
+ "learning_rate": 3.8826815642458106e-05,
55
+ "loss": 0.8902,
56
  "step": 4000
57
  },
58
  {
59
+ "epoch": 25.14,
60
+ "learning_rate": 3.743016759776536e-05,
61
+ "loss": 0.7686,
62
  "step": 4500
63
  },
64
  {
65
+ "epoch": 27.93,
66
+ "learning_rate": 3.603351955307263e-05,
67
+ "loss": 0.6742,
68
  "step": 5000
69
  },
70
  {
71
+ "epoch": 30.73,
72
+ "learning_rate": 3.463687150837989e-05,
73
+ "loss": 0.5921,
74
  "step": 5500
75
  },
76
  {
77
+ "epoch": 33.52,
78
+ "learning_rate": 3.324022346368715e-05,
79
+ "loss": 0.528,
80
  "step": 6000
81
  },
82
  {
83
+ "epoch": 36.31,
84
+ "learning_rate": 3.1843575418994415e-05,
85
+ "loss": 0.4712,
86
  "step": 6500
87
  },
88
  {
89
+ "epoch": 39.11,
90
+ "learning_rate": 3.0446927374301676e-05,
91
+ "loss": 0.422,
92
  "step": 7000
93
  },
94
  {
95
+ "epoch": 41.9,
96
+ "learning_rate": 2.9050279329608944e-05,
97
+ "loss": 0.3746,
98
  "step": 7500
99
  },
100
  {
101
+ "epoch": 44.69,
102
+ "learning_rate": 2.7653631284916205e-05,
103
+ "loss": 0.3347,
104
  "step": 8000
105
  },
106
  {
107
+ "epoch": 47.49,
108
+ "learning_rate": 2.6256983240223466e-05,
109
+ "loss": 0.2979,
110
  "step": 8500
111
  },
112
  {
113
+ "epoch": 50.28,
114
+ "learning_rate": 2.4860335195530728e-05,
115
+ "loss": 0.2651,
116
  "step": 9000
117
  },
118
  {
119
+ "epoch": 53.07,
120
+ "learning_rate": 2.346368715083799e-05,
121
+ "loss": 0.2358,
122
  "step": 9500
123
  },
124
  {
125
+ "epoch": 55.87,
126
+ "learning_rate": 2.206703910614525e-05,
127
+ "loss": 0.2078,
128
  "step": 10000
129
  },
130
  {
131
+ "epoch": 58.66,
132
+ "learning_rate": 2.0670391061452515e-05,
133
+ "loss": 0.1848,
134
  "step": 10500
135
  },
136
  {
137
+ "epoch": 61.45,
138
+ "learning_rate": 1.9273743016759776e-05,
139
+ "loss": 0.1643,
140
  "step": 11000
141
  },
142
  {
143
+ "epoch": 64.25,
144
+ "learning_rate": 1.787709497206704e-05,
145
+ "loss": 0.148,
146
  "step": 11500
147
  },
148
  {
149
+ "epoch": 67.04,
150
+ "learning_rate": 1.64804469273743e-05,
151
+ "loss": 0.1332,
152
  "step": 12000
153
  },
154
  {
155
+ "epoch": 69.83,
156
+ "learning_rate": 1.5083798882681566e-05,
157
+ "loss": 0.1205,
158
  "step": 12500
159
  },
160
+ {
161
+ "epoch": 72.63,
162
+ "learning_rate": 1.3687150837988827e-05,
163
+ "loss": 0.1115,
164
+ "step": 13000
165
+ },
166
+ {
167
+ "epoch": 75.42,
168
+ "learning_rate": 1.229050279329609e-05,
169
+ "loss": 0.1037,
170
+ "step": 13500
171
+ },
172
+ {
173
+ "epoch": 78.21,
174
+ "learning_rate": 1.0893854748603351e-05,
175
+ "loss": 0.0977,
176
+ "step": 14000
177
+ },
178
+ {
179
+ "epoch": 81.01,
180
+ "learning_rate": 9.497206703910614e-06,
181
+ "loss": 0.0922,
182
+ "step": 14500
183
+ },
184
+ {
185
+ "epoch": 83.8,
186
+ "learning_rate": 8.100558659217877e-06,
187
+ "loss": 0.0879,
188
+ "step": 15000
189
+ },
190
+ {
191
+ "epoch": 86.59,
192
+ "learning_rate": 6.70391061452514e-06,
193
+ "loss": 0.0841,
194
+ "step": 15500
195
+ },
196
+ {
197
+ "epoch": 89.39,
198
+ "learning_rate": 5.307262569832402e-06,
199
+ "loss": 0.0823,
200
+ "step": 16000
201
+ },
202
+ {
203
+ "epoch": 92.18,
204
+ "learning_rate": 3.910614525139665e-06,
205
+ "loss": 0.0794,
206
+ "step": 16500
207
+ },
208
+ {
209
+ "epoch": 94.97,
210
+ "learning_rate": 2.5139664804469276e-06,
211
+ "loss": 0.078,
212
+ "step": 17000
213
+ },
214
+ {
215
+ "epoch": 97.77,
216
+ "learning_rate": 1.11731843575419e-06,
217
+ "loss": 0.0766,
218
+ "step": 17500
219
+ },
220
  {
221
  "epoch": 100.0,
222
+ "step": 17900,
223
+ "total_flos": 1.86562510848e+16,
224
+ "train_loss": 0.605967927964706,
225
+ "train_runtime": 4572.1542,
226
+ "train_samples_per_second": 7.808,
227
+ "train_steps_per_second": 3.915
228
  }
229
  ],
230
+ "max_steps": 17900,
231
  "num_train_epochs": 100,
232
+ "total_flos": 1.86562510848e+16,
233
  "trial_name": null,
234
  "trial_params": null
235
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d7ac81ab65eb17c529d1cf5cf61b06a1389796c3e7231768beb7db1914446e0b
3
  size 3387
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b18bac2198d54aadd67e1c4bee82cba6dd7721c5f7865e4cbb060597ad10aef
3
  size 3387