AlekseyKorshuk committed on
Commit
9ca4c91
1 Parent(s): 9607c04

huggingartists

Browse files
README.md CHANGED
@@ -45,15 +45,15 @@ from datasets import load_dataset
45
  dataset = load_dataset("huggingartists/ot-rus")
46
  ```
47
 
48
- [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/dh75fodw/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
49
 
50
  ## Training procedure
51
 
52
  The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on O.T (RUS)'s lyrics.
53
 
54
- Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/2z8je9ki) for full transparency and reproducibility.
55
 
56
- At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/2z8je9ki/artifacts) is logged and versioned.
57
 
58
  ## How to use
59
 
45
  dataset = load_dataset("huggingartists/ot-rus")
46
  ```
47
 
48
+ [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/35byet4r/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
49
 
50
  ## Training procedure
51
 
52
  The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on O.T (RUS)'s lyrics.
53
 
54
+ Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/2p2tawej) for full transparency and reproducibility.
55
 
56
+ At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/2p2tawej/artifacts) is logged and versioned.
57
 
58
  ## How to use
59
 
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "gpt2",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
  "GPT2LMHeadModel"
1
  {
2
+ "_name_or_path": "huggingartists/ot-rus",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
  "GPT2LMHeadModel"
evaluation.txt CHANGED
@@ -1 +1 @@
1
- {"eval_loss": 2.1647870540618896, "eval_runtime": 5.6073, "eval_samples_per_second": 22.471, "eval_steps_per_second": 2.853, "epoch": 1.0}
1
+ {"eval_loss": 2.1074936389923096, "eval_runtime": 5.5699, "eval_samples_per_second": 22.263, "eval_steps_per_second": 2.873, "epoch": 2.0}
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7280534ebae80cbd7a0ff181370ac219ef1d383b6a75ec3ace45003f7fa3b4c2
3
  size 497764120
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07011c203546934b6bdabaeae788ab6a69d4e88dfb66510ddc805a805f61bc05
3
  size 497764120
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:80aa3d46df7706d73aab712bda5577adf024661077c53095998751be4b4bb475
3
  size 995603825
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14a546279a0f707235c018ab8978fcf6b535a23f50333ef00dc4c7776bfdd3b3
3
  size 995603825
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e920d39caf1d90c47efebeb7622ea08b9d909896b23eb32a2e43d029e6277013
3
  size 510403817
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71ee1123b7c5a177aaa470ceeb18d3f4774881c9c284c22b3f07eb4b7fcd7595
3
  size 510403817
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c6d05b9438e6f0b829d1a0fd700693afb37fc20834900c9a611d951a29b89e0e
3
- size 14567
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd50da534a44c60e56efa338fe7a8a2934c906739bbceef2efb60aba8acaa3be
3
+ size 14503
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d42e804a9d8c3eb6fb374f2bef948d45b343d1713752796e2fab2e4688dc861b
3
  size 623
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c4614765a0c82684c77b1a0e60e0bd2f59801705437e85879cf2668b5350536
3
  size 623
tokenizer_config.json CHANGED
@@ -1 +1 @@
1
- {"unk_token": "<|endoftext|>", "bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "add_prefix_space": false, "model_max_length": 1024, "special_tokens_map_file": null, "name_or_path": "gpt2", "tokenizer_class": "GPT2Tokenizer"}
1
+ {"unk_token": "<|endoftext|>", "bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "add_prefix_space": false, "model_max_length": 1024, "special_tokens_map_file": null, "name_or_path": "huggingartists/ot-rus", "tokenizer_class": "GPT2Tokenizer"}
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 2.1647870540618896,
3
- "best_model_checkpoint": "output/ot-rus/checkpoint-93",
4
- "epoch": 1.0,
5
- "global_step": 93,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -122,11 +122,141 @@
122
  "eval_samples_per_second": 23.177,
123
  "eval_steps_per_second": 2.943,
124
  "step": 93
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
  }
126
  ],
127
- "max_steps": 93,
128
- "num_train_epochs": 1,
129
- "total_flos": 97069989888000.0,
130
  "trial_name": null,
131
  "trial_params": null
132
  }
1
  {
2
+ "best_metric": 2.1074936389923096,
3
+ "best_model_checkpoint": "output/ot-rus/checkpoint-188",
4
+ "epoch": 2.0,
5
+ "global_step": 188,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
122
  "eval_samples_per_second": 23.177,
123
  "eval_steps_per_second": 2.943,
124
  "step": 93
125
+ },
126
+ {
127
+ "epoch": 1.0,
128
+ "eval_loss": 2.152095079421997,
129
+ "eval_runtime": 5.3828,
130
+ "eval_samples_per_second": 23.036,
131
+ "eval_steps_per_second": 2.972,
132
+ "step": 94
133
+ },
134
+ {
135
+ "epoch": 1.01,
136
+ "learning_rate": 3.830872810453363e-08,
137
+ "loss": 2.1472,
138
+ "step": 95
139
+ },
140
+ {
141
+ "epoch": 1.06,
142
+ "learning_rate": 1.3746270344901413e-06,
143
+ "loss": 2.2033,
144
+ "step": 100
145
+ },
146
+ {
147
+ "epoch": 1.12,
148
+ "learning_rate": 4.58381008254605e-06,
149
+ "loss": 2.1682,
150
+ "step": 105
151
+ },
152
+ {
153
+ "epoch": 1.17,
154
+ "learning_rate": 9.576451662754438e-06,
155
+ "loss": 2.1479,
156
+ "step": 110
157
+ },
158
+ {
159
+ "epoch": 1.22,
160
+ "learning_rate": 1.6213459316852997e-05,
161
+ "loss": 2.2949,
162
+ "step": 115
163
+ },
164
+ {
165
+ "epoch": 1.28,
166
+ "learning_rate": 2.4309929383066146e-05,
167
+ "loss": 2.2226,
168
+ "step": 120
169
+ },
170
+ {
171
+ "epoch": 1.33,
172
+ "learning_rate": 3.3640298318194444e-05,
173
+ "loss": 2.1933,
174
+ "step": 125
175
+ },
176
+ {
177
+ "epoch": 1.38,
178
+ "learning_rate": 4.3944626783346644e-05,
179
+ "loss": 2.1917,
180
+ "step": 130
181
+ },
182
+ {
183
+ "epoch": 1.44,
184
+ "learning_rate": 5.493584142187686e-05,
185
+ "loss": 2.2443,
186
+ "step": 135
187
+ },
188
+ {
189
+ "epoch": 1.49,
190
+ "learning_rate": 6.630773257727353e-05,
191
+ "loss": 2.2132,
192
+ "step": 140
193
+ },
194
+ {
195
+ "epoch": 1.54,
196
+ "learning_rate": 7.774348513864122e-05,
197
+ "loss": 2.2304,
198
+ "step": 145
199
+ },
200
+ {
201
+ "epoch": 1.6,
202
+ "learning_rate": 8.892450484875447e-05,
203
+ "loss": 2.0878,
204
+ "step": 150
205
+ },
206
+ {
207
+ "epoch": 1.65,
208
+ "learning_rate": 9.953929417822461e-05,
209
+ "loss": 2.2156,
210
+ "step": 155
211
+ },
212
+ {
213
+ "epoch": 1.7,
214
+ "learning_rate": 0.00010929213048843373,
215
+ "loss": 2.165,
216
+ "step": 160
217
+ },
218
+ {
219
+ "epoch": 1.76,
220
+ "learning_rate": 0.00011791130471402592,
221
+ "loss": 2.0622,
222
+ "step": 165
223
+ },
224
+ {
225
+ "epoch": 1.81,
226
+ "learning_rate": 0.00012515669103944476,
227
+ "loss": 2.2076,
228
+ "step": 170
229
+ },
230
+ {
231
+ "epoch": 1.86,
232
+ "learning_rate": 0.00013082643668217578,
233
+ "loss": 2.2435,
234
+ "step": 175
235
+ },
236
+ {
237
+ "epoch": 1.91,
238
+ "learning_rate": 0.00013476258540873022,
239
+ "loss": 2.1772,
240
+ "step": 180
241
+ },
242
+ {
243
+ "epoch": 1.97,
244
+ "learning_rate": 0.00013685547811507137,
245
+ "loss": 2.1434,
246
+ "step": 185
247
+ },
248
+ {
249
+ "epoch": 2.0,
250
+ "eval_loss": 2.1074936389923096,
251
+ "eval_runtime": 5.358,
252
+ "eval_samples_per_second": 23.143,
253
+ "eval_steps_per_second": 2.986,
254
+ "step": 188
255
  }
256
  ],
257
+ "max_steps": 188,
258
+ "num_train_epochs": 2,
259
+ "total_flos": 194793209856000.0,
260
  "trial_name": null,
261
  "trial_params": null
262
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c7356065e64a7ed2f99760f766f2d6d5548e73090f96e18983074ac05700b0e4
3
  size 2671
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b11e3b4f7a9410936e65efe533a2197fa464fb8c605563f3fdb5b97e67df2e0d
3
  size 2671