diff --git "a/wandb/run-20210709_144100-2k1kyrq2/files/output.log" "b/wandb/run-20210709_144100-2k1kyrq2/files/output.log" new file mode 100644--- /dev/null +++ "b/wandb/run-20210709_144100-2k1kyrq2/files/output.log" @@ -0,0 +1,424 @@ +INFO:__main__:Training/evaluation parameters TrainingArguments( +_n_gpu=0, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.98, +adam_epsilon=1e-08, +dataloader_drop_last=False, +dataloader_num_workers=64, +dataloader_pin_memory=True, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_steps=5000, +evaluation_strategy=IntervalStrategy.NO, +fp16=False, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +gradient_accumulation_steps=1, +greater_is_better=None, +group_by_length=False, +ignore_data_skip=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0024, +length_column_name=length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=/home/cahya/Work/flax-community/gpt2-medium-indonesian/runs/Jul09_14-41-04_t1v-n-528d9406-w-0, +logging_first_step=False, +logging_steps=5000, +logging_strategy=IntervalStrategy.STEPS, +lr_scheduler_type=SchedulerType.LINEAR, +max_grad_norm=1.0, +max_steps=-1, +metric_for_best_model=None, +mp_parameters=, +no_cuda=False, +num_train_epochs=20.0, +output_dir=/home/cahya/Work/flax-community/gpt2-medium-indonesian, +overwrite_output_dir=True, +past_index=-1, +per_device_eval_batch_size=24, +per_device_train_batch_size=24, +prediction_loss_only=False, +push_to_hub=True, +push_to_hub_model_id=gpt2-medium-indonesian, +push_to_hub_organization=None, +push_to_hub_token=None, +remove_unused_columns=True, +report_to=['tensorboard', 'wandb'], +resume_from_checkpoint=None, +run_name=/home/cahya/Work/flax-community/gpt2-medium-indonesian, +save_on_each_node=False, +save_steps=5000, +save_strategy=IntervalStrategy.STEPS, +save_total_limit=None, +seed=42, +sharded_ddp=[], +skip_memory_metrics=True, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=1000, +weight_decay=0.01, +) +WARNING:datasets.builder:Reusing dataset oscar (/home/cahya/.cache/huggingface/datasets/oscar/unshuffled_deduplicated_id/1.0.0/84838bd49d2295f62008383b05620571535451d84545037bb94d6f3501651df2) +WARNING:datasets.builder:Reusing dataset oscar (/home/cahya/.cache/huggingface/datasets/oscar/unshuffled_deduplicated_id/1.0.0/84838bd49d2295f62008383b05620571535451d84545037bb94d6f3501651df2) +WARNING:datasets.builder:Reusing dataset oscar (/home/cahya/.cache/huggingface/datasets/oscar/unshuffled_deduplicated_id/1.0.0/84838bd49d2295f62008383b05620571535451d84545037bb94d6f3501651df2) +loading configuration file /home/cahya/Work/flax-community/gpt2-medium-indonesian/config.json +Model config GPT2Config { + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.0, + "bos_token_id": 50256, + "embd_pdrop": 0.0, + "eos_token_id": 50256, + "gradient_checkpointing": false, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 1024, + "n_head": 16, + "n_inner": null, + "n_layer": 24, + "n_positions": 1024, + "n_special": 0, + "predict_special_tokens": true, + "resid_pdrop": 0.0, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.1, + "summary_proj_to_labels": true, + 
"summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 50 + } + }, + "transformers_version": "4.9.0.dev0", + "use_cache": true, + "vocab_size": 50257 +} +Could not locate the tokenizer configuration file, will try to use the model config instead. +loading configuration file /home/cahya/Work/flax-community/gpt2-medium-indonesian/config.json +Model config GPT2Config { + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.0, + "bos_token_id": 50256, + "embd_pdrop": 0.0, + "eos_token_id": 50256, + "gradient_checkpointing": false, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 1024, + "n_head": 16, + "n_inner": null, + "n_layer": 24, + "n_positions": 1024, + "n_special": 0, + "predict_special_tokens": true, + "resid_pdrop": 0.0, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.1, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 50 + } + }, + "transformers_version": "4.9.0.dev0", + "use_cache": true, + "vocab_size": 50257 +} +Didn't find file /home/cahya/Work/flax-community/gpt2-medium-indonesian/vocab.json. We won't load it. +Didn't find file /home/cahya/Work/flax-community/gpt2-medium-indonesian/merges.txt. We won't load it. +Didn't find file /home/cahya/Work/flax-community/gpt2-medium-indonesian/added_tokens.json. We won't load it. +Didn't find file /home/cahya/Work/flax-community/gpt2-medium-indonesian/special_tokens_map.json. We won't load it. +Didn't find file /home/cahya/Work/flax-community/gpt2-medium-indonesian/tokenizer_config.json. We won't load it. +loading file None +loading file None +loading file /home/cahya/Work/flax-community/gpt2-medium-indonesian/tokenizer.json +loading file None +loading file None +loading file None + #0: 0%| | 0/153 [00:00