diff --git "a/wandb/run-20210709_144100-2k1kyrq2/files/output.log" "b/wandb/run-20210709_144100-2k1kyrq2/files/output.log" new file mode 100644--- /dev/null +++ "b/wandb/run-20210709_144100-2k1kyrq2/files/output.log" @@ -0,0 +1,424 @@ +INFO:__main__:Training/evaluation parameters TrainingArguments( +_n_gpu=0, +adafactor=False, +adam_beta1=0.9, +adam_beta2=0.98, +adam_epsilon=1e-08, +dataloader_drop_last=False, +dataloader_num_workers=64, +dataloader_pin_memory=True, +ddp_find_unused_parameters=None, +debug=[], +deepspeed=None, +disable_tqdm=False, +do_eval=True, +do_predict=False, +do_train=True, +eval_accumulation_steps=None, +eval_steps=5000, +evaluation_strategy=IntervalStrategy.NO, +fp16=False, +fp16_backend=auto, +fp16_full_eval=False, +fp16_opt_level=O1, +gradient_accumulation_steps=1, +greater_is_better=None, +group_by_length=False, +ignore_data_skip=False, +label_names=None, +label_smoothing_factor=0.0, +learning_rate=0.0024, +length_column_name=length, +load_best_model_at_end=False, +local_rank=-1, +log_level=-1, +log_level_replica=-1, +log_on_each_node=True, +logging_dir=/home/cahya/Work/flax-community/gpt2-medium-indonesian/runs/Jul09_14-41-04_t1v-n-528d9406-w-0, +logging_first_step=False, +logging_steps=5000, +logging_strategy=IntervalStrategy.STEPS, +lr_scheduler_type=SchedulerType.LINEAR, +max_grad_norm=1.0, +max_steps=-1, +metric_for_best_model=None, +mp_parameters=, +no_cuda=False, +num_train_epochs=20.0, +output_dir=/home/cahya/Work/flax-community/gpt2-medium-indonesian, +overwrite_output_dir=True, +past_index=-1, +per_device_eval_batch_size=24, +per_device_train_batch_size=24, +prediction_loss_only=False, +push_to_hub=True, +push_to_hub_model_id=gpt2-medium-indonesian, +push_to_hub_organization=None, +push_to_hub_token=None, +remove_unused_columns=True, +report_to=['tensorboard', 'wandb'], +resume_from_checkpoint=None, +run_name=/home/cahya/Work/flax-community/gpt2-medium-indonesian, +save_on_each_node=False, +save_steps=5000, +save_strategy=IntervalStrategy.STEPS, +save_total_limit=None, +seed=42, +sharded_ddp=[], +skip_memory_metrics=True, +tpu_metrics_debug=False, +tpu_num_cores=None, +use_legacy_prediction_loop=False, +warmup_ratio=0.0, +warmup_steps=1000, +weight_decay=0.01, +) +WARNING:datasets.builder:Reusing dataset oscar (/home/cahya/.cache/huggingface/datasets/oscar/unshuffled_deduplicated_id/1.0.0/84838bd49d2295f62008383b05620571535451d84545037bb94d6f3501651df2) +WARNING:datasets.builder:Reusing dataset oscar (/home/cahya/.cache/huggingface/datasets/oscar/unshuffled_deduplicated_id/1.0.0/84838bd49d2295f62008383b05620571535451d84545037bb94d6f3501651df2) +WARNING:datasets.builder:Reusing dataset oscar (/home/cahya/.cache/huggingface/datasets/oscar/unshuffled_deduplicated_id/1.0.0/84838bd49d2295f62008383b05620571535451d84545037bb94d6f3501651df2) +loading configuration file /home/cahya/Work/flax-community/gpt2-medium-indonesian/config.json +Model config GPT2Config { + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.0, + "bos_token_id": 50256, + "embd_pdrop": 0.0, + "eos_token_id": 50256, + "gradient_checkpointing": false, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 1024, + "n_head": 16, + "n_inner": null, + "n_layer": 24, + "n_positions": 1024, + "n_special": 0, + "predict_special_tokens": true, + "resid_pdrop": 0.0, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.1, + "summary_proj_to_labels": true, + 
"summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 50 + } + }, + "transformers_version": "4.9.0.dev0", + "use_cache": true, + "vocab_size": 50257 +} +Could not locate the tokenizer configuration file, will try to use the model config instead. +loading configuration file /home/cahya/Work/flax-community/gpt2-medium-indonesian/config.json +Model config GPT2Config { + "activation_function": "gelu_new", + "architectures": [ + "GPT2LMHeadModel" + ], + "attn_pdrop": 0.0, + "bos_token_id": 50256, + "embd_pdrop": 0.0, + "eos_token_id": 50256, + "gradient_checkpointing": false, + "initializer_range": 0.02, + "layer_norm_epsilon": 1e-05, + "model_type": "gpt2", + "n_ctx": 1024, + "n_embd": 1024, + "n_head": 16, + "n_inner": null, + "n_layer": 24, + "n_positions": 1024, + "n_special": 0, + "predict_special_tokens": true, + "resid_pdrop": 0.0, + "scale_attn_weights": true, + "summary_activation": null, + "summary_first_dropout": 0.1, + "summary_proj_to_labels": true, + "summary_type": "cls_index", + "summary_use_proj": true, + "task_specific_params": { + "text-generation": { + "do_sample": true, + "max_length": 50 + } + }, + "transformers_version": "4.9.0.dev0", + "use_cache": true, + "vocab_size": 50257 +} +Didn't find file /home/cahya/Work/flax-community/gpt2-medium-indonesian/vocab.json. We won't load it. +Didn't find file /home/cahya/Work/flax-community/gpt2-medium-indonesian/merges.txt. We won't load it. +Didn't find file /home/cahya/Work/flax-community/gpt2-medium-indonesian/added_tokens.json. We won't load it. +Didn't find file /home/cahya/Work/flax-community/gpt2-medium-indonesian/special_tokens_map.json. We won't load it. +Didn't find file /home/cahya/Work/flax-community/gpt2-medium-indonesian/tokenizer_config.json. We won't load it. +loading file None +loading file None +loading file /home/cahya/Work/flax-community/gpt2-medium-indonesian/tokenizer.json +loading file None +loading file None +loading file None + #0: 0%| | 0/153 [00:00