File size: 6,012 Bytes
b9abaaf |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 |
WARNING:__main__:Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: False, 16-bits training: False INFO:__main__:Training/evaluation parameters TrainingArguments( _n_gpu=1, accelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None, 'use_configured_state': False}, adafactor=False, adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, auto_find_batch_size=False, batch_eval_metrics=False, bf16=False, bf16_full_eval=False, data_seed=None, dataloader_drop_last=False, dataloader_num_workers=0, dataloader_persistent_workers=False, dataloader_pin_memory=True, dataloader_prefetch_factor=None, ddp_backend=None, ddp_broadcast_buffers=None, ddp_bucket_cap_mb=None, ddp_find_unused_parameters=None, ddp_timeout=1800, debug=[], deepspeed=None, disable_tqdm=False, dispatch_batches=None, do_eval=True, do_predict=False, do_train=True, eval_accumulation_steps=None, eval_delay=0, eval_do_concat_batches=True, eval_on_start=False, eval_steps=500, eval_strategy=IntervalStrategy.STEPS, eval_use_gather_object=False, evaluation_strategy=steps, fp16=False, fp16_backend=auto, fp16_full_eval=False, fp16_opt_level=O1, fsdp=[], fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, fsdp_min_num_params=0, fsdp_transformer_layer_cls_to_wrap=None, full_determinism=False, gradient_accumulation_steps=1, gradient_checkpointing=False, gradient_checkpointing_kwargs=None, greater_is_better=False, group_by_length=False, half_precision_backend=auto, hub_always_push=False, hub_model_id=None, hub_private_repo=False, hub_strategy=HubStrategy.EVERY_SAVE, hub_token=<HUB_TOKEN>, ignore_data_skip=False, include_inputs_for_metrics=False, include_num_input_tokens_seen=False, include_tokens_per_second=False, jit_mode_eval=False, label_names=None, label_smoothing_factor=0.0, learning_rate=5e-05, length_column_name=length, load_best_model_at_end=True, local_rank=0, log_level=passive, log_level_replica=warning, log_on_each_node=True, logging_dir=/home/iais_marenpielka/Bouthaina/results_fixed/runs/Aug25_13-33-53_lmgpu-node-04, logging_first_step=False, logging_nan_inf_filter=True, logging_steps=500, logging_strategy=IntervalStrategy.STEPS, lr_scheduler_kwargs={}, lr_scheduler_type=SchedulerType.LINEAR, max_grad_norm=1.0, max_steps=-1, metric_for_best_model=loss, mp_parameters=, neftune_noise_alpha=None, no_cuda=False, num_train_epochs=20.0, optim=OptimizerNames.ADAMW_TORCH, optim_args=None, optim_target_modules=None, output_dir=/home/iais_marenpielka/Bouthaina/results_fixed, overwrite_output_dir=False, past_index=-1, per_device_eval_batch_size=8, per_device_train_batch_size=8, prediction_loss_only=False, push_to_hub=True, push_to_hub_model_id=None, push_to_hub_organization=None, push_to_hub_token=<PUSH_TO_HUB_TOKEN>, ray_scope=last, remove_unused_columns=True, report_to=[], restore_callback_states_from_checkpoint=False, resume_from_checkpoint=None, run_name=/home/iais_marenpielka/Bouthaina/results_fixed, save_on_each_node=False, save_only_model=False, save_safetensors=True, save_steps=500, save_strategy=IntervalStrategy.STEPS, save_total_limit=None, seed=42, skip_memory_metrics=True, split_batches=None, tf32=None, torch_compile=False, torch_compile_backend=None, torch_compile_mode=None, torch_empty_cache_steps=None, torchdynamo=None, tpu_metrics_debug=False, tpu_num_cores=None, use_cpu=False, use_ipex=False, use_legacy_prediction_loop=False, use_mps_device=False, warmup_ratio=0.0, warmup_steps=500, weight_decay=0.0, ) INFO:datasets.builder:Using custom data configuration default-c90064c89aa4f4b8 INFO:datasets.info:Loading Dataset Infos from /home/iais_marenpielka/Bouthaina/miniconda3/lib/python3.12/site-packages/datasets/packaged_modules/text INFO:datasets.builder:Overwrite dataset info from restored data version if exists. INFO:datasets.info:Loading Dataset info from /home/iais_marenpielka/.cache/huggingface/datasets/text/default-c90064c89aa4f4b8/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101 INFO:datasets.builder:Found cached dataset text (/home/iais_marenpielka/.cache/huggingface/datasets/text/default-c90064c89aa4f4b8/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101) INFO:datasets.info:Loading Dataset info from /home/iais_marenpielka/.cache/huggingface/datasets/text/default-c90064c89aa4f4b8/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101 INFO:datasets.arrow_dataset:Loading cached processed dataset at /home/iais_marenpielka/.cache/huggingface/datasets/text/default-c90064c89aa4f4b8/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101/cache-c0ee57decace0f18.arrow INFO:datasets.arrow_dataset:Loading cached processed dataset at /home/iais_marenpielka/.cache/huggingface/datasets/text/default-c90064c89aa4f4b8/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101/cache-de0da9023cfd9691.arrow WARNING:__main__:The tokenizer picked seems to have a very large `model_max_length` (1000000000000000019884624838656). Using block_size=768 instead. You can change that default value by passing --block_size xxx. INFO:datasets.arrow_dataset:Loading cached processed dataset at /home/iais_marenpielka/.cache/huggingface/datasets/text/default-c90064c89aa4f4b8/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101/cache-8a18d3af082678db.arrow INFO:datasets.arrow_dataset:Loading cached processed dataset at /home/iais_marenpielka/.cache/huggingface/datasets/text/default-c90064c89aa4f4b8/0.0.0/96636a050ef51804b84abbfd4f4ad440e01153c24b86293eb5c3b300a41f9101/cache-840375b4a2e73fb1.arrow WARNING:accelerate.utils.other:Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher. WARNING:root:Epoch 1.0: No losses recorded yet. INFO:absl:Using default tokenizer. |