R-facebook-bart-base-full-ft-with-tum-nlp-german-gpt2_easy-prior-pp-no-ls-4c77
/
training_parameter.json
{
  "output_dir": "./results/R-facebook-bart-base-full-ft-with-tum-nlp-german-gpt2_easy-prior-pp-no-ls-4c77",
  "overwrite_output_dir": false,
  "do_train": false,
  "do_eval": true,
  "do_predict": false,
  "evaluation_strategy": "steps",
  "prediction_loss_only": false,
  "per_device_train_batch_size": 4,
  "per_device_eval_batch_size": 1,
  "per_gpu_train_batch_size": null,
  "per_gpu_eval_batch_size": null,
  "gradient_accumulation_steps": 8,
  "eval_accumulation_steps": null,
  "eval_delay": 0,
  "learning_rate": 5e-05,
  "weight_decay": 0.01,
  "adam_beta1": 0.9,
  "adam_beta2": 0.999,
  "adam_epsilon": 1e-08,
  "max_grad_norm": 1.0,
  "num_train_epochs": 15,
  "max_steps": -1,
  "lr_scheduler_type": "linear",
  "warmup_ratio": 0.0,
  "warmup_steps": 100,
  "log_level": "passive",
  "log_level_replica": "warning",
  "log_on_each_node": true,
  "logging_dir": "./results/R-facebook-bart-base-full-ft-with-tum-nlp-german-gpt2_easy-prior-pp-no-ls-4c77/runs/Jul15_11-11-24_social-medium-3",
  "logging_strategy": "steps",
  "logging_first_step": true,
  "logging_steps": 20,
  "logging_nan_inf_filter": true,
  "save_strategy": "steps",
  "save_steps": 100,
  "save_total_limit": 5,
  "save_safetensors": false,
  "save_on_each_node": false,
  "no_cuda": false,
  "use_mps_device": false,
  "seed": 42,
  "data_seed": null,
  "jit_mode_eval": false,
  "use_ipex": false,
  "bf16": false,
  "fp16": true,
  "fp16_opt_level": "O1",
  "half_precision_backend": "cuda_amp",
  "bf16_full_eval": false,
  "fp16_full_eval": false,
  "tf32": null,
  "local_rank": 0,
  "ddp_backend": null,
  "tpu_num_cores": null,
  "tpu_metrics_debug": false,
  "debug": [],
  "dataloader_drop_last": false,
  "eval_steps": 100,
  "dataloader_num_workers": 23,
  "past_index": -1,
  "run_name": "./results/R-facebook-bart-base-full-ft-with-tum-nlp-german-gpt2_easy-prior-pp-no-ls-4c77",
  "disable_tqdm": false,
  "remove_unused_columns": true,
  "label_names": null,
  "load_best_model_at_end": true,
  "metric_for_best_model": "sacrebleu",
  "greater_is_better": true,
  "ignore_data_skip": false,
  "sharded_ddp": [],
  "fsdp": [],
  "fsdp_min_num_params": 0,
  "fsdp_config": {
    "fsdp_min_num_params": 0,
    "xla": false,
    "xla_fsdp_grad_ckpt": false
  },
  "fsdp_transformer_layer_cls_to_wrap": null,
  "deepspeed": null,
  "label_smoothing_factor": 0.1,
  "optim": "adamw_torch",
  "optim_args": null,
  "adafactor": false,
  "group_by_length": true,
  "length_column_name": "length",
  "report_to": [
    "tensorboard"
  ],
  "ddp_find_unused_parameters": null,
  "ddp_bucket_cap_mb": null,
  "dataloader_pin_memory": true,
  "skip_memory_metrics": true,
  "use_legacy_prediction_loop": false,
  "push_to_hub": true,
  "resume_from_checkpoint": null,
  "hub_model_id": "nlp-lab-2023-seq2seq/R-facebook-bart-base-full-ft-with-tum-nlp-german-gpt2_easy-prior-pp-no-ls-4c77",
  "hub_strategy": "checkpoint",
  "hub_token": "<HUB_TOKEN>",
  "hub_private_repo": true,
  "gradient_checkpointing": false,
  "include_inputs_for_metrics": true,
  "fp16_backend": "auto",
  "push_to_hub_model_id": null,
  "push_to_hub_organization": null,
  "push_to_hub_token": "<PUSH_TO_HUB_TOKEN>",
  "mp_parameters": "",
  "auto_find_batch_size": false,
  "full_determinism": false,
  "torchdynamo": null,
  "ray_scope": "last",
  "ddp_timeout": 1800,
  "torch_compile": false,
  "torch_compile_backend": null,
  "torch_compile_mode": null,
  "xpu_backend": null,
  "sortish_sampler": false,
  "predict_with_generate": true,
  "generation_max_length": 196,
  "generation_num_beams": 1,
  "generation_config": null,
  "prior_lambda": 5e-05,
  "prior_loss": "perplexity",
  "ls_theta": 0,
  "generate_during_train": true,
  "finetuning_strategy": "full"
}