{
  "data_paths": "[/root/mycode/__cache__/__temp__/mft_inputs/source_dataset/12600004_latest.jsonl]",
  "output_dir": "/root/mycode/__cache__/__temp__/mft_outputs/huggingface",
  "tb_dir": "/home/admin/logs/tfevent",
  "pretrained_model_path": "/gruntdata/code_gpt_nas/user/166675/model/base_model/codellama-7b-hf",
  "model_type": "llama",
  "load_raw_dataset": true,
  "task_weights": "[1.0]",
  "data_weights": "[1.]",
  "low_cpu_mem_usage": true,
  "data_split": "95,5,0",
  "padding_mode": "padding",
  "tokenize_mode": "sft",
  "weighted_loss_mode": "case3",
  "peft_type": "qlora",
  "quantization": "4bit",
  "lora_rank": 64,
  "lora_alpha": 32,
  "lora_dropout": 0.05,
  "per_device_train_batch_size": 1,
  "per_device_eval_batch_size": 1,
  "tokenizer_type": "AutoTokenizer",
  "learning_rate": 5e-05,
  "min_lr": 5e-06,
  "weight_decay": 0.1,
  "gradient_accumulation_steps": 1,
  "lr_scheduler_type": "cosine",
  "num_warmup_steps": 100,
  "num_train_epochs": 2,
  "seed": 42,
  "seq_length": 2048,
  "resume_from_checkpoint": null,
  "log_interval": 10,
  "checkpointing_steps": null,
  "evalation_steps": 500,
  "max_train_steps": null,
  "epoch_checkpointing": false,
  "shuffle_before_split": true,
  "use_random_sampler": true,
  "early_stopping": true,
  "early_stopping_stall_num": 3,
  "saving_limit": null,
  "use_dynamic_padding": true,
  "selfpaced_interval": 1,
  "selfpaced_history_length": 400,
  "selfpaced_sample_valid_num": 1,
  "selfpaced_scale_factor": 50,
  "use_xformers": true,
  "trust_remote_code": true,
  "weight_by_num_documents": true,
  "make_vocab_size_divisible_by": 128,
  "model_parallel_size": 1,
  "use_slow_tokenizer": true,
  "world_size": 4
}