CodeLlama-7b-GQL-hf / args.json
{
"data_paths": "[/root/mycode/__cache__/__temp__/mft_inputs/source_dataset/12600004_latest.jsonl]",
"output_dir": "/root/mycode/__cache__/__temp__/mft_outputs/huggingface",
"tb_dir": "/home/admin/logs/tfevent",
"pretrained_model_path": "/gruntdata/code_gpt_nas/user/166675/model/base_model/codellama-7b-hf",
"model_type": "llama",
"load_raw_dataset": "True",
"task_weights": "[1.0]",
"data_weights": "[1.]",
"low_cpu_mem_usage": "True",
"data_split": "95,5,0",
"padding_mode": "padding",
"tokenize_mode": "sft",
"weighted_loss_mode": "case3",
"peft_type": "qlora",
"quantization": "4bit",
"lora_rank": "64",
"lora_alpha": "32",
"lora_dropout": "0.05",
"per_device_train_batch_size": "1",
"per_device_eval_batch_size": "1",
"tokenizer_type": "AutoTokenizer",
"learning_rate": "5e-05",
"min_lr": "5e-06",
"weight_decay": "0.1",
"gradient_accumulation_steps": "1",
"lr_scheduler_type": "cosine",
"num_warmup_steps": "100",
"num_train_epochs": "2",
"seed": "42",
"seq_length": "2048",
"resume_from_checkpoint": "None",
"log_interval": "10",
"checkpointing_steps": "None",
"evalation_steps": "500",
"max_train_steps": "None",
"epoch_checkpointing": "False",
"shuffle_before_split": "True",
"use_random_sampler": "True",
"early_stopping": "True",
"early_stopping_stall_num": "3",
"saving_limit": "None",
"use_dynamic_padding": "True",
"selfpaced_interval": "1",
"selfpaced_history_length": "400",
"selfpaced_sample_valid_num": "1",
"selfpaced_scale_factor": "50",
"use_xformers": "True",
"trust_remote_code": "True",
"weight_by_num_documents": "True",
"make_vocab_size_divisible_by": "128",
"model_parallel_size": "1",
"use_slow_tokenizer": "True",
"world_size": "4"
}
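
For context: this file is the argument dump of a QLoRA fine-tuning run (MFTCoder-style, judging by the mft_inputs/mft_outputs paths) of codellama-7b-hf on a GQL dataset. With per_device_train_batch_size=1, gradient_accumulation_steps=1, and world_size=4, the effective global batch size is 1 × 1 × 4 = 4 sequences of up to seq_length=2048 tokens.

Below is a minimal sketch, assuming the standard Hugging Face transformers/peft/bitsandbytes APIs, of how the quantization and LoRA fields above map onto code. It is not the training entry point that produced this file: the model identifier stands in for the local pretrained_model_path, and the 4-bit quant type and compute dtype are assumptions not recorded in args.json.

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# "quantization": "4bit" -> load the base model in 4-bit via bitsandbytes
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",               # assumed quant type; not recorded in args.json
    bnb_4bit_compute_dtype=torch.bfloat16,   # assumed compute dtype; not recorded in args.json
)

model = AutoModelForCausalLM.from_pretrained(
    "codellama/CodeLlama-7b-hf",             # stand-in for the local pretrained_model_path
    quantization_config=bnb_config,
    trust_remote_code=True,                  # "trust_remote_code": "True"
)
model = prepare_model_for_kbit_training(model)

# "peft_type": "qlora", "lora_rank": 64, "lora_alpha": 32, "lora_dropout": 0.05
lora_config = LoraConfig(
    r=64,
    lora_alpha=32,
    lora_dropout=0.05,
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()           # only the LoRA adapter weights are trainable

Leaving target_modules unset lets peft fall back to its built-in defaults for llama-type models; the exact projection layers the original run adapted are not listed in this args.json, so they are deliberately not guessed here.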