|
{ |
|
"model_type": "solar-10-7b-instruct-v1", |
|
"model_id_or_path": "upstage/SOLAR-10.7B-Instruct-v1.0", |
|
"model_revision": "master", |
|
"model_cache_dir": "/home/aiscuser/Swift-Scripts/pretrained/upstage/SOLAR-10.7B-Instruct-v1.0", |
|
"sft_type": "lora", |
|
"freeze_parameters": 0.0, |
|
"tuner_backend": "swift", |
|
"template_type": "llama", |
|
"output_dir": "/home/aiscuser/Swift-Scripts/output/solar-10-7b-instruct-v1/v13-20240202-072530", |
|
"add_output_dir_suffix": true, |
|
"custom_output_dir_suffix": null, |
|
"ddp_backend": "nccl", |
|
"seed": 42, |
|
"resume_from_checkpoint": null, |
|
"dtype": "fp16", |
|
"dataset": [ |
|
"_custom_dataset" |
|
], |
|
"dataset_seed": 42, |
|
"dataset_test_ratio": 0.01, |
|
"train_dataset_sample": -1, |
|
"val_dataset_sample": null, |
|
"system": "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\n\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.", |
|
"max_length": 3072, |
|
"truncation_strategy": "delete", |
|
"check_dataset_strategy": "warning", |
|
"custom_train_dataset_path": [ |
|
"/home/aiscuser/Swift-Scripts/data/wsdm/model/Pseudo/best_eval_1.01/release_train_data.json" |
|
], |
|
"custom_val_dataset_path": [], |
|
"self_cognition_sample": 0, |
|
"model_name": null, |
|
"model_author": null, |
|
"quantization_bit": 0, |
|
"bnb_4bit_comp_dtype": "fp16", |
|
"bnb_4bit_quant_type": "nf4", |
|
"bnb_4bit_use_double_quant": true, |
|
"lora_target_modules": [ |
|
"down_proj", |
|
"k_proj", |
|
"o_proj", |
|
"up_proj", |
|
"gate_proj", |
|
"v_proj", |
|
"q_proj" |
|
], |
|
"lora_rank": 8, |
|
"lora_alpha": 16, |
|
"lora_dropout_p": 0.05, |
|
"neftune_alpha": 0.0, |
|
"gradient_checkpointing": true, |
|
"deepspeed_config_path": "/home/aiscuser/Swift-Scripts/config/zero2.json", |
|
"batch_size": 1, |
|
"eval_batch_size": 1, |
|
"num_train_epochs": 4, |
|
"max_steps": -1, |
|
"optim": "adamw_torch", |
|
"learning_rate": 0.0001, |
|
"weight_decay": 0.01, |
|
"gradient_accumulation_steps": 8, |
|
"max_grad_norm": 0.5, |
|
"predict_with_generate": false, |
|
"lr_scheduler_type": "linear", |
|
"warmup_ratio": 0.03, |
|
"eval_steps": 100, |
|
"save_steps": 100, |
|
"only_save_model": true, |
|
"save_total_limit": null, |
|
"logging_steps": 100, |
|
"dataloader_num_workers": 1, |
|
"push_to_hub": false, |
|
"hub_model_id": "solar-10-7b-instruct-v1-lora", |
|
"hub_private_repo": true, |
|
"push_hub_strategy": "push_best", |
|
"hub_token": null, |
|
"test_oom_error": false, |
|
"disable_tqdm": false, |
|
"lazy_tokenize": false, |
|
"preprocess_num_proc": 1, |
|
"use_flash_attn": null, |
|
"ignore_args_error": false, |
|
"logging_dir": "/home/aiscuser/Swift-Scripts/output/solar-10-7b-instruct-v1/v13-20240202-072530/runs", |
|
"report_to": [ |
|
"all" |
|
], |
|
"check_model_is_latest": false, |
|
"acc_strategy": "token", |
|
"save_on_each_node": true, |
|
"save_strategy": "steps", |
|
"save_safetensors": true, |
|
"max_new_tokens": 2048, |
|
"do_sample": true, |
|
"temperature": 0.3, |
|
"top_k": 20, |
|
"top_p": 0.7, |
|
"repetition_penalty": 1.05, |
|
"torch_dtype": "torch.float16", |
|
"fp16": true, |
|
"bf16": false, |
|
"bnb_4bit_compute_dtype": "torch.float16", |
|
"load_in_4bit": false, |
|
"load_in_8bit": false, |
|
"train_sampler_random": true, |
|
"deepspeed": { |
|
"train_batch_size": "auto", |
|
"train_micro_batch_size_per_gpu": "auto", |
|
"gradient_accumulation_steps": "auto", |
|
"gradient_clipping": "auto", |
|
"zero_allow_untested_optimizer": true, |
|
"fp16": { |
|
"enabled": "auto", |
|
"loss_scale": 0, |
|
"initial_scale_power": 16, |
|
"loss_scale_window": 1000, |
|
"hysteresis": 2, |
|
"min_loss_scale": 1 |
|
}, |
|
"zero_optimization": { |
|
"stage": 2, |
|
"allgather_partitions": true, |
|
"allgather_bucket_size": 500000000.0, |
|
"reduce_scatter": true, |
|
"reduce_bucket_size": 500000000.0, |
|
"overlap_comm": false, |
|
"contiguous_gradients": true |
|
} |
|
} |
|
} |