{ "cutoff_len": 1024, "save_step": 2000, "early_stop_test_step": 2000, "train_lora_candidate_num": 1, "train_lora_simultaneously_num": 1, "train_strategy": "optim", "lora": [ { "name": "alpaca-mixlora-7b", "optim": "adamw", "lr": 3e-4, "batch_size": 16, "micro_batch_size": 4, "test_batch_size": 64, "num_epochs": 2, "r": 8, "lora_alpha": 16, "lora_dropout": 0.05, "target_modules": { "q_proj": false, "k_proj": false, "v_proj": false, "o_proj": false, "w1_proj": true, "w2_proj": true, "w3_proj": true }, "routing_strategy": "mixtral", "num_experts": 8, "top_k": 3, "act_fn": "silu", "data": "yahma/alpaca-cleaned", "prompt": "template/alpaca.json", "group_by_length": false, "expand_side": "right" } ] }