phi3.5-mini-adapter_v0 / train_args.json
BTGFM's picture
Training in progress, step 20
663a8e4 verified
raw
history blame contribute delete
729 Bytes
{
"BASE_MODEL": "microsoft/Phi-3.5-mini-instruct",
"SEQ_LENGTH": 512,
"MAX_STEPS": 250,
"BATCH_SIZE": 4,
"GR_ACC_STEPS": 4,
"LR": 0.0002,
"LR_SCHEDULER_TYPE": "cosine",
"OPTIMIZER": "adamw_torch",
"WEIGHT_DECAY": 0.001,
"WARMUP_RATIO": 0.05,
"EVAL_FREQ": 10,
"SAVE_FREQ": 20,
"SAVE_LIMIT": 2,
"LOG_FREQ": 1,
"BF16": true,
"FP16": false,
"FIM_RATE": 0.5,
"FIM_SPM_RAT": 0.5,
"LORA_R": 16,
"LORA_ALPHA": 48,
"LORA_DROPOUT": 0.0,
"LORA_TARGET_MODULES": "all-linear",
"USE_NESTED_QUANT": true,
"BNB_4BIT_COMPUTE_DTYPE": "bfloat16",
"load_in_8bit": true,
"SEED": 0,
"EARLY_STOP_PATIENCE": 3,
"EARLY_STOP_THRESHOLD": 0.01
}