{
  "model_type": "krdmodel",
  "vocab_size": 111,
  "dim": 1024,
  "n_layers": 16,
  "n_heads": 16,
  "n_kv_heads": 4,
  "ffn_dim": 2816,
  "max_seq_len": 2048,
  "batch_size": 4,
  "gradient_accumulation": 8,
  "train_steps": 5000,
  "lr": 2e-4,
  "mixed_precision": "fp16",
  "lora_rank": 32,
  "use_flash": true,
  "grad_checkpoint": true
}