{
  "dataset": {
    "path": "/root/jb/personas/finance/budgeting/data/qa_pairs_train.json",
    "num_train_samples": 1148,
    "dataset_text_field": "text"
  },
  "validation_dataset": {
    "path": "/root/jb/personas/finance/budgeting/data/qa_pairs_val.json",
    "num_eval_samples": 144
  },
  "model": {
    "name": "Qwen/Qwen3-4B-Instruct-2507",
    "load_in_4bit": false
  },
  "lora": {
    "r": 16,
    "lora_alpha": 16,
    "lora_dropout": 0.05,
    "bias": "none",
    "target_modules": [
      "q_proj",
      "k_proj",
      "v_proj",
      "o_proj",
      "gate_proj",
      "up_proj",
      "down_proj"
    ],
    "random_state": 3407
  },
  "training": {
    "output_dir": "/root/jb/personas/finance/budgeting/finetune_lora_output/2026-03-04_04-18-09",
    "max_seq_length": 2048,
    "num_train_epochs": 3.0,
    "max_steps": -1,
    "per_device_train_batch_size": 2,
    "gradient_accumulation_steps": 4,
    "learning_rate": 0.0001,
    "weight_decay": 0.01,
    "warmup_ratio": 0.03,
    "lr_scheduler_type": "cosine",
    "optim": "adamw_torch",
    "adam_beta1": 0.9,
    "adam_beta2": 0.95,
    "bf16": true,
    "seed": 3407,
    "logging_steps": 10,
    "save_strategy": "steps",
    "save_steps": 200,
    "save_total_limit": 2,
    "dataset_num_proc": 4,
    "packing": false
  }
}