| { | |
| "model_base": "Qwen/Qwen2.5-14B-Instruct", | |
| "model_name": "RegTech-14B-Instruct", | |
| "dataset": "./train.jsonl", | |
| "env_file": "/home/ubuntu/sophia-core-server/.tuning/.env.14B", | |
| "train_samples": 2134, | |
| "eval_samples": 237, | |
| "params": { | |
| "rank": 16, | |
| "alpha": 16, | |
| "dropout": 0.1, | |
| "lr": 2e-05, | |
| "scheduler": "cosine", | |
| "epochs": 1, | |
| "effective_batch": 4, | |
| "max_seq_length": 4096, | |
| "neftune_alpha": 5.0, | |
| "target_modules": [ | |
| "q_proj", | |
| "k_proj", | |
| "v_proj", | |
| "o_proj", | |
| "gate_proj", | |
| "up_proj", | |
| "down_proj" | |
| ] | |
| }, | |
| "results": { | |
| "total_steps": 534, | |
| "final_train_loss": 1.073, | |
| "best_eval_loss": 1.098220944404602, | |
| "best_eval_step": 520, | |
| "best_token_accuracy": 0.7624, | |
| "elapsed_minutes": 19.9 | |
| }, | |
| "loss_history": { | |
| "train": [ | |
| [ | |
| 5, | |
| 1.8103 | |
| ], | |
| [ | |
| 10, | |
| 1.9463 | |
| ], | |
| [ | |
| 15, | |
| 1.7076 | |
| ], | |
| [ | |
| 20, | |
| 1.8542 | |
| ], | |
| [ | |
| 25, | |
| 1.9426 | |
| ], | |
| [ | |
| 30, | |
| 2.026 | |
| ], | |
| [ | |
| 35, | |
| 1.9177 | |
| ], | |
| [ | |
| 40, | |
| 1.7516 | |
| ], | |
| [ | |
| 45, | |
| 1.655 | |
| ], | |
| [ | |
| 50, | |
| 1.4414 | |
| ], | |
| [ | |
| 55, | |
| 1.4496 | |
| ], | |
| [ | |
| 60, | |
| 1.3549 | |
| ], | |
| [ | |
| 65, | |
| 1.3485 | |
| ], | |
| [ | |
| 70, | |
| 1.6647 | |
| ], | |
| [ | |
| 75, | |
| 1.3798 | |
| ], | |
| [ | |
| 80, | |
| 1.5445 | |
| ], | |
| [ | |
| 85, | |
| 1.3067 | |
| ], | |
| [ | |
| 90, | |
| 1.3387 | |
| ], | |
| [ | |
| 95, | |
| 1.2908 | |
| ], | |
| [ | |
| 100, | |
| 1.2998 | |
| ], | |
| [ | |
| 105, | |
| 1.3263 | |
| ], | |
| [ | |
| 110, | |
| 1.3664 | |
| ], | |
| [ | |
| 115, | |
| 1.2699 | |
| ], | |
| [ | |
| 120, | |
| 1.3007 | |
| ], | |
| [ | |
| 125, | |
| 1.3573 | |
| ], | |
| [ | |
| 130, | |
| 1.243 | |
| ], | |
| [ | |
| 135, | |
| 1.3071 | |
| ], | |
| [ | |
| 140, | |
| 1.2171 | |
| ], | |
| [ | |
| 145, | |
| 1.1797 | |
| ], | |
| [ | |
| 150, | |
| 1.1813 | |
| ], | |
| [ | |
| 155, | |
| 1.3488 | |
| ], | |
| [ | |
| 160, | |
| 1.1427 | |
| ], | |
| [ | |
| 165, | |
| 1.2909 | |
| ], | |
| [ | |
| 170, | |
| 1.1646 | |
| ], | |
| [ | |
| 175, | |
| 1.0927 | |
| ], | |
| [ | |
| 180, | |
| 1.1764 | |
| ], | |
| [ | |
| 185, | |
| 1.1671 | |
| ], | |
| [ | |
| 190, | |
| 1.2527 | |
| ], | |
| [ | |
| 195, | |
| 1.0971 | |
| ], | |
| [ | |
| 200, | |
| 1.0705 | |
| ], | |
| [ | |
| 205, | |
| 1.1283 | |
| ], | |
| [ | |
| 210, | |
| 1.1866 | |
| ], | |
| [ | |
| 215, | |
| 1.1713 | |
| ], | |
| [ | |
| 220, | |
| 1.0456 | |
| ], | |
| [ | |
| 225, | |
| 1.137 | |
| ], | |
| [ | |
| 230, | |
| 1.074 | |
| ], | |
| [ | |
| 235, | |
| 1.062 | |
| ], | |
| [ | |
| 240, | |
| 1.0085 | |
| ], | |
| [ | |
| 245, | |
| 1.0669 | |
| ], | |
| [ | |
| 250, | |
| 0.9657 | |
| ], | |
| [ | |
| 255, | |
| 1.0515 | |
| ], | |
| [ | |
| 260, | |
| 1.0492 | |
| ], | |
| [ | |
| 265, | |
| 1.1586 | |
| ], | |
| [ | |
| 270, | |
| 1.0454 | |
| ], | |
| [ | |
| 275, | |
| 1.1171 | |
| ], | |
| [ | |
| 280, | |
| 1.0412 | |
| ], | |
| [ | |
| 285, | |
| 0.9569 | |
| ], | |
| [ | |
| 290, | |
| 1.0504 | |
| ], | |
| [ | |
| 295, | |
| 1.0302 | |
| ], | |
| [ | |
| 300, | |
| 1.0497 | |
| ], | |
| [ | |
| 305, | |
| 0.9437 | |
| ], | |
| [ | |
| 310, | |
| 0.9805 | |
| ], | |
| [ | |
| 315, | |
| 1.0755 | |
| ], | |
| [ | |
| 320, | |
| 1.0508 | |
| ], | |
| [ | |
| 325, | |
| 1.0563 | |
| ], | |
| [ | |
| 330, | |
| 0.9653 | |
| ], | |
| [ | |
| 335, | |
| 0.9692 | |
| ], | |
| [ | |
| 340, | |
| 1.1218 | |
| ], | |
| [ | |
| 345, | |
| 1.0367 | |
| ], | |
| [ | |
| 350, | |
| 0.9287 | |
| ], | |
| [ | |
| 355, | |
| 1.0165 | |
| ], | |
| [ | |
| 360, | |
| 1.0356 | |
| ], | |
| [ | |
| 365, | |
| 1.0249 | |
| ], | |
| [ | |
| 370, | |
| 0.9885 | |
| ], | |
| [ | |
| 375, | |
| 0.9588 | |
| ], | |
| [ | |
| 380, | |
| 0.977 | |
| ], | |
| [ | |
| 385, | |
| 0.9921 | |
| ], | |
| [ | |
| 390, | |
| 0.9863 | |
| ], | |
| [ | |
| 395, | |
| 1.0569 | |
| ], | |
| [ | |
| 400, | |
| 0.982 | |
| ], | |
| [ | |
| 405, | |
| 1.0738 | |
| ], | |
| [ | |
| 410, | |
| 1.034 | |
| ], | |
| [ | |
| 415, | |
| 1.067 | |
| ], | |
| [ | |
| 420, | |
| 1.044 | |
| ], | |
| [ | |
| 425, | |
| 0.9622 | |
| ], | |
| [ | |
| 430, | |
| 0.9102 | |
| ], | |
| [ | |
| 435, | |
| 1.0164 | |
| ], | |
| [ | |
| 440, | |
| 0.9833 | |
| ], | |
| [ | |
| 445, | |
| 0.9276 | |
| ], | |
| [ | |
| 450, | |
| 0.8751 | |
| ], | |
| [ | |
| 455, | |
| 1.0087 | |
| ], | |
| [ | |
| 460, | |
| 1.032 | |
| ], | |
| [ | |
| 465, | |
| 1.0099 | |
| ], | |
| [ | |
| 470, | |
| 1.0805 | |
| ], | |
| [ | |
| 475, | |
| 1.0326 | |
| ], | |
| [ | |
| 480, | |
| 0.968 | |
| ], | |
| [ | |
| 485, | |
| 0.948 | |
| ], | |
| [ | |
| 490, | |
| 1.0297 | |
| ], | |
| [ | |
| 495, | |
| 1.0083 | |
| ], | |
| [ | |
| 500, | |
| 0.955 | |
| ], | |
| [ | |
| 505, | |
| 0.967 | |
| ], | |
| [ | |
| 510, | |
| 0.8891 | |
| ], | |
| [ | |
| 515, | |
| 0.9298 | |
| ], | |
| [ | |
| 520, | |
| 0.9516 | |
| ], | |
| [ | |
| 525, | |
| 0.9553 | |
| ], | |
| [ | |
| 530, | |
| 1.073 | |
| ] | |
| ], | |
| "eval": [ | |
| [ | |
| 40, | |
| 1.9159308671951294 | |
| ], | |
| [ | |
| 80, | |
| 1.5397088527679443 | |
| ], | |
| [ | |
| 120, | |
| 1.398988127708435 | |
| ], | |
| [ | |
| 160, | |
| 1.3029450178146362 | |
| ], | |
| [ | |
| 200, | |
| 1.2370014190673828 | |
| ], | |
| [ | |
| 240, | |
| 1.1978071928024292 | |
| ], | |
| [ | |
| 280, | |
| 1.1654218435287476 | |
| ], | |
| [ | |
| 320, | |
| 1.1381187438964844 | |
| ], | |
| [ | |
| 360, | |
| 1.1211411952972412 | |
| ], | |
| [ | |
| 400, | |
| 1.107499599456787 | |
| ], | |
| [ | |
| 440, | |
| 1.1020454168319702 | |
| ], | |
| [ | |
| 480, | |
| 1.0991754531860352 | |
| ], | |
| [ | |
| 520, | |
| 1.098220944404602 | |
| ] | |
| ], | |
| "token_accuracy": [ | |
| [ | |
| 40, | |
| 0.6898 | |
| ], | |
| [ | |
| 80, | |
| 0.7085 | |
| ], | |
| [ | |
| 120, | |
| 0.7226 | |
| ], | |
| [ | |
| 160, | |
| 0.7344 | |
| ], | |
| [ | |
| 200, | |
| 0.7426 | |
| ], | |
| [ | |
| 240, | |
| 0.7473 | |
| ], | |
| [ | |
| 280, | |
| 0.7522 | |
| ], | |
| [ | |
| 320, | |
| 0.7564 | |
| ], | |
| [ | |
| 360, | |
| 0.7585 | |
| ], | |
| [ | |
| 400, | |
| 0.7605 | |
| ], | |
| [ | |
| 440, | |
| 0.7618 | |
| ], | |
| [ | |
| 480, | |
| 0.7624 | |
| ], | |
| [ | |
| 520, | |
| 0.7624 | |
| ] | |
| ] | |
| } | |
| } |