RegTech-4B-Instruct / training_report.json
MwSpace's picture
Upload RegTech-4B-Instruct
2e472f0 verified
{
"model_base": "Qwen/Qwen3-4B-Instruct-2507",
"model_name": "RegTech-4B-Instruct",
"dataset": "./train.jsonl",
"env_file": "/home/ubuntu/sophia-core-server/.tuning/.env.4B",
"train_samples": 923,
"eval_samples": 102,
"params": {
"rank": 16,
"alpha": 32,
"dropout": 0.1,
"lr": 5e-06,
"scheduler": "cosine",
"epochs": 3,
"effective_batch": 4,
"max_seq_length": 4096,
"neftune_alpha": 5.0,
"target_modules": [
"q_proj",
"k_proj",
"v_proj",
"o_proj",
"gate_proj",
"up_proj",
"down_proj"
]
},
"results": {
"total_steps": 693,
"final_train_loss": 1.241,
"best_eval_loss": 1.1907687187194824,
"best_eval_step": 680,
"elapsed_minutes": 11.9
},
"loss_history": {
"train": [
[
5,
1.9266
],
[
10,
2.1546
],
[
15,
2.377
],
[
20,
2.1251
],
[
25,
2.3294
],
[
30,
2.2016
],
[
35,
2.1198
],
[
40,
2.0536
],
[
45,
2.0804
],
[
50,
1.98
],
[
55,
1.6711
],
[
60,
2.043
],
[
65,
1.7905
],
[
70,
1.9725
],
[
75,
1.8905
],
[
80,
1.8013
],
[
85,
1.6943
],
[
90,
1.5479
],
[
95,
1.71
],
[
100,
1.7296
],
[
105,
1.6177
],
[
110,
1.5224
],
[
115,
1.6453
],
[
120,
1.4372
],
[
125,
1.3471
],
[
130,
1.7257
],
[
135,
1.307
],
[
140,
1.6563
],
[
145,
1.4156
],
[
150,
1.5117
],
[
155,
1.5545
],
[
160,
1.3888
],
[
165,
1.5327
],
[
170,
1.4855
],
[
175,
1.1613
],
[
180,
1.2964
],
[
185,
1.443
],
[
190,
1.4158
],
[
195,
1.3793
],
[
200,
1.1051
],
[
205,
1.3441
],
[
210,
1.2532
],
[
215,
1.296
],
[
220,
1.1679
],
[
225,
1.3165
],
[
230,
1.2472
],
[
235,
1.5172
],
[
240,
1.2431
],
[
245,
1.2391
],
[
250,
1.1359
],
[
255,
1.181
],
[
260,
1.3262
],
[
265,
1.4351
],
[
270,
1.2283
],
[
275,
1.2195
],
[
280,
1.3892
],
[
285,
1.4254
],
[
290,
1.2606
],
[
295,
1.1506
],
[
300,
1.138
],
[
305,
1.3738
],
[
310,
1.2216
],
[
315,
1.2873
],
[
320,
1.1959
],
[
325,
1.2746
],
[
330,
1.2089
],
[
335,
1.2466
],
[
340,
1.1855
],
[
345,
1.3954
],
[
350,
1.0918
],
[
355,
1.2062
],
[
360,
1.2809
],
[
365,
1.3698
],
[
370,
1.3147
],
[
375,
1.1451
],
[
380,
1.0194
],
[
385,
1.1412
],
[
390,
1.1606
],
[
395,
1.1469
],
[
400,
1.2781
],
[
405,
1.2447
],
[
410,
1.3158
],
[
415,
1.1832
],
[
420,
1.2036
],
[
425,
1.1602
],
[
430,
1.4217
],
[
435,
1.1954
],
[
440,
1.1913
],
[
445,
1.0779
],
[
450,
1.1522
],
[
455,
1.2223
],
[
460,
1.0818
],
[
465,
1.429
],
[
470,
1.145
],
[
475,
1.1874
],
[
480,
1.0495
],
[
485,
1.0664
],
[
490,
1.2633
],
[
495,
1.3179
],
[
500,
1.2056
],
[
505,
1.1259
],
[
510,
1.2683
],
[
515,
1.0229
],
[
520,
1.2201
],
[
525,
1.1291
],
[
530,
1.1688
],
[
535,
1.2019
],
[
540,
1.1733
],
[
545,
1.0913
],
[
550,
1.2309
],
[
555,
1.315
],
[
560,
1.1578
],
[
565,
1.2228
],
[
570,
0.9872
],
[
575,
1.1328
],
[
580,
1.1709
],
[
585,
1.1675
],
[
590,
1.3192
],
[
595,
1.0701
],
[
600,
1.1484
],
[
605,
0.9753
],
[
610,
1.2143
],
[
615,
1.238
],
[
620,
1.149
],
[
625,
1.1859
],
[
630,
1.225
],
[
635,
1.1635
],
[
640,
1.2214
],
[
645,
1.1366
],
[
650,
1.3328
],
[
655,
1.1442
],
[
660,
1.0937
],
[
665,
1.1968
],
[
670,
1.2908
],
[
675,
1.2003
],
[
680,
1.0809
],
[
685,
1.2908
],
[
690,
1.241
]
],
"eval": [
[
40,
2.1869590282440186
],
[
80,
1.8702703714370728
],
[
120,
1.5918081998825073
],
[
160,
1.460960865020752
],
[
200,
1.3858165740966797
],
[
240,
1.3407992124557495
],
[
280,
1.3039580583572388
],
[
320,
1.2727303504943848
],
[
360,
1.245557188987732
],
[
400,
1.2276334762573242
],
[
440,
1.213688850402832
],
[
480,
1.2049111127853394
],
[
520,
1.1985464096069336
],
[
560,
1.1946879625320435
],
[
600,
1.1919087171554565
],
[
640,
1.1909754276275635
],
[
680,
1.1907687187194824
]
]
}
}