RegTech-14B-Instruct / training_report.json
MwSpace's picture
Upload RegTech-14B-Instruct
14a198b verified
{
"model_base": "Qwen/Qwen2.5-14B-Instruct",
"model_name": "RegTech-14B-Instruct",
"dataset": "./train.jsonl",
"env_file": "/home/ubuntu/sophia-core-server/.tuning/.env.14B",
"train_samples": 2134,
"eval_samples": 237,
"params": {
"rank": 16,
"alpha": 16,
"dropout": 0.1,
"lr": 2e-05,
"scheduler": "cosine",
"epochs": 1,
"effective_batch": 4,
"max_seq_length": 4096,
"neftune_alpha": 5.0,
"target_modules": [
"q_proj",
"k_proj",
"v_proj",
"o_proj",
"gate_proj",
"up_proj",
"down_proj"
]
},
"results": {
"total_steps": 534,
"final_train_loss": 1.073,
"best_eval_loss": 1.098220944404602,
"best_eval_step": 520,
"best_token_accuracy": 0.7624,
"elapsed_minutes": 19.9
},
"loss_history": {
"train": [
[
5,
1.8103
],
[
10,
1.9463
],
[
15,
1.7076
],
[
20,
1.8542
],
[
25,
1.9426
],
[
30,
2.026
],
[
35,
1.9177
],
[
40,
1.7516
],
[
45,
1.655
],
[
50,
1.4414
],
[
55,
1.4496
],
[
60,
1.3549
],
[
65,
1.3485
],
[
70,
1.6647
],
[
75,
1.3798
],
[
80,
1.5445
],
[
85,
1.3067
],
[
90,
1.3387
],
[
95,
1.2908
],
[
100,
1.2998
],
[
105,
1.3263
],
[
110,
1.3664
],
[
115,
1.2699
],
[
120,
1.3007
],
[
125,
1.3573
],
[
130,
1.243
],
[
135,
1.3071
],
[
140,
1.2171
],
[
145,
1.1797
],
[
150,
1.1813
],
[
155,
1.3488
],
[
160,
1.1427
],
[
165,
1.2909
],
[
170,
1.1646
],
[
175,
1.0927
],
[
180,
1.1764
],
[
185,
1.1671
],
[
190,
1.2527
],
[
195,
1.0971
],
[
200,
1.0705
],
[
205,
1.1283
],
[
210,
1.1866
],
[
215,
1.1713
],
[
220,
1.0456
],
[
225,
1.137
],
[
230,
1.074
],
[
235,
1.062
],
[
240,
1.0085
],
[
245,
1.0669
],
[
250,
0.9657
],
[
255,
1.0515
],
[
260,
1.0492
],
[
265,
1.1586
],
[
270,
1.0454
],
[
275,
1.1171
],
[
280,
1.0412
],
[
285,
0.9569
],
[
290,
1.0504
],
[
295,
1.0302
],
[
300,
1.0497
],
[
305,
0.9437
],
[
310,
0.9805
],
[
315,
1.0755
],
[
320,
1.0508
],
[
325,
1.0563
],
[
330,
0.9653
],
[
335,
0.9692
],
[
340,
1.1218
],
[
345,
1.0367
],
[
350,
0.9287
],
[
355,
1.0165
],
[
360,
1.0356
],
[
365,
1.0249
],
[
370,
0.9885
],
[
375,
0.9588
],
[
380,
0.977
],
[
385,
0.9921
],
[
390,
0.9863
],
[
395,
1.0569
],
[
400,
0.982
],
[
405,
1.0738
],
[
410,
1.034
],
[
415,
1.067
],
[
420,
1.044
],
[
425,
0.9622
],
[
430,
0.9102
],
[
435,
1.0164
],
[
440,
0.9833
],
[
445,
0.9276
],
[
450,
0.8751
],
[
455,
1.0087
],
[
460,
1.032
],
[
465,
1.0099
],
[
470,
1.0805
],
[
475,
1.0326
],
[
480,
0.968
],
[
485,
0.948
],
[
490,
1.0297
],
[
495,
1.0083
],
[
500,
0.955
],
[
505,
0.967
],
[
510,
0.8891
],
[
515,
0.9298
],
[
520,
0.9516
],
[
525,
0.9553
],
[
530,
1.073
]
],
"eval": [
[
40,
1.9159308671951294
],
[
80,
1.5397088527679443
],
[
120,
1.398988127708435
],
[
160,
1.3029450178146362
],
[
200,
1.2370014190673828
],
[
240,
1.1978071928024292
],
[
280,
1.1654218435287476
],
[
320,
1.1381187438964844
],
[
360,
1.1211411952972412
],
[
400,
1.107499599456787
],
[
440,
1.1020454168319702
],
[
480,
1.0991754531860352
],
[
520,
1.098220944404602
]
],
"token_accuracy": [
[
40,
0.6898
],
[
80,
0.7085
],
[
120,
0.7226
],
[
160,
0.7344
],
[
200,
0.7426
],
[
240,
0.7473
],
[
280,
0.7522
],
[
320,
0.7564
],
[
360,
0.7585
],
[
400,
0.7605
],
[
440,
0.7618
],
[
480,
0.7624
],
[
520,
0.7624
]
]
}
}