RegTech-32B-Instruct / training_report.json
MwSpace's picture
Upload RegTech-32B-Instruct
854ab84 verified
{
"model_base": "Qwen/Qwen2.5-32B-Instruct",
"model_name": "RegTech-32B-Instruct",
"dataset": "./train.jsonl",
"env_file": "/home/ubuntu/sophia-core-server/.tuning/.env.32B",
"train_samples": 2330,
"eval_samples": 258,
"params": {
"rank": 16,
"alpha": 16,
"dropout": 0.05,
"lr": 2e-05,
"scheduler": "cosine",
"epochs": 1,
"effective_batch": 4,
"max_seq_length": 4096,
"neftune_alpha": 0.0,
"target_modules": [
"q_proj",
"k_proj",
"v_proj",
"o_proj",
"gate_proj",
"up_proj",
"down_proj"
]
},
"results": {
"total_steps": 583,
"final_train_loss": 0.931,
"best_eval_loss": 0.9896443486213684,
"best_eval_step": 560,
"best_token_accuracy": 0.7659,
"elapsed_minutes": 37.7
},
"loss_history": {
"train": [
[
5,
1.7845
],
[
10,
1.8161
],
[
15,
1.8696
],
[
20,
2.0645
],
[
25,
1.6354
],
[
30,
1.5951
],
[
35,
1.7529
],
[
40,
1.9265
],
[
45,
1.7091
],
[
50,
1.4318
],
[
55,
1.4539
],
[
60,
1.4346
],
[
65,
1.4506
],
[
70,
1.254
],
[
75,
1.3812
],
[
80,
1.2598
],
[
85,
1.4447
],
[
90,
1.1975
],
[
95,
1.1675
],
[
100,
1.394
],
[
105,
1.347
],
[
110,
1.2377
],
[
115,
1.1502
],
[
120,
1.2652
],
[
125,
1.2239
],
[
130,
1.1843
],
[
135,
1.2547
],
[
140,
1.2079
],
[
145,
1.1555
],
[
150,
1.1216
],
[
155,
1.101
],
[
160,
1.1316
],
[
165,
1.1601
],
[
170,
1.2139
],
[
175,
1.0971
],
[
180,
0.9276
],
[
185,
1.2373
],
[
190,
1.1523
],
[
195,
1.1123
],
[
200,
1.1195
],
[
205,
1.1249
],
[
210,
1.106
],
[
215,
1.1916
],
[
220,
1.0094
],
[
225,
0.9779
],
[
230,
1.1532
],
[
235,
1.1116
],
[
240,
1.0537
],
[
245,
1.12
],
[
250,
1.065
],
[
255,
0.9508
],
[
260,
0.9716
],
[
265,
0.9991
],
[
270,
1.0961
],
[
275,
1.04
],
[
280,
1.0153
],
[
285,
0.9728
],
[
290,
1.029
],
[
295,
0.9904
],
[
300,
0.9582
],
[
305,
1.0926
],
[
310,
1.0106
],
[
315,
0.98
],
[
320,
0.8666
],
[
325,
1.0373
],
[
330,
0.9106
],
[
335,
1.0044
],
[
340,
0.9259
],
[
345,
1.017
],
[
350,
1.0305
],
[
355,
0.9852
],
[
360,
1.0174
],
[
365,
0.8547
],
[
370,
0.9254
],
[
375,
0.8651
],
[
380,
0.9753
],
[
385,
0.9514
],
[
390,
1.0401
],
[
395,
0.9505
],
[
400,
1.0557
],
[
405,
0.9294
],
[
410,
0.9874
],
[
415,
0.998
],
[
420,
1.0099
],
[
425,
0.9127
],
[
430,
0.9525
],
[
435,
0.9343
],
[
440,
0.9187
],
[
445,
0.9685
],
[
450,
0.8536
],
[
455,
0.9055
],
[
460,
0.9866
],
[
465,
0.9282
],
[
470,
0.8396
],
[
475,
0.9417
],
[
480,
1.011
],
[
485,
0.9357
],
[
490,
0.8992
],
[
495,
1.0237
],
[
500,
0.8904
],
[
505,
0.9034
],
[
510,
0.9129
],
[
515,
0.9439
],
[
520,
0.8588
],
[
525,
1.0242
],
[
530,
0.8042
],
[
535,
0.8632
],
[
540,
0.8258
],
[
545,
0.9191
],
[
550,
0.965
],
[
555,
0.911
],
[
560,
1.0103
],
[
565,
0.9916
],
[
570,
0.9791
],
[
575,
0.9193
],
[
580,
0.931
]
],
"eval": [
[
40,
1.946990728378296
],
[
80,
1.5292835235595703
],
[
120,
1.3830845355987549
],
[
160,
1.2611178159713745
],
[
200,
1.186440110206604
],
[
240,
1.1333062648773193
],
[
280,
1.089249610900879
],
[
320,
1.0572314262390137
],
[
360,
1.0289398431777954
],
[
400,
1.0110489130020142
],
[
440,
1.000611662864685
],
[
480,
0.993800699710846
],
[
520,
0.9907287955284119
],
[
560,
0.9896443486213684
]
],
"token_accuracy": [
[
40,
0.6795
],
[
80,
0.7006
],
[
120,
0.7146
],
[
160,
0.7261
],
[
200,
0.734
],
[
240,
0.7425
],
[
280,
0.7489
],
[
320,
0.7535
],
[
360,
0.7587
],
[
400,
0.7615
],
[
440,
0.7639
],
[
480,
0.7651
],
[
520,
0.7657
],
[
560,
0.7659
]
]
}
}