Model save

- README.md +12 -14
- adapter_model.safetensors +1 -1
- all_results.json +4 -4
- runs/Apr25_04-55-28_612e66badb5c/events.out.tfevents.1714021048.612e66badb5c.14611.0 +3 -0
- train_results.json +4 -4
- trainer_state.json +75 -75
- training_args.bin +1 -1
README.md
CHANGED
@@ -2,13 +2,11 @@
 license: apache-2.0
 library_name: peft
 tags:
-- alignment-handbook
 - trl
 - dpo
+- alignment-handbook
 - generated_from_trainer
 base_model: alignment-handbook/zephyr-7b-sft-full
-datasets:
-- EllieS/timedial_dpo
 model-index:
 - name: zephyr-dpo-timedial-selfgen-mix2
   results: []
@@ -19,17 +17,17 @@ should probably proofread and complete it, then remove this comment. -->
 
 # zephyr-dpo-timedial-selfgen-mix2
 
-This model is a fine-tuned version of [
+This model is a fine-tuned version of [alignment-handbook/zephyr-7b-sft-full](https://huggingface.co/alignment-handbook/zephyr-7b-sft-full) on an unknown dataset.
 It achieves the following results on the evaluation set:
+- Logits/chosen: -2.8807
+- Logits/rejected: -2.8679
+- Logps/chosen: -6.8403
+- Logps/rejected: -132.2824
 - Loss: 0.2588
-- Rewards/chosen: 0.2087
-- Rewards/rejected: -1.0200
 - Rewards/accuracies: 1.0
+- Rewards/chosen: 0.2087
 - Rewards/margins: 1.2287
--
-- Logps/chosen: -6.8403
-- Logits/rejected: -2.8679
-- Logits/chosen: -2.8807
+- Rewards/rejected: -1.0200
 
 ## Model description
 
@@ -62,10 +60,10 @@ The following hyperparameters were used during training:
 
 ### Training results
 
-| Training Loss | Epoch | Step |
-
-| 0.4825 | 0.35 | 100 |
-| 0.2786 | 0.69 | 200 |
+| Training Loss | Epoch | Step | Logits/chosen | Logits/rejected | Logps/chosen | Logps/rejected | Validation Loss | Rewards/accuracies | Rewards/chosen | Rewards/margins | Rewards/rejected |
+|:-------------:|:-----:|:----:|:-------------:|:---------------:|:------------:|:--------------:|:---------------:|:------------------:|:--------------:|:---------------:|:----------------:|
+| 0.4825 | 0.35 | 100 | -2.9715 | -2.9751 | -4.6766 | -60.5070 | 0.4632 | 1.0 | 0.2303 | 0.5325 | -0.3022 |
+| 0.2786 | 0.69 | 200 | -2.8807 | -2.8679 | -6.8403 | -132.2824 | 0.2588 | 1.0 | 0.2087 | 1.2287 | -1.0200 |
 
 
 ### Framework versions
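The evaluation numbers in the updated model card are internally consistent: for DPO, Rewards/margins is Rewards/chosen minus Rewards/rejected. A minimal check in plain Python, using only the values from the training results table above:

```python
# DPO reward margin identity: margins = chosen - rejected
# Values copied from the README training results table (steps 100 and 200).
rows = [
    {"chosen": 0.2303, "rejected": -0.3022, "margins": 0.5325},  # step 100
    {"chosen": 0.2087, "rejected": -1.0200, "margins": 1.2287},  # step 200
]
for row in rows:
    assert abs((row["chosen"] - row["rejected"]) - row["margins"]) < 1e-4
```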
adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:9eebfd4ad7987bc9fd47f79609fadacdd943a8b6b0f4182920b062aedc908acb
 size 83946192
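adapter_model.safetensors is stored as a Git LFS pointer, so the diff above only swaps the pointer's oid while the size stays the same. A short sketch for verifying a locally downloaded copy against the new pointer (the local path is an assumption; adjust it to wherever the file was fetched):

```python
import hashlib
import os

path = "adapter_model.safetensors"  # hypothetical local path to the downloaded file

# Values from the updated Git LFS pointer above.
expected_oid = "9eebfd4ad7987bc9fd47f79609fadacdd943a8b6b0f4182920b062aedc908acb"
expected_size = 83946192

sha256 = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        sha256.update(chunk)

assert os.path.getsize(path) == expected_size
assert sha256.hexdigest() == expected_oid
```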
all_results.json
CHANGED
@@ -1,8 +1,8 @@
 {
 "epoch": 1.0,
-"train_loss": 0.
-"train_runtime":
+"train_loss": 0.06608598743755512,
+"train_runtime": 154.0704,
 "train_samples": 1157,
-"train_samples_per_second":
-"train_steps_per_second":
+"train_samples_per_second": 7.51,
+"train_steps_per_second": 1.876
 }
runs/Apr25_04-55-28_612e66badb5c/events.out.tfevents.1714021048.612e66badb5c.14611.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ec4532d2759cba55b54f4f95aa379f154fc94f1839ac4aa7c022e8e9341e943b
+size 10676
train_results.json
CHANGED
@@ -1,8 +1,8 @@
 {
 "epoch": 1.0,
-"train_loss": 0.
-"train_runtime":
+"train_loss": 0.06608598743755512,
+"train_runtime": 154.0704,
 "train_samples": 1157,
-"train_samples_per_second":
-"train_steps_per_second":
+"train_samples_per_second": 7.51,
+"train_steps_per_second": 1.876
 }
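The throughput fields in all_results.json and train_results.json follow from the sample count, the total step count, and the runtime (289 steps in total, per the final trainer_state.json entry below). A small arithmetic check under that assumption:

```python
train_samples = 1157
train_steps = 289         # final "step" value in trainer_state.json below
train_runtime = 154.0704  # seconds

print(round(train_samples / train_runtime, 2))  # 7.51  -> train_samples_per_second
print(round(train_steps / train_runtime, 3))    # 1.876 -> train_steps_per_second
```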
trainer_state.json
CHANGED
@@ -357,132 +357,132 @@
 },
 {
 "epoch": 0.73,
-"grad_norm": 1.
+"grad_norm": 1.046875,
 "learning_rate": 1.0550820234444627e-06,
-"logits/chosen": -2.
-"logits/rejected": -2.
-"logps/chosen": -7.
-"logps/rejected": -136.
-"loss": 0.
+"logits/chosen": -2.884814500808716,
+"logits/rejected": -2.8724067211151123,
+"logps/chosen": -7.440223693847656,
+"logps/rejected": -136.234375,
+"loss": 0.2538,
 "rewards/accuracies": 1.0,
-"rewards/chosen": 0.
-"rewards/margins": 1.
-"rewards/rejected": -1.
+"rewards/chosen": 0.20774102210998535,
+"rewards/margins": 1.261413812637329,
+"rewards/rejected": -1.0536725521087646,
 "step": 210
 },
 {
 "epoch": 0.76,
-"grad_norm": 1.
+"grad_norm": 1.5078125,
 "learning_rate": 8.197041935593181e-07,
-"logits/chosen": -2.
-"logits/rejected": -2.
-"logps/chosen": -3.
-"logps/rejected": -143.
-"loss": 0.
+"logits/chosen": -2.8656671047210693,
+"logits/rejected": -2.8470237255096436,
+"logps/chosen": -3.4960861206054688,
+"logps/rejected": -143.31100463867188,
+"loss": 0.229,
 "rewards/accuracies": 1.0,
-"rewards/chosen": 0.
-"rewards/margins": 1.
-"rewards/rejected": -1.
+"rewards/chosen": 0.23899272084236145,
+"rewards/margins": 1.3653249740600586,
+"rewards/rejected": -1.1263322830200195,
 "step": 220
 },
 {
 "epoch": 0.79,
-"grad_norm": 0.
+"grad_norm": 0.9453125,
 "learning_rate": 6.088288602287159e-07,
-"logits/chosen": -2.
-"logits/rejected": -2.
-"logps/chosen": -1.
-"logps/rejected": -147.
-"loss": 0.
+"logits/chosen": -2.866046190261841,
+"logits/rejected": -2.8468241691589355,
+"logps/chosen": -1.2698005437850952,
+"logps/rejected": -147.30690002441406,
+"loss": 0.2167,
 "rewards/accuracies": 1.0,
-"rewards/chosen": 0.
-"rewards/margins": 1.
-"rewards/rejected": -1.
+"rewards/chosen": 0.2751404643058777,
+"rewards/margins": 1.4320547580718994,
+"rewards/rejected": -1.156914234161377,
 "step": 230
 },
 {
 "epoch": 0.83,
-"grad_norm": 1.
+"grad_norm": 1.40625,
 "learning_rate": 4.255310606625124e-07,
-"logits/chosen": -2.
-"logits/rejected": -2.
-"logps/chosen": -2.
-"logps/rejected": -151.
-"loss": 0.
+"logits/chosen": -2.8706631660461426,
+"logits/rejected": -2.852060317993164,
+"logps/chosen": -2.512279510498047,
+"logps/rejected": -151.74819946289062,
+"loss": 0.2084,
 "rewards/accuracies": 1.0,
-"rewards/chosen": 0.
-"rewards/margins": 1.
-"rewards/rejected": -1.
+"rewards/chosen": 0.2570468485355377,
+"rewards/margins": 1.4740240573883057,
+"rewards/rejected": -1.2169771194458008,
 "step": 240
 },
 {
 "epoch": 0.86,
-"grad_norm": 0.
+"grad_norm": 0.96875,
 "learning_rate": 2.7248368952908055e-07,
-"logits/chosen": -2.
-"logits/rejected": -2.
-"logps/chosen": -2.
-"logps/rejected": -152.
-"loss": 0.
+"logits/chosen": -2.86161470413208,
+"logits/rejected": -2.838373899459839,
+"logps/chosen": -2.557521104812622,
+"logps/rejected": -152.18783569335938,
+"loss": 0.2107,
 "rewards/accuracies": 1.0,
-"rewards/chosen": 0.
-"rewards/margins": 1.
-"rewards/rejected": -1.
+"rewards/chosen": 0.2602913975715637,
+"rewards/margins": 1.4610974788665771,
+"rewards/rejected": -1.2008062601089478,
 "step": 250
 },
 {
 "epoch": 0.9,
-"grad_norm": 0.
+"grad_norm": 0.87890625,
 "learning_rate": 1.5191852213221198e-07,
-"logits/chosen": -2.
-"logits/rejected": -2.
-"logps/chosen": -3.
-"logps/rejected": -156.
-"loss": 0.
+"logits/chosen": -2.846947431564331,
+"logits/rejected": -2.818504810333252,
+"logps/chosen": -3.4454503059387207,
+"logps/rejected": -156.12364196777344,
+"loss": 0.2052,
 "rewards/accuracies": 1.0,
-"rewards/chosen": 0.
-"rewards/margins": 1.
-"rewards/rejected": -1.
+"rewards/chosen": 0.25028613209724426,
+"rewards/margins": 1.4929627180099487,
+"rewards/rejected": -1.2426766157150269,
 "step": 260
 },
 {
 "epoch": 0.93,
-"grad_norm": 0.
+"grad_norm": 0.9296875,
 "learning_rate": 6.559367010166629e-08,
-"logits/chosen": -2.
-"logits/rejected": -2.
-"logps/chosen": -2.
-"logps/rejected": -152.
-"loss": 0.
+"logits/chosen": -2.8502001762390137,
+"logits/rejected": -2.8307251930236816,
+"logps/chosen": -2.046212673187256,
+"logps/rejected": -152.60047912597656,
+"loss": 0.2112,
 "rewards/accuracies": 1.0,
-"rewards/chosen": 0.
-"rewards/margins": 1.
-"rewards/rejected": -1.
+"rewards/chosen": 0.2626075744628906,
+"rewards/margins": 1.4720337390899658,
+"rewards/rejected": -1.2094261646270752,
 "step": 270
 },
 {
 "epoch": 0.97,
-"grad_norm": 1.
+"grad_norm": 1.5,
 "learning_rate": 1.4767944166687032e-08,
-"logits/chosen": -2.
-"logits/rejected": -2.
-"logps/chosen": -2.
-"logps/rejected": -154.
+"logits/chosen": -2.854003429412842,
+"logits/rejected": -2.827923059463501,
+"logps/chosen": -2.4084525108337402,
+"logps/rejected": -154.87083435058594,
 "loss": 0.2011,
 "rewards/accuracies": 1.0,
-"rewards/chosen": 0.
-"rewards/margins": 1.
-"rewards/rejected": -1.
+"rewards/chosen": 0.2612733244895935,
+"rewards/margins": 1.5091934204101562,
+"rewards/rejected": -1.247920274734497,
 "step": 280
 },
 {
 "epoch": 1.0,
 "step": 289,
 "total_flos": 0.0,
-"train_loss": 0.
-"train_runtime":
-"train_samples_per_second":
-"train_steps_per_second":
+"train_loss": 0.06608598743755512,
+"train_runtime": 154.0704,
+"train_samples_per_second": 7.51,
+"train_steps_per_second": 1.876
 }
 ],
 "logging_steps": 10,
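trainer_state.json keeps the per-step logs under a "log_history" list, which is what the hunk above updates. A minimal sketch that reads the updated file and prints the reward-margin trajectory, assuming the file sits in the working directory:

```python
import json

with open("trainer_state.json") as f:
    state = json.load(f)

# Training log entries carry "rewards/margins"; evaluation and summary entries do not.
for entry in state["log_history"]:
    if "rewards/margins" in entry:
        print(entry["step"], entry["loss"], entry["rewards/margins"])
```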
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:68d7e87f0cf192833a8bc67551a5749dfc4594ac8ce2b861912976bde3429e7b
 size 5112