lole25 committed on
Commit
be46998
1 Parent(s): 5d13082

Model save

Browse files
README.md CHANGED
@@ -2,13 +2,9 @@
2
  license: mit
3
  library_name: peft
4
  tags:
5
- - alignment-handbook
6
- - generated_from_trainer
7
  - trl
8
  - dpo
9
  - generated_from_trainer
10
- datasets:
11
- - HuggingFaceH4/ultrafeedback_binarized
12
  base_model: microsoft/phi-2
13
  model-index:
14
  - name: phi-2-gpo-ultrafeedback-lora
@@ -20,17 +16,17 @@ should probably proofread and complete it, then remove this comment. -->
20
 
21
  # phi-2-gpo-ultrafeedback-lora
22
 
23
- This model is a fine-tuned version of [lole25/phi-2-sft-ultrachat-lora](https://huggingface.co/lole25/phi-2-sft-ultrachat-lora) on the HuggingFaceH4/ultrafeedback_binarized dataset.
24
  It achieves the following results on the evaluation set:
25
- - Loss: 0.0021
26
- - Rewards/chosen: -0.0083
27
- - Rewards/rejected: -0.0184
28
- - Rewards/accuracies: 0.6920
29
- - Rewards/margins: 0.0101
30
- - Logps/rejected: -233.2711
31
- - Logps/chosen: -261.0694
32
- - Logits/rejected: 0.8833
33
- - Logits/chosen: 0.7809
34
 
35
  ## Model description
36
 
@@ -65,17 +61,17 @@ The following hyperparameters were used during training:
65
 
66
  ### Training results
67
 
68
- | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
69
- |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
70
- | 0.0026 | 0.21 | 100 | 0.0025 | 0.0001 | -0.0005 | 0.5080 | 0.0006 | -231.4896 | -260.2373 | 0.9175 | 0.8151 |
71
- | 0.0023 | 0.42 | 200 | 0.0023 | -0.0015 | -0.0068 | 0.6560 | 0.0053 | -232.1152 | -260.3932 | 0.9120 | 0.8092 |
72
- | 0.0022 | 0.63 | 300 | 0.0022 | -0.0067 | -0.0141 | 0.6700 | 0.0073 | -232.8447 | -260.9179 | 0.9022 | 0.7992 |
73
- | 0.0021 | 0.84 | 400 | 0.0022 | -0.0092 | -0.0178 | 0.6640 | 0.0086 | -233.2157 | -261.1620 | 0.8914 | 0.7884 |
74
- | 0.0022 | 1.05 | 500 | 0.0021 | -0.0094 | -0.0193 | 0.7100 | 0.0098 | -233.3614 | -261.1852 | 0.8853 | 0.7821 |
75
- | 0.002 | 1.26 | 600 | 0.0021 | -0.0088 | -0.0185 | 0.6940 | 0.0097 | -233.2843 | -261.1207 | 0.8840 | 0.7815 |
76
- | 0.0021 | 1.47 | 700 | 0.0021 | -0.0083 | -0.0182 | 0.7000 | 0.0099 | -233.2560 | -261.0788 | 0.8816 | 0.7790 |
77
- | 0.0021 | 1.67 | 800 | 0.0021 | -0.0082 | -0.0184 | 0.6940 | 0.0102 | -233.2740 | -261.0643 | 0.8811 | 0.7781 |
78
- | 0.0021 | 1.88 | 900 | 0.0021 | -0.0085 | -0.0178 | 0.6900 | 0.0093 | -233.2118 | -261.0922 | 0.8833 | 0.7806 |
79
 
80
 
81
  ### Framework versions
 
2
  license: mit
3
  library_name: peft
4
  tags:
 
 
5
  - trl
6
  - dpo
7
  - generated_from_trainer
 
 
8
  base_model: microsoft/phi-2
9
  model-index:
10
  - name: phi-2-gpo-ultrafeedback-lora
 
16
 
17
  # phi-2-gpo-ultrafeedback-lora
18
 
19
+ This model is a fine-tuned version of [microsoft/phi-2](https://huggingface.co/microsoft/phi-2) on an unspecified dataset.
20
  It achieves the following results on the evaluation set:
21
+ - Loss: 0.0004
22
+ - Rewards/chosen: -0.0084
23
+ - Rewards/rejected: -0.0177
24
+ - Rewards/accuracies: 0.6700
25
+ - Rewards/margins: 0.0093
26
+ - Logps/rejected: -233.2047
27
+ - Logps/chosen: -261.0818
28
+ - Logits/rejected: 0.8824
29
+ - Logits/chosen: 0.7796
30
 
31
  ## Model description
32
 
 
61
 
62
  ### Training results
63
 
64
+ | Training Loss | Epoch | Step | Logits/chosen | Logits/rejected | Logps/chosen | Logps/rejected | Validation Loss | Rewards/accuracies | Rewards/chosen | Rewards/margins | Rewards/rejected |
65
+ |:-------------:|:-----:|:----:|:-------------:|:---------------:|:------------:|:--------------:|:---------------:|:------------------:|:--------------:|:---------------:|:----------------:|
66
+ | 0.0026 | 0.21 | 100 | 0.8151 | 0.9175 | -260.2373 | -231.4896 | 0.0025 | 0.5080 | 0.0001 | 0.0006 | -0.0005 |
67
+ | 0.0023 | 0.42 | 200 | 0.8092 | 0.9120 | -260.3932 | -232.1152 | 0.0023 | 0.6560 | -0.0015 | 0.0053 | -0.0068 |
68
+ | 0.0022 | 0.63 | 300 | 0.7992 | 0.9022 | -260.9179 | -232.8447 | 0.0022 | 0.6700 | -0.0067 | 0.0073 | -0.0141 |
69
+ | 0.0021 | 0.84 | 400 | 0.7884 | 0.8914 | -261.1620 | -233.2157 | 0.0022 | 0.6640 | -0.0092 | 0.0086 | -0.0178 |
70
+ | 0.0022 | 1.05 | 500 | 0.7821 | 0.8853 | -261.1852 | -233.3614 | 0.0021 | 0.7100 | -0.0094 | 0.0098 | -0.0193 |
71
+ | 0.002 | 1.26 | 600 | 0.7815 | 0.8840 | -261.1207 | -233.2843 | 0.0021 | 0.6940 | -0.0088 | 0.0097 | -0.0185 |
72
+ | 0.0021 | 1.47 | 700 | 0.7790 | 0.8816 | -261.0788 | -233.2560 | 0.0021 | 0.7000 | -0.0083 | 0.0099 | -0.0182 |
73
+ | 0.0021 | 1.67 | 800 | 0.7781 | 0.8811 | -261.0643 | -233.2740 | 0.0021 | 0.6940 | -0.0082 | 0.0102 | -0.0184 |
74
+ | 0.0021 | 1.88 | 900 | 0.7806 | 0.8833 | -261.0922 | -233.2118 | 0.0021 | 0.6900 | -0.0085 | 0.0093 | -0.0178 |
75
 
76
 
77
  ### Framework versions
adapter_config.json CHANGED
@@ -20,8 +20,8 @@
20
  "revision": null,
21
  "target_modules": [
22
  "q_proj",
23
- "k_proj",
24
  "dense",
 
25
  "v_proj"
26
  ],
27
  "task_type": "CAUSAL_LM"
 
20
  "revision": null,
21
  "target_modules": [
22
  "q_proj",
 
23
  "dense",
24
+ "k_proj",
25
  "v_proj"
26
  ],
27
  "task_type": "CAUSAL_LM"
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d0dea53272310862afda3712da656b50c1d9a7ad7a46f0642635168a85f6d5a0
3
  size 41977616
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1a5ec7e556d2b963020bd2793b0483b939a6de653aa4ba0aec480a313b5a507
3
  size 41977616
all_results.json CHANGED
@@ -1,21 +1,21 @@
1
  {
2
  "epoch": 2.0,
3
- "eval_logits/chosen": 0.7808946371078491,
4
- "eval_logits/rejected": 0.8833128213882446,
5
- "eval_logps/chosen": -261.0694274902344,
6
- "eval_logps/rejected": -233.27114868164062,
7
- "eval_loss": 0.0021080097649246454,
8
- "eval_rewards/accuracies": 0.6919999718666077,
9
- "eval_rewards/chosen": -0.008252721279859543,
10
- "eval_rewards/margins": 0.01009758934378624,
11
- "eval_rewards/rejected": -0.018350308761000633,
12
- "eval_runtime": 325.1898,
13
  "eval_samples": 2000,
14
- "eval_samples_per_second": 6.15,
15
- "eval_steps_per_second": 0.384,
16
- "train_loss": 0.0021909422920118682,
17
- "train_runtime": 18127.9992,
18
  "train_samples": 30567,
19
- "train_samples_per_second": 3.372,
20
- "train_steps_per_second": 0.053
21
  }
 
1
  {
2
  "epoch": 2.0,
3
+ "eval_logits/chosen": 0.7796330451965332,
4
+ "eval_logits/rejected": 0.8823836445808411,
5
+ "eval_logps/chosen": -261.0818176269531,
6
+ "eval_logps/rejected": -233.2046661376953,
7
+ "eval_loss": 0.0004278263368178159,
8
+ "eval_rewards/accuracies": 0.6700000166893005,
9
+ "eval_rewards/chosen": -0.008376287296414375,
10
+ "eval_rewards/margins": 0.009309147484600544,
11
+ "eval_rewards/rejected": -0.017685433849692345,
12
+ "eval_runtime": 324.63,
13
  "eval_samples": 2000,
14
+ "eval_samples_per_second": 6.161,
15
+ "eval_steps_per_second": 0.385,
16
+ "train_loss": 2.2877212975025803e-05,
17
+ "train_runtime": 862.5384,
18
  "train_samples": 30567,
19
+ "train_samples_per_second": 70.877,
20
+ "train_steps_per_second": 1.106
21
  }
eval_results.json CHANGED
@@ -1,16 +1,16 @@
1
  {
2
  "epoch": 2.0,
3
- "eval_logits/chosen": 0.7808946371078491,
4
- "eval_logits/rejected": 0.8833128213882446,
5
- "eval_logps/chosen": -261.0694274902344,
6
- "eval_logps/rejected": -233.27114868164062,
7
- "eval_loss": 0.0021080097649246454,
8
- "eval_rewards/accuracies": 0.6919999718666077,
9
- "eval_rewards/chosen": -0.008252721279859543,
10
- "eval_rewards/margins": 0.01009758934378624,
11
- "eval_rewards/rejected": -0.018350308761000633,
12
- "eval_runtime": 325.1898,
13
  "eval_samples": 2000,
14
- "eval_samples_per_second": 6.15,
15
- "eval_steps_per_second": 0.384
16
  }
 
1
  {
2
  "epoch": 2.0,
3
+ "eval_logits/chosen": 0.7796330451965332,
4
+ "eval_logits/rejected": 0.8823836445808411,
5
+ "eval_logps/chosen": -261.0818176269531,
6
+ "eval_logps/rejected": -233.2046661376953,
7
+ "eval_loss": 0.0004278263368178159,
8
+ "eval_rewards/accuracies": 0.6700000166893005,
9
+ "eval_rewards/chosen": -0.008376287296414375,
10
+ "eval_rewards/margins": 0.009309147484600544,
11
+ "eval_rewards/rejected": -0.017685433849692345,
12
+ "eval_runtime": 324.63,
13
  "eval_samples": 2000,
14
+ "eval_samples_per_second": 6.161,
15
+ "eval_steps_per_second": 0.385
16
  }
runs/Mar05_10-41-15_gpu4-119-4/events.out.tfevents.1709595839.gpu4-119-4.3155837.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed89e1e527290c4877b057258807d925af0fd9aa366908feb6502245477f3abc
3
+ size 8366
runs/Mar05_10-41-15_gpu4-119-4/events.out.tfevents.1709597026.gpu4-119-4.3155837.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5eeeb7d6c762751ee53da0154e2ad4f754acf3f56d63d6447a6c964c8e649244
3
+ size 828
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 2.0,
3
- "train_loss": 0.0021909422920118682,
4
- "train_runtime": 18127.9992,
5
  "train_samples": 30567,
6
- "train_samples_per_second": 3.372,
7
- "train_steps_per_second": 0.053
8
  }
 
1
  {
2
  "epoch": 2.0,
3
+ "train_loss": 2.2877212975025803e-05,
4
+ "train_runtime": 862.5384,
5
  "train_samples": 30567,
6
+ "train_samples_per_second": 70.877,
7
+ "train_steps_per_second": 1.106
8
  }
trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.9968602825745683,
5
  "eval_steps": 100,
6
  "global_step": 954,
7
  "is_hyper_param_search": false,
@@ -1427,83 +1427,83 @@
1427
  "step": 900
1428
  },
1429
  {
1430
- "epoch": 1.9,
1431
  "learning_rate": 3.237434340521789e-08,
1432
- "logits/chosen": 0.7978643774986267,
1433
- "logits/rejected": 0.8687127828598022,
1434
- "logps/chosen": -263.38275146484375,
1435
- "logps/rejected": -247.8026123046875,
1436
- "loss": 0.0021,
1437
- "rewards/accuracies": 0.668749988079071,
1438
- "rewards/chosen": -0.007231117691844702,
1439
- "rewards/margins": 0.009652243927121162,
1440
- "rewards/rejected": -0.016883360221982002,
1441
  "step": 910
1442
  },
1443
  {
1444
  "epoch": 1.93,
1445
  "learning_rate": 1.93478202307823e-08,
1446
- "logits/chosen": 0.7963850498199463,
1447
- "logits/rejected": 0.8160678148269653,
1448
- "logps/chosen": -242.1365966796875,
1449
- "logps/rejected": -246.0305938720703,
1450
- "loss": 0.0021,
1451
- "rewards/accuracies": 0.643750011920929,
1452
- "rewards/chosen": -0.0077395932748913765,
1453
- "rewards/margins": 0.007968437857925892,
1454
- "rewards/rejected": -0.01570803113281727,
1455
  "step": 920
1456
  },
1457
  {
1458
  "epoch": 1.95,
1459
  "learning_rate": 9.646686570697062e-09,
1460
- "logits/chosen": 0.862303614616394,
1461
- "logits/rejected": 0.8678015470504761,
1462
- "logps/chosen": -257.33099365234375,
1463
- "logps/rejected": -249.9061737060547,
1464
- "loss": 0.0021,
1465
- "rewards/accuracies": 0.7124999761581421,
1466
- "rewards/chosen": -0.007601047400385141,
1467
- "rewards/margins": 0.009036187082529068,
1468
- "rewards/rejected": -0.01663723587989807,
1469
  "step": 930
1470
  },
1471
  {
1472
  "epoch": 1.97,
1473
  "learning_rate": 3.283947088983663e-09,
1474
- "logits/chosen": 0.8371657133102417,
1475
- "logits/rejected": 0.8322643041610718,
1476
- "logps/chosen": -238.14657592773438,
1477
- "logps/rejected": -243.3525390625,
1478
- "loss": 0.0021,
1479
- "rewards/accuracies": 0.65625,
1480
- "rewards/chosen": -0.009062298573553562,
1481
- "rewards/margins": 0.008633644320070744,
1482
- "rewards/rejected": -0.017695942893624306,
1483
  "step": 940
1484
  },
1485
  {
1486
  "epoch": 1.99,
1487
  "learning_rate": 2.681312309735229e-10,
1488
- "logits/chosen": 0.8020931482315063,
1489
- "logits/rejected": 0.9026565551757812,
1490
- "logps/chosen": -231.6744842529297,
1491
- "logps/rejected": -229.53726196289062,
1492
- "loss": 0.0021,
1493
- "rewards/accuracies": 0.612500011920929,
1494
- "rewards/chosen": -0.007398143410682678,
1495
- "rewards/margins": 0.009477959014475346,
1496
- "rewards/rejected": -0.0168761033564806,
1497
  "step": 950
1498
  },
1499
  {
1500
  "epoch": 2.0,
1501
  "step": 954,
1502
  "total_flos": 0.0,
1503
- "train_loss": 0.0021909422920118682,
1504
- "train_runtime": 18127.9992,
1505
- "train_samples_per_second": 3.372,
1506
- "train_steps_per_second": 0.053
1507
  }
1508
  ],
1509
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.998430141287284,
5
  "eval_steps": 100,
6
  "global_step": 954,
7
  "is_hyper_param_search": false,
 
1427
  "step": 900
1428
  },
1429
  {
1430
+ "epoch": 1.91,
1431
  "learning_rate": 3.237434340521789e-08,
1432
+ "logits/chosen": 0.8207446932792664,
1433
+ "logits/rejected": 0.8551779985427856,
1434
+ "logps/chosen": -253.6584014892578,
1435
+ "logps/rejected": -244.2725372314453,
1436
+ "loss": 0.0004,
1437
+ "rewards/accuracies": 0.606249988079071,
1438
+ "rewards/chosen": -0.006646591238677502,
1439
+ "rewards/margins": 0.008468803949654102,
1440
+ "rewards/rejected": -0.015115395188331604,
1441
  "step": 910
1442
  },
1443
  {
1444
  "epoch": 1.93,
1445
  "learning_rate": 1.93478202307823e-08,
1446
+ "logits/chosen": 0.7895456552505493,
1447
+ "logits/rejected": 0.820245623588562,
1448
+ "logps/chosen": -247.3115692138672,
1449
+ "logps/rejected": -253.6699676513672,
1450
+ "loss": 0.0004,
1451
+ "rewards/accuracies": 0.65625,
1452
+ "rewards/chosen": -0.008030624128878117,
1453
+ "rewards/margins": 0.007940003648400307,
1454
+ "rewards/rejected": -0.015970628708600998,
1455
  "step": 920
1456
  },
1457
  {
1458
  "epoch": 1.95,
1459
  "learning_rate": 9.646686570697062e-09,
1460
+ "logits/chosen": 0.868087112903595,
1461
+ "logits/rejected": 0.8821622729301453,
1462
+ "logps/chosen": -258.50799560546875,
1463
+ "logps/rejected": -248.83169555664062,
1464
+ "loss": 0.0004,
1465
+ "rewards/accuracies": 0.699999988079071,
1466
+ "rewards/chosen": -0.007558141835033894,
1467
+ "rewards/margins": 0.008687029592692852,
1468
+ "rewards/rejected": -0.016245171427726746,
1469
  "step": 930
1470
  },
1471
  {
1472
  "epoch": 1.97,
1473
  "learning_rate": 3.283947088983663e-09,
1474
+ "logits/chosen": 0.8342536687850952,
1475
+ "logits/rejected": 0.8288405537605286,
1476
+ "logps/chosen": -237.41232299804688,
1477
+ "logps/rejected": -237.2918243408203,
1478
+ "loss": 0.0004,
1479
+ "rewards/accuracies": 0.6625000238418579,
1480
+ "rewards/chosen": -0.00896701030433178,
1481
+ "rewards/margins": 0.009420427493751049,
1482
+ "rewards/rejected": -0.018387438729405403,
1483
  "step": 940
1484
  },
1485
  {
1486
  "epoch": 1.99,
1487
  "learning_rate": 2.681312309735229e-10,
1488
+ "logits/chosen": 0.7803130149841309,
1489
+ "logits/rejected": 0.8894654512405396,
1490
+ "logps/chosen": -234.93325805664062,
1491
+ "logps/rejected": -232.20156860351562,
1492
+ "loss": 0.0004,
1493
+ "rewards/accuracies": 0.637499988079071,
1494
+ "rewards/chosen": -0.008270134218037128,
1495
+ "rewards/margins": 0.00816525612026453,
1496
+ "rewards/rejected": -0.01643539033830166,
1497
  "step": 950
1498
  },
1499
  {
1500
  "epoch": 2.0,
1501
  "step": 954,
1502
  "total_flos": 0.0,
1503
+ "train_loss": 2.2877212975025803e-05,
1504
+ "train_runtime": 862.5384,
1505
+ "train_samples_per_second": 70.877,
1506
+ "train_steps_per_second": 1.106
1507
  }
1508
  ],
1509
  "logging_steps": 10,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f6973090f4edc33a748ed65364e59f947a3ecfeb0039599bebec83bce04d70b8
3
  size 5816
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0dd2b02ab585a34a9bf39314ee5dc3e74f98b4692efe90bf2da48b933a6a62a
3
  size 5816