RedaAlami committed on
Commit
3a9c5a4
1 Parent(s): f9b279e

End of training

Browse files
README.md CHANGED
@@ -1,8 +1,11 @@
1
  ---
2
  base_model: HuggingFaceH4/zephyr-7b-gemma-sft-v0.1
 
 
3
  library_name: peft
4
  license: other
5
  tags:
 
6
  - trl
7
  - dpo
8
  - generated_from_trainer
@@ -16,17 +19,17 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  # zephyr-7b-gemma-dpo
18
 
19
- This model is a fine-tuned version of [HuggingFaceH4/zephyr-7b-gemma-sft-v0.1](https://huggingface.co/HuggingFaceH4/zephyr-7b-gemma-sft-v0.1) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
- - Loss: 97.2543
22
  - Rewards/chosen: 0.0424
23
  - Rewards/rejected: 0.0341
24
- - Rewards/accuracies: 0.5976
25
  - Rewards/margins: 0.0083
26
- - Logps/rejected: -2.3888
27
- - Logps/chosen: -2.3300
28
- - Logits/rejected: 384.5274
29
- - Logits/chosen: 412.5387
30
 
31
  ## Model description
32
 
 
1
  ---
2
  base_model: HuggingFaceH4/zephyr-7b-gemma-sft-v0.1
3
+ datasets:
4
+ - RedaAlami/PKU-SafeRLHF-Processed
5
  library_name: peft
6
  license: other
7
  tags:
8
+ - alignment-handbook
9
  - trl
10
  - dpo
11
  - generated_from_trainer
 
19
 
20
  # zephyr-7b-gemma-dpo
21
 
22
+ This model is a fine-tuned version of [HuggingFaceH4/zephyr-7b-gemma-sft-v0.1](https://huggingface.co/HuggingFaceH4/zephyr-7b-gemma-sft-v0.1) on the RedaAlami/PKU-SafeRLHF-Processed dataset.
23
  It achieves the following results on the evaluation set:
24
+ - Loss: 97.2382
25
  - Rewards/chosen: 0.0424
26
  - Rewards/rejected: 0.0341
27
+ - Rewards/accuracies: 0.6062
28
  - Rewards/margins: 0.0083
29
+ - Logps/rejected: -2.3880
30
+ - Logps/chosen: -2.3290
31
+ - Logits/rejected: 384.5392
32
+ - Logits/chosen: 412.5483
33
 
34
  ## Model description
35
 
all_results.json CHANGED
@@ -1,5 +1,18 @@
1
  {
2
  "epoch": 1.9969834087481146,
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  "total_flos": 0.0,
4
  "train_loss": 98.05829228519313,
5
  "train_runtime": 4353.733,
 
1
  {
2
  "epoch": 1.9969834087481146,
3
+ "eval_logits/chosen": 412.54827880859375,
4
+ "eval_logits/rejected": 384.53924560546875,
5
+ "eval_logps/chosen": -2.328974485397339,
6
+ "eval_logps/rejected": -2.3880226612091064,
7
+ "eval_loss": 97.23818969726562,
8
+ "eval_rewards/accuracies": 0.6061643958091736,
9
+ "eval_rewards/chosen": 0.042412400245666504,
10
+ "eval_rewards/margins": 0.00826968066394329,
11
+ "eval_rewards/rejected": 0.034142717719078064,
12
+ "eval_runtime": 96.0326,
13
+ "eval_samples": 4656,
14
+ "eval_samples_per_second": 48.484,
15
+ "eval_steps_per_second": 1.52,
16
  "total_flos": 0.0,
17
  "train_loss": 98.05829228519313,
18
  "train_runtime": 4353.733,
eval_results.json CHANGED
@@ -1,16 +1,16 @@
1
  {
2
- "epoch": 1.9986987638256344,
3
- "eval_logits/chosen": 452.641357421875,
4
- "eval_logits/rejected": 436.01385498046875,
5
- "eval_logps/chosen": -295.9647216796875,
6
- "eval_logps/rejected": -334.555419921875,
7
- "eval_loss": 0.6477869153022766,
8
- "eval_rewards/accuracies": 0.6168639063835144,
9
- "eval_rewards/chosen": -0.34524381160736084,
10
- "eval_rewards/margins": 0.23360556364059448,
11
- "eval_rewards/rejected": -0.5788493752479553,
12
- "eval_runtime": 243.7753,
13
- "eval_samples": 5406,
14
- "eval_samples_per_second": 22.176,
15
- "eval_steps_per_second": 0.693
16
  }
 
1
  {
2
+ "epoch": 1.9969834087481146,
3
+ "eval_logits/chosen": 412.54827880859375,
4
+ "eval_logits/rejected": 384.53924560546875,
5
+ "eval_logps/chosen": -2.328974485397339,
6
+ "eval_logps/rejected": -2.3880226612091064,
7
+ "eval_loss": 97.23818969726562,
8
+ "eval_rewards/accuracies": 0.6061643958091736,
9
+ "eval_rewards/chosen": 0.042412400245666504,
10
+ "eval_rewards/margins": 0.00826968066394329,
11
+ "eval_rewards/rejected": 0.034142717719078064,
12
+ "eval_runtime": 96.0326,
13
+ "eval_samples": 4656,
14
+ "eval_samples_per_second": 48.484,
15
+ "eval_steps_per_second": 1.52
16
  }
runs/Jul31_17-29-05_ip-172-16-2-184.us-west-2.compute.internal/events.out.tfevents.1722451516.ip-172-16-2-184.us-west-2.compute.internal.23039.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ffef7c05c4adb4652f9c4e25c9c8b1c3b06477fdf7be8c07558d7756bfc6b81
3
+ size 828