vain05
/

stablelm-2-1_6b-orpo-full-v1

@@ -2,9 +2,15 @@
 license: other
 base_model: stabilityai/stablelm-2-1_6b
 tags:
 - trl
 - orpo
 - generated_from_trainer
 model-index:
 - name: stablelm-2-1_6b-orpo-full-v1
   results: []
@@ -15,20 +21,20 @@ should probably proofread and complete it, then remove this comment. -->
 # stablelm-2-1_6b-orpo-full-v1
-This model is a fine-tuned version of [stabilityai/stablelm-2-1_6b](https://huggingface.co/stabilityai/stablelm-2-1_6b) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 1.3829
 - Rewards/chosen: -0.1279
-- Rewards/rejected: -0.1396
 - Rewards/accuracies: 0.5625
-- Rewards/margins: 0.0115
-- Logps/rejected: -1.3984
-- Logps/chosen: -1.2812
 - Logits/rejected: -2.4688
 - Logits/chosen: -2.4844
 - Nll Loss: 1.3359
-- Log Odds Ratio: -0.6793
-- Log Odds Chosen: 0.1746
 ## Model description

 license: other
 base_model: stabilityai/stablelm-2-1_6b
 tags:
+- alignment-handbook
 - trl
 - orpo
 - generated_from_trainer
+- trl
+- orpo
+- generated_from_trainer
+datasets:
+- alvarobartt/dpo-mix-7k-simplified
 model-index:
 - name: stablelm-2-1_6b-orpo-full-v1
   results: []
 # stablelm-2-1_6b-orpo-full-v1
+This model is a fine-tuned version of [stabilityai/stablelm-2-1_6b](https://huggingface.co/stabilityai/stablelm-2-1_6b) on the alvarobartt/dpo-mix-7k-simplified dataset.
 It achieves the following results on the evaluation set:
+- Loss: 1.3790
 - Rewards/chosen: -0.1279
+- Rewards/rejected: -0.1387
 - Rewards/accuracies: 0.5625
+- Rewards/margins: 0.0114
+- Logps/rejected: -1.3906
+- Logps/chosen: -1.2734
 - Logits/rejected: -2.4688
 - Logits/chosen: -2.4844
 - Nll Loss: 1.3359
+- Log Odds Ratio: -0.6799
+- Log Odds Chosen: 0.1751
 ## Model description

all_results.json CHANGED Viewed

@@ -1,5 +1,21 @@
 {
     "epoch": 2.0,
     "train_loss": 1.48347466719066,
     "train_runtime": 2049.7189,
     "train_samples": 6750,

 {
     "epoch": 2.0,
+    "eval_log_odds_chosen": 0.1751251220703125,
+    "eval_log_odds_ratio": -0.679931640625,
+    "eval_logits/chosen": -2.484375,
+    "eval_logits/rejected": -2.46875,
+    "eval_logps/chosen": -1.2734375,
+    "eval_logps/rejected": -1.390625,
+    "eval_loss": 1.3789583444595337,
+    "eval_nll_loss": 1.3359375,
+    "eval_rewards/accuracies": 0.5625,
+    "eval_rewards/chosen": -0.1279296875,
+    "eval_rewards/margins": 0.01141357421875,
+    "eval_rewards/rejected": -0.138671875,
+    "eval_runtime": 28.0834,
+    "eval_samples": 750,
+    "eval_samples_per_second": 26.706,
+    "eval_steps_per_second": 1.139,
     "train_loss": 1.48347466719066,
     "train_runtime": 2049.7189,
     "train_samples": 6750,

eval_results.json ADDED Viewed

+{
+    "epoch": 2.0,
+    "eval_log_odds_chosen": 0.1751251220703125,
+    "eval_log_odds_ratio": -0.679931640625,
+    "eval_logits/chosen": -2.484375,
+    "eval_logits/rejected": -2.46875,
+    "eval_logps/chosen": -1.2734375,
+    "eval_logps/rejected": -1.390625,
+    "eval_loss": 1.3789583444595337,
+    "eval_nll_loss": 1.3359375,
+    "eval_rewards/accuracies": 0.5625,
+    "eval_rewards/chosen": -0.1279296875,
+    "eval_rewards/margins": 0.01141357421875,
+    "eval_rewards/rejected": -0.138671875,
+    "eval_runtime": 28.0834,
+    "eval_samples": 750,
+    "eval_samples_per_second": 26.706,
+    "eval_steps_per_second": 1.139
+}