Model save

Browse files

Files changed (9) hide show

README.md +2 -6
adapter_config.json +3 -3
all_results.json +7 -7
eval_results.json +3 -3
runs/Mar11_06-57-47_b89f062cf3e1/events.out.tfevents.1710140413.b89f062cf3e1.14079.0 +3 -0
runs/Mar11_06-57-47_b89f062cf3e1/events.out.tfevents.1710140992.b89f062cf3e1.14079.1 +3 -0
train_results.json +4 -4
trainer_state.json +4 -4
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -1,13 +1,9 @@
 ---
 library_name: peft
 tags:
-- alignment-handbook
-- generated_from_trainer
 - trl
 - dpo
 - generated_from_trainer
-datasets:
-- David-Xu/astronomy-stack-dpo-20-percent
 base_model: meta-llama/Llama-2-7b-chat-hf
 model-index:
 - name: cira-7b-dpo-lora
@@ -19,7 +15,7 @@ should probably proofread and complete it, then remove this comment. -->
 # cira-7b-dpo-lora
-This model is a fine-tuned version of [David-Xu/llama-2-7b-cira-sft-v0.1-merge](https://huggingface.co/David-Xu/llama-2-7b-cira-sft-v0.1-merge) on the David-Xu/astronomy-stack-dpo-20-percent dataset.
 It achieves the following results on the evaluation set:
 - Loss: 0.6183
 - Rewards/chosen: 0.5535
@@ -71,7 +67,7 @@ The following hyperparameters were used during training:
 | 0.5596        | 0.56  | 500  | -1.0852       | -1.2330         | -790.7928    | -646.7930      | 0.6230          | 0.6683             | 0.5967         | 0.2037          | 0.3930           |
 | 0.5382        | 0.67  | 600  | -1.0547       | -1.2034         | -793.2486    | -650.0926      | 0.6199          | 0.6709             | 0.5721         | 0.2121          | 0.3600           |
 | 0.5952        | 0.78  | 700  | -1.0324       | -1.1827         | -794.9604    | -652.0420      | 0.6186          | 0.6784             | 0.5550         | 0.2145          | 0.3405           |
-| 0.5792        | 0.89  | 800  | 0.6182        | 0.5534          | 0.3382       | 0.6784         | 0.2151          | -652.2705          | -795.125       | -1.1812         | -1.0308          |
 ### Framework versions

 ---
 library_name: peft
 tags:
 - trl
 - dpo
 - generated_from_trainer
 base_model: meta-llama/Llama-2-7b-chat-hf
 model-index:
 - name: cira-7b-dpo-lora
 # cira-7b-dpo-lora
+This model is a fine-tuned version of [meta-llama/Llama-2-7b-chat-hf](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf) on an unspecified dataset (no dataset name was recorded in the model card metadata).
 It achieves the following results on the evaluation set:
 - Loss: 0.6183
 - Rewards/chosen: 0.5535
 | 0.5596        | 0.56  | 500  | -1.0852       | -1.2330         | -790.7928    | -646.7930      | 0.6230          | 0.6683             | 0.5967         | 0.2037          | 0.3930           |
 | 0.5382        | 0.67  | 600  | -1.0547       | -1.2034         | -793.2486    | -650.0926      | 0.6199          | 0.6709             | 0.5721         | 0.2121          | 0.3600           |
 | 0.5952        | 0.78  | 700  | -1.0324       | -1.1827         | -794.9604    | -652.0420      | 0.6186          | 0.6784             | 0.5550         | 0.2145          | 0.3405           |
+| 0.5792        | 0.89  | 800  | -1.0308       | -1.1812         | -795.125     | -652.2705      | 0.6182          | 0.6784             | 0.5534         | 0.2151          | 0.3382           |
 ### Framework versions

adapter_config.json CHANGED Viewed

@@ -20,12 +20,12 @@
   "revision": null,
   "target_modules": [
     "o_proj",
-    "k_proj",
-    "q_proj",
     "up_proj",
     "v_proj",
     "gate_proj",
-    "down_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

   "revision": null,
   "target_modules": [
     "o_proj",
     "up_proj",
+    "down_proj",
+    "k_proj",
     "v_proj",
     "gate_proj",
+    "q_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

all_results.json CHANGED Viewed

@@ -9,13 +9,13 @@
     "eval_rewards/chosen": 0.5534913539886475,
     "eval_rewards/margins": 0.214975506067276,
     "eval_rewards/rejected": 0.33851587772369385,
-    "eval_runtime": 182.1641,
     "eval_samples": 398,
-    "eval_samples_per_second": 2.185,
-    "eval_steps_per_second": 2.185,
-    "train_loss": 0.12323601163483516,
-    "train_runtime": 1012.2753,
     "train_samples": 3588,
-    "train_samples_per_second": 3.544,
-    "train_steps_per_second": 0.886
 }

     "eval_rewards/chosen": 0.5534913539886475,
     "eval_rewards/margins": 0.214975506067276,
     "eval_rewards/rejected": 0.33851587772369385,
+    "eval_runtime": 181.731,
     "eval_samples": 398,
+    "eval_samples_per_second": 2.19,
+    "eval_steps_per_second": 2.19,
+    "train_loss": 0.06080360662445443,
+    "train_runtime": 395.7009,
     "train_samples": 3588,
+    "train_samples_per_second": 9.067,
+    "train_steps_per_second": 2.267
 }

eval_results.json CHANGED Viewed

@@ -9,8 +9,8 @@
     "eval_rewards/chosen": 0.5534913539886475,
     "eval_rewards/margins": 0.214975506067276,
     "eval_rewards/rejected": 0.33851587772369385,
-    "eval_runtime": 182.1641,
     "eval_samples": 398,
-    "eval_samples_per_second": 2.185,
-    "eval_steps_per_second": 2.185
 }

     "eval_rewards/chosen": 0.5534913539886475,
     "eval_rewards/margins": 0.214975506067276,
     "eval_rewards/rejected": 0.33851587772369385,
+    "eval_runtime": 181.731,
     "eval_samples": 398,
+    "eval_samples_per_second": 2.19,
+    "eval_steps_per_second": 2.19
 }

runs/Mar11_06-57-47_b89f062cf3e1/events.out.tfevents.1710140413.b89f062cf3e1.14079.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b821df81bee3de1d5b6e0311f9c2935e0068743349f806ad8211cbae72ae67ba
+size 11021

runs/Mar11_06-57-47_b89f062cf3e1/events.out.tfevents.1710140992.b89f062cf3e1.14079.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dcdb71d67ab8da461762bd18b6ff028cbaa01e1dccfc654c57fc047e4e0837b3
+size 828

train_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
     "epoch": 1.0,
-    "train_loss": 0.12323601163483516,
-    "train_runtime": 1012.2753,
     "train_samples": 3588,
-    "train_samples_per_second": 3.544,
-    "train_steps_per_second": 0.886
 }

 {
     "epoch": 1.0,
+    "train_loss": 0.06080360662445443,
+    "train_runtime": 395.7009,
     "train_samples": 3588,
+    "train_samples_per_second": 9.067,
+    "train_steps_per_second": 2.267
 }

trainer_state.json CHANGED Viewed

@@ -1400,10 +1400,10 @@
       "epoch": 1.0,
       "step": 897,
       "total_flos": 0.0,
-      "train_loss": 0.12323601163483516,
-      "train_runtime": 1012.2753,
-      "train_samples_per_second": 3.544,
-      "train_steps_per_second": 0.886
     }
   ],
   "logging_steps": 10,

       "epoch": 1.0,
       "step": 897,
       "total_flos": 0.0,
+      "train_loss": 0.06080360662445443,
+      "train_runtime": 395.7009,
+      "train_samples_per_second": 9.067,
+      "train_steps_per_second": 2.267
     }
   ],
   "logging_steps": 10,

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cce9a3a99c6711ebb8b18c4a7e23fb8e32e4da26e087963f99148e32f7380a99
 size 4856

 version https://git-lfs.github.com/spec/v1
+oid sha256:cb9bb8efdb9c6841452004c47a87a4862f6eeb584a1259066be691727e36b983
 size 4856