David-Xu committed
Commit 4c7509c (1 parent: 1a2cb74)

Model save

README.md CHANGED
@@ -1,13 +1,9 @@
 ---
 library_name: peft
 tags:
-- alignment-handbook
-- generated_from_trainer
 - trl
 - dpo
 - generated_from_trainer
-datasets:
-- David-Xu/astronomy-stack-dpo-20-percent
 base_model: meta-llama/Llama-2-7b-chat-hf
 model-index:
 - name: cira-7b-dpo-lora
@@ -19,7 +15,7 @@ should probably proofread and complete it, then remove this comment. -->
 
 # cira-7b-dpo-lora
 
-This model is a fine-tuned version of [David-Xu/llama-2-7b-cira-sft-v0.1-merge](https://huggingface.co/David-Xu/llama-2-7b-cira-sft-v0.1-merge) on the David-Xu/astronomy-stack-dpo-20-percent dataset.
+This model is a fine-tuned version of [meta-llama/Llama-2-7b-chat-hf](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf) on the None dataset.
 It achieves the following results on the evaluation set:
 - Loss: 0.6183
 - Rewards/chosen: 0.5535
@@ -71,7 +67,7 @@ The following hyperparameters were used during training:
 | 0.5596 | 0.56 | 500 | -1.0852 | -1.2330 | -790.7928 | -646.7930 | 0.6230 | 0.6683 | 0.5967 | 0.2037 | 0.3930 |
 | 0.5382 | 0.67 | 600 | -1.0547 | -1.2034 | -793.2486 | -650.0926 | 0.6199 | 0.6709 | 0.5721 | 0.2121 | 0.3600 |
 | 0.5952 | 0.78 | 700 | -1.0324 | -1.1827 | -794.9604 | -652.0420 | 0.6186 | 0.6784 | 0.5550 | 0.2145 | 0.3405 |
-| 0.5792 | 0.89 | 800 | 0.6182 | 0.5534 | 0.3382 | 0.6784 | 0.2151 | -652.2705 | -795.125 | -1.1812 | -1.0308 |
+| 0.5792 | 0.89 | 800 | -1.0308 | -1.1812 | -795.125 | -652.2705 | 0.6182 | 0.6784 | 0.5534 | 0.2151 | 0.3382 |
 
 
 ### Framework versions
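
The updated card only documents metrics, so for reference here is a minimal, hedged sketch of loading the adapter on top of its base model with `peft` and `transformers`. The adapter repo id `David-Xu/cira-7b-dpo-lora` is an assumption inferred from the card's model name; the base model id comes from the card's `base_model` field.

```python
# Minimal sketch, not part of the repo: load the LoRA adapter on top of its base model.
# "David-Xu/cira-7b-dpo-lora" is an assumed repo id inferred from the card's model name.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_id = "meta-llama/Llama-2-7b-chat-hf"   # base_model from the card metadata
adapter_id = "David-Xu/cira-7b-dpo-lora"    # assumed adapter repo id

tokenizer = AutoTokenizer.from_pretrained(base_id)
base = AutoModelForCausalLM.from_pretrained(
    base_id, torch_dtype=torch.float16, device_map="auto"
)
model = PeftModel.from_pretrained(base, adapter_id)  # attach the LoRA weights

prompt = "Why do stars twinkle?"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
output = model.generate(**inputs, max_new_tokens=128)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```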
adapter_config.json CHANGED
@@ -20,12 +20,12 @@
 "revision": null,
 "target_modules": [
 "o_proj",
-"k_proj",
-"q_proj",
 "up_proj",
+"down_proj",
+"k_proj",
 "v_proj",
 "gate_proj",
-"down_proj"
+"q_proj"
 ],
 "task_type": "CAUSAL_LM",
 "use_dora": false,
all_results.json CHANGED
@@ -9,13 +9,13 @@
 "eval_rewards/chosen": 0.5534913539886475,
 "eval_rewards/margins": 0.214975506067276,
 "eval_rewards/rejected": 0.33851587772369385,
-"eval_runtime": 182.1641,
+"eval_runtime": 181.731,
 "eval_samples": 398,
-"eval_samples_per_second": 2.185,
-"eval_steps_per_second": 2.185,
-"train_loss": 0.12323601163483516,
-"train_runtime": 1012.2753,
+"eval_samples_per_second": 2.19,
+"eval_steps_per_second": 2.19,
+"train_loss": 0.06080360662445443,
+"train_runtime": 395.7009,
 "train_samples": 3588,
-"train_samples_per_second": 3.544,
-"train_steps_per_second": 0.886
+"train_samples_per_second": 9.067,
+"train_steps_per_second": 2.267
 }
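
The updated numbers in all_results.json are internally consistent; a quick arithmetic check (values copied from the JSON above, the step count of 897 taken from trainer_state.json further down):

```python
# Consistency check of the updated metrics (values copied from all_results.json;
# the step count 897 comes from trainer_state.json further down in this commit).
chosen, rejected, margins = 0.5534913539886475, 0.33851587772369385, 0.214975506067276
assert abs((chosen - rejected) - margins) < 1e-6  # rewards/margins = chosen - rejected

eval_samples, eval_runtime = 398, 181.731
print(round(eval_samples / eval_runtime, 2))      # 2.19 -> eval_samples_per_second

train_samples, train_runtime, steps = 3588, 395.7009, 897
print(round(train_samples / train_runtime, 3))    # 9.067 -> train_samples_per_second
print(round(steps / train_runtime, 3))            # 2.267 -> train_steps_per_second
```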
eval_results.json CHANGED
@@ -9,8 +9,8 @@
 "eval_rewards/chosen": 0.5534913539886475,
 "eval_rewards/margins": 0.214975506067276,
 "eval_rewards/rejected": 0.33851587772369385,
-"eval_runtime": 182.1641,
+"eval_runtime": 181.731,
 "eval_samples": 398,
-"eval_samples_per_second": 2.185,
-"eval_steps_per_second": 2.185
+"eval_samples_per_second": 2.19,
+"eval_steps_per_second": 2.19
 }
runs/Mar11_06-57-47_b89f062cf3e1/events.out.tfevents.1710140413.b89f062cf3e1.14079.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b821df81bee3de1d5b6e0311f9c2935e0068743349f806ad8211cbae72ae67ba
+size 11021
runs/Mar11_06-57-47_b89f062cf3e1/events.out.tfevents.1710140992.b89f062cf3e1.14079.1 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dcdb71d67ab8da461762bd18b6ff028cbaa01e1dccfc654c57fc047e4e0837b3
+size 828
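
The two new runs/*.tfevents files are added as Git LFS pointers, so only their hashes and sizes appear here. Assuming the actual event files are fetched locally, a sketch of inspecting the logged scalars with TensorBoard's `EventAccumulator`:

```python
# Sketch, assuming the two event files above have been pulled from LFS into the
# runs/Mar11_06-57-47_b89f062cf3e1 directory: print the last value of each logged scalar.
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

acc = EventAccumulator("runs/Mar11_06-57-47_b89f062cf3e1")
acc.Reload()

for tag in acc.Tags()["scalars"]:
    last = acc.Scalars(tag)[-1]
    print(f"{tag}: step={last.step} value={last.value:.4f}")
```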
train_results.json CHANGED
@@ -1,8 +1,8 @@
 {
 "epoch": 1.0,
-"train_loss": 0.12323601163483516,
-"train_runtime": 1012.2753,
+"train_loss": 0.06080360662445443,
+"train_runtime": 395.7009,
 "train_samples": 3588,
-"train_samples_per_second": 3.544,
-"train_steps_per_second": 0.886
+"train_samples_per_second": 9.067,
+"train_steps_per_second": 2.267
 }
trainer_state.json CHANGED
@@ -1400,10 +1400,10 @@
 "epoch": 1.0,
 "step": 897,
 "total_flos": 0.0,
-"train_loss": 0.12323601163483516,
-"train_runtime": 1012.2753,
-"train_samples_per_second": 3.544,
-"train_steps_per_second": 0.886
+"train_loss": 0.06080360662445443,
+"train_runtime": 395.7009,
+"train_samples_per_second": 9.067,
+"train_steps_per_second": 2.267
 }
 ],
 "logging_steps": 10,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cce9a3a99c6711ebb8b18c4a7e23fb8e32e4da26e087963f99148e32f7380a99
+oid sha256:cb9bb8efdb9c6841452004c47a87a4862f6eeb584a1259066be691727e36b983
 size 4856