Jerry46 committed on
Commit
2a34ae7
·
1 Parent(s): 1ada802

Training in progress, epoch 1

Browse files
README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
  license: apache-2.0
3
- base_model: mistralai/Mistral-7B-v0.1
4
  tags:
5
  - generated_from_trainer
6
  model-index:
@@ -13,17 +13,17 @@ should probably proofread and complete it, then remove this comment. -->
13
 
14
  # zephyr-7b-dpo-lora
15
 
16
- This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on the None dataset.
17
  It achieves the following results on the evaluation set:
18
- - Loss: 0.6642
19
- - Rewards/chosen: 0.1042
20
- - Rewards/rejected: 0.0401
21
- - Rewards/accuracies: 0.6480
22
- - Rewards/margins: 0.0641
23
- - Logps/rejected: -230.4560
24
- - Logps/chosen: -278.6917
25
- - Logits/rejected: -2.3987
26
- - Logits/chosen: -2.4597
27
 
28
  ## Model description
29
 
@@ -54,13 +54,15 @@ The following hyperparameters were used during training:
54
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
55
  - lr_scheduler_type: linear
56
  - lr_scheduler_warmup_ratio: 0.1
57
- - num_epochs: 1
58
 
59
  ### Training results
60
 
61
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
62
  |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
63
- | 0.661 | 1.0 | 968 | 0.6642 | 0.1042 | 0.0401 | 0.6480 | 0.0641 | -230.4560 | -278.6917 | -2.3987 | -2.4597 |
 
 
64
 
65
 
66
  ### Framework versions
 
1
  ---
2
  license: apache-2.0
3
+ base_model: alignment-handbook/zephyr-7b-sft-full
4
  tags:
5
  - generated_from_trainer
6
  model-index:
 
13
 
14
  # zephyr-7b-dpo-lora
15
 
16
+ This model is a fine-tuned version of [alignment-handbook/zephyr-7b-sft-full](https://huggingface.co/alignment-handbook/zephyr-7b-sft-full) on an unspecified dataset.
17
  It achieves the following results on the evaluation set:
18
+ - Loss: -0.2038
19
+ - Rewards/chosen: -1.1628
20
+ - Rewards/rejected: -2.4457
21
+ - Rewards/accuracies: 0.6840
22
+ - Rewards/margins: 1.2829
23
+ - Logps/rejected: -252.9479
24
+ - Logps/chosen: -282.7848
25
+ - Logits/rejected: -2.9400
26
+ - Logits/chosen: -2.9655
27
 
28
  ## Model description
29
 
 
54
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
55
  - lr_scheduler_type: linear
56
  - lr_scheduler_warmup_ratio: 0.1
57
+ - num_epochs: 3
58
 
59
  ### Training results
60
 
61
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
62
  |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
63
+ | 0.6137 | 1.0 | 968 | 0.6277 | -0.0287 | -0.4191 | 0.7040 | 0.3905 | -232.6823 | -271.4433 | -2.9989 | -3.0154 |
64
+ | 0.0705 | 2.0 | 1937 | 0.0570 | -0.6708 | -1.6676 | 0.6960 | 0.9968 | -245.1669 | -277.8647 | -2.9609 | -2.9830 |
65
+ | -0.2602 | 3.0 | 2904 | -0.2038 | -1.1628 | -2.4457 | 0.6840 | 1.2829 | -252.9479 | -282.7848 | -2.9400 | -2.9655 |
66
 
67
 
68
  ### Framework versions
adapter_config.json CHANGED
@@ -16,10 +16,10 @@
16
  "rank_pattern": {},
17
  "revision": null,
18
  "target_modules": [
19
- "v_proj",
20
- "o_proj",
21
  "k_proj",
22
- "q_proj"
 
 
23
  ],
24
  "task_type": "CAUSAL_LM"
25
  }
 
16
  "rank_pattern": {},
17
  "revision": null,
18
  "target_modules": [
 
 
19
  "k_proj",
20
+ "q_proj",
21
+ "o_proj",
22
+ "v_proj"
23
  ],
24
  "task_type": "CAUSAL_LM"
25
  }
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1ad681b009188f61d825e95cda2759788ba737dd184ea82cf57b3a221d003e22
3
- size 109086672
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:686cd1e69fd98cd1135ed111d3eb4ace750c72a48810bd7354522a4e8d748d3b
3
+ size 218138576
all_results.json CHANGED
@@ -1,21 +1,21 @@
1
  {
2
- "epoch": 1.0,
3
- "eval_logits/chosen": -2.4597132205963135,
4
- "eval_logits/rejected": -2.398695468902588,
5
- "eval_logps/chosen": -278.69171142578125,
6
- "eval_logps/rejected": -230.4560089111328,
7
- "eval_loss": 0.6642152070999146,
8
- "eval_rewards/accuracies": 0.6480000019073486,
9
- "eval_rewards/chosen": 0.10415761172771454,
10
- "eval_rewards/margins": 0.06405296921730042,
11
- "eval_rewards/rejected": 0.04010463133454323,
12
- "eval_runtime": 444.8959,
13
  "eval_samples": 2000,
14
- "eval_samples_per_second": 4.495,
15
  "eval_steps_per_second": 0.281,
16
- "train_loss": 0.6728762634529555,
17
- "train_runtime": 27528.1814,
18
  "train_samples": 61966,
19
- "train_samples_per_second": 2.251,
20
- "train_steps_per_second": 0.035
21
  }
 
1
  {
2
+ "epoch": 3.0,
3
+ "eval_logits/chosen": -2.965512990951538,
4
+ "eval_logits/rejected": -2.9399757385253906,
5
+ "eval_logps/chosen": -282.7847900390625,
6
+ "eval_logps/rejected": -252.9479217529297,
7
+ "eval_loss": -0.203842431306839,
8
+ "eval_rewards/accuracies": 0.6840000152587891,
9
+ "eval_rewards/chosen": -1.1628247499465942,
10
+ "eval_rewards/margins": 1.2828813791275024,
11
+ "eval_rewards/rejected": -2.4457061290740967,
12
+ "eval_runtime": 444.1107,
13
  "eval_samples": 2000,
14
+ "eval_samples_per_second": 4.503,
15
  "eval_steps_per_second": 0.281,
16
+ "train_loss": 0.36701411283100355,
17
+ "train_runtime": 84636.1866,
18
  "train_samples": 61966,
19
+ "train_samples_per_second": 2.196,
20
+ "train_steps_per_second": 0.034
21
  }
config.json CHANGED
@@ -1,25 +1,26 @@
1
  {
2
- "_name_or_path": "mistralai/Mistral-7B-v0.1",
3
- "architectures": [
4
- "MistralForCausalLM"
5
- ],
6
- "bos_token_id": 1,
7
- "eos_token_id": 2,
8
- "hidden_act": "silu",
9
- "hidden_size": 4096,
10
- "initializer_range": 0.02,
11
- "intermediate_size": 14336,
12
- "max_position_embeddings": 32768,
13
- "model_type": "mistral",
14
- "num_attention_heads": 32,
15
- "num_hidden_layers": 32,
16
- "num_key_value_heads": 8,
17
- "rms_norm_eps": 1e-05,
18
- "rope_theta": 10000.0,
19
- "sliding_window": 4096,
20
- "tie_word_embeddings": false,
21
- "torch_dtype": "bfloat16",
22
- "transformers_version": "4.35.0",
23
- "use_cache": true,
24
- "vocab_size": 32000
25
- }
 
 
1
  {
2
+ "_name_or_path": "alignment-handbook/zephyr-7b-sft-full",
3
+ "architectures": [
4
+ "MistralForCausalLM"
5
+ ],
6
+ "bos_token_id": 1,
7
+ "eos_token_id": 2,
8
+ "hidden_act": "silu",
9
+ "hidden_size": 4096,
10
+ "initializer_range": 0.02,
11
+ "intermediate_size": 14336,
12
+ "max_position_embeddings": 32768,
13
+ "model_type": "mistral",
14
+ "num_attention_heads": 32,
15
+ "num_hidden_layers": 32,
16
+ "num_key_value_heads": 8,
17
+ "rms_norm_eps": 1e-05,
18
+ "rope_theta": 10000.0,
19
+ "sliding_window": 4096,
20
+ "tie_word_embeddings": false,
21
+ "torch_dtype": "bfloat16",
22
+ "transformers_version": "4.35.0",
23
+ "use_cache": true,
24
+ "vocab_size": 32000
25
+ }
26
+
eval_results.json CHANGED
@@ -1,16 +1,16 @@
1
  {
2
- "epoch": 1.0,
3
- "eval_logits/chosen": -2.4597132205963135,
4
- "eval_logits/rejected": -2.398695468902588,
5
- "eval_logps/chosen": -278.69171142578125,
6
- "eval_logps/rejected": -230.4560089111328,
7
- "eval_loss": 0.6642152070999146,
8
- "eval_rewards/accuracies": 0.6480000019073486,
9
- "eval_rewards/chosen": 0.10415761172771454,
10
- "eval_rewards/margins": 0.06405296921730042,
11
- "eval_rewards/rejected": 0.04010463133454323,
12
- "eval_runtime": 444.8959,
13
  "eval_samples": 2000,
14
- "eval_samples_per_second": 4.495,
15
  "eval_steps_per_second": 0.281
16
  }
 
1
  {
2
+ "epoch": 3.0,
3
+ "eval_logits/chosen": -2.965512990951538,
4
+ "eval_logits/rejected": -2.9399757385253906,
5
+ "eval_logps/chosen": -282.7847900390625,
6
+ "eval_logps/rejected": -252.9479217529297,
7
+ "eval_loss": -0.203842431306839,
8
+ "eval_rewards/accuracies": 0.6840000152587891,
9
+ "eval_rewards/chosen": -1.1628247499465942,
10
+ "eval_rewards/margins": 1.2828813791275024,
11
+ "eval_rewards/rejected": -2.4457061290740967,
12
+ "eval_runtime": 444.1107,
13
  "eval_samples": 2000,
14
+ "eval_samples_per_second": 4.503,
15
  "eval_steps_per_second": 0.281
16
  }
runs/Dec14_23-45-22_uclaml03.cs.ucla.edu/events.out.tfevents.1702626380.uclaml03.cs.ucla.edu.3135904.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82cb9fbc53b5517939daf91cfd50b212856fbbf7477d19911c51a77f03abf408
3
+ size 5011
runs/Dec18_14-20-56_uclaml03.cs.ucla.edu/events.out.tfevents.1702938137.uclaml03.cs.ucla.edu.3492719.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6af6cf64672c1afc612a69a766a9b0e1dc56476be227dec3492f74cc1f73291
3
+ size 4376
runs/Dec18_14-23-54_uclaml03.cs.ucla.edu/events.out.tfevents.1702938314.uclaml03.cs.ucla.edu.3493625.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b170fd343eef91646d511b9274e33304308e59a0a1ec19bfbd9baefad46f9159
3
+ size 66471
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 1.0,
3
- "train_loss": 0.6728762634529555,
4
- "train_runtime": 27528.1814,
5
  "train_samples": 61966,
6
- "train_samples_per_second": 2.251,
7
- "train_steps_per_second": 0.035
8
  }
 
1
  {
2
+ "epoch": 3.0,
3
+ "train_loss": 0.36701411283100355,
4
+ "train_runtime": 84636.1866,
5
  "train_samples": 61966,
6
+ "train_samples_per_second": 2.196,
7
+ "train_steps_per_second": 0.034
8
  }
trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cfc6fa65238373edb8c038b73d0de99649ac0d248e697a0222bd24510217b308
3
- size 4792
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b08ab1b3f35b469ceb8c0e728bff6c742e3c47f593c2b3236e583f878d28b9de
3
+ size 4728