imelnyk committed on
Commit
0375483
1 Parent(s): 8bda31c

Model save

Browse files
README.md CHANGED
@@ -2,15 +2,9 @@
2
  license: apache-2.0
3
  library_name: peft
4
  tags:
5
- - alignment-handbook
6
  - trl
7
  - dpo
8
  - generated_from_trainer
9
- - trl
10
- - dpo
11
- - generated_from_trainer
12
- datasets:
13
- - HuggingFaceH4/ultrafeedback_binarized
14
  base_model: mistralai/Mistral-7B-v0.1
15
  model-index:
16
  - name: zephyr-7b-dpo-qlora-fsdp
@@ -22,17 +16,17 @@ should probably proofread and complete it, then remove this comment. -->
22
 
23
  # zephyr-7b-dpo-qlora-fsdp
24
 
25
- This model is a fine-tuned version of [alignment-handbook/zephyr-7b-sft-qlora](https://huggingface.co/alignment-handbook/zephyr-7b-sft-qlora) on the HuggingFaceH4/ultrafeedback_binarized dataset.
26
  It achieves the following results on the evaluation set:
27
- - Loss: 0.6865
28
- - Rewards/chosen: 0.0331
29
- - Rewards/rejected: 0.0188
30
- - Rewards/accuracies: 0.5935
31
- - Rewards/margins: 0.0143
32
- - Logps/rejected: -257.1393
33
- - Logps/chosen: -276.4896
34
- - Logits/rejected: -2.3640
35
- - Logits/chosen: -2.4104
36
 
37
  ## Model description
38
 
@@ -52,16 +46,18 @@ More information needed
52
 
53
  The following hyperparameters were used during training:
54
  - learning_rate: 5e-06
55
- - train_batch_size: 5
56
  - eval_batch_size: 8
57
  - seed: 42
58
  - distributed_type: multi-GPU
 
59
  - gradient_accumulation_steps: 4
60
- - total_train_batch_size: 20
 
61
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
62
  - lr_scheduler_type: cosine
63
  - lr_scheduler_warmup_ratio: 0.1
64
- - num_epochs: 0.01
65
 
66
  ### Training results
67
 
 
2
  license: apache-2.0
3
  library_name: peft
4
  tags:
 
5
  - trl
6
  - dpo
7
  - generated_from_trainer
 
 
 
 
 
8
  base_model: mistralai/Mistral-7B-v0.1
9
  model-index:
10
  - name: zephyr-7b-dpo-qlora-fsdp
 
16
 
17
  # zephyr-7b-dpo-qlora-fsdp
18
 
19
+ This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
+ - Loss: 0.6843
22
+ - Rewards/chosen: 0.0234
23
+ - Rewards/rejected: 0.0034
24
+ - Rewards/accuracies: 0.6211
25
+ - Rewards/margins: 0.0199
26
+ - Logps/rejected: -260.8430
27
+ - Logps/chosen: -258.9067
28
+ - Logits/rejected: -2.4164
29
+ - Logits/chosen: -2.4494
30
 
31
  ## Model description
32
 
 
46
 
47
  The following hyperparameters were used during training:
48
  - learning_rate: 5e-06
49
+ - train_batch_size: 15
50
  - eval_batch_size: 8
51
  - seed: 42
52
  - distributed_type: multi-GPU
53
+ - num_devices: 8
54
  - gradient_accumulation_steps: 4
55
+ - total_train_batch_size: 480
56
+ - total_eval_batch_size: 64
57
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
58
  - lr_scheduler_type: cosine
59
  - lr_scheduler_warmup_ratio: 0.1
60
+ - num_epochs: 0.1
61
 
62
  ### Training results
63
 
adapter_config.json CHANGED
@@ -19,13 +19,13 @@
19
  "rank_pattern": {},
20
  "revision": null,
21
  "target_modules": [
22
- "gate_proj",
23
- "down_proj",
24
- "v_proj",
25
  "o_proj",
 
 
26
  "up_proj",
27
- "q_proj",
28
- "k_proj"
29
  ],
30
  "task_type": "CAUSAL_LM",
31
  "use_dora": false,
 
19
  "rank_pattern": {},
20
  "revision": null,
21
  "target_modules": [
22
+ "q_proj",
 
 
23
  "o_proj",
24
+ "down_proj",
25
+ "k_proj",
26
  "up_proj",
27
+ "gate_proj",
28
+ "v_proj"
29
  ],
30
  "task_type": "CAUSAL_LM",
31
  "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1f7925055af868d3b38756325d43d85883ad8c14753b88f93c708d2886161f80
3
  size 671150064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a2d5fa611aeb51254da3b6144f743c1192dce3017f741b26f4cdf367d0100bb
3
  size 671150064
all_results.json CHANGED
@@ -1,21 +1,21 @@
1
  {
2
- "epoch": 0.01,
3
- "eval_logits/chosen": -2.410409688949585,
4
- "eval_logits/rejected": -2.363970994949341,
5
- "eval_logps/chosen": -276.4896240234375,
6
- "eval_logps/rejected": -257.1393127441406,
7
- "eval_loss": 0.6865259408950806,
8
- "eval_rewards/accuracies": 0.593500018119812,
9
- "eval_rewards/chosen": 0.03312591835856438,
10
- "eval_rewards/margins": 0.014345898292958736,
11
- "eval_rewards/rejected": 0.018780020996928215,
12
- "eval_runtime": 615.2299,
13
  "eval_samples": 2000,
14
- "eval_samples_per_second": 3.251,
15
- "eval_steps_per_second": 0.406,
16
- "train_loss": 0.6888245363389293,
17
- "train_runtime": 439.9957,
18
  "train_samples": 61135,
19
- "train_samples_per_second": 1.389,
20
- "train_steps_per_second": 0.07
21
  }
 
1
  {
2
+ "epoch": 0.1,
3
+ "eval_logits/chosen": -2.449439287185669,
4
+ "eval_logits/rejected": -2.416422128677368,
5
+ "eval_logps/chosen": -258.90673828125,
6
+ "eval_logps/rejected": -260.843017578125,
7
+ "eval_loss": 0.6843283772468567,
8
+ "eval_rewards/accuracies": 0.62109375,
9
+ "eval_rewards/chosen": 0.023392992094159126,
10
+ "eval_rewards/margins": 0.019943276420235634,
11
+ "eval_rewards/rejected": 0.003449714742600918,
12
+ "eval_runtime": 144.9425,
13
  "eval_samples": 2000,
14
+ "eval_samples_per_second": 13.799,
15
+ "eval_steps_per_second": 0.221,
16
+ "train_loss": 0.6879739944751446,
17
+ "train_runtime": 738.7034,
18
  "train_samples": 61135,
19
+ "train_samples_per_second": 8.276,
20
+ "train_steps_per_second": 0.018
21
  }
eval_results.json CHANGED
@@ -1,16 +1,16 @@
1
  {
2
- "epoch": 0.01,
3
- "eval_logits/chosen": -2.410409688949585,
4
- "eval_logits/rejected": -2.363970994949341,
5
- "eval_logps/chosen": -276.4896240234375,
6
- "eval_logps/rejected": -257.1393127441406,
7
- "eval_loss": 0.6865259408950806,
8
- "eval_rewards/accuracies": 0.593500018119812,
9
- "eval_rewards/chosen": 0.03312591835856438,
10
- "eval_rewards/margins": 0.014345898292958736,
11
- "eval_rewards/rejected": 0.018780020996928215,
12
- "eval_runtime": 615.2299,
13
  "eval_samples": 2000,
14
- "eval_samples_per_second": 3.251,
15
- "eval_steps_per_second": 0.406
16
  }
 
1
  {
2
+ "epoch": 0.1,
3
+ "eval_logits/chosen": -2.449439287185669,
4
+ "eval_logits/rejected": -2.416422128677368,
5
+ "eval_logps/chosen": -258.90673828125,
6
+ "eval_logps/rejected": -260.843017578125,
7
+ "eval_loss": 0.6843283772468567,
8
+ "eval_rewards/accuracies": 0.62109375,
9
+ "eval_rewards/chosen": 0.023392992094159126,
10
+ "eval_rewards/margins": 0.019943276420235634,
11
+ "eval_rewards/rejected": 0.003449714742600918,
12
+ "eval_runtime": 144.9425,
13
  "eval_samples": 2000,
14
+ "eval_samples_per_second": 13.799,
15
+ "eval_steps_per_second": 0.221
16
  }
runs/Mar07_22-50-40_cccxc544/events.out.tfevents.1709869943.cccxc544.580669.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7082c4d428bfe48e310fa7b1025dac7ff000eca580c8abea515e181c224d9714
3
+ size 12962
runs/Mar08_14-00-52_cccxc544/events.out.tfevents.1709924482.cccxc544.734409.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80bd2cf062bd929e2029872bce1ea0f91112afeda4c59bc369fb0cc6c827ffbd
3
+ size 5478
runs/Mar08_14-12-12_cccxc542/events.out.tfevents.1709925175.cccxc542.465628.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08a6dc5383bb59b370a9b301fd1e8ea0188524008ea8a3bbfc19f8954fa27c75
3
+ size 4801
runs/Mar08_14-17-17_cccxc544/events.out.tfevents.1709925468.cccxc544.737210.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51914d18d6a3ad3bebd45ef404fb2c51207b1cf788205fad758c13208e5900a1
3
+ size 6501
runs/Mar08_14-17-17_cccxc544/events.out.tfevents.1709926351.cccxc544.737210.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32b05b4eeb189df79c4cf1cd4709802bd6f13350efde1948d3175e44e2118ef1
3
+ size 815
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 0.01,
3
- "train_loss": 0.6888245363389293,
4
- "train_runtime": 439.9957,
5
  "train_samples": 61135,
6
- "train_samples_per_second": 1.389,
7
- "train_steps_per_second": 0.07
8
  }
 
1
  {
2
+ "epoch": 0.1,
3
+ "train_loss": 0.6879739944751446,
4
+ "train_runtime": 738.7034,
5
  "train_samples": 61135,
6
+ "train_samples_per_second": 8.276,
7
+ "train_steps_per_second": 0.018
8
  }
trainer_state.json CHANGED
@@ -1,21 +1,21 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.010141490144761593,
5
  "eval_steps": 100,
6
- "global_step": 31,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.0,
13
- "grad_norm": 2.421875,
14
- "learning_rate": 1.25e-06,
15
- "logits/chosen": -2.3689165115356445,
16
- "logits/rejected": -2.3419089317321777,
17
- "logps/chosen": -304.96429443359375,
18
- "logps/rejected": -224.31954956054688,
19
  "loss": 0.6931,
20
  "rewards/accuracies": 0.0,
21
  "rewards/chosen": 0.0,
@@ -24,67 +24,37 @@
24
  "step": 1
25
  },
26
  {
27
- "epoch": 0.0,
28
- "grad_norm": 1.9296875,
29
- "learning_rate": 4.415111107797445e-06,
30
- "logits/chosen": -2.3774471282958984,
31
- "logits/rejected": -2.358837127685547,
32
- "logps/chosen": -267.6408386230469,
33
- "logps/rejected": -221.9726104736328,
34
- "loss": 0.6921,
35
- "rewards/accuracies": 0.5166666507720947,
36
- "rewards/chosen": 0.008927525021135807,
37
- "rewards/margins": 0.002250629710033536,
38
- "rewards/rejected": 0.006676895078271627,
39
  "step": 10
40
  },
41
  {
42
- "epoch": 0.01,
43
- "grad_norm": 2.203125,
44
- "learning_rate": 1.7829919182222752e-06,
45
- "logits/chosen": -2.4560706615448,
46
- "logits/rejected": -2.402303695678711,
47
- "logps/chosen": -265.12762451171875,
48
- "logps/rejected": -272.61566162109375,
49
- "loss": 0.6889,
50
- "rewards/accuracies": 0.5900000333786011,
51
- "rewards/chosen": 0.028245043009519577,
52
- "rewards/margins": 0.009032377041876316,
53
- "rewards/rejected": 0.019212666898965836,
54
- "step": 20
55
- },
56
- {
57
- "epoch": 0.01,
58
- "grad_norm": 2.375,
59
- "learning_rate": 1.6904105645142443e-08,
60
- "logits/chosen": -2.3814165592193604,
61
- "logits/rejected": -2.3470723628997803,
62
- "logps/chosen": -304.08697509765625,
63
- "logps/rejected": -281.0203552246094,
64
- "loss": 0.6844,
65
- "rewards/accuracies": 0.6299999952316284,
66
- "rewards/chosen": 0.03472686558961868,
67
- "rewards/margins": 0.019056813791394234,
68
- "rewards/rejected": 0.015670055523514748,
69
- "step": 30
70
- },
71
- {
72
- "epoch": 0.01,
73
- "step": 31,
74
  "total_flos": 0.0,
75
- "train_loss": 0.6888245363389293,
76
- "train_runtime": 439.9957,
77
- "train_samples_per_second": 1.389,
78
- "train_steps_per_second": 0.07
79
  }
80
  ],
81
  "logging_steps": 10,
82
- "max_steps": 31,
83
  "num_input_tokens_seen": 0,
84
  "num_train_epochs": 1,
85
  "save_steps": 100,
86
  "total_flos": 0.0,
87
- "train_batch_size": 5,
88
  "trial_name": null,
89
  "trial_params": null
90
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.10196078431372549,
5
  "eval_steps": 100,
6
+ "global_step": 13,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.01,
13
+ "grad_norm": 0.5078125,
14
+ "learning_rate": 2.5e-06,
15
+ "logits/chosen": -2.3996620178222656,
16
+ "logits/rejected": -2.364995002746582,
17
+ "logps/chosen": -288.2684326171875,
18
+ "logps/rejected": -260.847412109375,
19
  "loss": 0.6931,
20
  "rewards/accuracies": 0.0,
21
  "rewards/chosen": 0.0,
 
24
  "step": 1
25
  },
26
  {
27
+ "epoch": 0.08,
28
+ "grad_norm": 0.48046875,
29
+ "learning_rate": 8.628481651367876e-07,
30
+ "logits/chosen": -2.3491926193237305,
31
+ "logits/rejected": -2.307647705078125,
32
+ "logps/chosen": -264.3670349121094,
33
+ "logps/rejected": -252.78759765625,
34
+ "loss": 0.6887,
35
+ "rewards/accuracies": 0.5314815640449524,
36
+ "rewards/chosen": 0.012712381780147552,
37
+ "rewards/margins": 0.01025653351098299,
38
+ "rewards/rejected": 0.0024558501318097115,
39
  "step": 10
40
  },
41
  {
42
+ "epoch": 0.1,
43
+ "step": 13,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  "total_flos": 0.0,
45
+ "train_loss": 0.6879739944751446,
46
+ "train_runtime": 738.7034,
47
+ "train_samples_per_second": 8.276,
48
+ "train_steps_per_second": 0.018
49
  }
50
  ],
51
  "logging_steps": 10,
52
+ "max_steps": 13,
53
  "num_input_tokens_seen": 0,
54
  "num_train_epochs": 1,
55
  "save_steps": 100,
56
  "total_flos": 0.0,
57
+ "train_batch_size": 15,
58
  "trial_name": null,
59
  "trial_params": null
60
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f21d618e5d6b3867a78c9fe4f30b09ded8634419a2c3a54b84735333c2487436
3
  size 5112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b632066f85ea8d2404c2331fdbd91457f0cc5397598475923b889072fa42749
3
  size 5112