BraylonDash committed
Commit 23d0840
1 Parent(s): 1e556a1

Model save

README.md CHANGED
@@ -2,14 +2,10 @@
 license: mit
 library_name: peft
 tags:
-- alignment-handbook
-- generated_from_trainer
 - trl
 - dpo
 - generated_from_trainer
 base_model: microsoft/phi-2
-datasets:
-- HuggingFaceH4/ultrafeedback_binarized
 model-index:
 - name: phi-2-gpo-test-iter-0
   results: []
@@ -20,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
 
 # phi-2-gpo-test-iter-0
 
-This model is a fine-tuned version of [lole25/phi-2-sft-ultrachat-lora](https://huggingface.co/lole25/phi-2-sft-ultrachat-lora) on the HuggingFaceH4/ultrafeedback_binarized dataset.
+This model is a fine-tuned version of [microsoft/phi-2](https://huggingface.co/microsoft/phi-2) on the None dataset.
 
 ## Model description
 
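For context on what the model card describes: this repository holds a PEFT (LoRA) adapter trained with trl/DPO on top of microsoft/phi-2, so in practice it is loaded onto the base model rather than used as a standalone checkpoint. A minimal sketch, assuming `transformers` and `peft` are installed and using a hypothetical adapter repo id inferred from the model name (adjust to the actual repo):

```python
# Sketch: attach the LoRA adapter from this repo to the phi-2 base model.
# The adapter repo id below is an assumption, not confirmed by the diff.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_id = "microsoft/phi-2"                       # base model named in the card
adapter_id = "BraylonDash/phi-2-gpo-test-iter-0"  # hypothetical adapter repo id

tokenizer = AutoTokenizer.from_pretrained(base_id)
base_model = AutoModelForCausalLM.from_pretrained(base_id, torch_dtype=torch.float16)
model = PeftModel.from_pretrained(base_model, adapter_id)  # applies the LoRA weights

inputs = tokenizer("Instruct: Say hello.\nOutput:", return_tensors="pt")
print(tokenizer.decode(model.generate(**inputs, max_new_tokens=20)[0]))
```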
adapter_config.json CHANGED
@@ -19,9 +19,9 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "v_proj",
-    "q_proj",
     "k_proj",
+    "q_proj",
+    "v_proj",
     "dense"
   ],
   "task_type": "CAUSAL_LM"
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5aefc8771d1c715d0dd7ba3921091cb643f9b864456e67d5d9a7eccc5b4a3338
+oid sha256:c99237b61ddbcc12b02ca16d9318ff7a9125e77add338ae7faccac130d33f3ad
 size 41977616
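The adapter weights themselves live in Git LFS, so only the pointer (oid and size) changes in this diff. To inspect the downloaded adapter_model.safetensors locally, a small sketch using the safetensors library (the local path is an assumption):

```python
# Sketch: list the LoRA tensors stored in the adapter file.
# Assumes adapter_model.safetensors has been downloaded to the working directory.
from safetensors import safe_open

with safe_open("adapter_model.safetensors", framework="pt") as f:
    for name in f.keys():
        print(name, tuple(f.get_tensor(name).shape))
```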
all_results.json CHANGED
@@ -1,8 +1,8 @@
 {
     "epoch": 2.0,
-    "train_loss": 0.0005297146562952548,
-    "train_runtime": 27.8881,
+    "train_loss": 0.00012263256940059364,
+    "train_runtime": 3.6964,
     "train_samples": 30567,
-    "train_samples_per_second": 2.223,
-    "train_steps_per_second": 0.143
+    "train_samples_per_second": 2.164,
+    "train_steps_per_second": 0.541
 }
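The new numbers are consistent with a very short run: 3.6964 s × 0.541 steps/s ≈ 2 optimizer steps, and 3.6964 s × 2.164 samples/s ≈ 8 samples, far fewer than the 30567 under train_samples (that field reports the dataset size, not the samples actually consumed). A quick sanity-check sketch, assuming the file sits next to the script:

```python
# Sketch: sanity-check the throughput numbers reported in all_results.json.
import json

with open("all_results.json") as f:
    results = json.load(f)

steps = results["train_runtime"] * results["train_steps_per_second"]
samples = results["train_runtime"] * results["train_samples_per_second"]
print(f"approx. optimizer steps: {steps:.1f}")    # ~2.0
print(f"approx. samples seen:    {samples:.1f}")  # ~8.0
```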
runs/Mar20_14-05-40_Braylon/events.out.tfevents.1710904006.Braylon.7128.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ea01b700538735b2e7533c0dcd9eafc8c9865f23aa1ae3f0e99c8569a40b125f
+size 5787
runs/Mar20_14-08-12_Braylon/events.out.tfevents.1710904158.Braylon.7774.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1022f0befdf79d39c4626475e7c066359d04bb4a1b2baa190cb79eb343c63668
+size 5787
runs/Mar20_14-10-43_Braylon/events.out.tfevents.1710904309.Braylon.8372.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a07d931ca10021e49380ebdd93e7da2cfcf1f3746a62fcec27ab6695f8e11711
+size 5787
runs/Mar20_14-14-21_Braylon/events.out.tfevents.1710904523.Braylon.9369.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:680cb8093a177f757eb1ba57eb5bafed5413022d6cf65ed5f94cee1552b98f27
+size 5781
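The four new files under runs/ are TensorBoard event logs (again stored as LFS pointers). A minimal sketch for reading them back, assuming the tensorboard package is installed and one of the run directories has been downloaded locally:

```python
# Sketch: read scalar metrics back out of a downloaded TensorBoard run directory.
# The path is an assumption; point it at whichever runs/ folder you fetched.
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

acc = EventAccumulator("runs/Mar20_14-14-21_Braylon")
acc.Reload()  # parses the events.out.tfevents.* file(s) in that directory

for tag in acc.Tags()["scalars"]:
    print(tag, [(e.step, e.value) for e in acc.Scalars(tag)])
```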
train_results.json CHANGED
@@ -1,8 +1,8 @@
 {
     "epoch": 2.0,
-    "train_loss": 0.0005297146562952548,
-    "train_runtime": 27.8881,
+    "train_loss": 0.00012263256940059364,
+    "train_runtime": 3.6964,
     "train_samples": 30567,
-    "train_samples_per_second": 2.223,
-    "train_steps_per_second": 0.143
+    "train_samples_per_second": 2.164,
+    "train_steps_per_second": 0.541
 }
trainer_state.json CHANGED
@@ -3,19 +3,19 @@
   "best_model_checkpoint": null,
   "epoch": 2.0,
   "eval_steps": 100,
-  "global_step": 4,
+  "global_step": 2,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 0.5,
+      "epoch": 1.0,
       "learning_rate": 5e-06,
-      "logits/chosen": 0.8444626331329346,
-      "logits/rejected": 0.9047268629074097,
-      "logps/chosen": -269.189208984375,
-      "logps/rejected": -232.40396118164062,
-      "loss": 0.0005,
+      "logits/chosen": 0.7793408036231995,
+      "logits/rejected": 0.853427529335022,
+      "logps/chosen": -255.33981323242188,
+      "logps/rejected": -306.3643798828125,
+      "loss": 0.0001,
       "rewards/accuracies": 0.0,
       "rewards/chosen": 0.0,
       "rewards/margins": 0.0,
@@ -24,16 +24,16 @@
     },
     {
       "epoch": 2.0,
-      "step": 4,
+      "step": 2,
       "total_flos": 0.0,
-      "train_loss": 0.0005297146562952548,
-      "train_runtime": 27.8881,
-      "train_samples_per_second": 2.223,
-      "train_steps_per_second": 0.143
+      "train_loss": 0.00012263256940059364,
+      "train_runtime": 3.6964,
+      "train_samples_per_second": 2.164,
+      "train_steps_per_second": 0.541
     }
   ],
   "logging_steps": 10,
-  "max_steps": 4,
+  "max_steps": 2,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 2,
   "save_steps": 100,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f8d486432f7edca2ff053d45db771fb36af9ff4664f14de6c9bdd48da8667112
+oid sha256:9d08e7fc0828d4977b610287315f5212d8fe2e8b722629dba12249180a3d141d
 size 5816
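training_args.bin is a pickled TrainingArguments-style object saved by the Trainer; only its LFS pointer changes here. To inspect the hyperparameters it captures, a small sketch (the local path is an assumption, and newer torch versions need weights_only=False to unpickle arbitrary objects):

```python
# Sketch: inspect the serialized training arguments from this repo.
import torch

args = torch.load("training_args.bin", weights_only=False)
print(type(args).__name__)
print(args)  # prints the full set of training hyperparameters
```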