Minbyul committed on
Commit
ad73942
1 Parent(s): 9ff5e19

Model save

Browse files
README.md CHANGED
@@ -2,15 +2,10 @@
2
  license: llama2
3
  base_model: Minbyul/meditron-7b-wo-medication_qa-sft
4
  tags:
5
- - alignment-handbook
6
- - trl
7
- - dpo
8
- - generated_from_trainer
9
  - trl
10
  - dpo
 
11
  - generated_from_trainer
12
- datasets:
13
- - HuggingFaceH4/ultrafeedback_binarized
14
  model-index:
15
  - name: meditron-7b-dpo-full-sft-wo-medication_qa
16
  results: []
@@ -21,17 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
21
 
22
  # meditron-7b-dpo-full-sft-wo-medication_qa
23
 
24
- This model is a fine-tuned version of [Minbyul/meditron-7b-wo-medication_qa-sft](https://huggingface.co/Minbyul/meditron-7b-wo-medication_qa-sft) on the HuggingFaceH4/ultrafeedback_binarized dataset.
25
- It achieves the following results on the evaluation set:
26
- - Loss: 0.6342
27
- - Rewards/chosen: -0.0725
28
- - Rewards/rejected: -0.3037
29
- - Rewards/accuracies: 0.7812
30
- - Rewards/margins: 0.2312
31
- - Logps/rejected: -613.1050
32
- - Logps/chosen: -394.9411
33
- - Logits/rejected: -1.0634
34
- - Logits/chosen: -1.1747
35
 
36
  ## Model description
37
 
 
2
  license: llama2
3
  base_model: Minbyul/meditron-7b-wo-medication_qa-sft
4
  tags:
 
 
 
 
5
  - trl
6
  - dpo
7
+ - alignment-handbook
8
  - generated_from_trainer
 
 
9
  model-index:
10
  - name: meditron-7b-dpo-full-sft-wo-medication_qa
11
  results: []
 
16
 
17
  # meditron-7b-dpo-full-sft-wo-medication_qa
18
 
19
+ This model is a fine-tuned version of [Minbyul/meditron-7b-wo-medication_qa-sft](https://huggingface.co/Minbyul/meditron-7b-wo-medication_qa-sft) on an unknown dataset.
 
 
 
 
 
 
 
 
 
 
20
 
21
  ## Model description
22
 
all_results.json CHANGED
@@ -13,9 +13,9 @@
13
  "eval_samples": 240,
14
  "eval_samples_per_second": 7.857,
15
  "eval_steps_per_second": 0.262,
16
- "train_loss": 0.5082056158680027,
17
- "train_runtime": 904.9767,
18
  "train_samples": 3790,
19
- "train_samples_per_second": 4.188,
20
- "train_steps_per_second": 0.065
21
  }
 
13
  "eval_samples": 240,
14
  "eval_samples_per_second": 7.857,
15
  "eval_steps_per_second": 0.262,
16
+ "train_loss": 0.5081947011462713,
17
+ "train_runtime": 896.7003,
18
  "train_samples": 3790,
19
+ "train_samples_per_second": 4.227,
20
+ "train_steps_per_second": 0.066
21
  }
config.json CHANGED
@@ -23,6 +23,6 @@
23
  "tie_word_embeddings": false,
24
  "torch_dtype": "bfloat16",
25
  "transformers_version": "4.39.0.dev0",
26
- "use_cache": true,
27
  "vocab_size": 32017
28
  }
 
23
  "tie_word_embeddings": false,
24
  "torch_dtype": "bfloat16",
25
  "transformers_version": "4.39.0.dev0",
26
+ "use_cache": false,
27
  "vocab_size": 32017
28
  }
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a219f5b9727cc10c912ebb17f51e961332273fb6d58f0bec7971ffc3a20372a7
3
  size 4939124616
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62ddb60864ab3df63427dd2ef97cefbe278cddbd8ef4b58ab3373cbe51997f4b
3
  size 4939124616
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:846a04c4147ee488539d490649c404aa373290076c967d8a371dca5c77e71c5e
3
  size 4947390880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:451f3f655b623958dd20a8428b6ec7f29f95ff71e5ab20b43810ce0c55a5e275
3
  size 4947390880
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5aa02fc6a945bd66fa64f158ec7da4a6ba11ba3803cb1e6f10710a05d0d93206
3
  size 3590628080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b1bbd45a4105bf147ff5fb16fd4d7a579b997486765402e072d695babddf2ce
3
  size 3590628080
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 0.99,
3
- "train_loss": 0.5082056158680027,
4
- "train_runtime": 904.9767,
5
  "train_samples": 3790,
6
- "train_samples_per_second": 4.188,
7
- "train_steps_per_second": 0.065
8
  }
 
1
  {
2
  "epoch": 0.99,
3
+ "train_loss": 0.5081947011462713,
4
+ "train_runtime": 896.7003,
5
  "train_samples": 3790,
6
+ "train_samples_per_second": 4.227,
7
+ "train_steps_per_second": 0.066
8
  }
trainer_state.json CHANGED
@@ -10,7 +10,7 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.02,
13
- "grad_norm": 6.891863950600918,
14
  "learning_rate": 8.333333333333333e-08,
15
  "logits/chosen": -1.1214768886566162,
16
  "logits/rejected": -1.0666239261627197,
@@ -25,87 +25,87 @@
25
  },
26
  {
27
  "epoch": 0.17,
28
- "grad_norm": 6.6410695205888075,
29
  "learning_rate": 4.930057285201027e-07,
30
- "logits/chosen": -1.2806458473205566,
31
- "logits/rejected": -0.9247087836265564,
32
- "logps/chosen": -503.0013122558594,
33
- "logps/rejected": -899.8956298828125,
34
- "loss": 0.6907,
35
- "rewards/accuracies": 0.5069444179534912,
36
- "rewards/chosen": -0.0002252649428555742,
37
- "rewards/margins": 0.004411212634295225,
38
- "rewards/rejected": -0.004636477679014206,
39
  "step": 10
40
  },
41
  {
42
  "epoch": 0.34,
43
- "grad_norm": 7.3762614729181015,
44
  "learning_rate": 4.187457503795526e-07,
45
- "logits/chosen": -1.2530821561813354,
46
- "logits/rejected": -0.9845311045646667,
47
- "logps/chosen": -543.0537109375,
48
- "logps/rejected": -933.8472900390625,
49
  "loss": 0.6527,
50
- "rewards/accuracies": 0.8687499761581421,
51
- "rewards/chosen": -0.019722815603017807,
52
- "rewards/margins": 0.08204329013824463,
53
- "rewards/rejected": -0.10176609456539154,
54
  "step": 20
55
  },
56
  {
57
  "epoch": 0.5,
58
- "grad_norm": 8.929333715687283,
59
  "learning_rate": 2.8691164100062034e-07,
60
- "logits/chosen": -1.1901506185531616,
61
- "logits/rejected": -0.9701334238052368,
62
- "logps/chosen": -547.3389892578125,
63
- "logps/rejected": -972.1591796875,
64
- "loss": 0.5279,
65
- "rewards/accuracies": 0.8812500238418579,
66
- "rewards/chosen": -0.08818452060222626,
67
- "rewards/margins": 0.44495606422424316,
68
- "rewards/rejected": -0.5331405997276306,
69
  "step": 30
70
  },
71
  {
72
  "epoch": 0.67,
73
- "grad_norm": 7.370675587057317,
74
  "learning_rate": 1.4248369943086995e-07,
75
- "logits/chosen": -1.121147632598877,
76
- "logits/rejected": -0.9864951372146606,
77
- "logps/chosen": -544.5201416015625,
78
- "logps/rejected": -1050.002197265625,
79
  "loss": 0.4145,
80
- "rewards/accuracies": 0.90625,
81
- "rewards/chosen": -0.19094732403755188,
82
- "rewards/margins": 0.9460043907165527,
83
- "rewards/rejected": -1.1369515657424927,
84
  "step": 40
85
  },
86
  {
87
  "epoch": 0.84,
88
- "grad_norm": 6.641314363386866,
89
  "learning_rate": 3.473909705816111e-08,
90
- "logits/chosen": -1.0754799842834473,
91
- "logits/rejected": -1.0215175151824951,
92
- "logps/chosen": -568.5784912109375,
93
- "logps/rejected": -1094.7982177734375,
94
  "loss": 0.3982,
95
- "rewards/accuracies": 0.875,
96
- "rewards/chosen": -0.3303142786026001,
97
- "rewards/margins": 1.3637864589691162,
98
- "rewards/rejected": -1.6941007375717163,
99
  "step": 50
100
  },
101
  {
102
  "epoch": 0.99,
103
  "step": 59,
104
  "total_flos": 0.0,
105
- "train_loss": 0.5082056158680027,
106
- "train_runtime": 904.9767,
107
- "train_samples_per_second": 4.188,
108
- "train_steps_per_second": 0.065
109
  }
110
  ],
111
  "logging_steps": 10,
 
10
  "log_history": [
11
  {
12
  "epoch": 0.02,
13
+ "grad_norm": 6.892402047143212,
14
  "learning_rate": 8.333333333333333e-08,
15
  "logits/chosen": -1.1214768886566162,
16
  "logits/rejected": -1.0666239261627197,
 
25
  },
26
  {
27
  "epoch": 0.17,
28
+ "grad_norm": 6.623544286492592,
29
  "learning_rate": 4.930057285201027e-07,
30
+ "logits/chosen": -1.280522346496582,
31
+ "logits/rejected": -0.9248583912849426,
32
+ "logps/chosen": -503.0466613769531,
33
+ "logps/rejected": -899.9366455078125,
34
+ "loss": 0.6904,
35
+ "rewards/accuracies": 0.5347222089767456,
36
+ "rewards/chosen": -0.0006784469587728381,
37
+ "rewards/margins": 0.004367371555417776,
38
+ "rewards/rejected": -0.005045818164944649,
39
  "step": 10
40
  },
41
  {
42
  "epoch": 0.34,
43
+ "grad_norm": 7.366007707770025,
44
  "learning_rate": 4.187457503795526e-07,
45
+ "logits/chosen": -1.253061294555664,
46
+ "logits/rejected": -0.9847872853279114,
47
+ "logps/chosen": -543.08447265625,
48
+ "logps/rejected": -933.7767333984375,
49
  "loss": 0.6527,
50
+ "rewards/accuracies": 0.856249988079071,
51
+ "rewards/chosen": -0.020030761137604713,
52
+ "rewards/margins": 0.08102954924106598,
53
+ "rewards/rejected": -0.10106030851602554,
54
  "step": 20
55
  },
56
  {
57
  "epoch": 0.5,
58
+ "grad_norm": 8.97929873252708,
59
  "learning_rate": 2.8691164100062034e-07,
60
+ "logits/chosen": -1.188706398010254,
61
+ "logits/rejected": -0.9695678949356079,
62
+ "logps/chosen": -547.2942504882812,
63
+ "logps/rejected": -972.0201416015625,
64
+ "loss": 0.5278,
65
+ "rewards/accuracies": 0.875,
66
+ "rewards/chosen": -0.08773749321699142,
67
+ "rewards/margins": 0.4440121054649353,
68
+ "rewards/rejected": -0.5317496061325073,
69
  "step": 30
70
  },
71
  {
72
  "epoch": 0.67,
73
+ "grad_norm": 7.373164759260948,
74
  "learning_rate": 1.4248369943086995e-07,
75
+ "logits/chosen": -1.121267557144165,
76
+ "logits/rejected": -0.9862432479858398,
77
+ "logps/chosen": -544.4817504882812,
78
+ "logps/rejected": -1049.937744140625,
79
  "loss": 0.4145,
80
+ "rewards/accuracies": 0.8999999761581421,
81
+ "rewards/chosen": -0.19056306779384613,
82
+ "rewards/margins": 0.9457426071166992,
83
+ "rewards/rejected": -1.136305570602417,
84
  "step": 40
85
  },
86
  {
87
  "epoch": 0.84,
88
+ "grad_norm": 6.631179730087488,
89
  "learning_rate": 3.473909705816111e-08,
90
+ "logits/chosen": -1.0745445489883423,
91
+ "logits/rejected": -1.0211777687072754,
92
+ "logps/chosen": -568.6431884765625,
93
+ "logps/rejected": -1094.7789306640625,
94
  "loss": 0.3982,
95
+ "rewards/accuracies": 0.8687499761581421,
96
+ "rewards/chosen": -0.33096131682395935,
97
+ "rewards/margins": 1.3629460334777832,
98
+ "rewards/rejected": -1.6939074993133545,
99
  "step": 50
100
  },
101
  {
102
  "epoch": 0.99,
103
  "step": 59,
104
  "total_flos": 0.0,
105
+ "train_loss": 0.5081947011462713,
106
+ "train_runtime": 896.7003,
107
+ "train_samples_per_second": 4.227,
108
+ "train_steps_per_second": 0.066
109
  }
110
  ],
111
  "logging_steps": 10,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:20c2e7d7bca5ee6f2a1963f9a3c804561f4d02a9ab170c8917e179691a8b2c30
3
  size 6264
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e8ba9506cb4ca202b7c2f4ebbae99b55462a4f2328c8eec10bf0c8002349ade
3
  size 6264