End of training

Browse files

Files changed (15)

README.md +34 -34
config.json +2 -2
final_checkpoint/config.json +2 -2
final_checkpoint/generation_config.json +1 -1
final_checkpoint/model-00001-of-00004.safetensors +1 -1
final_checkpoint/model-00002-of-00004.safetensors +1 -1
final_checkpoint/model-00003-of-00004.safetensors +1 -1
final_checkpoint/model-00004-of-00004.safetensors +1 -1
generation_config.json +1 -1
model-00001-of-00004.safetensors +1 -1
model-00002-of-00004.safetensors +1 -1
model-00003-of-00004.safetensors +1 -1
model-00004-of-00004.safetensors +1 -1
tokenizer_config.json +5 -1
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -1,31 +1,31 @@
 ---
 license: llama3
-base_model: meta-llama/Meta-Llama-3-8B-Instruct
 tags:
 - trl
 - dpo
 - generated_from_trainer
 model-index:
-- name: MedQA_L3_1000steps_1e5rate_03beta_CSFTDPO
   results: []
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
-# MedQA_L3_1000steps_1e5rate_03beta_CSFTDPO
-This model is a fine-tuned version of [meta-llama/Meta-Llama-3-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 3.1515
-- Rewards/chosen: -14.8095
-- Rewards/rejected: -13.6020
-- Rewards/accuracies: 0.4176
-- Rewards/margins: -1.2075
-- Logps/rejected: -48.5205
-- Logps/chosen: -47.8417
-- Logits/rejected: -0.3920
-- Logits/chosen: -0.3920
 ## Model description
@@ -59,31 +59,31 @@ The following hyperparameters were used during training:
 | Training Loss | Epoch  | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
 |:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
-| 1.1843        | 0.0489 | 50   | 1.4952          | -9.4284        | -9.7251          | 0.5604             | 0.2967          | -40.7667       | -37.0794     | -0.9884         | -0.9876       |
-| 2.9825        | 0.0977 | 100  | 2.9340          | -11.3951       | -10.1604         | 0.3956             | -1.2347         | -41.6373       | -41.0129     | -0.7061         | -0.7063       |
-| 5.07          | 0.1466 | 150  | 5.8445          | -33.3133       | -32.2334         | 0.4330             | -1.0798         | -85.7834       | -84.8492     | 0.6418          | 0.6418        |
-| 3.8509        | 0.1954 | 200  | 4.3552          | -19.5431       | -18.0372         | 0.3868             | -1.5059         | -57.3910       | -57.3089     | 0.4504          | 0.4502        |
-| 2.4016        | 0.2443 | 250  | 4.1219          | -19.6576       | -18.2218         | 0.4154             | -1.4358         | -57.7601       | -57.5379     | -0.6569         | -0.6572       |
-| 6.0228        | 0.2931 | 300  | 3.6535          | -22.0862       | -20.5713         | 0.4044             | -1.5149         | -62.4591       | -62.3951     | -0.5981         | -0.5981       |
-| 6.0198        | 0.3420 | 350  | 4.5591          | -27.2649       | -25.8931         | 0.3956             | -1.3718         | -73.1028       | -72.7525     | -0.4660         | -0.4661       |
-| 4.3784        | 0.3908 | 400  | 3.8339          | -16.6518       | -14.9972         | 0.4044             | -1.6546         | -51.3108       | -51.5262     | -0.5637         | -0.5639       |
-| 4.9465        | 0.4397 | 450  | 3.5058          | -18.5653       | -17.0632         | 0.3912             | -1.5021         | -55.4430       | -55.3533     | -0.8423         | -0.8424       |
-| 5.0702        | 0.4885 | 500  | 3.7225          | -18.6932       | -17.3825         | 0.4022             | -1.3108         | -56.0814       | -55.6091     | -1.0248         | -1.0249       |
-| 3.3013        | 0.5374 | 550  | 3.5310          | -17.9328       | -16.5937         | 0.4242             | -1.3391         | -54.5040       | -54.0883     | -0.4439         | -0.4440       |
-| 3.6542        | 0.5862 | 600  | 3.6084          | -16.4171       | -15.1231         | 0.4110             | -1.2939         | -51.5628       | -51.0568     | -0.3546         | -0.3546       |
-| 3.6485        | 0.6351 | 650  | 3.5138          | -18.0930       | -16.6063         | 0.3890             | -1.4867         | -54.5292       | -54.4087     | -0.6487         | -0.6487       |
-| 3.2954        | 0.6839 | 700  | 3.6009          | -15.0224       | -13.6768         | 0.4110             | -1.3456         | -48.6701       | -48.2675     | -0.3265         | -0.3266       |
-| 3.5098        | 0.7328 | 750  | 3.3569          | -16.2005       | -14.8261         | 0.4220             | -1.3744         | -50.9687       | -50.6236     | -0.4849         | -0.4850       |
-| 2.6183        | 0.7816 | 800  | 3.1964          | -14.9792       | -13.7545         | 0.4110             | -1.2247         | -48.8255       | -48.1811     | -0.4045         | -0.4045       |
-| 3.4615        | 0.8305 | 850  | 3.1868          | -14.5397       | -13.3293         | 0.4176             | -1.2105         | -47.9750       | -47.3021     | -0.3691         | -0.3691       |
-| 2.4743        | 0.8793 | 900  | 3.1532          | -14.7973       | -13.5910         | 0.4132             | -1.2063         | -48.4985       | -47.8173     | -0.3918         | -0.3918       |
-| 2.337         | 0.9282 | 950  | 3.1514          | -14.8091       | -13.6016         | 0.4176             | -1.2075         | -48.5197       | -47.8408     | -0.3921         | -0.3920       |
-| 2.2422        | 0.9770 | 1000 | 3.1515          | -14.8095       | -13.6020         | 0.4176             | -1.2075         | -48.5205       | -47.8417     | -0.3920         | -0.3920       |
 ### Framework versions
-- Transformers 4.41.0
 - Pytorch 2.0.0+cu117
 - Datasets 2.19.1
 - Tokenizers 0.19.1

 ---
 license: llama3
+base_model: tsavage68/MedQA_L3_1000steps_1e6rate_SFT
 tags:
 - trl
 - dpo
 - generated_from_trainer
 model-index:
+- name: MedQA_L3_1000steps_1e5rate_05beta_CSFTDPO
   results: []
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
+# MedQA_L3_1000steps_1e5rate_05beta_CSFTDPO
+This model is a fine-tuned version of [tsavage68/MedQA_L3_1000steps_1e6rate_SFT](https://huggingface.co/tsavage68/MedQA_L3_1000steps_1e6rate_SFT) on an unknown dataset.
 It achieves the following results on the evaluation set:
+- Loss: 2.7867
+- Rewards/chosen: -10.2874
+- Rewards/rejected: -9.4675
+- Rewards/accuracies: 0.4330
+- Rewards/margins: -0.8198
+- Logps/rejected: -52.7899
+- Logps/chosen: -51.9033
+- Logits/rejected: -0.3129
+- Logits/chosen: -0.3128
 ## Model description
 | Training Loss | Epoch  | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
 |:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
+| 0.9373        | 0.0489 | 50   | 1.5325          | 0.6891         | -0.1945          | 0.5912             | 0.8836          | -34.2439       | -29.9504     | -1.1200         | -1.1197       |
+| 3.7169        | 0.0977 | 100  | 3.7845          | -9.7504        | -8.8431          | 0.4527             | -0.9074         | -51.5409       | -50.8294     | -0.6137         | -0.6138       |
+| 5.2014        | 0.1466 | 150  | 5.2600          | -22.3993       | -21.8605         | 0.4681             | -0.5389         | -77.5758       | -76.1272     | -1.3215         | -1.3217       |
+| 5.4743        | 0.1954 | 200  | 3.9034          | -7.1491        | -6.2277          | 0.4176             | -0.9214         | -46.3103       | -45.6268     | -0.6483         | -0.6486       |
+| 3.0731        | 0.2443 | 250  | 4.1865          | -11.6364       | -10.1791         | 0.4198             | -1.4572         | -54.2131       | -54.6012     | -0.7051         | -0.7056       |
+| 5.7952        | 0.2931 | 300  | 3.6683          | -9.2381        | -7.9895          | 0.4264             | -1.2486         | -49.8338       | -49.8046     | -0.4055         | -0.4058       |
+| 3.8474        | 0.3420 | 350  | 3.4898          | -12.7687       | -11.9414         | 0.4132             | -0.8274         | -57.7376       | -56.8660     | -0.8625         | -0.8625       |
+| 5.5721        | 0.3908 | 400  | 3.4194          | -13.5468       | -12.3658         | 0.4044             | -1.1810         | -58.5864       | -58.4221     | -0.8921         | -0.8922       |
+| 6.0929        | 0.4397 | 450  | 3.4518          | -12.5599       | -11.2787         | 0.4132             | -1.2812         | -56.4122       | -56.4483     | -0.6596         | -0.6596       |
+| 5.4036        | 0.4885 | 500  | 3.4349          | -13.3250       | -12.3700         | 0.4264             | -0.9550         | -58.5948       | -57.9785     | -0.4398         | -0.4397       |
+| 4.2614        | 0.5374 | 550  | 3.4447          | -13.2741       | -12.0523         | 0.4132             | -1.2218         | -57.9595       | -57.8767     | -0.2318         | -0.2318       |
+| 5.0683        | 0.5862 | 600  | 3.6325          | -10.9169       | -9.7136          | 0.4242             | -1.2033         | -53.2821       | -53.1624     | 0.0024          | 0.0023        |
+| 2.8041        | 0.6351 | 650  | 3.3753          | -13.7510       | -12.4756         | 0.4110             | -1.2754         | -58.8060       | -58.8306     | -0.4253         | -0.4254       |
+| 2.852         | 0.6839 | 700  | 3.2123          | -11.3782       | -10.1837         | 0.4132             | -1.1945         | -54.2221       | -54.0849     | -0.3353         | -0.3353       |
+| 3.1506        | 0.7328 | 750  | 2.9861          | -10.9246       | -9.9019          | 0.4198             | -1.0227         | -53.6587       | -53.1778     | -0.3577         | -0.3577       |
+| 2.9206        | 0.7816 | 800  | 2.8476          | -10.3118       | -9.4465          | 0.4264             | -0.8653         | -52.7479       | -51.9522     | -0.2881         | -0.2880       |
+| 3.6047        | 0.8305 | 850  | 2.8115          | -10.1979       | -9.3565          | 0.4308             | -0.8414         | -52.5679       | -51.7243     | -0.3016         | -0.3015       |
+| 2.4799        | 0.8793 | 900  | 2.7874          | -10.3005       | -9.4828          | 0.4308             | -0.8177         | -52.8204       | -51.9295     | -0.3147         | -0.3146       |
+| 2.8467        | 0.9282 | 950  | 2.7864          | -10.2878       | -9.4711          | 0.4330             | -0.8167         | -52.7969       | -51.9040     | -0.3132         | -0.3130       |
+| 2.2638        | 0.9770 | 1000 | 2.7867          | -10.2874       | -9.4675          | 0.4330             | -0.8198         | -52.7899       | -51.9033     | -0.3129         | -0.3128       |
 ### Framework versions
+- Transformers 4.41.1
 - Pytorch 2.0.0+cu117
 - Datasets 2.19.1
 - Tokenizers 0.19.1

config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "meta-llama/Meta-Llama-3-8B-Instruct",
   "architectures": [
     "LlamaForCausalLM"
   ],
@@ -23,7 +23,7 @@
   "rope_theta": 500000.0,
   "tie_word_embeddings": false,
   "torch_dtype": "float16",
-  "transformers_version": "4.41.0",
   "use_cache": false,
   "vocab_size": 128256
 }

 {
+  "_name_or_path": "tsavage68/MedQA_L3_1000steps_1e6rate_SFT",
   "architectures": [
     "LlamaForCausalLM"
   ],
   "rope_theta": 500000.0,
   "tie_word_embeddings": false,
   "torch_dtype": "float16",
+  "transformers_version": "4.41.1",
   "use_cache": false,
   "vocab_size": 128256
 }

final_checkpoint/config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "meta-llama/Meta-Llama-3-8B-Instruct",
   "architectures": [
     "LlamaForCausalLM"
   ],
@@ -23,7 +23,7 @@
   "rope_theta": 500000.0,
   "tie_word_embeddings": false,
   "torch_dtype": "float16",
-  "transformers_version": "4.41.0",
   "use_cache": false,
   "vocab_size": 128256
 }

 {
+  "_name_or_path": "tsavage68/MedQA_L3_1000steps_1e6rate_SFT",
   "architectures": [
     "LlamaForCausalLM"
   ],
   "rope_theta": 500000.0,
   "tie_word_embeddings": false,
   "torch_dtype": "float16",
+  "transformers_version": "4.41.1",
   "use_cache": false,
   "vocab_size": 128256
 }

final_checkpoint/generation_config.json CHANGED Viewed

@@ -8,5 +8,5 @@
   "max_length": 4096,
   "temperature": 0.6,
   "top_p": 0.9,
-  "transformers_version": "4.41.0"
 }

   "max_length": 4096,
   "temperature": 0.6,
   "top_p": 0.9,
+  "transformers_version": "4.41.1"
 }

final_checkpoint/model-00001-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8f84458a09371132b21dc39246c63a6fb19268b72cb9695a188e08f306624772
 size 4976698592

 version https://git-lfs.github.com/spec/v1
+oid sha256:e3bfe0d189b8c25d74449c7cde2e7b6ff9d0b345bf0acb6eab5646a5b95eb3a8
 size 4976698592

final_checkpoint/model-00002-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9dcb5e0daa0cdc4cf8274791b8f56ac1ae9cbed17e26f5e9b44fbae5ecc20dcb
 size 4999802616

 version https://git-lfs.github.com/spec/v1
+oid sha256:68185401033d9802234331da2b510aff2dd191e72c29b677791f018bab421c1d
 size 4999802616

final_checkpoint/model-00003-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5c39383df2cf91192331c7b99ab2068e46494ad5c1ac209be9daf92e129ca72c
 size 4915916080

 version https://git-lfs.github.com/spec/v1
+oid sha256:c20b5851739334eaad060d01a9ee28f037918f27a8f6e9189d3465c893b74955
 size 4915916080

final_checkpoint/model-00004-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e1ea0d908c8e866b37b6849c93d82a80d564f9917fe2c91b1ba5ce097ed15598
 size 1168138808

 version https://git-lfs.github.com/spec/v1
+oid sha256:13741957043d65eff98b808945ae5939e51dad50151dcf60b7f608530d9db3d3
 size 1168138808

generation_config.json CHANGED Viewed

@@ -8,5 +8,5 @@
   "max_length": 4096,
   "temperature": 0.6,
   "top_p": 0.9,
-  "transformers_version": "4.41.0"
 }

   "max_length": 4096,
   "temperature": 0.6,
   "top_p": 0.9,
+  "transformers_version": "4.41.1"
 }

model-00001-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8f84458a09371132b21dc39246c63a6fb19268b72cb9695a188e08f306624772
 size 4976698592

 version https://git-lfs.github.com/spec/v1
+oid sha256:e3bfe0d189b8c25d74449c7cde2e7b6ff9d0b345bf0acb6eab5646a5b95eb3a8
 size 4976698592

model-00002-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9dcb5e0daa0cdc4cf8274791b8f56ac1ae9cbed17e26f5e9b44fbae5ecc20dcb
 size 4999802616

 version https://git-lfs.github.com/spec/v1
+oid sha256:68185401033d9802234331da2b510aff2dd191e72c29b677791f018bab421c1d
 size 4999802616

model-00003-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5c39383df2cf91192331c7b99ab2068e46494ad5c1ac209be9daf92e129ca72c
 size 4915916080

 version https://git-lfs.github.com/spec/v1
+oid sha256:c20b5851739334eaad060d01a9ee28f037918f27a8f6e9189d3465c893b74955
 size 4915916080

model-00004-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e1ea0d908c8e866b37b6849c93d82a80d564f9917fe2c91b1ba5ce097ed15598
 size 1168138808

 version https://git-lfs.github.com/spec/v1
+oid sha256:13741957043d65eff98b808945ae5939e51dad50151dcf60b7f608530d9db3d3
 size 1168138808

tokenizer_config.json CHANGED Viewed

@@ -2053,11 +2053,15 @@
   "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
   "clean_up_tokenization_spaces": true,
   "eos_token": "<|eot_id|>",
   "model_input_names": [
     "input_ids",
     "attention_mask"
   ],
   "model_max_length": 1000000000000000019884624838656,
   "pad_token": "<|eot_id|>",
-  "tokenizer_class": "PreTrainedTokenizerFast"
 }

   "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
   "clean_up_tokenization_spaces": true,
   "eos_token": "<|eot_id|>",
+  "max_length": 1024,
   "model_input_names": [
     "input_ids",
     "attention_mask"
   ],
   "model_max_length": 1000000000000000019884624838656,
   "pad_token": "<|eot_id|>",
+  "stride": 0,
+  "tokenizer_class": "PreTrainedTokenizerFast",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first"
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:58d5bfb553da065051a173638d5415c4e5a09f001d12c1f1116fce4d4ccca8b4
 size 4667

 version https://git-lfs.github.com/spec/v1
+oid sha256:596a0ab20874a6edffb19b3bcbfa30ef508de7c59c47f46a5205c74f5d3a5703
 size 4667