End of training

Browse files

Files changed (15) hide show

README.md +32 -32
config.json +2 -2
final_checkpoint/config.json +2 -2
final_checkpoint/generation_config.json +1 -1
final_checkpoint/model-00001-of-00004.safetensors +1 -1
final_checkpoint/model-00002-of-00004.safetensors +1 -1
final_checkpoint/model-00003-of-00004.safetensors +1 -1
final_checkpoint/model-00004-of-00004.safetensors +1 -1
generation_config.json +1 -1
model-00001-of-00004.safetensors +1 -1
model-00002-of-00004.safetensors +1 -1
model-00003-of-00004.safetensors +1 -1
model-00004-of-00004.safetensors +1 -1
tokenizer_config.json +5 -1
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -1,6 +1,6 @@
 ---
 license: llama3
-base_model: meta-llama/Meta-Llama-3-8B-Instruct
 tags:
 - trl
 - dpo
@@ -15,17 +15,17 @@ should probably proofread and complete it, then remove this comment. -->
 # MedQA_L3_1000steps_1e7rate_03beta_CSFTDPO
-This model is a fine-tuned version of [meta-llama/Meta-Llama-3-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.6220
-- Rewards/chosen: 0.1076
-- Rewards/rejected: -0.0975
-- Rewards/accuracies: 0.6725
-- Rewards/margins: 0.2050
-- Logps/rejected: -21.6414
-- Logps/chosen: -17.8641
-- Logits/rejected: -0.9452
-- Logits/chosen: -0.9446
 ## Model description
@@ -59,31 +59,31 @@ The following hyperparameters were used during training:
 | Training Loss | Epoch  | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
 |:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
-| 0.6938        | 0.0489 | 50   | 0.6934          | 0.0041         | 0.0042           | 0.5099             | -0.0000         | -21.3026       | -18.2088     | -0.9262         | -0.9257       |
-| 0.6807        | 0.0977 | 100  | 0.6781          | 0.1130         | 0.0788           | 0.6110             | 0.0343          | -21.0540       | -17.8459     | -0.9280         | -0.9275       |
-| 0.6658        | 0.1466 | 150  | 0.6612          | 0.1697         | 0.0895           | 0.6374             | 0.0802          | -21.0183       | -17.6569     | -0.9316         | -0.9311       |
-| 0.6596        | 0.1954 | 200  | 0.6558          | 0.1737         | 0.0784           | 0.6505             | 0.0954          | -21.0553       | -17.6435     | -0.9345         | -0.9340       |
-| 0.6778        | 0.2443 | 250  | 0.6473          | 0.1808         | 0.0591           | 0.6659             | 0.1218          | -21.1196       | -17.6199     | -0.9363         | -0.9358       |
-| 0.6312        | 0.2931 | 300  | 0.6411          | 0.1839         | 0.0439           | 0.6681             | 0.1400          | -21.1702       | -17.6097     | -0.9382         | -0.9376       |
-| 0.7093        | 0.3420 | 350  | 0.6361          | 0.1789         | 0.0228           | 0.6659             | 0.1561          | -21.2406       | -17.6264     | -0.9386         | -0.9381       |
-| 0.6118        | 0.3908 | 400  | 0.6341          | 0.1611         | -0.0028          | 0.6747             | 0.1639          | -21.3258       | -17.6857     | -0.9403         | -0.9398       |
-| 0.6395        | 0.4397 | 450  | 0.6303          | 0.1523         | -0.0236          | 0.6659             | 0.1759          | -21.3953       | -17.7150     | -0.9424         | -0.9418       |
-| 0.649         | 0.4885 | 500  | 0.6284          | 0.1419         | -0.0403          | 0.6703             | 0.1822          | -21.4507       | -17.7495     | -0.9434         | -0.9428       |
-| 0.5851        | 0.5374 | 550  | 0.6265          | 0.1154         | -0.0757          | 0.6747             | 0.1911          | -21.5689       | -17.8381     | -0.9443         | -0.9437       |
-| 0.6155        | 0.5862 | 600  | 0.6255          | 0.1139         | -0.0809          | 0.6725             | 0.1948          | -21.5862       | -17.8429     | -0.9446         | -0.9440       |
-| 0.6048        | 0.6351 | 650  | 0.6250          | 0.1091         | -0.0893          | 0.6703             | 0.1983          | -21.6141       | -17.8591     | -0.9449         | -0.9443       |
-| 0.671         | 0.6839 | 700  | 0.6242          | 0.1080         | -0.0932          | 0.6725             | 0.2012          | -21.6272       | -17.8627     | -0.9450         | -0.9445       |
-| 0.5693        | 0.7328 | 750  | 0.6236          | 0.1063         | -0.0951          | 0.6681             | 0.2014          | -21.6335       | -17.8682     | -0.9449         | -0.9443       |
-| 0.6342        | 0.7816 | 800  | 0.6251          | 0.1039         | -0.0954          | 0.6725             | 0.1993          | -21.6344       | -17.8763     | -0.9455         | -0.9449       |
-| 0.6575        | 0.8305 | 850  | 0.6235          | 0.1067         | -0.0957          | 0.6681             | 0.2025          | -21.6356       | -17.8668     | -0.9451         | -0.9445       |
-| 0.6549        | 0.8793 | 900  | 0.6232          | 0.1081         | -0.0943          | 0.6747             | 0.2024          | -21.6308       | -17.8622     | -0.9452         | -0.9446       |
-| 0.6019        | 0.9282 | 950  | 0.6229          | 0.1049         | -0.0980          | 0.6725             | 0.2029          | -21.6431       | -17.8729     | -0.9453         | -0.9447       |
-| 0.5785        | 0.9770 | 1000 | 0.6220          | 0.1076         | -0.0975          | 0.6725             | 0.2050          | -21.6414       | -17.8641     | -0.9452         | -0.9446       |
 ### Framework versions
-- Transformers 4.41.0
 - Pytorch 2.0.0+cu117
 - Datasets 2.19.1
 - Tokenizers 0.19.1

 ---
 license: llama3
+base_model: tsavage68/MedQA_L3_1000steps_1e6rate_SFT
 tags:
 - trl
 - dpo
 # MedQA_L3_1000steps_1e7rate_03beta_CSFTDPO
+This model is a fine-tuned version of [tsavage68/MedQA_L3_1000steps_1e6rate_SFT](https://huggingface.co/tsavage68/MedQA_L3_1000steps_1e6rate_SFT) on an unknown dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.6020
+- Rewards/chosen: 0.7087
+- Rewards/rejected: 0.4830
+- Rewards/accuracies: 0.7341
+- Rewards/margins: 0.2257
+- Logps/rejected: -32.2447
+- Logps/chosen: -28.9661
+- Logits/rejected: -0.7358
+- Logits/chosen: -0.7350
 ## Model description
 | Training Loss | Epoch  | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
 |:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
+| 0.6925        | 0.0489 | 50   | 0.6930          | -0.0016        | -0.0023          | 0.5011             | 0.0007          | -33.8624       | -31.3338     | -0.7320         | -0.7314       |
+| 0.6841        | 0.0977 | 100  | 0.6807          | 0.2459         | 0.2195           | 0.6549             | 0.0264          | -33.1233       | -30.5088     | -0.7330         | -0.7323       |
+| 0.6562        | 0.1466 | 150  | 0.6641          | 0.3800         | 0.3137           | 0.6791             | 0.0663          | -32.8092       | -30.0619     | -0.7310         | -0.7303       |
+| 0.6334        | 0.1954 | 200  | 0.6509          | 0.1334         | 0.0355           | 0.7165             | 0.0979          | -33.7366       | -30.8837     | -0.7311         | -0.7304       |
+| 0.6544        | 0.2443 | 250  | 0.6415          | 0.2943         | 0.1754           | 0.7209             | 0.1189          | -33.2701       | -30.3474     | -0.7311         | -0.7303       |
+| 0.6145        | 0.2931 | 300  | 0.6304          | 0.3548         | 0.2099           | 0.7385             | 0.1448          | -33.1550       | -30.1459     | -0.7317         | -0.7310       |
+| 0.6171        | 0.3420 | 350  | 0.6223          | 0.4756         | 0.3093           | 0.7341             | 0.1663          | -32.8238       | -29.7432     | -0.7336         | -0.7328       |
+| 0.5911        | 0.3908 | 400  | 0.6181          | 0.6387         | 0.4602           | 0.7121             | 0.1785          | -32.3208       | -29.1996     | -0.7334         | -0.7327       |
+| 0.5942        | 0.4397 | 450  | 0.6129          | 0.6839         | 0.4904           | 0.7253             | 0.1935          | -32.2203       | -29.0489     | -0.7347         | -0.7339       |
+| 0.6096        | 0.4885 | 500  | 0.6090          | 0.7785         | 0.5741           | 0.7297             | 0.2044          | -31.9411       | -28.7335     | -0.7351         | -0.7343       |
+| 0.5671        | 0.5374 | 550  | 0.6068          | 0.7522         | 0.5395           | 0.7275             | 0.2127          | -32.0566       | -28.8212     | -0.7355         | -0.7347       |
+| 0.6066        | 0.5862 | 600  | 0.6061          | 0.7215         | 0.5067           | 0.7209             | 0.2147          | -32.1657       | -28.9236     | -0.7356         | -0.7348       |
+| 0.5816        | 0.6351 | 650  | 0.6046          | 0.6882         | 0.4692           | 0.7231             | 0.2191          | -32.2910       | -29.0344     | -0.7356         | -0.7348       |
+| 0.5968        | 0.6839 | 700  | 0.6030          | 0.6956         | 0.4723           | 0.7451             | 0.2233          | -32.2804       | -29.0097     | -0.7352         | -0.7344       |
+| 0.6132        | 0.7328 | 750  | 0.6042          | 0.7103         | 0.4891           | 0.7297             | 0.2212          | -32.2246       | -28.9608     | -0.7354         | -0.7346       |
+| 0.6133        | 0.7816 | 800  | 0.6021          | 0.6956         | 0.4697           | 0.7407             | 0.2258          | -32.2890       | -29.0099     | -0.7358         | -0.7350       |
+| 0.6397        | 0.8305 | 850  | 0.6029          | 0.7027         | 0.4791           | 0.7341             | 0.2236          | -32.2579       | -28.9862     | -0.7354         | -0.7346       |
+| 0.6273        | 0.8793 | 900  | 0.6030          | 0.7126         | 0.4896           | 0.7341             | 0.2230          | -32.2229       | -28.9533     | -0.7356         | -0.7348       |
+| 0.5996        | 0.9282 | 950  | 0.6019          | 0.7087         | 0.4830           | 0.7341             | 0.2257          | -32.2447       | -28.9661     | -0.7358         | -0.7350       |
+| 0.5319        | 0.9770 | 1000 | 0.6020          | 0.7087         | 0.4830           | 0.7341             | 0.2257          | -32.2447       | -28.9661     | -0.7358         | -0.7350       |
 ### Framework versions
+- Transformers 4.41.1
 - Pytorch 2.0.0+cu117
 - Datasets 2.19.1
 - Tokenizers 0.19.1

config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "meta-llama/Meta-Llama-3-8B-Instruct",
   "architectures": [
     "LlamaForCausalLM"
   ],
@@ -23,7 +23,7 @@
   "rope_theta": 500000.0,
   "tie_word_embeddings": false,
   "torch_dtype": "float16",
-  "transformers_version": "4.41.0",
   "use_cache": false,
   "vocab_size": 128256
 }

 {
+  "_name_or_path": "tsavage68/MedQA_L3_1000steps_1e6rate_SFT",
   "architectures": [
     "LlamaForCausalLM"
   ],
   "rope_theta": 500000.0,
   "tie_word_embeddings": false,
   "torch_dtype": "float16",
+  "transformers_version": "4.41.1",
   "use_cache": false,
   "vocab_size": 128256
 }

final_checkpoint/config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "meta-llama/Meta-Llama-3-8B-Instruct",
   "architectures": [
     "LlamaForCausalLM"
   ],
@@ -23,7 +23,7 @@
   "rope_theta": 500000.0,
   "tie_word_embeddings": false,
   "torch_dtype": "float16",
-  "transformers_version": "4.41.0",
   "use_cache": false,
   "vocab_size": 128256
 }

 {
+  "_name_or_path": "tsavage68/MedQA_L3_1000steps_1e6rate_SFT",
   "architectures": [
     "LlamaForCausalLM"
   ],
   "rope_theta": 500000.0,
   "tie_word_embeddings": false,
   "torch_dtype": "float16",
+  "transformers_version": "4.41.1",
   "use_cache": false,
   "vocab_size": 128256
 }

final_checkpoint/generation_config.json CHANGED Viewed

@@ -8,5 +8,5 @@
   "max_length": 4096,
   "temperature": 0.6,
   "top_p": 0.9,
-  "transformers_version": "4.41.0"
 }

   "max_length": 4096,
   "temperature": 0.6,
   "top_p": 0.9,
+  "transformers_version": "4.41.1"
 }

final_checkpoint/model-00001-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b914441784cdf4d5a5ba86b5d6c2d9a4876d2dc0a8aed0b10cdbfe199c98e824
 size 4976698592

 version https://git-lfs.github.com/spec/v1
+oid sha256:c2557ef54b1cb541258b6aac809e2ec44f1a879c7ab1c84166f20074fd81e1c3
 size 4976698592

final_checkpoint/model-00002-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b2062ccf9cb4cf7095035161161a63286897f99839a1345ab3da62ca35c7ca12
 size 4999802616

 version https://git-lfs.github.com/spec/v1
+oid sha256:3e119d7c88d94f0a6db7e39294810bd68b32ced63b3bd7464af0a063f1c32029
 size 4999802616

final_checkpoint/model-00003-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7c6b3cbb77cdf6f4d908d28bc078374b0997fba437bac4c02a7134cc90a626ef
 size 4915916080

 version https://git-lfs.github.com/spec/v1
+oid sha256:9525481aee3b9d44cee6ffb20de7add9a26abb8c1ff8bea8a5e260cac6fc12c4
 size 4915916080

final_checkpoint/model-00004-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9cfb14502956990d8a876fc6b8342b97cd0fcf64e8003be846cbd708c5aa3a2c
 size 1168138808

 version https://git-lfs.github.com/spec/v1
+oid sha256:02aa661fc4a8018b889fdbfbb145ea620dbfb2ef8508c18febc2e41b8cafde1e
 size 1168138808

generation_config.json CHANGED Viewed

@@ -8,5 +8,5 @@
   "max_length": 4096,
   "temperature": 0.6,
   "top_p": 0.9,
-  "transformers_version": "4.41.0"
 }

   "max_length": 4096,
   "temperature": 0.6,
   "top_p": 0.9,
+  "transformers_version": "4.41.1"
 }

model-00001-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b914441784cdf4d5a5ba86b5d6c2d9a4876d2dc0a8aed0b10cdbfe199c98e824
 size 4976698592

 version https://git-lfs.github.com/spec/v1
+oid sha256:c2557ef54b1cb541258b6aac809e2ec44f1a879c7ab1c84166f20074fd81e1c3
 size 4976698592

model-00002-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b2062ccf9cb4cf7095035161161a63286897f99839a1345ab3da62ca35c7ca12
 size 4999802616

 version https://git-lfs.github.com/spec/v1
+oid sha256:3e119d7c88d94f0a6db7e39294810bd68b32ced63b3bd7464af0a063f1c32029
 size 4999802616

model-00003-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7c6b3cbb77cdf6f4d908d28bc078374b0997fba437bac4c02a7134cc90a626ef
 size 4915916080

 version https://git-lfs.github.com/spec/v1
+oid sha256:9525481aee3b9d44cee6ffb20de7add9a26abb8c1ff8bea8a5e260cac6fc12c4
 size 4915916080

model-00004-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9cfb14502956990d8a876fc6b8342b97cd0fcf64e8003be846cbd708c5aa3a2c
 size 1168138808

 version https://git-lfs.github.com/spec/v1
+oid sha256:02aa661fc4a8018b889fdbfbb145ea620dbfb2ef8508c18febc2e41b8cafde1e
 size 1168138808

tokenizer_config.json CHANGED Viewed

@@ -2053,11 +2053,15 @@
   "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
   "clean_up_tokenization_spaces": true,
   "eos_token": "<|eot_id|>",
   "model_input_names": [
     "input_ids",
     "attention_mask"
   ],
   "model_max_length": 1000000000000000019884624838656,
   "pad_token": "<|eot_id|>",
-  "tokenizer_class": "PreTrainedTokenizerFast"
 }

   "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
   "clean_up_tokenization_spaces": true,
   "eos_token": "<|eot_id|>",
+  "max_length": 1024,
   "model_input_names": [
     "input_ids",
     "attention_mask"
   ],
   "model_max_length": 1000000000000000019884624838656,
   "pad_token": "<|eot_id|>",
+  "stride": 0,
+  "tokenizer_class": "PreTrainedTokenizerFast",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first"
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:856c174c4a353987238506e7a157b8d3b8d7aaef99dbfff284495206942917de
 size 4667

 version https://git-lfs.github.com/spec/v1
+oid sha256:3c7fa4546475a34fdf342ed9f3840232fd6ebbaace04e86920503ddffcd0bbf4
 size 4667