End of training

Browse files

Files changed (15)

README.md +32 -32
config.json +2 -2
final_checkpoint/config.json +2 -2
final_checkpoint/generation_config.json +1 -1
final_checkpoint/model-00001-of-00004.safetensors +1 -1
final_checkpoint/model-00002-of-00004.safetensors +1 -1
final_checkpoint/model-00003-of-00004.safetensors +1 -1
final_checkpoint/model-00004-of-00004.safetensors +1 -1
generation_config.json +1 -1
model-00001-of-00004.safetensors +1 -1
model-00002-of-00004.safetensors +1 -1
model-00003-of-00004.safetensors +1 -1
model-00004-of-00004.safetensors +1 -1
tokenizer_config.json +5 -1
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -1,6 +1,6 @@
 ---
 license: llama3
-base_model: meta-llama/Meta-Llama-3-8B-Instruct
 tags:
 - trl
 - dpo
@@ -15,17 +15,17 @@ should probably proofread and complete it, then remove this comment. -->
 # MedQA_L3_1000steps_1e7rate_05beta_CSFTDPO
-This model is a fine-tuned version of [meta-llama/Meta-Llama-3-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.6092
-- Rewards/chosen: 0.2694
-- Rewards/rejected: -0.0121
-- Rewards/accuracies: 0.6835
-- Rewards/margins: 0.2814
-- Logps/rejected: -21.3407
-- Logps/chosen: -17.6839
-- Logits/rejected: -0.9435
-- Logits/chosen: -0.9429
 ## Model description
@@ -59,31 +59,31 @@ The following hyperparameters were used during training:
 | Training Loss | Epoch  | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
 |:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
-| 0.7034        | 0.0489 | 50   | 0.6908          | 0.0092         | 0.0030           | 0.5187             | 0.0061          | -21.3104       | -18.2043     | -0.9262         | -0.9257       |
-| 0.6841        | 0.0977 | 100  | 0.6705          | 0.1777         | 0.1221           | 0.6088             | 0.0556          | -21.0723       | -17.8673     | -0.9278         | -0.9273       |
-| 0.6628        | 0.1466 | 150  | 0.6509          | 0.2657         | 0.1441           | 0.6220             | 0.1216          | -21.0283       | -17.6913     | -0.9313         | -0.9308       |
-| 0.6443        | 0.1954 | 200  | 0.6426          | 0.2910         | 0.1442           | 0.6352             | 0.1468          | -21.0282       | -17.6407     | -0.9332         | -0.9327       |
-| 0.6834        | 0.2443 | 250  | 0.6344          | 0.3123         | 0.1351           | 0.6527             | 0.1772          | -21.0464       | -17.5981     | -0.9353         | -0.9347       |
-| 0.6183        | 0.2931 | 300  | 0.6286          | 0.3313         | 0.1340           | 0.6725             | 0.1973          | -21.0485       | -17.5601     | -0.9370         | -0.9365       |
-| 0.7184        | 0.3420 | 350  | 0.6258          | 0.3266         | 0.1175           | 0.6637             | 0.2091          | -21.0816       | -17.5695     | -0.9377         | -0.9371       |
-| 0.5884        | 0.3908 | 400  | 0.6206          | 0.3117         | 0.0832           | 0.6857             | 0.2285          | -21.1501       | -17.5993     | -0.9393         | -0.9387       |
-| 0.6333        | 0.4397 | 450  | 0.6173          | 0.3095         | 0.0692           | 0.6681             | 0.2403          | -21.1780       | -17.6036     | -0.9402         | -0.9396       |
-| 0.6541        | 0.4885 | 500  | 0.6130          | 0.3027         | 0.0479           | 0.6813             | 0.2548          | -21.2207       | -17.6173     | -0.9417         | -0.9411       |
-| 0.5625        | 0.5374 | 550  | 0.6161          | 0.2745         | 0.0181           | 0.6747             | 0.2564          | -21.2803       | -17.6737     | -0.9423         | -0.9417       |
-| 0.5996        | 0.5862 | 600  | 0.6122          | 0.2770         | 0.0070           | 0.6681             | 0.2699          | -21.3024       | -17.6687     | -0.9428         | -0.9422       |
-| 0.595         | 0.6351 | 650  | 0.6098          | 0.2745         | -0.0048          | 0.6879             | 0.2793          | -21.3261       | -17.6737     | -0.9428         | -0.9423       |
-| 0.6529        | 0.6839 | 700  | 0.6088          | 0.2667         | -0.0156          | 0.6791             | 0.2822          | -21.3477       | -17.6893     | -0.9433         | -0.9427       |
-| 0.5262        | 0.7328 | 750  | 0.6099          | 0.2672         | -0.0130          | 0.6791             | 0.2802          | -21.3426       | -17.6883     | -0.9431         | -0.9425       |
-| 0.6351        | 0.7816 | 800  | 0.6090          | 0.2731         | -0.0087          | 0.6945             | 0.2819          | -21.3340       | -17.6764     | -0.9432         | -0.9426       |
-| 0.6448        | 0.8305 | 850  | 0.6073          | 0.2716         | -0.0155          | 0.6769             | 0.2871          | -21.3474       | -17.6795     | -0.9433         | -0.9428       |
-| 0.6519        | 0.8793 | 900  | 0.6081          | 0.2699         | -0.0141          | 0.6791             | 0.2840          | -21.3448       | -17.6829     | -0.9433         | -0.9427       |
-| 0.5745        | 0.9282 | 950  | 0.6091          | 0.2696         | -0.0119          | 0.6813             | 0.2815          | -21.3403       | -17.6834     | -0.9434         | -0.9429       |
-| 0.5343        | 0.9770 | 1000 | 0.6092          | 0.2694         | -0.0121          | 0.6835             | 0.2814          | -21.3407       | -17.6839     | -0.9435         | -0.9429       |
 ### Framework versions
-- Transformers 4.41.0
 - Pytorch 2.0.0+cu117
 - Datasets 2.19.1
 - Tokenizers 0.19.1

 ---
 license: llama3
+base_model: tsavage68/MedQA_L3_1000steps_1e6rate_SFT
 tags:
 - trl
 - dpo
 # MedQA_L3_1000steps_1e7rate_05beta_CSFTDPO
+This model is a fine-tuned version of [tsavage68/MedQA_L3_1000steps_1e6rate_SFT](https://huggingface.co/tsavage68/MedQA_L3_1000steps_1e6rate_SFT) on an unknown dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.5679
+- Rewards/chosen: 0.9256
+- Rewards/rejected: 0.5812
+- Rewards/accuracies: 0.7407
+- Rewards/margins: 0.3444
+- Logps/rejected: -32.6925
+- Logps/chosen: -29.4774
+- Logits/rejected: -0.7357
+- Logits/chosen: -0.7349
 ## Model description
 | Training Loss | Epoch  | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
 |:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
+| 0.6857        | 0.0489 | 50   | 0.6947          | -0.0249        | -0.0232          | 0.4879             | -0.0018         | -33.9011       | -31.3784     | -0.7318         | -0.7312       |
+| 0.6799        | 0.0977 | 100  | 0.6734          | 0.3881         | 0.3450           | 0.6681             | 0.0432          | -33.1649       | -30.5522     | -0.7330         | -0.7323       |
+| 0.6275        | 0.1466 | 150  | 0.6484          | 0.5732         | 0.4639           | 0.6813             | 0.1093          | -32.9271       | -30.1822     | -0.7310         | -0.7303       |
+| 0.5934        | 0.1954 | 200  | 0.6321          | 0.1707         | 0.0172           | 0.6989             | 0.1535          | -33.8203       | -30.9871     | -0.7310         | -0.7303       |
+| 0.6358        | 0.2443 | 250  | 0.6181          | 0.4355         | 0.2501           | 0.7253             | 0.1854          | -33.3546       | -30.4574     | -0.7315         | -0.7308       |
+| 0.5727        | 0.2931 | 300  | 0.6007          | 0.5633         | 0.3322           | 0.7429             | 0.2311          | -33.1904       | -30.2020     | -0.7321         | -0.7314       |
+| 0.5786        | 0.3420 | 350  | 0.5923          | 0.7025         | 0.4439           | 0.7407             | 0.2586          | -32.9670       | -29.9235     | -0.7343         | -0.7335       |
+| 0.545         | 0.3908 | 400  | 0.5830          | 0.9347         | 0.6493           | 0.7385             | 0.2854          | -32.5562       | -29.4591     | -0.7336         | -0.7328       |
+| 0.5497        | 0.4397 | 450  | 0.5795          | 0.9735         | 0.6722           | 0.7385             | 0.3014          | -32.5105       | -29.3814     | -0.7346         | -0.7338       |
+| 0.5857        | 0.4885 | 500  | 0.5781          | 1.0925         | 0.7817           | 0.7407             | 0.3108          | -32.2914       | -29.1435     | -0.7356         | -0.7348       |
+| 0.5168        | 0.5374 | 550  | 0.5714          | 1.0244         | 0.6925           | 0.7385             | 0.3319          | -32.4698       | -29.2796     | -0.7358         | -0.7350       |
+| 0.567         | 0.5862 | 600  | 0.5699          | 0.9715         | 0.6353           | 0.7407             | 0.3362          | -32.5842       | -29.3855     | -0.7356         | -0.7349       |
+| 0.5375        | 0.6351 | 650  | 0.5689          | 0.9102         | 0.5695           | 0.7429             | 0.3407          | -32.7158       | -29.5081     | -0.7357         | -0.7349       |
+| 0.5541        | 0.6839 | 700  | 0.5698          | 0.9277         | 0.5885           | 0.7385             | 0.3391          | -32.6778       | -29.4732     | -0.7359         | -0.7351       |
+| 0.5824        | 0.7328 | 750  | 0.5693          | 0.9133         | 0.5709           | 0.7516             | 0.3424          | -32.7129       | -29.5019     | -0.7358         | -0.7350       |
+| 0.5769        | 0.7816 | 800  | 0.5684          | 0.9103         | 0.5658           | 0.7429             | 0.3444          | -32.7232       | -29.5080     | -0.7354         | -0.7346       |
+| 0.6223        | 0.8305 | 850  | 0.5678          | 0.9317         | 0.5868           | 0.7473             | 0.3449          | -32.6812       | -29.4651     | -0.7360         | -0.7352       |
+| 0.5968        | 0.8793 | 900  | 0.5687          | 0.9231         | 0.5807           | 0.7385             | 0.3424          | -32.6935       | -29.4824     | -0.7361         | -0.7353       |
+| 0.5673        | 0.9282 | 950  | 0.5678          | 0.9259         | 0.5813           | 0.7407             | 0.3446          | -32.6921       | -29.4767     | -0.7357         | -0.7349       |
+| 0.4742        | 0.9770 | 1000 | 0.5679          | 0.9256         | 0.5812           | 0.7407             | 0.3444          | -32.6925       | -29.4774     | -0.7357         | -0.7349       |
 ### Framework versions
+- Transformers 4.41.1
 - Pytorch 2.0.0+cu117
 - Datasets 2.19.1
 - Tokenizers 0.19.1

config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "meta-llama/Meta-Llama-3-8B-Instruct",
   "architectures": [
     "LlamaForCausalLM"
   ],
@@ -23,7 +23,7 @@
   "rope_theta": 500000.0,
   "tie_word_embeddings": false,
   "torch_dtype": "float16",
-  "transformers_version": "4.41.0",
   "use_cache": false,
   "vocab_size": 128256
 }

 {
+  "_name_or_path": "tsavage68/MedQA_L3_1000steps_1e6rate_SFT",
   "architectures": [
     "LlamaForCausalLM"
   ],
   "rope_theta": 500000.0,
   "tie_word_embeddings": false,
   "torch_dtype": "float16",
+  "transformers_version": "4.41.1",
   "use_cache": false,
   "vocab_size": 128256
 }

final_checkpoint/config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "meta-llama/Meta-Llama-3-8B-Instruct",
   "architectures": [
     "LlamaForCausalLM"
   ],
@@ -23,7 +23,7 @@
   "rope_theta": 500000.0,
   "tie_word_embeddings": false,
   "torch_dtype": "float16",
-  "transformers_version": "4.41.0",
   "use_cache": false,
   "vocab_size": 128256
 }

 {
+  "_name_or_path": "tsavage68/MedQA_L3_1000steps_1e6rate_SFT",
   "architectures": [
     "LlamaForCausalLM"
   ],
   "rope_theta": 500000.0,
   "tie_word_embeddings": false,
   "torch_dtype": "float16",
+  "transformers_version": "4.41.1",
   "use_cache": false,
   "vocab_size": 128256
 }

final_checkpoint/generation_config.json CHANGED Viewed

@@ -8,5 +8,5 @@
   "max_length": 4096,
   "temperature": 0.6,
   "top_p": 0.9,
-  "transformers_version": "4.41.0"
 }

   "max_length": 4096,
   "temperature": 0.6,
   "top_p": 0.9,
+  "transformers_version": "4.41.1"
 }

final_checkpoint/model-00001-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3b2d173c2c8bd4b0fd8bddd07512157eefdcdca1fc632c7eadac3a998e48507d
 size 4976698592

 version https://git-lfs.github.com/spec/v1
+oid sha256:9f367e14b84c970210f88e7eb315634c363047d729c21b0c858bcb8753a2c160
 size 4976698592

final_checkpoint/model-00002-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1f4746ba320abd3d5a4635b7f6b3a0f5346fed139065ab05ca2a7a2570d4fa65
 size 4999802616

 version https://git-lfs.github.com/spec/v1
+oid sha256:107e5faaa4c6d50c82666322c4725b136e5462cfd425b965a5ee1dd730a419ae
 size 4999802616

final_checkpoint/model-00003-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a9d65f9d04799b450284633fdc430e9f46e00ee17a6dcc83aeede359aa596f8b
 size 4915916080

 version https://git-lfs.github.com/spec/v1
+oid sha256:21702f3064c14669d69adc12d5e282d3e5017b2bbf7a816e76e9a8fb91194d36
 size 4915916080

final_checkpoint/model-00004-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4abf71527dbda8c8d487eae9f8fa4670052f962007ddc7f277da9c61f3b6a1e6
 size 1168138808

 version https://git-lfs.github.com/spec/v1
+oid sha256:36313565484ac1939a2a32eeec04429297196ce205c5295f04e6d649e2de13fa
 size 1168138808

generation_config.json CHANGED Viewed

@@ -8,5 +8,5 @@
   "max_length": 4096,
   "temperature": 0.6,
   "top_p": 0.9,
-  "transformers_version": "4.41.0"
 }

   "max_length": 4096,
   "temperature": 0.6,
   "top_p": 0.9,
+  "transformers_version": "4.41.1"
 }

model-00001-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3b2d173c2c8bd4b0fd8bddd07512157eefdcdca1fc632c7eadac3a998e48507d
 size 4976698592

 version https://git-lfs.github.com/spec/v1
+oid sha256:9f367e14b84c970210f88e7eb315634c363047d729c21b0c858bcb8753a2c160
 size 4976698592

model-00002-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1f4746ba320abd3d5a4635b7f6b3a0f5346fed139065ab05ca2a7a2570d4fa65
 size 4999802616

 version https://git-lfs.github.com/spec/v1
+oid sha256:107e5faaa4c6d50c82666322c4725b136e5462cfd425b965a5ee1dd730a419ae
 size 4999802616

model-00003-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a9d65f9d04799b450284633fdc430e9f46e00ee17a6dcc83aeede359aa596f8b
 size 4915916080

 version https://git-lfs.github.com/spec/v1
+oid sha256:21702f3064c14669d69adc12d5e282d3e5017b2bbf7a816e76e9a8fb91194d36
 size 4915916080

model-00004-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4abf71527dbda8c8d487eae9f8fa4670052f962007ddc7f277da9c61f3b6a1e6
 size 1168138808

 version https://git-lfs.github.com/spec/v1
+oid sha256:36313565484ac1939a2a32eeec04429297196ce205c5295f04e6d649e2de13fa
 size 1168138808

tokenizer_config.json CHANGED Viewed

@@ -2053,11 +2053,15 @@
   "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
   "clean_up_tokenization_spaces": true,
   "eos_token": "<|eot_id|>",
   "model_input_names": [
     "input_ids",
     "attention_mask"
   ],
   "model_max_length": 1000000000000000019884624838656,
   "pad_token": "<|eot_id|>",
-  "tokenizer_class": "PreTrainedTokenizerFast"
 }

   "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
   "clean_up_tokenization_spaces": true,
   "eos_token": "<|eot_id|>",
+  "max_length": 1024,
   "model_input_names": [
     "input_ids",
     "attention_mask"
   ],
   "model_max_length": 1000000000000000019884624838656,
   "pad_token": "<|eot_id|>",
+  "stride": 0,
+  "tokenizer_class": "PreTrainedTokenizerFast",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first"
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:df1d3a8ec252b34a0905d832feb9c8c73f3f463f2f14e4ac6e22a26f03fef1e9
 size 4667

 version https://git-lfs.github.com/spec/v1
+oid sha256:e554fd0d8ef36a2d2b9be6800b6846e248122386c3c6777ba627010d0aab2a62
 size 4667