tsavage68 committed on
Commit
706332e
1 Parent(s): 867698e

End of training

Browse files
README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
  license: llama3
3
- base_model: meta-llama/Meta-Llama-3-8B-Instruct
4
  tags:
5
  - trl
6
  - dpo
@@ -15,17 +15,17 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # MedQA_L3_1000steps_1e7rate_05beta_CSFTDPO
17
 
18
- This model is a fine-tuned version of [meta-llama/Meta-Llama-3-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
- - Loss: 0.6092
21
- - Rewards/chosen: 0.2694
22
- - Rewards/rejected: -0.0121
23
- - Rewards/accuracies: 0.6835
24
- - Rewards/margins: 0.2814
25
- - Logps/rejected: -21.3407
26
- - Logps/chosen: -17.6839
27
- - Logits/rejected: -0.9435
28
- - Logits/chosen: -0.9429
29
 
30
  ## Model description
31
 
@@ -59,31 +59,31 @@ The following hyperparameters were used during training:
59
 
60
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
61
  |:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
62
- | 0.7034 | 0.0489 | 50 | 0.6908 | 0.0092 | 0.0030 | 0.5187 | 0.0061 | -21.3104 | -18.2043 | -0.9262 | -0.9257 |
63
- | 0.6841 | 0.0977 | 100 | 0.6705 | 0.1777 | 0.1221 | 0.6088 | 0.0556 | -21.0723 | -17.8673 | -0.9278 | -0.9273 |
64
- | 0.6628 | 0.1466 | 150 | 0.6509 | 0.2657 | 0.1441 | 0.6220 | 0.1216 | -21.0283 | -17.6913 | -0.9313 | -0.9308 |
65
- | 0.6443 | 0.1954 | 200 | 0.6426 | 0.2910 | 0.1442 | 0.6352 | 0.1468 | -21.0282 | -17.6407 | -0.9332 | -0.9327 |
66
- | 0.6834 | 0.2443 | 250 | 0.6344 | 0.3123 | 0.1351 | 0.6527 | 0.1772 | -21.0464 | -17.5981 | -0.9353 | -0.9347 |
67
- | 0.6183 | 0.2931 | 300 | 0.6286 | 0.3313 | 0.1340 | 0.6725 | 0.1973 | -21.0485 | -17.5601 | -0.9370 | -0.9365 |
68
- | 0.7184 | 0.3420 | 350 | 0.6258 | 0.3266 | 0.1175 | 0.6637 | 0.2091 | -21.0816 | -17.5695 | -0.9377 | -0.9371 |
69
- | 0.5884 | 0.3908 | 400 | 0.6206 | 0.3117 | 0.0832 | 0.6857 | 0.2285 | -21.1501 | -17.5993 | -0.9393 | -0.9387 |
70
- | 0.6333 | 0.4397 | 450 | 0.6173 | 0.3095 | 0.0692 | 0.6681 | 0.2403 | -21.1780 | -17.6036 | -0.9402 | -0.9396 |
71
- | 0.6541 | 0.4885 | 500 | 0.6130 | 0.3027 | 0.0479 | 0.6813 | 0.2548 | -21.2207 | -17.6173 | -0.9417 | -0.9411 |
72
- | 0.5625 | 0.5374 | 550 | 0.6161 | 0.2745 | 0.0181 | 0.6747 | 0.2564 | -21.2803 | -17.6737 | -0.9423 | -0.9417 |
73
- | 0.5996 | 0.5862 | 600 | 0.6122 | 0.2770 | 0.0070 | 0.6681 | 0.2699 | -21.3024 | -17.6687 | -0.9428 | -0.9422 |
74
- | 0.595 | 0.6351 | 650 | 0.6098 | 0.2745 | -0.0048 | 0.6879 | 0.2793 | -21.3261 | -17.6737 | -0.9428 | -0.9423 |
75
- | 0.6529 | 0.6839 | 700 | 0.6088 | 0.2667 | -0.0156 | 0.6791 | 0.2822 | -21.3477 | -17.6893 | -0.9433 | -0.9427 |
76
- | 0.5262 | 0.7328 | 750 | 0.6099 | 0.2672 | -0.0130 | 0.6791 | 0.2802 | -21.3426 | -17.6883 | -0.9431 | -0.9425 |
77
- | 0.6351 | 0.7816 | 800 | 0.6090 | 0.2731 | -0.0087 | 0.6945 | 0.2819 | -21.3340 | -17.6764 | -0.9432 | -0.9426 |
78
- | 0.6448 | 0.8305 | 850 | 0.6073 | 0.2716 | -0.0155 | 0.6769 | 0.2871 | -21.3474 | -17.6795 | -0.9433 | -0.9428 |
79
- | 0.6519 | 0.8793 | 900 | 0.6081 | 0.2699 | -0.0141 | 0.6791 | 0.2840 | -21.3448 | -17.6829 | -0.9433 | -0.9427 |
80
- | 0.5745 | 0.9282 | 950 | 0.6091 | 0.2696 | -0.0119 | 0.6813 | 0.2815 | -21.3403 | -17.6834 | -0.9434 | -0.9429 |
81
- | 0.5343 | 0.9770 | 1000 | 0.6092 | 0.2694 | -0.0121 | 0.6835 | 0.2814 | -21.3407 | -17.6839 | -0.9435 | -0.9429 |
82
 
83
 
84
  ### Framework versions
85
 
86
- - Transformers 4.41.0
87
  - Pytorch 2.0.0+cu117
88
  - Datasets 2.19.1
89
  - Tokenizers 0.19.1
 
1
  ---
2
  license: llama3
3
+ base_model: tsavage68/MedQA_L3_1000steps_1e6rate_SFT
4
  tags:
5
  - trl
6
  - dpo
 
15
 
16
  # MedQA_L3_1000steps_1e7rate_05beta_CSFTDPO
17
 
18
+ This model is a fine-tuned version of [tsavage68/MedQA_L3_1000steps_1e6rate_SFT](https://huggingface.co/tsavage68/MedQA_L3_1000steps_1e6rate_SFT) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
+ - Loss: 0.5679
21
+ - Rewards/chosen: 0.9256
22
+ - Rewards/rejected: 0.5812
23
+ - Rewards/accuracies: 0.7407
24
+ - Rewards/margins: 0.3444
25
+ - Logps/rejected: -32.6925
26
+ - Logps/chosen: -29.4774
27
+ - Logits/rejected: -0.7357
28
+ - Logits/chosen: -0.7349
29
 
30
  ## Model description
31
 
 
59
 
60
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
61
  |:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
62
+ | 0.6857 | 0.0489 | 50 | 0.6947 | -0.0249 | -0.0232 | 0.4879 | -0.0018 | -33.9011 | -31.3784 | -0.7318 | -0.7312 |
63
+ | 0.6799 | 0.0977 | 100 | 0.6734 | 0.3881 | 0.3450 | 0.6681 | 0.0432 | -33.1649 | -30.5522 | -0.7330 | -0.7323 |
64
+ | 0.6275 | 0.1466 | 150 | 0.6484 | 0.5732 | 0.4639 | 0.6813 | 0.1093 | -32.9271 | -30.1822 | -0.7310 | -0.7303 |
65
+ | 0.5934 | 0.1954 | 200 | 0.6321 | 0.1707 | 0.0172 | 0.6989 | 0.1535 | -33.8203 | -30.9871 | -0.7310 | -0.7303 |
66
+ | 0.6358 | 0.2443 | 250 | 0.6181 | 0.4355 | 0.2501 | 0.7253 | 0.1854 | -33.3546 | -30.4574 | -0.7315 | -0.7308 |
67
+ | 0.5727 | 0.2931 | 300 | 0.6007 | 0.5633 | 0.3322 | 0.7429 | 0.2311 | -33.1904 | -30.2020 | -0.7321 | -0.7314 |
68
+ | 0.5786 | 0.3420 | 350 | 0.5923 | 0.7025 | 0.4439 | 0.7407 | 0.2586 | -32.9670 | -29.9235 | -0.7343 | -0.7335 |
69
+ | 0.545 | 0.3908 | 400 | 0.5830 | 0.9347 | 0.6493 | 0.7385 | 0.2854 | -32.5562 | -29.4591 | -0.7336 | -0.7328 |
70
+ | 0.5497 | 0.4397 | 450 | 0.5795 | 0.9735 | 0.6722 | 0.7385 | 0.3014 | -32.5105 | -29.3814 | -0.7346 | -0.7338 |
71
+ | 0.5857 | 0.4885 | 500 | 0.5781 | 1.0925 | 0.7817 | 0.7407 | 0.3108 | -32.2914 | -29.1435 | -0.7356 | -0.7348 |
72
+ | 0.5168 | 0.5374 | 550 | 0.5714 | 1.0244 | 0.6925 | 0.7385 | 0.3319 | -32.4698 | -29.2796 | -0.7358 | -0.7350 |
73
+ | 0.567 | 0.5862 | 600 | 0.5699 | 0.9715 | 0.6353 | 0.7407 | 0.3362 | -32.5842 | -29.3855 | -0.7356 | -0.7349 |
74
+ | 0.5375 | 0.6351 | 650 | 0.5689 | 0.9102 | 0.5695 | 0.7429 | 0.3407 | -32.7158 | -29.5081 | -0.7357 | -0.7349 |
75
+ | 0.5541 | 0.6839 | 700 | 0.5698 | 0.9277 | 0.5885 | 0.7385 | 0.3391 | -32.6778 | -29.4732 | -0.7359 | -0.7351 |
76
+ | 0.5824 | 0.7328 | 750 | 0.5693 | 0.9133 | 0.5709 | 0.7516 | 0.3424 | -32.7129 | -29.5019 | -0.7358 | -0.7350 |
77
+ | 0.5769 | 0.7816 | 800 | 0.5684 | 0.9103 | 0.5658 | 0.7429 | 0.3444 | -32.7232 | -29.5080 | -0.7354 | -0.7346 |
78
+ | 0.6223 | 0.8305 | 850 | 0.5678 | 0.9317 | 0.5868 | 0.7473 | 0.3449 | -32.6812 | -29.4651 | -0.7360 | -0.7352 |
79
+ | 0.5968 | 0.8793 | 900 | 0.5687 | 0.9231 | 0.5807 | 0.7385 | 0.3424 | -32.6935 | -29.4824 | -0.7361 | -0.7353 |
80
+ | 0.5673 | 0.9282 | 950 | 0.5678 | 0.9259 | 0.5813 | 0.7407 | 0.3446 | -32.6921 | -29.4767 | -0.7357 | -0.7349 |
81
+ | 0.4742 | 0.9770 | 1000 | 0.5679 | 0.9256 | 0.5812 | 0.7407 | 0.3444 | -32.6925 | -29.4774 | -0.7357 | -0.7349 |
82
 
83
 
84
  ### Framework versions
85
 
86
+ - Transformers 4.41.1
87
  - Pytorch 2.0.0+cu117
88
  - Datasets 2.19.1
89
  - Tokenizers 0.19.1
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "meta-llama/Meta-Llama-3-8B-Instruct",
3
  "architectures": [
4
  "LlamaForCausalLM"
5
  ],
@@ -23,7 +23,7 @@
23
  "rope_theta": 500000.0,
24
  "tie_word_embeddings": false,
25
  "torch_dtype": "float16",
26
- "transformers_version": "4.41.0",
27
  "use_cache": false,
28
  "vocab_size": 128256
29
  }
 
1
  {
2
+ "_name_or_path": "tsavage68/MedQA_L3_1000steps_1e6rate_SFT",
3
  "architectures": [
4
  "LlamaForCausalLM"
5
  ],
 
23
  "rope_theta": 500000.0,
24
  "tie_word_embeddings": false,
25
  "torch_dtype": "float16",
26
+ "transformers_version": "4.41.1",
27
  "use_cache": false,
28
  "vocab_size": 128256
29
  }
final_checkpoint/config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "meta-llama/Meta-Llama-3-8B-Instruct",
3
  "architectures": [
4
  "LlamaForCausalLM"
5
  ],
@@ -23,7 +23,7 @@
23
  "rope_theta": 500000.0,
24
  "tie_word_embeddings": false,
25
  "torch_dtype": "float16",
26
- "transformers_version": "4.41.0",
27
  "use_cache": false,
28
  "vocab_size": 128256
29
  }
 
1
  {
2
+ "_name_or_path": "tsavage68/MedQA_L3_1000steps_1e6rate_SFT",
3
  "architectures": [
4
  "LlamaForCausalLM"
5
  ],
 
23
  "rope_theta": 500000.0,
24
  "tie_word_embeddings": false,
25
  "torch_dtype": "float16",
26
+ "transformers_version": "4.41.1",
27
  "use_cache": false,
28
  "vocab_size": 128256
29
  }
final_checkpoint/generation_config.json CHANGED
@@ -8,5 +8,5 @@
8
  "max_length": 4096,
9
  "temperature": 0.6,
10
  "top_p": 0.9,
11
- "transformers_version": "4.41.0"
12
  }
 
8
  "max_length": 4096,
9
  "temperature": 0.6,
10
  "top_p": 0.9,
11
+ "transformers_version": "4.41.1"
12
  }
final_checkpoint/model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3b2d173c2c8bd4b0fd8bddd07512157eefdcdca1fc632c7eadac3a998e48507d
3
  size 4976698592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f367e14b84c970210f88e7eb315634c363047d729c21b0c858bcb8753a2c160
3
  size 4976698592
final_checkpoint/model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1f4746ba320abd3d5a4635b7f6b3a0f5346fed139065ab05ca2a7a2570d4fa65
3
  size 4999802616
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:107e5faaa4c6d50c82666322c4725b136e5462cfd425b965a5ee1dd730a419ae
3
  size 4999802616
final_checkpoint/model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a9d65f9d04799b450284633fdc430e9f46e00ee17a6dcc83aeede359aa596f8b
3
  size 4915916080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21702f3064c14669d69adc12d5e282d3e5017b2bbf7a816e76e9a8fb91194d36
3
  size 4915916080
final_checkpoint/model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4abf71527dbda8c8d487eae9f8fa4670052f962007ddc7f277da9c61f3b6a1e6
3
  size 1168138808
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:36313565484ac1939a2a32eeec04429297196ce205c5295f04e6d649e2de13fa
3
  size 1168138808
generation_config.json CHANGED
@@ -8,5 +8,5 @@
8
  "max_length": 4096,
9
  "temperature": 0.6,
10
  "top_p": 0.9,
11
- "transformers_version": "4.41.0"
12
  }
 
8
  "max_length": 4096,
9
  "temperature": 0.6,
10
  "top_p": 0.9,
11
+ "transformers_version": "4.41.1"
12
  }
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3b2d173c2c8bd4b0fd8bddd07512157eefdcdca1fc632c7eadac3a998e48507d
3
  size 4976698592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f367e14b84c970210f88e7eb315634c363047d729c21b0c858bcb8753a2c160
3
  size 4976698592
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1f4746ba320abd3d5a4635b7f6b3a0f5346fed139065ab05ca2a7a2570d4fa65
3
  size 4999802616
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:107e5faaa4c6d50c82666322c4725b136e5462cfd425b965a5ee1dd730a419ae
3
  size 4999802616
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a9d65f9d04799b450284633fdc430e9f46e00ee17a6dcc83aeede359aa596f8b
3
  size 4915916080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21702f3064c14669d69adc12d5e282d3e5017b2bbf7a816e76e9a8fb91194d36
3
  size 4915916080
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4abf71527dbda8c8d487eae9f8fa4670052f962007ddc7f277da9c61f3b6a1e6
3
  size 1168138808
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:36313565484ac1939a2a32eeec04429297196ce205c5295f04e6d649e2de13fa
3
  size 1168138808
tokenizer_config.json CHANGED
@@ -2053,11 +2053,15 @@
2053
  "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
2054
  "clean_up_tokenization_spaces": true,
2055
  "eos_token": "<|eot_id|>",
 
2056
  "model_input_names": [
2057
  "input_ids",
2058
  "attention_mask"
2059
  ],
2060
  "model_max_length": 1000000000000000019884624838656,
2061
  "pad_token": "<|eot_id|>",
2062
- "tokenizer_class": "PreTrainedTokenizerFast"
 
 
 
2063
  }
 
2053
  "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
2054
  "clean_up_tokenization_spaces": true,
2055
  "eos_token": "<|eot_id|>",
2056
+ "max_length": 1024,
2057
  "model_input_names": [
2058
  "input_ids",
2059
  "attention_mask"
2060
  ],
2061
  "model_max_length": 1000000000000000019884624838656,
2062
  "pad_token": "<|eot_id|>",
2063
+ "stride": 0,
2064
+ "tokenizer_class": "PreTrainedTokenizerFast",
2065
+ "truncation_side": "right",
2066
+ "truncation_strategy": "longest_first"
2067
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:df1d3a8ec252b34a0905d832feb9c8c73f3f463f2f14e4ac6e22a26f03fef1e9
3
  size 4667
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e554fd0d8ef36a2d2b9be6800b6846e248122386c3c6777ba627010d0aab2a62
3
  size 4667