tsavage68 committed on
Commit
08670be
1 Parent(s): 9cff42a

End of training

Browse files
README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
  license: llama3
3
- base_model: meta-llama/Meta-Llama-3-8B-Instruct
4
  tags:
5
  - trl
6
  - dpo
@@ -15,17 +15,17 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # MedQA_L3_1000steps_1e7rate_03beta_CSFTDPO
17
 
18
- This model is a fine-tuned version of [meta-llama/Meta-Llama-3-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
- - Loss: 0.6220
21
- - Rewards/chosen: 0.1076
22
- - Rewards/rejected: -0.0975
23
- - Rewards/accuracies: 0.6725
24
- - Rewards/margins: 0.2050
25
- - Logps/rejected: -21.6414
26
- - Logps/chosen: -17.8641
27
- - Logits/rejected: -0.9452
28
- - Logits/chosen: -0.9446
29
 
30
  ## Model description
31
 
@@ -59,31 +59,31 @@ The following hyperparameters were used during training:
59
 
60
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
61
  |:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
62
- | 0.6938 | 0.0489 | 50 | 0.6934 | 0.0041 | 0.0042 | 0.5099 | -0.0000 | -21.3026 | -18.2088 | -0.9262 | -0.9257 |
63
- | 0.6807 | 0.0977 | 100 | 0.6781 | 0.1130 | 0.0788 | 0.6110 | 0.0343 | -21.0540 | -17.8459 | -0.9280 | -0.9275 |
64
- | 0.6658 | 0.1466 | 150 | 0.6612 | 0.1697 | 0.0895 | 0.6374 | 0.0802 | -21.0183 | -17.6569 | -0.9316 | -0.9311 |
65
- | 0.6596 | 0.1954 | 200 | 0.6558 | 0.1737 | 0.0784 | 0.6505 | 0.0954 | -21.0553 | -17.6435 | -0.9345 | -0.9340 |
66
- | 0.6778 | 0.2443 | 250 | 0.6473 | 0.1808 | 0.0591 | 0.6659 | 0.1218 | -21.1196 | -17.6199 | -0.9363 | -0.9358 |
67
- | 0.6312 | 0.2931 | 300 | 0.6411 | 0.1839 | 0.0439 | 0.6681 | 0.1400 | -21.1702 | -17.6097 | -0.9382 | -0.9376 |
68
- | 0.7093 | 0.3420 | 350 | 0.6361 | 0.1789 | 0.0228 | 0.6659 | 0.1561 | -21.2406 | -17.6264 | -0.9386 | -0.9381 |
69
- | 0.6118 | 0.3908 | 400 | 0.6341 | 0.1611 | -0.0028 | 0.6747 | 0.1639 | -21.3258 | -17.6857 | -0.9403 | -0.9398 |
70
- | 0.6395 | 0.4397 | 450 | 0.6303 | 0.1523 | -0.0236 | 0.6659 | 0.1759 | -21.3953 | -17.7150 | -0.9424 | -0.9418 |
71
- | 0.649 | 0.4885 | 500 | 0.6284 | 0.1419 | -0.0403 | 0.6703 | 0.1822 | -21.4507 | -17.7495 | -0.9434 | -0.9428 |
72
- | 0.5851 | 0.5374 | 550 | 0.6265 | 0.1154 | -0.0757 | 0.6747 | 0.1911 | -21.5689 | -17.8381 | -0.9443 | -0.9437 |
73
- | 0.6155 | 0.5862 | 600 | 0.6255 | 0.1139 | -0.0809 | 0.6725 | 0.1948 | -21.5862 | -17.8429 | -0.9446 | -0.9440 |
74
- | 0.6048 | 0.6351 | 650 | 0.6250 | 0.1091 | -0.0893 | 0.6703 | 0.1983 | -21.6141 | -17.8591 | -0.9449 | -0.9443 |
75
- | 0.671 | 0.6839 | 700 | 0.6242 | 0.1080 | -0.0932 | 0.6725 | 0.2012 | -21.6272 | -17.8627 | -0.9450 | -0.9445 |
76
- | 0.5693 | 0.7328 | 750 | 0.6236 | 0.1063 | -0.0951 | 0.6681 | 0.2014 | -21.6335 | -17.8682 | -0.9449 | -0.9443 |
77
- | 0.6342 | 0.7816 | 800 | 0.6251 | 0.1039 | -0.0954 | 0.6725 | 0.1993 | -21.6344 | -17.8763 | -0.9455 | -0.9449 |
78
- | 0.6575 | 0.8305 | 850 | 0.6235 | 0.1067 | -0.0957 | 0.6681 | 0.2025 | -21.6356 | -17.8668 | -0.9451 | -0.9445 |
79
- | 0.6549 | 0.8793 | 900 | 0.6232 | 0.1081 | -0.0943 | 0.6747 | 0.2024 | -21.6308 | -17.8622 | -0.9452 | -0.9446 |
80
- | 0.6019 | 0.9282 | 950 | 0.6229 | 0.1049 | -0.0980 | 0.6725 | 0.2029 | -21.6431 | -17.8729 | -0.9453 | -0.9447 |
81
- | 0.5785 | 0.9770 | 1000 | 0.6220 | 0.1076 | -0.0975 | 0.6725 | 0.2050 | -21.6414 | -17.8641 | -0.9452 | -0.9446 |
82
 
83
 
84
  ### Framework versions
85
 
86
- - Transformers 4.41.0
87
  - Pytorch 2.0.0+cu117
88
  - Datasets 2.19.1
89
  - Tokenizers 0.19.1
 
1
  ---
2
  license: llama3
3
+ base_model: tsavage68/MedQA_L3_1000steps_1e6rate_SFT
4
  tags:
5
  - trl
6
  - dpo
 
15
 
16
  # MedQA_L3_1000steps_1e7rate_03beta_CSFTDPO
17
 
18
+ This model is a fine-tuned version of [tsavage68/MedQA_L3_1000steps_1e6rate_SFT](https://huggingface.co/tsavage68/MedQA_L3_1000steps_1e6rate_SFT) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
+ - Loss: 0.6020
21
+ - Rewards/chosen: 0.7087
22
+ - Rewards/rejected: 0.4830
23
+ - Rewards/accuracies: 0.7341
24
+ - Rewards/margins: 0.2257
25
+ - Logps/rejected: -32.2447
26
+ - Logps/chosen: -28.9661
27
+ - Logits/rejected: -0.7358
28
+ - Logits/chosen: -0.7350
29
 
30
  ## Model description
31
 
 
59
 
60
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
61
  |:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
62
+ | 0.6925 | 0.0489 | 50 | 0.6930 | -0.0016 | -0.0023 | 0.5011 | 0.0007 | -33.8624 | -31.3338 | -0.7320 | -0.7314 |
63
+ | 0.6841 | 0.0977 | 100 | 0.6807 | 0.2459 | 0.2195 | 0.6549 | 0.0264 | -33.1233 | -30.5088 | -0.7330 | -0.7323 |
64
+ | 0.6562 | 0.1466 | 150 | 0.6641 | 0.3800 | 0.3137 | 0.6791 | 0.0663 | -32.8092 | -30.0619 | -0.7310 | -0.7303 |
65
+ | 0.6334 | 0.1954 | 200 | 0.6509 | 0.1334 | 0.0355 | 0.7165 | 0.0979 | -33.7366 | -30.8837 | -0.7311 | -0.7304 |
66
+ | 0.6544 | 0.2443 | 250 | 0.6415 | 0.2943 | 0.1754 | 0.7209 | 0.1189 | -33.2701 | -30.3474 | -0.7311 | -0.7303 |
67
+ | 0.6145 | 0.2931 | 300 | 0.6304 | 0.3548 | 0.2099 | 0.7385 | 0.1448 | -33.1550 | -30.1459 | -0.7317 | -0.7310 |
68
+ | 0.6171 | 0.3420 | 350 | 0.6223 | 0.4756 | 0.3093 | 0.7341 | 0.1663 | -32.8238 | -29.7432 | -0.7336 | -0.7328 |
69
+ | 0.5911 | 0.3908 | 400 | 0.6181 | 0.6387 | 0.4602 | 0.7121 | 0.1785 | -32.3208 | -29.1996 | -0.7334 | -0.7327 |
70
+ | 0.5942 | 0.4397 | 450 | 0.6129 | 0.6839 | 0.4904 | 0.7253 | 0.1935 | -32.2203 | -29.0489 | -0.7347 | -0.7339 |
71
+ | 0.6096 | 0.4885 | 500 | 0.6090 | 0.7785 | 0.5741 | 0.7297 | 0.2044 | -31.9411 | -28.7335 | -0.7351 | -0.7343 |
72
+ | 0.5671 | 0.5374 | 550 | 0.6068 | 0.7522 | 0.5395 | 0.7275 | 0.2127 | -32.0566 | -28.8212 | -0.7355 | -0.7347 |
73
+ | 0.6066 | 0.5862 | 600 | 0.6061 | 0.7215 | 0.5067 | 0.7209 | 0.2147 | -32.1657 | -28.9236 | -0.7356 | -0.7348 |
74
+ | 0.5816 | 0.6351 | 650 | 0.6046 | 0.6882 | 0.4692 | 0.7231 | 0.2191 | -32.2910 | -29.0344 | -0.7356 | -0.7348 |
75
+ | 0.5968 | 0.6839 | 700 | 0.6030 | 0.6956 | 0.4723 | 0.7451 | 0.2233 | -32.2804 | -29.0097 | -0.7352 | -0.7344 |
76
+ | 0.6132 | 0.7328 | 750 | 0.6042 | 0.7103 | 0.4891 | 0.7297 | 0.2212 | -32.2246 | -28.9608 | -0.7354 | -0.7346 |
77
+ | 0.6133 | 0.7816 | 800 | 0.6021 | 0.6956 | 0.4697 | 0.7407 | 0.2258 | -32.2890 | -29.0099 | -0.7358 | -0.7350 |
78
+ | 0.6397 | 0.8305 | 850 | 0.6029 | 0.7027 | 0.4791 | 0.7341 | 0.2236 | -32.2579 | -28.9862 | -0.7354 | -0.7346 |
79
+ | 0.6273 | 0.8793 | 900 | 0.6030 | 0.7126 | 0.4896 | 0.7341 | 0.2230 | -32.2229 | -28.9533 | -0.7356 | -0.7348 |
80
+ | 0.5996 | 0.9282 | 950 | 0.6019 | 0.7087 | 0.4830 | 0.7341 | 0.2257 | -32.2447 | -28.9661 | -0.7358 | -0.7350 |
81
+ | 0.5319 | 0.9770 | 1000 | 0.6020 | 0.7087 | 0.4830 | 0.7341 | 0.2257 | -32.2447 | -28.9661 | -0.7358 | -0.7350 |
82
 
83
 
84
  ### Framework versions
85
 
86
+ - Transformers 4.41.1
87
  - Pytorch 2.0.0+cu117
88
  - Datasets 2.19.1
89
  - Tokenizers 0.19.1
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "meta-llama/Meta-Llama-3-8B-Instruct",
3
  "architectures": [
4
  "LlamaForCausalLM"
5
  ],
@@ -23,7 +23,7 @@
23
  "rope_theta": 500000.0,
24
  "tie_word_embeddings": false,
25
  "torch_dtype": "float16",
26
- "transformers_version": "4.41.0",
27
  "use_cache": false,
28
  "vocab_size": 128256
29
  }
 
1
  {
2
+ "_name_or_path": "tsavage68/MedQA_L3_1000steps_1e6rate_SFT",
3
  "architectures": [
4
  "LlamaForCausalLM"
5
  ],
 
23
  "rope_theta": 500000.0,
24
  "tie_word_embeddings": false,
25
  "torch_dtype": "float16",
26
+ "transformers_version": "4.41.1",
27
  "use_cache": false,
28
  "vocab_size": 128256
29
  }
final_checkpoint/config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "meta-llama/Meta-Llama-3-8B-Instruct",
3
  "architectures": [
4
  "LlamaForCausalLM"
5
  ],
@@ -23,7 +23,7 @@
23
  "rope_theta": 500000.0,
24
  "tie_word_embeddings": false,
25
  "torch_dtype": "float16",
26
- "transformers_version": "4.41.0",
27
  "use_cache": false,
28
  "vocab_size": 128256
29
  }
 
1
  {
2
+ "_name_or_path": "tsavage68/MedQA_L3_1000steps_1e6rate_SFT",
3
  "architectures": [
4
  "LlamaForCausalLM"
5
  ],
 
23
  "rope_theta": 500000.0,
24
  "tie_word_embeddings": false,
25
  "torch_dtype": "float16",
26
+ "transformers_version": "4.41.1",
27
  "use_cache": false,
28
  "vocab_size": 128256
29
  }
final_checkpoint/generation_config.json CHANGED
@@ -8,5 +8,5 @@
8
  "max_length": 4096,
9
  "temperature": 0.6,
10
  "top_p": 0.9,
11
- "transformers_version": "4.41.0"
12
  }
 
8
  "max_length": 4096,
9
  "temperature": 0.6,
10
  "top_p": 0.9,
11
+ "transformers_version": "4.41.1"
12
  }
final_checkpoint/model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b914441784cdf4d5a5ba86b5d6c2d9a4876d2dc0a8aed0b10cdbfe199c98e824
3
  size 4976698592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2557ef54b1cb541258b6aac809e2ec44f1a879c7ab1c84166f20074fd81e1c3
3
  size 4976698592
final_checkpoint/model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b2062ccf9cb4cf7095035161161a63286897f99839a1345ab3da62ca35c7ca12
3
  size 4999802616
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e119d7c88d94f0a6db7e39294810bd68b32ced63b3bd7464af0a063f1c32029
3
  size 4999802616
final_checkpoint/model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7c6b3cbb77cdf6f4d908d28bc078374b0997fba437bac4c02a7134cc90a626ef
3
  size 4915916080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9525481aee3b9d44cee6ffb20de7add9a26abb8c1ff8bea8a5e260cac6fc12c4
3
  size 4915916080
final_checkpoint/model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9cfb14502956990d8a876fc6b8342b97cd0fcf64e8003be846cbd708c5aa3a2c
3
  size 1168138808
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02aa661fc4a8018b889fdbfbb145ea620dbfb2ef8508c18febc2e41b8cafde1e
3
  size 1168138808
generation_config.json CHANGED
@@ -8,5 +8,5 @@
8
  "max_length": 4096,
9
  "temperature": 0.6,
10
  "top_p": 0.9,
11
- "transformers_version": "4.41.0"
12
  }
 
8
  "max_length": 4096,
9
  "temperature": 0.6,
10
  "top_p": 0.9,
11
+ "transformers_version": "4.41.1"
12
  }
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b914441784cdf4d5a5ba86b5d6c2d9a4876d2dc0a8aed0b10cdbfe199c98e824
3
  size 4976698592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2557ef54b1cb541258b6aac809e2ec44f1a879c7ab1c84166f20074fd81e1c3
3
  size 4976698592
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b2062ccf9cb4cf7095035161161a63286897f99839a1345ab3da62ca35c7ca12
3
  size 4999802616
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e119d7c88d94f0a6db7e39294810bd68b32ced63b3bd7464af0a063f1c32029
3
  size 4999802616
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7c6b3cbb77cdf6f4d908d28bc078374b0997fba437bac4c02a7134cc90a626ef
3
  size 4915916080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9525481aee3b9d44cee6ffb20de7add9a26abb8c1ff8bea8a5e260cac6fc12c4
3
  size 4915916080
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9cfb14502956990d8a876fc6b8342b97cd0fcf64e8003be846cbd708c5aa3a2c
3
  size 1168138808
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02aa661fc4a8018b889fdbfbb145ea620dbfb2ef8508c18febc2e41b8cafde1e
3
  size 1168138808
tokenizer_config.json CHANGED
@@ -2053,11 +2053,15 @@
2053
  "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
2054
  "clean_up_tokenization_spaces": true,
2055
  "eos_token": "<|eot_id|>",
 
2056
  "model_input_names": [
2057
  "input_ids",
2058
  "attention_mask"
2059
  ],
2060
  "model_max_length": 1000000000000000019884624838656,
2061
  "pad_token": "<|eot_id|>",
2062
- "tokenizer_class": "PreTrainedTokenizerFast"
 
 
 
2063
  }
 
2053
  "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
2054
  "clean_up_tokenization_spaces": true,
2055
  "eos_token": "<|eot_id|>",
2056
+ "max_length": 1024,
2057
  "model_input_names": [
2058
  "input_ids",
2059
  "attention_mask"
2060
  ],
2061
  "model_max_length": 1000000000000000019884624838656,
2062
  "pad_token": "<|eot_id|>",
2063
+ "stride": 0,
2064
+ "tokenizer_class": "PreTrainedTokenizerFast",
2065
+ "truncation_side": "right",
2066
+ "truncation_strategy": "longest_first"
2067
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:856c174c4a353987238506e7a157b8d3b8d7aaef99dbfff284495206942917de
3
  size 4667
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c7fa4546475a34fdf342ed9f3840232fd6ebbaace04e86920503ddffcd0bbf4
3
  size 4667