End of training
Files changed:
- README.md +32 -32
- config.json +2 -2
- final_checkpoint/config.json +2 -2
- final_checkpoint/generation_config.json +1 -1
- final_checkpoint/model-00001-of-00004.safetensors +1 -1
- final_checkpoint/model-00002-of-00004.safetensors +1 -1
- final_checkpoint/model-00003-of-00004.safetensors +1 -1
- final_checkpoint/model-00004-of-00004.safetensors +1 -1
- generation_config.json +1 -1
- model-00001-of-00004.safetensors +1 -1
- model-00002-of-00004.safetensors +1 -1
- model-00003-of-00004.safetensors +1 -1
- model-00004-of-00004.safetensors +1 -1
- tokenizer_config.json +5 -1
- training_args.bin +1 -1
README.md
CHANGED
@@ -1,6 +1,6 @@
 ---
 license: llama3
-base_model:
+base_model: tsavage68/MedQA_L3_1000steps_1e6rate_SFT
 tags:
 - trl
 - dpo
@@ -15,17 +15,17 @@ should probably proofread and complete it, then remove this comment. -->
 
 # MedQA_L3_1000steps_1e7rate_05beta_CSFTDPO
 
-This model is a fine-tuned version of [
+This model is a fine-tuned version of [tsavage68/MedQA_L3_1000steps_1e6rate_SFT](https://huggingface.co/tsavage68/MedQA_L3_1000steps_1e6rate_SFT) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.
-- Rewards/chosen: 0.
-- Rewards/rejected:
-- Rewards/accuracies: 0.
-- Rewards/margins: 0.
-- Logps/rejected: -
-- Logps/chosen: -
-- Logits/rejected: -0.
-- Logits/chosen: -0.
+- Loss: 0.5679
+- Rewards/chosen: 0.9256
+- Rewards/rejected: 0.5812
+- Rewards/accuracies: 0.7407
+- Rewards/margins: 0.3444
+- Logps/rejected: -32.6925
+- Logps/chosen: -29.4774
+- Logits/rejected: -0.7357
+- Logits/chosen: -0.7349
 
 ## Model description
 
@@ -59,31 +59,31 @@ The following hyperparameters were used during training:
 
 | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
 |:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
-| 0.
+| 0.6857 | 0.0489 | 50 | 0.6947 | -0.0249 | -0.0232 | 0.4879 | -0.0018 | -33.9011 | -31.3784 | -0.7318 | -0.7312 |
+| 0.6799 | 0.0977 | 100 | 0.6734 | 0.3881 | 0.3450 | 0.6681 | 0.0432 | -33.1649 | -30.5522 | -0.7330 | -0.7323 |
+| 0.6275 | 0.1466 | 150 | 0.6484 | 0.5732 | 0.4639 | 0.6813 | 0.1093 | -32.9271 | -30.1822 | -0.7310 | -0.7303 |
+| 0.5934 | 0.1954 | 200 | 0.6321 | 0.1707 | 0.0172 | 0.6989 | 0.1535 | -33.8203 | -30.9871 | -0.7310 | -0.7303 |
+| 0.6358 | 0.2443 | 250 | 0.6181 | 0.4355 | 0.2501 | 0.7253 | 0.1854 | -33.3546 | -30.4574 | -0.7315 | -0.7308 |
+| 0.5727 | 0.2931 | 300 | 0.6007 | 0.5633 | 0.3322 | 0.7429 | 0.2311 | -33.1904 | -30.2020 | -0.7321 | -0.7314 |
+| 0.5786 | 0.3420 | 350 | 0.5923 | 0.7025 | 0.4439 | 0.7407 | 0.2586 | -32.9670 | -29.9235 | -0.7343 | -0.7335 |
+| 0.545 | 0.3908 | 400 | 0.5830 | 0.9347 | 0.6493 | 0.7385 | 0.2854 | -32.5562 | -29.4591 | -0.7336 | -0.7328 |
+| 0.5497 | 0.4397 | 450 | 0.5795 | 0.9735 | 0.6722 | 0.7385 | 0.3014 | -32.5105 | -29.3814 | -0.7346 | -0.7338 |
+| 0.5857 | 0.4885 | 500 | 0.5781 | 1.0925 | 0.7817 | 0.7407 | 0.3108 | -32.2914 | -29.1435 | -0.7356 | -0.7348 |
+| 0.5168 | 0.5374 | 550 | 0.5714 | 1.0244 | 0.6925 | 0.7385 | 0.3319 | -32.4698 | -29.2796 | -0.7358 | -0.7350 |
+| 0.567 | 0.5862 | 600 | 0.5699 | 0.9715 | 0.6353 | 0.7407 | 0.3362 | -32.5842 | -29.3855 | -0.7356 | -0.7349 |
+| 0.5375 | 0.6351 | 650 | 0.5689 | 0.9102 | 0.5695 | 0.7429 | 0.3407 | -32.7158 | -29.5081 | -0.7357 | -0.7349 |
+| 0.5541 | 0.6839 | 700 | 0.5698 | 0.9277 | 0.5885 | 0.7385 | 0.3391 | -32.6778 | -29.4732 | -0.7359 | -0.7351 |
+| 0.5824 | 0.7328 | 750 | 0.5693 | 0.9133 | 0.5709 | 0.7516 | 0.3424 | -32.7129 | -29.5019 | -0.7358 | -0.7350 |
+| 0.5769 | 0.7816 | 800 | 0.5684 | 0.9103 | 0.5658 | 0.7429 | 0.3444 | -32.7232 | -29.5080 | -0.7354 | -0.7346 |
+| 0.6223 | 0.8305 | 850 | 0.5678 | 0.9317 | 0.5868 | 0.7473 | 0.3449 | -32.6812 | -29.4651 | -0.7360 | -0.7352 |
+| 0.5968 | 0.8793 | 900 | 0.5687 | 0.9231 | 0.5807 | 0.7385 | 0.3424 | -32.6935 | -29.4824 | -0.7361 | -0.7353 |
+| 0.5673 | 0.9282 | 950 | 0.5678 | 0.9259 | 0.5813 | 0.7407 | 0.3446 | -32.6921 | -29.4767 | -0.7357 | -0.7349 |
+| 0.4742 | 0.9770 | 1000 | 0.5679 | 0.9256 | 0.5812 | 0.7407 | 0.3444 | -32.6925 | -29.4774 | -0.7357 | -0.7349 |
 
 
 ### Framework versions
 
-- Transformers 4.41.
+- Transformers 4.41.1
 - Pytorch 2.0.0+cu117
 - Datasets 2.19.1
 - Tokenizers 0.19.1
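For context alongside the card above, a minimal inference sketch: the repository id is assumed to match the card title, the question text is illustrative, and the sampling settings mirror generation_config.json further down (temperature 0.6, top_p 0.9).

```python
# Minimal inference sketch (assumed repo id; the question text is illustrative).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

repo_id = "tsavage68/MedQA_L3_1000steps_1e7rate_05beta_CSFTDPO"  # assumed to match the card title
tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForCausalLM.from_pretrained(repo_id, torch_dtype=torch.float16, device_map="auto")

messages = [{"role": "user", "content": "A 24-year-old presents with fever and sore throat. What is the next best step?"}]
input_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)

with torch.no_grad():
    out = model.generate(input_ids, max_new_tokens=256, do_sample=True, temperature=0.6, top_p=0.9)
print(tokenizer.decode(out[0][input_ids.shape[-1]:], skip_special_tokens=True))
```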
config.json
CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "
+  "_name_or_path": "tsavage68/MedQA_L3_1000steps_1e6rate_SFT",
   "architectures": [
     "LlamaForCausalLM"
   ],
@@ -23,7 +23,7 @@
   "rope_theta": 500000.0,
   "tie_word_embeddings": false,
   "torch_dtype": "float16",
-  "transformers_version": "4.41.
+  "transformers_version": "4.41.1",
   "use_cache": false,
   "vocab_size": 128256
 }
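For reference, a small sketch of reading this committed config without pulling the weight shards; the repository id is assumed to match the card title.

```python
# Sketch: inspect the committed config only (no weight download); repo id is assumed.
from transformers import AutoConfig

config = AutoConfig.from_pretrained("tsavage68/MedQA_L3_1000steps_1e7rate_05beta_CSFTDPO")
print(config.architectures)   # ['LlamaForCausalLM']
print(config.vocab_size)      # 128256
print(config.rope_theta)      # 500000.0
```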
final_checkpoint/config.json
CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "
+  "_name_or_path": "tsavage68/MedQA_L3_1000steps_1e6rate_SFT",
   "architectures": [
     "LlamaForCausalLM"
   ],
@@ -23,7 +23,7 @@
   "rope_theta": 500000.0,
   "tie_word_embeddings": false,
   "torch_dtype": "float16",
-  "transformers_version": "4.41.
+  "transformers_version": "4.41.1",
   "use_cache": false,
   "vocab_size": 128256
 }
final_checkpoint/generation_config.json
CHANGED
@@ -8,5 +8,5 @@
   "max_length": 4096,
   "temperature": 0.6,
   "top_p": 0.9,
-  "transformers_version": "4.41.
+  "transformers_version": "4.41.1"
 }
final_checkpoint/model-00001-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:9f367e14b84c970210f88e7eb315634c363047d729c21b0c858bcb8753a2c160
 size 4976698592

final_checkpoint/model-00002-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:107e5faaa4c6d50c82666322c4725b136e5462cfd425b965a5ee1dd730a419ae
 size 4999802616

final_checkpoint/model-00003-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:21702f3064c14669d69adc12d5e282d3e5017b2bbf7a816e76e9a8fb91194d36
 size 4915916080

final_checkpoint/model-00004-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:36313565484ac1939a2a32eeec04429297196ce205c5295f04e6d649e2de13fa
 size 1168138808
generation_config.json
CHANGED
@@ -8,5 +8,5 @@
   "max_length": 4096,
   "temperature": 0.6,
   "top_p": 0.9,
-  "transformers_version": "4.41.
+  "transformers_version": "4.41.1"
 }
model-00001-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:9f367e14b84c970210f88e7eb315634c363047d729c21b0c858bcb8753a2c160
 size 4976698592

model-00002-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:107e5faaa4c6d50c82666322c4725b136e5462cfd425b965a5ee1dd730a419ae
 size 4999802616

model-00003-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:21702f3064c14669d69adc12d5e282d3e5017b2bbf7a816e76e9a8fb91194d36
 size 4915916080

model-00004-of-00004.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:36313565484ac1939a2a32eeec04429297196ce205c5295f04e6d649e2de13fa
 size 1168138808
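Each .safetensors entry above is a Git LFS pointer (an oid and a size), not the tensor data itself. A sketch of checking a locally downloaded shard against its pointer, using the oid and size from model-00004-of-00004.safetensors; the local path is an assumption.

```python
# Sketch: verify a downloaded shard against its Git LFS pointer.
# The local path is hypothetical; oid/size are taken from the pointer file above.
import hashlib
from pathlib import Path

def sha256_of(path: Path, chunk_size: int = 1 << 20) -> str:
    h = hashlib.sha256()
    with path.open("rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

shard = Path("model-00004-of-00004.safetensors")
expected_oid = "36313565484ac1939a2a32eeec04429297196ce205c5295f04e6d649e2de13fa"
expected_size = 1168138808

assert shard.stat().st_size == expected_size
assert sha256_of(shard) == expected_oid
print("shard matches its LFS pointer")
```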
tokenizer_config.json
CHANGED
@@ -2053,11 +2053,15 @@
   "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
   "clean_up_tokenization_spaces": true,
   "eos_token": "<|eot_id|>",
+  "max_length": 1024,
   "model_input_names": [
     "input_ids",
     "attention_mask"
   ],
   "model_max_length": 1000000000000000019884624838656,
   "pad_token": "<|eot_id|>",
-  "
+  "stride": 0,
+  "tokenizer_class": "PreTrainedTokenizerFast",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first"
 }
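The chat_template kept above follows the Llama 3 header/eot format. A minimal sketch of rendering it with apply_chat_template, assuming the repository id matches the card title; the example message is illustrative.

```python
# Minimal sketch: render a prompt using the chat template stored in tokenizer_config.json.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(
    "tsavage68/MedQA_L3_1000steps_1e7rate_05beta_CSFTDPO"  # assumed repo id
)
messages = [{"role": "user", "content": "List first-line treatments for community-acquired pneumonia."}]
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(prompt)
# Roughly: <|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\n...<|eot_id|>
#          <|start_header_id|>assistant<|end_header_id|>\n\n
```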
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:e554fd0d8ef36a2d2b9be6800b6846e248122386c3c6777ba627010d0aab2a62
 size 4667
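training_args.bin is a pickled arguments object written by the Trainer rather than human-readable JSON. A sketch of inspecting it locally; it unpickles only with compatible transformers/trl versions installed, and the attribute names shown are standard TrainingArguments fields, not values taken from this commit.

```python
# Sketch: inspect the pickled training arguments (assumes the file is downloaded locally).
import torch

args = torch.load("training_args.bin")  # newer PyTorch defaults may require weights_only=False
print(type(args).__name__)              # e.g. a TrainingArguments/DPO config subclass
print(args.learning_rate, args.max_steps, args.per_device_train_batch_size)
```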