alvarobartt committed
Commit 7267e16
1 parent: 4a54efc

End of training
README.md ADDED
@@ -0,0 +1,79 @@
+ ---
+ license: apache-2.0
+ base_model: mistralai/Mistral-7B-v0.1
+ tags:
+ - trl
+ - orpo
+ - generated_from_trainer
+ model-index:
+ - name: mistral-orpo-mix-b0.05-l1024-pl512-lr5e-7-cosine
+   results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # mistral-orpo-mix-b0.05-l1024-pl512-lr5e-7-cosine
+
+ This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on an unspecified dataset.
+ It achieves the following results on the evaluation set:
+ - Loss: 0.8648
+ - Rewards/chosen: -0.0405
+ - Rewards/rejected: -0.0502
+ - Rewards/accuracies: 0.6458
+ - Rewards/margins: 0.0097
+ - Logps/rejected: -1.0036
+ - Logps/chosen: -0.8096
+ - Logits/rejected: -2.9146
+ - Logits/chosen: -2.9040
+ - Nll Loss: 0.8392
+ - Log Odds Ratio: -0.6215
+ - Log Odds Chosen: 0.3802
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 5e-07
+ - train_batch_size: 8
+ - eval_batch_size: 8
+ - seed: 42
+ - distributed_type: multi-GPU
+ - num_devices: 4
+ - gradient_accumulation_steps: 2
+ - total_train_batch_size: 64
+ - total_eval_batch_size: 32
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: cosine
+ - lr_scheduler_warmup_ratio: 0.1
+ - lr_scheduler_warmup_steps: 100
+ - num_epochs: 3
+
+ ### Training results
+
+ | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen | Nll Loss | Log Odds Ratio | Log Odds Chosen |
+ |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|:--------:|:--------------:|:---------------:|
+ | 0.9159 | 1.0 | 105 | 0.8794 | -0.0421 | -0.0499 | 0.6302 | 0.0078 | -0.9975 | -0.8413 | -2.8931 | -2.8875 | 0.8561 | -0.6429 | 0.3024 |
+ | 0.8397 | 2.0 | 211 | 0.8612 | -0.0404 | -0.0495 | 0.6458 | 0.0092 | -0.9902 | -0.8071 | -2.8882 | -2.8794 | 0.8366 | -0.6257 | 0.3555 |
+ | 0.7808 | 2.99 | 315 | 0.8648 | -0.0405 | -0.0502 | 0.6458 | 0.0097 | -1.0036 | -0.8096 | -2.9146 | -2.9040 | 0.8392 | -0.6215 | 0.3802 |
+
+
+ ### Framework versions
+
+ - Transformers 4.39.0
+ - Pytorch 2.1.1+cu121
+ - Datasets 2.16.1
+ - Tokenizers 0.15.2
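
A quick consistency check on the card's metrics: reading the `b0.05` in the run name as the ORPO β (an assumption; the actual training arguments live only in the binary `training_args.bin` below), the reported rewards are the β-scaled average log-probabilities, and the total loss decomposes into the NLL term plus the β-weighted odds-ratio penalty, up to batch averaging. A minimal sketch:

```python
# Sanity-check the ORPO eval metrics above, assuming beta = 0.05
# (read from the "b0.05" in the run name; not confirmed elsewhere in the card).
beta = 0.05
logps_chosen = -0.8096
nll_loss = 0.8392
log_odds_ratio = -0.6215

# Rewards are beta-scaled mean log-probabilities.
print(beta * logps_chosen)                  # -0.04048 ~ reported Rewards/chosen -0.0405

# Total loss ~ NLL (SFT term) + beta * odds-ratio penalty; batch-level
# averaging makes this approximate rather than exact.
print(nll_loss + beta * (-log_odds_ratio))  # ~0.8703 vs. reported Loss 0.8648
```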
added_tokens.json ADDED
@@ -0,0 +1,4 @@
+ {
+   "<|im_end|>": 32001,
+   "<|im_start|>": 32000
+ }
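
The two ChatML delimiters extend Mistral's base 32,000-token vocabulary to the 32,002 entries declared in `config.json`. A hedged check (the repo id below is inferred from the committer and run name, so treat it as a placeholder):

```python
# Confirm the added ChatML tokens sit at ids 32000/32001. The repo id is an
# assumption inferred from the commit author and run name.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("alvarobartt/mistral-orpo-mix-b0.05-l1024-pl512-lr5e-7-cosine")
print(tok.convert_tokens_to_ids("<|im_start|>"))  # expected: 32000
print(tok.convert_tokens_to_ids("<|im_end|>"))    # expected: 32001
print(len(tok))                                   # expected: 32002
```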
config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "_name_or_path": "mistralai/Mistral-7B-v0.1",
+   "architectures": [
+     "MistralForCausalLM"
+   ],
+   "attention_dropout": 0.0,
+   "bos_token_id": 1,
+   "eos_token_id": 2,
+   "hidden_act": "silu",
+   "hidden_size": 4096,
+   "initializer_range": 0.02,
+   "intermediate_size": 14336,
+   "max_position_embeddings": 32768,
+   "model_type": "mistral",
+   "num_attention_heads": 32,
+   "num_hidden_layers": 32,
+   "num_key_value_heads": 8,
+   "rms_norm_eps": 1e-05,
+   "rope_theta": 10000.0,
+   "sliding_window": 4096,
+   "tie_word_embeddings": false,
+   "torch_dtype": "bfloat16",
+   "transformers_version": "4.39.0",
+   "use_cache": true,
+   "vocab_size": 32002
+ }
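
These fields pin down the parameter count exactly: untied embeddings over a 32,002-token vocabulary, 32 layers with grouped-query attention (8 KV heads at head dim 128), and a 14,336-wide SwiGLU MLP give roughly 7.24B parameters, matching the bfloat16 byte total declared in `model.safetensors.index.json` further down. A sketch with the values hard-coded from the config above:

```python
# Parameter count implied by config.json (values copied from the file above).
hidden, layers, inter, vocab = 4096, 32, 14336, 32002
kv_dim = 8 * (hidden // 32)  # 8 KV heads * head_dim 128 = 1024 (GQA)

attn = 2 * hidden * hidden + 2 * hidden * kv_dim  # q/o projections + k/v projections
mlp = 3 * hidden * inter                          # gate/up/down projections
norms = 2 * hidden                                # two RMSNorms per layer
per_layer = attn + mlp + norms

total = layers * per_layer + 2 * vocab * hidden + hidden  # + embeddings, lm_head, final norm
print(total)      # 7241748480 parameters
print(total * 2)  # 14483496960 bytes in bfloat16
```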
final_checkpoint/added_tokens.json ADDED
Contents are identical to added_tokens.json above.
final_checkpoint/config.json ADDED
Contents are identical to config.json above.
final_checkpoint/special_tokens_map.json ADDED
@@ -0,0 +1,28 @@
+ {
+   "additional_special_tokens": [
+     {
+       "content": "<|im_start|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false
+     },
+     {
+       "content": "<|im_end|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false
+     }
+   ],
+   "bos_token": "<|im_start|>",
+   "eos_token": "<|im_end|>",
+   "pad_token": "<|im_end|>",
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
final_checkpoint/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
final_checkpoint/tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
+ size 493443
final_checkpoint/tokenizer_config.json ADDED
@@ -0,0 +1,62 @@
+ {
+   "add_bos_token": true,
+   "add_eos_token": false,
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<unk>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "<s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "</s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "32000": {
+       "content": "<|im_start|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "32001": {
+       "content": "<|im_end|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "additional_special_tokens": [
+     "<|im_start|>",
+     "<|im_end|>"
+   ],
+   "bos_token": "<|im_start|>",
+   "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
+   "clean_up_tokenization_spaces": false,
+   "eos_token": "<|im_end|>",
+   "legacy": true,
+   "model_max_length": 2048,
+   "pad_token": "<|im_end|>",
+   "sp_model_kwargs": {},
+   "spaces_between_special_tokens": false,
+   "tokenizer_class": "LlamaTokenizer",
+   "unk_token": "<unk>",
+   "use_default_system_prompt": false
+ }
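
The `chat_template` above is plain ChatML. A minimal sketch of how it renders, assuming a local copy of these tokenizer files (the path is a placeholder):

```python
# Render the ChatML chat_template defined in tokenizer_config.json.
# "./final_checkpoint" is a placeholder for a local copy of these files.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("./final_checkpoint")
messages = [{"role": "user", "content": "What is ORPO?"}]
prompt = tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(prompt)
# <|im_start|>user
# What is ORPO?<|im_end|>
# <|im_start|>assistant
```

Note that `model_max_length` is set to 2048 here, independent of both the base model's 32,768 positions and the `l1024`/`pl512` sequence-length hints in the run name.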
generation_config.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "_from_model_config": true,
+   "bos_token_id": 32000,
+   "eos_token_id": 32001,
+   "pad_token_id": 32001,
+   "transformers_version": "4.39.0"
+ }
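
Note the remap: `config.json` keeps Mistral's `<s>`/`</s>` ids (1/2), while this generation config makes the ChatML tokens the BOS/EOS/PAD, so `generate()` stops at `<|im_end|>`. A hedged end-to-end sketch (the repo id is again inferred, and the settings are illustrative):

```python
# End-to-end generation sketch; the repo id is an assumption and the
# generation settings are illustrative, not taken from this commit.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

repo = "alvarobartt/mistral-orpo-mix-b0.05-l1024-pl512-lr5e-7-cosine"
tok = AutoTokenizer.from_pretrained(repo)
model = AutoModelForCausalLM.from_pretrained(repo, torch_dtype=torch.bfloat16, device_map="auto")

messages = [{"role": "user", "content": "Summarize ORPO in one sentence."}]
inputs = tok.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to(model.device)

# generation_config.json sets eos_token_id=32001 (<|im_end|>), so decoding stops there.
output = model.generate(inputs, max_new_tokens=128)
print(tok.decode(output[0][inputs.shape[-1]:], skip_special_tokens=True))
```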
model-00001-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:18d457cf6a5ca1f95cf59dce109e7594691ba8db02734c1d5e2c4e49c408a688
+ size 4943178720
model-00002-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6895049aa92d025dabc3002926fded24394c3b63868cf6d2a282274d7120acfd
+ size 4999819336
model-00003-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bc45bdcd8a319de2b7b1abb2f44f8f051930de6bc9f13322a17faa3f9a3a8a9d
+ size 4540532728
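
These three entries are Git LFS pointers, not the weights themselves: each records only the SHA-256 and byte size of the object it stands for. After downloading a shard, the pointer can be verified locally:

```python
# Verify a downloaded shard against its LFS pointer above (hash and size).
import hashlib, os

expected = "18d457cf6a5ca1f95cf59dce109e7594691ba8db02734c1d5e2c4e49c408a688"
path = "model-00001-of-00003.safetensors"  # wherever the shard was fetched

h = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        h.update(chunk)

print(os.path.getsize(path) == 4943178720)  # size from the pointer
print(h.hexdigest() == expected)            # True if the download is intact
```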
model.safetensors.index.json ADDED
@@ -0,0 +1,298 @@
+ {
+   "metadata": {
+     "total_size": 14483496960
+   },
+   "weight_map": {
+     "lm_head.weight": "model-00003-of-00003.safetensors",
+     "model.embed_tokens.weight": "model-00001-of-00003.safetensors",
+     "model.layers.0.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.0.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.0.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.1.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.1.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.1.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.10.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.10.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.10.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.11.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.11.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.11.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.12.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.12.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.12.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.13.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.13.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.13.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.14.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.14.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.14.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.15.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.15.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.15.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.16.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.16.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.16.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.17.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.17.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.17.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.18.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.18.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.18.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.19.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.19.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.19.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.2.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.2.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.2.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.20.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.20.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.20.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.21.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.21.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.21.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.22.input_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.22.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.22.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.23.input_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.23.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.23.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.24.input_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.24.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.24.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.25.input_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.25.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.25.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.26.input_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.26.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.26.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.27.input_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.27.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.27.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.28.input_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.28.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.28.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.29.input_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.29.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.29.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.3.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.3.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.3.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.30.input_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.30.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.30.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.31.input_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.31.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.31.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.4.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.4.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.4.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.5.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.5.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.5.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.6.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.6.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.6.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.7.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.7.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.7.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.8.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.8.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.8.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.9.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.9.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.9.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.norm.weight": "model-00003-of-00003.safetensors"
+   }
+ }
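
The index routes each of the 291 tensors to one of the three shards, and its `total_size` of 14,483,496,960 bytes is exactly the ~7.24B parameters computed from `config.json` above at 2 bytes each (bfloat16). A small sketch over a local copy of the file:

```python
# Tally tensors per shard and cross-check total_size against the bf16
# parameter count derived from config.json earlier.
import json
from collections import Counter

with open("model.safetensors.index.json") as f:  # local copy of the file above
    index = json.load(f)

print(Counter(index["weight_map"].values()))
# Counter({'model-00002-of-00003.safetensors': 106,
#          'model-00001-of-00003.safetensors': 97,
#          'model-00003-of-00003.safetensors': 88})

print(index["metadata"]["total_size"] == 7241748480 * 2)  # True
```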
runs/Mar26_04-45-16_alvarobartt-slimy-moth-1-0/events.out.tfevents.1711442779.alvarobartt-slimy-moth-1-0.445.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cfa3e265f93cbeec1cc808b906966ce46404c01a77b2872489cfb05bb19359d8
+ size 34575
special_tokens_map.json ADDED
Contents are identical to final_checkpoint/special_tokens_map.json above.
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.model ADDED
Contents are identical to final_checkpoint/tokenizer.model above (same LFS pointer).
tokenizer_config.json ADDED
Contents are identical to final_checkpoint/tokenizer_config.json above.
trainer_state.json ADDED
@@ -0,0 +1,645 @@
+ {
+   "best_metric": null,
+   "best_model_checkpoint": null,
+   "epoch": 2.985781990521327,
+   "eval_steps": 500,
+   "global_step": 315,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.09,
+       "grad_norm": 51.45086593171862,
+       "learning_rate": 5e-08,
+       "log_odds_chosen": 0.23327143490314484,
+       "log_odds_ratio": -0.7173982262611389,
+       "logits/chosen": -2.9019644260406494,
+       "logits/rejected": -2.911267042160034,
+       "logps/chosen": -1.2478530406951904,
+       "logps/rejected": -1.4603310823440552,
+       "loss": 1.1528,
+       "nll_loss": 1.110867977142334,
+       "rewards/accuracies": 0.5249999761581421,
+       "rewards/chosen": -0.06239265203475952,
+       "rewards/margins": 0.010623900219798088,
+       "rewards/rejected": -0.07301654666662216,
+       "step": 10
+     },
+     {
+       "epoch": 0.19,
+       "grad_norm": 43.38597658388667,
+       "learning_rate": 1e-07,
+       "log_odds_chosen": 0.025622714310884476,
+       "log_odds_ratio": -0.803138256072998,
+       "logits/chosen": -2.957723617553711,
+       "logits/rejected": -2.9606709480285645,
+       "logps/chosen": -1.2851994037628174,
+       "logps/rejected": -1.2747653722763062,
+       "loss": 1.109,
+       "nll_loss": 1.0504928827285767,
+       "rewards/accuracies": 0.5,
+       "rewards/chosen": -0.06425996124744415,
+       "rewards/margins": -0.0005216972786001861,
+       "rewards/rejected": -0.06373827159404755,
+       "step": 20
+     },
+     {
+       "epoch": 0.28,
+       "grad_norm": 18.07276834258672,
+       "learning_rate": 1.5e-07,
+       "log_odds_chosen": 0.33495259284973145,
+       "log_odds_ratio": -0.6577204465866089,
+       "logits/chosen": -2.936415195465088,
+       "logits/rejected": -2.939385175704956,
+       "logps/chosen": -1.0753445625305176,
+       "logps/rejected": -1.3320289850234985,
+       "loss": 1.0256,
+       "nll_loss": 0.9573599696159363,
+       "rewards/accuracies": 0.5625,
+       "rewards/chosen": -0.053767234086990356,
+       "rewards/margins": 0.012834218330681324,
+       "rewards/rejected": -0.06660144031047821,
+       "step": 30
+     },
+     {
+       "epoch": 0.38,
+       "grad_norm": 14.138346458866787,
+       "learning_rate": 2e-07,
+       "log_odds_chosen": 0.06818895787000656,
+       "log_odds_ratio": -0.7556289434432983,
+       "logits/chosen": -2.979851484298706,
+       "logits/rejected": -2.9792511463165283,
+       "logps/chosen": -1.0226948261260986,
+       "logps/rejected": -1.0867162942886353,
+       "loss": 0.9841,
+       "nll_loss": 0.9785293340682983,
+       "rewards/accuracies": 0.518750011920929,
+       "rewards/chosen": -0.05113474652171135,
+       "rewards/margins": 0.003201070474460721,
+       "rewards/rejected": -0.0543358139693737,
+       "step": 40
+     },
+     {
+       "epoch": 0.47,
+       "grad_norm": 19.142475208524697,
+       "learning_rate": 2.5e-07,
+       "log_odds_chosen": 0.2577429711818695,
+       "log_odds_ratio": -0.6650774478912354,
+       "logits/chosen": -3.0103330612182617,
+       "logits/rejected": -3.0095608234405518,
+       "logps/chosen": -0.9924309849739075,
+       "logps/rejected": -1.1665266752243042,
+       "loss": 0.9833,
+       "nll_loss": 0.965359091758728,
+       "rewards/accuracies": 0.59375,
+       "rewards/chosen": -0.04962155967950821,
+       "rewards/margins": 0.008704781532287598,
+       "rewards/rejected": -0.05832633376121521,
+       "step": 50
+     },
+     {
+       "epoch": 0.57,
+       "grad_norm": 9.00103625601085,
+       "learning_rate": 3e-07,
+       "log_odds_chosen": 0.2601451277732849,
+       "log_odds_ratio": -0.6529109477996826,
+       "logits/chosen": -3.0405919551849365,
+       "logits/rejected": -3.0266852378845215,
+       "logps/chosen": -0.8860586881637573,
+       "logps/rejected": -1.0516645908355713,
+       "loss": 0.967,
+       "nll_loss": 0.8791006207466125,
+       "rewards/accuracies": 0.6187499761581421,
+       "rewards/chosen": -0.044302936643362045,
+       "rewards/margins": 0.008280294016003609,
+       "rewards/rejected": -0.0525832362473011,
+       "step": 60
+     },
+     {
+       "epoch": 0.66,
+       "grad_norm": 8.208565662624949,
+       "learning_rate": 3.5e-07,
+       "log_odds_chosen": 0.24647900462150574,
+       "log_odds_ratio": -0.6602240800857544,
+       "logits/chosen": -2.9646213054656982,
+       "logits/rejected": -2.9692559242248535,
+       "logps/chosen": -0.8851203918457031,
+       "logps/rejected": -1.04824697971344,
+       "loss": 0.9521,
+       "nll_loss": 0.8758776783943176,
+       "rewards/accuracies": 0.6312500238418579,
+       "rewards/chosen": -0.044256024062633514,
+       "rewards/margins": 0.008156323805451393,
+       "rewards/rejected": -0.05241234228014946,
+       "step": 70
+     },
+     {
+       "epoch": 0.76,
+       "grad_norm": 9.949021333520767,
+       "learning_rate": 4e-07,
+       "log_odds_chosen": 0.3642485439777374,
+       "log_odds_ratio": -0.6296299695968628,
+       "logits/chosen": -2.9997572898864746,
+       "logits/rejected": -2.9932103157043457,
+       "logps/chosen": -0.8595079183578491,
+       "logps/rejected": -1.0957872867584229,
+       "loss": 0.8992,
+       "nll_loss": 0.8687052726745605,
+       "rewards/accuracies": 0.6000000238418579,
+       "rewards/chosen": -0.042975399643182755,
+       "rewards/margins": 0.011813966557383537,
+       "rewards/rejected": -0.05478937178850174,
+       "step": 80
+     },
+     {
+       "epoch": 0.85,
+       "grad_norm": 11.826771567950217,
+       "learning_rate": 4.5e-07,
+       "log_odds_chosen": 0.13775679469108582,
+       "log_odds_ratio": -0.7174943685531616,
+       "logits/chosen": -2.9466898441314697,
+       "logits/rejected": -2.9638590812683105,
+       "logps/chosen": -0.8956987261772156,
+       "logps/rejected": -0.9907738566398621,
+       "loss": 0.903,
+       "nll_loss": 0.8976006507873535,
+       "rewards/accuracies": 0.512499988079071,
+       "rewards/chosen": -0.04478494077920914,
+       "rewards/margins": 0.0047537581995129585,
+       "rewards/rejected": -0.04953869432210922,
+       "step": 90
+     },
+     {
+       "epoch": 0.95,
+       "grad_norm": 7.898257944088662,
+       "learning_rate": 5e-07,
+       "log_odds_chosen": 0.19350430369377136,
+       "log_odds_ratio": -0.6868109703063965,
+       "logits/chosen": -2.932610034942627,
+       "logits/rejected": -2.9326844215393066,
+       "logps/chosen": -0.8639435768127441,
+       "logps/rejected": -0.9883400797843933,
+       "loss": 0.9159,
+       "nll_loss": 0.8739891052246094,
+       "rewards/accuracies": 0.606249988079071,
+       "rewards/chosen": -0.04319717735052109,
+       "rewards/margins": 0.0062198275700211525,
+       "rewards/rejected": -0.049417007714509964,
+       "step": 100
+     },
+     {
+       "epoch": 1.0,
+       "eval_log_odds_chosen": 0.3023655116558075,
+       "eval_log_odds_ratio": -0.6428868770599365,
+       "eval_logits/chosen": -2.887465238571167,
+       "eval_logits/rejected": -2.8930952548980713,
+       "eval_logps/chosen": -0.8412830829620361,
+       "eval_logps/rejected": -0.9974649548530579,
+       "eval_loss": 0.8794215321540833,
+       "eval_nll_loss": 0.8561177253723145,
+       "eval_rewards/accuracies": 0.6302083134651184,
+       "eval_rewards/chosen": -0.042064156383275986,
+       "eval_rewards/margins": 0.007809097412973642,
+       "eval_rewards/rejected": -0.04987325146794319,
+       "eval_runtime": 32.7887,
+       "eval_samples_per_second": 22.874,
+       "eval_steps_per_second": 0.732,
+       "step": 105
+     },
+     {
+       "epoch": 1.04,
+       "grad_norm": 7.858898751829365,
+       "learning_rate": 4.973358420187775e-07,
+       "log_odds_chosen": 0.2784392237663269,
+       "log_odds_ratio": -0.6759603023529053,
+       "logits/chosen": -2.898139715194702,
+       "logits/rejected": -2.888554096221924,
+       "logps/chosen": -0.8769194483757019,
+       "logps/rejected": -1.0477098226547241,
+       "loss": 0.9267,
+       "nll_loss": 0.9084149599075317,
+       "rewards/accuracies": 0.5687500238418579,
+       "rewards/chosen": -0.043845973908901215,
+       "rewards/margins": 0.00853952020406723,
+       "rewards/rejected": -0.05238549783825874,
+       "step": 110
+     },
+     {
+       "epoch": 1.14,
+       "grad_norm": 8.696606134772816,
+       "learning_rate": 4.894001499771015e-07,
+       "log_odds_chosen": 0.408263623714447,
+       "log_odds_ratio": -0.5970031023025513,
+       "logits/chosen": -2.8863773345947266,
+       "logits/rejected": -2.8754923343658447,
+       "logps/chosen": -0.82969731092453,
+       "logps/rejected": -1.0684994459152222,
+       "loss": 0.8737,
+       "nll_loss": 0.8546596765518188,
+       "rewards/accuracies": 0.6875,
+       "rewards/chosen": -0.04148487001657486,
+       "rewards/margins": 0.011940106749534607,
+       "rewards/rejected": -0.05342497676610947,
+       "step": 120
+     },
+     {
+       "epoch": 1.23,
+       "grad_norm": 8.142229649165017,
+       "learning_rate": 4.7636205937328664e-07,
+       "log_odds_chosen": 0.24737975001335144,
+       "log_odds_ratio": -0.6806861162185669,
+       "logits/chosen": -2.8786978721618652,
+       "logits/rejected": -2.869795083999634,
+       "logps/chosen": -0.8045932054519653,
+       "logps/rejected": -0.9472286105155945,
+       "loss": 0.9147,
+       "nll_loss": 0.8494957089424133,
+       "rewards/accuracies": 0.59375,
+       "rewards/chosen": -0.04022965580224991,
+       "rewards/margins": 0.007131779100745916,
+       "rewards/rejected": -0.04736143350601196,
+       "step": 130
+     },
+     {
+       "epoch": 1.33,
+       "grad_norm": 8.40750837533949,
+       "learning_rate": 4.584994544724695e-07,
+       "log_odds_chosen": 0.2743343412876129,
+       "log_odds_ratio": -0.6686180830001831,
+       "logits/chosen": -2.866347312927246,
+       "logits/rejected": -2.8765628337860107,
+       "logps/chosen": -0.7923319935798645,
+       "logps/rejected": -0.9410168528556824,
+       "loss": 0.8631,
+       "nll_loss": 0.8559592366218567,
+       "rewards/accuracies": 0.606249988079071,
+       "rewards/chosen": -0.039616603404283524,
+       "rewards/margins": 0.007434243801981211,
+       "rewards/rejected": -0.047050848603248596,
+       "step": 140
+     },
+     {
+       "epoch": 1.42,
+       "grad_norm": 8.158461437554013,
+       "learning_rate": 4.3619304568594546e-07,
+       "log_odds_chosen": 0.3588339388370514,
+       "log_odds_ratio": -0.6238493919372559,
+       "logits/chosen": -2.869856119155884,
+       "logits/rejected": -2.8966941833496094,
+       "logps/chosen": -0.7852147817611694,
+       "logps/rejected": -0.9708358645439148,
+       "loss": 0.8549,
+       "nll_loss": 0.8177824020385742,
+       "rewards/accuracies": 0.6312500238418579,
+       "rewards/chosen": -0.03926074132323265,
+       "rewards/margins": 0.009281056001782417,
+       "rewards/rejected": -0.04854179546236992,
+       "step": 150
+     },
+     {
+       "epoch": 1.52,
+       "grad_norm": 8.09400448571549,
+       "learning_rate": 4.099182553897228e-07,
+       "log_odds_chosen": 0.47473040223121643,
+       "log_odds_ratio": -0.5752421021461487,
+       "logits/chosen": -2.8670287132263184,
+       "logits/rejected": -2.849987506866455,
+       "logps/chosen": -0.7626146078109741,
+       "logps/rejected": -1.0768699645996094,
+       "loss": 0.8506,
+       "nll_loss": 0.7988383173942566,
+       "rewards/accuracies": 0.699999988079071,
+       "rewards/chosen": -0.038130730390548706,
+       "rewards/margins": 0.01571277156472206,
+       "rewards/rejected": -0.05384349822998047,
+       "step": 160
+     },
+     {
+       "epoch": 1.61,
+       "grad_norm": 6.8406261430616535,
+       "learning_rate": 3.8023508512198257e-07,
+       "log_odds_chosen": 0.35588333010673523,
+       "log_odds_ratio": -0.6235536336898804,
+       "logits/chosen": -2.8909614086151123,
+       "logits/rejected": -2.8951175212860107,
+       "logps/chosen": -0.7602669596672058,
+       "logps/rejected": -0.9576327204704285,
+       "loss": 0.8503,
+       "nll_loss": 0.7980303764343262,
+       "rewards/accuracies": 0.6312500238418579,
+       "rewards/chosen": -0.03801335021853447,
+       "rewards/margins": 0.009868290275335312,
+       "rewards/rejected": -0.04788164049386978,
+       "step": 170
+     },
+     {
+       "epoch": 1.71,
+       "grad_norm": 7.855569563727718,
+       "learning_rate": 3.4777618012253895e-07,
+       "log_odds_chosen": 0.243193581700325,
+       "log_odds_ratio": -0.6670368909835815,
+       "logits/chosen": -2.84794545173645,
+       "logits/rejected": -2.8654625415802,
+       "logps/chosen": -0.8232158422470093,
+       "logps/rejected": -0.9566510915756226,
+       "loss": 0.8633,
+       "nll_loss": 0.8674771189689636,
+       "rewards/accuracies": 0.6187499761581421,
+       "rewards/chosen": -0.041160788387060165,
+       "rewards/margins": 0.006671770475804806,
+       "rewards/rejected": -0.04783256724476814,
+       "step": 180
+     },
+     {
+       "epoch": 1.8,
+       "grad_norm": 7.6860644181764375,
+       "learning_rate": 3.1323334559792015e-07,
+       "log_odds_chosen": 0.2766939699649811,
+       "log_odds_ratio": -0.6430349349975586,
+       "logits/chosen": -2.8580737113952637,
+       "logits/rejected": -2.8492133617401123,
+       "logps/chosen": -0.7749003171920776,
+       "logps/rejected": -0.9475242495536804,
+       "loss": 0.8497,
+       "nll_loss": 0.7961811423301697,
+       "rewards/accuracies": 0.6312500238418579,
+       "rewards/chosen": -0.03874502331018448,
+       "rewards/margins": 0.008631192147731781,
+       "rewards/rejected": -0.047376204282045364,
+       "step": 190
+     },
+     {
+       "epoch": 1.9,
+       "grad_norm": 7.778871538887919,
+       "learning_rate": 2.7734280209446865e-07,
+       "log_odds_chosen": 0.2929554879665375,
+       "log_odds_ratio": -0.6454571485519409,
+       "logits/chosen": -2.874631881713867,
+       "logits/rejected": -2.872854232788086,
+       "logps/chosen": -0.8560446500778198,
+       "logps/rejected": -1.0225257873535156,
+       "loss": 0.8742,
+       "nll_loss": 0.8836696743965149,
+       "rewards/accuracies": 0.6312500238418579,
+       "rewards/chosen": -0.04280223324894905,
+       "rewards/margins": 0.008324062451720238,
+       "rewards/rejected": -0.05112629011273384,
+       "step": 200
+     },
+     {
+       "epoch": 1.99,
+       "grad_norm": 8.608571431216085,
+       "learning_rate": 2.4086949423558525e-07,
+       "log_odds_chosen": 0.4544145464897156,
+       "log_odds_ratio": -0.5953847169876099,
+       "logits/chosen": -2.8644211292266846,
+       "logits/rejected": -2.8725366592407227,
+       "logps/chosen": -0.7150249481201172,
+       "logps/rejected": -0.9680275917053223,
+       "loss": 0.8397,
+       "nll_loss": 0.7700489163398743,
+       "rewards/accuracies": 0.6312500238418579,
+       "rewards/chosen": -0.03575124591588974,
+       "rewards/margins": 0.012650132179260254,
+       "rewards/rejected": -0.048401378095149994,
+       "step": 210
+     },
+     {
+       "epoch": 2.0,
+       "eval_log_odds_chosen": 0.3554897606372833,
+       "eval_log_odds_ratio": -0.6257309317588806,
+       "eval_logits/chosen": -2.8794047832489014,
+       "eval_logits/rejected": -2.888169527053833,
+       "eval_logps/chosen": -0.8071174621582031,
+       "eval_logps/rejected": -0.9902352690696716,
+       "eval_loss": 0.8611603379249573,
+       "eval_nll_loss": 0.8365828990936279,
+       "eval_rewards/accuracies": 0.6458333134651184,
+       "eval_rewards/chosen": -0.040355876088142395,
+       "eval_rewards/margins": 0.009155889973044395,
+       "eval_rewards/rejected": -0.04951176047325134,
+       "eval_runtime": 32.7226,
+       "eval_samples_per_second": 22.92,
+       "eval_steps_per_second": 0.733,
+       "step": 211
+     },
+     {
+       "epoch": 2.09,
+       "grad_norm": 6.957321522636136,
+       "learning_rate": 2.0459078725514089e-07,
+       "log_odds_chosen": 0.5733749866485596,
+       "log_odds_ratio": -0.5376938581466675,
+       "logits/chosen": -2.876321315765381,
+       "logits/rejected": -2.8998565673828125,
+       "logps/chosen": -0.6645466089248657,
+       "logps/rejected": -0.9755932092666626,
+       "loss": 0.7779,
+       "nll_loss": 0.7205012440681458,
+       "rewards/accuracies": 0.7124999761581421,
+       "rewards/chosen": -0.033227331936359406,
+       "rewards/margins": 0.015552327036857605,
+       "rewards/rejected": -0.04877965897321701,
+       "step": 220
+     },
+     {
+       "epoch": 2.18,
+       "grad_norm": 7.383557049949752,
+       "learning_rate": 1.692798988071385e-07,
+       "log_odds_chosen": 0.6264925003051758,
+       "log_odds_ratio": -0.5338762998580933,
+       "logits/chosen": -2.9063897132873535,
+       "logits/rejected": -2.9123330116271973,
+       "logps/chosen": -0.6798352003097534,
+       "logps/rejected": -1.0108721256256104,
+       "loss": 0.7879,
+       "nll_loss": 0.763096809387207,
+       "rewards/accuracies": 0.7562500238418579,
+       "rewards/chosen": -0.03399176150560379,
+       "rewards/margins": 0.016551844775676727,
+       "rewards/rejected": -0.05054359883069992,
+       "step": 230
+     },
+     {
+       "epoch": 2.27,
+       "grad_norm": 7.205471545860071,
+       "learning_rate": 1.3568941917384036e-07,
+       "log_odds_chosen": 0.6044402122497559,
+       "log_odds_ratio": -0.5256311297416687,
+       "logits/chosen": -2.929206609725952,
+       "logits/rejected": -2.9085187911987305,
+       "logps/chosen": -0.6959676742553711,
+       "logps/rejected": -1.0572305917739868,
+       "loss": 0.7677,
+       "nll_loss": 0.7402119636535645,
+       "rewards/accuracies": 0.737500011920929,
+       "rewards/chosen": -0.034798379987478256,
+       "rewards/margins": 0.018063146620988846,
+       "rewards/rejected": -0.0528615303337574,
+       "step": 240
+     },
+     {
+       "epoch": 2.37,
+       "grad_norm": 7.342052939736115,
+       "learning_rate": 1.0453527111051183e-07,
+       "log_odds_chosen": 0.6251620054244995,
+       "log_odds_ratio": -0.5341116786003113,
+       "logits/chosen": -2.8869872093200684,
+       "logits/rejected": -2.912830352783203,
+       "logps/chosen": -0.6446677446365356,
+       "logps/rejected": -0.9978305101394653,
+       "loss": 0.7682,
+       "nll_loss": 0.6836553812026978,
+       "rewards/accuracies": 0.737500011920929,
+       "rewards/chosen": -0.03223338723182678,
+       "rewards/margins": 0.017658134922385216,
+       "rewards/rejected": -0.04989152401685715,
+       "step": 250
+     },
+     {
+       "epoch": 2.46,
+       "grad_norm": 7.710315387822491,
+       "learning_rate": 7.648145119484151e-08,
+       "log_odds_chosen": 0.582895815372467,
+       "log_odds_ratio": -0.5480377078056335,
+       "logits/chosen": -2.89570689201355,
+       "logits/rejected": -2.8959426879882812,
+       "logps/chosen": -0.7015441656112671,
+       "logps/rejected": -0.9980028867721558,
+       "loss": 0.7845,
+       "nll_loss": 0.7611762881278992,
+       "rewards/accuracies": 0.6625000238418579,
+       "rewards/chosen": -0.035077206790447235,
+       "rewards/margins": 0.014822937548160553,
+       "rewards/rejected": -0.04990014433860779,
+       "step": 260
+     },
+     {
+       "epoch": 2.56,
+       "grad_norm": 7.850416556443741,
+       "learning_rate": 5.212587789268649e-08,
+       "log_odds_chosen": 0.5626873970031738,
+       "log_odds_ratio": -0.5825694799423218,
+       "logits/chosen": -2.898651599884033,
+       "logits/rejected": -2.9134531021118164,
+       "logps/chosen": -0.713202714920044,
+       "logps/rejected": -0.9749401211738586,
+       "loss": 0.7837,
+       "nll_loss": 0.7641984224319458,
+       "rewards/accuracies": 0.6937500238418579,
+       "rewards/chosen": -0.03566013649106026,
+       "rewards/margins": 0.013086864724755287,
+       "rewards/rejected": -0.04874700307846069,
+       "step": 270
+     },
+     {
+       "epoch": 2.65,
+       "grad_norm": 7.144333751483519,
+       "learning_rate": 3.198764796404807e-08,
+       "log_odds_chosen": 0.6033560037612915,
+       "log_odds_ratio": -0.5110877156257629,
+       "logits/chosen": -2.9342381954193115,
+       "logits/rejected": -2.9433302879333496,
+       "logps/chosen": -0.6664480566978455,
+       "logps/rejected": -0.9789284467697144,
+       "loss": 0.7707,
+       "nll_loss": 0.7404603362083435,
+       "rewards/accuracies": 0.762499988079071,
+       "rewards/chosen": -0.03332240507006645,
+       "rewards/margins": 0.015624021179974079,
+       "rewards/rejected": -0.04894643276929855,
+       "step": 280
+     },
+     {
+       "epoch": 2.75,
+       "grad_norm": 7.681448875266474,
+       "learning_rate": 1.649597281686302e-08,
+       "log_odds_chosen": 0.5207457542419434,
+       "log_odds_ratio": -0.5457995533943176,
+       "logits/chosen": -2.933040142059326,
+       "logits/rejected": -2.942648410797119,
+       "logps/chosen": -0.6956908106803894,
+       "logps/rejected": -0.970605194568634,
+       "loss": 0.7797,
+       "nll_loss": 0.7457938194274902,
+       "rewards/accuracies": 0.762499988079071,
+       "rewards/chosen": -0.03478454053401947,
+       "rewards/margins": 0.013745710253715515,
+       "rewards/rejected": -0.048530250787734985,
+       "step": 290
+     },
+     {
+       "epoch": 2.84,
+       "grad_norm": 7.3441185490833805,
+       "learning_rate": 5.981030611018234e-09,
+       "log_odds_chosen": 0.643921434879303,
+       "log_odds_ratio": -0.5143691897392273,
+       "logits/chosen": -2.923649549484253,
+       "logits/rejected": -2.937542676925659,
+       "logps/chosen": -0.7117995619773865,
+       "logps/rejected": -1.0296577215194702,
+       "loss": 0.7861,
+       "nll_loss": 0.7953752279281616,
+       "rewards/accuracies": 0.7562500238418579,
+       "rewards/chosen": -0.035589978098869324,
+       "rewards/margins": 0.015892909839749336,
+       "rewards/rejected": -0.05148288607597351,
+       "step": 300
+     },
+     {
+       "epoch": 2.94,
+       "grad_norm": 7.411020953558967,
+       "learning_rate": 6.66929084112089e-10,
+       "log_odds_chosen": 0.531533420085907,
+       "log_odds_ratio": -0.5645086765289307,
+       "logits/chosen": -2.9212653636932373,
+       "logits/rejected": -2.941697835922241,
+       "logps/chosen": -0.6984909772872925,
+       "logps/rejected": -0.9673376083374023,
+       "loss": 0.7808,
+       "nll_loss": 0.7526401877403259,
+       "rewards/accuracies": 0.7124999761581421,
+       "rewards/chosen": -0.034924548119306564,
+       "rewards/margins": 0.013442324474453926,
+       "rewards/rejected": -0.04836687445640564,
+       "step": 310
+     },
+     {
+       "epoch": 2.99,
+       "eval_log_odds_chosen": 0.3802085816860199,
+       "eval_log_odds_ratio": -0.6214679479598999,
+       "eval_logits/chosen": -2.9039885997772217,
+       "eval_logits/rejected": -2.914613723754883,
+       "eval_logps/chosen": -0.8096261024475098,
+       "eval_logps/rejected": -1.0036059617996216,
+       "eval_loss": 0.8648061752319336,
+       "eval_nll_loss": 0.8392227292060852,
+       "eval_rewards/accuracies": 0.6458333134651184,
+       "eval_rewards/chosen": -0.04048130661249161,
+       "eval_rewards/margins": 0.009698997251689434,
+       "eval_rewards/rejected": -0.05018030107021332,
+       "eval_runtime": 32.6828,
+       "eval_samples_per_second": 22.948,
+       "eval_steps_per_second": 0.734,
+       "step": 315
+     },
+     {
+       "epoch": 2.99,
+       "step": 315,
+       "total_flos": 0.0,
+       "train_loss": 0.877839538029262,
+       "train_runtime": 3399.396,
+       "train_samples_per_second": 5.957,
+       "train_steps_per_second": 0.093
+     }
+   ],
+   "logging_steps": 10,
+   "max_steps": 315,
+   "num_input_tokens_seen": 0,
+   "num_train_epochs": 3,
+   "save_steps": 500,
+   "total_flos": 0.0,
+   "train_batch_size": 8,
+   "trial_name": null,
+   "trial_params": null
+ }
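
The three eval snapshots in `log_history` are exactly the rows of the README's results table; note that validation loss bottoms out at epoch 2 (0.8612) and drifts up slightly by epoch 3 (0.8648) while reward accuracy holds at 0.6458. A sketch that extracts them from a local copy of the file:

```python
# Pull the per-epoch eval snapshots out of trainer_state.json.
import json

with open("trainer_state.json") as f:  # local copy of the file above
    state = json.load(f)

for e in (x for x in state["log_history"] if "eval_loss" in x):
    print(f"epoch {e['epoch']:.2f}  step {e['step']:3d}  "
          f"eval_loss {e['eval_loss']:.4f}  accuracy {e['eval_rewards/accuracies']:.4f}")
# epoch 1.00  step 105  eval_loss 0.8794  accuracy 0.6302
# epoch 2.00  step 211  eval_loss 0.8612  accuracy 0.6458
# epoch 2.99  step 315  eval_loss 0.8648  accuracy 0.6458
```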
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dc8f78b338187df42575a50e4b266ef4e260ed66e8c7d74c9f9b3c79df212e82
+ size 6456