Lichang-Chen committed on
Commit
528119a
1 Parent(s): 2e35432
iter0/README.md ADDED
@@ -0,0 +1,60 @@
+ ---
+ license: apache-2.0
+ base_model: alignment-handbook/zephyr-7b-sft-full
+ tags:
+ - alignment-handbook
+ - generated_from_trainer
+ datasets:
+ - ./data_may/train/reward_max_optune/n_sample2_filter1.0/iter0
+ model-index:
+ - name: iter0_0.1
+   results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # iter0_0.1
+
+ This model is a fine-tuned version of [alignment-handbook/zephyr-7b-sft-full](https://huggingface.co/alignment-handbook/zephyr-7b-sft-full) on the ./data_may/train/reward_max_optune/n_sample2_filter1.0/iter0 dataset.
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 5e-07
+ - train_batch_size: 16
+ - eval_batch_size: 16
+ - seed: 42
+ - distributed_type: multi-GPU
+ - num_devices: 8
+ - total_train_batch_size: 128
+ - total_eval_batch_size: 128
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: linear
+ - lr_scheduler_warmup_ratio: 0.1
+ - num_epochs: 1
+
+ ### Training results
+
+
+
+ ### Framework versions
+
+ - Transformers 4.37.0
+ - Pytorch 2.1.2+cu121
+ - Datasets 2.14.6
+ - Tokenizers 0.15.2
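
The generated card above stops short of a usage example. A minimal inference sketch follows; the local path `./iter0` is an assumption based on this commit's file layout (substitute the Hub repo id if the checkpoint is published there), and the prompt text is purely illustrative.

```python
# Minimal sketch: load the bf16 checkpoint added in this commit.
# "./iter0" is an assumed local path, not a confirmed Hub repo id.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("./iter0")
model = AutoModelForCausalLM.from_pretrained("./iter0", torch_dtype=torch.bfloat16)

# Format a single-turn chat with the tokenizer's built-in Zephyr template.
messages = [{"role": "user", "content": "Summarize the DPO objective in one sentence."}]
inputs = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
)
outputs = model.generate(inputs, max_new_tokens=128)
print(tokenizer.decode(outputs[0, inputs.shape[1]:], skip_special_tokens=True))
```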
iter0/all_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+   "epoch": 1.0,
+   "train_loss": 0.2572693068186442,
+   "train_runtime": 6192.5005,
+   "train_samples": 48000,
+   "train_samples_per_second": 7.751,
+   "train_steps_per_second": 0.061
+ }
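
As a quick consistency check, the throughput figures in all_results.json follow from the sample count, the runtime, and the effective batch size of 128 listed in the README:

```python
# Sanity-check the logged throughput (values copied from all_results.json).
train_samples = 48000
train_runtime = 6192.5005      # seconds
total_train_batch_size = 128   # 16 per device x 8 GPUs, per the README

print(train_samples / train_runtime)           # ~7.751 samples/s, as logged
steps = train_samples / total_train_batch_size # 375.0 optimizer steps
print(steps / train_runtime)                   # ~0.0606, logged as 0.061
```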
iter0/checkpoint-375/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "_name_or_path": "alignment-handbook/zephyr-7b-sft-full",
+   "architectures": [
+     "MistralForCausalLM"
+   ],
+   "attention_dropout": 0.0,
+   "bos_token_id": 1,
+   "eos_token_id": 2,
+   "hidden_act": "silu",
+   "hidden_size": 4096,
+   "initializer_range": 0.02,
+   "intermediate_size": 14336,
+   "max_position_embeddings": 32768,
+   "model_type": "mistral",
+   "num_attention_heads": 32,
+   "num_hidden_layers": 32,
+   "num_key_value_heads": 8,
+   "rms_norm_eps": 1e-05,
+   "rope_theta": 10000.0,
+   "sliding_window": 4096,
+   "tie_word_embeddings": false,
+   "torch_dtype": "bfloat16",
+   "transformers_version": "4.37.0",
+   "use_cache": false,
+   "vocab_size": 32000
+ }
iter0/checkpoint-375/generation_config.json ADDED
@@ -0,0 +1,6 @@
+ {
+   "_from_model_config": true,
+   "bos_token_id": 1,
+   "eos_token_id": 2,
+   "transformers_version": "4.37.0"
+ }
iter0/checkpoint-375/global_step375/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:37fec716d65a951da558ffad957a64fd9c8f7ecc05bc53f089233758c52a0cef
+ size 7241735056
iter0/checkpoint-375/global_step375/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0cb9cff4032ffcdea1e133dba6d74afdf34444b02d20e04ef4981f34dff6e273
+ size 7241735056
iter0/checkpoint-375/global_step375/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:95a3d3cc13054e29ef78e1ccec049c8d711ab39efc847aef466754305af27a3c
+ size 7241735056
iter0/checkpoint-375/global_step375/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:58bbb16b1f72b7c67fa0060e814d701763252c939df0e20fb6215cb19695a07b
+ size 7241735056
iter0/config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "_name_or_path": "alignment-handbook/zephyr-7b-sft-full",
+   "architectures": [
+     "MistralForCausalLM"
+   ],
+   "attention_dropout": 0.0,
+   "bos_token_id": 1,
+   "eos_token_id": 2,
+   "hidden_act": "silu",
+   "hidden_size": 4096,
+   "initializer_range": 0.02,
+   "intermediate_size": 14336,
+   "max_position_embeddings": 32768,
+   "model_type": "mistral",
+   "num_attention_heads": 32,
+   "num_hidden_layers": 32,
+   "num_key_value_heads": 8,
+   "rms_norm_eps": 1e-05,
+   "rope_theta": 10000.0,
+   "sliding_window": 4096,
+   "tie_word_embeddings": false,
+   "torch_dtype": "bfloat16",
+   "transformers_version": "4.37.0",
+   "use_cache": true,
+   "vocab_size": 32000
+ }
iter0/generation_config.json ADDED
@@ -0,0 +1,6 @@
+ {
+   "_from_model_config": true,
+   "bos_token_id": 1,
+   "eos_token_id": 2,
+   "transformers_version": "4.37.0"
+ }
iter0/model-00001-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:58624d559edea9d97e28af2bfbb6abed968f44cd0dd1e21f68048839c279da71
+ size 4943162336
iter0/model-00002-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a97a64d20f2985250b0eee1ee5c1115b69cdb8d095bb02a97812ab429d6c847c
+ size 4999819336
iter0/model-00003-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6d3c18bae5a8f67f5569344f4e0409dd2225ec4336baf966bf316b11b7536e3e
+ size 4540516344
iter0/model.safetensors.index.json ADDED
@@ -0,0 +1,298 @@
+ {
+   "metadata": {
+     "total_size": 14483464192
+   },
+   "weight_map": {
+     "lm_head.weight": "model-00003-of-00003.safetensors",
+     "model.embed_tokens.weight": "model-00001-of-00003.safetensors",
+     "model.layers.0.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.0.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.0.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.1.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.1.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.1.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.10.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.10.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.10.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.11.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.11.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.11.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.12.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.12.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.12.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.13.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.13.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.13.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.14.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.14.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.14.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.15.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.15.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.15.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.16.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.16.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.16.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.17.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.17.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.17.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.18.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.18.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.18.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.19.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.19.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.19.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.2.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.2.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.2.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.20.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.20.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.20.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.21.input_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.21.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.21.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+     "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.22.input_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.22.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.22.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
+     "model.layers.23.input_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.23.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.23.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.24.input_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.24.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.24.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.25.input_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.25.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.25.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.26.input_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.26.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.26.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.27.input_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.27.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.27.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.28.input_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.28.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.28.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.29.input_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.29.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.29.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.3.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.3.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.3.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.30.input_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.30.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.30.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.31.input_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.31.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.31.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+     "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
+     "model.layers.4.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.4.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.4.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.5.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.5.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.5.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.6.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.6.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.6.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.7.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.7.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.7.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.8.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.8.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.8.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.9.input_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.9.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.9.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
+     "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
+     "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
+     "model.norm.weight": "model-00003-of-00003.safetensors"
+   }
+ }
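
With torch_dtype bfloat16 (2 bytes per parameter, per config.json), the total_size in the index metadata corresponds to roughly 7.24B parameters, consistent with a Mistral-7B-class model:

```python
# total_size from model.safetensors.index.json, in bytes of bf16 tensors.
total_size = 14483464192
print(total_size // 2)       # 7241732096 parameters
print(total_size / 2 / 1e9)  # ~7.24 billion
```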
iter0/special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
+ {
+   "bos_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
iter0/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
iter0/tokenizer_config.json ADDED
@@ -0,0 +1,43 @@
+ {
+   "add_bos_token": true,
+   "add_eos_token": false,
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<unk>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "<s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "</s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "additional_special_tokens": [],
+   "bos_token": "<s>",
+   "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
+   "clean_up_tokenization_spaces": false,
+   "eos_token": "</s>",
+   "legacy": true,
+   "model_max_length": 2048,
+   "pad_token": "</s>",
+   "sp_model_kwargs": {},
+   "spaces_between_special_tokens": false,
+   "tokenizer_class": "LlamaTokenizer",
+   "unk_token": "<unk>",
+   "use_default_system_prompt": true
+ }
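
The chat_template above is the Zephyr format: each turn is wrapped in a <|system|>, <|user|>, or <|assistant|> header and terminated with the EOS token. A small rendering sketch (again assuming the tokenizer files from this commit live under ./iter0):

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("./iter0")  # assumed local path
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello!"},
]
print(tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True))
# <|system|>
# You are a helpful assistant.</s>
# <|user|>
# Hello!</s>
# <|assistant|>
```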
iter0/train_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+   "epoch": 1.0,
+   "train_loss": 0.2572693068186442,
+   "train_runtime": 6192.5005,
+   "train_samples": 48000,
+   "train_samples_per_second": 7.751,
+   "train_steps_per_second": 0.061
+ }
iter0/trainer_state.json ADDED
@@ -0,0 +1,562 @@
+ {
+   "best_metric": null,
+   "best_model_checkpoint": null,
+   "epoch": 1.0,
+   "eval_steps": 100,
+   "global_step": 375,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.0,
+       "learning_rate": 1.3157894736842104e-08,
+       "logits/generated": -3.0232396125793457,
+       "logits/real": -2.996844530105591,
+       "logps/generated": -291.56793212890625,
+       "logps/real": -340.7873840332031,
+       "loss": 0.3645,
+       "rewards/accuracies": 0.0,
+       "rewards/generated": 0.0,
+       "rewards/margins": 0.0,
+       "rewards/real": 0.0,
+       "step": 1
+     },
+     {
+       "epoch": 0.03,
+       "learning_rate": 1.3157894736842104e-07,
+       "logits/generated": -2.977639675140381,
+       "logits/real": -2.9781062602996826,
+       "logps/generated": -338.7113037109375,
+       "logps/real": -360.56146240234375,
+       "loss": 0.3584,
+       "rewards/accuracies": 0.5555555820465088,
+       "rewards/generated": 0.10237760096788406,
+       "rewards/margins": 0.04294492304325104,
+       "rewards/real": 0.1453225314617157,
+       "step": 10
+     },
+     {
+       "epoch": 0.05,
+       "learning_rate": 2.631578947368421e-07,
+       "logits/generated": -2.9867026805877686,
+       "logits/real": -2.990659236907959,
+       "logps/generated": -371.62164306640625,
+       "logps/real": -372.09954833984375,
+       "loss": 0.3391,
+       "rewards/accuracies": 0.59375,
+       "rewards/generated": 0.5660532712936401,
+       "rewards/margins": 0.15894225239753723,
+       "rewards/real": 0.7249955534934998,
+       "step": 20
+     },
+     {
+       "epoch": 0.08,
+       "learning_rate": 3.9473684210526315e-07,
+       "logits/generated": -2.940207004547119,
+       "logits/real": -2.945539951324463,
+       "logps/generated": -323.21282958984375,
+       "logps/real": -323.20733642578125,
+       "loss": 0.3061,
+       "rewards/accuracies": 0.668749988079071,
+       "rewards/generated": 0.2973577380180359,
+       "rewards/margins": 0.29796674847602844,
+       "rewards/real": 0.5953244566917419,
+       "step": 30
+     },
+     {
+       "epoch": 0.11,
+       "learning_rate": 4.970326409495548e-07,
+       "logits/generated": -2.849879026412964,
+       "logits/real": -2.868879556655884,
+       "logps/generated": -339.9267578125,
+       "logps/real": -348.660400390625,
+       "loss": 0.3043,
+       "rewards/accuracies": 0.668749988079071,
+       "rewards/generated": -0.18336713314056396,
+       "rewards/margins": 0.4493914246559143,
+       "rewards/real": 0.26602429151535034,
+       "step": 40
+     },
+     {
+       "epoch": 0.13,
+       "learning_rate": 4.821958456973294e-07,
+       "logits/generated": -2.8244385719299316,
+       "logits/real": -2.819532871246338,
+       "logps/generated": -345.12353515625,
+       "logps/real": -345.24334716796875,
+       "loss": 0.2707,
+       "rewards/accuracies": 0.762499988079071,
+       "rewards/generated": -1.0958898067474365,
+       "rewards/margins": 0.7357537150382996,
+       "rewards/real": -0.36013612151145935,
+       "step": 50
+     },
+     {
+       "epoch": 0.16,
+       "learning_rate": 4.673590504451038e-07,
+       "logits/generated": -2.7510242462158203,
+       "logits/real": -2.744049549102783,
+       "logps/generated": -343.3367614746094,
+       "logps/real": -353.568115234375,
+       "loss": 0.2658,
+       "rewards/accuracies": 0.706250011920929,
+       "rewards/generated": -1.349844217300415,
+       "rewards/margins": 0.7489473819732666,
+       "rewards/real": -0.6008970141410828,
+       "step": 60
+     },
+     {
+       "epoch": 0.19,
+       "learning_rate": 4.5252225519287835e-07,
+       "logits/generated": -2.787135362625122,
+       "logits/real": -2.7906911373138428,
+       "logps/generated": -380.27276611328125,
+       "logps/real": -390.9748840332031,
+       "loss": 0.2682,
+       "rewards/accuracies": 0.731249988079071,
+       "rewards/generated": -1.3749873638153076,
+       "rewards/margins": 0.8838955760002136,
+       "rewards/real": -0.49109163880348206,
+       "step": 70
+     },
+     {
+       "epoch": 0.21,
+       "learning_rate": 4.376854599406528e-07,
+       "logits/generated": -2.7812376022338867,
+       "logits/real": -2.79952073097229,
+       "logps/generated": -352.7367858886719,
+       "logps/real": -343.9632873535156,
+       "loss": 0.2784,
+       "rewards/accuracies": 0.78125,
+       "rewards/generated": -1.779193639755249,
+       "rewards/margins": 1.1407415866851807,
+       "rewards/real": -0.6384519934654236,
+       "step": 80
+     },
+     {
+       "epoch": 0.24,
+       "learning_rate": 4.228486646884273e-07,
+       "logits/generated": -2.80656099319458,
+       "logits/real": -2.7876017093658447,
+       "logps/generated": -369.83990478515625,
+       "logps/real": -381.7880859375,
+       "loss": 0.2742,
+       "rewards/accuracies": 0.7250000238418579,
+       "rewards/generated": -1.4693442583084106,
+       "rewards/margins": 0.8362933993339539,
+       "rewards/real": -0.6330507397651672,
+       "step": 90
+     },
+     {
+       "epoch": 0.27,
+       "learning_rate": 4.0801186943620176e-07,
+       "logits/generated": -2.7452383041381836,
+       "logits/real": -2.7657182216644287,
+       "logps/generated": -354.4010314941406,
+       "logps/real": -359.81219482421875,
+       "loss": 0.2657,
+       "rewards/accuracies": 0.6937500238418579,
+       "rewards/generated": -1.3197325468063354,
+       "rewards/margins": 0.8461551666259766,
+       "rewards/real": -0.4735774099826813,
+       "step": 100
+     },
+     {
+       "epoch": 0.29,
+       "learning_rate": 3.931750741839762e-07,
+       "logits/generated": -2.8132920265197754,
+       "logits/real": -2.8043882846832275,
+       "logps/generated": -357.61383056640625,
+       "logps/real": -354.3050537109375,
+       "loss": 0.2716,
+       "rewards/accuracies": 0.7250000238418579,
+       "rewards/generated": -1.6764816045761108,
+       "rewards/margins": 0.9828389883041382,
+       "rewards/real": -0.6936424374580383,
+       "step": 110
+     },
+     {
+       "epoch": 0.32,
+       "learning_rate": 3.7833827893175073e-07,
+       "logits/generated": -2.825605869293213,
+       "logits/real": -2.8103888034820557,
+       "logps/generated": -365.675537109375,
+       "logps/real": -368.09197998046875,
+       "loss": 0.2543,
+       "rewards/accuracies": 0.762499988079071,
+       "rewards/generated": -1.922782301902771,
+       "rewards/margins": 1.292311191558838,
+       "rewards/real": -0.6304711103439331,
+       "step": 120
+     },
+     {
+       "epoch": 0.35,
+       "learning_rate": 3.635014836795252e-07,
+       "logits/generated": -2.7962846755981445,
+       "logits/real": -2.795644521713257,
+       "logps/generated": -340.1669006347656,
+       "logps/real": -348.66583251953125,
+       "loss": 0.2602,
+       "rewards/accuracies": 0.6312500238418579,
+       "rewards/generated": -1.8341821432113647,
+       "rewards/margins": 0.9289523959159851,
+       "rewards/real": -0.9052297472953796,
+       "step": 130
+     },
+     {
+       "epoch": 0.37,
+       "learning_rate": 3.486646884272997e-07,
+       "logits/generated": -2.7868337631225586,
+       "logits/real": -2.7795639038085938,
+       "logps/generated": -358.3647766113281,
+       "logps/real": -362.6192321777344,
+       "loss": 0.2584,
+       "rewards/accuracies": 0.7437499761581421,
+       "rewards/generated": -1.9304630756378174,
+       "rewards/margins": 1.0024542808532715,
+       "rewards/real": -0.9280086755752563,
+       "step": 140
+     },
+     {
+       "epoch": 0.4,
+       "learning_rate": 3.3382789317507414e-07,
+       "logits/generated": -2.7806317806243896,
+       "logits/real": -2.773284435272217,
+       "logps/generated": -392.99273681640625,
+       "logps/real": -388.6888732910156,
+       "loss": 0.2429,
+       "rewards/accuracies": 0.668749988079071,
+       "rewards/generated": -2.132871389389038,
+       "rewards/margins": 0.8601642847061157,
+       "rewards/real": -1.272707223892212,
+       "step": 150
+     },
+     {
+       "epoch": 0.43,
+       "learning_rate": 3.189910979228487e-07,
+       "logits/generated": -2.7479450702667236,
+       "logits/real": -2.7415106296539307,
+       "logps/generated": -384.2051086425781,
+       "logps/real": -382.9107360839844,
+       "loss": 0.2518,
+       "rewards/accuracies": 0.7562500238418579,
+       "rewards/generated": -2.3066015243530273,
+       "rewards/margins": 1.2394059896469116,
+       "rewards/real": -1.0671956539154053,
+       "step": 160
+     },
+     {
+       "epoch": 0.45,
+       "learning_rate": 3.0415430267062316e-07,
+       "logits/generated": -2.7207372188568115,
+       "logits/real": -2.6968023777008057,
+       "logps/generated": -351.6153259277344,
+       "logps/real": -358.0864562988281,
+       "loss": 0.247,
+       "rewards/accuracies": 0.8062499761581421,
+       "rewards/generated": -2.2227485179901123,
+       "rewards/margins": 1.4021742343902588,
+       "rewards/real": -0.820574164390564,
+       "step": 170
+     },
+     {
+       "epoch": 0.48,
+       "learning_rate": 2.893175074183976e-07,
+       "logits/generated": -2.687243700027466,
+       "logits/real": -2.6896092891693115,
+       "logps/generated": -340.67498779296875,
+       "logps/real": -325.22259521484375,
+       "loss": 0.2683,
+       "rewards/accuracies": 0.762499988079071,
+       "rewards/generated": -2.342029094696045,
+       "rewards/margins": 1.0233131647109985,
+       "rewards/real": -1.3187161684036255,
+       "step": 180
+     },
+     {
+       "epoch": 0.51,
+       "learning_rate": 2.744807121661721e-07,
+       "logits/generated": -2.709791421890259,
+       "logits/real": -2.73317289352417,
+       "logps/generated": -396.40606689453125,
+       "logps/real": -388.1844482421875,
+       "loss": 0.2442,
+       "rewards/accuracies": 0.78125,
+       "rewards/generated": -2.465156078338623,
+       "rewards/margins": 1.4016426801681519,
+       "rewards/real": -1.063513159751892,
+       "step": 190
+     },
+     {
+       "epoch": 0.53,
+       "learning_rate": 2.596439169139466e-07,
+       "logits/generated": -2.7428107261657715,
+       "logits/real": -2.7355589866638184,
+       "logps/generated": -368.4299011230469,
+       "logps/real": -373.0939025878906,
+       "loss": 0.2451,
+       "rewards/accuracies": 0.75,
+       "rewards/generated": -2.1397910118103027,
+       "rewards/margins": 1.308272123336792,
+       "rewards/real": -0.8315190076828003,
+       "step": 200
+     },
+     {
+       "epoch": 0.56,
+       "learning_rate": 2.4480712166172106e-07,
+       "logits/generated": -2.703258752822876,
+       "logits/real": -2.693305015563965,
+       "logps/generated": -339.4871826171875,
+       "logps/real": -326.2037658691406,
+       "loss": 0.2395,
+       "rewards/accuracies": 0.78125,
+       "rewards/generated": -2.3122100830078125,
+       "rewards/margins": 1.2954694032669067,
+       "rewards/real": -1.0167406797409058,
+       "step": 210
+     },
+     {
+       "epoch": 0.59,
+       "learning_rate": 2.2997032640949554e-07,
+       "logits/generated": -2.7212119102478027,
+       "logits/real": -2.716545581817627,
+       "logps/generated": -339.74267578125,
+       "logps/real": -346.297607421875,
+       "loss": 0.2458,
+       "rewards/accuracies": 0.7437499761581421,
+       "rewards/generated": -1.9660396575927734,
+       "rewards/margins": 1.078840970993042,
+       "rewards/real": -0.8871987462043762,
+       "step": 220
+     },
+     {
+       "epoch": 0.61,
+       "learning_rate": 2.1513353115727e-07,
+       "logits/generated": -2.77765154838562,
+       "logits/real": -2.7591769695281982,
+       "logps/generated": -386.1648864746094,
+       "logps/real": -381.2674560546875,
+       "loss": 0.2324,
+       "rewards/accuracies": 0.831250011920929,
+       "rewards/generated": -2.414008617401123,
+       "rewards/margins": 1.5327675342559814,
+       "rewards/real": -0.8812410235404968,
+       "step": 230
+     },
+     {
+       "epoch": 0.64,
+       "learning_rate": 2.0029673590504451e-07,
+       "logits/generated": -2.7021536827087402,
+       "logits/real": -2.70768666267395,
+       "logps/generated": -354.3561706542969,
+       "logps/real": -353.68212890625,
+       "loss": 0.2492,
+       "rewards/accuracies": 0.706250011920929,
+       "rewards/generated": -2.4238786697387695,
+       "rewards/margins": 1.27177894115448,
+       "rewards/real": -1.152099847793579,
+       "step": 240
+     },
+     {
+       "epoch": 0.67,
+       "learning_rate": 1.8545994065281897e-07,
+       "logits/generated": -2.7076125144958496,
+       "logits/real": -2.7352890968322754,
+       "logps/generated": -365.26214599609375,
+       "logps/real": -355.78564453125,
+       "loss": 0.2426,
+       "rewards/accuracies": 0.762499988079071,
+       "rewards/generated": -2.567624092102051,
+       "rewards/margins": 1.3714964389801025,
+       "rewards/real": -1.1961278915405273,
+       "step": 250
+     },
+     {
+       "epoch": 0.69,
+       "learning_rate": 1.7062314540059346e-07,
+       "logits/generated": -2.7474026679992676,
+       "logits/real": -2.733513593673706,
+       "logps/generated": -370.26568603515625,
+       "logps/real": -366.9493713378906,
+       "loss": 0.2496,
+       "rewards/accuracies": 0.793749988079071,
+       "rewards/generated": -2.5266714096069336,
+       "rewards/margins": 1.390491247177124,
+       "rewards/real": -1.1361799240112305,
+       "step": 260
+     },
+     {
+       "epoch": 0.72,
+       "learning_rate": 1.5578635014836795e-07,
+       "logits/generated": -2.7382729053497314,
+       "logits/real": -2.7590155601501465,
+       "logps/generated": -339.4982604980469,
+       "logps/real": -354.5415954589844,
+       "loss": 0.2407,
+       "rewards/accuracies": 0.731249988079071,
+       "rewards/generated": -2.284700870513916,
+       "rewards/margins": 1.1465342044830322,
+       "rewards/real": -1.1381666660308838,
+       "step": 270
+     },
+     {
+       "epoch": 0.75,
+       "learning_rate": 1.4094955489614243e-07,
+       "logits/generated": -2.6945815086364746,
+       "logits/real": -2.695988416671753,
+       "logps/generated": -373.51385498046875,
+       "logps/real": -350.8352966308594,
+       "loss": 0.2303,
+       "rewards/accuracies": 0.762499988079071,
+       "rewards/generated": -2.4922609329223633,
+       "rewards/margins": 1.3119118213653564,
+       "rewards/real": -1.1803491115570068,
+       "step": 280
+     },
+     {
+       "epoch": 0.77,
+       "learning_rate": 1.261127596439169e-07,
+       "logits/generated": -2.7670834064483643,
+       "logits/real": -2.7600436210632324,
+       "logps/generated": -344.09136962890625,
+       "logps/real": -337.3023376464844,
+       "loss": 0.2435,
+       "rewards/accuracies": 0.8062499761581421,
+       "rewards/generated": -2.637000799179077,
+       "rewards/margins": 1.540818452835083,
+       "rewards/real": -1.0961825847625732,
+       "step": 290
+     },
+     {
+       "epoch": 0.8,
+       "learning_rate": 1.1127596439169139e-07,
+       "logits/generated": -2.6610119342803955,
+       "logits/real": -2.6668756008148193,
+       "logps/generated": -342.7873229980469,
+       "logps/real": -330.2555847167969,
+       "loss": 0.2424,
+       "rewards/accuracies": 0.800000011920929,
+       "rewards/generated": -2.5096726417541504,
+       "rewards/margins": 1.4919517040252686,
+       "rewards/real": -1.0177206993103027,
+       "step": 300
+     },
+     {
+       "epoch": 0.83,
+       "learning_rate": 9.643916913946587e-08,
+       "logits/generated": -2.7119061946868896,
+       "logits/real": -2.736443519592285,
+       "logps/generated": -364.1079406738281,
+       "logps/real": -365.68963623046875,
+       "loss": 0.2316,
+       "rewards/accuracies": 0.793749988079071,
+       "rewards/generated": -3.0718140602111816,
+       "rewards/margins": 1.5135959386825562,
+       "rewards/real": -1.5582183599472046,
+       "step": 310
+     },
+     {
+       "epoch": 0.85,
+       "learning_rate": 8.160237388724035e-08,
+       "logits/generated": -2.7183382511138916,
+       "logits/real": -2.735018253326416,
+       "logps/generated": -394.9755859375,
+       "logps/real": -377.31427001953125,
+       "loss": 0.2359,
+       "rewards/accuracies": 0.768750011920929,
+       "rewards/generated": -2.73679256439209,
+       "rewards/margins": 1.1780710220336914,
+       "rewards/real": -1.558721661567688,
+       "step": 320
+     },
+     {
+       "epoch": 0.88,
+       "learning_rate": 6.676557863501484e-08,
+       "logits/generated": -2.7515358924865723,
+       "logits/real": -2.742940664291382,
+       "logps/generated": -388.3130187988281,
+       "logps/real": -372.29437255859375,
+       "loss": 0.234,
+       "rewards/accuracies": 0.706250011920929,
+       "rewards/generated": -2.38558030128479,
+       "rewards/margins": 1.1235764026641846,
+       "rewards/real": -1.2620038986206055,
+       "step": 330
+     },
+     {
+       "epoch": 0.91,
+       "learning_rate": 5.192878338278932e-08,
+       "logits/generated": -2.695279121398926,
+       "logits/real": -2.6978631019592285,
+       "logps/generated": -365.2856140136719,
+       "logps/real": -363.0904235839844,
+       "loss": 0.2303,
+       "rewards/accuracies": 0.762499988079071,
+       "rewards/generated": -2.723789930343628,
+       "rewards/margins": 1.42342209815979,
+       "rewards/real": -1.300368070602417,
+       "step": 340
+     },
+     {
+       "epoch": 0.93,
+       "learning_rate": 3.709198813056379e-08,
+       "logits/generated": -2.662724018096924,
+       "logits/real": -2.675875186920166,
+       "logps/generated": -338.28704833984375,
+       "logps/real": -342.17462158203125,
+       "loss": 0.2333,
+       "rewards/accuracies": 0.768750011920929,
+       "rewards/generated": -2.581637144088745,
+       "rewards/margins": 1.3430696725845337,
+       "rewards/real": -1.2385674715042114,
+       "step": 350
+     },
+     {
+       "epoch": 0.96,
+       "learning_rate": 2.225519287833828e-08,
+       "logits/generated": -2.6803088188171387,
+       "logits/real": -2.704144239425659,
+       "logps/generated": -356.77703857421875,
+       "logps/real": -359.313720703125,
+       "loss": 0.2368,
+       "rewards/accuracies": 0.75,
+       "rewards/generated": -2.8369853496551514,
+       "rewards/margins": 1.2829147577285767,
+       "rewards/real": -1.554070234298706,
+       "step": 360
+     },
+     {
+       "epoch": 0.99,
+       "learning_rate": 7.418397626112759e-09,
+       "logits/generated": -2.7113311290740967,
+       "logits/real": -2.7457308769226074,
+       "logps/generated": -404.06756591796875,
+       "logps/real": -393.70843505859375,
+       "loss": 0.2369,
+       "rewards/accuracies": 0.78125,
+       "rewards/generated": -2.8109331130981445,
+       "rewards/margins": 1.4008702039718628,
+       "rewards/real": -1.4100630283355713,
+       "step": 370
+     },
+     {
+       "epoch": 1.0,
+       "step": 375,
+       "total_flos": 0.0,
+       "train_loss": 0.2572693068186442,
+       "train_runtime": 6192.5005,
+       "train_samples_per_second": 7.751,
+       "train_steps_per_second": 0.061
+     }
+   ],
+   "logging_steps": 10,
+   "max_steps": 375,
+   "num_input_tokens_seen": 0,
+   "num_train_epochs": 1,
+   "save_steps": 500,
+   "total_flos": 0.0,
+   "train_batch_size": 16,
+   "trial_name": null,
+   "trial_params": null
+ }
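
The metric names in log_history (logps/real vs. logps/generated, rewards/margins, rewards/accuracies) suggest a pairwise preference-optimization objective in the DPO family, with "real" (preferred) completions scored against model-generated ones. One internal consistency check holds at every logged step: rewards/margins equals rewards/real minus rewards/generated, up to float rounding. Taking step 10 as an example:

```python
# Values copied from the step-10 entry of trainer_state.json.
rewards_real = 0.1453225314617157
rewards_generated = 0.10237760096788406
print(rewards_real - rewards_generated)
# 0.0429449... vs. the logged rewards/margins of 0.04294492304325104
# (the tiny gap reflects float32 rounding in the trainer's batch averaging)
```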
iter0/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2ad1a8e08a20d9849ba4854028576185ed1bfd04bb523ad1f1c37cd201163361
+ size 5944