avemio-digital committed on
Commit 5f9d906 (verified)
1 parent: ef8395e

Upload 15 files
.gitattributes CHANGED
@@ -1,35 +1,35 @@
- *.7z filter=lfs diff=lfs merge=lfs -text
- *.arrow filter=lfs diff=lfs merge=lfs -text
- *.bin filter=lfs diff=lfs merge=lfs -text
- *.bz2 filter=lfs diff=lfs merge=lfs -text
- *.ckpt filter=lfs diff=lfs merge=lfs -text
- *.ftz filter=lfs diff=lfs merge=lfs -text
- *.gz filter=lfs diff=lfs merge=lfs -text
- *.h5 filter=lfs diff=lfs merge=lfs -text
- *.joblib filter=lfs diff=lfs merge=lfs -text
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
- *.model filter=lfs diff=lfs merge=lfs -text
- *.msgpack filter=lfs diff=lfs merge=lfs -text
- *.npy filter=lfs diff=lfs merge=lfs -text
- *.npz filter=lfs diff=lfs merge=lfs -text
- *.onnx filter=lfs diff=lfs merge=lfs -text
- *.ot filter=lfs diff=lfs merge=lfs -text
- *.parquet filter=lfs diff=lfs merge=lfs -text
- *.pb filter=lfs diff=lfs merge=lfs -text
- *.pickle filter=lfs diff=lfs merge=lfs -text
- *.pkl filter=lfs diff=lfs merge=lfs -text
- *.pt filter=lfs diff=lfs merge=lfs -text
- *.pth filter=lfs diff=lfs merge=lfs -text
- *.rar filter=lfs diff=lfs merge=lfs -text
- *.safetensors filter=lfs diff=lfs merge=lfs -text
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
- *.tar.* filter=lfs diff=lfs merge=lfs -text
- *.tar filter=lfs diff=lfs merge=lfs -text
- *.tflite filter=lfs diff=lfs merge=lfs -text
- *.tgz filter=lfs diff=lfs merge=lfs -text
- *.wasm filter=lfs diff=lfs merge=lfs -text
- *.xz filter=lfs diff=lfs merge=lfs -text
- *.zip filter=lfs diff=lfs merge=lfs -text
- *.zst filter=lfs diff=lfs merge=lfs -text
- *tfevents* filter=lfs diff=lfs merge=lfs -text
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
added_tokens.json ADDED
@@ -0,0 +1,40 @@
+ {
+   "<|/code|>": 32014,
+   "<|/data|>": 32033,
+   "<|/inst|>": 32037,
+   "<|/query|>": 32031,
+   "<|/sys|>": 32035,
+   "<|assistant_mask|>": 32017,
+   "<|assistant|>": 32001,
+   "<|calc|>": 32012,
+   "<|code|>": 32013,
+   "<|continue|>": 32009,
+   "<|data|>": 32032,
+   "<|diff_marker|>": 32025,
+   "<|disc_sep|>": 32029,
+   "<|disc_start|>": 32028,
+   "<|disc_thread|><|query|>": 32030,
+   "<|endoftext|>": 32000,
+   "<|end|>": 32007,
+   "<|fim_middle|>": 32021,
+   "<|fim_prefix|>": 32020,
+   "<|fim_suffix|>": 32022,
+   "<|function_call|>": 32005,
+   "<|function_list|>": 32011,
+   "<|function_output|>": 32003,
+   "<|ghissue|>": 32026,
+   "<|ghreview|>": 32027,
+   "<|inst|>": 32036,
+   "<|ipynb_marker|>": 32024,
+   "<|message|>": 32019,
+   "<|meta_start|>": 32023,
+   "<|raw|>": 32008,
+   "<|resource|>": 32016,
+   "<|start|>": 32018,
+   "<|step|>": 32002,
+   "<|summary|>": 32015,
+   "<|system|>": 32006,
+   "<|sys|>": 32034,
+   "<|tag|>": 32004,
+   "<|user|>": 32010
+ }
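
The map above pins every added special token to a fixed vocabulary id (the extras start at 32000, right after the base SentencePiece vocabulary). A minimal round-trip check, as a sketch assuming the repo id taken from config.json below resolves on the Hugging Face Hub:

# Round-trip check of the special-token ids listed above.
# Assumption: this repo id (from "_name_or_path" in config.json,
# the next file in this commit) resolves on the Hugging Face Hub.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("avemio-digital/GRAG-PHI-3.5-MINI-4B-SFT-HESSIAN-AI")
assert tok.convert_tokens_to_ids("<|assistant|>") == 32001
assert tok.convert_tokens_to_ids("<|end|>") == 32007
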
config.json ADDED
@@ -0,0 +1,30 @@
+ {
+   "_name_or_path": "avemio-digital/GRAG-PHI-3.5-MINI-4B-SFT-HESSIAN-AI",
+   "architectures": [
+     "LlamaForCausalLM"
+   ],
+   "attention_bias": false,
+   "attention_dropout": 0.0,
+   "bos_token_id": 1,
+   "eos_token_id": 32000,
+   "hidden_act": "silu",
+   "hidden_size": 3072,
+   "initializer_range": 0.02,
+   "intermediate_size": 8192,
+   "max_position_embeddings": 131072,
+   "mlp_bias": false,
+   "model_type": "llama",
+   "num_attention_heads": 32,
+   "num_hidden_layers": 32,
+   "num_key_value_heads": 32,
+   "pretraining_tp": 1,
+   "rms_norm_eps": 1e-05,
+   "rope_scaling": null,
+   "rope_theta": 10000.0,
+   "sliding_window": 2047,
+   "tie_word_embeddings": false,
+   "torch_dtype": "bfloat16",
+   "transformers_version": "4.39.3",
+   "use_cache": true,
+   "vocab_size": 32064
+ }
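
The config above fully determines the checkpoint's size. A small arithmetic sketch (plain Python, using only the values shown) that recovers the exact "total_size" reported in model.safetensors.index.json further down:

# Parameter count implied by config.json; in bfloat16 (2 bytes/param)
# it reproduces "total_size" from model.safetensors.index.json exactly.
h, ffn, layers, vocab = 3072, 8192, 32, 32064

embed = vocab * h                      # input embeddings
lm_head = vocab * h                    # untied output head (tie_word_embeddings: false)
attn = 4 * h * h                       # q/k/v/o projections, no bias
mlp = 3 * h * ffn                      # gate/up/down projections
norms = 2 * h                          # input + post-attention RMSNorm
per_layer = attn + mlp + norms

total = embed + lm_head + layers * per_layer + h  # + final model.norm
assert total == 3_821_079_552
assert total * 2 == 7_642_159_104      # bfloat16 bytes == index total_size
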
generation_config.json ADDED
@@ -0,0 +1,10 @@
+ {
+   "_from_model_config": true,
+   "bos_token_id": 1,
+   "eos_token_id": [
+     32000,
+     32007
+   ],
+   "pad_token_id": 32000,
+   "transformers_version": "4.39.3"
+ }
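
Together with config.json this is enough to load and sample from the checkpoint. A minimal sketch, assuming the repo id from config.json resolves on the Hub; decoding stops on either eos id (32000 = <|endoftext|>, 32007 = <|end|>, per added_tokens.json above):

# Loading and generating with the stop ids from generation_config.json.
# Assumption: the repo id below resolves on the Hugging Face Hub.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

repo_id = "avemio-digital/GRAG-PHI-3.5-MINI-4B-SFT-HESSIAN-AI"
tok = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForCausalLM.from_pretrained(repo_id, torch_dtype=torch.bfloat16)

inputs = tok("Hallo, wie geht es dir?", return_tensors="pt")
out = model.generate(**inputs, max_new_tokens=64,
                     eos_token_id=[32000, 32007], pad_token_id=32000)
print(tok.decode(out[0], skip_special_tokens=True))
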
latest ADDED
@@ -0,0 +1 @@
+ global_step3360
model-00001-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:358eb26d0132f58623b37a887551579e5acf1987ec0073b5a7124446e47eb5b0
+ size 4991370968
model-00002-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2b2bf802d83278bbf2cda06b82e1cc7b72768690015534003a8337a8e3454624
+ size 2650821816
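
Both weight shards are stored as Git LFS pointers: only the oid (a SHA-256 of the blob) and the byte size live in the repo. A sketch of verifying a downloaded shard against its pointer, assuming the file has already been fetched (e.g. via `git lfs pull`) into the working directory:

# Verify a fetched LFS object against the pointer's oid above.
# Assumption: the shard has been downloaded to the path below.
import hashlib

local_path = "model-00001-of-00002.safetensors"
expected = "358eb26d0132f58623b37a887551579e5acf1987ec0073b5a7124446e47eb5b0"

h = hashlib.sha256()
with open(local_path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        h.update(chunk)
assert h.hexdigest() == expected
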
model.safetensors.index.json ADDED
@@ -0,0 +1,298 @@
+ {
+   "metadata": {
+     "total_size": 7642159104
+   },
+   "weight_map": {
+     "lm_head.weight": "model-00002-of-00002.safetensors",
+     "model.embed_tokens.weight": "model-00001-of-00002.safetensors",
+     "model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.12.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.13.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.14.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.15.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.15.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.15.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.16.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.16.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.16.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.16.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.16.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.17.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.17.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.17.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.17.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.17.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.17.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.18.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.18.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.18.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.18.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.18.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.18.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.19.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.19.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.19.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.19.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.19.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.19.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.20.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.20.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.20.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.20.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.20.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.20.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.21.input_layernorm.weight": "model-00002-of-00002.safetensors",
+     "model.layers.21.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.21.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+     "model.layers.21.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.21.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.22.input_layernorm.weight": "model-00002-of-00002.safetensors",
+     "model.layers.22.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.22.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.22.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.22.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+     "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.23.input_layernorm.weight": "model-00002-of-00002.safetensors",
+     "model.layers.23.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.23.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.23.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.23.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+     "model.layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.23.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.24.input_layernorm.weight": "model-00002-of-00002.safetensors",
+     "model.layers.24.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.24.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.24.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.24.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+     "model.layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.24.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.25.input_layernorm.weight": "model-00002-of-00002.safetensors",
+     "model.layers.25.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.25.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.25.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.25.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+     "model.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.25.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.26.input_layernorm.weight": "model-00002-of-00002.safetensors",
+     "model.layers.26.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.26.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.26.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.26.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+     "model.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.26.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.27.input_layernorm.weight": "model-00002-of-00002.safetensors",
+     "model.layers.27.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.27.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.27.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.27.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+     "model.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.27.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.28.input_layernorm.weight": "model-00002-of-00002.safetensors",
+     "model.layers.28.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.28.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.28.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.28.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+     "model.layers.28.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.28.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.28.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.28.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.29.input_layernorm.weight": "model-00002-of-00002.safetensors",
+     "model.layers.29.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.29.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.29.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.29.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+     "model.layers.29.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.29.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.29.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.29.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.30.input_layernorm.weight": "model-00002-of-00002.safetensors",
+     "model.layers.30.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.30.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.30.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.30.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+     "model.layers.30.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.30.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.30.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.30.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.31.input_layernorm.weight": "model-00002-of-00002.safetensors",
+     "model.layers.31.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.31.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.31.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.31.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
+     "model.layers.31.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.31.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.31.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.31.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+     "model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.6.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.7.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.8.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.9.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
+     "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+     "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+     "model.norm.weight": "model-00002-of-00002.safetensors"
+   }
+ }
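
The weight_map above routes every tensor name to one of the two shards (note that layer 21 is split across both files). A sketch of resolving one tensor through the index, assuming the shards and the index have been downloaded into the current directory:

# Resolve a tensor name through the index and read it from the right shard.
# Assumption: model.safetensors.index.json and both shards are local files.
import json
from safetensors import safe_open

with open("model.safetensors.index.json") as f:
    index = json.load(f)

name = "model.layers.21.self_attn.q_proj.weight"
shard = index["weight_map"][name]   # -> "model-00001-of-00002.safetensors"
with safe_open(shard, framework="pt") as f:
    w = f.get_tensor(name)
print(w.shape, w.dtype)             # expected: (3072, 3072), bfloat16
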
special_tokens_map.json ADDED
@@ -0,0 +1,60 @@
+ {
+   "additional_special_tokens": [
+     {
+       "content": "<|user|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false
+     },
+     {
+       "content": "<|system|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false
+     },
+     {
+       "content": "<|assistant|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false
+     },
+     {
+       "content": "<|end|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false
+     }
+   ],
+   "bos_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "<|end|>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+ size 499723
tokenizer_config.json ADDED
@@ -0,0 +1,352 @@
+ {
+   "add_bos_token": true,
+   "add_eos_token": false,
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<unk>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "<s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "</s>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": true,
+       "single_word": false,
+       "special": false
+     },
+     "32000": {
+       "content": "<|endoftext|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "32001": {
+       "content": "<|assistant|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "32002": {
+       "content": "<|step|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": true,
+       "single_word": false,
+       "special": true
+     },
+     "32003": {
+       "content": "<|function_output|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": true,
+       "single_word": false,
+       "special": true
+     },
+     "32004": {
+       "content": "<|tag|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": true,
+       "single_word": false,
+       "special": true
+     },
+     "32005": {
+       "content": "<|function_call|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": true,
+       "single_word": false,
+       "special": true
+     },
+     "32006": {
+       "content": "<|system|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "32007": {
+       "content": "<|end|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "32008": {
+       "content": "<|raw|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": true,
+       "single_word": false,
+       "special": true
+     },
+     "32009": {
+       "content": "<|continue|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": true,
+       "single_word": false,
+       "special": true
+     },
+     "32010": {
+       "content": "<|user|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "32011": {
+       "content": "<|function_list|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": true,
+       "single_word": false,
+       "special": true
+     },
+     "32012": {
+       "content": "<|calc|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": true,
+       "single_word": false,
+       "special": true
+     },
+     "32013": {
+       "content": "<|code|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": true,
+       "single_word": false,
+       "special": true
+     },
+     "32014": {
+       "content": "<|/code|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": true,
+       "single_word": false,
+       "special": true
+     },
+     "32015": {
+       "content": "<|summary|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": true,
+       "single_word": false,
+       "special": true
+     },
+     "32016": {
+       "content": "<|resource|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": true,
+       "single_word": false,
+       "special": true
+     },
+     "32017": {
+       "content": "<|assistant_mask|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": true,
+       "single_word": false,
+       "special": true
+     },
+     "32018": {
+       "content": "<|start|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": true,
+       "single_word": false,
+       "special": true
+     },
+     "32019": {
+       "content": "<|message|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": true,
+       "single_word": false,
+       "special": true
+     },
+     "32020": {
+       "content": "<|fim_prefix|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": true,
+       "single_word": false,
+       "special": true
+     },
+     "32021": {
+       "content": "<|fim_middle|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": true,
+       "single_word": false,
+       "special": true
+     },
+     "32022": {
+       "content": "<|fim_suffix|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": true,
+       "single_word": false,
+       "special": true
+     },
+     "32023": {
+       "content": "<|meta_start|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": true,
+       "single_word": false,
+       "special": true
+     },
+     "32024": {
+       "content": "<|ipynb_marker|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": true,
+       "single_word": false,
+       "special": true
+     },
+     "32025": {
+       "content": "<|diff_marker|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": true,
+       "single_word": false,
+       "special": true
+     },
+     "32026": {
+       "content": "<|ghissue|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": true,
+       "single_word": false,
+       "special": true
+     },
+     "32027": {
+       "content": "<|ghreview|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": true,
+       "single_word": false,
+       "special": true
+     },
+     "32028": {
+       "content": "<|disc_start|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": true,
+       "single_word": false,
+       "special": true
+     },
+     "32029": {
+       "content": "<|disc_sep|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": true,
+       "single_word": false,
+       "special": true
+     },
+     "32030": {
+       "content": "<|disc_thread|><|query|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": true,
+       "single_word": false,
+       "special": true
+     },
+     "32031": {
+       "content": "<|/query|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": true,
+       "single_word": false,
+       "special": true
+     },
+     "32032": {
+       "content": "<|data|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": true,
+       "single_word": false,
+       "special": true
+     },
+     "32033": {
+       "content": "<|/data|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": true,
+       "single_word": false,
+       "special": true
+     },
+     "32034": {
+       "content": "<|sys|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": true,
+       "single_word": false,
+       "special": true
+     },
+     "32035": {
+       "content": "<|/sys|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": true,
+       "single_word": false,
+       "special": true
+     },
+     "32036": {
+       "content": "<|inst|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": true,
+       "single_word": false,
+       "special": true
+     },
+     "32037": {
+       "content": "<|/inst|>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": true,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "additional_special_tokens": [
+     "<|user|>",
+     "<|system|>",
+     "<|assistant|>",
+     "<|end|>"
+   ],
+   "bos_token": "<s>",
+   "chat_template": "{{ bos_token }}{% for message in messages %}{{'<|' + message['role'] + '|>' + '\n' + message['content'] + '<|end|>\n' }}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>\n' }}{% else %}{{ eos_token }}{% endif %}",
+   "clean_up_tokenization_spaces": false,
+   "eos_token": "<|end|>",
+   "legacy": false,
+   "model_max_length": 131072,
+   "pad_token": "<|endoftext|>",
+   "padding_side": "left",
+   "sp_model_kwargs": {},
+   "tokenizer_class": "LlamaTokenizer",
+   "unk_token": "<unk>",
+   "use_default_system_prompt": false
+ }
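
Per the chat_template above, each turn is rendered as <|role|>, a newline, the content, then <|end|> and a newline, prefixed once with bos_token; when a generation prompt is requested, <|assistant|> plus a newline is appended. A minimal sketch, again assuming the repo id from config.json resolves on the Hub:

# Apply the chat template defined in tokenizer_config.json.
# Assumption: the repo id resolves on the Hugging Face Hub.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("avemio-digital/GRAG-PHI-3.5-MINI-4B-SFT-HESSIAN-AI")
messages = [
    {"role": "system", "content": "Du bist ein hilfreicher Assistent."},
    {"role": "user", "content": "Was ist RAG?"},
]
prompt = tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(prompt)  # ends with "<|assistant|>\n", ready for generation
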
trainer_state.json ADDED
@@ -0,0 +1,2082 @@
+ {
+   "best_metric": null,
+   "best_model_checkpoint": null,
+   "epoch": 15.89591957421644,
+   "eval_steps": 400,
+   "global_step": 3360,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.15138971023063277,
+       "grad_norm": 27.229875564575195,
+       "learning_rate": 4.7619047619047613e-08,
+       "log_odds_chosen": -0.0525120347738266,
+       "log_odds_ratio": -0.7864450216293335,
+       "logits/chosen": 1.5500602722167969,
+       "logits/rejected": 1.3292943239212036,
+       "logps/chosen": -1.191162109375,
+       "logps/rejected": -1.1635648012161255,
+       "loss": 1.6156,
+       "nll_loss": 1.46018648147583,
+       "rewards/accuracies": 0.43359375,
+       "rewards/chosen": -0.1786743402481079,
+       "rewards/margins": -0.004139607772231102,
+       "rewards/rejected": -0.17453473806381226,
+       "step": 32
+     },
+     {
+       "epoch": 0.30277942046126555,
+       "grad_norm": 27.179594039916992,
+       "learning_rate": 9.523809523809523e-08,
+       "log_odds_chosen": 0.019624141976237297,
+       "log_odds_ratio": -0.748144805431366,
+       "logits/chosen": 1.5089401006698608,
+       "logits/rejected": 1.3729290962219238,
+       "logps/chosen": -1.2469431161880493,
+       "logps/rejected": -1.26250422000885,
+       "loss": 1.5984,
+       "nll_loss": 1.5403207540512085,
+       "rewards/accuracies": 0.5078125,
+       "rewards/chosen": -0.1870414763689041,
+       "rewards/margins": 0.002334160730242729,
+       "rewards/rejected": -0.1893756240606308,
+       "step": 64
+     },
+     {
+       "epoch": 0.4541691306918983,
+       "grad_norm": 25.431949615478516,
+       "learning_rate": 1.4285714285714285e-07,
+       "log_odds_chosen": -0.053292229771614075,
+       "log_odds_ratio": -0.78084397315979,
+       "logits/chosen": 1.5771101713180542,
+       "logits/rejected": 1.4359058141708374,
+       "logps/chosen": -1.20406174659729,
+       "logps/rejected": -1.1857094764709473,
+       "loss": 1.6007,
+       "nll_loss": 1.4434431791305542,
+       "rewards/accuracies": 0.421875,
+       "rewards/chosen": -0.18060927093029022,
+       "rewards/margins": -0.0027528139762580395,
+       "rewards/rejected": -0.1778564453125,
+       "step": 96
+     },
+     {
+       "epoch": 0.6055588409225311,
+       "grad_norm": 21.6727294921875,
+       "learning_rate": 1.9047619047619045e-07,
+       "log_odds_chosen": 0.05749227851629257,
+       "log_odds_ratio": -0.7303333878517151,
+       "logits/chosen": 1.468267560005188,
+       "logits/rejected": 1.4105079174041748,
+       "logps/chosen": -1.216729998588562,
+       "logps/rejected": -1.2735037803649902,
+       "loss": 1.5421,
+       "nll_loss": 1.4667391777038574,
+       "rewards/accuracies": 0.48046875,
+       "rewards/chosen": -0.18250951170921326,
+       "rewards/margins": 0.008516057394444942,
+       "rewards/rejected": -0.19102558493614197,
+       "step": 128
+     },
+     {
+       "epoch": 0.7569485511531638,
+       "grad_norm": 18.762540817260742,
+       "learning_rate": 2.3809523809523806e-07,
+       "log_odds_chosen": -0.01149587519466877,
+       "log_odds_ratio": -0.7697539329528809,
+       "logits/chosen": 1.5571284294128418,
+       "logits/rejected": 1.4197614192962646,
+       "logps/chosen": -1.1996402740478516,
+       "logps/rejected": -1.206023931503296,
+       "loss": 1.4771,
+       "nll_loss": 1.3902133703231812,
+       "rewards/accuracies": 0.4609375,
+       "rewards/chosen": -0.17994605004787445,
+       "rewards/margins": 0.000957544194534421,
+       "rewards/rejected": -0.1809035986661911,
+       "step": 160
+     },
+     {
+       "epoch": 0.9083382613837966,
+       "grad_norm": 14.901942253112793,
+       "learning_rate": 2.857142857142857e-07,
+       "log_odds_chosen": -0.0564657598733902,
+       "log_odds_ratio": -0.791755735874176,
+       "logits/chosen": 1.7110127210617065,
+       "logits/rejected": 1.559685468673706,
+       "logps/chosen": -1.201224446296692,
+       "logps/rejected": -1.1592237949371338,
+       "loss": 1.4155,
+       "nll_loss": 1.3239426612854004,
+       "rewards/accuracies": 0.50390625,
+       "rewards/chosen": -0.18018370866775513,
+       "rewards/margins": -0.00630012946203351,
+       "rewards/rejected": -0.1738835722208023,
+       "step": 192
+     },
+     {
+       "epoch": 1.0597279716144294,
+       "grad_norm": 14.1319580078125,
+       "learning_rate": 3.333333333333333e-07,
+       "log_odds_chosen": -0.07105285674333572,
+       "log_odds_ratio": -0.7984029650688171,
+       "logits/chosen": 1.623414397239685,
+       "logits/rejected": 1.496307134628296,
+       "logps/chosen": -1.1715890169143677,
+       "logps/rejected": -1.1218650341033936,
+       "loss": 1.3513,
+       "nll_loss": 1.2719416618347168,
+       "rewards/accuracies": 0.46484375,
+       "rewards/chosen": -0.1757383644580841,
+       "rewards/margins": -0.007458594627678394,
+       "rewards/rejected": -0.16827978193759918,
+       "step": 224
+     },
+     {
+       "epoch": 1.2111176818450622,
+       "grad_norm": 13.425606727600098,
+       "learning_rate": 3.809523809523809e-07,
+       "log_odds_chosen": 0.10578853636980057,
+       "log_odds_ratio": -0.70930016040802,
+       "logits/chosen": 1.6068717241287231,
+       "logits/rejected": 1.3819518089294434,
+       "logps/chosen": -1.1136094331741333,
+       "logps/rejected": -1.1912662982940674,
+       "loss": 1.3252,
+       "nll_loss": 1.2232904434204102,
+       "rewards/accuracies": 0.546875,
+       "rewards/chosen": -0.16704143583774567,
+       "rewards/margins": 0.0116485096514225,
+       "rewards/rejected": -0.17868994176387787,
+       "step": 256
+     },
+     {
+       "epoch": 1.362507392075695,
+       "grad_norm": 11.342605590820312,
+       "learning_rate": 4.285714285714285e-07,
+       "log_odds_chosen": 0.13081349432468414,
+       "log_odds_ratio": -0.6838027238845825,
+       "logits/chosen": 1.4501845836639404,
+       "logits/rejected": 1.3310956954956055,
+       "logps/chosen": -1.0919809341430664,
+       "logps/rejected": -1.1673154830932617,
+       "loss": 1.2755,
+       "nll_loss": 1.1733828783035278,
+       "rewards/accuracies": 0.578125,
+       "rewards/chosen": -0.16379712522029877,
+       "rewards/margins": 0.011300182901322842,
+       "rewards/rejected": -0.1750973016023636,
+       "step": 288
+     },
+     {
+       "epoch": 1.5138971023063275,
+       "grad_norm": 12.543194770812988,
+       "learning_rate": 4.761904761904761e-07,
+       "log_odds_chosen": 0.19491538405418396,
+       "log_odds_ratio": -0.6595159769058228,
+       "logits/chosen": 1.4903298616409302,
+       "logits/rejected": 1.3049672842025757,
+       "logps/chosen": -1.0332714319229126,
+       "logps/rejected": -1.1428489685058594,
+       "loss": 1.2223,
+       "nll_loss": 1.0796581506729126,
+       "rewards/accuracies": 0.6171875,
+       "rewards/chosen": -0.15499071776866913,
+       "rewards/margins": 0.0164366252720356,
+       "rewards/rejected": -0.17142733931541443,
+       "step": 320
+     },
+     {
+       "epoch": 1.6652868125369604,
+       "grad_norm": 8.179709434509277,
+       "learning_rate": 4.999654636727764e-07,
+       "log_odds_chosen": 0.14331884682178497,
+       "log_odds_ratio": -0.6748344302177429,
+       "logits/chosen": 1.4205052852630615,
+       "logits/rejected": 1.3244390487670898,
+       "logps/chosen": -1.0807911157608032,
+       "logps/rejected": -1.159712314605713,
+       "loss": 1.1776,
+       "nll_loss": 1.0815861225128174,
+       "rewards/accuracies": 0.609375,
+       "rewards/chosen": -0.16211867332458496,
+       "rewards/margins": 0.011838208884000778,
+       "rewards/rejected": -0.17395688593387604,
+       "step": 352
+     },
+     {
+       "epoch": 1.8166765227675932,
+       "grad_norm": 9.002681732177734,
+       "learning_rate": 4.996892303047305e-07,
+       "log_odds_chosen": 0.15229541063308716,
+       "log_odds_ratio": -0.6689931154251099,
+       "logits/chosen": 1.3082184791564941,
+       "logits/rejected": 1.208222508430481,
+       "logps/chosen": -1.0531638860702515,
+       "logps/rejected": -1.1305123567581177,
+       "loss": 1.1209,
+       "nll_loss": 1.026604175567627,
+       "rewards/accuracies": 0.62890625,
+       "rewards/chosen": -0.15797458589076996,
+       "rewards/margins": 0.01160226296633482,
+       "rewards/rejected": -0.16957685351371765,
+       "step": 384
+     },
+     {
+       "epoch": 1.8923713778829097,
+       "eval_log_odds_chosen": 0.9119634628295898,
+       "eval_log_odds_ratio": -0.3477023243904114,
+       "eval_logits/chosen": 0.8482466340065002,
+       "eval_logits/rejected": 0.7518002986907959,
+       "eval_logps/chosen": -0.7484418153762817,
+       "eval_logps/rejected": -1.3053876161575317,
+       "eval_loss": 0.84984290599823,
+       "eval_nll_loss": 0.7749183773994446,
+       "eval_rewards/accuracies": 1.0,
+       "eval_rewards/chosen": -0.11226626485586166,
+       "eval_rewards/margins": 0.0835418850183487,
+       "eval_rewards/rejected": -0.19580814242362976,
+       "eval_runtime": 1.7821,
+       "eval_samples_per_second": 76.877,
+       "eval_steps_per_second": 10.101,
+       "step": 400
+     },
+     {
+       "epoch": 1.968066232998226,
+       "grad_norm": 10.631780624389648,
+       "learning_rate": 4.991370688303038e-07,
+       "log_odds_chosen": 0.20428910851478577,
+       "log_odds_ratio": -0.6562178134918213,
+       "logits/chosen": 1.2808618545532227,
+       "logits/rejected": 1.1230928897857666,
+       "logps/chosen": -1.0897853374481201,
+       "logps/rejected": -1.1929757595062256,
+       "loss": 1.0818,
+       "nll_loss": 1.0095133781433105,
+       "rewards/accuracies": 0.625,
+       "rewards/chosen": -0.16346779465675354,
+       "rewards/margins": 0.015478584915399551,
+       "rewards/rejected": -0.1789463758468628,
+       "step": 416
+     },
+     {
+       "epoch": 2.119455943228859,
+       "grad_norm": 9.022457122802734,
+       "learning_rate": 4.983095894354857e-07,
+       "log_odds_chosen": 0.22953583300113678,
+       "log_odds_ratio": -0.6313825845718384,
+       "logits/chosen": 1.349506139755249,
+       "logits/rejected": 1.1360180377960205,
+       "logps/chosen": -1.0178955793380737,
+       "logps/rejected": -1.142075538635254,
+       "loss": 1.0595,
+       "nll_loss": 0.9702749848365784,
+       "rewards/accuracies": 0.63671875,
+       "rewards/chosen": -0.15268434584140778,
+       "rewards/margins": 0.018626993522047997,
+       "rewards/rejected": -0.17131134867668152,
+       "step": 448
+     },
+     {
+       "epoch": 2.2708456534594914,
+       "grad_norm": 8.519028663635254,
+       "learning_rate": 4.972077065562821e-07,
+       "log_odds_chosen": 0.20490483939647675,
+       "log_odds_ratio": -0.6597353219985962,
+       "logits/chosen": 1.2364730834960938,
+       "logits/rejected": 1.1246590614318848,
+       "logps/chosen": -1.0860800743103027,
+       "logps/rejected": -1.1871649026870728,
+       "loss": 1.0455,
+       "nll_loss": 0.9942155480384827,
+       "rewards/accuracies": 0.65625,
+       "rewards/chosen": -0.16291199624538422,
+       "rewards/margins": 0.015162724070250988,
+       "rewards/rejected": -0.17807474732398987,
+       "step": 480
+     },
+     {
+       "epoch": 2.4222353636901244,
+       "grad_norm": 7.702695369720459,
+       "learning_rate": 4.958326378681848e-07,
+       "log_odds_chosen": 0.3035791516304016,
+       "log_odds_ratio": -0.6072664260864258,
+       "logits/chosen": 1.2193742990493774,
+       "logits/rejected": 1.0568186044692993,
+       "logps/chosen": -1.029651165008545,
+       "logps/rejected": -1.1974754333496094,
+       "loss": 1.031,
+       "nll_loss": 0.9461196660995483,
+       "rewards/accuracies": 0.7109375,
+       "rewards/chosen": -0.15444767475128174,
+       "rewards/margins": 0.02517363429069519,
+       "rewards/rejected": -0.17962132394313812,
+       "step": 512
+     },
+     {
+       "epoch": 2.573625073920757,
+       "grad_norm": 8.201448440551758,
+       "learning_rate": 4.941859029405353e-07,
+       "log_odds_chosen": 0.35751351714134216,
+       "log_odds_ratio": -0.5834794044494629,
+       "logits/chosen": 1.2276177406311035,
+       "logits/rejected": 1.0265512466430664,
+       "logps/chosen": -1.0028650760650635,
+       "logps/rejected": -1.1897025108337402,
+       "loss": 1.0218,
+       "nll_loss": 0.9072933793067932,
+       "rewards/accuracies": 0.73828125,
+       "rewards/chosen": -0.15042978525161743,
+       "rewards/margins": 0.028025589883327484,
+       "rewards/rejected": -0.17845536768436432,
+       "step": 544
+     },
+     {
+       "epoch": 2.72501478415139,
+       "grad_norm": 7.634998798370361,
+       "learning_rate": 4.922693215572695e-07,
+       "log_odds_chosen": 0.45870620012283325,
+       "log_odds_ratio": -0.54433274269104,
+       "logits/chosen": 1.1980278491973877,
+       "logits/rejected": 1.0682458877563477,
+       "logps/chosen": -0.978523313999176,
+       "logps/rejected": -1.243023157119751,
+       "loss": 0.9966,
+       "nll_loss": 0.921144962310791,
+       "rewards/accuracies": 0.7578125,
+       "rewards/chosen": -0.14677852392196655,
+       "rewards/margins": 0.03967496007680893,
+       "rewards/rejected": -0.1864534616470337,
+       "step": 576
+     },
+     {
+       "epoch": 2.8764044943820224,
+       "grad_norm": 7.217565059661865,
+       "learning_rate": 4.900850117058999e-07,
+       "log_odds_chosen": 0.47186481952667236,
+       "log_odds_ratio": -0.5484339594841003,
+       "logits/chosen": 1.152608871459961,
+       "logits/rejected": 1.015822172164917,
+       "logps/chosen": -1.01084566116333,
+       "logps/rejected": -1.277451992034912,
+       "loss": 0.9987,
+       "nll_loss": 0.9013168215751648,
+       "rewards/accuracies": 0.7421875,
+       "rewards/chosen": -0.1516268402338028,
+       "rewards/margins": 0.03999098762869835,
+       "rewards/rejected": -0.19161783158779144,
+       "step": 608
+     },
+     {
+       "epoch": 3.0277942046126554,
+       "grad_norm": 6.927852630615234,
+       "learning_rate": 4.876353872369572e-07,
+       "log_odds_chosen": 0.48829925060272217,
+       "log_odds_ratio": -0.5393761396408081,
+       "logits/chosen": 1.0784587860107422,
+       "logits/rejected": 0.9411880970001221,
+       "logps/chosen": -1.0088391304016113,
+       "logps/rejected": -1.271460771560669,
+       "loss": 0.9837,
+       "nll_loss": 0.9265193343162537,
+       "rewards/accuracies": 0.765625,
+       "rewards/chosen": -0.15132588148117065,
+       "rewards/margins": 0.039393242448568344,
+       "rewards/rejected": -0.1907191127538681,
+       "step": 640
+     },
+     {
+       "epoch": 3.179183914843288,
+       "grad_norm": 6.685938358306885,
+       "learning_rate": 4.849231551964771e-07,
+       "log_odds_chosen": 0.562548041343689
395
+ "log_odds_ratio": -0.5139177441596985,
396
+ "logits/chosen": 1.1162034273147583,
397
+ "logits/rejected": 0.927276611328125,
398
+ "logps/chosen": -0.9777481555938721,
399
+ "logps/rejected": -1.3050942420959473,
400
+ "loss": 0.9845,
401
+ "nll_loss": 0.8839849233627319,
402
+ "rewards/accuracies": 0.78125,
403
+ "rewards/chosen": -0.14666223526000977,
404
+ "rewards/margins": 0.04910193011164665,
405
+ "rewards/rejected": -0.19576415419578552,
406
+ "step": 672
407
+ },
408
+ {
409
+ "epoch": 3.330573625073921,
410
+ "grad_norm": 5.24590539932251,
411
+ "learning_rate": 4.819513128344813e-07,
412
+ "log_odds_chosen": 0.4602447748184204,
413
+ "log_odds_ratio": -0.5505639314651489,
414
+ "logits/chosen": 1.1351033449172974,
415
+ "logits/rejected": 0.9407525062561035,
416
+ "logps/chosen": -0.9991594552993774,
417
+ "logps/rejected": -1.2416499853134155,
418
+ "loss": 0.9658,
419
+ "nll_loss": 0.8639576435089111,
420
+ "rewards/accuracies": 0.75,
421
+ "rewards/chosen": -0.14987392723560333,
422
+ "rewards/margins": 0.036373574286699295,
423
+ "rewards/rejected": -0.18624748289585114,
424
+ "step": 704
425
+ },
426
+ {
427
+ "epoch": 3.4819633353045534,
428
+ "grad_norm": 6.936483860015869,
429
+ "learning_rate": 4.787231442927586e-07,
430
+ "log_odds_chosen": 0.5815439820289612,
431
+ "log_odds_ratio": -0.5048896074295044,
432
+ "logits/chosen": 1.0991628170013428,
433
+ "logits/rejected": 0.8964717984199524,
434
+ "logps/chosen": -0.9201152920722961,
435
+ "logps/rejected": -1.242402195930481,
436
+ "loss": 0.9681,
437
+ "nll_loss": 0.8501954674720764,
438
+ "rewards/accuracies": 0.7890625,
439
+ "rewards/chosen": -0.13801729679107666,
440
+ "rewards/margins": 0.04834304004907608,
441
+ "rewards/rejected": -0.18636034429073334,
442
+ "step": 736
443
+ },
444
+ {
445
+ "epoch": 3.6333530455351863,
446
+ "grad_norm": 6.671252250671387,
447
+ "learning_rate": 4.752422169756047e-07,
448
+ "log_odds_chosen": 0.494718998670578,
449
+ "log_odds_ratio": -0.5431851148605347,
450
+ "logits/chosen": 1.1249089241027832,
451
+ "logits/rejected": 1.0157999992370605,
452
+ "logps/chosen": -1.0117340087890625,
453
+ "logps/rejected": -1.293691635131836,
454
+ "loss": 0.978,
455
+ "nll_loss": 0.9269427061080933,
456
+ "rewards/accuracies": 0.7578125,
457
+ "rewards/chosen": -0.15176010131835938,
458
+ "rewards/margins": 0.042293645441532135,
459
+ "rewards/rejected": -0.1940537393093109,
460
+ "step": 768
461
+ },
462
+ {
463
+ "epoch": 3.7847427557658193,
464
+ "grad_norm": 7.028476715087891,
465
+ "learning_rate": 4.715123776075336e-07,
466
+ "log_odds_chosen": 0.5061647891998291,
467
+ "log_odds_ratio": -0.5440715551376343,
468
+ "logits/chosen": 1.0956813097000122,
469
+ "logits/rejected": 0.9653363823890686,
470
+ "logps/chosen": -1.0257270336151123,
471
+ "logps/rejected": -1.3192400932312012,
472
+ "loss": 0.9528,
473
+ "nll_loss": 0.8593652844429016,
474
+ "rewards/accuracies": 0.75390625,
475
+ "rewards/chosen": -0.15385906398296356,
476
+ "rewards/margins": 0.044026970863342285,
477
+ "rewards/rejected": -0.19788604974746704,
478
+ "step": 800
479
+ },
480
+ {
481
+ "epoch": 3.7847427557658193,
482
+ "eval_log_odds_chosen": 1.0081945657730103,
483
+ "eval_log_odds_ratio": -0.3223256468772888,
484
+ "eval_logits/chosen": 0.7514240145683289,
485
+ "eval_logits/rejected": 0.6671679615974426,
486
+ "eval_logps/chosen": -0.7009862065315247,
487
+ "eval_logps/rejected": -1.2981789112091064,
488
+ "eval_loss": 0.7884585857391357,
489
+ "eval_nll_loss": 0.7162714600563049,
490
+ "eval_rewards/accuracies": 1.0,
491
+ "eval_rewards/chosen": -0.10514792799949646,
492
+ "eval_rewards/margins": 0.08957889676094055,
493
+ "eval_rewards/rejected": -0.1947268396615982,
494
+ "eval_runtime": 1.7846,
495
+ "eval_samples_per_second": 76.767,
496
+ "eval_steps_per_second": 10.086,
497
+ "step": 800
498
+ },
499
+ {
500
+ "epoch": 3.936132465996452,
501
+ "grad_norm": 6.8125834465026855,
502
+ "learning_rate": 4.675377479823153e-07,
503
+ "log_odds_chosen": 0.6920242309570312,
504
+ "log_odds_ratio": -0.4726037383079529,
505
+ "logits/chosen": 1.0377426147460938,
506
+ "logits/rejected": 0.9017472863197327,
507
+ "logps/chosen": -0.922009289264679,
508
+ "logps/rejected": -1.3119571208953857,
509
+ "loss": 0.9638,
510
+ "nll_loss": 0.8638713359832764,
511
+ "rewards/accuracies": 0.82421875,
512
+ "rewards/chosen": -0.13830138742923737,
513
+ "rewards/margins": 0.05849217250943184,
514
+ "rewards/rejected": -0.1967935562133789,
515
+ "step": 832
516
+ },
517
+ {
518
+ "epoch": 4.087522176227084,
519
+ "grad_norm": 6.372574806213379,
520
+ "learning_rate": 4.6332272040803887e-07,
521
+ "log_odds_chosen": 0.6877175569534302,
522
+ "log_odds_ratio": -0.46182751655578613,
523
+ "logits/chosen": 1.1110641956329346,
524
+ "logits/rejected": 0.9074443578720093,
525
+ "logps/chosen": -0.9194135069847107,
526
+ "logps/rejected": -1.3282839059829712,
527
+ "loss": 0.9604,
528
+ "nll_loss": 0.8556405901908875,
529
+ "rewards/accuracies": 0.83203125,
530
+ "rewards/chosen": -0.13791203498840332,
531
+ "rewards/margins": 0.06133056432008743,
532
+ "rewards/rejected": -0.19924262166023254,
533
+ "step": 864
534
+ },
535
+ {
536
+ "epoch": 4.238911886457718,
537
+ "grad_norm": 6.756438255310059,
538
+ "learning_rate": 4.588719528532341e-07,
539
+ "log_odds_chosen": 0.6642757058143616,
540
+ "log_odds_ratio": -0.4779506325721741,
541
+ "logits/chosen": 1.0594482421875,
542
+ "logits/rejected": 0.9946908354759216,
543
+ "logps/chosen": -0.965737521648407,
544
+ "logps/rejected": -1.359665870666504,
545
+ "loss": 0.954,
546
+ "nll_loss": 0.8865021467208862,
547
+ "rewards/accuracies": 0.77734375,
548
+ "rewards/chosen": -0.1448606252670288,
549
+ "rewards/margins": 0.059089258313179016,
550
+ "rewards/rejected": -0.20394988358020782,
551
+ "step": 896
552
+ },
553
+ {
554
+ "epoch": 4.39030159668835,
555
+ "grad_norm": 7.890772342681885,
556
+ "learning_rate": 4.5419036379941414e-07,
557
+ "log_odds_chosen": 0.7298649549484253,
558
+ "log_odds_ratio": -0.4601740837097168,
559
+ "logits/chosen": 1.1765400171279907,
560
+ "logits/rejected": 0.9228672981262207,
561
+ "logps/chosen": -0.9468764066696167,
562
+ "logps/rejected": -1.3811423778533936,
563
+ "loss": 0.9453,
564
+ "nll_loss": 0.8525615930557251,
565
+ "rewards/accuracies": 0.8203125,
566
+ "rewards/chosen": -0.1420314460992813,
567
+ "rewards/margins": 0.06513990461826324,
568
+ "rewards/rejected": -0.20717135071754456,
569
+ "step": 928
570
+ },
571
+ {
572
+ "epoch": 4.541691306918983,
573
+ "grad_norm": 6.184362888336182,
574
+ "learning_rate": 4.492831268057306e-07,
575
+ "log_odds_chosen": 0.7427738904953003,
576
+ "log_odds_ratio": -0.46771693229675293,
577
+ "logits/chosen": 1.0225163698196411,
578
+ "logits/rejected": 0.9077222943305969,
579
+ "logps/chosen": -0.9970439672470093,
580
+ "logps/rejected": -1.456943154335022,
581
+ "loss": 0.938,
582
+ "nll_loss": 0.8868236541748047,
583
+ "rewards/accuracies": 0.77734375,
584
+ "rewards/chosen": -0.14955660700798035,
585
+ "rewards/margins": 0.06898489594459534,
586
+ "rewards/rejected": -0.2185414880514145,
587
+ "step": 960
588
+ },
589
+ {
590
+ "epoch": 4.693081017149615,
591
+ "grad_norm": 6.933351993560791,
592
+ "learning_rate": 4.441556647917446e-07,
593
+ "log_odds_chosen": 0.8609212636947632,
594
+ "log_odds_ratio": -0.43812429904937744,
595
+ "logits/chosen": 1.0455502271652222,
596
+ "logits/rejected": 0.906272292137146,
597
+ "logps/chosen": -0.9208173155784607,
598
+ "logps/rejected": -1.457839012145996,
599
+ "loss": 0.9434,
600
+ "nll_loss": 0.8511086106300354,
601
+ "rewards/accuracies": 0.83203125,
602
+ "rewards/chosen": -0.13812260329723358,
603
+ "rewards/margins": 0.08055327087640762,
604
+ "rewards/rejected": -0.2186758816242218,
605
+ "step": 992
606
+ },
607
+ {
608
+ "epoch": 4.844470727380249,
609
+ "grad_norm": 6.150376796722412,
610
+ "learning_rate": 4.3881364404463375e-07,
611
+ "log_odds_chosen": 0.9446333050727844,
612
+ "log_odds_ratio": -0.4172128438949585,
613
+ "logits/chosen": 1.110432744026184,
614
+ "logits/rejected": 0.8510321974754333,
615
+ "logps/chosen": -0.9673236608505249,
616
+ "logps/rejected": -1.5632784366607666,
617
+ "loss": 0.9264,
618
+ "nll_loss": 0.8780388832092285,
619
+ "rewards/accuracies": 0.81640625,
620
+ "rewards/chosen": -0.14509856700897217,
621
+ "rewards/margins": 0.08939322084188461,
622
+ "rewards/rejected": -0.234491765499115,
623
+ "step": 1024
624
+ },
625
+ {
626
+ "epoch": 4.995860437610881,
627
+ "grad_norm": 5.648180961608887,
628
+ "learning_rate": 4.332629679574565e-07,
629
+ "log_odds_chosen": 0.9642012715339661,
630
+ "log_odds_ratio": -0.42083150148391724,
631
+ "logits/chosen": 1.0487498044967651,
632
+ "logits/rejected": 0.8362730741500854,
633
+ "logps/chosen": -0.9618784189224243,
634
+ "logps/rejected": -1.5873997211456299,
635
+ "loss": 0.925,
636
+ "nll_loss": 0.8492802381515503,
637
+ "rewards/accuracies": 0.8203125,
638
+ "rewards/chosen": -0.1442817747592926,
639
+ "rewards/margins": 0.09382818639278412,
640
+ "rewards/rejected": -0.23810997605323792,
641
+ "step": 1056
642
+ },
643
+ {
644
+ "epoch": 5.147250147841514,
645
+ "grad_norm": 6.209354400634766,
646
+ "learning_rate": 4.2750977050539503e-07,
647
+ "log_odds_chosen": 1.127962350845337,
648
+ "log_odds_ratio": -0.3810023367404938,
649
+ "logits/chosen": 0.9537469148635864,
650
+ "logits/rejected": 0.8183348178863525,
651
+ "logps/chosen": -0.9366539120674133,
652
+ "logps/rejected": -1.6753818988800049,
653
+ "loss": 0.9233,
654
+ "nll_loss": 0.8444766998291016,
655
+ "rewards/accuracies": 0.828125,
656
+ "rewards/chosen": -0.140498086810112,
657
+ "rewards/margins": 0.11080917716026306,
658
+ "rewards/rejected": -0.25130727887153625,
659
+ "step": 1088
660
+ },
661
+ {
662
+ "epoch": 5.298639858072146,
663
+ "grad_norm": 6.09550666809082,
664
+ "learning_rate": 4.2156040946718343e-07,
665
+ "log_odds_chosen": 1.1001328229904175,
666
+ "log_odds_ratio": -0.40834715962409973,
667
+ "logits/chosen": 0.9538164734840393,
668
+ "logits/rejected": 0.8565899133682251,
669
+ "logps/chosen": -0.9979989528656006,
670
+ "logps/rejected": -1.736957311630249,
671
+ "loss": 0.9264,
672
+ "nll_loss": 0.8773810863494873,
673
+ "rewards/accuracies": 0.8203125,
674
+ "rewards/chosen": -0.1496998369693756,
675
+ "rewards/margins": 0.11084374785423279,
676
+ "rewards/rejected": -0.2605435848236084,
677
+ "step": 1120
678
+ },
679
+ {
680
+ "epoch": 5.45002956830278,
681
+ "grad_norm": 5.165525913238525,
682
+ "learning_rate": 4.154214593992149e-07,
683
+ "log_odds_chosen": 1.4560502767562866,
684
+ "log_odds_ratio": -0.36019906401634216,
685
+ "logits/chosen": 1.0402196645736694,
686
+ "logits/rejected": 0.8284226655960083,
687
+ "logps/chosen": -0.9208565950393677,
688
+ "logps/rejected": -1.9513992071151733,
689
+ "loss": 0.9184,
690
+ "nll_loss": 0.8742519617080688,
691
+ "rewards/accuracies": 0.83984375,
692
+ "rewards/chosen": -0.13812850415706635,
693
+ "rewards/margins": 0.15458139777183533,
694
+ "rewards/rejected": -0.2927098870277405,
695
+ "step": 1152
696
+ },
697
+ {
698
+ "epoch": 5.601419278533412,
699
+ "grad_norm": 5.565188407897949,
700
+ "learning_rate": 4.090997043700909e-07,
701
+ "log_odds_chosen": 1.8058509826660156,
702
+ "log_odds_ratio": -0.34831157326698303,
703
+ "logits/chosen": 0.9435930252075195,
704
+ "logits/rejected": 0.7780628204345703,
705
+ "logps/chosen": -0.9824676513671875,
706
+ "logps/rejected": -2.3647959232330322,
707
+ "loss": 0.915,
708
+ "nll_loss": 0.9026926159858704,
709
+ "rewards/accuracies": 0.828125,
710
+ "rewards/chosen": -0.1473701447248459,
711
+ "rewards/margins": 0.2073492407798767,
712
+ "rewards/rejected": -0.3547194004058838,
713
+ "step": 1184
714
+ },
715
+ {
716
+ "epoch": 5.677114133648729,
717
+ "eval_log_odds_chosen": 1.515297532081604,
718
+ "eval_log_odds_ratio": -0.21902640163898468,
719
+ "eval_logits/chosen": 0.5878681540489197,
720
+ "eval_logits/rejected": 0.514284610748291,
721
+ "eval_logps/chosen": -0.6912536025047302,
722
+ "eval_logps/rejected": -1.6357617378234863,
723
+ "eval_loss": 0.7435688972473145,
724
+ "eval_nll_loss": 0.6848150491714478,
725
+ "eval_rewards/accuracies": 1.0,
726
+ "eval_rewards/chosen": -0.10368803888559341,
727
+ "eval_rewards/margins": 0.14167624711990356,
728
+ "eval_rewards/rejected": -0.2453642636537552,
729
+ "eval_runtime": 1.7754,
730
+ "eval_samples_per_second": 77.164,
731
+ "eval_steps_per_second": 10.138,
732
+ "step": 1200
733
+ },
734
+ {
735
+ "epoch": 5.752808988764045,
736
+ "grad_norm": 5.417468547821045,
737
+ "learning_rate": 4.0260213046364076e-07,
738
+ "log_odds_chosen": 2.085019111633301,
739
+ "log_odds_ratio": -0.31234151124954224,
740
+ "logits/chosen": 0.9959389567375183,
741
+ "logits/rejected": 0.7742877006530762,
742
+ "logps/chosen": -0.9441136121749878,
743
+ "logps/rejected": -2.5619935989379883,
744
+ "loss": 0.9004,
745
+ "nll_loss": 0.8535679578781128,
746
+ "rewards/accuracies": 0.8359375,
747
+ "rewards/chosen": -0.1416170299053192,
748
+ "rewards/margins": 0.24268200993537903,
749
+ "rewards/rejected": -0.38429906964302063,
750
+ "step": 1216
751
+ },
752
+ {
753
+ "epoch": 5.904198698994678,
754
+ "grad_norm": 7.9577178955078125,
755
+ "learning_rate": 3.959359180586975e-07,
756
+ "log_odds_chosen": 2.5801219940185547,
757
+ "log_odds_ratio": -0.31198883056640625,
758
+ "logits/chosen": 0.9304694533348083,
759
+ "logits/rejected": 0.7099679112434387,
760
+ "logps/chosen": -1.004030466079712,
761
+ "logps/rejected": -3.1341824531555176,
762
+ "loss": 0.9199,
763
+ "nll_loss": 0.888052225112915,
764
+ "rewards/accuracies": 0.8515625,
765
+ "rewards/chosen": -0.15060456097126007,
766
+ "rewards/margins": 0.3195228576660156,
767
+ "rewards/rejected": -0.4701274335384369,
768
+ "step": 1248
769
+ },
770
+ {
771
+ "epoch": 6.055588409225311,
772
+ "grad_norm": 7.205864906311035,
773
+ "learning_rate": 3.891084338941603e-07,
774
+ "log_odds_chosen": 2.632976770401001,
775
+ "log_odds_ratio": -0.33525609970092773,
776
+ "logits/chosen": 0.8941175937652588,
777
+ "logits/rejected": 0.712418794631958,
778
+ "logps/chosen": -0.9687196016311646,
779
+ "logps/rejected": -3.1566426753997803,
780
+ "loss": 0.9156,
781
+ "nll_loss": 0.8564908504486084,
782
+ "rewards/accuracies": 0.84765625,
783
+ "rewards/chosen": -0.14530794322490692,
784
+ "rewards/margins": 0.3281884789466858,
785
+ "rewards/rejected": -0.4734964370727539,
786
+ "step": 1280
787
+ },
788
+ {
789
+ "epoch": 6.206978119455943,
790
+ "grad_norm": 5.557631015777588,
791
+ "learning_rate": 3.8212722292811383e-07,
792
+ "log_odds_chosen": 2.9796371459960938,
793
+ "log_odds_ratio": -0.3066112995147705,
794
+ "logits/chosen": 0.9908494353294373,
795
+ "logits/rejected": 0.7001262903213501,
796
+ "logps/chosen": -0.9531494975090027,
797
+ "logps/rejected": -3.451514720916748,
798
+ "loss": 0.8945,
799
+ "nll_loss": 0.8682339191436768,
800
+ "rewards/accuracies": 0.83203125,
801
+ "rewards/chosen": -0.1429724246263504,
802
+ "rewards/margins": 0.3747548460960388,
803
+ "rewards/rejected": -0.517727255821228,
804
+ "step": 1312
805
+ },
806
+ {
807
+ "epoch": 6.358367829686576,
808
+ "grad_norm": 7.154934883117676,
809
+ "learning_rate": 3.75e-07,
810
+ "log_odds_chosen": 3.150144338607788,
811
+ "log_odds_ratio": -0.30607521533966064,
812
+ "logits/chosen": 0.8532112240791321,
813
+ "logits/rejected": 0.688872754573822,
814
+ "logps/chosen": -0.9391156435012817,
815
+ "logps/rejected": -3.6156792640686035,
816
+ "loss": 0.9013,
817
+ "nll_loss": 0.8500258922576904,
818
+ "rewards/accuracies": 0.84765625,
819
+ "rewards/chosen": -0.14086736738681793,
820
+ "rewards/margins": 0.40148457884788513,
821
+ "rewards/rejected": -0.5423519611358643,
822
+ "step": 1344
823
+ },
824
+ {
825
+ "epoch": 6.509757539917208,
826
+ "grad_norm": 5.582004070281982,
827
+ "learning_rate": 3.67734641305055e-07,
828
+ "log_odds_chosen": 3.518749475479126,
829
+ "log_odds_ratio": -0.26204630732536316,
830
+ "logits/chosen": 1.0231519937515259,
831
+ "logits/rejected": 0.6429997682571411,
832
+ "logps/chosen": -0.8629344701766968,
833
+ "logps/rejected": -3.8456180095672607,
834
+ "loss": 0.9023,
835
+ "nll_loss": 0.801094114780426,
836
+ "rewards/accuracies": 0.8671875,
837
+ "rewards/chosen": -0.12944017350673676,
838
+ "rewards/margins": 0.4474025368690491,
839
+ "rewards/rejected": -0.576842725276947,
840
+ "step": 1376
841
+ },
842
+ {
843
+ "epoch": 6.661147250147842,
844
+ "grad_norm": 6.333008766174316,
845
+ "learning_rate": 3.6033917569043597e-07,
846
+ "log_odds_chosen": 3.5107364654541016,
847
+ "log_odds_ratio": -0.25962206721305847,
848
+ "logits/chosen": 0.9015189409255981,
849
+ "logits/rejected": 0.604630172252655,
850
+ "logps/chosen": -0.9094609618186951,
851
+ "logps/rejected": -3.901463270187378,
852
+ "loss": 0.8982,
853
+ "nll_loss": 0.8305466175079346,
854
+ "rewards/accuracies": 0.87109375,
855
+ "rewards/chosen": -0.1364191472530365,
856
+ "rewards/margins": 0.44880032539367676,
857
+ "rewards/rejected": -0.5852195024490356,
858
+ "step": 1408
859
+ },
860
+ {
861
+ "epoch": 6.812536960378474,
862
+ "grad_norm": 9.112639427185059,
863
+ "learning_rate": 3.528217757826529e-07,
864
+ "log_odds_chosen": 3.822404384613037,
865
+ "log_odds_ratio": -0.28991812467575073,
866
+ "logits/chosen": 0.9384167790412903,
867
+ "logits/rejected": 0.6002436280250549,
868
+ "logps/chosen": -0.966259241104126,
869
+ "logps/rejected": -4.308917999267578,
870
+ "loss": 0.899,
871
+ "nll_loss": 0.8516695499420166,
872
+ "rewards/accuracies": 0.83203125,
873
+ "rewards/chosen": -0.1449388712644577,
874
+ "rewards/margins": 0.5013989210128784,
875
+ "rewards/rejected": -0.6463377475738525,
876
+ "step": 1440
877
+ },
878
+ {
879
+ "epoch": 6.963926670609107,
880
+ "grad_norm": 7.574125289916992,
881
+ "learning_rate": 3.4519074895611236e-07,
882
+ "log_odds_chosen": 3.943324327468872,
883
+ "log_odds_ratio": -0.26691746711730957,
884
+ "logits/chosen": 0.9103025197982788,
885
+ "logits/rejected": 0.6238164901733398,
886
+ "logps/chosen": -0.8985946774482727,
887
+ "logps/rejected": -4.343371391296387,
888
+ "loss": 0.8962,
889
+ "nll_loss": 0.8179515600204468,
890
+ "rewards/accuracies": 0.8984375,
891
+ "rewards/chosen": -0.13478921353816986,
892
+ "rewards/margins": 0.516716480255127,
893
+ "rewards/rejected": -0.651505708694458,
894
+ "step": 1472
895
+ },
896
+ {
897
+ "epoch": 7.11531638083974,
898
+ "grad_norm": 6.7364115715026855,
899
+ "learning_rate": 3.374545281527537e-07,
900
+ "log_odds_chosen": 4.374906539916992,
901
+ "log_odds_ratio": -0.2600148320198059,
902
+ "logits/chosen": 0.9600415229797363,
903
+ "logits/rejected": 0.6132468581199646,
904
+ "logps/chosen": -0.9232965707778931,
905
+ "logps/rejected": -4.797858715057373,
906
+ "loss": 0.8895,
907
+ "nll_loss": 0.8346379995346069,
908
+ "rewards/accuracies": 0.87109375,
909
+ "rewards/chosen": -0.13849450647830963,
910
+ "rewards/margins": 0.5811843872070312,
911
+ "rewards/rejected": -0.7196788787841797,
912
+ "step": 1504
913
+ },
914
+ {
915
+ "epoch": 7.266706091070373,
916
+ "grad_norm": 8.94677448272705,
917
+ "learning_rate": 3.296216625629211e-07,
918
+ "log_odds_chosen": 3.412320375442505,
919
+ "log_odds_ratio": -0.2966606616973877,
920
+ "logits/chosen": 0.9029962420463562,
921
+ "logits/rejected": 0.6578757762908936,
922
+ "logps/chosen": -0.9623314738273621,
923
+ "logps/rejected": -3.89890193939209,
924
+ "loss": 0.8925,
925
+ "nll_loss": 0.8433880805969238,
926
+ "rewards/accuracies": 0.8671875,
927
+ "rewards/chosen": -0.14434972405433655,
928
+ "rewards/margins": 0.4404855966567993,
929
+ "rewards/rejected": -0.5848353505134583,
930
+ "step": 1536
931
+ },
932
+ {
933
+ "epoch": 7.418095801301005,
934
+ "grad_norm": 5.7957353591918945,
935
+ "learning_rate": 3.2170080817777257e-07,
936
+ "log_odds_chosen": 4.052781581878662,
937
+ "log_odds_ratio": -0.2798649072647095,
938
+ "logits/chosen": 0.9068763256072998,
939
+ "logits/rejected": 0.6250233054161072,
940
+ "logps/chosen": -0.9688931107521057,
941
+ "logps/rejected": -4.529140472412109,
942
+ "loss": 0.9004,
943
+ "nll_loss": 0.8530284762382507,
944
+ "rewards/accuracies": 0.859375,
945
+ "rewards/chosen": -0.14533399045467377,
946
+ "rewards/margins": 0.534037172794342,
947
+ "rewards/rejected": -0.6793711185455322,
948
+ "step": 1568
949
+ },
950
+ {
951
+ "epoch": 7.569485511531638,
952
+ "grad_norm": 6.567281723022461,
953
+ "learning_rate": 3.137007182236637e-07,
954
+ "log_odds_chosen": 3.9496092796325684,
955
+ "log_odds_ratio": -0.25981855392456055,
956
+ "logits/chosen": 0.9131721258163452,
957
+ "logits/rejected": 0.6535216569900513,
958
+ "logps/chosen": -0.9185097813606262,
959
+ "logps/rejected": -4.3686933517456055,
960
+ "loss": 0.892,
961
+ "nll_loss": 0.8527241945266724,
962
+ "rewards/accuracies": 0.87890625,
963
+ "rewards/chosen": -0.1377764791250229,
964
+ "rewards/margins": 0.5175275206565857,
965
+ "rewards/rejected": -0.655303955078125,
966
+ "step": 1600
967
+ },
968
+ {
969
+ "epoch": 7.569485511531638,
970
+ "eval_log_odds_chosen": 1.6673216819763184,
971
+ "eval_log_odds_ratio": -0.19718672335147858,
972
+ "eval_logits/chosen": 0.5625311136245728,
973
+ "eval_logits/rejected": 0.4984322190284729,
974
+ "eval_logps/chosen": -0.6667929887771606,
975
+ "eval_logps/rejected": -1.6969513893127441,
976
+ "eval_loss": 0.7337117195129395,
977
+ "eval_nll_loss": 0.6776795387268066,
978
+ "eval_rewards/accuracies": 1.0,
979
+ "eval_rewards/chosen": -0.1000189557671547,
980
+ "eval_rewards/margins": 0.15452374517917633,
981
+ "eval_rewards/rejected": -0.2545427083969116,
982
+ "eval_runtime": 1.7831,
983
+ "eval_samples_per_second": 76.831,
984
+ "eval_steps_per_second": 10.095,
985
+ "step": 1600
986
+ },
987
+ {
988
+ "epoch": 7.720875221762271,
989
+ "grad_norm": 8.987198829650879,
990
+ "learning_rate": 3.056302334890786e-07,
991
+ "log_odds_chosen": 3.926710844039917,
992
+ "log_odds_ratio": -0.2714899480342865,
993
+ "logits/chosen": 0.9191571474075317,
994
+ "logits/rejected": 0.5953992605209351,
995
+ "logps/chosen": -0.9368714094161987,
996
+ "logps/rejected": -4.384879112243652,
997
+ "loss": 0.888,
998
+ "nll_loss": 0.8406177759170532,
999
+ "rewards/accuracies": 0.8671875,
1000
+ "rewards/chosen": -0.14053073525428772,
1001
+ "rewards/margins": 0.5172011852264404,
1002
+ "rewards/rejected": -0.6577318906784058,
1003
+ "step": 1632
1004
+ },
1005
+ {
1006
+ "epoch": 7.872264931992904,
1007
+ "grad_norm": 5.6181230545043945,
1008
+ "learning_rate": 2.974982725547975e-07,
1009
+ "log_odds_chosen": 3.617192506790161,
1010
+ "log_odds_ratio": -0.32495206594467163,
1011
+ "logits/chosen": 0.8457501530647278,
1012
+ "logits/rejected": 0.6253533363342285,
1013
+ "logps/chosen": -1.0049875974655151,
1014
+ "logps/rejected": -4.156848907470703,
1015
+ "loss": 0.8977,
1016
+ "nll_loss": 0.8724310994148254,
1017
+ "rewards/accuracies": 0.85546875,
1018
+ "rewards/chosen": -0.1507481336593628,
1019
+ "rewards/margins": 0.47277915477752686,
1020
+ "rewards/rejected": -0.6235272884368896,
1021
+ "step": 1664
1022
+ },
1023
+ {
1024
+ "epoch": 8.023654642223537,
1025
+ "grad_norm": 5.31005334854126,
1026
+ "learning_rate": 2.893138219380963e-07,
1027
+ "log_odds_chosen": 4.234038829803467,
1028
+ "log_odds_ratio": -0.30224624276161194,
1029
+ "logits/chosen": 0.922171950340271,
1030
+ "logits/rejected": 0.5847682952880859,
1031
+ "logps/chosen": -0.9686514139175415,
1032
+ "logps/rejected": -4.734119892120361,
1033
+ "loss": 0.8864,
1034
+ "nll_loss": 0.8605988025665283,
1035
+ "rewards/accuracies": 0.859375,
1036
+ "rewards/chosen": -0.14529772102832794,
1037
+ "rewards/margins": 0.5648203492164612,
1038
+ "rewards/rejected": -0.7101180553436279,
1039
+ "step": 1696
1040
+ },
1041
+ {
1042
+ "epoch": 8.175044352454169,
1043
+ "grad_norm": 4.773166179656982,
1044
+ "learning_rate": 2.810859261618713e-07,
1045
+ "log_odds_chosen": 4.176573753356934,
1046
+ "log_odds_ratio": -0.2747136056423187,
1047
+ "logits/chosen": 0.9669155478477478,
1048
+ "logits/rejected": 0.6131560206413269,
1049
+ "logps/chosen": -0.941318690776825,
1050
+ "logps/rejected": -4.627261638641357,
1051
+ "loss": 0.8908,
1052
+ "nll_loss": 0.8429233431816101,
1053
+ "rewards/accuracies": 0.875,
1054
+ "rewards/chosen": -0.1411978155374527,
1055
+ "rewards/margins": 0.5528914928436279,
1056
+ "rewards/rejected": -0.6940892934799194,
1057
+ "step": 1728
1058
+ },
1059
+ {
1060
+ "epoch": 8.326434062684802,
1061
+ "grad_norm": 7.928328990936279,
1062
+ "learning_rate": 2.728236777596621e-07,
1063
+ "log_odds_chosen": 4.232769012451172,
1064
+ "log_odds_ratio": -0.2622223496437073,
1065
+ "logits/chosen": 0.8704826831817627,
1066
+ "logits/rejected": 0.6309795379638672,
1067
+ "logps/chosen": -0.9345431327819824,
1068
+ "logps/rejected": -4.6482343673706055,
1069
+ "loss": 0.8856,
1070
+ "nll_loss": 0.849586009979248,
1071
+ "rewards/accuracies": 0.87109375,
1072
+ "rewards/chosen": -0.14018146693706512,
1073
+ "rewards/margins": 0.5570536851882935,
1074
+ "rewards/rejected": -0.6972352266311646,
1075
+ "step": 1760
1076
+ },
1077
+ {
1078
+ "epoch": 8.477823772915436,
1079
+ "grad_norm": 7.50920295715332,
1080
+ "learning_rate": 2.6453620722761895e-07,
1081
+ "log_odds_chosen": 3.835066795349121,
1082
+ "log_odds_ratio": -0.2713623344898224,
1083
+ "logits/chosen": 0.8469685316085815,
1084
+ "logits/rejected": 0.5746083855628967,
1085
+ "logps/chosen": -0.9510048031806946,
1086
+ "logps/rejected": -4.287370204925537,
1087
+ "loss": 0.9005,
1088
+ "nll_loss": 0.8307653069496155,
1089
+ "rewards/accuracies": 0.8828125,
1090
+ "rewards/chosen": -0.14265072345733643,
1091
+ "rewards/margins": 0.5004547238349915,
1092
+ "rewards/rejected": -0.6431055068969727,
1093
+ "step": 1792
1094
+ },
1095
+ {
1096
+ "epoch": 8.629213483146067,
1097
+ "grad_norm": 4.420612812042236,
1098
+ "learning_rate": 2.5623267293451823e-07,
1099
+ "log_odds_chosen": 4.375966548919678,
1100
+ "log_odds_ratio": -0.26864683628082275,
1101
+ "logits/chosen": 0.8368352055549622,
1102
+ "logits/rejected": 0.5574530959129333,
1103
+ "logps/chosen": -0.9161982536315918,
1104
+ "logps/rejected": -4.765947341918945,
1105
+ "loss": 0.8711,
1106
+ "nll_loss": 0.8084649443626404,
1107
+ "rewards/accuracies": 0.890625,
1108
+ "rewards/chosen": -0.13742974400520325,
1109
+ "rewards/margins": 0.577462375164032,
1110
+ "rewards/rejected": -0.7148921489715576,
1111
+ "step": 1824
1112
+ },
1113
+ {
1114
+ "epoch": 8.7806031933767,
1115
+ "grad_norm": 5.372297286987305,
1116
+ "learning_rate": 2.4792225100097575e-07,
1117
+ "log_odds_chosen": 4.036057472229004,
1118
+ "log_odds_ratio": -0.28991392254829407,
1119
+ "logits/chosen": 0.8479549884796143,
1120
+ "logits/rejected": 0.6130175590515137,
1121
+ "logps/chosen": -0.996108889579773,
1122
+ "logps/rejected": -4.58188009262085,
1123
+ "loss": 0.8868,
1124
+ "nll_loss": 0.8779551386833191,
1125
+ "rewards/accuracies": 0.8671875,
1126
+ "rewards/chosen": -0.14941634237766266,
1127
+ "rewards/margins": 0.5378656387329102,
1128
+ "rewards/rejected": -0.6872820258140564,
1129
+ "step": 1856
1130
+ },
1131
+ {
1132
+ "epoch": 8.931992903607332,
1133
+ "grad_norm": 7.286854267120361,
1134
+ "learning_rate": 2.3961412515904335e-07,
1135
+ "log_odds_chosen": 4.6014862060546875,
1136
+ "log_odds_ratio": -0.23827242851257324,
1137
+ "logits/chosen": 0.8562659621238708,
1138
+ "logits/rejected": 0.5500348806381226,
1139
+ "logps/chosen": -0.8830623030662537,
1140
+ "logps/rejected": -4.95844841003418,
1141
+ "loss": 0.8922,
1142
+ "nll_loss": 0.8241187930107117,
1143
+ "rewards/accuracies": 0.91796875,
1144
+ "rewards/chosen": -0.132459357380867,
1145
+ "rewards/margins": 0.6113079190254211,
1146
+ "rewards/rejected": -0.743767261505127,
1147
+ "step": 1888
1148
+ },
1149
+ {
1150
+ "epoch": 9.083382613837966,
1151
+ "grad_norm": 5.4477949142456055,
1152
+ "learning_rate": 2.3131747660339394e-07,
1153
+ "log_odds_chosen": 4.262630939483643,
1154
+ "log_odds_ratio": -0.25891953706741333,
1155
+ "logits/chosen": 0.7675349712371826,
1156
+ "logits/rejected": 0.51315838098526,
1157
+ "logps/chosen": -0.9374942779541016,
1158
+ "logps/rejected": -4.686108589172363,
1159
+ "loss": 0.8815,
1160
+ "nll_loss": 0.8197700381278992,
1161
+ "rewards/accuracies": 0.91015625,
1162
+ "rewards/chosen": -0.14062415063381195,
1163
+ "rewards/margins": 0.562292218208313,
1164
+ "rewards/rejected": -0.7029163837432861,
1165
+ "step": 1920
1166
+ },
1167
+ {
1168
+ "epoch": 9.234772324068599,
1169
+ "grad_norm": 5.214437484741211,
1170
+ "learning_rate": 2.2304147384531036e-07,
1171
+ "log_odds_chosen": 4.728519439697266,
1172
+ "log_odds_ratio": -0.26717641949653625,
1173
+ "logits/chosen": 0.8268774747848511,
1174
+ "logits/rejected": 0.5454421639442444,
1175
+ "logps/chosen": -0.9330585598945618,
1176
+ "logps/rejected": -5.14246129989624,
1177
+ "loss": 0.8819,
1178
+ "nll_loss": 0.8327500820159912,
1179
+ "rewards/accuracies": 0.86328125,
1180
+ "rewards/chosen": -0.13995879888534546,
1181
+ "rewards/margins": 0.6314104795455933,
1182
+ "rewards/rejected": -0.771369218826294,
1183
+ "step": 1952
1184
+ },
1185
+ {
1186
+ "epoch": 9.38616203429923,
1187
+ "grad_norm": 6.283268928527832,
1188
+ "learning_rate": 2.1479526258069083e-07,
1189
+ "log_odds_chosen": 4.715708255767822,
1190
+ "log_odds_ratio": -0.24205940961837769,
1191
+ "logits/chosen": 0.920197069644928,
1192
+ "logits/rejected": 0.5509434342384338,
1193
+ "logps/chosen": -0.9300947189331055,
1194
+ "logps/rejected": -5.131900787353516,
1195
+ "loss": 0.8768,
1196
+ "nll_loss": 0.8383646011352539,
1197
+ "rewards/accuracies": 0.87890625,
1198
+ "rewards/chosen": -0.13951420783996582,
1199
+ "rewards/margins": 0.6302710175514221,
1200
+ "rewards/rejected": -0.7697851657867432,
1201
+ "step": 1984
1202
+ },
1203
+ {
1204
+ "epoch": 9.461856889414548,
1205
+ "eval_log_odds_chosen": 1.7867234945297241,
1206
+ "eval_log_odds_ratio": -0.18122754991054535,
1207
+ "eval_logits/chosen": 0.5212496519088745,
1208
+ "eval_logits/rejected": 0.4629932940006256,
1209
+ "eval_logps/chosen": -0.6597533226013184,
1210
+ "eval_logps/rejected": -1.7732388973236084,
1211
+ "eval_loss": 0.7272647619247437,
1212
+ "eval_nll_loss": 0.6736801266670227,
1213
+ "eval_rewards/accuracies": 1.0,
1214
+ "eval_rewards/chosen": -0.09896300733089447,
1215
+ "eval_rewards/margins": 0.16702282428741455,
1216
+ "eval_rewards/rejected": -0.2659858167171478,
1217
+ "eval_runtime": 1.766,
1218
+ "eval_samples_per_second": 77.575,
1219
+ "eval_steps_per_second": 10.192,
1220
+ "step": 2000
1221
+ },
1222
+ {
1223
+ "epoch": 9.537551744529864,
1224
+ "grad_norm": 4.955827713012695,
1225
+ "learning_rate": 2.065879555832674e-07,
1226
+ "log_odds_chosen": 4.195652008056641,
1227
+ "log_odds_ratio": -0.25658515095710754,
1228
+ "logits/chosen": 0.8670744895935059,
1229
+ "logits/rejected": 0.6037735939025879,
1230
+ "logps/chosen": -0.9333707094192505,
1231
+ "logps/rejected": -4.614899158477783,
1232
+ "loss": 0.8862,
1233
+ "nll_loss": 0.8495485782623291,
1234
+ "rewards/accuracies": 0.87109375,
1235
+ "rewards/chosen": -0.14000560343265533,
1236
+ "rewards/margins": 0.5522292852401733,
1237
+ "rewards/rejected": -0.6922348737716675,
1238
+ "step": 2016
1239
+ },
1240
+ {
1241
+ "epoch": 9.688941454760498,
1242
+ "grad_norm": 5.796345233917236,
1243
+ "learning_rate": 1.984286226342056e-07,
1244
+ "log_odds_chosen": 4.511747360229492,
1245
+ "log_odds_ratio": -0.27976271510124207,
1246
+ "logits/chosen": 0.8292367458343506,
1247
+ "logits/rejected": 0.5408206582069397,
1248
+ "logps/chosen": -0.9318006038665771,
1249
+ "logps/rejected": -4.940333843231201,
1250
+ "loss": 0.8937,
1251
+ "nll_loss": 0.824824869632721,
1252
+ "rewards/accuracies": 0.86328125,
1253
+ "rewards/chosen": -0.13977007567882538,
1254
+ "rewards/margins": 0.6012800931930542,
1255
+ "rewards/rejected": -0.7410501837730408,
1256
+ "step": 2048
1257
+ },
1258
+ {
1259
+ "epoch": 9.84033116499113,
1260
+ "grad_norm": 5.052858352661133,
1261
+ "learning_rate": 1.9032628049921556e-07,
1262
+ "log_odds_chosen": 4.3274006843566895,
1263
+ "log_odds_ratio": -0.2658219337463379,
1264
+ "logits/chosen": 0.7942694425582886,
1265
+ "logits/rejected": 0.5170871615409851,
1266
+ "logps/chosen": -0.9714781045913696,
1267
+ "logps/rejected": -4.801671028137207,
1268
+ "loss": 0.896,
1269
+ "nll_loss": 0.8439369201660156,
1270
+ "rewards/accuracies": 0.8828125,
1271
+ "rewards/chosen": -0.14572171866893768,
1272
+ "rewards/margins": 0.5745289325714111,
1273
+ "rewards/rejected": -0.72025066614151,
1274
+ "step": 2080
1275
+ },
1276
+ {
1277
+ "epoch": 9.991720875221763,
1278
+ "grad_norm": 5.781661510467529,
1279
+ "learning_rate": 1.8228988296424875e-07,
1280
+ "log_odds_chosen": 4.903880596160889,
1281
+ "log_odds_ratio": -0.24629831314086914,
1282
+ "logits/chosen": 0.888929009437561,
1283
+ "logits/rejected": 0.5151562690734863,
1284
+ "logps/chosen": -0.9433965682983398,
1285
+ "logps/rejected": -5.327086925506592,
1286
+ "loss": 0.8761,
1287
+ "nll_loss": 0.8339080214500427,
1288
+ "rewards/accuracies": 0.8828125,
1289
+ "rewards/chosen": -0.1415095031261444,
1290
+ "rewards/margins": 0.6575536131858826,
1291
+ "rewards/rejected": -0.7990630865097046,
1292
+ "step": 2112
1293
+ },
1294
+ {
1295
+ "epoch": 10.143110585452394,
1296
+ "grad_norm": 8.988626480102539,
1297
+ "learning_rate": 1.7432831094079352e-07,
1298
+ "log_odds_chosen": 4.3950042724609375,
1299
+ "log_odds_ratio": -0.28862205147743225,
1300
+ "logits/chosen": 0.8134148120880127,
1301
+ "logits/rejected": 0.5847084522247314,
1302
+ "logps/chosen": -1.0293586254119873,
1303
+ "logps/rejected": -4.959186553955078,
1304
+ "loss": 0.8813,
1305
+ "nll_loss": 0.8774588108062744,
1306
+ "rewards/accuracies": 0.85546875,
1307
+ "rewards/chosen": -0.15440379083156586,
1308
+ "rewards/margins": 0.5894742608070374,
1309
+ "rewards/rejected": -0.7438780069351196,
1310
+ "step": 2144
1311
+ },
1312
+ {
1313
+ "epoch": 10.294500295683028,
1314
+ "grad_norm": 5.275697231292725,
1315
+ "learning_rate": 1.6645036265170313e-07,
1316
+ "log_odds_chosen": 5.46366548538208,
1317
+ "log_odds_ratio": -0.27606436610221863,
1318
+ "logits/chosen": 0.8438766598701477,
1319
+ "logits/rejected": 0.5199805498123169,
1320
+ "logps/chosen": -0.9803435802459717,
1321
+ "logps/rejected": -5.95693826675415,
1322
+ "loss": 0.8932,
1323
+ "nll_loss": 0.8328185677528381,
1324
+ "rewards/accuracies": 0.86328125,
1325
+ "rewards/chosen": -0.14705155789852142,
1326
+ "rewards/margins": 0.7464891076087952,
1327
+ "rewards/rejected": -0.8935407400131226,
1328
+ "step": 2176
1329
+ },
1330
+ {
1331
+ "epoch": 10.445890005913661,
1332
+ "grad_norm": 6.923160552978516,
1333
+ "learning_rate": 1.5866474390840124e-07,
1334
+ "log_odds_chosen": 4.528408050537109,
1335
+ "log_odds_ratio": -0.25843387842178345,
1336
+ "logits/chosen": 0.8353314399719238,
1337
+ "logits/rejected": 0.5368306636810303,
1338
+ "logps/chosen": -0.9630373120307922,
1339
+ "logps/rejected": -4.988365173339844,
1340
+ "loss": 0.887,
1341
+ "nll_loss": 0.855586051940918,
1342
+ "rewards/accuracies": 0.8984375,
1343
+ "rewards/chosen": -0.14445561170578003,
1344
+ "rewards/margins": 0.6037992238998413,
1345
+ "rewards/rejected": -0.7482547760009766,
1346
+ "step": 2208
1347
+ },
1348
+ {
1349
+ "epoch": 10.597279716144293,
1350
+ "grad_norm": 4.630692005157471,
1351
+ "learning_rate": 1.5098005849021078e-07,
1352
+ "log_odds_chosen": 4.724957466125488,
1353
+ "log_odds_ratio": -0.2811046242713928,
1354
+ "logits/chosen": 0.858523428440094,
1355
+ "logits/rejected": 0.5616721510887146,
1356
+ "logps/chosen": -0.9630488753318787,
1357
+ "logps/rejected": -5.188055038452148,
1358
+ "loss": 0.8694,
1359
+ "nll_loss": 0.858130693435669,
1360
+ "rewards/accuracies": 0.86328125,
1361
+ "rewards/chosen": -0.14445732533931732,
1362
+ "rewards/margins": 0.6337509155273438,
1363
+ "rewards/rejected": -0.778208315372467,
1364
+ "step": 2240
1365
+ },
1366
+ {
1367
+ "epoch": 10.748669426374926,
1368
+ "grad_norm": 6.591275215148926,
1369
+ "learning_rate": 1.4340479863643656e-07,
1370
+ "log_odds_chosen": 4.770135402679443,
1371
+ "log_odds_ratio": -0.2736424207687378,
1372
+ "logits/chosen": 0.7936345934867859,
1373
+ "logits/rejected": 0.5366979837417603,
1374
+ "logps/chosen": -0.9466649889945984,
1375
+ "logps/rejected": -5.203468322753906,
1376
+ "loss": 0.8882,
1377
+ "nll_loss": 0.8353475332260132,
1378
+ "rewards/accuracies": 0.85546875,
1379
+ "rewards/chosen": -0.141999751329422,
1380
+ "rewards/margins": 0.6385205984115601,
1381
+ "rewards/rejected": -0.7805203795433044,
1382
+ "step": 2272
1383
+ },
1384
+ {
1385
+ "epoch": 10.90005913660556,
1386
+ "grad_norm": 5.084187984466553,
1387
+ "learning_rate": 1.3594733566170925e-07,
1388
+ "log_odds_chosen": 4.994205474853516,
1389
+ "log_odds_ratio": -0.30068373680114746,
1390
+ "logits/chosen": 0.8074924945831299,
1391
+ "logits/rejected": 0.522384524345398,
1392
+ "logps/chosen": -0.9593102335929871,
1393
+ "logps/rejected": -5.447037220001221,
1394
+ "loss": 0.8834,
1395
+ "nll_loss": 0.8404646515846252,
1396
+ "rewards/accuracies": 0.8203125,
1397
+ "rewards/chosen": -0.14389653503894806,
1398
+ "rewards/margins": 0.673159122467041,
1399
+ "rewards/rejected": -0.8170557022094727,
1400
+ "step": 2304
1401
+ },
1402
+ {
1403
+ "epoch": 11.051448846836191,
1404
+ "grad_norm": 5.465320110321045,
1405
+ "learning_rate": 1.2861591070496192e-07,
1406
+ "log_odds_chosen": 4.723004341125488,
1407
+ "log_odds_ratio": -0.25821179151535034,
1408
+ "logits/chosen": 0.8570014238357544,
1409
+ "logits/rejected": 0.5345165133476257,
1410
+ "logps/chosen": -0.9341294765472412,
1411
+ "logps/rejected": -5.139418601989746,
1412
+ "loss": 0.8586,
1413
+ "nll_loss": 0.8409022092819214,
1414
+ "rewards/accuracies": 0.89453125,
1415
+ "rewards/chosen": -0.14011943340301514,
1416
+ "rewards/margins": 0.6307933330535889,
1417
+ "rewards/rejected": -0.770912766456604,
1418
+ "step": 2336
1419
+ },
1420
+ {
1421
+ "epoch": 11.202838557066825,
1422
+ "grad_norm": 5.109860420227051,
1423
+ "learning_rate": 1.2141862562226164e-07,
1424
+ "log_odds_chosen": 4.454768180847168,
1425
+ "log_odds_ratio": -0.24100762605667114,
1426
+ "logits/chosen": 0.7960795760154724,
1427
+ "logits/rejected": 0.511499285697937,
1428
+ "logps/chosen": -0.9228134751319885,
1429
+ "logps/rejected": -4.836323261260986,
1430
+ "loss": 0.8823,
1431
+ "nll_loss": 0.8171857595443726,
1432
+ "rewards/accuracies": 0.8984375,
1433
+ "rewards/chosen": -0.13842202723026276,
1434
+ "rewards/margins": 0.5870264172554016,
1435
+ "rewards/rejected": -0.725448489189148,
1436
+ "step": 2368
1437
+ },
1438
+ {
1439
+ "epoch": 11.354228267297458,
1440
+ "grad_norm": 4.377430438995361,
1441
+ "learning_rate": 1.1436343403356016e-07,
1442
+ "log_odds_chosen": 4.902271270751953,
1443
+ "log_odds_ratio": -0.24684929847717285,
1444
+ "logits/chosen": 0.8083094358444214,
1445
+ "logits/rejected": 0.5171899199485779,
1446
+ "logps/chosen": -0.9131155610084534,
1447
+ "logps/rejected": -5.286437034606934,
1448
+ "loss": 0.8823,
1449
+ "nll_loss": 0.8163360953330994,
1450
+ "rewards/accuracies": 0.89453125,
1451
+ "rewards/chosen": -0.13696734607219696,
1452
+ "rewards/margins": 0.6559982299804688,
1453
+ "rewards/rejected": -0.7929655313491821,
1454
+ "step": 2400
1455
+ },
1456
+ {
1457
+ "epoch": 11.354228267297458,
1458
+ "eval_log_odds_chosen": 1.8271435499191284,
1459
+ "eval_log_odds_ratio": -0.17579954862594604,
1460
+ "eval_logits/chosen": 0.49480167031288147,
1461
+ "eval_logits/rejected": 0.437588095664978,
1462
+ "eval_logps/chosen": -0.657990038394928,
1463
+ "eval_logps/rejected": -1.80092453956604,
1464
+ "eval_loss": 0.7247140407562256,
1465
+ "eval_nll_loss": 0.6719491481781006,
1466
+ "eval_rewards/accuracies": 1.0,
1467
+ "eval_rewards/chosen": -0.09869851171970367,
1468
+ "eval_rewards/margins": 0.1714402139186859,
1469
+ "eval_rewards/rejected": -0.2701387107372284,
1470
+ "eval_runtime": 1.7829,
1471
+ "eval_samples_per_second": 76.839,
1472
+ "eval_steps_per_second": 10.096,
1473
+ "step": 2400
1474
+ },
1475
+ {
1476
+ "epoch": 11.50561797752809,
1477
+ "grad_norm": 6.997631072998047,
1478
+ "learning_rate": 1.0745813253325956e-07,
1479
+ "log_odds_chosen": 4.850367069244385,
1480
+ "log_odds_ratio": -0.23768070340156555,
1481
+ "logits/chosen": 0.8919247984886169,
1482
+ "logits/rejected": 0.5253655910491943,
1483
+ "logps/chosen": -0.9427354335784912,
1484
+ "logps/rejected": -5.268039226531982,
1485
+ "loss": 0.8907,
1486
+ "nll_loss": 0.8325998783111572,
1487
+ "rewards/accuracies": 0.90625,
1488
+ "rewards/chosen": -0.14141032099723816,
1489
+ "rewards/margins": 0.6487956643104553,
1490
+ "rewards/rejected": -0.7902059555053711,
1491
+ "step": 2432
1492
+ },
1493
+ {
1494
+ "epoch": 11.657007687758723,
1495
+ "grad_norm": 6.977694988250732,
1496
+ "learning_rate": 1.007103520743035e-07,
1497
+ "log_odds_chosen": 4.430591106414795,
1498
+ "log_odds_ratio": -0.2736847698688507,
1499
+ "logits/chosen": 0.772072434425354,
1500
+ "logits/rejected": 0.5454930067062378,
1501
+ "logps/chosen": -0.972098171710968,
1502
+ "logps/rejected": -4.8969268798828125,
1503
+ "loss": 0.8668,
1504
+ "nll_loss": 0.864302396774292,
1505
+ "rewards/accuracies": 0.8828125,
1506
+ "rewards/chosen": -0.14581473171710968,
1507
+ "rewards/margins": 0.5887242555618286,
1508
+ "rewards/rejected": -0.7345390319824219,
1509
+ "step": 2464
1510
+ },
1511
+ {
1512
+ "epoch": 11.808397397989355,
1513
+ "grad_norm": 9.901198387145996,
1514
+ "learning_rate": 9.412754953531663e-08,
1515
+ "log_odds_chosen": 5.721859455108643,
1516
+ "log_odds_ratio": -0.2376585453748703,
1517
+ "logits/chosen": 0.9159454107284546,
1518
+ "logits/rejected": 0.49302536249160767,
1519
+ "logps/chosen": -0.905667781829834,
1520
+ "logps/rejected": -6.09743070602417,
1521
+ "loss": 0.8778,
1522
+ "nll_loss": 0.8210791945457458,
1523
+ "rewards/accuracies": 0.8984375,
1524
+ "rewards/chosen": -0.135850191116333,
1525
+ "rewards/margins": 0.7787644267082214,
1526
+ "rewards/rejected": -0.9146146178245544,
1527
+ "step": 2496
1528
+ },
1529
+ {
1530
+ "epoch": 11.959787108219988,
1531
+ "grad_norm": 6.6628241539001465,
1532
+ "learning_rate": 8.771699948011203e-08,
1533
+ "log_odds_chosen": 4.282519817352295,
1534
+ "log_odds_ratio": -0.2792586088180542,
1535
+ "logits/chosen": 0.790172815322876,
1536
+ "logits/rejected": 0.563973069190979,
1537
+ "logps/chosen": -0.9786302447319031,
1538
+ "logps/rejected": -4.77611780166626,
1539
+ "loss": 0.8802,
1540
+ "nll_loss": 0.8442527651786804,
1541
+ "rewards/accuracies": 0.84765625,
1542
+ "rewards/chosen": -0.14679455757141113,
1543
+ "rewards/margins": 0.5696231722831726,
1544
+ "rewards/rejected": -0.716417670249939,
1545
+ "step": 2528
1546
+ },
1547
+ {
1548
+ "epoch": 12.111176818450621,
1549
+ "grad_norm": 5.591745853424072,
1550
+ "learning_rate": 8.148578611867113e-08,
1551
+ "log_odds_chosen": 4.849425315856934,
1552
+ "log_odds_ratio": -0.29553845524787903,
1553
+ "logits/chosen": 0.8502916097640991,
1554
+ "logits/rejected": 0.5881719589233398,
1555
+ "logps/chosen": -0.9942740201950073,
1556
+ "logps/rejected": -5.380496025085449,
1557
+ "loss": 0.8794,
1558
+ "nll_loss": 0.894903302192688,
1559
+ "rewards/accuracies": 0.83984375,
1560
+ "rewards/chosen": -0.1491411030292511,
1561
+ "rewards/margins": 0.6579334139823914,
1562
+ "rewards/rejected": -0.8070744276046753,
1563
+ "step": 2560
1564
+ },
1565
+ {
1566
+ "epoch": 12.262566528681253,
1567
+ "grad_norm": 4.799871921539307,
1568
+ "learning_rate": 7.544079547848181e-08,
1569
+ "log_odds_chosen": 4.629427909851074,
1570
+ "log_odds_ratio": -0.2579698860645294,
1571
+ "logits/chosen": 0.8144665360450745,
1572
+ "logits/rejected": 0.5345531702041626,
1573
+ "logps/chosen": -0.9962482452392578,
1574
+ "logps/rejected": -5.126289367675781,
1575
+ "loss": 0.8853,
1576
+ "nll_loss": 0.8719948530197144,
1577
+ "rewards/accuracies": 0.87890625,
1578
+ "rewards/chosen": -0.14943724870681763,
1579
+ "rewards/margins": 0.6195061802864075,
1580
+ "rewards/rejected": -0.7689434885978699,
1581
+ "step": 2592
1582
+ },
1583
+ {
1584
+ "epoch": 12.413956238911886,
1585
+ "grad_norm": 5.2031779289245605,
1586
+ "learning_rate": 6.958870779488446e-08,
1587
+ "log_odds_chosen": 5.763055801391602,
1588
+ "log_odds_ratio": -0.24303670227527618,
1589
+ "logits/chosen": 0.85135418176651,
1590
+ "logits/rejected": 0.5018079876899719,
1591
+ "logps/chosen": -0.9315154552459717,
1592
+ "logps/rejected": -6.163926124572754,
1593
+ "loss": 0.8732,
1594
+ "nll_loss": 0.8289435505867004,
1595
+ "rewards/accuracies": 0.875,
1596
+ "rewards/chosen": -0.13972733914852142,
1597
+ "rewards/margins": 0.78486168384552,
1598
+ "rewards/rejected": -0.9245890378952026,
1599
+ "step": 2624
1600
+ },
1601
+ {
1602
+ "epoch": 12.56534594914252,
1603
+ "grad_norm": 4.5774712562561035,
1604
+ "learning_rate": 6.393599012883707e-08,
1605
+ "log_odds_chosen": 4.685327529907227,
1606
+ "log_odds_ratio": -0.2833007574081421,
1607
+ "logits/chosen": 0.7489104270935059,
1608
+ "logits/rejected": 0.5779923796653748,
1609
+ "logps/chosen": -0.9675414562225342,
1610
+ "logps/rejected": -5.168377876281738,
1611
+ "loss": 0.8694,
1612
+ "nll_loss": 0.8434449434280396,
1613
+ "rewards/accuracies": 0.875,
1614
+ "rewards/chosen": -0.14513123035430908,
1615
+ "rewards/margins": 0.6301255226135254,
1616
+ "rewards/rejected": -0.7752567529678345,
1617
+ "step": 2656
1618
+ },
1619
+ {
1620
+ "epoch": 12.716735659373152,
1621
+ "grad_norm": 5.854611396789551,
1622
+ "learning_rate": 5.848888922025552e-08,
1623
+ "log_odds_chosen": 5.014122009277344,
1624
+ "log_odds_ratio": -0.23267918825149536,
1625
+ "logits/chosen": 0.8252905607223511,
1626
+ "logits/rejected": 0.4858684539794922,
1627
+ "logps/chosen": -0.8893996477127075,
1628
+ "logps/rejected": -5.326512336730957,
1629
+ "loss": 0.878,
1630
+ "nll_loss": 0.8163630366325378,
1631
+ "rewards/accuracies": 0.8828125,
1632
+ "rewards/chosen": -0.13340994715690613,
1633
+ "rewards/margins": 0.6655669212341309,
1634
+ "rewards/rejected": -0.7989768981933594,
1635
+ "step": 2688
1636
+ },
1637
+ {
1638
+ "epoch": 12.868125369603785,
1639
+ "grad_norm": 5.542015075683594,
1640
+ "learning_rate": 5.325342458482779e-08,
1641
+ "log_odds_chosen": 5.052638530731201,
1642
+ "log_odds_ratio": -0.2500526010990143,
1643
+ "logits/chosen": 0.8215246796607971,
1644
+ "logits/rejected": 0.573950469493866,
1645
+ "logps/chosen": -0.8597905039787292,
1646
+ "logps/rejected": -5.335259437561035,
1647
+ "loss": 0.8812,
1648
+ "nll_loss": 0.8173032999038696,
1649
+ "rewards/accuracies": 0.875,
1650
+ "rewards/chosen": -0.12896858155727386,
1651
+ "rewards/margins": 0.6713204383850098,
1652
+ "rewards/rejected": -0.8002889156341553,
1653
+ "step": 2720
1654
+ },
1655
+ {
1656
+ "epoch": 13.019515079834418,
1657
+ "grad_norm": 7.424806118011475,
1658
+ "learning_rate": 4.823538186193096e-08,
1659
+ "log_odds_chosen": 5.35725212097168,
1660
+ "log_odds_ratio": -0.23181939125061035,
1661
+ "logits/chosen": 0.8148990273475647,
1662
+ "logits/rejected": 0.4551333785057068,
1663
+ "logps/chosen": -0.9124429225921631,
1664
+ "logps/rejected": -5.717087268829346,
1665
+ "loss": 0.8778,
1666
+ "nll_loss": 0.8277573585510254,
1667
+ "rewards/accuracies": 0.91015625,
1668
+ "rewards/chosen": -0.13686645030975342,
1669
+ "rewards/margins": 0.7206966876983643,
1670
+ "rewards/rejected": -0.8575630784034729,
1671
+ "step": 2752
1672
+ },
1673
+ {
1674
+ "epoch": 13.17090479006505,
1675
+ "grad_norm": 6.039958953857422,
1676
+ "learning_rate": 4.3440306421001324e-08,
1677
+ "log_odds_chosen": 5.5131001472473145,
1678
+ "log_odds_ratio": -0.24206629395484924,
1679
+ "logits/chosen": 0.873075008392334,
1680
+ "logits/rejected": 0.5212752223014832,
1681
+ "logps/chosen": -0.8922577500343323,
1682
+ "logps/rejected": -5.86539888381958,
1683
+ "loss": 0.8901,
1684
+ "nll_loss": 0.8136817216873169,
1685
+ "rewards/accuracies": 0.88671875,
1686
+ "rewards/chosen": -0.13383866846561432,
1687
+ "rewards/margins": 0.745971143245697,
1688
+ "rewards/rejected": -0.8798097968101501,
1689
+ "step": 2784
1690
+ },
1691
+ {
1692
+ "epoch": 13.246599645180366,
1693
+ "eval_log_odds_chosen": 1.8457978963851929,
1694
+ "eval_log_odds_ratio": -0.17291945219039917,
1695
+ "eval_logits/chosen": 0.5009181499481201,
1696
+ "eval_logits/rejected": 0.446205198764801,
1697
+ "eval_logps/chosen": -0.6597917675971985,
1698
+ "eval_logps/rejected": -1.8198742866516113,
1699
+ "eval_loss": 0.7256795763969421,
1700
+ "eval_nll_loss": 0.6736116409301758,
1701
+ "eval_rewards/accuracies": 1.0,
1702
+ "eval_rewards/chosen": -0.09896877408027649,
1703
+ "eval_rewards/margins": 0.17401237785816193,
1704
+ "eval_rewards/rejected": -0.2729811668395996,
1705
+ "eval_runtime": 1.7675,
1706
+ "eval_samples_per_second": 77.512,
1707
+ "eval_steps_per_second": 10.184,
1708
+ "step": 2800
1709
+ },
1710
+ {
1711
+ "epoch": 13.322294500295683,
1712
+ "grad_norm": 4.649291515350342,
1713
+ "learning_rate": 3.887349723342303e-08,
1714
+ "log_odds_chosen": 5.655206203460693,
1715
+ "log_odds_ratio": -0.22572118043899536,
1716
+ "logits/chosen": 0.8335084915161133,
1717
+ "logits/rejected": 0.4884824752807617,
1718
+ "logps/chosen": -0.8668183088302612,
1719
+ "logps/rejected": -5.9456024169921875,
1720
+ "loss": 0.8773,
1721
+ "nll_loss": 0.7930561900138855,
1722
+ "rewards/accuracies": 0.90234375,
1723
+ "rewards/chosen": -0.13002273440361023,
1724
+ "rewards/margins": 0.7618176937103271,
1725
+ "rewards/rejected": -0.8918405175209045,
1726
+ "step": 2816
1727
+ },
1728
+ {
1729
+ "epoch": 13.473684210526315,
1730
+ "grad_norm": 5.706801414489746,
1731
+ "learning_rate": 3.454000101670901e-08,
1732
+ "log_odds_chosen": 4.356830596923828,
1733
+ "log_odds_ratio": -0.24235375225543976,
1734
+ "logits/chosen": 0.7453078031539917,
1735
+ "logits/rejected": 0.5251801609992981,
1736
+ "logps/chosen": -0.9370274543762207,
1737
+ "logps/rejected": -4.772754192352295,
1738
+ "loss": 0.8771,
1739
+ "nll_loss": 0.8157171010971069,
1740
+ "rewards/accuracies": 0.88671875,
1741
+ "rewards/chosen": -0.14055413007736206,
1742
+ "rewards/margins": 0.5753591060638428,
1743
+ "rewards/rejected": -0.7159131765365601,
1744
+ "step": 2848
1745
+ },
1746
+ {
1747
+ "epoch": 13.625073920756948,
1748
+ "grad_norm": 6.6824140548706055,
1749
+ "learning_rate": 3.044460665744283e-08,
1750
+ "log_odds_chosen": 4.974400043487549,
1751
+ "log_odds_ratio": -0.24002020061016083,
1752
+ "logits/chosen": 0.7889403700828552,
1753
+ "logits/rejected": 0.4931294322013855,
1754
+ "logps/chosen": -0.9762779474258423,
1755
+ "logps/rejected": -5.440495491027832,
1756
+ "loss": 0.8849,
1757
+ "nll_loss": 0.8199655413627625,
1758
+ "rewards/accuracies": 0.8984375,
1759
+ "rewards/chosen": -0.14644168317317963,
1760
+ "rewards/margins": 0.6696327328681946,
1761
+ "rewards/rejected": -0.8160744905471802,
1762
+ "step": 2880
1763
+ },
1764
+ {
1765
+ "epoch": 13.776463630987582,
1766
+ "grad_norm": 9.858070373535156,
1767
+ "learning_rate": 2.659183991914696e-08,
1768
+ "log_odds_chosen": 4.271711349487305,
1769
+ "log_odds_ratio": -0.25790902972221375,
1770
+ "logits/chosen": 0.7586400508880615,
1771
+ "logits/rejected": 0.5483137369155884,
1772
+ "logps/chosen": -0.9079785346984863,
1773
+ "logps/rejected": -4.65129280090332,
1774
+ "loss": 0.8755,
1775
+ "nll_loss": 0.8172128200531006,
1776
+ "rewards/accuracies": 0.87890625,
1777
+ "rewards/chosen": -0.1361967921257019,
1778
+ "rewards/margins": 0.561497151851654,
1779
+ "rewards/rejected": -0.697693943977356,
1780
+ "step": 2912
1781
+ },
1782
+ {
1783
+ "epoch": 13.927853341218214,
1784
+ "grad_norm": 4.99421501159668,
1785
+ "learning_rate": 2.298595844092377e-08,
1786
+ "log_odds_chosen": 5.054343223571777,
1787
+ "log_odds_ratio": -0.2358601987361908,
1788
+ "logits/chosen": 0.7982761859893799,
1789
+ "logits/rejected": 0.5060718655586243,
1790
+ "logps/chosen": -0.9570282697677612,
1791
+ "logps/rejected": -5.482752799987793,
1792
+ "loss": 0.8707,
1793
+ "nll_loss": 0.8115738034248352,
1794
+ "rewards/accuracies": 0.90234375,
1795
+ "rewards/chosen": -0.143554225564003,
1796
+ "rewards/margins": 0.6788586974143982,
1797
+ "rewards/rejected": -0.82241290807724,
1798
+ "step": 2944
1799
+ },
1800
+ {
1801
+ "epoch": 14.079243051448847,
1802
+ "grad_norm": 17.175851821899414,
1803
+ "learning_rate": 1.9630947032398066e-08,
1804
+ "log_odds_chosen": 5.8499908447265625,
1805
+ "log_odds_ratio": -0.22148607671260834,
1806
+ "logits/chosen": 0.817506730556488,
1807
+ "logits/rejected": 0.44914665818214417,
1808
+ "logps/chosen": -0.8968250751495361,
1809
+ "logps/rejected": -6.185724258422852,
1810
+ "loss": 0.8673,
1811
+ "nll_loss": 0.8207356333732605,
1812
+ "rewards/accuracies": 0.921875,
1813
+ "rewards/chosen": -0.13452376425266266,
1814
+ "rewards/margins": 0.7933349013328552,
1815
+ "rewards/rejected": -0.9278587698936462,
1816
+ "step": 2976
1817
+ },
1818
+ {
1819
+ "epoch": 14.23063276167948,
1820
+ "grad_norm": 7.170802593231201,
1821
+ "learning_rate": 1.653051327015911e-08,
1822
+ "log_odds_chosen": 4.76658296585083,
1823
+ "log_odds_ratio": -0.24812592566013336,
1824
+ "logits/chosen": 0.8145585060119629,
1825
+ "logits/rejected": 0.5187351703643799,
1826
+ "logps/chosen": -0.9258391261100769,
1827
+ "logps/rejected": -5.176287651062012,
1828
+ "loss": 0.8781,
1829
+ "nll_loss": 0.8292718529701233,
1830
+ "rewards/accuracies": 0.90625,
1831
+ "rewards/chosen": -0.13887587189674377,
1832
+ "rewards/margins": 0.6375671625137329,
1833
+ "rewards/rejected": -0.7764431834220886,
1834
+ "step": 3008
1835
+ },
1836
+ {
1837
+ "epoch": 14.382022471910112,
1838
+ "grad_norm": 5.404478073120117,
1839
+ "learning_rate": 1.368808340056879e-08,
1840
+ "log_odds_chosen": 5.262024879455566,
1841
+ "log_odds_ratio": -0.22128547728061676,
1842
+ "logits/chosen": 0.7849254608154297,
1843
+ "logits/rejected": 0.4733457863330841,
1844
+ "logps/chosen": -0.9194588661193848,
1845
+ "logps/rejected": -5.613149166107178,
1846
+ "loss": 0.8665,
1847
+ "nll_loss": 0.8110780715942383,
1848
+ "rewards/accuracies": 0.8828125,
1849
+ "rewards/chosen": -0.1379188597202301,
1850
+ "rewards/margins": 0.704053521156311,
1851
+ "rewards/rejected": -0.8419723510742188,
1852
+ "step": 3040
1853
+ },
1854
+ {
1855
+ "epoch": 14.533412182140745,
1856
+ "grad_norm": 4.717693328857422,
1857
+ "learning_rate": 1.1106798553464802e-08,
1858
+ "log_odds_chosen": 5.532874584197998,
1859
+ "log_odds_ratio": -0.23889514803886414,
1860
+ "logits/chosen": 0.887575626373291,
1861
+ "logits/rejected": 0.503061056137085,
1862
+ "logps/chosen": -0.9478439092636108,
1863
+ "logps/rejected": -5.9591827392578125,
1864
+ "loss": 0.8689,
1865
+ "nll_loss": 0.8499802947044373,
1866
+ "rewards/accuracies": 0.9140625,
1867
+ "rewards/chosen": -0.1421765685081482,
1868
+ "rewards/margins": 0.7517008185386658,
1869
+ "rewards/rejected": -0.893877387046814,
1870
+ "step": 3072
1871
+ },
1872
+ {
1873
+ "epoch": 14.684801892371379,
1874
+ "grad_norm": 7.475513458251953,
1875
+ "learning_rate": 8.789511270941269e-09,
1876
+ "log_odds_chosen": 4.4497551918029785,
1877
+ "log_odds_ratio": -0.27013376355171204,
1878
+ "logits/chosen": 0.7935608625411987,
1879
+ "logits/rejected": 0.5559485554695129,
1880
+ "logps/chosen": -0.9605445861816406,
1881
+ "logps/rejected": -4.917541980743408,
1882
+ "loss": 0.8786,
1883
+ "nll_loss": 0.8641871213912964,
1884
+ "rewards/accuracies": 0.8828125,
1885
+ "rewards/chosen": -0.1440816968679428,
1886
+ "rewards/margins": 0.5935496091842651,
1887
+ "rewards/rejected": -0.7376313209533691,
1888
+ "step": 3104
1889
+ },
1890
+ {
1891
+ "epoch": 14.83619160260201,
1892
+ "grad_norm": 6.8675408363342285,
1893
+ "learning_rate": 6.738782355044048e-09,
1894
+ "log_odds_chosen": 4.509281635284424,
1895
+ "log_odds_ratio": -0.27578622102737427,
1896
+ "logits/chosen": 0.7721443772315979,
1897
+ "logits/rejected": 0.5139036774635315,
1898
+ "logps/chosen": -0.9913955926895142,
1899
+ "logps/rejected": -5.035284042358398,
1900
+ "loss": 0.8838,
1901
+ "nll_loss": 0.8683611154556274,
1902
+ "rewards/accuracies": 0.88671875,
1903
+ "rewards/chosen": -0.14870934188365936,
1904
+ "rewards/margins": 0.6065833568572998,
1905
+ "rewards/rejected": -0.7552926540374756,
1906
+ "step": 3136
1907
+ },
1908
+ {
1909
+ "epoch": 14.987581312832644,
1910
+ "grad_norm": 7.102670669555664,
1911
+ "learning_rate": 4.956878037864043e-09,
1912
+ "log_odds_chosen": 4.306816101074219,
1913
+ "log_odds_ratio": -0.30200034379959106,
1914
+ "logits/chosen": 0.8607514500617981,
1915
+ "logits/rejected": 0.591871440410614,
1916
+ "logps/chosen": -0.9792557954788208,
1917
+ "logps/rejected": -4.773169040679932,
1918
+ "loss": 0.8869,
1919
+ "nll_loss": 0.8911793231964111,
1920
+ "rewards/accuracies": 0.859375,
1921
+ "rewards/chosen": -0.1468883752822876,
1922
+ "rewards/margins": 0.569087028503418,
1923
+ "rewards/rejected": -0.7159753441810608,
1924
+ "step": 3168
1925
+ },
1926
+ {
1927
+ "epoch": 15.138971023063275,
1928
+ "grad_norm": 4.992292881011963,
1929
+ "learning_rate": 3.4457674771554422e-09,
1930
+ "log_odds_chosen": 4.759942054748535,
1931
+ "log_odds_ratio": -0.2575688362121582,
1932
+ "logits/chosen": 0.7185624241828918,
1933
+ "logits/rejected": 0.42112159729003906,
1934
+ "logps/chosen": -0.9415456652641296,
1935
+ "logps/rejected": -5.170385360717773,
1936
+ "loss": 0.858,
1937
+ "nll_loss": 0.8277443647384644,
1938
+ "rewards/accuracies": 0.8828125,
1939
+ "rewards/chosen": -0.14123186469078064,
1940
+ "rewards/margins": 0.6343258619308472,
1941
+ "rewards/rejected": -0.775557816028595,
1942
+ "step": 3200
1943
+ },
1944
+ {
1945
+ "epoch": 15.138971023063275,
1946
+ "eval_log_odds_chosen": 1.8564934730529785,
1947
+ "eval_log_odds_ratio": -0.17145967483520508,
1948
+ "eval_logits/chosen": 0.48080742359161377,
1949
+ "eval_logits/rejected": 0.4276208281517029,
1950
+ "eval_logps/chosen": -0.6593887209892273,
1951
+ "eval_logps/rejected": -1.8252443075180054,
1952
+ "eval_loss": 0.7243954539299011,
1953
+ "eval_nll_loss": 0.6726279854774475,
1954
+ "eval_rewards/accuracies": 1.0,
1955
+ "eval_rewards/chosen": -0.09890830516815186,
1956
+ "eval_rewards/margins": 0.17487837374210358,
1957
+ "eval_rewards/rejected": -0.27378666400909424,
1958
+ "eval_runtime": 1.7744,
1959
+ "eval_samples_per_second": 77.208,
1960
+ "eval_steps_per_second": 10.144,
1961
+ "step": 3200
1962
+ },
1963
+ {
1964
+ "epoch": 15.290360733293909,
1965
+ "grad_norm": 6.751287937164307,
1966
+ "learning_rate": 2.2071205802468297e-09,
1967
+ "log_odds_chosen": 4.854089736938477,
1968
+ "log_odds_ratio": -0.2636704742908478,
1969
+ "logits/chosen": 0.7567326426506042,
1970
+ "logits/rejected": 0.527582585811615,
1971
+ "logps/chosen": -0.9423821568489075,
1972
+ "logps/rejected": -5.277737617492676,
1973
+ "loss": 0.8852,
1974
+ "nll_loss": 0.844541609287262,
1975
+ "rewards/accuracies": 0.875,
1976
+ "rewards/chosen": -0.14135733246803284,
1977
+ "rewards/margins": 0.6503032445907593,
1978
+ "rewards/rejected": -0.7916606068611145,
1979
+ "step": 3232
1980
+ },
1981
+ {
1982
+ "epoch": 15.441750443524542,
1983
+ "grad_norm": 5.534750938415527,
1984
+ "learning_rate": 1.2423061586496476e-09,
1985
+ "log_odds_chosen": 5.184489727020264,
1986
+ "log_odds_ratio": -0.24983780086040497,
1987
+ "logits/chosen": 0.8209244608879089,
1988
+ "logits/rejected": 0.5052769780158997,
1989
+ "logps/chosen": -0.9556353688240051,
1990
+ "logps/rejected": -5.621804237365723,
1991
+ "loss": 0.8706,
1992
+ "nll_loss": 0.8418364524841309,
1993
+ "rewards/accuracies": 0.88671875,
1994
+ "rewards/chosen": -0.14334531128406525,
1995
+ "rewards/margins": 0.699925422668457,
1996
+ "rewards/rejected": -0.8432707786560059,
1997
+ "step": 3264
1998
+ },
1999
+ {
2000
+ "epoch": 15.593140153755174,
2001
+ "grad_norm": 5.217104434967041,
2002
+ "learning_rate": 5.523904154037528e-10,
2003
+ "log_odds_chosen": 5.348480701446533,
2004
+ "log_odds_ratio": -0.2507275640964508,
2005
+ "logits/chosen": 0.8220376372337341,
2006
+ "logits/rejected": 0.5271560549736023,
2007
+ "logps/chosen": -0.9200209975242615,
2008
+ "logps/rejected": -5.755062103271484,
2009
+ "loss": 0.887,
2010
+ "nll_loss": 0.8451349139213562,
2011
+ "rewards/accuracies": 0.85546875,
2012
+ "rewards/chosen": -0.1380031555891037,
2013
+ "rewards/margins": 0.7252561450004578,
2014
+ "rewards/rejected": -0.8632593154907227,
2015
+ "step": 3296
2016
+ },
2017
+ {
2018
+ "epoch": 15.744529863985807,
2019
+ "grad_norm": 6.9226460456848145,
2020
+ "learning_rate": 1.3813576683111006e-10,
2021
+ "log_odds_chosen": 4.370879650115967,
2022
+ "log_odds_ratio": -0.24154168367385864,
2023
+ "logits/chosen": 0.7712342739105225,
2024
+ "logits/rejected": 0.5409867763519287,
2025
+ "logps/chosen": -0.9708598256111145,
2026
+ "logps/rejected": -4.831565856933594,
2027
+ "loss": 0.8729,
2028
+ "nll_loss": 0.8363229036331177,
2029
+ "rewards/accuracies": 0.921875,
2030
+ "rewards/chosen": -0.14562898874282837,
2031
+ "rewards/margins": 0.5791059136390686,
2032
+ "rewards/rejected": -0.724734902381897,
2033
+ "step": 3328
2034
+ },
2035
+ {
2036
+ "epoch": 15.89591957421644,
2037
+ "grad_norm": 7.291532516479492,
2038
+ "learning_rate": 0.0,
2039
+ "log_odds_chosen": 5.1468186378479,
2040
+ "log_odds_ratio": -0.2334214597940445,
2041
+ "logits/chosen": 0.8100905418395996,
2042
+ "logits/rejected": 0.48369458317756653,
2043
+ "logps/chosen": -0.8902687430381775,
2044
+ "logps/rejected": -5.482838153839111,
2045
+ "loss": 0.883,
2046
+ "nll_loss": 0.8243392705917358,
2047
+ "rewards/accuracies": 0.890625,
2048
+ "rewards/chosen": -0.1335403174161911,
2049
+ "rewards/margins": 0.688885509967804,
2050
+ "rewards/rejected": -0.8224257826805115,
2051
+ "step": 3360
2052
+ },
2053
+ {
2054
+ "epoch": 15.89591957421644,
2055
+ "eval_log_odds_chosen": 1.8541311025619507,
2056
+ "eval_log_odds_ratio": -0.17156726121902466,
2057
+ "eval_logits/chosen": 0.4940509796142578,
2058
+ "eval_logits/rejected": 0.4394443929195404,
2059
+ "eval_logps/chosen": -0.6573522090911865,
2060
+ "eval_logps/rejected": -1.8197245597839355,
2061
+ "eval_loss": 0.7246665954589844,
2062
+ "eval_nll_loss": 0.6722227334976196,
2063
+ "eval_rewards/accuracies": 1.0,
2064
+ "eval_rewards/chosen": -0.09860283136367798,
2065
+ "eval_rewards/margins": 0.17435584962368011,
2066
+ "eval_rewards/rejected": -0.2729586660861969,
2067
+ "eval_runtime": 1.7715,
2068
+ "eval_samples_per_second": 77.334,
2069
+ "eval_steps_per_second": 10.161,
2070
+ "step": 3360
2071
+ }
2072
+ ],
2073
+ "logging_steps": 32,
2074
+ "max_steps": 3360,
2075
+ "num_input_tokens_seen": 0,
2076
+ "num_train_epochs": 16,
2077
+ "save_steps": 400,
2078
+ "total_flos": 0.0,
2079
+ "train_batch_size": 1,
2080
+ "trial_name": null,
2081
+ "trial_params": null
2082
+ }
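
The log above is the tail of the Trainer's saved state, and it is plain JSON, so the per-step ORPO metrics (loss, nll_loss, log_odds_ratio, the rewards/* family) can be read back without any training framework. A minimal sketch in Python, assuming the file ships under the Trainer's default name trainer_state.json (the filename is an assumption, not visible in this diff):

    import json

    # assumed filename: the Hugging Face Trainer writes its state here by default
    with open("trainer_state.json") as f:
        state = json.load(f)

    # "log_history" holds both train and eval rows; eval rows carry "eval_*" keys
    eval_rows = [row for row in state["log_history"] if "eval_loss" in row]
    for row in eval_rows:
        print(f'step {row["step"]}: eval_loss={row["eval_loss"]:.4f}, '
              f'eval_accuracy={row["eval_rewards/accuracies"]}')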
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c7d10cade09e8662424df8edaf4dc955c1a689008be350250b395adaf9f52b6d
+ size 7032
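
The file itself is stored as the Git LFS pointer shown above; once downloaded, training_args.bin is a pickled TrainingArguments object (assuming it was written by the Hugging Face Trainer, which saves its arguments under this name), so the run configuration can be recovered with torch.load. A hedged sketch; weights_only=False is needed on PyTorch 2.6+, where torch.load defaults to weights-only unpickling:

    import torch

    # a full pickled Python object, not a tensor state dict, hence weights_only=False
    # (assumes the file was produced by transformers.Trainer)
    args = torch.load("training_args.bin", weights_only=False)
    print(args.num_train_epochs, args.learning_rate, args.per_device_train_batch_size)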
zero_to_fp32.py ADDED
@@ -0,0 +1,604 @@
+ #!/usr/bin/env python
+
+ # Copyright (c) Microsoft Corporation.
+ # SPDX-License-Identifier: Apache-2.0
+
+ # DeepSpeed Team
+
+ # This script extracts fp32 consolidated weights from ZeRO 1, 2 and 3 DeepSpeed checkpoints. It gets
+ # copied into the top level checkpoint dir, so the user can easily do the conversion at any point in
+ # the future. Once extracted, the weights don't require DeepSpeed and can be used in any
+ # application.
+ #
+ # example: python zero_to_fp32.py . pytorch_model.bin
+
+ import argparse
+ import torch
+ import glob
+ import math
+ import os
+ import re
+ from collections import OrderedDict
+ from dataclasses import dataclass
+
+ # while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with
+ # DeepSpeed data structures it has to be available in the current python environment.
+ from deepspeed.utils import logger
+ from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS,
+                                             FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES,
+                                             FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS)
+
+
+ @dataclass
+ class zero_model_state:
+     buffers: dict()
+     param_shapes: dict()
+     shared_params: list
+     ds_version: int
+     frozen_param_shapes: dict()
+     frozen_param_fragments: dict()
+
+
+ debug = 0
+
+ # load to cpu
+ device = torch.device('cpu')
+
+
+ def atoi(text):
+     return int(text) if text.isdigit() else text
+
+
+ def natural_keys(text):
+     '''
+     alist.sort(key=natural_keys) sorts in human order
+     http://nedbatchelder.com/blog/200712/human_sorting.html
+     (See Toothy's implementation in the comments)
+     '''
+     return [atoi(c) for c in re.split(r'(\d+)', text)]
+
+
+ def get_model_state_file(checkpoint_dir, zero_stage):
+     if not os.path.isdir(checkpoint_dir):
+         raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist")
+
+     # there should be only one file
+     if zero_stage <= 2:
+         file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt")
+     elif zero_stage == 3:
+         file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt")
+
+     if not os.path.exists(file):
+         raise FileNotFoundError(f"can't find model states file at '{file}'")
+
+     return file
+
+
+ def get_checkpoint_files(checkpoint_dir, glob_pattern):
+     # XXX: need to test that this simple glob rule works for multi-node setup too
+     ckpt_files = sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys)
+
+     if len(ckpt_files) == 0:
+         raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'")
+
+     return ckpt_files
+
+
+ def get_optim_files(checkpoint_dir):
+     return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt")
+
+
+ def get_model_state_files(checkpoint_dir):
+     return get_checkpoint_files(checkpoint_dir, "*_model_states.pt")
+
+
+ def parse_model_states(files):
+     zero_model_states = []
+     for file in files:
+         state_dict = torch.load(file, map_location=device)
+
+         if BUFFER_NAMES not in state_dict:
+             raise ValueError(f"{file} is not a model state checkpoint")
+         buffer_names = state_dict[BUFFER_NAMES]
+         if debug:
+             print("Found buffers:", buffer_names)
+
+         # recover just the buffers while restoring them to fp32 if they were saved in fp16
+         buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names}
+         param_shapes = state_dict[PARAM_SHAPES]
+
+         # collect parameters that are included in param_shapes
+         param_names = []
+         for s in param_shapes:
+             for name in s.keys():
+                 param_names.append(name)
+
+         # update with frozen parameters
+         frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None)
+         if frozen_param_shapes is not None:
+             if debug:
+                 print(f"Found frozen_param_shapes: {frozen_param_shapes}")
+             param_names += list(frozen_param_shapes.keys())
+
+         # handle shared params
+         shared_params = [[k, v] for k, v in state_dict["shared_params"].items()]
+
+         ds_version = state_dict.get(DS_VERSION, None)
+
+         frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None)
+
+         z_model_state = zero_model_state(buffers=buffers,
+                                          param_shapes=param_shapes,
+                                          shared_params=shared_params,
+                                          ds_version=ds_version,
+                                          frozen_param_shapes=frozen_param_shapes,
+                                          frozen_param_fragments=frozen_param_fragments)
+         zero_model_states.append(z_model_state)
+
+     return zero_model_states
+
+
+ def parse_optim_states(files, ds_checkpoint_dir):
+
+     total_files = len(files)
+     state_dicts = []
+     for f in files:
+         state_dict = torch.load(f, map_location=device)
+         # immediately discard the two potentially huge optimizer states as we only care for fp32 master weights
+         # and also handle the case where it was already removed by another helper script
+         state_dict["optimizer_state_dict"].pop("optimizer_state_dict", None)
+         state_dicts.append(state_dict)
+
+     if ZERO_STAGE not in state_dicts[0][OPTIMIZER_STATE_DICT]:
+         raise ValueError(f"{files[0]} is not a zero checkpoint")
+     zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE]
+     world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT]
+
+     # For ZeRO-2 each param group can have different partition_count as data parallelism for expert
+     # parameters can be different from data parallelism for non-expert parameters. So we can just
+     # use the max of the partition_count to get the dp world_size.
+
+     if type(world_size) is list:
+         world_size = max(world_size)
+
+     if world_size != total_files:
+         raise ValueError(
+             f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. "
+             "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes."
+         )
+
+     # the groups are named differently in each stage
+     if zero_stage <= 2:
+         fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS
+     elif zero_stage == 3:
+         fp32_groups_key = FP32_FLAT_GROUPS
+     else:
+         raise ValueError(f"unknown zero stage {zero_stage}")
+
+     if zero_stage <= 2:
+         fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))]
+     elif zero_stage == 3:
+         # if there is more than one param group, there will be multiple flattened tensors - one
+         # flattened tensor per group - for simplicity merge them into a single tensor
+         #
+         # XXX: could make the script more memory efficient for when there are multiple groups - it
+         # will require matching the sub-lists of param_shapes for each param group flattened tensor
+
+         fp32_flat_groups = [
+             torch.cat(state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key], 0) for i in range(len(state_dicts))
+         ]
+
+     return zero_stage, world_size, fp32_flat_groups
+
+
+ def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters):
+     """
+     Returns fp32 state_dict reconstructed from ds checkpoint
+
+     Args:
+         - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are)
+
+     """
+     print(f"Processing zero checkpoint '{ds_checkpoint_dir}'")
+
+     optim_files = get_optim_files(ds_checkpoint_dir)
+     zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir)
+     print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}")
+
+     model_files = get_model_state_files(ds_checkpoint_dir)
+
+     zero_model_states = parse_model_states(model_files)
+     print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}')
+
+     if zero_stage <= 2:
+         return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states,
+                                                           exclude_frozen_parameters)
+     elif zero_stage == 3:
+         return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states,
+                                                           exclude_frozen_parameters)
+
+
+ def _zero2_merge_frozen_params(state_dict, zero_model_states):
+     if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0:
+         return
+
+     frozen_param_shapes = zero_model_states[0].frozen_param_shapes
+     frozen_param_fragments = zero_model_states[0].frozen_param_fragments
+
+     if debug:
+         num_elem = sum(s.numel() for s in frozen_param_shapes.values())
+         print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}')
+
+     wanted_params = len(frozen_param_shapes)
+     wanted_numel = sum(s.numel() for s in frozen_param_shapes.values())
+     avail_numel = sum([p.numel() for p in frozen_param_fragments.values()])
+     print(f'Frozen params: Have {avail_numel} numels to process.')
+     print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params')
+
+     total_params = 0
+     total_numel = 0
+     for name, shape in frozen_param_shapes.items():
+         total_params += 1
+         unpartitioned_numel = shape.numel()
+         total_numel += unpartitioned_numel
+
+         state_dict[name] = frozen_param_fragments[name]
+
+         if debug:
+             print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ")
+
+     print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements")
+
+
+ def _has_callable(obj, fn):
+     attr = getattr(obj, fn, None)
+     return callable(attr)
+
+
+ def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states):
+     param_shapes = zero_model_states[0].param_shapes
+
+     # Reconstruction protocol:
+     #
+     # XXX: document this
+
+     if debug:
+         for i in range(world_size):
+             for j in range(len(fp32_flat_groups[0])):
+                 print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}")
+
+     # XXX: memory usage doubles here (zero2)
+     num_param_groups = len(fp32_flat_groups[0])
+     merged_single_partition_of_fp32_groups = []
+     for i in range(num_param_groups):
+         merged_partitions = [sd[i] for sd in fp32_flat_groups]
+         full_single_fp32_vector = torch.cat(merged_partitions, 0)
+         merged_single_partition_of_fp32_groups.append(full_single_fp32_vector)
+     avail_numel = sum(
+         [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups])
+
+     if debug:
+         wanted_params = sum([len(shapes) for shapes in param_shapes])
+         wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes])
+         # not asserting if there is a mismatch due to possible padding
+         print(f"Have {avail_numel} numels to process.")
+         print(f"Need {wanted_numel} numels in {wanted_params} params.")
+
+     # params
+     # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support
+     # out-of-core computing solution
+     total_numel = 0
+     total_params = 0
+     for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups):
+         offset = 0
+         avail_numel = full_single_fp32_vector.numel()
+         for name, shape in shapes.items():
+
+             unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape)
+             total_numel += unpartitioned_numel
+             total_params += 1
+
+             if debug:
+                 print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ")
+             state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape)
+             offset += unpartitioned_numel
+
+         # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and
+         # avail_numel can differ by anywhere between 0..2*world_size. Due to two unrelated complex
+         # paddings performed in the code it's almost impossible to predict the exact numbers w/o the
+         # live optimizer object, so we are checking that the numbers are within the right range
+         align_to = 2 * world_size
+
+         def zero2_align(x):
+             return align_to * math.ceil(x / align_to)
+
+         if debug:
+             print(f"original offset={offset}, avail_numel={avail_numel}")
+
+         offset = zero2_align(offset)
+         avail_numel = zero2_align(avail_numel)
+
+         if debug:
+             print(f"aligned offset={offset}, avail_numel={avail_numel}")
+
+         # Sanity check
+         if offset != avail_numel:
+             raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong")
+
+     print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements")
+
+
+ def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states,
+                                                exclude_frozen_parameters):
+     state_dict = OrderedDict()
+
+     # buffers
+     buffers = zero_model_states[0].buffers
+     state_dict.update(buffers)
+     if debug:
+         print(f"added {len(buffers)} buffers")
+
+     if not exclude_frozen_parameters:
+         _zero2_merge_frozen_params(state_dict, zero_model_states)
+
+     _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states)
+
+     # recover shared parameters
+     for pair in zero_model_states[0].shared_params:
+         if pair[1] in state_dict:
+             state_dict[pair[0]] = state_dict[pair[1]]
+
+     return state_dict
+
+
+ def zero3_partitioned_param_info(unpartitioned_numel, world_size):
+     remainder = unpartitioned_numel % world_size
+     padding_numel = (world_size - remainder) if remainder else 0
+     partitioned_numel = math.ceil(unpartitioned_numel / world_size)
+     return partitioned_numel, padding_numel
+
+
+ def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states):
+     if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0:
+         return
+
+     if debug:
+         for i in range(world_size):
+             num_elem = sum(s.numel() for s in zero_model_states[i].frozen_param_fragments.values())
+             print(f'rank {i}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}')
+
+     frozen_param_shapes = zero_model_states[0].frozen_param_shapes
+     wanted_params = len(frozen_param_shapes)
+     wanted_numel = sum(s.numel() for s in frozen_param_shapes.values())
+     avail_numel = sum([p.numel() for p in zero_model_states[0].frozen_param_fragments.values()]) * world_size
+     print(f'Frozen params: Have {avail_numel} numels to process.')
+     print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params')
+
+     total_params = 0
+     total_numel = 0
+     for name, shape in zero_model_states[0].frozen_param_shapes.items():
+         total_params += 1
+         unpartitioned_numel = shape.numel()
+         total_numel += unpartitioned_numel
+
+         param_frags = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states)
+         state_dict[name] = torch.cat(param_frags, 0).narrow(0, 0, unpartitioned_numel).view(shape)
+
+         partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size)
+
+         if debug:
+             print(
+                 f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}"
+             )
+
+     print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements")
+
+
+ def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states):
+     param_shapes = zero_model_states[0].param_shapes
+     avail_numel = fp32_flat_groups[0].numel() * world_size
+     # Reconstruction protocol: For zero3 we need to zip the partitions together at boundary of each
+     # param, re-consolidating each param, while dealing with padding if any
+
+     # merge list of dicts, preserving order
+     param_shapes = {k: v for d in param_shapes for k, v in d.items()}
+
+     if debug:
+         for i in range(world_size):
+             print(f"{FP32_FLAT_GROUPS}[{i}].shape={fp32_flat_groups[i].shape}")
+
+     wanted_params = len(param_shapes)
+     wanted_numel = sum(shape.numel() for shape in param_shapes.values())
+     # not asserting if there is a mismatch due to possible padding
+     avail_numel = fp32_flat_groups[0].numel() * world_size
+     print(f"Trainable params: Have {avail_numel} numels to process.")
+     print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.")
+
+     # params
+     # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support
+     # out-of-core computing solution
+     offset = 0
+     total_numel = 0
+     total_params = 0
+     for name, shape in param_shapes.items():
+
+         unpartitioned_numel = shape.numel()
+         total_numel += unpartitioned_numel
+         total_params += 1
+
+         partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size)
+
+         if debug:
+             print(
+                 f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}"
+             )
+
+         # XXX: memory usage doubles here
+         state_dict[name] = torch.cat(
+             tuple(fp32_flat_groups[i].narrow(0, offset, partitioned_numel) for i in range(world_size)),
+             0).narrow(0, 0, unpartitioned_numel).view(shape)
+         offset += partitioned_numel
+
+     offset *= world_size
+
+     # Sanity check
+     if offset != avail_numel:
+         raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong")
+
+     print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements")
+
+
+ def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states,
+                                                exclude_frozen_parameters):
+     state_dict = OrderedDict()
+
+     # buffers
+     buffers = zero_model_states[0].buffers
+     state_dict.update(buffers)
+     if debug:
+         print(f"added {len(buffers)} buffers")
+
+     if not exclude_frozen_parameters:
+         _zero3_merge_frozen_params(state_dict, world_size, zero_model_states)
+
+     _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states)
+
+     # recover shared parameters
+     for pair in zero_model_states[0].shared_params:
+         if pair[1] in state_dict:
+             state_dict[pair[0]] = state_dict[pair[1]]
+
+     return state_dict
+
+
+ def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag=None, exclude_frozen_parameters=False):
+     """
+     Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with
+     ``load_state_dict()`` and used for training without DeepSpeed or shared with others, for example
+     via a model hub.
+
+     Args:
+         - ``checkpoint_dir``: path to the desired checkpoint folder
+         - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in 'latest' file. e.g., ``global_step14``
+         - ``exclude_frozen_parameters``: exclude frozen parameters
+
+     Returns:
+         - pytorch ``state_dict``
+
+     Note: this approach may not work if your application doesn't have sufficient free CPU memory and
+     you may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with
+     the checkpoint.
+
+     A typical usage might be ::
+
+         from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint
+         # do the training and checkpoint saving
+         state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu
+         model = model.cpu() # move to cpu
+         model.load_state_dict(state_dict)
+         # submit to model hub or save the model to share with others
+
+     In this example the ``model`` will no longer be usable in the deepspeed context of the same
+     application. i.e. you will need to re-initialize the deepspeed engine, since
+     ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it.
+
+     If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead.
+
+     """
+     if tag is None:
+         latest_path = os.path.join(checkpoint_dir, 'latest')
+         if os.path.isfile(latest_path):
+             with open(latest_path, 'r') as fd:
+                 tag = fd.read().strip()
+         else:
+             raise ValueError(f"Unable to find 'latest' file at {latest_path}")
+
+     ds_checkpoint_dir = os.path.join(checkpoint_dir, tag)
+
+     if not os.path.isdir(ds_checkpoint_dir):
+         raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist")
+
+     return _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters)
+
+
+ def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir, output_file, tag=None, exclude_frozen_parameters=False):
+     """
+     Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be
+     loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed.
+
+     Args:
+         - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``)
+         - ``output_file``: path to the pytorch fp32 state_dict output file (e.g. path/pytorch_model.bin)
+         - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14``
+         - ``exclude_frozen_parameters``: exclude frozen parameters
+     """
+
+     state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag, exclude_frozen_parameters)
+     print(f"Saving fp32 state dict to {output_file}")
+     torch.save(state_dict, output_file)
+
+
+ def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None):
+     """
+     1. Put the provided model on CPU
+     2. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict``
+     3. Load it into the provided model
+
+     Args:
+         - ``model``: the model object to update
+         - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``)
+         - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14``
+
+     Returns:
+         - ``model``: modified model
+
+     Make sure you have plenty of CPU memory available before you call this function. If you don't
+     have enough, use the ``zero_to_fp32.py`` utility to do the conversion. You will find it
+     conveniently placed for you in the checkpoint folder.
+
+     A typical usage might be ::
+
+         from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint
+         model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir)
+         # submit to model hub or save the model to share with others
+
+     Note that once this has been run, the ``model`` will no longer be usable in the deepspeed context
+     of the same application. i.e. you will need to re-initialize the deepspeed engine, since
+     ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it.
+
+     """
+     logger.info("Extracting fp32 weights")
+     state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag)
+
+     logger.info("Overwriting model with fp32 weights")
+     model = model.cpu()
+     model.load_state_dict(state_dict, strict=False)
+
+     return model
+
+
+ if __name__ == "__main__":
+
+     parser = argparse.ArgumentParser()
+     parser.add_argument("checkpoint_dir",
+                         type=str,
+                         help="path to the desired checkpoint folder, e.g., path/checkpoint-12")
+     parser.add_argument(
+         "output_file",
+         type=str,
+         help="path to the pytorch fp32 state_dict output file (e.g. path/checkpoint-12/pytorch_model.bin)")
+     parser.add_argument("-t",
+                         "--tag",
+                         type=str,
+                         default=None,
+                         help="checkpoint tag used as a unique identifier for checkpoint. e.g., global_step1")
+     parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters")
+     parser.add_argument("-d", "--debug", action='store_true', help="enable debug")
+     args = parser.parse_args()
+
+     debug = args.debug
+
+     convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir,
+                                                args.output_file,
+                                                tag=args.tag,
+                                                exclude_frozen_parameters=args.exclude_frozen_parameters)
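
Since the script is copied into the top-level checkpoint directory (per its own header comment), the conversion can also be driven from Python instead of the command line, using the functions defined above. A short sketch, run from inside the checkpoint folder; the output filename is a conventional choice, not mandated by the script:

    import torch
    from zero_to_fp32 import convert_zero_checkpoint_to_fp32_state_dict

    # with tag=None the script resolves the 'latest' file in the checkpoint folder
    convert_zero_checkpoint_to_fp32_state_dict(".", "pytorch_model.bin")
    state_dict = torch.load("pytorch_model.bin", map_location="cpu")
    print(f"consolidated {len(state_dict)} fp32 tensors")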