zz814 committed on
Commit
cd0c4c8
·
verified ·
1 Parent(s): 9052e13

Upload 30 files

Browse files
README.md ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Tiny Llama Zh Mlc
3
+ emoji: 📊
4
+ colorFrom: blue
5
+ colorTo: pink
6
+ sdk: gradio
7
+ sdk_version: 4.31.5
8
+ app_file: app.py
9
+ pinned: false
10
+ ---
11
+
12
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
mlc-chat-config.json ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "llama",
3
+ "quantization": "q4f16_1",
4
+ "model_config": {
5
+ "hidden_size": 2048,
6
+ "intermediate_size": 5632,
7
+ "num_attention_heads": 32,
8
+ "num_hidden_layers": 22,
9
+ "rms_norm_eps": 1e-05,
10
+ "vocab_size": 65024,
11
+ "position_embedding_base": 10000.0,
12
+ "context_window_size": 2048,
13
+ "prefill_chunk_size": 2048,
14
+ "num_key_value_heads": 4,
15
+ "head_dim": 64,
16
+ "tensor_parallel_shards": 1,
17
+ "max_batch_size": 80
18
+ },
19
+ "vocab_size": 65024,
20
+ "context_window_size": 2048,
21
+ "sliding_window_size": -1,
22
+ "prefill_chunk_size": 2048,
23
+ "attention_sink_size": -1,
24
+ "tensor_parallel_shards": 1,
25
+ "mean_gen_len": 128,
26
+ "max_gen_len": 512,
27
+ "shift_fill_factor": 0.3,
28
+ "temperature": 0.7,
29
+ "presence_penalty": 0.0,
30
+ "frequency_penalty": 0.0,
31
+ "repetition_penalty": 1.0,
32
+ "top_p": 0.95,
33
+ "conv_template": {
34
+ "name": "llama-2",
35
+ "system_template": "[INST] <<SYS>>\n{system_message}\n<</SYS>>\n\n",
36
+ "system_message": "You are a helpful, respectful and honest assistant.",
37
+ "system_prefix_token_ids": [
38
+ 1
39
+ ],
40
+ "add_role_after_system_message": false,
41
+ "roles": {
42
+ "user": "<s>[INST]",
43
+ "assistant": "[/INST]",
44
+ "tool": "[INST]"
45
+ },
46
+ "role_templates": {
47
+ "user": "{user_message}",
48
+ "assistant": "{assistant_message}",
49
+ "tool": "{tool_message}"
50
+ },
51
+ "messages": [],
52
+ "seps": [
53
+ " ",
54
+ " </s>"
55
+ ],
56
+ "role_content_sep": " ",
57
+ "role_empty_sep": " ",
58
+ "stop_str": [
59
+ "[INST]"
60
+ ],
61
+ "stop_token_ids": [
62
+ 2
63
+ ],
64
+ "function_string": "",
65
+ "use_function_calling": false
66
+ },
67
+ "pad_token_id": 0,
68
+ "bos_token_id": 1,
69
+ "eos_token_id": 2,
70
+ "tokenizer_files": [
71
+ "tokenizer.model",
72
+ "tokenizer_config.json"
73
+ ],
74
+ "token_table_postproc_method": "byte_fallback",
75
+ "version": "0.1.0"
76
+ }
ndarray-cache.json ADDED
@@ -0,0 +1,2639 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "ParamSize": 225,
4
+ "ParamBytes": 694996992.0,
5
+ "BitsPerParam": 4.500857951427515
6
+ },
7
+ "records": [
8
+ {
9
+ "dataPath": "params_shard_0.bin",
10
+ "format": "raw-shard",
11
+ "nbytes": 66584576,
12
+ "records": [
13
+ {
14
+ "name": "lm_head.q_weight",
15
+ "shape": [
16
+ 65024,
17
+ 256
18
+ ],
19
+ "dtype": "uint32",
20
+ "format": "f32-to-bf16",
21
+ "nbytes": 66584576,
22
+ "byteOffset": 0
23
+ }
24
+ ],
25
+ "md5sum": "0f9fa0a19854bb49f3e6a74c54b1b44f"
26
+ },
27
+ {
28
+ "dataPath": "params_shard_1.bin",
29
+ "format": "raw-shard",
30
+ "nbytes": 66584576,
31
+ "records": [
32
+ {
33
+ "name": "model.embed_tokens.q_weight",
34
+ "shape": [
35
+ 65024,
36
+ 256
37
+ ],
38
+ "dtype": "uint32",
39
+ "format": "f32-to-bf16",
40
+ "nbytes": 66584576,
41
+ "byteOffset": 0
42
+ }
43
+ ],
44
+ "md5sum": "c4851353a8d2563e47c3bf8a0d5bbb43"
45
+ },
46
+ {
47
+ "dataPath": "params_shard_2.bin",
48
+ "format": "raw-shard",
49
+ "nbytes": 23138304,
50
+ "records": [
51
+ {
52
+ "name": "lm_head.q_scale",
53
+ "shape": [
54
+ 65024,
55
+ 64
56
+ ],
57
+ "dtype": "float16",
58
+ "format": "f32-to-bf16",
59
+ "nbytes": 8323072,
60
+ "byteOffset": 0
61
+ },
62
+ {
63
+ "name": "model.embed_tokens.q_scale",
64
+ "shape": [
65
+ 65024,
66
+ 64
67
+ ],
68
+ "dtype": "float16",
69
+ "format": "f32-to-bf16",
70
+ "nbytes": 8323072,
71
+ "byteOffset": 8323072
72
+ },
73
+ {
74
+ "name": "model.layers.0.input_layernorm.weight",
75
+ "shape": [
76
+ 2048
77
+ ],
78
+ "dtype": "float16",
79
+ "format": "f32-to-bf16",
80
+ "nbytes": 4096,
81
+ "byteOffset": 16646144
82
+ },
83
+ {
84
+ "name": "model.layers.0.mlp.down_proj.q_weight",
85
+ "shape": [
86
+ 2048,
87
+ 704
88
+ ],
89
+ "dtype": "uint32",
90
+ "format": "f32-to-bf16",
91
+ "nbytes": 5767168,
92
+ "byteOffset": 16650240
93
+ },
94
+ {
95
+ "name": "model.layers.0.mlp.down_proj.q_scale",
96
+ "shape": [
97
+ 2048,
98
+ 176
99
+ ],
100
+ "dtype": "float16",
101
+ "format": "f32-to-bf16",
102
+ "nbytes": 720896,
103
+ "byteOffset": 22417408
104
+ }
105
+ ],
106
+ "md5sum": "4c6ed7223e2a4162776e7d8af803eebc"
107
+ },
108
+ {
109
+ "dataPath": "params_shard_3.bin",
110
+ "format": "raw-shard",
111
+ "nbytes": 24780800,
112
+ "records": [
113
+ {
114
+ "name": "model.layers.0.mlp.gate_up_proj.q_weight",
115
+ "shape": [
116
+ 11264,
117
+ 256
118
+ ],
119
+ "dtype": "uint32",
120
+ "format": "f32-to-bf16",
121
+ "nbytes": 11534336,
122
+ "byteOffset": 0
123
+ },
124
+ {
125
+ "name": "model.layers.0.mlp.gate_up_proj.q_scale",
126
+ "shape": [
127
+ 11264,
128
+ 64
129
+ ],
130
+ "dtype": "float16",
131
+ "format": "f32-to-bf16",
132
+ "nbytes": 1441792,
133
+ "byteOffset": 11534336
134
+ },
135
+ {
136
+ "name": "model.layers.0.post_attention_layernorm.weight",
137
+ "shape": [
138
+ 2048
139
+ ],
140
+ "dtype": "float16",
141
+ "format": "f32-to-bf16",
142
+ "nbytes": 4096,
143
+ "byteOffset": 12976128
144
+ },
145
+ {
146
+ "name": "model.layers.0.self_attn.qkv_proj.q_weight",
147
+ "shape": [
148
+ 2560,
149
+ 256
150
+ ],
151
+ "dtype": "uint32",
152
+ "format": "f32-to-bf16",
153
+ "nbytes": 2621440,
154
+ "byteOffset": 12980224
155
+ },
156
+ {
157
+ "name": "model.layers.0.self_attn.qkv_proj.q_scale",
158
+ "shape": [
159
+ 2560,
160
+ 64
161
+ ],
162
+ "dtype": "float16",
163
+ "format": "f32-to-bf16",
164
+ "nbytes": 327680,
165
+ "byteOffset": 15601664
166
+ },
167
+ {
168
+ "name": "model.layers.0.self_attn.o_proj.q_weight",
169
+ "shape": [
170
+ 2048,
171
+ 256
172
+ ],
173
+ "dtype": "uint32",
174
+ "format": "f32-to-bf16",
175
+ "nbytes": 2097152,
176
+ "byteOffset": 15929344
177
+ },
178
+ {
179
+ "name": "model.layers.0.self_attn.o_proj.q_scale",
180
+ "shape": [
181
+ 2048,
182
+ 64
183
+ ],
184
+ "dtype": "float16",
185
+ "format": "f32-to-bf16",
186
+ "nbytes": 262144,
187
+ "byteOffset": 18026496
188
+ },
189
+ {
190
+ "name": "model.layers.1.input_layernorm.weight",
191
+ "shape": [
192
+ 2048
193
+ ],
194
+ "dtype": "float16",
195
+ "format": "f32-to-bf16",
196
+ "nbytes": 4096,
197
+ "byteOffset": 18288640
198
+ },
199
+ {
200
+ "name": "model.layers.1.mlp.down_proj.q_weight",
201
+ "shape": [
202
+ 2048,
203
+ 704
204
+ ],
205
+ "dtype": "uint32",
206
+ "format": "f32-to-bf16",
207
+ "nbytes": 5767168,
208
+ "byteOffset": 18292736
209
+ },
210
+ {
211
+ "name": "model.layers.1.mlp.down_proj.q_scale",
212
+ "shape": [
213
+ 2048,
214
+ 176
215
+ ],
216
+ "dtype": "float16",
217
+ "format": "f32-to-bf16",
218
+ "nbytes": 720896,
219
+ "byteOffset": 24059904
220
+ }
221
+ ],
222
+ "md5sum": "79dfe66878509fdfa072510efc71ca01"
223
+ },
224
+ {
225
+ "dataPath": "params_shard_4.bin",
226
+ "format": "raw-shard",
227
+ "nbytes": 24780800,
228
+ "records": [
229
+ {
230
+ "name": "model.layers.1.mlp.gate_up_proj.q_weight",
231
+ "shape": [
232
+ 11264,
233
+ 256
234
+ ],
235
+ "dtype": "uint32",
236
+ "format": "f32-to-bf16",
237
+ "nbytes": 11534336,
238
+ "byteOffset": 0
239
+ },
240
+ {
241
+ "name": "model.layers.1.mlp.gate_up_proj.q_scale",
242
+ "shape": [
243
+ 11264,
244
+ 64
245
+ ],
246
+ "dtype": "float16",
247
+ "format": "f32-to-bf16",
248
+ "nbytes": 1441792,
249
+ "byteOffset": 11534336
250
+ },
251
+ {
252
+ "name": "model.layers.1.post_attention_layernorm.weight",
253
+ "shape": [
254
+ 2048
255
+ ],
256
+ "dtype": "float16",
257
+ "format": "f32-to-bf16",
258
+ "nbytes": 4096,
259
+ "byteOffset": 12976128
260
+ },
261
+ {
262
+ "name": "model.layers.1.self_attn.qkv_proj.q_weight",
263
+ "shape": [
264
+ 2560,
265
+ 256
266
+ ],
267
+ "dtype": "uint32",
268
+ "format": "f32-to-bf16",
269
+ "nbytes": 2621440,
270
+ "byteOffset": 12980224
271
+ },
272
+ {
273
+ "name": "model.layers.1.self_attn.qkv_proj.q_scale",
274
+ "shape": [
275
+ 2560,
276
+ 64
277
+ ],
278
+ "dtype": "float16",
279
+ "format": "f32-to-bf16",
280
+ "nbytes": 327680,
281
+ "byteOffset": 15601664
282
+ },
283
+ {
284
+ "name": "model.layers.1.self_attn.o_proj.q_weight",
285
+ "shape": [
286
+ 2048,
287
+ 256
288
+ ],
289
+ "dtype": "uint32",
290
+ "format": "f32-to-bf16",
291
+ "nbytes": 2097152,
292
+ "byteOffset": 15929344
293
+ },
294
+ {
295
+ "name": "model.layers.1.self_attn.o_proj.q_scale",
296
+ "shape": [
297
+ 2048,
298
+ 64
299
+ ],
300
+ "dtype": "float16",
301
+ "format": "f32-to-bf16",
302
+ "nbytes": 262144,
303
+ "byteOffset": 18026496
304
+ },
305
+ {
306
+ "name": "model.layers.10.input_layernorm.weight",
307
+ "shape": [
308
+ 2048
309
+ ],
310
+ "dtype": "float16",
311
+ "format": "f32-to-bf16",
312
+ "nbytes": 4096,
313
+ "byteOffset": 18288640
314
+ },
315
+ {
316
+ "name": "model.layers.10.mlp.down_proj.q_weight",
317
+ "shape": [
318
+ 2048,
319
+ 704
320
+ ],
321
+ "dtype": "uint32",
322
+ "format": "f32-to-bf16",
323
+ "nbytes": 5767168,
324
+ "byteOffset": 18292736
325
+ },
326
+ {
327
+ "name": "model.layers.10.mlp.down_proj.q_scale",
328
+ "shape": [
329
+ 2048,
330
+ 176
331
+ ],
332
+ "dtype": "float16",
333
+ "format": "f32-to-bf16",
334
+ "nbytes": 720896,
335
+ "byteOffset": 24059904
336
+ }
337
+ ],
338
+ "md5sum": "e7022aa5f2c11f49ae1eb3de1c1760e1"
339
+ },
340
+ {
341
+ "dataPath": "params_shard_5.bin",
342
+ "format": "raw-shard",
343
+ "nbytes": 24780800,
344
+ "records": [
345
+ {
346
+ "name": "model.layers.10.mlp.gate_up_proj.q_weight",
347
+ "shape": [
348
+ 11264,
349
+ 256
350
+ ],
351
+ "dtype": "uint32",
352
+ "format": "f32-to-bf16",
353
+ "nbytes": 11534336,
354
+ "byteOffset": 0
355
+ },
356
+ {
357
+ "name": "model.layers.10.mlp.gate_up_proj.q_scale",
358
+ "shape": [
359
+ 11264,
360
+ 64
361
+ ],
362
+ "dtype": "float16",
363
+ "format": "f32-to-bf16",
364
+ "nbytes": 1441792,
365
+ "byteOffset": 11534336
366
+ },
367
+ {
368
+ "name": "model.layers.10.post_attention_layernorm.weight",
369
+ "shape": [
370
+ 2048
371
+ ],
372
+ "dtype": "float16",
373
+ "format": "f32-to-bf16",
374
+ "nbytes": 4096,
375
+ "byteOffset": 12976128
376
+ },
377
+ {
378
+ "name": "model.layers.10.self_attn.qkv_proj.q_weight",
379
+ "shape": [
380
+ 2560,
381
+ 256
382
+ ],
383
+ "dtype": "uint32",
384
+ "format": "f32-to-bf16",
385
+ "nbytes": 2621440,
386
+ "byteOffset": 12980224
387
+ },
388
+ {
389
+ "name": "model.layers.10.self_attn.qkv_proj.q_scale",
390
+ "shape": [
391
+ 2560,
392
+ 64
393
+ ],
394
+ "dtype": "float16",
395
+ "format": "f32-to-bf16",
396
+ "nbytes": 327680,
397
+ "byteOffset": 15601664
398
+ },
399
+ {
400
+ "name": "model.layers.10.self_attn.o_proj.q_weight",
401
+ "shape": [
402
+ 2048,
403
+ 256
404
+ ],
405
+ "dtype": "uint32",
406
+ "format": "f32-to-bf16",
407
+ "nbytes": 2097152,
408
+ "byteOffset": 15929344
409
+ },
410
+ {
411
+ "name": "model.layers.10.self_attn.o_proj.q_scale",
412
+ "shape": [
413
+ 2048,
414
+ 64
415
+ ],
416
+ "dtype": "float16",
417
+ "format": "f32-to-bf16",
418
+ "nbytes": 262144,
419
+ "byteOffset": 18026496
420
+ },
421
+ {
422
+ "name": "model.layers.11.input_layernorm.weight",
423
+ "shape": [
424
+ 2048
425
+ ],
426
+ "dtype": "float16",
427
+ "format": "f32-to-bf16",
428
+ "nbytes": 4096,
429
+ "byteOffset": 18288640
430
+ },
431
+ {
432
+ "name": "model.layers.11.mlp.down_proj.q_weight",
433
+ "shape": [
434
+ 2048,
435
+ 704
436
+ ],
437
+ "dtype": "uint32",
438
+ "format": "f32-to-bf16",
439
+ "nbytes": 5767168,
440
+ "byteOffset": 18292736
441
+ },
442
+ {
443
+ "name": "model.layers.11.mlp.down_proj.q_scale",
444
+ "shape": [
445
+ 2048,
446
+ 176
447
+ ],
448
+ "dtype": "float16",
449
+ "format": "f32-to-bf16",
450
+ "nbytes": 720896,
451
+ "byteOffset": 24059904
452
+ }
453
+ ],
454
+ "md5sum": "f45cff6d2687bdc703a34bb6437b709f"
455
+ },
456
+ {
457
+ "dataPath": "params_shard_6.bin",
458
+ "format": "raw-shard",
459
+ "nbytes": 24780800,
460
+ "records": [
461
+ {
462
+ "name": "model.layers.11.mlp.gate_up_proj.q_weight",
463
+ "shape": [
464
+ 11264,
465
+ 256
466
+ ],
467
+ "dtype": "uint32",
468
+ "format": "f32-to-bf16",
469
+ "nbytes": 11534336,
470
+ "byteOffset": 0
471
+ },
472
+ {
473
+ "name": "model.layers.11.mlp.gate_up_proj.q_scale",
474
+ "shape": [
475
+ 11264,
476
+ 64
477
+ ],
478
+ "dtype": "float16",
479
+ "format": "f32-to-bf16",
480
+ "nbytes": 1441792,
481
+ "byteOffset": 11534336
482
+ },
483
+ {
484
+ "name": "model.layers.11.post_attention_layernorm.weight",
485
+ "shape": [
486
+ 2048
487
+ ],
488
+ "dtype": "float16",
489
+ "format": "f32-to-bf16",
490
+ "nbytes": 4096,
491
+ "byteOffset": 12976128
492
+ },
493
+ {
494
+ "name": "model.layers.11.self_attn.qkv_proj.q_weight",
495
+ "shape": [
496
+ 2560,
497
+ 256
498
+ ],
499
+ "dtype": "uint32",
500
+ "format": "f32-to-bf16",
501
+ "nbytes": 2621440,
502
+ "byteOffset": 12980224
503
+ },
504
+ {
505
+ "name": "model.layers.11.self_attn.qkv_proj.q_scale",
506
+ "shape": [
507
+ 2560,
508
+ 64
509
+ ],
510
+ "dtype": "float16",
511
+ "format": "f32-to-bf16",
512
+ "nbytes": 327680,
513
+ "byteOffset": 15601664
514
+ },
515
+ {
516
+ "name": "model.layers.11.self_attn.o_proj.q_weight",
517
+ "shape": [
518
+ 2048,
519
+ 256
520
+ ],
521
+ "dtype": "uint32",
522
+ "format": "f32-to-bf16",
523
+ "nbytes": 2097152,
524
+ "byteOffset": 15929344
525
+ },
526
+ {
527
+ "name": "model.layers.11.self_attn.o_proj.q_scale",
528
+ "shape": [
529
+ 2048,
530
+ 64
531
+ ],
532
+ "dtype": "float16",
533
+ "format": "f32-to-bf16",
534
+ "nbytes": 262144,
535
+ "byteOffset": 18026496
536
+ },
537
+ {
538
+ "name": "model.layers.12.input_layernorm.weight",
539
+ "shape": [
540
+ 2048
541
+ ],
542
+ "dtype": "float16",
543
+ "format": "f32-to-bf16",
544
+ "nbytes": 4096,
545
+ "byteOffset": 18288640
546
+ },
547
+ {
548
+ "name": "model.layers.12.mlp.down_proj.q_weight",
549
+ "shape": [
550
+ 2048,
551
+ 704
552
+ ],
553
+ "dtype": "uint32",
554
+ "format": "f32-to-bf16",
555
+ "nbytes": 5767168,
556
+ "byteOffset": 18292736
557
+ },
558
+ {
559
+ "name": "model.layers.12.mlp.down_proj.q_scale",
560
+ "shape": [
561
+ 2048,
562
+ 176
563
+ ],
564
+ "dtype": "float16",
565
+ "format": "f32-to-bf16",
566
+ "nbytes": 720896,
567
+ "byteOffset": 24059904
568
+ }
569
+ ],
570
+ "md5sum": "3219e41edabbc8b1decc2baf5b8ea7e6"
571
+ },
572
+ {
573
+ "dataPath": "params_shard_7.bin",
574
+ "format": "raw-shard",
575
+ "nbytes": 24780800,
576
+ "records": [
577
+ {
578
+ "name": "model.layers.12.mlp.gate_up_proj.q_weight",
579
+ "shape": [
580
+ 11264,
581
+ 256
582
+ ],
583
+ "dtype": "uint32",
584
+ "format": "f32-to-bf16",
585
+ "nbytes": 11534336,
586
+ "byteOffset": 0
587
+ },
588
+ {
589
+ "name": "model.layers.12.mlp.gate_up_proj.q_scale",
590
+ "shape": [
591
+ 11264,
592
+ 64
593
+ ],
594
+ "dtype": "float16",
595
+ "format": "f32-to-bf16",
596
+ "nbytes": 1441792,
597
+ "byteOffset": 11534336
598
+ },
599
+ {
600
+ "name": "model.layers.12.post_attention_layernorm.weight",
601
+ "shape": [
602
+ 2048
603
+ ],
604
+ "dtype": "float16",
605
+ "format": "f32-to-bf16",
606
+ "nbytes": 4096,
607
+ "byteOffset": 12976128
608
+ },
609
+ {
610
+ "name": "model.layers.12.self_attn.qkv_proj.q_weight",
611
+ "shape": [
612
+ 2560,
613
+ 256
614
+ ],
615
+ "dtype": "uint32",
616
+ "format": "f32-to-bf16",
617
+ "nbytes": 2621440,
618
+ "byteOffset": 12980224
619
+ },
620
+ {
621
+ "name": "model.layers.12.self_attn.qkv_proj.q_scale",
622
+ "shape": [
623
+ 2560,
624
+ 64
625
+ ],
626
+ "dtype": "float16",
627
+ "format": "f32-to-bf16",
628
+ "nbytes": 327680,
629
+ "byteOffset": 15601664
630
+ },
631
+ {
632
+ "name": "model.layers.12.self_attn.o_proj.q_weight",
633
+ "shape": [
634
+ 2048,
635
+ 256
636
+ ],
637
+ "dtype": "uint32",
638
+ "format": "f32-to-bf16",
639
+ "nbytes": 2097152,
640
+ "byteOffset": 15929344
641
+ },
642
+ {
643
+ "name": "model.layers.12.self_attn.o_proj.q_scale",
644
+ "shape": [
645
+ 2048,
646
+ 64
647
+ ],
648
+ "dtype": "float16",
649
+ "format": "f32-to-bf16",
650
+ "nbytes": 262144,
651
+ "byteOffset": 18026496
652
+ },
653
+ {
654
+ "name": "model.layers.13.input_layernorm.weight",
655
+ "shape": [
656
+ 2048
657
+ ],
658
+ "dtype": "float16",
659
+ "format": "f32-to-bf16",
660
+ "nbytes": 4096,
661
+ "byteOffset": 18288640
662
+ },
663
+ {
664
+ "name": "model.layers.13.mlp.down_proj.q_weight",
665
+ "shape": [
666
+ 2048,
667
+ 704
668
+ ],
669
+ "dtype": "uint32",
670
+ "format": "f32-to-bf16",
671
+ "nbytes": 5767168,
672
+ "byteOffset": 18292736
673
+ },
674
+ {
675
+ "name": "model.layers.13.mlp.down_proj.q_scale",
676
+ "shape": [
677
+ 2048,
678
+ 176
679
+ ],
680
+ "dtype": "float16",
681
+ "format": "f32-to-bf16",
682
+ "nbytes": 720896,
683
+ "byteOffset": 24059904
684
+ }
685
+ ],
686
+ "md5sum": "d7d5c74a612879f020e3c8c98899a45c"
687
+ },
688
+ {
689
+ "dataPath": "params_shard_8.bin",
690
+ "format": "raw-shard",
691
+ "nbytes": 24780800,
692
+ "records": [
693
+ {
694
+ "name": "model.layers.13.mlp.gate_up_proj.q_weight",
695
+ "shape": [
696
+ 11264,
697
+ 256
698
+ ],
699
+ "dtype": "uint32",
700
+ "format": "f32-to-bf16",
701
+ "nbytes": 11534336,
702
+ "byteOffset": 0
703
+ },
704
+ {
705
+ "name": "model.layers.13.mlp.gate_up_proj.q_scale",
706
+ "shape": [
707
+ 11264,
708
+ 64
709
+ ],
710
+ "dtype": "float16",
711
+ "format": "f32-to-bf16",
712
+ "nbytes": 1441792,
713
+ "byteOffset": 11534336
714
+ },
715
+ {
716
+ "name": "model.layers.13.post_attention_layernorm.weight",
717
+ "shape": [
718
+ 2048
719
+ ],
720
+ "dtype": "float16",
721
+ "format": "f32-to-bf16",
722
+ "nbytes": 4096,
723
+ "byteOffset": 12976128
724
+ },
725
+ {
726
+ "name": "model.layers.13.self_attn.qkv_proj.q_weight",
727
+ "shape": [
728
+ 2560,
729
+ 256
730
+ ],
731
+ "dtype": "uint32",
732
+ "format": "f32-to-bf16",
733
+ "nbytes": 2621440,
734
+ "byteOffset": 12980224
735
+ },
736
+ {
737
+ "name": "model.layers.13.self_attn.qkv_proj.q_scale",
738
+ "shape": [
739
+ 2560,
740
+ 64
741
+ ],
742
+ "dtype": "float16",
743
+ "format": "f32-to-bf16",
744
+ "nbytes": 327680,
745
+ "byteOffset": 15601664
746
+ },
747
+ {
748
+ "name": "model.layers.13.self_attn.o_proj.q_weight",
749
+ "shape": [
750
+ 2048,
751
+ 256
752
+ ],
753
+ "dtype": "uint32",
754
+ "format": "f32-to-bf16",
755
+ "nbytes": 2097152,
756
+ "byteOffset": 15929344
757
+ },
758
+ {
759
+ "name": "model.layers.13.self_attn.o_proj.q_scale",
760
+ "shape": [
761
+ 2048,
762
+ 64
763
+ ],
764
+ "dtype": "float16",
765
+ "format": "f32-to-bf16",
766
+ "nbytes": 262144,
767
+ "byteOffset": 18026496
768
+ },
769
+ {
770
+ "name": "model.layers.14.input_layernorm.weight",
771
+ "shape": [
772
+ 2048
773
+ ],
774
+ "dtype": "float16",
775
+ "format": "f32-to-bf16",
776
+ "nbytes": 4096,
777
+ "byteOffset": 18288640
778
+ },
779
+ {
780
+ "name": "model.layers.14.mlp.down_proj.q_weight",
781
+ "shape": [
782
+ 2048,
783
+ 704
784
+ ],
785
+ "dtype": "uint32",
786
+ "format": "f32-to-bf16",
787
+ "nbytes": 5767168,
788
+ "byteOffset": 18292736
789
+ },
790
+ {
791
+ "name": "model.layers.14.mlp.down_proj.q_scale",
792
+ "shape": [
793
+ 2048,
794
+ 176
795
+ ],
796
+ "dtype": "float16",
797
+ "format": "f32-to-bf16",
798
+ "nbytes": 720896,
799
+ "byteOffset": 24059904
800
+ }
801
+ ],
802
+ "md5sum": "292c4dc65c8ca71cbc9752120bc93e91"
803
+ },
804
+ {
805
+ "dataPath": "params_shard_9.bin",
806
+ "format": "raw-shard",
807
+ "nbytes": 24780800,
808
+ "records": [
809
+ {
810
+ "name": "model.layers.14.mlp.gate_up_proj.q_weight",
811
+ "shape": [
812
+ 11264,
813
+ 256
814
+ ],
815
+ "dtype": "uint32",
816
+ "format": "f32-to-bf16",
817
+ "nbytes": 11534336,
818
+ "byteOffset": 0
819
+ },
820
+ {
821
+ "name": "model.layers.14.mlp.gate_up_proj.q_scale",
822
+ "shape": [
823
+ 11264,
824
+ 64
825
+ ],
826
+ "dtype": "float16",
827
+ "format": "f32-to-bf16",
828
+ "nbytes": 1441792,
829
+ "byteOffset": 11534336
830
+ },
831
+ {
832
+ "name": "model.layers.14.post_attention_layernorm.weight",
833
+ "shape": [
834
+ 2048
835
+ ],
836
+ "dtype": "float16",
837
+ "format": "f32-to-bf16",
838
+ "nbytes": 4096,
839
+ "byteOffset": 12976128
840
+ },
841
+ {
842
+ "name": "model.layers.14.self_attn.qkv_proj.q_weight",
843
+ "shape": [
844
+ 2560,
845
+ 256
846
+ ],
847
+ "dtype": "uint32",
848
+ "format": "f32-to-bf16",
849
+ "nbytes": 2621440,
850
+ "byteOffset": 12980224
851
+ },
852
+ {
853
+ "name": "model.layers.14.self_attn.qkv_proj.q_scale",
854
+ "shape": [
855
+ 2560,
856
+ 64
857
+ ],
858
+ "dtype": "float16",
859
+ "format": "f32-to-bf16",
860
+ "nbytes": 327680,
861
+ "byteOffset": 15601664
862
+ },
863
+ {
864
+ "name": "model.layers.14.self_attn.o_proj.q_weight",
865
+ "shape": [
866
+ 2048,
867
+ 256
868
+ ],
869
+ "dtype": "uint32",
870
+ "format": "f32-to-bf16",
871
+ "nbytes": 2097152,
872
+ "byteOffset": 15929344
873
+ },
874
+ {
875
+ "name": "model.layers.14.self_attn.o_proj.q_scale",
876
+ "shape": [
877
+ 2048,
878
+ 64
879
+ ],
880
+ "dtype": "float16",
881
+ "format": "f32-to-bf16",
882
+ "nbytes": 262144,
883
+ "byteOffset": 18026496
884
+ },
885
+ {
886
+ "name": "model.layers.15.input_layernorm.weight",
887
+ "shape": [
888
+ 2048
889
+ ],
890
+ "dtype": "float16",
891
+ "format": "f32-to-bf16",
892
+ "nbytes": 4096,
893
+ "byteOffset": 18288640
894
+ },
895
+ {
896
+ "name": "model.layers.15.mlp.down_proj.q_weight",
897
+ "shape": [
898
+ 2048,
899
+ 704
900
+ ],
901
+ "dtype": "uint32",
902
+ "format": "f32-to-bf16",
903
+ "nbytes": 5767168,
904
+ "byteOffset": 18292736
905
+ },
906
+ {
907
+ "name": "model.layers.15.mlp.down_proj.q_scale",
908
+ "shape": [
909
+ 2048,
910
+ 176
911
+ ],
912
+ "dtype": "float16",
913
+ "format": "f32-to-bf16",
914
+ "nbytes": 720896,
915
+ "byteOffset": 24059904
916
+ }
917
+ ],
918
+ "md5sum": "c95244eeda3cf25438d388239bae2eb9"
919
+ },
920
+ {
921
+ "dataPath": "params_shard_10.bin",
922
+ "format": "raw-shard",
923
+ "nbytes": 24780800,
924
+ "records": [
925
+ {
926
+ "name": "model.layers.15.mlp.gate_up_proj.q_weight",
927
+ "shape": [
928
+ 11264,
929
+ 256
930
+ ],
931
+ "dtype": "uint32",
932
+ "format": "f32-to-bf16",
933
+ "nbytes": 11534336,
934
+ "byteOffset": 0
935
+ },
936
+ {
937
+ "name": "model.layers.15.mlp.gate_up_proj.q_scale",
938
+ "shape": [
939
+ 11264,
940
+ 64
941
+ ],
942
+ "dtype": "float16",
943
+ "format": "f32-to-bf16",
944
+ "nbytes": 1441792,
945
+ "byteOffset": 11534336
946
+ },
947
+ {
948
+ "name": "model.layers.15.post_attention_layernorm.weight",
949
+ "shape": [
950
+ 2048
951
+ ],
952
+ "dtype": "float16",
953
+ "format": "f32-to-bf16",
954
+ "nbytes": 4096,
955
+ "byteOffset": 12976128
956
+ },
957
+ {
958
+ "name": "model.layers.15.self_attn.qkv_proj.q_weight",
959
+ "shape": [
960
+ 2560,
961
+ 256
962
+ ],
963
+ "dtype": "uint32",
964
+ "format": "f32-to-bf16",
965
+ "nbytes": 2621440,
966
+ "byteOffset": 12980224
967
+ },
968
+ {
969
+ "name": "model.layers.15.self_attn.qkv_proj.q_scale",
970
+ "shape": [
971
+ 2560,
972
+ 64
973
+ ],
974
+ "dtype": "float16",
975
+ "format": "f32-to-bf16",
976
+ "nbytes": 327680,
977
+ "byteOffset": 15601664
978
+ },
979
+ {
980
+ "name": "model.layers.15.self_attn.o_proj.q_weight",
981
+ "shape": [
982
+ 2048,
983
+ 256
984
+ ],
985
+ "dtype": "uint32",
986
+ "format": "f32-to-bf16",
987
+ "nbytes": 2097152,
988
+ "byteOffset": 15929344
989
+ },
990
+ {
991
+ "name": "model.layers.15.self_attn.o_proj.q_scale",
992
+ "shape": [
993
+ 2048,
994
+ 64
995
+ ],
996
+ "dtype": "float16",
997
+ "format": "f32-to-bf16",
998
+ "nbytes": 262144,
999
+ "byteOffset": 18026496
1000
+ },
1001
+ {
1002
+ "name": "model.layers.16.input_layernorm.weight",
1003
+ "shape": [
1004
+ 2048
1005
+ ],
1006
+ "dtype": "float16",
1007
+ "format": "f32-to-bf16",
1008
+ "nbytes": 4096,
1009
+ "byteOffset": 18288640
1010
+ },
1011
+ {
1012
+ "name": "model.layers.16.mlp.down_proj.q_weight",
1013
+ "shape": [
1014
+ 2048,
1015
+ 704
1016
+ ],
1017
+ "dtype": "uint32",
1018
+ "format": "f32-to-bf16",
1019
+ "nbytes": 5767168,
1020
+ "byteOffset": 18292736
1021
+ },
1022
+ {
1023
+ "name": "model.layers.16.mlp.down_proj.q_scale",
1024
+ "shape": [
1025
+ 2048,
1026
+ 176
1027
+ ],
1028
+ "dtype": "float16",
1029
+ "format": "f32-to-bf16",
1030
+ "nbytes": 720896,
1031
+ "byteOffset": 24059904
1032
+ }
1033
+ ],
1034
+ "md5sum": "1d7b6cfc26c9d9fe95bfb528210928d7"
1035
+ },
1036
+ {
1037
+ "dataPath": "params_shard_11.bin",
1038
+ "format": "raw-shard",
1039
+ "nbytes": 24780800,
1040
+ "records": [
1041
+ {
1042
+ "name": "model.layers.16.mlp.gate_up_proj.q_weight",
1043
+ "shape": [
1044
+ 11264,
1045
+ 256
1046
+ ],
1047
+ "dtype": "uint32",
1048
+ "format": "f32-to-bf16",
1049
+ "nbytes": 11534336,
1050
+ "byteOffset": 0
1051
+ },
1052
+ {
1053
+ "name": "model.layers.16.mlp.gate_up_proj.q_scale",
1054
+ "shape": [
1055
+ 11264,
1056
+ 64
1057
+ ],
1058
+ "dtype": "float16",
1059
+ "format": "f32-to-bf16",
1060
+ "nbytes": 1441792,
1061
+ "byteOffset": 11534336
1062
+ },
1063
+ {
1064
+ "name": "model.layers.16.post_attention_layernorm.weight",
1065
+ "shape": [
1066
+ 2048
1067
+ ],
1068
+ "dtype": "float16",
1069
+ "format": "f32-to-bf16",
1070
+ "nbytes": 4096,
1071
+ "byteOffset": 12976128
1072
+ },
1073
+ {
1074
+ "name": "model.layers.16.self_attn.qkv_proj.q_weight",
1075
+ "shape": [
1076
+ 2560,
1077
+ 256
1078
+ ],
1079
+ "dtype": "uint32",
1080
+ "format": "f32-to-bf16",
1081
+ "nbytes": 2621440,
1082
+ "byteOffset": 12980224
1083
+ },
1084
+ {
1085
+ "name": "model.layers.16.self_attn.qkv_proj.q_scale",
1086
+ "shape": [
1087
+ 2560,
1088
+ 64
1089
+ ],
1090
+ "dtype": "float16",
1091
+ "format": "f32-to-bf16",
1092
+ "nbytes": 327680,
1093
+ "byteOffset": 15601664
1094
+ },
1095
+ {
1096
+ "name": "model.layers.16.self_attn.o_proj.q_weight",
1097
+ "shape": [
1098
+ 2048,
1099
+ 256
1100
+ ],
1101
+ "dtype": "uint32",
1102
+ "format": "f32-to-bf16",
1103
+ "nbytes": 2097152,
1104
+ "byteOffset": 15929344
1105
+ },
1106
+ {
1107
+ "name": "model.layers.16.self_attn.o_proj.q_scale",
1108
+ "shape": [
1109
+ 2048,
1110
+ 64
1111
+ ],
1112
+ "dtype": "float16",
1113
+ "format": "f32-to-bf16",
1114
+ "nbytes": 262144,
1115
+ "byteOffset": 18026496
1116
+ },
1117
+ {
1118
+ "name": "model.layers.17.input_layernorm.weight",
1119
+ "shape": [
1120
+ 2048
1121
+ ],
1122
+ "dtype": "float16",
1123
+ "format": "f32-to-bf16",
1124
+ "nbytes": 4096,
1125
+ "byteOffset": 18288640
1126
+ },
1127
+ {
1128
+ "name": "model.layers.17.mlp.down_proj.q_weight",
1129
+ "shape": [
1130
+ 2048,
1131
+ 704
1132
+ ],
1133
+ "dtype": "uint32",
1134
+ "format": "f32-to-bf16",
1135
+ "nbytes": 5767168,
1136
+ "byteOffset": 18292736
1137
+ },
1138
+ {
1139
+ "name": "model.layers.17.mlp.down_proj.q_scale",
1140
+ "shape": [
1141
+ 2048,
1142
+ 176
1143
+ ],
1144
+ "dtype": "float16",
1145
+ "format": "f32-to-bf16",
1146
+ "nbytes": 720896,
1147
+ "byteOffset": 24059904
1148
+ }
1149
+ ],
1150
+ "md5sum": "93ec21beb9f888dffbb5d8d3ff12b987"
1151
+ },
1152
+ {
1153
+ "dataPath": "params_shard_12.bin",
1154
+ "format": "raw-shard",
1155
+ "nbytes": 24780800,
1156
+ "records": [
1157
+ {
1158
+ "name": "model.layers.17.mlp.gate_up_proj.q_weight",
1159
+ "shape": [
1160
+ 11264,
1161
+ 256
1162
+ ],
1163
+ "dtype": "uint32",
1164
+ "format": "f32-to-bf16",
1165
+ "nbytes": 11534336,
1166
+ "byteOffset": 0
1167
+ },
1168
+ {
1169
+ "name": "model.layers.17.mlp.gate_up_proj.q_scale",
1170
+ "shape": [
1171
+ 11264,
1172
+ 64
1173
+ ],
1174
+ "dtype": "float16",
1175
+ "format": "f32-to-bf16",
1176
+ "nbytes": 1441792,
1177
+ "byteOffset": 11534336
1178
+ },
1179
+ {
1180
+ "name": "model.layers.17.post_attention_layernorm.weight",
1181
+ "shape": [
1182
+ 2048
1183
+ ],
1184
+ "dtype": "float16",
1185
+ "format": "f32-to-bf16",
1186
+ "nbytes": 4096,
1187
+ "byteOffset": 12976128
1188
+ },
1189
+ {
1190
+ "name": "model.layers.17.self_attn.qkv_proj.q_weight",
1191
+ "shape": [
1192
+ 2560,
1193
+ 256
1194
+ ],
1195
+ "dtype": "uint32",
1196
+ "format": "f32-to-bf16",
1197
+ "nbytes": 2621440,
1198
+ "byteOffset": 12980224
1199
+ },
1200
+ {
1201
+ "name": "model.layers.17.self_attn.qkv_proj.q_scale",
1202
+ "shape": [
1203
+ 2560,
1204
+ 64
1205
+ ],
1206
+ "dtype": "float16",
1207
+ "format": "f32-to-bf16",
1208
+ "nbytes": 327680,
1209
+ "byteOffset": 15601664
1210
+ },
1211
+ {
1212
+ "name": "model.layers.17.self_attn.o_proj.q_weight",
1213
+ "shape": [
1214
+ 2048,
1215
+ 256
1216
+ ],
1217
+ "dtype": "uint32",
1218
+ "format": "f32-to-bf16",
1219
+ "nbytes": 2097152,
1220
+ "byteOffset": 15929344
1221
+ },
1222
+ {
1223
+ "name": "model.layers.17.self_attn.o_proj.q_scale",
1224
+ "shape": [
1225
+ 2048,
1226
+ 64
1227
+ ],
1228
+ "dtype": "float16",
1229
+ "format": "f32-to-bf16",
1230
+ "nbytes": 262144,
1231
+ "byteOffset": 18026496
1232
+ },
1233
+ {
1234
+ "name": "model.layers.18.input_layernorm.weight",
1235
+ "shape": [
1236
+ 2048
1237
+ ],
1238
+ "dtype": "float16",
1239
+ "format": "f32-to-bf16",
1240
+ "nbytes": 4096,
1241
+ "byteOffset": 18288640
1242
+ },
1243
+ {
1244
+ "name": "model.layers.18.mlp.down_proj.q_weight",
1245
+ "shape": [
1246
+ 2048,
1247
+ 704
1248
+ ],
1249
+ "dtype": "uint32",
1250
+ "format": "f32-to-bf16",
1251
+ "nbytes": 5767168,
1252
+ "byteOffset": 18292736
1253
+ },
1254
+ {
1255
+ "name": "model.layers.18.mlp.down_proj.q_scale",
1256
+ "shape": [
1257
+ 2048,
1258
+ 176
1259
+ ],
1260
+ "dtype": "float16",
1261
+ "format": "f32-to-bf16",
1262
+ "nbytes": 720896,
1263
+ "byteOffset": 24059904
1264
+ }
1265
+ ],
1266
+ "md5sum": "252eb9f66784e3f186d332a2234b3b47"
1267
+ },
1268
+ {
1269
+ "dataPath": "params_shard_13.bin",
1270
+ "format": "raw-shard",
1271
+ "nbytes": 24780800,
1272
+ "records": [
1273
+ {
1274
+ "name": "model.layers.18.mlp.gate_up_proj.q_weight",
1275
+ "shape": [
1276
+ 11264,
1277
+ 256
1278
+ ],
1279
+ "dtype": "uint32",
1280
+ "format": "f32-to-bf16",
1281
+ "nbytes": 11534336,
1282
+ "byteOffset": 0
1283
+ },
1284
+ {
1285
+ "name": "model.layers.18.mlp.gate_up_proj.q_scale",
1286
+ "shape": [
1287
+ 11264,
1288
+ 64
1289
+ ],
1290
+ "dtype": "float16",
1291
+ "format": "f32-to-bf16",
1292
+ "nbytes": 1441792,
1293
+ "byteOffset": 11534336
1294
+ },
1295
+ {
1296
+ "name": "model.layers.18.post_attention_layernorm.weight",
1297
+ "shape": [
1298
+ 2048
1299
+ ],
1300
+ "dtype": "float16",
1301
+ "format": "f32-to-bf16",
1302
+ "nbytes": 4096,
1303
+ "byteOffset": 12976128
1304
+ },
1305
+ {
1306
+ "name": "model.layers.18.self_attn.qkv_proj.q_weight",
1307
+ "shape": [
1308
+ 2560,
1309
+ 256
1310
+ ],
1311
+ "dtype": "uint32",
1312
+ "format": "f32-to-bf16",
1313
+ "nbytes": 2621440,
1314
+ "byteOffset": 12980224
1315
+ },
1316
+ {
1317
+ "name": "model.layers.18.self_attn.qkv_proj.q_scale",
1318
+ "shape": [
1319
+ 2560,
1320
+ 64
1321
+ ],
1322
+ "dtype": "float16",
1323
+ "format": "f32-to-bf16",
1324
+ "nbytes": 327680,
1325
+ "byteOffset": 15601664
1326
+ },
1327
+ {
1328
+ "name": "model.layers.18.self_attn.o_proj.q_weight",
1329
+ "shape": [
1330
+ 2048,
1331
+ 256
1332
+ ],
1333
+ "dtype": "uint32",
1334
+ "format": "f32-to-bf16",
1335
+ "nbytes": 2097152,
1336
+ "byteOffset": 15929344
1337
+ },
1338
+ {
1339
+ "name": "model.layers.18.self_attn.o_proj.q_scale",
1340
+ "shape": [
1341
+ 2048,
1342
+ 64
1343
+ ],
1344
+ "dtype": "float16",
1345
+ "format": "f32-to-bf16",
1346
+ "nbytes": 262144,
1347
+ "byteOffset": 18026496
1348
+ },
1349
+ {
1350
+ "name": "model.layers.19.input_layernorm.weight",
1351
+ "shape": [
1352
+ 2048
1353
+ ],
1354
+ "dtype": "float16",
1355
+ "format": "f32-to-bf16",
1356
+ "nbytes": 4096,
1357
+ "byteOffset": 18288640
1358
+ },
1359
+ {
1360
+ "name": "model.layers.19.mlp.down_proj.q_weight",
1361
+ "shape": [
1362
+ 2048,
1363
+ 704
1364
+ ],
1365
+ "dtype": "uint32",
1366
+ "format": "f32-to-bf16",
1367
+ "nbytes": 5767168,
1368
+ "byteOffset": 18292736
1369
+ },
1370
+ {
1371
+ "name": "model.layers.19.mlp.down_proj.q_scale",
1372
+ "shape": [
1373
+ 2048,
1374
+ 176
1375
+ ],
1376
+ "dtype": "float16",
1377
+ "format": "f32-to-bf16",
1378
+ "nbytes": 720896,
1379
+ "byteOffset": 24059904
1380
+ }
1381
+ ],
1382
+ "md5sum": "059a30652f62bc53f40deb3742b57dbf"
1383
+ },
1384
+ {
1385
+ "dataPath": "params_shard_14.bin",
1386
+ "format": "raw-shard",
1387
+ "nbytes": 24780800,
1388
+ "records": [
1389
+ {
1390
+ "name": "model.layers.19.mlp.gate_up_proj.q_weight",
1391
+ "shape": [
1392
+ 11264,
1393
+ 256
1394
+ ],
1395
+ "dtype": "uint32",
1396
+ "format": "f32-to-bf16",
1397
+ "nbytes": 11534336,
1398
+ "byteOffset": 0
1399
+ },
1400
+ {
1401
+ "name": "model.layers.19.mlp.gate_up_proj.q_scale",
1402
+ "shape": [
1403
+ 11264,
1404
+ 64
1405
+ ],
1406
+ "dtype": "float16",
1407
+ "format": "f32-to-bf16",
1408
+ "nbytes": 1441792,
1409
+ "byteOffset": 11534336
1410
+ },
1411
+ {
1412
+ "name": "model.layers.19.post_attention_layernorm.weight",
1413
+ "shape": [
1414
+ 2048
1415
+ ],
1416
+ "dtype": "float16",
1417
+ "format": "f32-to-bf16",
1418
+ "nbytes": 4096,
1419
+ "byteOffset": 12976128
1420
+ },
1421
+ {
1422
+ "name": "model.layers.19.self_attn.qkv_proj.q_weight",
1423
+ "shape": [
1424
+ 2560,
1425
+ 256
1426
+ ],
1427
+ "dtype": "uint32",
1428
+ "format": "f32-to-bf16",
1429
+ "nbytes": 2621440,
1430
+ "byteOffset": 12980224
1431
+ },
1432
+ {
1433
+ "name": "model.layers.19.self_attn.qkv_proj.q_scale",
1434
+ "shape": [
1435
+ 2560,
1436
+ 64
1437
+ ],
1438
+ "dtype": "float16",
1439
+ "format": "f32-to-bf16",
1440
+ "nbytes": 327680,
1441
+ "byteOffset": 15601664
1442
+ },
1443
+ {
1444
+ "name": "model.layers.19.self_attn.o_proj.q_weight",
1445
+ "shape": [
1446
+ 2048,
1447
+ 256
1448
+ ],
1449
+ "dtype": "uint32",
1450
+ "format": "f32-to-bf16",
1451
+ "nbytes": 2097152,
1452
+ "byteOffset": 15929344
1453
+ },
1454
+ {
1455
+ "name": "model.layers.19.self_attn.o_proj.q_scale",
1456
+ "shape": [
1457
+ 2048,
1458
+ 64
1459
+ ],
1460
+ "dtype": "float16",
1461
+ "format": "f32-to-bf16",
1462
+ "nbytes": 262144,
1463
+ "byteOffset": 18026496
1464
+ },
1465
+ {
1466
+ "name": "model.layers.2.input_layernorm.weight",
1467
+ "shape": [
1468
+ 2048
1469
+ ],
1470
+ "dtype": "float16",
1471
+ "format": "f32-to-bf16",
1472
+ "nbytes": 4096,
1473
+ "byteOffset": 18288640
1474
+ },
1475
+ {
1476
+ "name": "model.layers.2.mlp.down_proj.q_weight",
1477
+ "shape": [
1478
+ 2048,
1479
+ 704
1480
+ ],
1481
+ "dtype": "uint32",
1482
+ "format": "f32-to-bf16",
1483
+ "nbytes": 5767168,
1484
+ "byteOffset": 18292736
1485
+ },
1486
+ {
1487
+ "name": "model.layers.2.mlp.down_proj.q_scale",
1488
+ "shape": [
1489
+ 2048,
1490
+ 176
1491
+ ],
1492
+ "dtype": "float16",
1493
+ "format": "f32-to-bf16",
1494
+ "nbytes": 720896,
1495
+ "byteOffset": 24059904
1496
+ }
1497
+ ],
1498
+ "md5sum": "c41e8ad7f31fe03e8156eda6593ac2c6"
1499
+ },
1500
+ {
1501
+ "dataPath": "params_shard_15.bin",
1502
+ "format": "raw-shard",
1503
+ "nbytes": 24780800,
1504
+ "records": [
1505
+ {
1506
+ "name": "model.layers.2.mlp.gate_up_proj.q_weight",
1507
+ "shape": [
1508
+ 11264,
1509
+ 256
1510
+ ],
1511
+ "dtype": "uint32",
1512
+ "format": "f32-to-bf16",
1513
+ "nbytes": 11534336,
1514
+ "byteOffset": 0
1515
+ },
1516
+ {
1517
+ "name": "model.layers.2.mlp.gate_up_proj.q_scale",
1518
+ "shape": [
1519
+ 11264,
1520
+ 64
1521
+ ],
1522
+ "dtype": "float16",
1523
+ "format": "f32-to-bf16",
1524
+ "nbytes": 1441792,
1525
+ "byteOffset": 11534336
1526
+ },
1527
+ {
1528
+ "name": "model.layers.2.post_attention_layernorm.weight",
1529
+ "shape": [
1530
+ 2048
1531
+ ],
1532
+ "dtype": "float16",
1533
+ "format": "f32-to-bf16",
1534
+ "nbytes": 4096,
1535
+ "byteOffset": 12976128
1536
+ },
1537
+ {
1538
+ "name": "model.layers.2.self_attn.qkv_proj.q_weight",
1539
+ "shape": [
1540
+ 2560,
1541
+ 256
1542
+ ],
1543
+ "dtype": "uint32",
1544
+ "format": "f32-to-bf16",
1545
+ "nbytes": 2621440,
1546
+ "byteOffset": 12980224
1547
+ },
1548
+ {
1549
+ "name": "model.layers.2.self_attn.qkv_proj.q_scale",
1550
+ "shape": [
1551
+ 2560,
1552
+ 64
1553
+ ],
1554
+ "dtype": "float16",
1555
+ "format": "f32-to-bf16",
1556
+ "nbytes": 327680,
1557
+ "byteOffset": 15601664
1558
+ },
1559
+ {
1560
+ "name": "model.layers.2.self_attn.o_proj.q_weight",
1561
+ "shape": [
1562
+ 2048,
1563
+ 256
1564
+ ],
1565
+ "dtype": "uint32",
1566
+ "format": "f32-to-bf16",
1567
+ "nbytes": 2097152,
1568
+ "byteOffset": 15929344
1569
+ },
1570
+ {
1571
+ "name": "model.layers.2.self_attn.o_proj.q_scale",
1572
+ "shape": [
1573
+ 2048,
1574
+ 64
1575
+ ],
1576
+ "dtype": "float16",
1577
+ "format": "f32-to-bf16",
1578
+ "nbytes": 262144,
1579
+ "byteOffset": 18026496
1580
+ },
1581
+ {
1582
+ "name": "model.layers.20.input_layernorm.weight",
1583
+ "shape": [
1584
+ 2048
1585
+ ],
1586
+ "dtype": "float16",
1587
+ "format": "f32-to-bf16",
1588
+ "nbytes": 4096,
1589
+ "byteOffset": 18288640
1590
+ },
1591
+ {
1592
+ "name": "model.layers.20.mlp.down_proj.q_weight",
1593
+ "shape": [
1594
+ 2048,
1595
+ 704
1596
+ ],
1597
+ "dtype": "uint32",
1598
+ "format": "f32-to-bf16",
1599
+ "nbytes": 5767168,
1600
+ "byteOffset": 18292736
1601
+ },
1602
+ {
1603
+ "name": "model.layers.20.mlp.down_proj.q_scale",
1604
+ "shape": [
1605
+ 2048,
1606
+ 176
1607
+ ],
1608
+ "dtype": "float16",
1609
+ "format": "f32-to-bf16",
1610
+ "nbytes": 720896,
1611
+ "byteOffset": 24059904
1612
+ }
1613
+ ],
1614
+ "md5sum": "3dbcb76c12ca477d6df8ea1aa572674a"
1615
+ },
1616
+ {
1617
+ "dataPath": "params_shard_16.bin",
1618
+ "format": "raw-shard",
1619
+ "nbytes": 24780800,
1620
+ "records": [
1621
+ {
1622
+ "name": "model.layers.20.mlp.gate_up_proj.q_weight",
1623
+ "shape": [
1624
+ 11264,
1625
+ 256
1626
+ ],
1627
+ "dtype": "uint32",
1628
+ "format": "f32-to-bf16",
1629
+ "nbytes": 11534336,
1630
+ "byteOffset": 0
1631
+ },
1632
+ {
1633
+ "name": "model.layers.20.mlp.gate_up_proj.q_scale",
1634
+ "shape": [
1635
+ 11264,
1636
+ 64
1637
+ ],
1638
+ "dtype": "float16",
1639
+ "format": "f32-to-bf16",
1640
+ "nbytes": 1441792,
1641
+ "byteOffset": 11534336
1642
+ },
1643
+ {
1644
+ "name": "model.layers.20.post_attention_layernorm.weight",
1645
+ "shape": [
1646
+ 2048
1647
+ ],
1648
+ "dtype": "float16",
1649
+ "format": "f32-to-bf16",
1650
+ "nbytes": 4096,
1651
+ "byteOffset": 12976128
1652
+ },
1653
+ {
1654
+ "name": "model.layers.20.self_attn.qkv_proj.q_weight",
1655
+ "shape": [
1656
+ 2560,
1657
+ 256
1658
+ ],
1659
+ "dtype": "uint32",
1660
+ "format": "f32-to-bf16",
1661
+ "nbytes": 2621440,
1662
+ "byteOffset": 12980224
1663
+ },
1664
+ {
1665
+ "name": "model.layers.20.self_attn.qkv_proj.q_scale",
1666
+ "shape": [
1667
+ 2560,
1668
+ 64
1669
+ ],
1670
+ "dtype": "float16",
1671
+ "format": "f32-to-bf16",
1672
+ "nbytes": 327680,
1673
+ "byteOffset": 15601664
1674
+ },
1675
+ {
1676
+ "name": "model.layers.20.self_attn.o_proj.q_weight",
1677
+ "shape": [
1678
+ 2048,
1679
+ 256
1680
+ ],
1681
+ "dtype": "uint32",
1682
+ "format": "f32-to-bf16",
1683
+ "nbytes": 2097152,
1684
+ "byteOffset": 15929344
1685
+ },
1686
+ {
1687
+ "name": "model.layers.20.self_attn.o_proj.q_scale",
1688
+ "shape": [
1689
+ 2048,
1690
+ 64
1691
+ ],
1692
+ "dtype": "float16",
1693
+ "format": "f32-to-bf16",
1694
+ "nbytes": 262144,
1695
+ "byteOffset": 18026496
1696
+ },
1697
+ {
1698
+ "name": "model.layers.21.input_layernorm.weight",
1699
+ "shape": [
1700
+ 2048
1701
+ ],
1702
+ "dtype": "float16",
1703
+ "format": "f32-to-bf16",
1704
+ "nbytes": 4096,
1705
+ "byteOffset": 18288640
1706
+ },
1707
+ {
1708
+ "name": "model.layers.21.mlp.down_proj.q_weight",
1709
+ "shape": [
1710
+ 2048,
1711
+ 704
1712
+ ],
1713
+ "dtype": "uint32",
1714
+ "format": "f32-to-bf16",
1715
+ "nbytes": 5767168,
1716
+ "byteOffset": 18292736
1717
+ },
1718
+ {
1719
+ "name": "model.layers.21.mlp.down_proj.q_scale",
1720
+ "shape": [
1721
+ 2048,
1722
+ 176
1723
+ ],
1724
+ "dtype": "float16",
1725
+ "format": "f32-to-bf16",
1726
+ "nbytes": 720896,
1727
+ "byteOffset": 24059904
1728
+ }
1729
+ ],
1730
+ "md5sum": "bcd7ef15371c7bc90b12f293d4de7e71"
1731
+ },
1732
+ {
1733
+ "dataPath": "params_shard_17.bin",
1734
+ "format": "raw-shard",
1735
+ "nbytes": 24780800,
1736
+ "records": [
1737
+ {
1738
+ "name": "model.layers.21.mlp.gate_up_proj.q_weight",
1739
+ "shape": [
1740
+ 11264,
1741
+ 256
1742
+ ],
1743
+ "dtype": "uint32",
1744
+ "format": "f32-to-bf16",
1745
+ "nbytes": 11534336,
1746
+ "byteOffset": 0
1747
+ },
1748
+ {
1749
+ "name": "model.layers.21.mlp.gate_up_proj.q_scale",
1750
+ "shape": [
1751
+ 11264,
1752
+ 64
1753
+ ],
1754
+ "dtype": "float16",
1755
+ "format": "f32-to-bf16",
1756
+ "nbytes": 1441792,
1757
+ "byteOffset": 11534336
1758
+ },
1759
+ {
1760
+ "name": "model.layers.21.post_attention_layernorm.weight",
1761
+ "shape": [
1762
+ 2048
1763
+ ],
1764
+ "dtype": "float16",
1765
+ "format": "f32-to-bf16",
1766
+ "nbytes": 4096,
1767
+ "byteOffset": 12976128
1768
+ },
1769
+ {
1770
+ "name": "model.layers.21.self_attn.qkv_proj.q_weight",
1771
+ "shape": [
1772
+ 2560,
1773
+ 256
1774
+ ],
1775
+ "dtype": "uint32",
1776
+ "format": "f32-to-bf16",
1777
+ "nbytes": 2621440,
1778
+ "byteOffset": 12980224
1779
+ },
1780
+ {
1781
+ "name": "model.layers.21.self_attn.qkv_proj.q_scale",
1782
+ "shape": [
1783
+ 2560,
1784
+ 64
1785
+ ],
1786
+ "dtype": "float16",
1787
+ "format": "f32-to-bf16",
1788
+ "nbytes": 327680,
1789
+ "byteOffset": 15601664
1790
+ },
1791
+ {
1792
+ "name": "model.layers.21.self_attn.o_proj.q_weight",
1793
+ "shape": [
1794
+ 2048,
1795
+ 256
1796
+ ],
1797
+ "dtype": "uint32",
1798
+ "format": "f32-to-bf16",
1799
+ "nbytes": 2097152,
1800
+ "byteOffset": 15929344
1801
+ },
1802
+ {
1803
+ "name": "model.layers.21.self_attn.o_proj.q_scale",
1804
+ "shape": [
1805
+ 2048,
1806
+ 64
1807
+ ],
1808
+ "dtype": "float16",
1809
+ "format": "f32-to-bf16",
1810
+ "nbytes": 262144,
1811
+ "byteOffset": 18026496
1812
+ },
1813
+ {
1814
+ "name": "model.layers.3.input_layernorm.weight",
1815
+ "shape": [
1816
+ 2048
1817
+ ],
1818
+ "dtype": "float16",
1819
+ "format": "f32-to-bf16",
1820
+ "nbytes": 4096,
1821
+ "byteOffset": 18288640
1822
+ },
1823
+ {
1824
+ "name": "model.layers.3.mlp.down_proj.q_weight",
1825
+ "shape": [
1826
+ 2048,
1827
+ 704
1828
+ ],
1829
+ "dtype": "uint32",
1830
+ "format": "f32-to-bf16",
1831
+ "nbytes": 5767168,
1832
+ "byteOffset": 18292736
1833
+ },
1834
+ {
1835
+ "name": "model.layers.3.mlp.down_proj.q_scale",
1836
+ "shape": [
1837
+ 2048,
1838
+ 176
1839
+ ],
1840
+ "dtype": "float16",
1841
+ "format": "f32-to-bf16",
1842
+ "nbytes": 720896,
1843
+ "byteOffset": 24059904
1844
+ }
1845
+ ],
1846
+ "md5sum": "a7bc7c31bdf390d7b42269dc738eb8c0"
1847
+ },
1848
+ {
1849
+ "dataPath": "params_shard_18.bin",
1850
+ "format": "raw-shard",
1851
+ "nbytes": 24780800,
1852
+ "records": [
1853
+ {
1854
+ "name": "model.layers.3.mlp.gate_up_proj.q_weight",
1855
+ "shape": [
1856
+ 11264,
1857
+ 256
1858
+ ],
1859
+ "dtype": "uint32",
1860
+ "format": "f32-to-bf16",
1861
+ "nbytes": 11534336,
1862
+ "byteOffset": 0
1863
+ },
1864
+ {
1865
+ "name": "model.layers.3.mlp.gate_up_proj.q_scale",
1866
+ "shape": [
1867
+ 11264,
1868
+ 64
1869
+ ],
1870
+ "dtype": "float16",
1871
+ "format": "f32-to-bf16",
1872
+ "nbytes": 1441792,
1873
+ "byteOffset": 11534336
1874
+ },
1875
+ {
1876
+ "name": "model.layers.3.post_attention_layernorm.weight",
1877
+ "shape": [
1878
+ 2048
1879
+ ],
1880
+ "dtype": "float16",
1881
+ "format": "f32-to-bf16",
1882
+ "nbytes": 4096,
1883
+ "byteOffset": 12976128
1884
+ },
1885
+ {
1886
+ "name": "model.layers.3.self_attn.qkv_proj.q_weight",
1887
+ "shape": [
1888
+ 2560,
1889
+ 256
1890
+ ],
1891
+ "dtype": "uint32",
1892
+ "format": "f32-to-bf16",
1893
+ "nbytes": 2621440,
1894
+ "byteOffset": 12980224
1895
+ },
1896
+ {
1897
+ "name": "model.layers.3.self_attn.qkv_proj.q_scale",
1898
+ "shape": [
1899
+ 2560,
1900
+ 64
1901
+ ],
1902
+ "dtype": "float16",
1903
+ "format": "f32-to-bf16",
1904
+ "nbytes": 327680,
1905
+ "byteOffset": 15601664
1906
+ },
1907
+ {
1908
+ "name": "model.layers.3.self_attn.o_proj.q_weight",
1909
+ "shape": [
1910
+ 2048,
1911
+ 256
1912
+ ],
1913
+ "dtype": "uint32",
1914
+ "format": "f32-to-bf16",
1915
+ "nbytes": 2097152,
1916
+ "byteOffset": 15929344
1917
+ },
1918
+ {
1919
+ "name": "model.layers.3.self_attn.o_proj.q_scale",
1920
+ "shape": [
1921
+ 2048,
1922
+ 64
1923
+ ],
1924
+ "dtype": "float16",
1925
+ "format": "f32-to-bf16",
1926
+ "nbytes": 262144,
1927
+ "byteOffset": 18026496
1928
+ },
1929
+ {
1930
+ "name": "model.layers.4.input_layernorm.weight",
1931
+ "shape": [
1932
+ 2048
1933
+ ],
1934
+ "dtype": "float16",
1935
+ "format": "f32-to-bf16",
1936
+ "nbytes": 4096,
1937
+ "byteOffset": 18288640
1938
+ },
1939
+ {
1940
+ "name": "model.layers.4.mlp.down_proj.q_weight",
1941
+ "shape": [
1942
+ 2048,
1943
+ 704
1944
+ ],
1945
+ "dtype": "uint32",
1946
+ "format": "f32-to-bf16",
1947
+ "nbytes": 5767168,
1948
+ "byteOffset": 18292736
1949
+ },
1950
+ {
1951
+ "name": "model.layers.4.mlp.down_proj.q_scale",
1952
+ "shape": [
1953
+ 2048,
1954
+ 176
1955
+ ],
1956
+ "dtype": "float16",
1957
+ "format": "f32-to-bf16",
1958
+ "nbytes": 720896,
1959
+ "byteOffset": 24059904
1960
+ }
1961
+ ],
1962
+ "md5sum": "8b6140e47fae0999f13e399639b3f6de"
1963
+ },
1964
+ {
1965
+ "dataPath": "params_shard_19.bin",
1966
+ "format": "raw-shard",
1967
+ "nbytes": 24780800,
1968
+ "records": [
1969
+ {
1970
+ "name": "model.layers.4.mlp.gate_up_proj.q_weight",
1971
+ "shape": [
1972
+ 11264,
1973
+ 256
1974
+ ],
1975
+ "dtype": "uint32",
1976
+ "format": "f32-to-bf16",
1977
+ "nbytes": 11534336,
1978
+ "byteOffset": 0
1979
+ },
1980
+ {
1981
+ "name": "model.layers.4.mlp.gate_up_proj.q_scale",
1982
+ "shape": [
1983
+ 11264,
1984
+ 64
1985
+ ],
1986
+ "dtype": "float16",
1987
+ "format": "f32-to-bf16",
1988
+ "nbytes": 1441792,
1989
+ "byteOffset": 11534336
1990
+ },
1991
+ {
1992
+ "name": "model.layers.4.post_attention_layernorm.weight",
1993
+ "shape": [
1994
+ 2048
1995
+ ],
1996
+ "dtype": "float16",
1997
+ "format": "f32-to-bf16",
1998
+ "nbytes": 4096,
1999
+ "byteOffset": 12976128
2000
+ },
2001
+ {
2002
+ "name": "model.layers.4.self_attn.qkv_proj.q_weight",
2003
+ "shape": [
2004
+ 2560,
2005
+ 256
2006
+ ],
2007
+ "dtype": "uint32",
2008
+ "format": "f32-to-bf16",
2009
+ "nbytes": 2621440,
2010
+ "byteOffset": 12980224
2011
+ },
2012
+ {
2013
+ "name": "model.layers.4.self_attn.qkv_proj.q_scale",
2014
+ "shape": [
2015
+ 2560,
2016
+ 64
2017
+ ],
2018
+ "dtype": "float16",
2019
+ "format": "f32-to-bf16",
2020
+ "nbytes": 327680,
2021
+ "byteOffset": 15601664
2022
+ },
2023
+ {
2024
+ "name": "model.layers.4.self_attn.o_proj.q_weight",
2025
+ "shape": [
2026
+ 2048,
2027
+ 256
2028
+ ],
2029
+ "dtype": "uint32",
2030
+ "format": "f32-to-bf16",
2031
+ "nbytes": 2097152,
2032
+ "byteOffset": 15929344
2033
+ },
2034
+ {
2035
+ "name": "model.layers.4.self_attn.o_proj.q_scale",
2036
+ "shape": [
2037
+ 2048,
2038
+ 64
2039
+ ],
2040
+ "dtype": "float16",
2041
+ "format": "f32-to-bf16",
2042
+ "nbytes": 262144,
2043
+ "byteOffset": 18026496
2044
+ },
2045
+ {
2046
+ "name": "model.layers.5.input_layernorm.weight",
2047
+ "shape": [
2048
+ 2048
2049
+ ],
2050
+ "dtype": "float16",
2051
+ "format": "f32-to-bf16",
2052
+ "nbytes": 4096,
2053
+ "byteOffset": 18288640
2054
+ },
2055
+ {
2056
+ "name": "model.layers.5.mlp.down_proj.q_weight",
2057
+ "shape": [
2058
+ 2048,
2059
+ 704
2060
+ ],
2061
+ "dtype": "uint32",
2062
+ "format": "f32-to-bf16",
2063
+ "nbytes": 5767168,
2064
+ "byteOffset": 18292736
2065
+ },
2066
+ {
2067
+ "name": "model.layers.5.mlp.down_proj.q_scale",
2068
+ "shape": [
2069
+ 2048,
2070
+ 176
2071
+ ],
2072
+ "dtype": "float16",
2073
+ "format": "f32-to-bf16",
2074
+ "nbytes": 720896,
2075
+ "byteOffset": 24059904
2076
+ }
2077
+ ],
2078
+ "md5sum": "bd6606704559e75d7958a06d3f9d37e9"
2079
+ },
2080
+ {
2081
+ "dataPath": "params_shard_20.bin",
2082
+ "format": "raw-shard",
2083
+ "nbytes": 24780800,
2084
+ "records": [
2085
+ {
2086
+ "name": "model.layers.5.mlp.gate_up_proj.q_weight",
2087
+ "shape": [
2088
+ 11264,
2089
+ 256
2090
+ ],
2091
+ "dtype": "uint32",
2092
+ "format": "f32-to-bf16",
2093
+ "nbytes": 11534336,
2094
+ "byteOffset": 0
2095
+ },
2096
+ {
2097
+ "name": "model.layers.5.mlp.gate_up_proj.q_scale",
2098
+ "shape": [
2099
+ 11264,
2100
+ 64
2101
+ ],
2102
+ "dtype": "float16",
2103
+ "format": "f32-to-bf16",
2104
+ "nbytes": 1441792,
2105
+ "byteOffset": 11534336
2106
+ },
2107
+ {
2108
+ "name": "model.layers.5.post_attention_layernorm.weight",
2109
+ "shape": [
2110
+ 2048
2111
+ ],
2112
+ "dtype": "float16",
2113
+ "format": "f32-to-bf16",
2114
+ "nbytes": 4096,
2115
+ "byteOffset": 12976128
2116
+ },
2117
+ {
2118
+ "name": "model.layers.5.self_attn.qkv_proj.q_weight",
2119
+ "shape": [
2120
+ 2560,
2121
+ 256
2122
+ ],
2123
+ "dtype": "uint32",
2124
+ "format": "f32-to-bf16",
2125
+ "nbytes": 2621440,
2126
+ "byteOffset": 12980224
2127
+ },
2128
+ {
2129
+ "name": "model.layers.5.self_attn.qkv_proj.q_scale",
2130
+ "shape": [
2131
+ 2560,
2132
+ 64
2133
+ ],
2134
+ "dtype": "float16",
2135
+ "format": "f32-to-bf16",
2136
+ "nbytes": 327680,
2137
+ "byteOffset": 15601664
2138
+ },
2139
+ {
2140
+ "name": "model.layers.5.self_attn.o_proj.q_weight",
2141
+ "shape": [
2142
+ 2048,
2143
+ 256
2144
+ ],
2145
+ "dtype": "uint32",
2146
+ "format": "f32-to-bf16",
2147
+ "nbytes": 2097152,
2148
+ "byteOffset": 15929344
2149
+ },
2150
+ {
2151
+ "name": "model.layers.5.self_attn.o_proj.q_scale",
2152
+ "shape": [
2153
+ 2048,
2154
+ 64
2155
+ ],
2156
+ "dtype": "float16",
2157
+ "format": "f32-to-bf16",
2158
+ "nbytes": 262144,
2159
+ "byteOffset": 18026496
2160
+ },
2161
+ {
2162
+ "name": "model.layers.6.input_layernorm.weight",
2163
+ "shape": [
2164
+ 2048
2165
+ ],
2166
+ "dtype": "float16",
2167
+ "format": "f32-to-bf16",
2168
+ "nbytes": 4096,
2169
+ "byteOffset": 18288640
2170
+ },
2171
+ {
2172
+ "name": "model.layers.6.mlp.down_proj.q_weight",
2173
+ "shape": [
2174
+ 2048,
2175
+ 704
2176
+ ],
2177
+ "dtype": "uint32",
2178
+ "format": "f32-to-bf16",
2179
+ "nbytes": 5767168,
2180
+ "byteOffset": 18292736
2181
+ },
2182
+ {
2183
+ "name": "model.layers.6.mlp.down_proj.q_scale",
2184
+ "shape": [
2185
+ 2048,
2186
+ 176
2187
+ ],
2188
+ "dtype": "float16",
2189
+ "format": "f32-to-bf16",
2190
+ "nbytes": 720896,
2191
+ "byteOffset": 24059904
2192
+ }
2193
+ ],
2194
+ "md5sum": "4e00b0e85bb81d8dd5c3e7a4b6f8a1fb"
2195
+ },
2196
+ {
2197
+ "dataPath": "params_shard_21.bin",
2198
+ "format": "raw-shard",
2199
+ "nbytes": 24780800,
2200
+ "records": [
2201
+ {
2202
+ "name": "model.layers.6.mlp.gate_up_proj.q_weight",
2203
+ "shape": [
2204
+ 11264,
2205
+ 256
2206
+ ],
2207
+ "dtype": "uint32",
2208
+ "format": "f32-to-bf16",
2209
+ "nbytes": 11534336,
2210
+ "byteOffset": 0
2211
+ },
2212
+ {
2213
+ "name": "model.layers.6.mlp.gate_up_proj.q_scale",
2214
+ "shape": [
2215
+ 11264,
2216
+ 64
2217
+ ],
2218
+ "dtype": "float16",
2219
+ "format": "f32-to-bf16",
2220
+ "nbytes": 1441792,
2221
+ "byteOffset": 11534336
2222
+ },
2223
+ {
2224
+ "name": "model.layers.6.post_attention_layernorm.weight",
2225
+ "shape": [
2226
+ 2048
2227
+ ],
2228
+ "dtype": "float16",
2229
+ "format": "f32-to-bf16",
2230
+ "nbytes": 4096,
2231
+ "byteOffset": 12976128
2232
+ },
2233
+ {
2234
+ "name": "model.layers.6.self_attn.qkv_proj.q_weight",
2235
+ "shape": [
2236
+ 2560,
2237
+ 256
2238
+ ],
2239
+ "dtype": "uint32",
2240
+ "format": "f32-to-bf16",
2241
+ "nbytes": 2621440,
2242
+ "byteOffset": 12980224
2243
+ },
2244
+ {
2245
+ "name": "model.layers.6.self_attn.qkv_proj.q_scale",
2246
+ "shape": [
2247
+ 2560,
2248
+ 64
2249
+ ],
2250
+ "dtype": "float16",
2251
+ "format": "f32-to-bf16",
2252
+ "nbytes": 327680,
2253
+ "byteOffset": 15601664
2254
+ },
2255
+ {
2256
+ "name": "model.layers.6.self_attn.o_proj.q_weight",
2257
+ "shape": [
2258
+ 2048,
2259
+ 256
2260
+ ],
2261
+ "dtype": "uint32",
2262
+ "format": "f32-to-bf16",
2263
+ "nbytes": 2097152,
2264
+ "byteOffset": 15929344
2265
+ },
2266
+ {
2267
+ "name": "model.layers.6.self_attn.o_proj.q_scale",
2268
+ "shape": [
2269
+ 2048,
2270
+ 64
2271
+ ],
2272
+ "dtype": "float16",
2273
+ "format": "f32-to-bf16",
2274
+ "nbytes": 262144,
2275
+ "byteOffset": 18026496
2276
+ },
2277
+ {
2278
+ "name": "model.layers.7.input_layernorm.weight",
2279
+ "shape": [
2280
+ 2048
2281
+ ],
2282
+ "dtype": "float16",
2283
+ "format": "f32-to-bf16",
2284
+ "nbytes": 4096,
2285
+ "byteOffset": 18288640
2286
+ },
2287
+ {
2288
+ "name": "model.layers.7.mlp.down_proj.q_weight",
2289
+ "shape": [
2290
+ 2048,
2291
+ 704
2292
+ ],
2293
+ "dtype": "uint32",
2294
+ "format": "f32-to-bf16",
2295
+ "nbytes": 5767168,
2296
+ "byteOffset": 18292736
2297
+ },
2298
+ {
2299
+ "name": "model.layers.7.mlp.down_proj.q_scale",
2300
+ "shape": [
2301
+ 2048,
2302
+ 176
2303
+ ],
2304
+ "dtype": "float16",
2305
+ "format": "f32-to-bf16",
2306
+ "nbytes": 720896,
2307
+ "byteOffset": 24059904
2308
+ }
2309
+ ],
2310
+ "md5sum": "98bb60cf7f41ca949866554fac1d6b41"
2311
+ },
2312
+ {
2313
+ "dataPath": "params_shard_22.bin",
2314
+ "format": "raw-shard",
2315
+ "nbytes": 24780800,
2316
+ "records": [
2317
+ {
2318
+ "name": "model.layers.7.mlp.gate_up_proj.q_weight",
2319
+ "shape": [
2320
+ 11264,
2321
+ 256
2322
+ ],
2323
+ "dtype": "uint32",
2324
+ "format": "f32-to-bf16",
2325
+ "nbytes": 11534336,
2326
+ "byteOffset": 0
2327
+ },
2328
+ {
2329
+ "name": "model.layers.7.mlp.gate_up_proj.q_scale",
2330
+ "shape": [
2331
+ 11264,
2332
+ 64
2333
+ ],
2334
+ "dtype": "float16",
2335
+ "format": "f32-to-bf16",
2336
+ "nbytes": 1441792,
2337
+ "byteOffset": 11534336
2338
+ },
2339
+ {
2340
+ "name": "model.layers.7.post_attention_layernorm.weight",
2341
+ "shape": [
2342
+ 2048
2343
+ ],
2344
+ "dtype": "float16",
2345
+ "format": "f32-to-bf16",
2346
+ "nbytes": 4096,
2347
+ "byteOffset": 12976128
2348
+ },
2349
+ {
2350
+ "name": "model.layers.7.self_attn.qkv_proj.q_weight",
2351
+ "shape": [
2352
+ 2560,
2353
+ 256
2354
+ ],
2355
+ "dtype": "uint32",
2356
+ "format": "f32-to-bf16",
2357
+ "nbytes": 2621440,
2358
+ "byteOffset": 12980224
2359
+ },
2360
+ {
2361
+ "name": "model.layers.7.self_attn.qkv_proj.q_scale",
2362
+ "shape": [
2363
+ 2560,
2364
+ 64
2365
+ ],
2366
+ "dtype": "float16",
2367
+ "format": "f32-to-bf16",
2368
+ "nbytes": 327680,
2369
+ "byteOffset": 15601664
2370
+ },
2371
+ {
2372
+ "name": "model.layers.7.self_attn.o_proj.q_weight",
2373
+ "shape": [
2374
+ 2048,
2375
+ 256
2376
+ ],
2377
+ "dtype": "uint32",
2378
+ "format": "f32-to-bf16",
2379
+ "nbytes": 2097152,
2380
+ "byteOffset": 15929344
2381
+ },
2382
+ {
2383
+ "name": "model.layers.7.self_attn.o_proj.q_scale",
2384
+ "shape": [
2385
+ 2048,
2386
+ 64
2387
+ ],
2388
+ "dtype": "float16",
2389
+ "format": "f32-to-bf16",
2390
+ "nbytes": 262144,
2391
+ "byteOffset": 18026496
2392
+ },
2393
+ {
2394
+ "name": "model.layers.8.input_layernorm.weight",
2395
+ "shape": [
2396
+ 2048
2397
+ ],
2398
+ "dtype": "float16",
2399
+ "format": "f32-to-bf16",
2400
+ "nbytes": 4096,
2401
+ "byteOffset": 18288640
2402
+ },
2403
+ {
2404
+ "name": "model.layers.8.mlp.down_proj.q_weight",
2405
+ "shape": [
2406
+ 2048,
2407
+ 704
2408
+ ],
2409
+ "dtype": "uint32",
2410
+ "format": "f32-to-bf16",
2411
+ "nbytes": 5767168,
2412
+ "byteOffset": 18292736
2413
+ },
2414
+ {
2415
+ "name": "model.layers.8.mlp.down_proj.q_scale",
2416
+ "shape": [
2417
+ 2048,
2418
+ 176
2419
+ ],
2420
+ "dtype": "float16",
2421
+ "format": "f32-to-bf16",
2422
+ "nbytes": 720896,
2423
+ "byteOffset": 24059904
2424
+ }
2425
+ ],
2426
+ "md5sum": "922f2d133b9adbd0699c0df7272ea578"
2427
+ },
2428
+ {
2429
+ "dataPath": "params_shard_23.bin",
2430
+ "format": "raw-shard",
2431
+ "nbytes": 24780800,
2432
+ "records": [
2433
+ {
2434
+ "name": "model.layers.8.mlp.gate_up_proj.q_weight",
2435
+ "shape": [
2436
+ 11264,
2437
+ 256
2438
+ ],
2439
+ "dtype": "uint32",
2440
+ "format": "f32-to-bf16",
2441
+ "nbytes": 11534336,
2442
+ "byteOffset": 0
2443
+ },
2444
+ {
2445
+ "name": "model.layers.8.mlp.gate_up_proj.q_scale",
2446
+ "shape": [
2447
+ 11264,
2448
+ 64
2449
+ ],
2450
+ "dtype": "float16",
2451
+ "format": "f32-to-bf16",
2452
+ "nbytes": 1441792,
2453
+ "byteOffset": 11534336
2454
+ },
2455
+ {
2456
+ "name": "model.layers.8.post_attention_layernorm.weight",
2457
+ "shape": [
2458
+ 2048
2459
+ ],
2460
+ "dtype": "float16",
2461
+ "format": "f32-to-bf16",
2462
+ "nbytes": 4096,
2463
+ "byteOffset": 12976128
2464
+ },
2465
+ {
2466
+ "name": "model.layers.8.self_attn.qkv_proj.q_weight",
2467
+ "shape": [
2468
+ 2560,
2469
+ 256
2470
+ ],
2471
+ "dtype": "uint32",
2472
+ "format": "f32-to-bf16",
2473
+ "nbytes": 2621440,
2474
+ "byteOffset": 12980224
2475
+ },
2476
+ {
2477
+ "name": "model.layers.8.self_attn.qkv_proj.q_scale",
2478
+ "shape": [
2479
+ 2560,
2480
+ 64
2481
+ ],
2482
+ "dtype": "float16",
2483
+ "format": "f32-to-bf16",
2484
+ "nbytes": 327680,
2485
+ "byteOffset": 15601664
2486
+ },
2487
+ {
2488
+ "name": "model.layers.8.self_attn.o_proj.q_weight",
2489
+ "shape": [
2490
+ 2048,
2491
+ 256
2492
+ ],
2493
+ "dtype": "uint32",
2494
+ "format": "f32-to-bf16",
2495
+ "nbytes": 2097152,
2496
+ "byteOffset": 15929344
2497
+ },
2498
+ {
2499
+ "name": "model.layers.8.self_attn.o_proj.q_scale",
2500
+ "shape": [
2501
+ 2048,
2502
+ 64
2503
+ ],
2504
+ "dtype": "float16",
2505
+ "format": "f32-to-bf16",
2506
+ "nbytes": 262144,
2507
+ "byteOffset": 18026496
2508
+ },
2509
+ {
2510
+ "name": "model.layers.9.input_layernorm.weight",
2511
+ "shape": [
2512
+ 2048
2513
+ ],
2514
+ "dtype": "float16",
2515
+ "format": "f32-to-bf16",
2516
+ "nbytes": 4096,
2517
+ "byteOffset": 18288640
2518
+ },
2519
+ {
2520
+ "name": "model.layers.9.mlp.down_proj.q_weight",
2521
+ "shape": [
2522
+ 2048,
2523
+ 704
2524
+ ],
2525
+ "dtype": "uint32",
2526
+ "format": "f32-to-bf16",
2527
+ "nbytes": 5767168,
2528
+ "byteOffset": 18292736
2529
+ },
2530
+ {
2531
+ "name": "model.layers.9.mlp.down_proj.q_scale",
2532
+ "shape": [
2533
+ 2048,
2534
+ 176
2535
+ ],
2536
+ "dtype": "float16",
2537
+ "format": "f32-to-bf16",
2538
+ "nbytes": 720896,
2539
+ "byteOffset": 24059904
2540
+ }
2541
+ ],
2542
+ "md5sum": "8dad1c13c9d46b44b149d8c8120c76d7"
2543
+ },
2544
+ {
2545
+ "dataPath": "params_shard_24.bin",
2546
+ "format": "raw-shard",
2547
+ "nbytes": 18292736,
2548
+ "records": [
2549
+ {
2550
+ "name": "model.layers.9.mlp.gate_up_proj.q_weight",
2551
+ "shape": [
2552
+ 11264,
2553
+ 256
2554
+ ],
2555
+ "dtype": "uint32",
2556
+ "format": "f32-to-bf16",
2557
+ "nbytes": 11534336,
2558
+ "byteOffset": 0
2559
+ },
2560
+ {
2561
+ "name": "model.layers.9.mlp.gate_up_proj.q_scale",
2562
+ "shape": [
2563
+ 11264,
2564
+ 64
2565
+ ],
2566
+ "dtype": "float16",
2567
+ "format": "f32-to-bf16",
2568
+ "nbytes": 1441792,
2569
+ "byteOffset": 11534336
2570
+ },
2571
+ {
2572
+ "name": "model.layers.9.post_attention_layernorm.weight",
2573
+ "shape": [
2574
+ 2048
2575
+ ],
2576
+ "dtype": "float16",
2577
+ "format": "f32-to-bf16",
2578
+ "nbytes": 4096,
2579
+ "byteOffset": 12976128
2580
+ },
2581
+ {
2582
+ "name": "model.layers.9.self_attn.qkv_proj.q_weight",
2583
+ "shape": [
2584
+ 2560,
2585
+ 256
2586
+ ],
2587
+ "dtype": "uint32",
2588
+ "format": "f32-to-bf16",
2589
+ "nbytes": 2621440,
2590
+ "byteOffset": 12980224
2591
+ },
2592
+ {
2593
+ "name": "model.layers.9.self_attn.qkv_proj.q_scale",
2594
+ "shape": [
2595
+ 2560,
2596
+ 64
2597
+ ],
2598
+ "dtype": "float16",
2599
+ "format": "f32-to-bf16",
2600
+ "nbytes": 327680,
2601
+ "byteOffset": 15601664
2602
+ },
2603
+ {
2604
+ "name": "model.layers.9.self_attn.o_proj.q_weight",
2605
+ "shape": [
2606
+ 2048,
2607
+ 256
2608
+ ],
2609
+ "dtype": "uint32",
2610
+ "format": "f32-to-bf16",
2611
+ "nbytes": 2097152,
2612
+ "byteOffset": 15929344
2613
+ },
2614
+ {
2615
+ "name": "model.layers.9.self_attn.o_proj.q_scale",
2616
+ "shape": [
2617
+ 2048,
2618
+ 64
2619
+ ],
2620
+ "dtype": "float16",
2621
+ "format": "f32-to-bf16",
2622
+ "nbytes": 262144,
2623
+ "byteOffset": 18026496
2624
+ },
2625
+ {
2626
+ "name": "model.norm.weight",
2627
+ "shape": [
2628
+ 2048
2629
+ ],
2630
+ "dtype": "float16",
2631
+ "format": "f32-to-bf16",
2632
+ "nbytes": 4096,
2633
+ "byteOffset": 18288640
2634
+ }
2635
+ ],
2636
+ "md5sum": "f3d45dacc534f3d71074aa3643a7860d"
2637
+ }
2638
+ ]
2639
+ }
params_shard_0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ebab4928a26d6b810883120d38192c87a9a2039eec01cd475105a8667a96a0be
3
+ size 66584576
params_shard_1.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5da70563e7a40602b18ead872b86c5f377d764fa5ef04e297ada6d81c94e8cb2
3
+ size 66584576
params_shard_10.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af0978ff454356b1abff043e2faefd53e1ae83692b69be0210a7c1663630fc5f
3
+ size 24780800
params_shard_11.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9f254670d342f123257cc6be7c3c0466abdcc591ebd6e7719611aa8b087a3ee
3
+ size 24780800
params_shard_12.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fff6d91da3dfac239c2dadc502618d6bf9255951a7910bdf136523c1565adc19
3
+ size 24780800
params_shard_13.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f7f1eb27bc68fb822a0e4271fd1ef4df85da9a1684e9b551ea410ffccd16f81
3
+ size 24780800
params_shard_14.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1c1327c3fa75162670a9f6c65168f114706d5ec9e59eab1054562212fec8938
3
+ size 24780800
params_shard_15.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13d74088be192c5d8c1d0c2ffb722f27ef54c18ee7d454a3af63daae890537b0
3
+ size 24780800
params_shard_16.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3842b2b607912cad737c582b1ec7582c8f9ab1f5a8e6134f00b99eda817bbe82
3
+ size 24780800
params_shard_17.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7e5172f96a81ce5ea95286b9c046d2f87eeab5bfb6bfee316c3d9d1cd80dc2e
3
+ size 24780800
params_shard_18.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f843c17215c05b25c5775b20cbfc0219ee04e99f8c0f6a1adce0c277b389f1a
3
+ size 24780800
params_shard_19.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e05612df91a816af6f4565f2d84a85487672d64968b3e03c0e5eee3a5884ec40
3
+ size 24780800
params_shard_2.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eef0e8e013736e767d19904940db5851e7e1736e3313f0d0653d3cf1bac053a0
3
+ size 23138304
params_shard_20.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d61188180d1d5223da51cbd02f118487c04b624118abe6d2b3efd7af4e146e9
3
+ size 24780800
params_shard_21.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3788ae6f89c3363c27609775477815803599689548ea7815ec907ff08d0ec9d2
3
+ size 24780800
params_shard_22.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5fdd647e39ae5a23609fbd35a0247a5b300327f97fdf91bdd6c8a3a101edd54c
3
+ size 24780800
params_shard_23.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01e94772e6a89c5463ec4737e4ef239b4aaeb716e4d07d8f955cbb87d4145eb5
3
+ size 24780800
params_shard_24.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51528f4bc9bf715c9fdfd6846dc20327a66c2d24a62c94f015216198441025f4
3
+ size 18292736
params_shard_3.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4cb1b5f3d45ebd25d3efda8ba68acd64dbd0c1b47f35ddb345f7c40b5be1fe21
3
+ size 24780800
params_shard_4.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7dbd113872534296a1fb6d6ac6dda3c498decac670008fcd17dc969070975911
3
+ size 24780800
params_shard_5.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5a2e6bc485d6f8570736a64a3a74b36f687ddd0c0eef7881e4355421d543d04
3
+ size 24780800
params_shard_6.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ced48e2c70a681535e4de45463bbd007513b0cc5d8e7d17cfd986c18062e67b0
3
+ size 24780800
params_shard_7.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9294b441540b56498e07b49120acbe905e3ba50fdebf1a47fc7c5a5e1ea3a29d
3
+ size 24780800
params_shard_8.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f18d6cd81d5859f6d8be704c7df95920da01948ac6586cad48836ebe6bb62c3b
3
+ size 24780800
params_shard_9.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5399ff9926f2f0f3f3fc0e256e6b03010e0dd8e6c4d0e15830dcc8b516ecd238
3
+ size 24780800
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7dc4c393423b76e4373e5157ddc34803a0189ba96b21ddbb40269d31468a6f2
3
+ size 1018370
tokenizer_config.json ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "64790": {
4
+ "content": "[gMASK]",
5
+ "lstrip": false,
6
+ "normalized": true,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": false
10
+ },
11
+ "64792": {
12
+ "content": "sop",
13
+ "lstrip": false,
14
+ "normalized": true,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": false
18
+ },
19
+ "64795": {
20
+ "content": "<|user|>",
21
+ "lstrip": false,
22
+ "normalized": true,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": false
26
+ },
27
+ "64796": {
28
+ "content": "<|assistant|>",
29
+ "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": false
34
+ }
35
+ },
36
+ "auto_map": {
37
+ "AutoTokenizer": [
38
+ "tokenization_chatglm.ChatGLMTokenizer",
39
+ null
40
+ ]
41
+ },
42
+ "chat_template": "{% for message in messages %}{% if loop.first %}[gMASK]sop<|{{ message['role'] }}|>\n {{ message['content'] }}{% else %}<|{{ message['role'] }}|>\n {{ message['content'] }}{% endif %}{% endfor %}{% if add_generation_prompt %}<|assistant|>{% endif %}",
43
+ "clean_up_tokenization_spaces": false,
44
+ "do_lower_case": false,
45
+ "eos_token": "</s>",
46
+ "model_max_length": 1000000000000000019884624838656,
47
+ "pad_token": "<unk>",
48
+ "padding_side": "left",
49
+ "remove_space": false,
50
+ "tokenizer_class": "ChatGLMTokenizer",
51
+ "unk_token": "<unk>"
52
+ }