{
    "model_parameters": {
        "n_layers": 16,
        "vocab_size": 128256,
        "embed_dim": 2048,
        "ffn_hidden_dim": 8192,
        "head_dim": 64,
        "n_kv_heads": 8,
        "rope_theta": 500000.0,
        "rms_norm_eps": 1e-05,
        "attention_mask_value": -100000.0,
        "tie_embedding": true
    },
    "qnn_parameters": {
        "n_hvx_threads": 4
    },
    "graphs": [
        {
            "type": "transformers",
            "start_layer_id": 0,
            "end_layer_id": 16,
            "batch_size": 1,
            "cache_size": 1920,
            "context_size": 2048,
            "graph_name": "batch_1",
            "model_path": "llama3_2_1b_0.bin",
            "kv_path_format": "kv/layer_{layer_id}_{kv_type}_{head_id}.raw",
            "kv_size": 13,
            "x_name": "x",
            "out_name": "out"
        },
        {
            "type": "transformers",
            "start_layer_id": 0,
            "end_layer_id": 16,
            "batch_size": 128,
            "cache_size": 1920,
            "context_size": 2048,
            "graph_name": "batch_128",
            "model_path": "llama3_2_1b_0.bin",
            "kv_path_format": "kv/layer_{layer_id}_{kv_type}_{head_id}.raw",
            "kv_size": 13,
            "x_name": "x",
            "out_name": "out"
        }
    ],
    "embeddings": [
        {
            "graph_name": "batch_1",
            "model_path": "lm_head.bin",
            "batch_size": 1,
            "x_name": "x",
            "out_name": "logits"
        },
        {
            "graph_name": "batch_128",
            "model_path": "lm_head.bin",
            "batch_size": 128,
            "x_name": "x",
            "out_name": "logits"
        }
    ]
}