{ "model": { "bos_token_id": 128000, "context_length": 4096, "decoder": { "session_options": { "log_id": "onnxruntime-genai", "provider_options": [] }, "head_size": 128, "hidden_size": 3072, "inputs": { "input_ids": "input_ids", "attention_mask": "attention_mask_dummy", "position_ids": "position_ids_dummy", "past_key_names": "past_key_%d_in", "past_value_names": "past_value_%d_in" }, "outputs": { "logits": "logits_dequantized", "present_key_names": "past_key_%d_out", "present_value_names": "past_value_%d_out" }, "num_attention_heads": 24, "num_hidden_layers": 28, "num_key_value_heads": 8, "sliding_window_key_value_cache": { "window_size": 128, "pad_value": 128 }, "pipeline": [ { "position-processor": { "filename": "position-processor.onnx", "inputs": [ "attention_mask_before_processor", "position_ids" ], "outputs": [ "attention_mask_before_quantizer", "position_ids_cos_before_quantizer", "position_ids_sin_before_quantizer" ], "session_options": { "log_id": "onnxruntime-genai.position_processor", "provider_options": [ {} ] }, "run_on_token_gen": false }, "position-shifter": { "filename": "position-shifter.onnx", "inputs": [ "attention_mask_before_processor", "position_ids" ], "outputs": [ "attention_mask_shifted", "position_ids_shifted", "attention_mask_before_quantizer", "position_ids_cos_before_quantizer", "position_ids_sin_before_quantizer" ], "output_names_forwarder": { "attention_mask_shifted": "attention_mask_before_processor", "position_ids_shifted": "position_ids" }, "session_options": { "log_id": "onnxruntime-genai.position_shifter", "provider_options": [ {} ] }, "run_on_prompt": false }, "quantizer": { "filename": "quantizer.onnx", "inputs": [ "attention_mask_before_quantizer", "position_ids_cos_before_quantizer", "position_ids_sin_before_quantizer" ], "outputs": [ "attention_mask", "position_ids_cos", "position_ids_sin" ], "session_options": { "log_id": "onnxruntime-genai.quantizer", "provider_options": [ {} ] } }, "prompt-processor-1": { "filename": "prompt_1_of_3_qnn_ctx.onnx", "inputs": [ "input_ids" ], "outputs": [ "_model_model_embed_tokens_Gather_output_0" ], "session_options": { "log_id": "onnxruntime-genai.pp1", "provider_options": [ { "qnn": { "backend_path": "libQnnHtp.so", "htp_performance_mode": "burst", "enable_htp_shared_memory_allocator": "1", "qnn_context_priority": "high" } } ] }, "run_on_token_gen": false }, "prompt-processor-2": { "filename": "prompt_2_of_3_qnn_ctx.onnx", "inputs": [ "_model_model_embed_tokens_Gather_output_0", "attention_mask", "position_ids_cos", "position_ids_sin", "past_key_0_in", "past_value_0_in", "past_key_1_in", "past_value_1_in", "past_key_2_in", "past_value_2_in", "past_key_3_in", "past_value_3_in", "past_key_4_in", "past_value_4_in", "past_key_5_in", "past_value_5_in", "past_key_6_in", "past_value_6_in", "past_key_7_in", "past_value_7_in", "past_key_8_in", "past_value_8_in", "past_key_9_in", "past_value_9_in", "past_key_10_in", "past_value_10_in", "past_key_11_in", "past_value_11_in", "past_key_12_in", "past_value_12_in", "past_key_13_in", "past_value_13_in" ], "outputs": [ "_model_model_layers_13_Add_1_output_0", "past_key_0_out", "past_value_0_out", "past_key_1_out", "past_value_1_out", "past_key_2_out", "past_value_2_out", "past_key_3_out", "past_value_3_out", "past_key_4_out", "past_value_4_out", "past_key_5_out", "past_value_5_out", "past_key_6_out", "past_value_6_out", "past_key_7_out", "past_value_7_out", "past_key_8_out", "past_value_8_out", "past_key_9_out", "past_value_9_out", "past_key_10_out", "past_value_10_out", "past_key_11_out", "past_value_11_out", "past_key_12_out", "past_value_12_out", "past_key_13_out", "past_value_13_out" ], "session_options": { "log_id": "onnxruntime-genai.pp2", "provider_options": [ { "qnn": { "backend_path": "libQnnHtp.so", "htp_performance_mode": "burst", "enable_htp_shared_memory_allocator": "1", "qnn_context_priority": "high" } } ] }, "run_on_token_gen": false }, "prompt-processor-3": { "filename": "prompt_3_of_3_qnn_ctx.onnx", "inputs": [ "_model_model_layers_13_Add_1_output_0", "attention_mask", "position_ids_cos", "position_ids_sin", "past_key_14_in", "past_value_14_in", "past_key_15_in", "past_value_15_in", "past_key_16_in", "past_value_16_in", "past_key_17_in", "past_value_17_in", "past_key_18_in", "past_value_18_in", "past_key_19_in", "past_value_19_in", "past_key_20_in", "past_value_20_in", "past_key_21_in", "past_value_21_in", "past_key_22_in", "past_value_22_in", "past_key_23_in", "past_value_23_in", "past_key_24_in", "past_value_24_in", "past_key_25_in", "past_value_25_in", "past_key_26_in", "past_value_26_in", "past_key_27_in", "past_value_27_in" ], "outputs": [ "logits", "past_key_14_out", "past_value_14_out", "past_key_15_out", "past_value_15_out", "past_key_16_out", "past_value_16_out", "past_key_17_out", "past_value_17_out", "past_key_18_out", "past_value_18_out", "past_key_19_out", "past_value_19_out", "past_key_20_out", "past_value_20_out", "past_key_21_out", "past_value_21_out", "past_key_22_out", "past_value_22_out", "past_key_23_out", "past_value_23_out", "past_key_24_out", "past_value_24_out", "past_key_25_out", "past_value_25_out", "past_key_26_out", "past_value_26_out", "past_key_27_out", "past_value_27_out" ], "session_options": { "log_id": "onnxruntime-genai.pp3", "provider_options": [ { "qnn": { "backend_path": "libQnnHtp.so", "htp_performance_mode": "burst", "enable_htp_shared_memory_allocator": "1", "qnn_context_priority": "high" } } ] }, "run_on_token_gen": false }, "token-generator-1": { "filename": "token_1_of_3_qnn_ctx.onnx", "inputs": [ "input_ids" ], "outputs": [ "_model_model_embed_tokens_Gather_output_0" ], "session_options": { "log_id": "onnxruntime-genai.tg1", "provider_options": [ { "qnn": { "backend_path": "libQnnHtp.so", "htp_performance_mode": "burst", "enable_htp_shared_memory_allocator": "1", "qnn_context_priority": "high" } } ] }, "run_on_prompt": false }, "token-generator-2": { "filename": "token_2_of_3_qnn_ctx.onnx", "inputs": [ "_model_model_embed_tokens_Gather_output_0", "attention_mask", "position_ids_cos", "position_ids_sin", "past_key_0_in", "past_value_0_in", "past_key_1_in", "past_value_1_in", "past_key_2_in", "past_value_2_in", "past_key_3_in", "past_value_3_in", "past_key_4_in", "past_value_4_in", "past_key_5_in", "past_value_5_in", "past_key_6_in", "past_value_6_in", "past_key_7_in", "past_value_7_in", "past_key_8_in", "past_value_8_in", "past_key_9_in", "past_value_9_in", "past_key_10_in", "past_value_10_in", "past_key_11_in", "past_value_11_in", "past_key_12_in", "past_value_12_in", "past_key_13_in", "past_value_13_in" ], "outputs": [ "_model_model_layers_13_Add_1_output_0", "past_key_0_out", "past_value_0_out", "past_key_1_out", "past_value_1_out", "past_key_2_out", "past_value_2_out", "past_key_3_out", "past_value_3_out", "past_key_4_out", "past_value_4_out", "past_key_5_out", "past_value_5_out", "past_key_6_out", "past_value_6_out", "past_key_7_out", "past_value_7_out", "past_key_8_out", "past_value_8_out", "past_key_9_out", "past_value_9_out", "past_key_10_out", "past_value_10_out", "past_key_11_out", "past_value_11_out", "past_key_12_out", "past_value_12_out", "past_key_13_out", "past_value_13_out" ], "session_options": { "log_id": "onnxruntime-genai.tg2", "provider_options": [ { "qnn": { "backend_path": "libQnnHtp.so", "htp_performance_mode": "burst", "enable_htp_shared_memory_allocator": "1", "qnn_context_priority": "high" } } ] }, "run_on_prompt": false }, "token-generator-3": { "filename": "token_3_of_3_qnn_ctx.onnx", "inputs": [ "_model_model_layers_13_Add_1_output_0", "attention_mask", "position_ids_cos", "position_ids_sin", "past_key_14_in", "past_value_14_in", "past_key_15_in", "past_value_15_in", "past_key_16_in", "past_value_16_in", "past_key_17_in", "past_value_17_in", "past_key_18_in", "past_value_18_in", "past_key_19_in", "past_value_19_in", "past_key_20_in", "past_value_20_in", "past_key_21_in", "past_value_21_in", "past_key_22_in", "past_value_22_in", "past_key_23_in", "past_value_23_in", "past_key_24_in", "past_value_24_in", "past_key_25_in", "past_value_25_in", "past_key_26_in", "past_value_26_in", "past_key_27_in", "past_value_27_in" ], "outputs": [ "logits", "past_key_14_out", "past_value_14_out", "past_key_15_out", "past_value_15_out", "past_key_16_out", "past_value_16_out", "past_key_17_out", "past_value_17_out", "past_key_18_out", "past_value_18_out", "past_key_19_out", "past_value_19_out", "past_key_20_out", "past_value_20_out", "past_key_21_out", "past_value_21_out", "past_key_22_out", "past_value_22_out", "past_key_23_out", "past_value_23_out", "past_key_24_out", "past_value_24_out", "past_key_25_out", "past_value_25_out", "past_key_26_out", "past_value_26_out", "past_key_27_out", "past_value_27_out" ], "session_options": { "log_id": "onnxruntime-genai.tg3", "provider_options": [ { "qnn": { "backend_path": "libQnnHtp.so", "htp_performance_mode": "burst", "enable_htp_shared_memory_allocator": "1", "qnn_context_priority": "high" } } ] }, "run_on_prompt": false }, "dequantizer": { "filename": "dequantizer.onnx", "inputs": [ "logits" ], "outputs": [ "logits_dequantized" ], "session_options": { "log_id": "onnxruntime-genai.dequantizer", "provider_options": [ {} ] } } } ] }, "eos_token_id": [ 128001, 128008, 128009 ], "pad_token_id": 128001, "type": "decoder-pipeline", "vocab_size": 128256 }, "search": { "diversity_penalty": 0.0, "do_sample": true, "early_stopping": true, "length_penalty": 1.0, "max_length": 131072, "min_length": 0, "no_repeat_ngram_size": 0, "num_beams": 1, "num_return_sequences": 1, "past_present_share_buffer": true, "repetition_penalty": 1.0, "temperature": 0.6, "top_k": 1, "top_p": 0.9 } }