|
model: |
|
name: model |
|
operator: |
|
input_data: |
|
type: Input |
|
output: |
|
input_ids:0: |
|
dtype: int32 |
|
shape: [-1, -1] |
|
attention_mask:0: |
|
dtype: int32 |
|
shape: [-1, -1] |
|
distilbert.embeddings.position_embeddings.weight:0: |
|
dtype: fp32 |
|
shape: [512, 768] |
|
location: [0, 1572864] |
|
distilbert.embeddings.word_embeddings.weight:0: |
|
dtype: fp32 |
|
shape: [30522, 768] |
|
location: [1572864, 93763584] |
|
distilbert.embeddings.LayerNorm.weight:0: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [95336448, 3072] |
|
distilbert.embeddings.LayerNorm.bias:0: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [95339520, 3072] |
|
/distilbert/embeddings/LayerNorm/Add_1_output_0:0_min: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [97139752, 4] |
|
/distilbert/embeddings/LayerNorm/Add_1_output_0:0_max: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [97139756, 4] |
|
/distilbert/transformer/layer.0/attention/k_lin/Transpose_output_0_quantized:0: |
|
dtype: s8 |
|
shape: [768, 768] |
|
location: [95342600, 589824] |
|
distilbert.transformer.layer.0.attention.k_lin.bias:0: |
|
dtype: s32 |
|
shape: [768] |
|
location: [95932424, 3072] |
|
/distilbert/transformer/layer.0/attention/k_lin/Transpose_output_0_quantized:0_min: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [95935496, 3072] |
|
/distilbert/transformer/layer.0/attention/k_lin/Transpose_output_0_quantized:0_max: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [95938568, 3072] |
|
/distilbert/transformer/layer.0/attention/k_lin/Add_output_0:0_min: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [95941648, 4] |
|
/distilbert/transformer/layer.0/attention/k_lin/Add_output_0:0_max: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [95941652, 4] |
|
/distilbert/transformer/layer.0/attention/q_lin/Transpose_output_0_quantized:0: |
|
dtype: s8 |
|
shape: [768, 768] |
|
location: [95941656, 589824] |
|
distilbert.transformer.layer.0.attention.q_lin.bias:0: |
|
dtype: s32 |
|
shape: [768] |
|
location: [96531480, 3072] |
|
/distilbert/transformer/layer.0/attention/q_lin/Transpose_output_0_quantized:0_min: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [96534552, 3072] |
|
/distilbert/transformer/layer.0/attention/q_lin/Transpose_output_0_quantized:0_max: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [96537624, 3072] |
|
/distilbert/transformer/layer.0/attention/q_lin/Add_output_0:0_min: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [96540704, 4] |
|
/distilbert/transformer/layer.0/attention/q_lin/Add_output_0:0_max: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [96540708, 4] |
|
/distilbert/transformer/layer.0/attention/v_lin/Transpose_output_0_quantized:0: |
|
dtype: s8 |
|
shape: [768, 768] |
|
location: [96540712, 589824] |
|
distilbert.transformer.layer.0.attention.v_lin.bias:0: |
|
dtype: s32 |
|
shape: [768] |
|
location: [97130536, 3072] |
|
/distilbert/transformer/layer.0/attention/v_lin/Transpose_output_0_quantized:0_min: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [97133608, 3072] |
|
/distilbert/transformer/layer.0/attention/v_lin/Transpose_output_0_quantized:0_max: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [97136680, 3072] |
|
/distilbert/transformer/layer.0/attention/Reshape_2_output_0:0_min: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [97139784, 4] |
|
/distilbert/transformer/layer.0/attention/Reshape_2_output_0:0_max: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [97139788, 4] |
|
/distilbert/transformer/layer.0/attention/Softmax_output_0:0_min: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [97139776, 4] |
|
/distilbert/transformer/layer.0/attention/Softmax_output_0:0_max: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [97139780, 4] |
|
/distilbert/transformer/layer.0/attention/Reshape_4_output_0:0_min: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [97738840, 4] |
|
/distilbert/transformer/layer.0/attention/Reshape_4_output_0:0_max: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [97738844, 4] |
|
/distilbert/transformer/layer.0/attention/out_lin/Transpose_output_0_quantized:0: |
|
dtype: s8 |
|
shape: [768, 768] |
|
location: [97139800, 589824] |
|
distilbert.transformer.layer.0.attention.out_lin.bias:0: |
|
dtype: s32 |
|
shape: [768] |
|
location: [97729624, 3072] |
|
/distilbert/transformer/layer.0/attention/out_lin/Transpose_output_0_quantized:0_min: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [97732696, 3072] |
|
/distilbert/transformer/layer.0/attention/out_lin/Transpose_output_0_quantized:0_max: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [97735768, 3072] |
|
/distilbert/transformer/layer.0/Add_output_0:0_min: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [97738848, 4] |
|
/distilbert/transformer/layer.0/Add_output_0:0_max: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [97738852, 4] |
|
distilbert.transformer.layer.0.sa_layer_norm.weight:0: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [97738856, 3072] |
|
distilbert.transformer.layer.0.sa_layer_norm.bias:0: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [97741928, 3072] |
|
/distilbert/transformer/layer.0/sa_layer_norm/Add_1_output_0:0_min: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [100141168, 4] |
|
/distilbert/transformer/layer.0/sa_layer_norm/Add_1_output_0:0_max: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [100141172, 4] |
|
/distilbert/transformer/layer.0/ffn/lin1/Transpose_output_0_quantized:0: |
|
dtype: s8 |
|
shape: [3072, 768] |
|
location: [97745008, 2359296] |
|
distilbert.transformer.layer.0.ffn.lin1.bias:0: |
|
dtype: s32 |
|
shape: [3072] |
|
location: [100104304, 12288] |
|
/distilbert/transformer/layer.0/ffn/lin1/Transpose_output_0_quantized:0_min: |
|
dtype: fp32 |
|
shape: [3072] |
|
location: [100116592, 12288] |
|
/distilbert/transformer/layer.0/ffn/lin1/Transpose_output_0_quantized:0_max: |
|
dtype: fp32 |
|
shape: [3072] |
|
location: [100128880, 12288] |
|
/distilbert/transformer/layer.0/ffn/activation/Mul_1_output_0:0_min: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [102509696, 4] |
|
/distilbert/transformer/layer.0/ffn/activation/Mul_1_output_0:0_max: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [102509700, 4] |
|
/distilbert/transformer/layer.0/ffn/lin2/Transpose_output_0_quantized:0: |
|
dtype: s8 |
|
shape: [768, 3072] |
|
location: [100141184, 2359296] |
|
distilbert.transformer.layer.0.ffn.lin2.bias:0: |
|
dtype: s32 |
|
shape: [768] |
|
location: [102500480, 3072] |
|
/distilbert/transformer/layer.0/ffn/lin2/Transpose_output_0_quantized:0_min: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [102503552, 3072] |
|
/distilbert/transformer/layer.0/ffn/lin2/Transpose_output_0_quantized:0_max: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [102506624, 3072] |
|
/distilbert/transformer/layer.0/Add_1_output_0:0_min: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [102509704, 4] |
|
/distilbert/transformer/layer.0/Add_1_output_0:0_max: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [102509708, 4] |
|
distilbert.transformer.layer.0.output_layer_norm.weight:0: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [102509712, 3072] |
|
distilbert.transformer.layer.0.output_layer_norm.bias:0: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [102512784, 3072] |
|
/distilbert/transformer/layer.0/output_layer_norm/Add_1_output_0:0_min: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [104313016, 4] |
|
/distilbert/transformer/layer.0/output_layer_norm/Add_1_output_0:0_max: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [104313020, 4] |
|
/distilbert/transformer/layer.1/attention/k_lin/Transpose_output_0_quantized:0: |
|
dtype: s8 |
|
shape: [768, 768] |
|
location: [102515864, 589824] |
|
distilbert.transformer.layer.1.attention.k_lin.bias:0: |
|
dtype: s32 |
|
shape: [768] |
|
location: [103105688, 3072] |
|
/distilbert/transformer/layer.1/attention/k_lin/Transpose_output_0_quantized:0_min: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [103108760, 3072] |
|
/distilbert/transformer/layer.1/attention/k_lin/Transpose_output_0_quantized:0_max: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [103111832, 3072] |
|
/distilbert/transformer/layer.1/attention/k_lin/Add_output_0:0_min: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [103114912, 4] |
|
/distilbert/transformer/layer.1/attention/k_lin/Add_output_0:0_max: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [103114916, 4] |
|
/distilbert/transformer/layer.1/attention/q_lin/Transpose_output_0_quantized:0: |
|
dtype: s8 |
|
shape: [768, 768] |
|
location: [103114920, 589824] |
|
distilbert.transformer.layer.1.attention.q_lin.bias:0: |
|
dtype: s32 |
|
shape: [768] |
|
location: [103704744, 3072] |
|
/distilbert/transformer/layer.1/attention/q_lin/Transpose_output_0_quantized:0_min: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [103707816, 3072] |
|
/distilbert/transformer/layer.1/attention/q_lin/Transpose_output_0_quantized:0_max: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [103710888, 3072] |
|
/distilbert/transformer/layer.1/attention/q_lin/Add_output_0:0_min: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [103713968, 4] |
|
/distilbert/transformer/layer.1/attention/q_lin/Add_output_0:0_max: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [103713972, 4] |
|
/distilbert/transformer/layer.1/attention/v_lin/Transpose_output_0_quantized:0: |
|
dtype: s8 |
|
shape: [768, 768] |
|
location: [103713976, 589824] |
|
distilbert.transformer.layer.1.attention.v_lin.bias:0: |
|
dtype: s32 |
|
shape: [768] |
|
location: [104303800, 3072] |
|
/distilbert/transformer/layer.1/attention/v_lin/Transpose_output_0_quantized:0_min: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [104306872, 3072] |
|
/distilbert/transformer/layer.1/attention/v_lin/Transpose_output_0_quantized:0_max: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [104309944, 3072] |
|
/distilbert/transformer/layer.1/attention/Reshape_2_output_0:0_min: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [104313048, 4] |
|
/distilbert/transformer/layer.1/attention/Reshape_2_output_0:0_max: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [104313052, 4] |
|
/distilbert/transformer/layer.1/attention/Softmax_output_0:0_min: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [104313040, 4] |
|
/distilbert/transformer/layer.1/attention/Softmax_output_0:0_max: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [104313044, 4] |
|
/distilbert/transformer/layer.1/attention/Reshape_4_output_0:0_min: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [104912104, 4] |
|
/distilbert/transformer/layer.1/attention/Reshape_4_output_0:0_max: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [104912108, 4] |
|
/distilbert/transformer/layer.1/attention/out_lin/Transpose_output_0_quantized:0: |
|
dtype: s8 |
|
shape: [768, 768] |
|
location: [104313064, 589824] |
|
distilbert.transformer.layer.1.attention.out_lin.bias:0: |
|
dtype: s32 |
|
shape: [768] |
|
location: [104902888, 3072] |
|
/distilbert/transformer/layer.1/attention/out_lin/Transpose_output_0_quantized:0_min: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [104905960, 3072] |
|
/distilbert/transformer/layer.1/attention/out_lin/Transpose_output_0_quantized:0_max: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [104909032, 3072] |
|
/distilbert/transformer/layer.1/Add_output_0:0_min: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [104912112, 4] |
|
/distilbert/transformer/layer.1/Add_output_0:0_max: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [104912116, 4] |
|
distilbert.transformer.layer.1.sa_layer_norm.weight:0: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [104912120, 3072] |
|
distilbert.transformer.layer.1.sa_layer_norm.bias:0: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [104915192, 3072] |
|
/distilbert/transformer/layer.1/sa_layer_norm/Add_1_output_0:0_min: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [107314432, 4] |
|
/distilbert/transformer/layer.1/sa_layer_norm/Add_1_output_0:0_max: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [107314436, 4] |
|
/distilbert/transformer/layer.1/ffn/lin1/Transpose_output_0_quantized:0: |
|
dtype: s8 |
|
shape: [3072, 768] |
|
location: [104918272, 2359296] |
|
distilbert.transformer.layer.1.ffn.lin1.bias:0: |
|
dtype: s32 |
|
shape: [3072] |
|
location: [107277568, 12288] |
|
/distilbert/transformer/layer.1/ffn/lin1/Transpose_output_0_quantized:0_min: |
|
dtype: fp32 |
|
shape: [3072] |
|
location: [107289856, 12288] |
|
/distilbert/transformer/layer.1/ffn/lin1/Transpose_output_0_quantized:0_max: |
|
dtype: fp32 |
|
shape: [3072] |
|
location: [107302144, 12288] |
|
/distilbert/transformer/layer.1/ffn/activation/Mul_1_output_0:0_min: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [109682960, 4] |
|
/distilbert/transformer/layer.1/ffn/activation/Mul_1_output_0:0_max: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [109682964, 4] |
|
/distilbert/transformer/layer.1/ffn/lin2/Transpose_output_0_quantized:0: |
|
dtype: s8 |
|
shape: [768, 3072] |
|
location: [107314448, 2359296] |
|
distilbert.transformer.layer.1.ffn.lin2.bias:0: |
|
dtype: s32 |
|
shape: [768] |
|
location: [109673744, 3072] |
|
/distilbert/transformer/layer.1/ffn/lin2/Transpose_output_0_quantized:0_min: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [109676816, 3072] |
|
/distilbert/transformer/layer.1/ffn/lin2/Transpose_output_0_quantized:0_max: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [109679888, 3072] |
|
/distilbert/transformer/layer.1/Add_1_output_0:0_min: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [109682968, 4] |
|
/distilbert/transformer/layer.1/Add_1_output_0:0_max: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [109682972, 4] |
|
distilbert.transformer.layer.1.output_layer_norm.weight:0: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [109682976, 3072] |
|
distilbert.transformer.layer.1.output_layer_norm.bias:0: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [109686048, 3072] |
|
/distilbert/transformer/layer.1/output_layer_norm/Add_1_output_0:0_min: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [111486280, 4] |
|
/distilbert/transformer/layer.1/output_layer_norm/Add_1_output_0:0_max: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [111486284, 4] |
|
/distilbert/transformer/layer.2/attention/k_lin/Transpose_output_0_quantized:0: |
|
dtype: s8 |
|
shape: [768, 768] |
|
location: [109689128, 589824] |
|
distilbert.transformer.layer.2.attention.k_lin.bias:0: |
|
dtype: s32 |
|
shape: [768] |
|
location: [110278952, 3072] |
|
/distilbert/transformer/layer.2/attention/k_lin/Transpose_output_0_quantized:0_min: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [110282024, 3072] |
|
/distilbert/transformer/layer.2/attention/k_lin/Transpose_output_0_quantized:0_max: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [110285096, 3072] |
|
/distilbert/transformer/layer.2/attention/k_lin/Add_output_0:0_min: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [110288176, 4] |
|
/distilbert/transformer/layer.2/attention/k_lin/Add_output_0:0_max: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [110288180, 4] |
|
/distilbert/transformer/layer.2/attention/q_lin/Transpose_output_0_quantized:0: |
|
dtype: s8 |
|
shape: [768, 768] |
|
location: [110288184, 589824] |
|
distilbert.transformer.layer.2.attention.q_lin.bias:0: |
|
dtype: s32 |
|
shape: [768] |
|
location: [110878008, 3072] |
|
/distilbert/transformer/layer.2/attention/q_lin/Transpose_output_0_quantized:0_min: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [110881080, 3072] |
|
/distilbert/transformer/layer.2/attention/q_lin/Transpose_output_0_quantized:0_max: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [110884152, 3072] |
|
/distilbert/transformer/layer.2/attention/q_lin/Add_output_0:0_min: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [110887232, 4] |
|
/distilbert/transformer/layer.2/attention/q_lin/Add_output_0:0_max: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [110887236, 4] |
|
/distilbert/transformer/layer.2/attention/v_lin/Transpose_output_0_quantized:0: |
|
dtype: s8 |
|
shape: [768, 768] |
|
location: [110887240, 589824] |
|
distilbert.transformer.layer.2.attention.v_lin.bias:0: |
|
dtype: s32 |
|
shape: [768] |
|
location: [111477064, 3072] |
|
/distilbert/transformer/layer.2/attention/v_lin/Transpose_output_0_quantized:0_min: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [111480136, 3072] |
|
/distilbert/transformer/layer.2/attention/v_lin/Transpose_output_0_quantized:0_max: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [111483208, 3072] |
|
/distilbert/transformer/layer.2/attention/Reshape_2_output_0:0_min: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [111486312, 4] |
|
/distilbert/transformer/layer.2/attention/Reshape_2_output_0:0_max: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [111486316, 4] |
|
/distilbert/transformer/layer.2/attention/Softmax_output_0:0_min: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [111486304, 4] |
|
/distilbert/transformer/layer.2/attention/Softmax_output_0:0_max: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [111486308, 4] |
|
/distilbert/transformer/layer.2/attention/Reshape_4_output_0:0_min: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [112085368, 4] |
|
/distilbert/transformer/layer.2/attention/Reshape_4_output_0:0_max: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [112085372, 4] |
|
/distilbert/transformer/layer.2/attention/out_lin/Transpose_output_0_quantized:0: |
|
dtype: s8 |
|
shape: [768, 768] |
|
location: [111486328, 589824] |
|
distilbert.transformer.layer.2.attention.out_lin.bias:0: |
|
dtype: s32 |
|
shape: [768] |
|
location: [112076152, 3072] |
|
/distilbert/transformer/layer.2/attention/out_lin/Transpose_output_0_quantized:0_min: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [112079224, 3072] |
|
/distilbert/transformer/layer.2/attention/out_lin/Transpose_output_0_quantized:0_max: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [112082296, 3072] |
|
/distilbert/transformer/layer.2/Add_output_0:0_min: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [112085376, 4] |
|
/distilbert/transformer/layer.2/Add_output_0:0_max: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [112085380, 4] |
|
distilbert.transformer.layer.2.sa_layer_norm.weight:0: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [112085384, 3072] |
|
distilbert.transformer.layer.2.sa_layer_norm.bias:0: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [112088456, 3072] |
|
/distilbert/transformer/layer.2/sa_layer_norm/Add_1_output_0:0_min: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [114487696, 4] |
|
/distilbert/transformer/layer.2/sa_layer_norm/Add_1_output_0:0_max: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [114487700, 4] |
|
/distilbert/transformer/layer.2/ffn/lin1/Transpose_output_0_quantized:0: |
|
dtype: s8 |
|
shape: [3072, 768] |
|
location: [112091536, 2359296] |
|
distilbert.transformer.layer.2.ffn.lin1.bias:0: |
|
dtype: s32 |
|
shape: [3072] |
|
location: [114450832, 12288] |
|
/distilbert/transformer/layer.2/ffn/lin1/Transpose_output_0_quantized:0_min: |
|
dtype: fp32 |
|
shape: [3072] |
|
location: [114463120, 12288] |
|
/distilbert/transformer/layer.2/ffn/lin1/Transpose_output_0_quantized:0_max: |
|
dtype: fp32 |
|
shape: [3072] |
|
location: [114475408, 12288] |
|
/distilbert/transformer/layer.2/ffn/activation/Mul_1_output_0:0_min: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [116856224, 4] |
|
/distilbert/transformer/layer.2/ffn/activation/Mul_1_output_0:0_max: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [116856228, 4] |
|
/distilbert/transformer/layer.2/ffn/lin2/Transpose_output_0_quantized:0: |
|
dtype: s8 |
|
shape: [768, 3072] |
|
location: [114487712, 2359296] |
|
distilbert.transformer.layer.2.ffn.lin2.bias:0: |
|
dtype: s32 |
|
shape: [768] |
|
location: [116847008, 3072] |
|
/distilbert/transformer/layer.2/ffn/lin2/Transpose_output_0_quantized:0_min: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [116850080, 3072] |
|
/distilbert/transformer/layer.2/ffn/lin2/Transpose_output_0_quantized:0_max: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [116853152, 3072] |
|
/distilbert/transformer/layer.2/Add_1_output_0:0_min: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [116856232, 4] |
|
/distilbert/transformer/layer.2/Add_1_output_0:0_max: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [116856236, 4] |
|
distilbert.transformer.layer.2.output_layer_norm.weight:0: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [116856240, 3072] |
|
distilbert.transformer.layer.2.output_layer_norm.bias:0: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [116859312, 3072] |
|
/distilbert/transformer/layer.2/output_layer_norm/Add_1_output_0:0_min: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [118659544, 4] |
|
/distilbert/transformer/layer.2/output_layer_norm/Add_1_output_0:0_max: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [118659548, 4] |
|
/distilbert/transformer/layer.3/attention/k_lin/Transpose_output_0_quantized:0: |
|
dtype: s8 |
|
shape: [768, 768] |
|
location: [116862392, 589824] |
|
distilbert.transformer.layer.3.attention.k_lin.bias:0: |
|
dtype: s32 |
|
shape: [768] |
|
location: [117452216, 3072] |
|
/distilbert/transformer/layer.3/attention/k_lin/Transpose_output_0_quantized:0_min: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [117455288, 3072] |
|
/distilbert/transformer/layer.3/attention/k_lin/Transpose_output_0_quantized:0_max: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [117458360, 3072] |
|
/distilbert/transformer/layer.3/attention/k_lin/Add_output_0:0_min: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [117461440, 4] |
|
/distilbert/transformer/layer.3/attention/k_lin/Add_output_0:0_max: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [117461444, 4] |
|
/distilbert/transformer/layer.3/attention/q_lin/Transpose_output_0_quantized:0: |
|
dtype: s8 |
|
shape: [768, 768] |
|
location: [117461448, 589824] |
|
distilbert.transformer.layer.3.attention.q_lin.bias:0: |
|
dtype: s32 |
|
shape: [768] |
|
location: [118051272, 3072] |
|
/distilbert/transformer/layer.3/attention/q_lin/Transpose_output_0_quantized:0_min: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [118054344, 3072] |
|
/distilbert/transformer/layer.3/attention/q_lin/Transpose_output_0_quantized:0_max: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [118057416, 3072] |
|
/distilbert/transformer/layer.3/attention/q_lin/Add_output_0:0_min: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [118060496, 4] |
|
/distilbert/transformer/layer.3/attention/q_lin/Add_output_0:0_max: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [118060500, 4] |
|
/distilbert/transformer/layer.3/attention/v_lin/Transpose_output_0_quantized:0: |
|
dtype: s8 |
|
shape: [768, 768] |
|
location: [118060504, 589824] |
|
distilbert.transformer.layer.3.attention.v_lin.bias:0: |
|
dtype: s32 |
|
shape: [768] |
|
location: [118650328, 3072] |
|
/distilbert/transformer/layer.3/attention/v_lin/Transpose_output_0_quantized:0_min: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [118653400, 3072] |
|
/distilbert/transformer/layer.3/attention/v_lin/Transpose_output_0_quantized:0_max: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [118656472, 3072] |
|
/distilbert/transformer/layer.3/attention/Reshape_2_output_0:0_min: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [118659576, 4] |
|
/distilbert/transformer/layer.3/attention/Reshape_2_output_0:0_max: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [118659580, 4] |
|
/distilbert/transformer/layer.3/attention/Softmax_output_0:0_min: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [118659568, 4] |
|
/distilbert/transformer/layer.3/attention/Softmax_output_0:0_max: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [118659572, 4] |
|
/distilbert/transformer/layer.3/attention/Reshape_4_output_0:0_min: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [119258632, 4] |
|
/distilbert/transformer/layer.3/attention/Reshape_4_output_0:0_max: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [119258636, 4] |
|
/distilbert/transformer/layer.3/attention/out_lin/Transpose_output_0_quantized:0: |
|
dtype: s8 |
|
shape: [768, 768] |
|
location: [118659592, 589824] |
|
distilbert.transformer.layer.3.attention.out_lin.bias:0: |
|
dtype: s32 |
|
shape: [768] |
|
location: [119249416, 3072] |
|
/distilbert/transformer/layer.3/attention/out_lin/Transpose_output_0_quantized:0_min: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [119252488, 3072] |
|
/distilbert/transformer/layer.3/attention/out_lin/Transpose_output_0_quantized:0_max: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [119255560, 3072] |
|
/distilbert/transformer/layer.3/Add_output_0:0_min: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [119258640, 4] |
|
/distilbert/transformer/layer.3/Add_output_0:0_max: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [119258644, 4] |
|
distilbert.transformer.layer.3.sa_layer_norm.weight:0: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [119258648, 3072] |
|
distilbert.transformer.layer.3.sa_layer_norm.bias:0: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [119261720, 3072] |
|
/distilbert/transformer/layer.3/sa_layer_norm/Add_1_output_0:0_min: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [121660960, 4] |
|
/distilbert/transformer/layer.3/sa_layer_norm/Add_1_output_0:0_max: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [121660964, 4] |
|
/distilbert/transformer/layer.3/ffn/lin1/Transpose_output_0_quantized:0: |
|
dtype: s8 |
|
shape: [3072, 768] |
|
location: [119264800, 2359296] |
|
distilbert.transformer.layer.3.ffn.lin1.bias:0: |
|
dtype: s32 |
|
shape: [3072] |
|
location: [121624096, 12288] |
|
/distilbert/transformer/layer.3/ffn/lin1/Transpose_output_0_quantized:0_min: |
|
dtype: fp32 |
|
shape: [3072] |
|
location: [121636384, 12288] |
|
/distilbert/transformer/layer.3/ffn/lin1/Transpose_output_0_quantized:0_max: |
|
dtype: fp32 |
|
shape: [3072] |
|
location: [121648672, 12288] |
|
/distilbert/transformer/layer.3/ffn/activation/Mul_1_output_0:0_min: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [124029488, 4] |
|
/distilbert/transformer/layer.3/ffn/activation/Mul_1_output_0:0_max: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [124029492, 4] |
|
/distilbert/transformer/layer.3/ffn/lin2/Transpose_output_0_quantized:0: |
|
dtype: s8 |
|
shape: [768, 3072] |
|
location: [121660976, 2359296] |
|
distilbert.transformer.layer.3.ffn.lin2.bias:0: |
|
dtype: s32 |
|
shape: [768] |
|
location: [124020272, 3072] |
|
/distilbert/transformer/layer.3/ffn/lin2/Transpose_output_0_quantized:0_min: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [124023344, 3072] |
|
/distilbert/transformer/layer.3/ffn/lin2/Transpose_output_0_quantized:0_max: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [124026416, 3072] |
|
/distilbert/transformer/layer.3/Add_1_output_0:0_min: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [124029496, 4] |
|
/distilbert/transformer/layer.3/Add_1_output_0:0_max: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [124029500, 4] |
|
distilbert.transformer.layer.3.output_layer_norm.weight:0: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [124029504, 3072] |
|
distilbert.transformer.layer.3.output_layer_norm.bias:0: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [124032576, 3072] |
|
/distilbert/transformer/layer.3/output_layer_norm/Add_1_output_0:0_min: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [125832808, 4] |
|
/distilbert/transformer/layer.3/output_layer_norm/Add_1_output_0:0_max: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [125832812, 4] |
|
/distilbert/transformer/layer.4/attention/k_lin/Transpose_output_0_quantized:0: |
|
dtype: s8 |
|
shape: [768, 768] |
|
location: [124035656, 589824] |
|
distilbert.transformer.layer.4.attention.k_lin.bias:0: |
|
dtype: s32 |
|
shape: [768] |
|
location: [124625480, 3072] |
|
/distilbert/transformer/layer.4/attention/k_lin/Transpose_output_0_quantized:0_min: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [124628552, 3072] |
|
/distilbert/transformer/layer.4/attention/k_lin/Transpose_output_0_quantized:0_max: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [124631624, 3072] |
|
/distilbert/transformer/layer.4/attention/k_lin/Add_output_0:0_min: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [124634704, 4] |
|
/distilbert/transformer/layer.4/attention/k_lin/Add_output_0:0_max: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [124634708, 4] |
|
/distilbert/transformer/layer.4/attention/q_lin/Transpose_output_0_quantized:0: |
|
dtype: s8 |
|
shape: [768, 768] |
|
location: [124634712, 589824] |
|
distilbert.transformer.layer.4.attention.q_lin.bias:0: |
|
dtype: s32 |
|
shape: [768] |
|
location: [125224536, 3072] |
|
/distilbert/transformer/layer.4/attention/q_lin/Transpose_output_0_quantized:0_min: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [125227608, 3072] |
|
/distilbert/transformer/layer.4/attention/q_lin/Transpose_output_0_quantized:0_max: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [125230680, 3072] |
|
/distilbert/transformer/layer.4/attention/q_lin/Add_output_0:0_min: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [125233760, 4] |
|
/distilbert/transformer/layer.4/attention/q_lin/Add_output_0:0_max: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [125233764, 4] |
|
/distilbert/transformer/layer.4/attention/v_lin/Transpose_output_0_quantized:0: |
|
dtype: s8 |
|
shape: [768, 768] |
|
location: [125233768, 589824] |
|
distilbert.transformer.layer.4.attention.v_lin.bias:0: |
|
dtype: s32 |
|
shape: [768] |
|
location: [125823592, 3072] |
|
/distilbert/transformer/layer.4/attention/v_lin/Transpose_output_0_quantized:0_min: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [125826664, 3072] |
|
/distilbert/transformer/layer.4/attention/v_lin/Transpose_output_0_quantized:0_max: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [125829736, 3072] |
|
/distilbert/transformer/layer.4/attention/Reshape_2_output_0:0_min: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [125832840, 4] |
|
/distilbert/transformer/layer.4/attention/Reshape_2_output_0:0_max: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [125832844, 4] |
|
/distilbert/transformer/layer.4/attention/Softmax_output_0:0_min: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [125832832, 4] |
|
/distilbert/transformer/layer.4/attention/Softmax_output_0:0_max: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [125832836, 4] |
|
/distilbert/transformer/layer.4/attention/Reshape_4_output_0:0_min: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [126431896, 4] |
|
/distilbert/transformer/layer.4/attention/Reshape_4_output_0:0_max: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [126431900, 4] |
|
/distilbert/transformer/layer.4/attention/out_lin/Transpose_output_0_quantized:0: |
|
dtype: s8 |
|
shape: [768, 768] |
|
location: [125832856, 589824] |
|
distilbert.transformer.layer.4.attention.out_lin.bias:0: |
|
dtype: s32 |
|
shape: [768] |
|
location: [126422680, 3072] |
|
/distilbert/transformer/layer.4/attention/out_lin/Transpose_output_0_quantized:0_min: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [126425752, 3072] |
|
/distilbert/transformer/layer.4/attention/out_lin/Transpose_output_0_quantized:0_max: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [126428824, 3072] |
|
/distilbert/transformer/layer.4/Add_output_0:0_min: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [126431904, 4] |
|
/distilbert/transformer/layer.4/Add_output_0:0_max: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [126431908, 4] |
|
distilbert.transformer.layer.4.sa_layer_norm.weight:0: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [126431912, 3072] |
|
distilbert.transformer.layer.4.sa_layer_norm.bias:0: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [126434984, 3072] |
|
/distilbert/transformer/layer.4/sa_layer_norm/Add_1_output_0:0_min: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [128834224, 4] |
|
/distilbert/transformer/layer.4/sa_layer_norm/Add_1_output_0:0_max: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [128834228, 4] |
|
/distilbert/transformer/layer.4/ffn/lin1/Transpose_output_0_quantized:0: |
|
dtype: s8 |
|
shape: [3072, 768] |
|
location: [126438064, 2359296] |
|
distilbert.transformer.layer.4.ffn.lin1.bias:0: |
|
dtype: s32 |
|
shape: [3072] |
|
location: [128797360, 12288] |
|
/distilbert/transformer/layer.4/ffn/lin1/Transpose_output_0_quantized:0_min: |
|
dtype: fp32 |
|
shape: [3072] |
|
location: [128809648, 12288] |
|
/distilbert/transformer/layer.4/ffn/lin1/Transpose_output_0_quantized:0_max: |
|
dtype: fp32 |
|
shape: [3072] |
|
location: [128821936, 12288] |
|
/distilbert/transformer/layer.4/ffn/activation/Mul_1_output_0:0_min: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [131202752, 4] |
|
/distilbert/transformer/layer.4/ffn/activation/Mul_1_output_0:0_max: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [131202756, 4] |
|
/distilbert/transformer/layer.4/ffn/lin2/Transpose_output_0_quantized:0: |
|
dtype: s8 |
|
shape: [768, 3072] |
|
location: [128834240, 2359296] |
|
distilbert.transformer.layer.4.ffn.lin2.bias:0: |
|
dtype: s32 |
|
shape: [768] |
|
location: [131193536, 3072] |
|
/distilbert/transformer/layer.4/ffn/lin2/Transpose_output_0_quantized:0_min: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [131196608, 3072] |
|
/distilbert/transformer/layer.4/ffn/lin2/Transpose_output_0_quantized:0_max: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [131199680, 3072] |
|
/distilbert/transformer/layer.4/Add_1_output_0:0_min: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [131202760, 4] |
|
/distilbert/transformer/layer.4/Add_1_output_0:0_max: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [131202764, 4] |
|
distilbert.transformer.layer.4.output_layer_norm.weight:0: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [131202768, 3072] |
|
distilbert.transformer.layer.4.output_layer_norm.bias:0: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [131205840, 3072] |
|
/distilbert/transformer/layer.4/output_layer_norm/Add_1_output_0:0_min: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [133006072, 4] |
|
/distilbert/transformer/layer.4/output_layer_norm/Add_1_output_0:0_max: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [133006076, 4] |
|
/distilbert/transformer/layer.5/attention/k_lin/Transpose_output_0_quantized:0: |
|
dtype: s8 |
|
shape: [768, 768] |
|
location: [131208920, 589824] |
|
distilbert.transformer.layer.5.attention.k_lin.bias:0: |
|
dtype: s32 |
|
shape: [768] |
|
location: [131798744, 3072] |
|
/distilbert/transformer/layer.5/attention/k_lin/Transpose_output_0_quantized:0_min: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [131801816, 3072] |
|
/distilbert/transformer/layer.5/attention/k_lin/Transpose_output_0_quantized:0_max: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [131804888, 3072] |
|
/distilbert/transformer/layer.5/attention/k_lin/Add_output_0:0_min: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [131807968, 4] |
|
/distilbert/transformer/layer.5/attention/k_lin/Add_output_0:0_max: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [131807972, 4] |
|
/distilbert/transformer/layer.5/attention/q_lin/Transpose_output_0_quantized:0: |
|
dtype: s8 |
|
shape: [768, 768] |
|
location: [131807976, 589824] |
|
distilbert.transformer.layer.5.attention.q_lin.bias:0: |
|
dtype: s32 |
|
shape: [768] |
|
location: [132397800, 3072] |
|
/distilbert/transformer/layer.5/attention/q_lin/Transpose_output_0_quantized:0_min: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [132400872, 3072] |
|
/distilbert/transformer/layer.5/attention/q_lin/Transpose_output_0_quantized:0_max: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [132403944, 3072] |
|
/distilbert/transformer/layer.5/attention/q_lin/Add_output_0:0_min: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [132407024, 4] |
|
/distilbert/transformer/layer.5/attention/q_lin/Add_output_0:0_max: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [132407028, 4] |
|
/distilbert/transformer/layer.5/attention/v_lin/Transpose_output_0_quantized:0: |
|
dtype: s8 |
|
shape: [768, 768] |
|
location: [132407032, 589824] |
|
distilbert.transformer.layer.5.attention.v_lin.bias:0: |
|
dtype: s32 |
|
shape: [768] |
|
location: [132996856, 3072] |
|
/distilbert/transformer/layer.5/attention/v_lin/Transpose_output_0_quantized:0_min: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [132999928, 3072] |
|
/distilbert/transformer/layer.5/attention/v_lin/Transpose_output_0_quantized:0_max: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [133003000, 3072] |
|
/distilbert/transformer/layer.5/attention/Reshape_2_output_0:0_min: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [133006104, 4] |
|
/distilbert/transformer/layer.5/attention/Reshape_2_output_0:0_max: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [133006108, 4] |
|
/distilbert/transformer/layer.5/attention/Softmax_output_0:0_min: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [133006096, 4] |
|
/distilbert/transformer/layer.5/attention/Softmax_output_0:0_max: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [133006100, 4] |
|
/distilbert/transformer/layer.5/attention/Reshape_4_output_0:0_min: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [133605160, 4] |
|
/distilbert/transformer/layer.5/attention/Reshape_4_output_0:0_max: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [133605164, 4] |
|
/distilbert/transformer/layer.5/attention/out_lin/Transpose_output_0_quantized:0: |
|
dtype: s8 |
|
shape: [768, 768] |
|
location: [133006120, 589824] |
|
distilbert.transformer.layer.5.attention.out_lin.bias:0: |
|
dtype: s32 |
|
shape: [768] |
|
location: [133595944, 3072] |
|
/distilbert/transformer/layer.5/attention/out_lin/Transpose_output_0_quantized:0_min: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [133599016, 3072] |
|
/distilbert/transformer/layer.5/attention/out_lin/Transpose_output_0_quantized:0_max: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [133602088, 3072] |
|
/distilbert/transformer/layer.5/Add_output_0:0_min: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [133605168, 4] |
|
/distilbert/transformer/layer.5/Add_output_0:0_max: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [133605172, 4] |
|
distilbert.transformer.layer.5.sa_layer_norm.weight:0: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [133605176, 3072] |
|
distilbert.transformer.layer.5.sa_layer_norm.bias:0: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [133608248, 3072] |
|
/distilbert/transformer/layer.5/sa_layer_norm/Add_1_output_0:0_min: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [136007488, 4] |
|
/distilbert/transformer/layer.5/sa_layer_norm/Add_1_output_0:0_max: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [136007492, 4] |
|
/distilbert/transformer/layer.5/ffn/lin1/Transpose_output_0_quantized:0: |
|
dtype: s8 |
|
shape: [3072, 768] |
|
location: [133611328, 2359296] |
|
distilbert.transformer.layer.5.ffn.lin1.bias:0: |
|
dtype: s32 |
|
shape: [3072] |
|
location: [135970624, 12288] |
|
/distilbert/transformer/layer.5/ffn/lin1/Transpose_output_0_quantized:0_min: |
|
dtype: fp32 |
|
shape: [3072] |
|
location: [135982912, 12288] |
|
/distilbert/transformer/layer.5/ffn/lin1/Transpose_output_0_quantized:0_max: |
|
dtype: fp32 |
|
shape: [3072] |
|
location: [135995200, 12288] |
|
/distilbert/transformer/layer.5/ffn/activation/Mul_1_output_0:0_min: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [138376016, 4] |
|
/distilbert/transformer/layer.5/ffn/activation/Mul_1_output_0:0_max: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [138376020, 4] |
|
/distilbert/transformer/layer.5/ffn/lin2/Transpose_output_0_quantized:0: |
|
dtype: s8 |
|
shape: [768, 3072] |
|
location: [136007504, 2359296] |
|
distilbert.transformer.layer.5.ffn.lin2.bias:0: |
|
dtype: s32 |
|
shape: [768] |
|
location: [138366800, 3072] |
|
/distilbert/transformer/layer.5/ffn/lin2/Transpose_output_0_quantized:0_min: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [138369872, 3072] |
|
/distilbert/transformer/layer.5/ffn/lin2/Transpose_output_0_quantized:0_max: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [138372944, 3072] |
|
/distilbert/transformer/layer.5/Add_1_output_0:0_min: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [138376024, 4] |
|
/distilbert/transformer/layer.5/Add_1_output_0:0_max: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [138376028, 4] |
|
distilbert.transformer.layer.5.output_layer_norm.weight:0: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [138376032, 3072] |
|
distilbert.transformer.layer.5.output_layer_norm.bias:0: |
|
dtype: fp32 |
|
shape: [768] |
|
location: [138379104, 3072] |
|
/distilbert/transformer/layer.5/output_layer_norm/Add_1_output_0:0_min: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [138383728, 4] |
|
/distilbert/transformer/layer.5/output_layer_norm/Add_1_output_0:0_max: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [138383732, 4] |
|
/qa_outputs/Transpose_output_0_quantized:0: |
|
dtype: s8 |
|
shape: [768, 2] |
|
location: [138382184, 1536] |
|
qa_outputs.bias:0: |
|
dtype: s32 |
|
shape: [2] |
|
location: [138383720, 8] |
|
/qa_outputs/Transpose_output_0_quantized:0_min: |
|
dtype: fp32 |
|
shape: [2] |
|
location: [138383736, 8] |
|
/qa_outputs/Transpose_output_0_quantized:0_max: |
|
dtype: fp32 |
|
shape: [2] |
|
location: [138383744, 8] |
|
/qa_outputs/Add_output_0:0_min: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [138383752, 4] |
|
/qa_outputs/Add_output_0:0_max: |
|
dtype: fp32 |
|
shape: [1] |
|
location: [138383756, 4] |
|
padding_sequence: |
|
type: PaddingSequence |
|
input: |
|
attention_mask:0: {} |
|
output: |
|
padding_sequence:0: {} |
|
attr: |
|
dst_shape: -1,12,0,-1 |
|
dims: 1 |
|
position_embeddings/after/reshape: |
|
type: Reshape |
|
input: |
|
distilbert.embeddings.position_embeddings.weight:0: {} |
|
input_ids:0: {} |
|
output: |
|
position_embeddings/after/reshape:0: {} |
|
attr: |
|
dst_shape: 1,-1,768 |
|
dims: 1 |
|
/distilbert/embeddings/position_embeddings/Gather: |
|
type: Reshape |
|
input: |
|
position_embeddings/after/reshape:0: {} |
|
output: |
|
/distilbert/embeddings/position_embeddings/Gather_output_0:0: {} |
|
attr: |
|
dst_shape: 1,-1 |
|
word_embeddings/reshape: |
|
type: Reshape |
|
input: |
|
input_ids:0: {} |
|
output: |
|
word_embeddings/reshape:0: {} |
|
attr: |
|
dst_shape: -1 |
|
/distilbert/embeddings/word_embeddings/Gather: |
|
type: Gather |
|
input: |
|
word_embeddings/reshape:0: {} |
|
distilbert.embeddings.word_embeddings.weight:0: {} |
|
/distilbert/embeddings/position_embeddings/Gather_output_0:0: {} |
|
input_ids:0: {} |
|
output: |
|
embeddings_add/reshape_2d:0: {} |
|
attr: |
|
axis: 0 |
|
batch_dims: 0 |
|
append_op: binary_add |
|
reshape: -1,-1,768 |
|
reshape_dims: 0,1 |
|
mul: 1,2 |
|
/distilbert/embeddings/LayerNorm/Add_1: |
|
type: LayerNorm |
|
input: |
|
embeddings_add/reshape_2d:0: {} |
|
distilbert.embeddings.LayerNorm.weight:0: {} |
|
distilbert.embeddings.LayerNorm.bias:0: {} |
|
output: |
|
/distilbert/embeddings/LayerNorm/Add_1_output_0:0: {} |
|
attr: |
|
epsilon: 9.999999960041972e-13 |
|
/distilbert/transformer/layer.0/attention/k_lin/Add_quant_0_Reorder_Post_0: |
|
type: Reorder |
|
input: |
|
/distilbert/embeddings/LayerNorm/Add_1_output_0:0: {} |
|
output: |
|
/distilbert/embeddings/LayerNorm/Add_1_output_0:0_reorder: {} |
|
attr: |
|
src_perm: 0,1 |
|
dst_perm: 1,0 |
|
/distilbert/transformer/layer.0/attention/k_lin/Add_quant_0: |
|
type: Quantize |
|
input: |
|
/distilbert/embeddings/LayerNorm/Add_1_output_0:0_reorder: {} |
|
/distilbert/embeddings/LayerNorm/Add_1_output_0:0_min: {} |
|
/distilbert/embeddings/LayerNorm/Add_1_output_0:0_max: {} |
|
output: |
|
/distilbert/embeddings/LayerNorm/Add_1_output_0:0_quant: {} |
|
attr: |
|
output_dtype: u8 |
|
/distilbert/transformer/layer.0/attention/k_lin/Add: |
|
type: InnerProduct |
|
input: |
|
/distilbert/transformer/layer.0/attention/k_lin/Transpose_output_0_quantized:0: {} |
|
/distilbert/embeddings/LayerNorm/Add_1_output_0:0_quant: {} |
|
distilbert.transformer.layer.0.attention.k_lin.bias:0: {} |
|
/distilbert/transformer/layer.0/attention/k_lin/Transpose_output_0_quantized:0_min: {} |
|
/distilbert/transformer/layer.0/attention/k_lin/Transpose_output_0_quantized:0_max: {} |
|
/distilbert/embeddings/LayerNorm/Add_1_output_0:0_min: {} |
|
/distilbert/embeddings/LayerNorm/Add_1_output_0:0_max: {} |
|
/distilbert/transformer/layer.0/attention/k_lin/Add_output_0:0_min: {} |
|
/distilbert/transformer/layer.0/attention/k_lin/Add_output_0:0_max: {} |
|
input_ids:0: {} |
|
output: |
|
/distilbert/transformer/layer.0/attention/Reshape_1_output_0:0: {} |
|
attr: |
|
reshape: 12,64,-1, -1 |
|
reshape_dims: '0' |
|
/distilbert/transformer/layer.0/attention/q_lin/Add: |
|
type: InnerProduct |
|
input: |
|
/distilbert/transformer/layer.0/attention/q_lin/Transpose_output_0_quantized:0: {} |
|
/distilbert/embeddings/LayerNorm/Add_1_output_0:0_quant: {} |
|
distilbert.transformer.layer.0.attention.q_lin.bias:0: {} |
|
/distilbert/transformer/layer.0/attention/q_lin/Transpose_output_0_quantized:0_min: {} |
|
/distilbert/transformer/layer.0/attention/q_lin/Transpose_output_0_quantized:0_max: {} |
|
/distilbert/embeddings/LayerNorm/Add_1_output_0:0_min: {} |
|
/distilbert/embeddings/LayerNorm/Add_1_output_0:0_max: {} |
|
/distilbert/transformer/layer.0/attention/q_lin/Add_output_0:0_min: {} |
|
/distilbert/transformer/layer.0/attention/q_lin/Add_output_0:0_max: {} |
|
input_ids:0: {} |
|
output: |
|
/distilbert/transformer/layer.0/attention/Reshape_output_0:0: {} |
|
attr: |
|
reshape: 12,64,-1, -1 |
|
reshape_dims: '0' |
|
/distilbert/transformer/layer.0/attention/v_lin/Add: |
|
type: InnerProduct |
|
input: |
|
/distilbert/transformer/layer.0/attention/v_lin/Transpose_output_0_quantized:0: {} |
|
/distilbert/embeddings/LayerNorm/Add_1_output_0:0_quant: {} |
|
distilbert.transformer.layer.0.attention.v_lin.bias:0: {} |
|
/distilbert/transformer/layer.0/attention/v_lin/Transpose_output_0_quantized:0_min: {} |
|
/distilbert/transformer/layer.0/attention/v_lin/Transpose_output_0_quantized:0_max: {} |
|
/distilbert/embeddings/LayerNorm/Add_1_output_0:0_min: {} |
|
/distilbert/embeddings/LayerNorm/Add_1_output_0:0_max: {} |
|
/distilbert/transformer/layer.0/attention/Reshape_2_output_0:0_min: {} |
|
/distilbert/transformer/layer.0/attention/Reshape_2_output_0:0_max: {} |
|
input_ids:0: {} |
|
output: |
|
/distilbert/transformer/layer.0/attention/Reshape_2_output_0:0: {} |
|
attr: |
|
output_dtype: s8 |
|
reshape: 12,64,-1, -1 |
|
reshape_dims: '0' |
|
/distilbert/transformer/layer.0/attention/Where: |
|
type: Matmul |
|
input: |
|
/distilbert/transformer/layer.0/attention/Reshape_output_0:0: {} |
|
/distilbert/transformer/layer.0/attention/Reshape_1_output_0:0: {} |
|
padding_sequence:0: {} |
|
output: |
|
/distilbert/transformer/layer.0/attention/Where_output_0:0: {} |
|
attr: |
|
src0_perm: 2,0,3,1 |
|
src1_perm: 2,0,1,3 |
|
output_scale: 0.125 |
|
format_any: false |
|
append_op: binary_add |
|
/distilbert/transformer/layer.0/attention/Softmax: |
|
type: Softmax |
|
input: |
|
/distilbert/transformer/layer.0/attention/Where_output_0:0: {} |
|
/distilbert/transformer/layer.0/attention/Softmax_output_0:0_min: {} |
|
/distilbert/transformer/layer.0/attention/Softmax_output_0:0_max: {} |
|
output: |
|
/distilbert/transformer/layer.0/attention/Softmax_output_0:0: {} |
|
attr: |
|
output_dtype: u8 |
|
/distilbert/transformer/layer.0/attention/Transpose_3: |
|
type: Matmul |
|
input: |
|
/distilbert/transformer/layer.0/attention/Softmax_output_0:0: {} |
|
/distilbert/transformer/layer.0/attention/Reshape_2_output_0:0: {} |
|
/distilbert/transformer/layer.0/attention/Softmax_output_0:0_min: {} |
|
/distilbert/transformer/layer.0/attention/Softmax_output_0:0_max: {} |
|
/distilbert/transformer/layer.0/attention/Reshape_2_output_0:0_min: {} |
|
/distilbert/transformer/layer.0/attention/Reshape_2_output_0:0_max: {} |
|
/distilbert/transformer/layer.0/attention/Reshape_4_output_0:0_min: {} |
|
/distilbert/transformer/layer.0/attention/Reshape_4_output_0:0_max: {} |
|
output: |
|
/distilbert/transformer/layer.0/attention/Reshape_4_output_0:0: {} |
|
attr: |
|
src1_perm: 2,0,3,1 |
|
dst_perm: 1,3,0,2 |
|
output_dtype: u8 |
|
reshape: 768,-1 |
|
/distilbert/transformer/layer.0/Add: |
|
type: InnerProduct |
|
input: |
|
/distilbert/transformer/layer.0/attention/out_lin/Transpose_output_0_quantized:0: {} |
|
/distilbert/transformer/layer.0/attention/Reshape_4_output_0:0: {} |
|
distilbert.transformer.layer.0.attention.out_lin.bias:0: {} |
|
/distilbert/embeddings/LayerNorm/Add_1_output_0:0_reorder: {} |
|
/distilbert/transformer/layer.0/attention/out_lin/Transpose_output_0_quantized:0_min: {} |
|
/distilbert/transformer/layer.0/attention/out_lin/Transpose_output_0_quantized:0_max: {} |
|
/distilbert/transformer/layer.0/attention/Reshape_4_output_0:0_min: {} |
|
/distilbert/transformer/layer.0/attention/Reshape_4_output_0:0_max: {} |
|
/distilbert/transformer/layer.0/Add_output_0:0_min: {} |
|
/distilbert/transformer/layer.0/Add_output_0:0_max: {} |
|
output: |
|
/distilbert/transformer/layer.0/Add_output_0:0: {} |
|
attr: |
|
append_op: sum |
|
/distilbert/transformer/layer.0/sa_layer_norm/Add_1: |
|
type: LayerNorm |
|
input: |
|
/distilbert/transformer/layer.0/Add_output_0:0: {} |
|
distilbert.transformer.layer.0.sa_layer_norm.weight:0: {} |
|
distilbert.transformer.layer.0.sa_layer_norm.bias:0: {} |
|
output: |
|
/distilbert/transformer/layer.0/sa_layer_norm/Add_1_output_0:0: {} |
|
attr: |
|
epsilon: 9.999999960041972e-13 |
|
transpose_mode: 1, 0 |
|
/distilbert/transformer/layer.0/ffn/activation/Mul_1_quant_0: |
|
type: Quantize |
|
input: |
|
/distilbert/transformer/layer.0/sa_layer_norm/Add_1_output_0:0: {} |
|
/distilbert/transformer/layer.0/sa_layer_norm/Add_1_output_0:0_min: {} |
|
/distilbert/transformer/layer.0/sa_layer_norm/Add_1_output_0:0_max: {} |
|
output: |
|
/distilbert/transformer/layer.0/sa_layer_norm/Add_1_output_0:0_quant: {} |
|
attr: |
|
output_dtype: u8 |
|
/distilbert/transformer/layer.0/ffn/activation/Mul_1: |
|
type: InnerProduct |
|
input: |
|
/distilbert/transformer/layer.0/ffn/lin1/Transpose_output_0_quantized:0: {} |
|
/distilbert/transformer/layer.0/sa_layer_norm/Add_1_output_0:0_quant: {} |
|
distilbert.transformer.layer.0.ffn.lin1.bias:0: {} |
|
/distilbert/transformer/layer.0/ffn/lin1/Transpose_output_0_quantized:0_min: {} |
|
/distilbert/transformer/layer.0/ffn/lin1/Transpose_output_0_quantized:0_max: {} |
|
/distilbert/transformer/layer.0/sa_layer_norm/Add_1_output_0:0_min: {} |
|
/distilbert/transformer/layer.0/sa_layer_norm/Add_1_output_0:0_max: {} |
|
/distilbert/transformer/layer.0/ffn/activation/Mul_1_output_0:0_min: {} |
|
/distilbert/transformer/layer.0/ffn/activation/Mul_1_output_0:0_max: {} |
|
output: |
|
/distilbert/transformer/layer.0/ffn/activation/Mul_1_output_0:0: {} |
|
attr: |
|
append_op: gelu_tanh |
|
output_dtype: u8 |
|
/distilbert/transformer/layer.0/Add_1: |
|
type: InnerProduct |
|
input: |
|
/distilbert/transformer/layer.0/ffn/lin2/Transpose_output_0_quantized:0: {} |
|
/distilbert/transformer/layer.0/ffn/activation/Mul_1_output_0:0: {} |
|
distilbert.transformer.layer.0.ffn.lin2.bias:0: {} |
|
/distilbert/transformer/layer.0/sa_layer_norm/Add_1_output_0:0: {} |
|
/distilbert/transformer/layer.0/ffn/lin2/Transpose_output_0_quantized:0_min: {} |
|
/distilbert/transformer/layer.0/ffn/lin2/Transpose_output_0_quantized:0_max: {} |
|
/distilbert/transformer/layer.0/ffn/activation/Mul_1_output_0:0_min: {} |
|
/distilbert/transformer/layer.0/ffn/activation/Mul_1_output_0:0_max: {} |
|
/distilbert/transformer/layer.0/Add_1_output_0:0_min: {} |
|
/distilbert/transformer/layer.0/Add_1_output_0:0_max: {} |
|
output: |
|
/distilbert/transformer/layer.0/Add_1_output_0:0: {} |
|
attr: |
|
append_op: sum |
|
/distilbert/transformer/layer.0/output_layer_norm/Add_1: |
|
type: LayerNorm |
|
input: |
|
/distilbert/transformer/layer.0/Add_1_output_0:0: {} |
|
distilbert.transformer.layer.0.output_layer_norm.weight:0: {} |
|
distilbert.transformer.layer.0.output_layer_norm.bias:0: {} |
|
output: |
|
/distilbert/transformer/layer.0/output_layer_norm/Add_1_output_0:0: {} |
|
attr: |
|
epsilon: 9.999999960041972e-13 |
|
transpose_mode: 1, 0 |
|
/distilbert/transformer/layer.1/attention/k_lin/Add_quant_0: |
|
type: Quantize |
|
input: |
|
/distilbert/transformer/layer.0/output_layer_norm/Add_1_output_0:0: {} |
|
/distilbert/transformer/layer.0/output_layer_norm/Add_1_output_0:0_min: {} |
|
/distilbert/transformer/layer.0/output_layer_norm/Add_1_output_0:0_max: {} |
|
output: |
|
/distilbert/transformer/layer.0/output_layer_norm/Add_1_output_0:0_quant: {} |
|
attr: |
|
output_dtype: u8 |
|
/distilbert/transformer/layer.1/attention/k_lin/Add: |
|
type: InnerProduct |
|
input: |
|
/distilbert/transformer/layer.1/attention/k_lin/Transpose_output_0_quantized:0: {} |
|
/distilbert/transformer/layer.0/output_layer_norm/Add_1_output_0:0_quant: {} |
|
distilbert.transformer.layer.1.attention.k_lin.bias:0: {} |
|
/distilbert/transformer/layer.1/attention/k_lin/Transpose_output_0_quantized:0_min: {} |
|
/distilbert/transformer/layer.1/attention/k_lin/Transpose_output_0_quantized:0_max: {} |
|
/distilbert/transformer/layer.0/output_layer_norm/Add_1_output_0:0_min: {} |
|
/distilbert/transformer/layer.0/output_layer_norm/Add_1_output_0:0_max: {} |
|
/distilbert/transformer/layer.1/attention/k_lin/Add_output_0:0_min: {} |
|
/distilbert/transformer/layer.1/attention/k_lin/Add_output_0:0_max: {} |
|
input_ids:0: {} |
|
output: |
|
/distilbert/transformer/layer.1/attention/Reshape_1_output_0:0: {} |
|
attr: |
|
reshape: 12,64,-1, -1 |
|
reshape_dims: '0' |
|
/distilbert/transformer/layer.1/attention/q_lin/Add: |
|
type: InnerProduct |
|
input: |
|
/distilbert/transformer/layer.1/attention/q_lin/Transpose_output_0_quantized:0: {} |
|
/distilbert/transformer/layer.0/output_layer_norm/Add_1_output_0:0_quant: {} |
|
distilbert.transformer.layer.1.attention.q_lin.bias:0: {} |
|
/distilbert/transformer/layer.1/attention/q_lin/Transpose_output_0_quantized:0_min: {} |
|
/distilbert/transformer/layer.1/attention/q_lin/Transpose_output_0_quantized:0_max: {} |
|
/distilbert/transformer/layer.0/output_layer_norm/Add_1_output_0:0_min: {} |
|
/distilbert/transformer/layer.0/output_layer_norm/Add_1_output_0:0_max: {} |
|
/distilbert/transformer/layer.1/attention/q_lin/Add_output_0:0_min: {} |
|
/distilbert/transformer/layer.1/attention/q_lin/Add_output_0:0_max: {} |
|
input_ids:0: {} |
|
output: |
|
/distilbert/transformer/layer.1/attention/Reshape_output_0:0: {} |
|
attr: |
|
reshape: 12,64,-1, -1 |
|
reshape_dims: '0' |
|
/distilbert/transformer/layer.1/attention/v_lin/Add: |
|
type: InnerProduct |
|
input: |
|
/distilbert/transformer/layer.1/attention/v_lin/Transpose_output_0_quantized:0: {} |
|
/distilbert/transformer/layer.0/output_layer_norm/Add_1_output_0:0_quant: {} |
|
distilbert.transformer.layer.1.attention.v_lin.bias:0: {} |
|
/distilbert/transformer/layer.1/attention/v_lin/Transpose_output_0_quantized:0_min: {} |
|
/distilbert/transformer/layer.1/attention/v_lin/Transpose_output_0_quantized:0_max: {} |
|
/distilbert/transformer/layer.0/output_layer_norm/Add_1_output_0:0_min: {} |
|
/distilbert/transformer/layer.0/output_layer_norm/Add_1_output_0:0_max: {} |
|
/distilbert/transformer/layer.1/attention/Reshape_2_output_0:0_min: {} |
|
/distilbert/transformer/layer.1/attention/Reshape_2_output_0:0_max: {} |
|
input_ids:0: {} |
|
output: |
|
/distilbert/transformer/layer.1/attention/Reshape_2_output_0:0: {} |
|
attr: |
|
output_dtype: s8 |
|
reshape: 12,64,-1, -1 |
|
reshape_dims: '0' |
|
/distilbert/transformer/layer.1/attention/Where: |
|
type: Matmul |
|
input: |
|
/distilbert/transformer/layer.1/attention/Reshape_output_0:0: {} |
|
/distilbert/transformer/layer.1/attention/Reshape_1_output_0:0: {} |
|
padding_sequence:0: {} |
|
output: |
|
/distilbert/transformer/layer.1/attention/Where_output_0:0: {} |
|
attr: |
|
src0_perm: 2,0,3,1 |
|
src1_perm: 2,0,1,3 |
|
output_scale: 0.125 |
|
format_any: false |
|
append_op: binary_add |
|
/distilbert/transformer/layer.1/attention/Softmax: |
|
type: Softmax |
|
input: |
|
/distilbert/transformer/layer.1/attention/Where_output_0:0: {} |
|
/distilbert/transformer/layer.1/attention/Softmax_output_0:0_min: {} |
|
/distilbert/transformer/layer.1/attention/Softmax_output_0:0_max: {} |
|
output: |
|
/distilbert/transformer/layer.1/attention/Softmax_output_0:0: {} |
|
attr: |
|
output_dtype: u8 |
|
/distilbert/transformer/layer.1/attention/Transpose_3: |
|
type: Matmul |
|
input: |
|
/distilbert/transformer/layer.1/attention/Softmax_output_0:0: {} |
|
/distilbert/transformer/layer.1/attention/Reshape_2_output_0:0: {} |
|
/distilbert/transformer/layer.1/attention/Softmax_output_0:0_min: {} |
|
/distilbert/transformer/layer.1/attention/Softmax_output_0:0_max: {} |
|
/distilbert/transformer/layer.1/attention/Reshape_2_output_0:0_min: {} |
|
/distilbert/transformer/layer.1/attention/Reshape_2_output_0:0_max: {} |
|
/distilbert/transformer/layer.1/attention/Reshape_4_output_0:0_min: {} |
|
/distilbert/transformer/layer.1/attention/Reshape_4_output_0:0_max: {} |
|
output: |
|
/distilbert/transformer/layer.1/attention/Reshape_4_output_0:0: {} |
|
attr: |
|
src1_perm: 2,0,3,1 |
|
dst_perm: 1,3,0,2 |
|
output_dtype: u8 |
|
reshape: 768,-1 |
|
/distilbert/transformer/layer.1/Add: |
|
type: InnerProduct |
|
input: |
|
/distilbert/transformer/layer.1/attention/out_lin/Transpose_output_0_quantized:0: {} |
|
/distilbert/transformer/layer.1/attention/Reshape_4_output_0:0: {} |
|
distilbert.transformer.layer.1.attention.out_lin.bias:0: {} |
|
/distilbert/transformer/layer.0/output_layer_norm/Add_1_output_0:0: {} |
|
/distilbert/transformer/layer.1/attention/out_lin/Transpose_output_0_quantized:0_min: {} |
|
/distilbert/transformer/layer.1/attention/out_lin/Transpose_output_0_quantized:0_max: {} |
|
/distilbert/transformer/layer.1/attention/Reshape_4_output_0:0_min: {} |
|
/distilbert/transformer/layer.1/attention/Reshape_4_output_0:0_max: {} |
|
/distilbert/transformer/layer.1/Add_output_0:0_min: {} |
|
/distilbert/transformer/layer.1/Add_output_0:0_max: {} |
|
output: |
|
/distilbert/transformer/layer.1/Add_output_0:0: {} |
|
attr: |
|
append_op: sum |
|
/distilbert/transformer/layer.1/sa_layer_norm/Add_1: |
|
type: LayerNorm |
|
input: |
|
/distilbert/transformer/layer.1/Add_output_0:0: {} |
|
distilbert.transformer.layer.1.sa_layer_norm.weight:0: {} |
|
distilbert.transformer.layer.1.sa_layer_norm.bias:0: {} |
|
output: |
|
/distilbert/transformer/layer.1/sa_layer_norm/Add_1_output_0:0: {} |
|
attr: |
|
epsilon: 9.999999960041972e-13 |
|
transpose_mode: 1, 0 |
|
/distilbert/transformer/layer.1/ffn/activation/Mul_1_quant_0: |
|
type: Quantize |
|
input: |
|
/distilbert/transformer/layer.1/sa_layer_norm/Add_1_output_0:0: {} |
|
/distilbert/transformer/layer.1/sa_layer_norm/Add_1_output_0:0_min: {} |
|
/distilbert/transformer/layer.1/sa_layer_norm/Add_1_output_0:0_max: {} |
|
output: |
|
/distilbert/transformer/layer.1/sa_layer_norm/Add_1_output_0:0_quant: {} |
|
attr: |
|
output_dtype: u8 |
|
/distilbert/transformer/layer.1/ffn/activation/Mul_1: |
|
type: InnerProduct |
|
input: |
|
/distilbert/transformer/layer.1/ffn/lin1/Transpose_output_0_quantized:0: {} |
|
/distilbert/transformer/layer.1/sa_layer_norm/Add_1_output_0:0_quant: {} |
|
distilbert.transformer.layer.1.ffn.lin1.bias:0: {} |
|
/distilbert/transformer/layer.1/ffn/lin1/Transpose_output_0_quantized:0_min: {} |
|
/distilbert/transformer/layer.1/ffn/lin1/Transpose_output_0_quantized:0_max: {} |
|
/distilbert/transformer/layer.1/sa_layer_norm/Add_1_output_0:0_min: {} |
|
/distilbert/transformer/layer.1/sa_layer_norm/Add_1_output_0:0_max: {} |
|
/distilbert/transformer/layer.1/ffn/activation/Mul_1_output_0:0_min: {} |
|
/distilbert/transformer/layer.1/ffn/activation/Mul_1_output_0:0_max: {} |
|
output: |
|
/distilbert/transformer/layer.1/ffn/activation/Mul_1_output_0:0: {} |
|
attr: |
|
append_op: gelu_tanh |
|
output_dtype: u8 |
|
/distilbert/transformer/layer.1/Add_1: |
|
type: InnerProduct |
|
input: |
|
/distilbert/transformer/layer.1/ffn/lin2/Transpose_output_0_quantized:0: {} |
|
/distilbert/transformer/layer.1/ffn/activation/Mul_1_output_0:0: {} |
|
distilbert.transformer.layer.1.ffn.lin2.bias:0: {} |
|
/distilbert/transformer/layer.1/sa_layer_norm/Add_1_output_0:0: {} |
|
/distilbert/transformer/layer.1/ffn/lin2/Transpose_output_0_quantized:0_min: {} |
|
/distilbert/transformer/layer.1/ffn/lin2/Transpose_output_0_quantized:0_max: {} |
|
/distilbert/transformer/layer.1/ffn/activation/Mul_1_output_0:0_min: {} |
|
/distilbert/transformer/layer.1/ffn/activation/Mul_1_output_0:0_max: {} |
|
/distilbert/transformer/layer.1/Add_1_output_0:0_min: {} |
|
/distilbert/transformer/layer.1/Add_1_output_0:0_max: {} |
|
output: |
|
/distilbert/transformer/layer.1/Add_1_output_0:0: {} |
|
attr: |
|
append_op: sum |
|
/distilbert/transformer/layer.1/output_layer_norm/Add_1: |
|
type: LayerNorm |
|
input: |
|
/distilbert/transformer/layer.1/Add_1_output_0:0: {} |
|
distilbert.transformer.layer.1.output_layer_norm.weight:0: {} |
|
distilbert.transformer.layer.1.output_layer_norm.bias:0: {} |
|
output: |
|
/distilbert/transformer/layer.1/output_layer_norm/Add_1_output_0:0: {} |
|
attr: |
|
epsilon: 9.999999960041972e-13 |
|
transpose_mode: 1, 0 |
|
/distilbert/transformer/layer.2/attention/k_lin/Add_quant_0: |
|
type: Quantize |
|
input: |
|
/distilbert/transformer/layer.1/output_layer_norm/Add_1_output_0:0: {} |
|
/distilbert/transformer/layer.1/output_layer_norm/Add_1_output_0:0_min: {} |
|
/distilbert/transformer/layer.1/output_layer_norm/Add_1_output_0:0_max: {} |
|
output: |
|
/distilbert/transformer/layer.1/output_layer_norm/Add_1_output_0:0_quant: {} |
|
attr: |
|
output_dtype: u8 |
|
/distilbert/transformer/layer.2/attention/k_lin/Add: |
|
type: InnerProduct |
|
input: |
|
/distilbert/transformer/layer.2/attention/k_lin/Transpose_output_0_quantized:0: {} |
|
/distilbert/transformer/layer.1/output_layer_norm/Add_1_output_0:0_quant: {} |
|
distilbert.transformer.layer.2.attention.k_lin.bias:0: {} |
|
/distilbert/transformer/layer.2/attention/k_lin/Transpose_output_0_quantized:0_min: {} |
|
/distilbert/transformer/layer.2/attention/k_lin/Transpose_output_0_quantized:0_max: {} |
|
/distilbert/transformer/layer.1/output_layer_norm/Add_1_output_0:0_min: {} |
|
/distilbert/transformer/layer.1/output_layer_norm/Add_1_output_0:0_max: {} |
|
/distilbert/transformer/layer.2/attention/k_lin/Add_output_0:0_min: {} |
|
/distilbert/transformer/layer.2/attention/k_lin/Add_output_0:0_max: {} |
|
input_ids:0: {} |
|
output: |
|
/distilbert/transformer/layer.2/attention/Reshape_1_output_0:0: {} |
|
attr: |
|
reshape: 12,64,-1, -1 |
|
reshape_dims: '0' |
|
/distilbert/transformer/layer.2/attention/q_lin/Add: |
|
type: InnerProduct |
|
input: |
|
/distilbert/transformer/layer.2/attention/q_lin/Transpose_output_0_quantized:0: {} |
|
/distilbert/transformer/layer.1/output_layer_norm/Add_1_output_0:0_quant: {} |
|
distilbert.transformer.layer.2.attention.q_lin.bias:0: {} |
|
/distilbert/transformer/layer.2/attention/q_lin/Transpose_output_0_quantized:0_min: {} |
|
/distilbert/transformer/layer.2/attention/q_lin/Transpose_output_0_quantized:0_max: {} |
|
/distilbert/transformer/layer.1/output_layer_norm/Add_1_output_0:0_min: {} |
|
/distilbert/transformer/layer.1/output_layer_norm/Add_1_output_0:0_max: {} |
|
/distilbert/transformer/layer.2/attention/q_lin/Add_output_0:0_min: {} |
|
/distilbert/transformer/layer.2/attention/q_lin/Add_output_0:0_max: {} |
|
input_ids:0: {} |
|
output: |
|
/distilbert/transformer/layer.2/attention/Reshape_output_0:0: {} |
|
attr: |
|
reshape: 12,64,-1, -1 |
|
reshape_dims: '0' |
|
/distilbert/transformer/layer.2/attention/v_lin/Add: |
|
type: InnerProduct |
|
input: |
|
/distilbert/transformer/layer.2/attention/v_lin/Transpose_output_0_quantized:0: {} |
|
/distilbert/transformer/layer.1/output_layer_norm/Add_1_output_0:0_quant: {} |
|
distilbert.transformer.layer.2.attention.v_lin.bias:0: {} |
|
/distilbert/transformer/layer.2/attention/v_lin/Transpose_output_0_quantized:0_min: {} |
|
/distilbert/transformer/layer.2/attention/v_lin/Transpose_output_0_quantized:0_max: {} |
|
/distilbert/transformer/layer.1/output_layer_norm/Add_1_output_0:0_min: {} |
|
/distilbert/transformer/layer.1/output_layer_norm/Add_1_output_0:0_max: {} |
|
/distilbert/transformer/layer.2/attention/Reshape_2_output_0:0_min: {} |
|
/distilbert/transformer/layer.2/attention/Reshape_2_output_0:0_max: {} |
|
input_ids:0: {} |
|
output: |
|
/distilbert/transformer/layer.2/attention/Reshape_2_output_0:0: {} |
|
attr: |
|
output_dtype: s8 |
|
reshape: 12,64,-1, -1 |
|
reshape_dims: '0' |
|
/distilbert/transformer/layer.2/attention/Where: |
|
type: Matmul |
|
input: |
|
/distilbert/transformer/layer.2/attention/Reshape_output_0:0: {} |
|
/distilbert/transformer/layer.2/attention/Reshape_1_output_0:0: {} |
|
padding_sequence:0: {} |
|
output: |
|
/distilbert/transformer/layer.2/attention/Where_output_0:0: {} |
|
attr: |
|
src0_perm: 2,0,3,1 |
|
src1_perm: 2,0,1,3 |
|
output_scale: 0.125 |
|
format_any: false |
|
append_op: binary_add |
|
/distilbert/transformer/layer.2/attention/Softmax: |
|
type: Softmax |
|
input: |
|
/distilbert/transformer/layer.2/attention/Where_output_0:0: {} |
|
/distilbert/transformer/layer.2/attention/Softmax_output_0:0_min: {} |
|
/distilbert/transformer/layer.2/attention/Softmax_output_0:0_max: {} |
|
output: |
|
/distilbert/transformer/layer.2/attention/Softmax_output_0:0: {} |
|
attr: |
|
output_dtype: u8 |
|
/distilbert/transformer/layer.2/attention/Transpose_3: |
|
type: Matmul |
|
input: |
|
/distilbert/transformer/layer.2/attention/Softmax_output_0:0: {} |
|
/distilbert/transformer/layer.2/attention/Reshape_2_output_0:0: {} |
|
/distilbert/transformer/layer.2/attention/Softmax_output_0:0_min: {} |
|
/distilbert/transformer/layer.2/attention/Softmax_output_0:0_max: {} |
|
/distilbert/transformer/layer.2/attention/Reshape_2_output_0:0_min: {} |
|
/distilbert/transformer/layer.2/attention/Reshape_2_output_0:0_max: {} |
|
/distilbert/transformer/layer.2/attention/Reshape_4_output_0:0_min: {} |
|
/distilbert/transformer/layer.2/attention/Reshape_4_output_0:0_max: {} |
|
output: |
|
/distilbert/transformer/layer.2/attention/Reshape_4_output_0:0: {} |
|
attr: |
|
src1_perm: 2,0,3,1 |
|
dst_perm: 1,3,0,2 |
|
output_dtype: u8 |
|
reshape: 768,-1 |
|
/distilbert/transformer/layer.2/Add: |
|
type: InnerProduct |
|
input: |
|
/distilbert/transformer/layer.2/attention/out_lin/Transpose_output_0_quantized:0: {} |
|
/distilbert/transformer/layer.2/attention/Reshape_4_output_0:0: {} |
|
distilbert.transformer.layer.2.attention.out_lin.bias:0: {} |
|
/distilbert/transformer/layer.1/output_layer_norm/Add_1_output_0:0: {} |
|
/distilbert/transformer/layer.2/attention/out_lin/Transpose_output_0_quantized:0_min: {} |
|
/distilbert/transformer/layer.2/attention/out_lin/Transpose_output_0_quantized:0_max: {} |
|
/distilbert/transformer/layer.2/attention/Reshape_4_output_0:0_min: {} |
|
/distilbert/transformer/layer.2/attention/Reshape_4_output_0:0_max: {} |
|
/distilbert/transformer/layer.2/Add_output_0:0_min: {} |
|
/distilbert/transformer/layer.2/Add_output_0:0_max: {} |
|
output: |
|
/distilbert/transformer/layer.2/Add_output_0:0: {} |
|
attr: |
|
append_op: sum |
|
/distilbert/transformer/layer.2/sa_layer_norm/Add_1: |
|
type: LayerNorm |
|
input: |
|
/distilbert/transformer/layer.2/Add_output_0:0: {} |
|
distilbert.transformer.layer.2.sa_layer_norm.weight:0: {} |
|
distilbert.transformer.layer.2.sa_layer_norm.bias:0: {} |
|
output: |
|
/distilbert/transformer/layer.2/sa_layer_norm/Add_1_output_0:0: {} |
|
attr: |
|
epsilon: 9.999999960041972e-13 |
|
transpose_mode: 1, 0 |
|
/distilbert/transformer/layer.2/ffn/activation/Mul_1_quant_0: |
|
type: Quantize |
|
input: |
|
/distilbert/transformer/layer.2/sa_layer_norm/Add_1_output_0:0: {} |
|
/distilbert/transformer/layer.2/sa_layer_norm/Add_1_output_0:0_min: {} |
|
/distilbert/transformer/layer.2/sa_layer_norm/Add_1_output_0:0_max: {} |
|
output: |
|
/distilbert/transformer/layer.2/sa_layer_norm/Add_1_output_0:0_quant: {} |
|
attr: |
|
output_dtype: u8 |
|
/distilbert/transformer/layer.2/ffn/activation/Mul_1: |
|
type: InnerProduct |
|
input: |
|
/distilbert/transformer/layer.2/ffn/lin1/Transpose_output_0_quantized:0: {} |
|
/distilbert/transformer/layer.2/sa_layer_norm/Add_1_output_0:0_quant: {} |
|
distilbert.transformer.layer.2.ffn.lin1.bias:0: {} |
|
/distilbert/transformer/layer.2/ffn/lin1/Transpose_output_0_quantized:0_min: {} |
|
/distilbert/transformer/layer.2/ffn/lin1/Transpose_output_0_quantized:0_max: {} |
|
/distilbert/transformer/layer.2/sa_layer_norm/Add_1_output_0:0_min: {} |
|
/distilbert/transformer/layer.2/sa_layer_norm/Add_1_output_0:0_max: {} |
|
/distilbert/transformer/layer.2/ffn/activation/Mul_1_output_0:0_min: {} |
|
/distilbert/transformer/layer.2/ffn/activation/Mul_1_output_0:0_max: {} |
|
output: |
|
/distilbert/transformer/layer.2/ffn/activation/Mul_1_output_0:0: {} |
|
attr: |
|
append_op: gelu_tanh |
|
output_dtype: u8 |
|
/distilbert/transformer/layer.2/Add_1: |
|
type: InnerProduct |
|
input: |
|
/distilbert/transformer/layer.2/ffn/lin2/Transpose_output_0_quantized:0: {} |
|
/distilbert/transformer/layer.2/ffn/activation/Mul_1_output_0:0: {} |
|
distilbert.transformer.layer.2.ffn.lin2.bias:0: {} |
|
/distilbert/transformer/layer.2/sa_layer_norm/Add_1_output_0:0: {} |
|
/distilbert/transformer/layer.2/ffn/lin2/Transpose_output_0_quantized:0_min: {} |
|
/distilbert/transformer/layer.2/ffn/lin2/Transpose_output_0_quantized:0_max: {} |
|
/distilbert/transformer/layer.2/ffn/activation/Mul_1_output_0:0_min: {} |
|
/distilbert/transformer/layer.2/ffn/activation/Mul_1_output_0:0_max: {} |
|
/distilbert/transformer/layer.2/Add_1_output_0:0_min: {} |
|
/distilbert/transformer/layer.2/Add_1_output_0:0_max: {} |
|
output: |
|
/distilbert/transformer/layer.2/Add_1_output_0:0: {} |
|
attr: |
|
append_op: sum |
|
/distilbert/transformer/layer.2/output_layer_norm/Add_1: |
|
type: LayerNorm |
|
input: |
|
/distilbert/transformer/layer.2/Add_1_output_0:0: {} |
|
distilbert.transformer.layer.2.output_layer_norm.weight:0: {} |
|
distilbert.transformer.layer.2.output_layer_norm.bias:0: {} |
|
output: |
|
/distilbert/transformer/layer.2/output_layer_norm/Add_1_output_0:0: {} |
|
attr: |
|
epsilon: 9.999999960041972e-13 |
|
transpose_mode: 1, 0 |
|
/distilbert/transformer/layer.3/attention/k_lin/Add_quant_0: |
|
type: Quantize |
|
input: |
|
/distilbert/transformer/layer.2/output_layer_norm/Add_1_output_0:0: {} |
|
/distilbert/transformer/layer.2/output_layer_norm/Add_1_output_0:0_min: {} |
|
/distilbert/transformer/layer.2/output_layer_norm/Add_1_output_0:0_max: {} |
|
output: |
|
/distilbert/transformer/layer.2/output_layer_norm/Add_1_output_0:0_quant: {} |
|
attr: |
|
output_dtype: u8 |
|
/distilbert/transformer/layer.3/attention/k_lin/Add: |
|
type: InnerProduct |
|
input: |
|
/distilbert/transformer/layer.3/attention/k_lin/Transpose_output_0_quantized:0: {} |
|
/distilbert/transformer/layer.2/output_layer_norm/Add_1_output_0:0_quant: {} |
|
distilbert.transformer.layer.3.attention.k_lin.bias:0: {} |
|
/distilbert/transformer/layer.3/attention/k_lin/Transpose_output_0_quantized:0_min: {} |
|
/distilbert/transformer/layer.3/attention/k_lin/Transpose_output_0_quantized:0_max: {} |
|
/distilbert/transformer/layer.2/output_layer_norm/Add_1_output_0:0_min: {} |
|
/distilbert/transformer/layer.2/output_layer_norm/Add_1_output_0:0_max: {} |
|
/distilbert/transformer/layer.3/attention/k_lin/Add_output_0:0_min: {} |
|
/distilbert/transformer/layer.3/attention/k_lin/Add_output_0:0_max: {} |
|
input_ids:0: {} |
|
output: |
|
/distilbert/transformer/layer.3/attention/Reshape_1_output_0:0: {} |
|
attr: |
|
reshape: 12,64,-1, -1 |
|
reshape_dims: '0' |
|
/distilbert/transformer/layer.3/attention/q_lin/Add: |
|
type: InnerProduct |
|
input: |
|
/distilbert/transformer/layer.3/attention/q_lin/Transpose_output_0_quantized:0: {} |
|
/distilbert/transformer/layer.2/output_layer_norm/Add_1_output_0:0_quant: {} |
|
distilbert.transformer.layer.3.attention.q_lin.bias:0: {} |
|
/distilbert/transformer/layer.3/attention/q_lin/Transpose_output_0_quantized:0_min: {} |
|
/distilbert/transformer/layer.3/attention/q_lin/Transpose_output_0_quantized:0_max: {} |
|
/distilbert/transformer/layer.2/output_layer_norm/Add_1_output_0:0_min: {} |
|
/distilbert/transformer/layer.2/output_layer_norm/Add_1_output_0:0_max: {} |
|
/distilbert/transformer/layer.3/attention/q_lin/Add_output_0:0_min: {} |
|
/distilbert/transformer/layer.3/attention/q_lin/Add_output_0:0_max: {} |
|
input_ids:0: {} |
|
output: |
|
/distilbert/transformer/layer.3/attention/Reshape_output_0:0: {} |
|
attr: |
|
reshape: 12,64,-1, -1 |
|
reshape_dims: '0' |
|
/distilbert/transformer/layer.3/attention/v_lin/Add: |
|
type: InnerProduct |
|
input: |
|
/distilbert/transformer/layer.3/attention/v_lin/Transpose_output_0_quantized:0: {} |
|
/distilbert/transformer/layer.2/output_layer_norm/Add_1_output_0:0_quant: {} |
|
distilbert.transformer.layer.3.attention.v_lin.bias:0: {} |
|
/distilbert/transformer/layer.3/attention/v_lin/Transpose_output_0_quantized:0_min: {} |
|
/distilbert/transformer/layer.3/attention/v_lin/Transpose_output_0_quantized:0_max: {} |
|
/distilbert/transformer/layer.2/output_layer_norm/Add_1_output_0:0_min: {} |
|
/distilbert/transformer/layer.2/output_layer_norm/Add_1_output_0:0_max: {} |
|
/distilbert/transformer/layer.3/attention/Reshape_2_output_0:0_min: {} |
|
/distilbert/transformer/layer.3/attention/Reshape_2_output_0:0_max: {} |
|
input_ids:0: {} |
|
output: |
|
/distilbert/transformer/layer.3/attention/Reshape_2_output_0:0: {} |
|
attr: |
|
output_dtype: s8 |
|
reshape: 12,64,-1, -1 |
|
reshape_dims: '0' |
|
/distilbert/transformer/layer.3/attention/Where: |
|
type: Matmul |
|
input: |
|
/distilbert/transformer/layer.3/attention/Reshape_output_0:0: {} |
|
/distilbert/transformer/layer.3/attention/Reshape_1_output_0:0: {} |
|
padding_sequence:0: {} |
|
output: |
|
/distilbert/transformer/layer.3/attention/Where_output_0:0: {} |
|
attr: |
|
src0_perm: 2,0,3,1 |
|
src1_perm: 2,0,1,3 |
|
output_scale: 0.125 |
|
format_any: false |
|
append_op: binary_add |
|
/distilbert/transformer/layer.3/attention/Softmax: |
|
type: Softmax |
|
input: |
|
/distilbert/transformer/layer.3/attention/Where_output_0:0: {} |
|
/distilbert/transformer/layer.3/attention/Softmax_output_0:0_min: {} |
|
/distilbert/transformer/layer.3/attention/Softmax_output_0:0_max: {} |
|
output: |
|
/distilbert/transformer/layer.3/attention/Softmax_output_0:0: {} |
|
attr: |
|
output_dtype: u8 |
|
/distilbert/transformer/layer.3/attention/Transpose_3: |
|
type: Matmul |
|
input: |
|
/distilbert/transformer/layer.3/attention/Softmax_output_0:0: {} |
|
/distilbert/transformer/layer.3/attention/Reshape_2_output_0:0: {} |
|
/distilbert/transformer/layer.3/attention/Softmax_output_0:0_min: {} |
|
/distilbert/transformer/layer.3/attention/Softmax_output_0:0_max: {} |
|
/distilbert/transformer/layer.3/attention/Reshape_2_output_0:0_min: {} |
|
/distilbert/transformer/layer.3/attention/Reshape_2_output_0:0_max: {} |
|
/distilbert/transformer/layer.3/attention/Reshape_4_output_0:0_min: {} |
|
/distilbert/transformer/layer.3/attention/Reshape_4_output_0:0_max: {} |
|
output: |
|
/distilbert/transformer/layer.3/attention/Reshape_4_output_0:0: {} |
|
attr: |
|
src1_perm: 2,0,3,1 |
|
dst_perm: 1,3,0,2 |
|
output_dtype: u8 |
|
reshape: 768,-1 |
|
/distilbert/transformer/layer.3/Add: |
|
type: InnerProduct |
|
input: |
|
/distilbert/transformer/layer.3/attention/out_lin/Transpose_output_0_quantized:0: {} |
|
/distilbert/transformer/layer.3/attention/Reshape_4_output_0:0: {} |
|
distilbert.transformer.layer.3.attention.out_lin.bias:0: {} |
|
/distilbert/transformer/layer.2/output_layer_norm/Add_1_output_0:0: {} |
|
/distilbert/transformer/layer.3/attention/out_lin/Transpose_output_0_quantized:0_min: {} |
|
/distilbert/transformer/layer.3/attention/out_lin/Transpose_output_0_quantized:0_max: {} |
|
/distilbert/transformer/layer.3/attention/Reshape_4_output_0:0_min: {} |
|
/distilbert/transformer/layer.3/attention/Reshape_4_output_0:0_max: {} |
|
/distilbert/transformer/layer.3/Add_output_0:0_min: {} |
|
/distilbert/transformer/layer.3/Add_output_0:0_max: {} |
|
output: |
|
/distilbert/transformer/layer.3/Add_output_0:0: {} |
|
attr: |
|
append_op: sum |
|
/distilbert/transformer/layer.3/sa_layer_norm/Add_1: |
|
type: LayerNorm |
|
input: |
|
/distilbert/transformer/layer.3/Add_output_0:0: {} |
|
distilbert.transformer.layer.3.sa_layer_norm.weight:0: {} |
|
distilbert.transformer.layer.3.sa_layer_norm.bias:0: {} |
|
output: |
|
/distilbert/transformer/layer.3/sa_layer_norm/Add_1_output_0:0: {} |
|
attr: |
|
epsilon: 9.999999960041972e-13 |
|
transpose_mode: 1, 0 |
|
/distilbert/transformer/layer.3/ffn/activation/Mul_1_quant_0: |
|
type: Quantize |
|
input: |
|
/distilbert/transformer/layer.3/sa_layer_norm/Add_1_output_0:0: {} |
|
/distilbert/transformer/layer.3/sa_layer_norm/Add_1_output_0:0_min: {} |
|
/distilbert/transformer/layer.3/sa_layer_norm/Add_1_output_0:0_max: {} |
|
output: |
|
/distilbert/transformer/layer.3/sa_layer_norm/Add_1_output_0:0_quant: {} |
|
attr: |
|
output_dtype: u8 |
|
/distilbert/transformer/layer.3/ffn/activation/Mul_1: |
|
type: InnerProduct |
|
input: |
|
/distilbert/transformer/layer.3/ffn/lin1/Transpose_output_0_quantized:0: {} |
|
/distilbert/transformer/layer.3/sa_layer_norm/Add_1_output_0:0_quant: {} |
|
distilbert.transformer.layer.3.ffn.lin1.bias:0: {} |
|
/distilbert/transformer/layer.3/ffn/lin1/Transpose_output_0_quantized:0_min: {} |
|
/distilbert/transformer/layer.3/ffn/lin1/Transpose_output_0_quantized:0_max: {} |
|
/distilbert/transformer/layer.3/sa_layer_norm/Add_1_output_0:0_min: {} |
|
/distilbert/transformer/layer.3/sa_layer_norm/Add_1_output_0:0_max: {} |
|
/distilbert/transformer/layer.3/ffn/activation/Mul_1_output_0:0_min: {} |
|
/distilbert/transformer/layer.3/ffn/activation/Mul_1_output_0:0_max: {} |
|
output: |
|
/distilbert/transformer/layer.3/ffn/activation/Mul_1_output_0:0: {} |
|
attr: |
|
append_op: gelu_tanh |
|
output_dtype: u8 |
|
/distilbert/transformer/layer.3/Add_1: |
|
type: InnerProduct |
|
input: |
|
/distilbert/transformer/layer.3/ffn/lin2/Transpose_output_0_quantized:0: {} |
|
/distilbert/transformer/layer.3/ffn/activation/Mul_1_output_0:0: {} |
|
distilbert.transformer.layer.3.ffn.lin2.bias:0: {} |
|
/distilbert/transformer/layer.3/sa_layer_norm/Add_1_output_0:0: {} |
|
/distilbert/transformer/layer.3/ffn/lin2/Transpose_output_0_quantized:0_min: {} |
|
/distilbert/transformer/layer.3/ffn/lin2/Transpose_output_0_quantized:0_max: {} |
|
/distilbert/transformer/layer.3/ffn/activation/Mul_1_output_0:0_min: {} |
|
/distilbert/transformer/layer.3/ffn/activation/Mul_1_output_0:0_max: {} |
|
/distilbert/transformer/layer.3/Add_1_output_0:0_min: {} |
|
/distilbert/transformer/layer.3/Add_1_output_0:0_max: {} |
|
output: |
|
/distilbert/transformer/layer.3/Add_1_output_0:0: {} |
|
attr: |
|
append_op: sum |
|
/distilbert/transformer/layer.3/output_layer_norm/Add_1: |
|
type: LayerNorm |
|
input: |
|
/distilbert/transformer/layer.3/Add_1_output_0:0: {} |
|
distilbert.transformer.layer.3.output_layer_norm.weight:0: {} |
|
distilbert.transformer.layer.3.output_layer_norm.bias:0: {} |
|
output: |
|
/distilbert/transformer/layer.3/output_layer_norm/Add_1_output_0:0: {} |
|
attr: |
|
epsilon: 9.999999960041972e-13 |
|
transpose_mode: 1, 0 |
|
/distilbert/transformer/layer.4/attention/k_lin/Add_quant_0: |
|
type: Quantize |
|
input: |
|
/distilbert/transformer/layer.3/output_layer_norm/Add_1_output_0:0: {} |
|
/distilbert/transformer/layer.3/output_layer_norm/Add_1_output_0:0_min: {} |
|
/distilbert/transformer/layer.3/output_layer_norm/Add_1_output_0:0_max: {} |
|
output: |
|
/distilbert/transformer/layer.3/output_layer_norm/Add_1_output_0:0_quant: {} |
|
attr: |
|
output_dtype: u8 |
|
/distilbert/transformer/layer.4/attention/k_lin/Add: |
|
type: InnerProduct |
|
input: |
|
/distilbert/transformer/layer.4/attention/k_lin/Transpose_output_0_quantized:0: {} |
|
/distilbert/transformer/layer.3/output_layer_norm/Add_1_output_0:0_quant: {} |
|
distilbert.transformer.layer.4.attention.k_lin.bias:0: {} |
|
/distilbert/transformer/layer.4/attention/k_lin/Transpose_output_0_quantized:0_min: {} |
|
/distilbert/transformer/layer.4/attention/k_lin/Transpose_output_0_quantized:0_max: {} |
|
/distilbert/transformer/layer.3/output_layer_norm/Add_1_output_0:0_min: {} |
|
/distilbert/transformer/layer.3/output_layer_norm/Add_1_output_0:0_max: {} |
|
/distilbert/transformer/layer.4/attention/k_lin/Add_output_0:0_min: {} |
|
/distilbert/transformer/layer.4/attention/k_lin/Add_output_0:0_max: {} |
|
input_ids:0: {} |
|
output: |
|
/distilbert/transformer/layer.4/attention/Reshape_1_output_0:0: {} |
|
attr: |
|
reshape: 12,64,-1, -1 |
|
reshape_dims: '0' |
|
/distilbert/transformer/layer.4/attention/q_lin/Add: |
|
type: InnerProduct |
|
input: |
|
/distilbert/transformer/layer.4/attention/q_lin/Transpose_output_0_quantized:0: {} |
|
/distilbert/transformer/layer.3/output_layer_norm/Add_1_output_0:0_quant: {} |
|
distilbert.transformer.layer.4.attention.q_lin.bias:0: {} |
|
/distilbert/transformer/layer.4/attention/q_lin/Transpose_output_0_quantized:0_min: {} |
|
/distilbert/transformer/layer.4/attention/q_lin/Transpose_output_0_quantized:0_max: {} |
|
/distilbert/transformer/layer.3/output_layer_norm/Add_1_output_0:0_min: {} |
|
/distilbert/transformer/layer.3/output_layer_norm/Add_1_output_0:0_max: {} |
|
/distilbert/transformer/layer.4/attention/q_lin/Add_output_0:0_min: {} |
|
/distilbert/transformer/layer.4/attention/q_lin/Add_output_0:0_max: {} |
|
input_ids:0: {} |
|
output: |
|
/distilbert/transformer/layer.4/attention/Reshape_output_0:0: {} |
|
attr: |
|
reshape: 12,64,-1, -1 |
|
reshape_dims: '0' |
|
/distilbert/transformer/layer.4/attention/v_lin/Add: |
|
type: InnerProduct |
|
input: |
|
/distilbert/transformer/layer.4/attention/v_lin/Transpose_output_0_quantized:0: {} |
|
/distilbert/transformer/layer.3/output_layer_norm/Add_1_output_0:0_quant: {} |
|
distilbert.transformer.layer.4.attention.v_lin.bias:0: {} |
|
/distilbert/transformer/layer.4/attention/v_lin/Transpose_output_0_quantized:0_min: {} |
|
/distilbert/transformer/layer.4/attention/v_lin/Transpose_output_0_quantized:0_max: {} |
|
/distilbert/transformer/layer.3/output_layer_norm/Add_1_output_0:0_min: {} |
|
/distilbert/transformer/layer.3/output_layer_norm/Add_1_output_0:0_max: {} |
|
/distilbert/transformer/layer.4/attention/Reshape_2_output_0:0_min: {} |
|
/distilbert/transformer/layer.4/attention/Reshape_2_output_0:0_max: {} |
|
input_ids:0: {} |
|
output: |
|
/distilbert/transformer/layer.4/attention/Reshape_2_output_0:0: {} |
|
attr: |
|
output_dtype: s8 |
|
reshape: 12,64,-1, -1 |
|
reshape_dims: '0' |
|
/distilbert/transformer/layer.4/attention/Where: |
|
type: Matmul |
|
input: |
|
/distilbert/transformer/layer.4/attention/Reshape_output_0:0: {} |
|
/distilbert/transformer/layer.4/attention/Reshape_1_output_0:0: {} |
|
padding_sequence:0: {} |
|
output: |
|
/distilbert/transformer/layer.4/attention/Where_output_0:0: {} |
|
attr: |
|
src0_perm: 2,0,3,1 |
|
src1_perm: 2,0,1,3 |
|
output_scale: 0.125 |
|
format_any: false |
|
append_op: binary_add |
|
/distilbert/transformer/layer.4/attention/Softmax: |
|
type: Softmax |
|
input: |
|
/distilbert/transformer/layer.4/attention/Where_output_0:0: {} |
|
/distilbert/transformer/layer.4/attention/Softmax_output_0:0_min: {} |
|
/distilbert/transformer/layer.4/attention/Softmax_output_0:0_max: {} |
|
output: |
|
/distilbert/transformer/layer.4/attention/Softmax_output_0:0: {} |
|
attr: |
|
output_dtype: u8 |
|
/distilbert/transformer/layer.4/attention/Transpose_3: |
|
type: Matmul |
|
input: |
|
/distilbert/transformer/layer.4/attention/Softmax_output_0:0: {} |
|
/distilbert/transformer/layer.4/attention/Reshape_2_output_0:0: {} |
|
/distilbert/transformer/layer.4/attention/Softmax_output_0:0_min: {} |
|
/distilbert/transformer/layer.4/attention/Softmax_output_0:0_max: {} |
|
/distilbert/transformer/layer.4/attention/Reshape_2_output_0:0_min: {} |
|
/distilbert/transformer/layer.4/attention/Reshape_2_output_0:0_max: {} |
|
/distilbert/transformer/layer.4/attention/Reshape_4_output_0:0_min: {} |
|
/distilbert/transformer/layer.4/attention/Reshape_4_output_0:0_max: {} |
|
output: |
|
/distilbert/transformer/layer.4/attention/Reshape_4_output_0:0: {} |
|
attr: |
|
src1_perm: 2,0,3,1 |
|
dst_perm: 1,3,0,2 |
|
output_dtype: u8 |
|
reshape: 768,-1 |
|
/distilbert/transformer/layer.4/Add: |
|
type: InnerProduct |
|
input: |
|
/distilbert/transformer/layer.4/attention/out_lin/Transpose_output_0_quantized:0: {} |
|
/distilbert/transformer/layer.4/attention/Reshape_4_output_0:0: {} |
|
distilbert.transformer.layer.4.attention.out_lin.bias:0: {} |
|
/distilbert/transformer/layer.3/output_layer_norm/Add_1_output_0:0: {} |
|
/distilbert/transformer/layer.4/attention/out_lin/Transpose_output_0_quantized:0_min: {} |
|
/distilbert/transformer/layer.4/attention/out_lin/Transpose_output_0_quantized:0_max: {} |
|
/distilbert/transformer/layer.4/attention/Reshape_4_output_0:0_min: {} |
|
/distilbert/transformer/layer.4/attention/Reshape_4_output_0:0_max: {} |
|
/distilbert/transformer/layer.4/Add_output_0:0_min: {} |
|
/distilbert/transformer/layer.4/Add_output_0:0_max: {} |
|
output: |
|
/distilbert/transformer/layer.4/Add_output_0:0: {} |
|
attr: |
|
append_op: sum |
|
/distilbert/transformer/layer.4/sa_layer_norm/Add_1: |
|
type: LayerNorm |
|
input: |
|
/distilbert/transformer/layer.4/Add_output_0:0: {} |
|
distilbert.transformer.layer.4.sa_layer_norm.weight:0: {} |
|
distilbert.transformer.layer.4.sa_layer_norm.bias:0: {} |
|
output: |
|
/distilbert/transformer/layer.4/sa_layer_norm/Add_1_output_0:0: {} |
|
attr: |
|
epsilon: 9.999999960041972e-13 |
|
transpose_mode: 1, 0 |
|
/distilbert/transformer/layer.4/ffn/activation/Mul_1_quant_0: |
|
type: Quantize |
|
input: |
|
/distilbert/transformer/layer.4/sa_layer_norm/Add_1_output_0:0: {} |
|
/distilbert/transformer/layer.4/sa_layer_norm/Add_1_output_0:0_min: {} |
|
/distilbert/transformer/layer.4/sa_layer_norm/Add_1_output_0:0_max: {} |
|
output: |
|
/distilbert/transformer/layer.4/sa_layer_norm/Add_1_output_0:0_quant: {} |
|
attr: |
|
output_dtype: u8 |
|
/distilbert/transformer/layer.4/ffn/activation/Mul_1: |
|
type: InnerProduct |
|
input: |
|
/distilbert/transformer/layer.4/ffn/lin1/Transpose_output_0_quantized:0: {} |
|
/distilbert/transformer/layer.4/sa_layer_norm/Add_1_output_0:0_quant: {} |
|
distilbert.transformer.layer.4.ffn.lin1.bias:0: {} |
|
/distilbert/transformer/layer.4/ffn/lin1/Transpose_output_0_quantized:0_min: {} |
|
/distilbert/transformer/layer.4/ffn/lin1/Transpose_output_0_quantized:0_max: {} |
|
/distilbert/transformer/layer.4/sa_layer_norm/Add_1_output_0:0_min: {} |
|
/distilbert/transformer/layer.4/sa_layer_norm/Add_1_output_0:0_max: {} |
|
/distilbert/transformer/layer.4/ffn/activation/Mul_1_output_0:0_min: {} |
|
/distilbert/transformer/layer.4/ffn/activation/Mul_1_output_0:0_max: {} |
|
output: |
|
/distilbert/transformer/layer.4/ffn/activation/Mul_1_output_0:0: {} |
|
attr: |
|
append_op: gelu_tanh |
|
output_dtype: u8 |
|
/distilbert/transformer/layer.4/Add_1: |
|
type: InnerProduct |
|
input: |
|
/distilbert/transformer/layer.4/ffn/lin2/Transpose_output_0_quantized:0: {} |
|
/distilbert/transformer/layer.4/ffn/activation/Mul_1_output_0:0: {} |
|
distilbert.transformer.layer.4.ffn.lin2.bias:0: {} |
|
/distilbert/transformer/layer.4/sa_layer_norm/Add_1_output_0:0: {} |
|
/distilbert/transformer/layer.4/ffn/lin2/Transpose_output_0_quantized:0_min: {} |
|
/distilbert/transformer/layer.4/ffn/lin2/Transpose_output_0_quantized:0_max: {} |
|
/distilbert/transformer/layer.4/ffn/activation/Mul_1_output_0:0_min: {} |
|
/distilbert/transformer/layer.4/ffn/activation/Mul_1_output_0:0_max: {} |
|
/distilbert/transformer/layer.4/Add_1_output_0:0_min: {} |
|
/distilbert/transformer/layer.4/Add_1_output_0:0_max: {} |
|
output: |
|
/distilbert/transformer/layer.4/Add_1_output_0:0: {} |
|
attr: |
|
append_op: sum |
|
/distilbert/transformer/layer.4/output_layer_norm/Add_1: |
|
type: LayerNorm |
|
input: |
|
/distilbert/transformer/layer.4/Add_1_output_0:0: {} |
|
distilbert.transformer.layer.4.output_layer_norm.weight:0: {} |
|
distilbert.transformer.layer.4.output_layer_norm.bias:0: {} |
|
output: |
|
/distilbert/transformer/layer.4/output_layer_norm/Add_1_output_0:0: {} |
|
attr: |
|
epsilon: 9.999999960041972e-13 |
|
transpose_mode: 1, 0 |
|
/distilbert/transformer/layer.5/attention/k_lin/Add_quant_0: |
|
type: Quantize |
|
input: |
|
/distilbert/transformer/layer.4/output_layer_norm/Add_1_output_0:0: {} |
|
/distilbert/transformer/layer.4/output_layer_norm/Add_1_output_0:0_min: {} |
|
/distilbert/transformer/layer.4/output_layer_norm/Add_1_output_0:0_max: {} |
|
output: |
|
/distilbert/transformer/layer.4/output_layer_norm/Add_1_output_0:0_quant: {} |
|
attr: |
|
output_dtype: u8 |
|
/distilbert/transformer/layer.5/attention/k_lin/Add: |
|
type: InnerProduct |
|
input: |
|
/distilbert/transformer/layer.5/attention/k_lin/Transpose_output_0_quantized:0: {} |
|
/distilbert/transformer/layer.4/output_layer_norm/Add_1_output_0:0_quant: {} |
|
distilbert.transformer.layer.5.attention.k_lin.bias:0: {} |
|
/distilbert/transformer/layer.5/attention/k_lin/Transpose_output_0_quantized:0_min: {} |
|
/distilbert/transformer/layer.5/attention/k_lin/Transpose_output_0_quantized:0_max: {} |
|
/distilbert/transformer/layer.4/output_layer_norm/Add_1_output_0:0_min: {} |
|
/distilbert/transformer/layer.4/output_layer_norm/Add_1_output_0:0_max: {} |
|
/distilbert/transformer/layer.5/attention/k_lin/Add_output_0:0_min: {} |
|
/distilbert/transformer/layer.5/attention/k_lin/Add_output_0:0_max: {} |
|
input_ids:0: {} |
|
output: |
|
/distilbert/transformer/layer.5/attention/Reshape_1_output_0:0: {} |
|
attr: |
|
reshape: 12,64,-1, -1 |
|
reshape_dims: '0' |
|
/distilbert/transformer/layer.5/attention/q_lin/Add: |
|
type: InnerProduct |
|
input: |
|
/distilbert/transformer/layer.5/attention/q_lin/Transpose_output_0_quantized:0: {} |
|
/distilbert/transformer/layer.4/output_layer_norm/Add_1_output_0:0_quant: {} |
|
distilbert.transformer.layer.5.attention.q_lin.bias:0: {} |
|
/distilbert/transformer/layer.5/attention/q_lin/Transpose_output_0_quantized:0_min: {} |
|
/distilbert/transformer/layer.5/attention/q_lin/Transpose_output_0_quantized:0_max: {} |
|
/distilbert/transformer/layer.4/output_layer_norm/Add_1_output_0:0_min: {} |
|
/distilbert/transformer/layer.4/output_layer_norm/Add_1_output_0:0_max: {} |
|
/distilbert/transformer/layer.5/attention/q_lin/Add_output_0:0_min: {} |
|
/distilbert/transformer/layer.5/attention/q_lin/Add_output_0:0_max: {} |
|
input_ids:0: {} |
|
output: |
|
/distilbert/transformer/layer.5/attention/Reshape_output_0:0: {} |
|
attr: |
|
reshape: 12,64,-1, -1 |
|
reshape_dims: '0' |
|
/distilbert/transformer/layer.5/attention/v_lin/Add: |
|
type: InnerProduct |
|
input: |
|
/distilbert/transformer/layer.5/attention/v_lin/Transpose_output_0_quantized:0: {} |
|
/distilbert/transformer/layer.4/output_layer_norm/Add_1_output_0:0_quant: {} |
|
distilbert.transformer.layer.5.attention.v_lin.bias:0: {} |
|
/distilbert/transformer/layer.5/attention/v_lin/Transpose_output_0_quantized:0_min: {} |
|
/distilbert/transformer/layer.5/attention/v_lin/Transpose_output_0_quantized:0_max: {} |
|
/distilbert/transformer/layer.4/output_layer_norm/Add_1_output_0:0_min: {} |
|
/distilbert/transformer/layer.4/output_layer_norm/Add_1_output_0:0_max: {} |
|
/distilbert/transformer/layer.5/attention/Reshape_2_output_0:0_min: {} |
|
/distilbert/transformer/layer.5/attention/Reshape_2_output_0:0_max: {} |
|
input_ids:0: {} |
|
output: |
|
/distilbert/transformer/layer.5/attention/Reshape_2_output_0:0: {} |
|
attr: |
|
output_dtype: s8 |
|
reshape: 12,64,-1, -1 |
|
reshape_dims: '0' |
|
/distilbert/transformer/layer.5/attention/Where: |
|
type: Matmul |
|
input: |
|
/distilbert/transformer/layer.5/attention/Reshape_output_0:0: {} |
|
/distilbert/transformer/layer.5/attention/Reshape_1_output_0:0: {} |
|
padding_sequence:0: {} |
|
output: |
|
/distilbert/transformer/layer.5/attention/Where_output_0:0: {} |
|
attr: |
|
src0_perm: 2,0,3,1 |
|
src1_perm: 2,0,1,3 |
|
output_scale: 0.125 |
|
format_any: false |
|
append_op: binary_add |
|
/distilbert/transformer/layer.5/attention/Softmax: |
|
type: Softmax |
|
input: |
|
/distilbert/transformer/layer.5/attention/Where_output_0:0: {} |
|
/distilbert/transformer/layer.5/attention/Softmax_output_0:0_min: {} |
|
/distilbert/transformer/layer.5/attention/Softmax_output_0:0_max: {} |
|
output: |
|
/distilbert/transformer/layer.5/attention/Softmax_output_0:0: {} |
|
attr: |
|
output_dtype: u8 |
|
/distilbert/transformer/layer.5/attention/Transpose_3: |
|
type: Matmul |
|
input: |
|
/distilbert/transformer/layer.5/attention/Softmax_output_0:0: {} |
|
/distilbert/transformer/layer.5/attention/Reshape_2_output_0:0: {} |
|
/distilbert/transformer/layer.5/attention/Softmax_output_0:0_min: {} |
|
/distilbert/transformer/layer.5/attention/Softmax_output_0:0_max: {} |
|
/distilbert/transformer/layer.5/attention/Reshape_2_output_0:0_min: {} |
|
/distilbert/transformer/layer.5/attention/Reshape_2_output_0:0_max: {} |
|
/distilbert/transformer/layer.5/attention/Reshape_4_output_0:0_min: {} |
|
/distilbert/transformer/layer.5/attention/Reshape_4_output_0:0_max: {} |
|
output: |
|
/distilbert/transformer/layer.5/attention/Reshape_4_output_0:0: {} |
|
attr: |
|
src1_perm: 2,0,3,1 |
|
dst_perm: 1,3,0,2 |
|
output_dtype: u8 |
|
reshape: 768,-1 |
|
/distilbert/transformer/layer.5/Add: |
|
type: InnerProduct |
|
input: |
|
/distilbert/transformer/layer.5/attention/out_lin/Transpose_output_0_quantized:0: {} |
|
/distilbert/transformer/layer.5/attention/Reshape_4_output_0:0: {} |
|
distilbert.transformer.layer.5.attention.out_lin.bias:0: {} |
|
/distilbert/transformer/layer.4/output_layer_norm/Add_1_output_0:0: {} |
|
/distilbert/transformer/layer.5/attention/out_lin/Transpose_output_0_quantized:0_min: {} |
|
/distilbert/transformer/layer.5/attention/out_lin/Transpose_output_0_quantized:0_max: {} |
|
/distilbert/transformer/layer.5/attention/Reshape_4_output_0:0_min: {} |
|
/distilbert/transformer/layer.5/attention/Reshape_4_output_0:0_max: {} |
|
/distilbert/transformer/layer.5/Add_output_0:0_min: {} |
|
/distilbert/transformer/layer.5/Add_output_0:0_max: {} |
|
output: |
|
/distilbert/transformer/layer.5/Add_output_0:0: {} |
|
attr: |
|
append_op: sum |
|
/distilbert/transformer/layer.5/sa_layer_norm/Add_1: |
|
type: LayerNorm |
|
input: |
|
/distilbert/transformer/layer.5/Add_output_0:0: {} |
|
distilbert.transformer.layer.5.sa_layer_norm.weight:0: {} |
|
distilbert.transformer.layer.5.sa_layer_norm.bias:0: {} |
|
output: |
|
/distilbert/transformer/layer.5/sa_layer_norm/Add_1_output_0:0: {} |
|
attr: |
|
epsilon: 9.999999960041972e-13 |
|
transpose_mode: 1, 0 |
|
/distilbert/transformer/layer.5/ffn/activation/Mul_1_quant_0: |
|
type: Quantize |
|
input: |
|
/distilbert/transformer/layer.5/sa_layer_norm/Add_1_output_0:0: {} |
|
/distilbert/transformer/layer.5/sa_layer_norm/Add_1_output_0:0_min: {} |
|
/distilbert/transformer/layer.5/sa_layer_norm/Add_1_output_0:0_max: {} |
|
output: |
|
/distilbert/transformer/layer.5/sa_layer_norm/Add_1_output_0:0_quant: {} |
|
attr: |
|
output_dtype: u8 |
|
/distilbert/transformer/layer.5/ffn/activation/Mul_1: |
|
type: InnerProduct |
|
input: |
|
/distilbert/transformer/layer.5/ffn/lin1/Transpose_output_0_quantized:0: {} |
|
/distilbert/transformer/layer.5/sa_layer_norm/Add_1_output_0:0_quant: {} |
|
distilbert.transformer.layer.5.ffn.lin1.bias:0: {} |
|
/distilbert/transformer/layer.5/ffn/lin1/Transpose_output_0_quantized:0_min: {} |
|
/distilbert/transformer/layer.5/ffn/lin1/Transpose_output_0_quantized:0_max: {} |
|
/distilbert/transformer/layer.5/sa_layer_norm/Add_1_output_0:0_min: {} |
|
/distilbert/transformer/layer.5/sa_layer_norm/Add_1_output_0:0_max: {} |
|
/distilbert/transformer/layer.5/ffn/activation/Mul_1_output_0:0_min: {} |
|
/distilbert/transformer/layer.5/ffn/activation/Mul_1_output_0:0_max: {} |
|
output: |
|
/distilbert/transformer/layer.5/ffn/activation/Mul_1_output_0:0: {} |
|
attr: |
|
append_op: gelu_tanh |
|
output_dtype: u8 |
|
/distilbert/transformer/layer.5/Add_1: |
|
type: InnerProduct |
|
input: |
|
/distilbert/transformer/layer.5/ffn/lin2/Transpose_output_0_quantized:0: {} |
|
/distilbert/transformer/layer.5/ffn/activation/Mul_1_output_0:0: {} |
|
distilbert.transformer.layer.5.ffn.lin2.bias:0: {} |
|
/distilbert/transformer/layer.5/sa_layer_norm/Add_1_output_0:0: {} |
|
/distilbert/transformer/layer.5/ffn/lin2/Transpose_output_0_quantized:0_min: {} |
|
/distilbert/transformer/layer.5/ffn/lin2/Transpose_output_0_quantized:0_max: {} |
|
/distilbert/transformer/layer.5/ffn/activation/Mul_1_output_0:0_min: {} |
|
/distilbert/transformer/layer.5/ffn/activation/Mul_1_output_0:0_max: {} |
|
/distilbert/transformer/layer.5/Add_1_output_0:0_min: {} |
|
/distilbert/transformer/layer.5/Add_1_output_0:0_max: {} |
|
output: |
|
/distilbert/transformer/layer.5/Add_1_output_0:0: {} |
|
attr: |
|
append_op: sum |
|
/distilbert/transformer/layer.5/Add_1_Reorder_Recover: |
|
type: Reorder |
|
input: |
|
/distilbert/transformer/layer.5/Add_1_output_0:0: {} |
|
output: |
|
/distilbert/transformer/layer.5/Add_1_output_0:0_recover: {} |
|
attr: |
|
src_perm: 0,1 |
|
dst_perm: 1,0 |
|
/distilbert/transformer/layer.5/output_layer_norm/Add_1: |
|
type: LayerNorm |
|
input: |
|
/distilbert/transformer/layer.5/Add_1_output_0:0_recover: {} |
|
distilbert.transformer.layer.5.output_layer_norm.weight:0: {} |
|
distilbert.transformer.layer.5.output_layer_norm.bias:0: {} |
|
output: |
|
/distilbert/transformer/layer.5/output_layer_norm/Add_1_output_0:0: {} |
|
attr: |
|
epsilon: 9.999999960041972e-13 |
|
/qa_outputs/Add_quant_0: |
|
type: Quantize |
|
input: |
|
/distilbert/transformer/layer.5/output_layer_norm/Add_1_output_0:0: {} |
|
/distilbert/transformer/layer.5/output_layer_norm/Add_1_output_0:0_min: {} |
|
/distilbert/transformer/layer.5/output_layer_norm/Add_1_output_0:0_max: {} |
|
output: |
|
/distilbert/transformer/layer.5/output_layer_norm/Add_1_output_0:0_quant: {} |
|
attr: |
|
output_dtype: u8 |
|
/qa_outputs/Add: |
|
type: InnerProduct |
|
input: |
|
/distilbert/transformer/layer.5/output_layer_norm/Add_1_output_0:0_quant: {} |
|
/qa_outputs/Transpose_output_0_quantized:0: {} |
|
qa_outputs.bias:0: {} |
|
/distilbert/transformer/layer.5/output_layer_norm/Add_1_output_0:0_min: {} |
|
/distilbert/transformer/layer.5/output_layer_norm/Add_1_output_0:0_max: {} |
|
/qa_outputs/Transpose_output_0_quantized:0_min: {} |
|
/qa_outputs/Transpose_output_0_quantized:0_max: {} |
|
/qa_outputs/Add_output_0:0_min: {} |
|
/qa_outputs/Add_output_0:0_max: {} |
|
input_ids:0: {} |
|
output: |
|
logits: {} |
|
attr: |
|
src1_perm: 1,0 |
|
reshape: -1,-1,2 |
|
reshape_dims: 0,1 |
|
output_data: |
|
type: Output |
|
input: |
|
logits: {} |
|
|