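# Quantized model graph (YAML). The tensor and operator names suggest an int8
# DistilBERT question-answering model (s8 weights, u8 activations, fp32 min/max
# calibration ranges).
# Inferred from the byte counts below: each `location` entry is [byte_offset, byte_size]
# into the companion weight file, e.g. the fp32 position embeddings occupy
# 512 * 768 * 4 = 1572864 bytes starting at offset 0.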
model:
name: model
operator:
input_data:
type: Input
output:
input_ids:0:
dtype: int32
shape: [-1, -1]
attention_mask:0:
dtype: int32
shape: [-1, -1]
distilbert.embeddings.position_embeddings.weight:0:
dtype: fp32
shape: [512, 768]
location: [0, 1572864]
distilbert.embeddings.word_embeddings.weight:0:
dtype: fp32
shape: [30522, 768]
location: [1572864, 93763584]
distilbert.embeddings.LayerNorm.weight:0:
dtype: fp32
shape: [768]
location: [95336448, 3072]
distilbert.embeddings.LayerNorm.bias:0:
dtype: fp32
shape: [768]
location: [95339520, 3072]
/distilbert/embeddings/LayerNorm/Add_1_output_0:0_min:
dtype: fp32
shape: [1]
location: [97139752, 4]
/distilbert/embeddings/LayerNorm/Add_1_output_0:0_max:
dtype: fp32
shape: [1]
location: [97139756, 4]
/distilbert/transformer/layer.0/attention/k_lin/Transpose_output_0_quantized:0:
dtype: s8
shape: [768, 768]
location: [95342600, 589824]
distilbert.transformer.layer.0.attention.k_lin.bias:0:
dtype: s32
shape: [768]
location: [95932424, 3072]
/distilbert/transformer/layer.0/attention/k_lin/Transpose_output_0_quantized:0_min:
dtype: fp32
shape: [768]
location: [95935496, 3072]
/distilbert/transformer/layer.0/attention/k_lin/Transpose_output_0_quantized:0_max:
dtype: fp32
shape: [768]
location: [95938568, 3072]
/distilbert/transformer/layer.0/attention/k_lin/Add_output_0:0_min:
dtype: fp32
shape: [1]
location: [95941648, 4]
/distilbert/transformer/layer.0/attention/k_lin/Add_output_0:0_max:
dtype: fp32
shape: [1]
location: [95941652, 4]
/distilbert/transformer/layer.0/attention/q_lin/Transpose_output_0_quantized:0:
dtype: s8
shape: [768, 768]
location: [95941656, 589824]
distilbert.transformer.layer.0.attention.q_lin.bias:0:
dtype: s32
shape: [768]
location: [96531480, 3072]
/distilbert/transformer/layer.0/attention/q_lin/Transpose_output_0_quantized:0_min:
dtype: fp32
shape: [768]
location: [96534552, 3072]
/distilbert/transformer/layer.0/attention/q_lin/Transpose_output_0_quantized:0_max:
dtype: fp32
shape: [768]
location: [96537624, 3072]
/distilbert/transformer/layer.0/attention/q_lin/Add_output_0:0_min:
dtype: fp32
shape: [1]
location: [96540704, 4]
/distilbert/transformer/layer.0/attention/q_lin/Add_output_0:0_max:
dtype: fp32
shape: [1]
location: [96540708, 4]
/distilbert/transformer/layer.0/attention/v_lin/Transpose_output_0_quantized:0:
dtype: s8
shape: [768, 768]
location: [96540712, 589824]
distilbert.transformer.layer.0.attention.v_lin.bias:0:
dtype: s32
shape: [768]
location: [97130536, 3072]
/distilbert/transformer/layer.0/attention/v_lin/Transpose_output_0_quantized:0_min:
dtype: fp32
shape: [768]
location: [97133608, 3072]
/distilbert/transformer/layer.0/attention/v_lin/Transpose_output_0_quantized:0_max:
dtype: fp32
shape: [768]
location: [97136680, 3072]
/distilbert/transformer/layer.0/attention/Reshape_2_output_0:0_min:
dtype: fp32
shape: [1]
location: [97139784, 4]
/distilbert/transformer/layer.0/attention/Reshape_2_output_0:0_max:
dtype: fp32
shape: [1]
location: [97139788, 4]
/distilbert/transformer/layer.0/attention/Softmax_output_0:0_min:
dtype: fp32
shape: [1]
location: [97139776, 4]
/distilbert/transformer/layer.0/attention/Softmax_output_0:0_max:
dtype: fp32
shape: [1]
location: [97139780, 4]
/distilbert/transformer/layer.0/attention/Reshape_4_output_0:0_min:
dtype: fp32
shape: [1]
location: [97738840, 4]
/distilbert/transformer/layer.0/attention/Reshape_4_output_0:0_max:
dtype: fp32
shape: [1]
location: [97738844, 4]
/distilbert/transformer/layer.0/attention/out_lin/Transpose_output_0_quantized:0:
dtype: s8
shape: [768, 768]
location: [97139800, 589824]
distilbert.transformer.layer.0.attention.out_lin.bias:0:
dtype: s32
shape: [768]
location: [97729624, 3072]
/distilbert/transformer/layer.0/attention/out_lin/Transpose_output_0_quantized:0_min:
dtype: fp32
shape: [768]
location: [97732696, 3072]
/distilbert/transformer/layer.0/attention/out_lin/Transpose_output_0_quantized:0_max:
dtype: fp32
shape: [768]
location: [97735768, 3072]
/distilbert/transformer/layer.0/Add_output_0:0_min:
dtype: fp32
shape: [1]
location: [97738848, 4]
/distilbert/transformer/layer.0/Add_output_0:0_max:
dtype: fp32
shape: [1]
location: [97738852, 4]
distilbert.transformer.layer.0.sa_layer_norm.weight:0:
dtype: fp32
shape: [768]
location: [97738856, 3072]
distilbert.transformer.layer.0.sa_layer_norm.bias:0:
dtype: fp32
shape: [768]
location: [97741928, 3072]
/distilbert/transformer/layer.0/sa_layer_norm/Add_1_output_0:0_min:
dtype: fp32
shape: [1]
location: [100141168, 4]
/distilbert/transformer/layer.0/sa_layer_norm/Add_1_output_0:0_max:
dtype: fp32
shape: [1]
location: [100141172, 4]
/distilbert/transformer/layer.0/ffn/lin1/Transpose_output_0_quantized:0:
dtype: s8
shape: [3072, 768]
location: [97745008, 2359296]
distilbert.transformer.layer.0.ffn.lin1.bias:0:
dtype: s32
shape: [3072]
location: [100104304, 12288]
/distilbert/transformer/layer.0/ffn/lin1/Transpose_output_0_quantized:0_min:
dtype: fp32
shape: [3072]
location: [100116592, 12288]
/distilbert/transformer/layer.0/ffn/lin1/Transpose_output_0_quantized:0_max:
dtype: fp32
shape: [3072]
location: [100128880, 12288]
/distilbert/transformer/layer.0/ffn/activation/Mul_1_output_0:0_min:
dtype: fp32
shape: [1]
location: [102509696, 4]
/distilbert/transformer/layer.0/ffn/activation/Mul_1_output_0:0_max:
dtype: fp32
shape: [1]
location: [102509700, 4]
/distilbert/transformer/layer.0/ffn/lin2/Transpose_output_0_quantized:0:
dtype: s8
shape: [768, 3072]
location: [100141184, 2359296]
distilbert.transformer.layer.0.ffn.lin2.bias:0:
dtype: s32
shape: [768]
location: [102500480, 3072]
/distilbert/transformer/layer.0/ffn/lin2/Transpose_output_0_quantized:0_min:
dtype: fp32
shape: [768]
location: [102503552, 3072]
/distilbert/transformer/layer.0/ffn/lin2/Transpose_output_0_quantized:0_max:
dtype: fp32
shape: [768]
location: [102506624, 3072]
/distilbert/transformer/layer.0/Add_1_output_0:0_min:
dtype: fp32
shape: [1]
location: [102509704, 4]
/distilbert/transformer/layer.0/Add_1_output_0:0_max:
dtype: fp32
shape: [1]
location: [102509708, 4]
distilbert.transformer.layer.0.output_layer_norm.weight:0:
dtype: fp32
shape: [768]
location: [102509712, 3072]
distilbert.transformer.layer.0.output_layer_norm.bias:0:
dtype: fp32
shape: [768]
location: [102512784, 3072]
/distilbert/transformer/layer.0/output_layer_norm/Add_1_output_0:0_min:
dtype: fp32
shape: [1]
location: [104313016, 4]
/distilbert/transformer/layer.0/output_layer_norm/Add_1_output_0:0_max:
dtype: fp32
shape: [1]
location: [104313020, 4]
/distilbert/transformer/layer.1/attention/k_lin/Transpose_output_0_quantized:0:
dtype: s8
shape: [768, 768]
location: [102515864, 589824]
distilbert.transformer.layer.1.attention.k_lin.bias:0:
dtype: s32
shape: [768]
location: [103105688, 3072]
/distilbert/transformer/layer.1/attention/k_lin/Transpose_output_0_quantized:0_min:
dtype: fp32
shape: [768]
location: [103108760, 3072]
/distilbert/transformer/layer.1/attention/k_lin/Transpose_output_0_quantized:0_max:
dtype: fp32
shape: [768]
location: [103111832, 3072]
/distilbert/transformer/layer.1/attention/k_lin/Add_output_0:0_min:
dtype: fp32
shape: [1]
location: [103114912, 4]
/distilbert/transformer/layer.1/attention/k_lin/Add_output_0:0_max:
dtype: fp32
shape: [1]
location: [103114916, 4]
/distilbert/transformer/layer.1/attention/q_lin/Transpose_output_0_quantized:0:
dtype: s8
shape: [768, 768]
location: [103114920, 589824]
distilbert.transformer.layer.1.attention.q_lin.bias:0:
dtype: s32
shape: [768]
location: [103704744, 3072]
/distilbert/transformer/layer.1/attention/q_lin/Transpose_output_0_quantized:0_min:
dtype: fp32
shape: [768]
location: [103707816, 3072]
/distilbert/transformer/layer.1/attention/q_lin/Transpose_output_0_quantized:0_max:
dtype: fp32
shape: [768]
location: [103710888, 3072]
/distilbert/transformer/layer.1/attention/q_lin/Add_output_0:0_min:
dtype: fp32
shape: [1]
location: [103713968, 4]
/distilbert/transformer/layer.1/attention/q_lin/Add_output_0:0_max:
dtype: fp32
shape: [1]
location: [103713972, 4]
/distilbert/transformer/layer.1/attention/v_lin/Transpose_output_0_quantized:0:
dtype: s8
shape: [768, 768]
location: [103713976, 589824]
distilbert.transformer.layer.1.attention.v_lin.bias:0:
dtype: s32
shape: [768]
location: [104303800, 3072]
/distilbert/transformer/layer.1/attention/v_lin/Transpose_output_0_quantized:0_min:
dtype: fp32
shape: [768]
location: [104306872, 3072]
/distilbert/transformer/layer.1/attention/v_lin/Transpose_output_0_quantized:0_max:
dtype: fp32
shape: [768]
location: [104309944, 3072]
/distilbert/transformer/layer.1/attention/Reshape_2_output_0:0_min:
dtype: fp32
shape: [1]
location: [104313048, 4]
/distilbert/transformer/layer.1/attention/Reshape_2_output_0:0_max:
dtype: fp32
shape: [1]
location: [104313052, 4]
/distilbert/transformer/layer.1/attention/Softmax_output_0:0_min:
dtype: fp32
shape: [1]
location: [104313040, 4]
/distilbert/transformer/layer.1/attention/Softmax_output_0:0_max:
dtype: fp32
shape: [1]
location: [104313044, 4]
/distilbert/transformer/layer.1/attention/Reshape_4_output_0:0_min:
dtype: fp32
shape: [1]
location: [104912104, 4]
/distilbert/transformer/layer.1/attention/Reshape_4_output_0:0_max:
dtype: fp32
shape: [1]
location: [104912108, 4]
/distilbert/transformer/layer.1/attention/out_lin/Transpose_output_0_quantized:0:
dtype: s8
shape: [768, 768]
location: [104313064, 589824]
distilbert.transformer.layer.1.attention.out_lin.bias:0:
dtype: s32
shape: [768]
location: [104902888, 3072]
/distilbert/transformer/layer.1/attention/out_lin/Transpose_output_0_quantized:0_min:
dtype: fp32
shape: [768]
location: [104905960, 3072]
/distilbert/transformer/layer.1/attention/out_lin/Transpose_output_0_quantized:0_max:
dtype: fp32
shape: [768]
location: [104909032, 3072]
/distilbert/transformer/layer.1/Add_output_0:0_min:
dtype: fp32
shape: [1]
location: [104912112, 4]
/distilbert/transformer/layer.1/Add_output_0:0_max:
dtype: fp32
shape: [1]
location: [104912116, 4]
distilbert.transformer.layer.1.sa_layer_norm.weight:0:
dtype: fp32
shape: [768]
location: [104912120, 3072]
distilbert.transformer.layer.1.sa_layer_norm.bias:0:
dtype: fp32
shape: [768]
location: [104915192, 3072]
/distilbert/transformer/layer.1/sa_layer_norm/Add_1_output_0:0_min:
dtype: fp32
shape: [1]
location: [107314432, 4]
/distilbert/transformer/layer.1/sa_layer_norm/Add_1_output_0:0_max:
dtype: fp32
shape: [1]
location: [107314436, 4]
/distilbert/transformer/layer.1/ffn/lin1/Transpose_output_0_quantized:0:
dtype: s8
shape: [3072, 768]
location: [104918272, 2359296]
distilbert.transformer.layer.1.ffn.lin1.bias:0:
dtype: s32
shape: [3072]
location: [107277568, 12288]
/distilbert/transformer/layer.1/ffn/lin1/Transpose_output_0_quantized:0_min:
dtype: fp32
shape: [3072]
location: [107289856, 12288]
/distilbert/transformer/layer.1/ffn/lin1/Transpose_output_0_quantized:0_max:
dtype: fp32
shape: [3072]
location: [107302144, 12288]
/distilbert/transformer/layer.1/ffn/activation/Mul_1_output_0:0_min:
dtype: fp32
shape: [1]
location: [109682960, 4]
/distilbert/transformer/layer.1/ffn/activation/Mul_1_output_0:0_max:
dtype: fp32
shape: [1]
location: [109682964, 4]
/distilbert/transformer/layer.1/ffn/lin2/Transpose_output_0_quantized:0:
dtype: s8
shape: [768, 3072]
location: [107314448, 2359296]
distilbert.transformer.layer.1.ffn.lin2.bias:0:
dtype: s32
shape: [768]
location: [109673744, 3072]
/distilbert/transformer/layer.1/ffn/lin2/Transpose_output_0_quantized:0_min:
dtype: fp32
shape: [768]
location: [109676816, 3072]
/distilbert/transformer/layer.1/ffn/lin2/Transpose_output_0_quantized:0_max:
dtype: fp32
shape: [768]
location: [109679888, 3072]
/distilbert/transformer/layer.1/Add_1_output_0:0_min:
dtype: fp32
shape: [1]
location: [109682968, 4]
/distilbert/transformer/layer.1/Add_1_output_0:0_max:
dtype: fp32
shape: [1]
location: [109682972, 4]
distilbert.transformer.layer.1.output_layer_norm.weight:0:
dtype: fp32
shape: [768]
location: [109682976, 3072]
distilbert.transformer.layer.1.output_layer_norm.bias:0:
dtype: fp32
shape: [768]
location: [109686048, 3072]
/distilbert/transformer/layer.1/output_layer_norm/Add_1_output_0:0_min:
dtype: fp32
shape: [1]
location: [111486280, 4]
/distilbert/transformer/layer.1/output_layer_norm/Add_1_output_0:0_max:
dtype: fp32
shape: [1]
location: [111486284, 4]
/distilbert/transformer/layer.2/attention/k_lin/Transpose_output_0_quantized:0:
dtype: s8
shape: [768, 768]
location: [109689128, 589824]
distilbert.transformer.layer.2.attention.k_lin.bias:0:
dtype: s32
shape: [768]
location: [110278952, 3072]
/distilbert/transformer/layer.2/attention/k_lin/Transpose_output_0_quantized:0_min:
dtype: fp32
shape: [768]
location: [110282024, 3072]
/distilbert/transformer/layer.2/attention/k_lin/Transpose_output_0_quantized:0_max:
dtype: fp32
shape: [768]
location: [110285096, 3072]
/distilbert/transformer/layer.2/attention/k_lin/Add_output_0:0_min:
dtype: fp32
shape: [1]
location: [110288176, 4]
/distilbert/transformer/layer.2/attention/k_lin/Add_output_0:0_max:
dtype: fp32
shape: [1]
location: [110288180, 4]
/distilbert/transformer/layer.2/attention/q_lin/Transpose_output_0_quantized:0:
dtype: s8
shape: [768, 768]
location: [110288184, 589824]
distilbert.transformer.layer.2.attention.q_lin.bias:0:
dtype: s32
shape: [768]
location: [110878008, 3072]
/distilbert/transformer/layer.2/attention/q_lin/Transpose_output_0_quantized:0_min:
dtype: fp32
shape: [768]
location: [110881080, 3072]
/distilbert/transformer/layer.2/attention/q_lin/Transpose_output_0_quantized:0_max:
dtype: fp32
shape: [768]
location: [110884152, 3072]
/distilbert/transformer/layer.2/attention/q_lin/Add_output_0:0_min:
dtype: fp32
shape: [1]
location: [110887232, 4]
/distilbert/transformer/layer.2/attention/q_lin/Add_output_0:0_max:
dtype: fp32
shape: [1]
location: [110887236, 4]
/distilbert/transformer/layer.2/attention/v_lin/Transpose_output_0_quantized:0:
dtype: s8
shape: [768, 768]
location: [110887240, 589824]
distilbert.transformer.layer.2.attention.v_lin.bias:0:
dtype: s32
shape: [768]
location: [111477064, 3072]
/distilbert/transformer/layer.2/attention/v_lin/Transpose_output_0_quantized:0_min:
dtype: fp32
shape: [768]
location: [111480136, 3072]
/distilbert/transformer/layer.2/attention/v_lin/Transpose_output_0_quantized:0_max:
dtype: fp32
shape: [768]
location: [111483208, 3072]
/distilbert/transformer/layer.2/attention/Reshape_2_output_0:0_min:
dtype: fp32
shape: [1]
location: [111486312, 4]
/distilbert/transformer/layer.2/attention/Reshape_2_output_0:0_max:
dtype: fp32
shape: [1]
location: [111486316, 4]
/distilbert/transformer/layer.2/attention/Softmax_output_0:0_min:
dtype: fp32
shape: [1]
location: [111486304, 4]
/distilbert/transformer/layer.2/attention/Softmax_output_0:0_max:
dtype: fp32
shape: [1]
location: [111486308, 4]
/distilbert/transformer/layer.2/attention/Reshape_4_output_0:0_min:
dtype: fp32
shape: [1]
location: [112085368, 4]
/distilbert/transformer/layer.2/attention/Reshape_4_output_0:0_max:
dtype: fp32
shape: [1]
location: [112085372, 4]
/distilbert/transformer/layer.2/attention/out_lin/Transpose_output_0_quantized:0:
dtype: s8
shape: [768, 768]
location: [111486328, 589824]
distilbert.transformer.layer.2.attention.out_lin.bias:0:
dtype: s32
shape: [768]
location: [112076152, 3072]
/distilbert/transformer/layer.2/attention/out_lin/Transpose_output_0_quantized:0_min:
dtype: fp32
shape: [768]
location: [112079224, 3072]
/distilbert/transformer/layer.2/attention/out_lin/Transpose_output_0_quantized:0_max:
dtype: fp32
shape: [768]
location: [112082296, 3072]
/distilbert/transformer/layer.2/Add_output_0:0_min:
dtype: fp32
shape: [1]
location: [112085376, 4]
/distilbert/transformer/layer.2/Add_output_0:0_max:
dtype: fp32
shape: [1]
location: [112085380, 4]
distilbert.transformer.layer.2.sa_layer_norm.weight:0:
dtype: fp32
shape: [768]
location: [112085384, 3072]
distilbert.transformer.layer.2.sa_layer_norm.bias:0:
dtype: fp32
shape: [768]
location: [112088456, 3072]
/distilbert/transformer/layer.2/sa_layer_norm/Add_1_output_0:0_min:
dtype: fp32
shape: [1]
location: [114487696, 4]
/distilbert/transformer/layer.2/sa_layer_norm/Add_1_output_0:0_max:
dtype: fp32
shape: [1]
location: [114487700, 4]
/distilbert/transformer/layer.2/ffn/lin1/Transpose_output_0_quantized:0:
dtype: s8
shape: [3072, 768]
location: [112091536, 2359296]
distilbert.transformer.layer.2.ffn.lin1.bias:0:
dtype: s32
shape: [3072]
location: [114450832, 12288]
/distilbert/transformer/layer.2/ffn/lin1/Transpose_output_0_quantized:0_min:
dtype: fp32
shape: [3072]
location: [114463120, 12288]
/distilbert/transformer/layer.2/ffn/lin1/Transpose_output_0_quantized:0_max:
dtype: fp32
shape: [3072]
location: [114475408, 12288]
/distilbert/transformer/layer.2/ffn/activation/Mul_1_output_0:0_min:
dtype: fp32
shape: [1]
location: [116856224, 4]
/distilbert/transformer/layer.2/ffn/activation/Mul_1_output_0:0_max:
dtype: fp32
shape: [1]
location: [116856228, 4]
/distilbert/transformer/layer.2/ffn/lin2/Transpose_output_0_quantized:0:
dtype: s8
shape: [768, 3072]
location: [114487712, 2359296]
distilbert.transformer.layer.2.ffn.lin2.bias:0:
dtype: s32
shape: [768]
location: [116847008, 3072]
/distilbert/transformer/layer.2/ffn/lin2/Transpose_output_0_quantized:0_min:
dtype: fp32
shape: [768]
location: [116850080, 3072]
/distilbert/transformer/layer.2/ffn/lin2/Transpose_output_0_quantized:0_max:
dtype: fp32
shape: [768]
location: [116853152, 3072]
/distilbert/transformer/layer.2/Add_1_output_0:0_min:
dtype: fp32
shape: [1]
location: [116856232, 4]
/distilbert/transformer/layer.2/Add_1_output_0:0_max:
dtype: fp32
shape: [1]
location: [116856236, 4]
distilbert.transformer.layer.2.output_layer_norm.weight:0:
dtype: fp32
shape: [768]
location: [116856240, 3072]
distilbert.transformer.layer.2.output_layer_norm.bias:0:
dtype: fp32
shape: [768]
location: [116859312, 3072]
/distilbert/transformer/layer.2/output_layer_norm/Add_1_output_0:0_min:
dtype: fp32
shape: [1]
location: [118659544, 4]
/distilbert/transformer/layer.2/output_layer_norm/Add_1_output_0:0_max:
dtype: fp32
shape: [1]
location: [118659548, 4]
/distilbert/transformer/layer.3/attention/k_lin/Transpose_output_0_quantized:0:
dtype: s8
shape: [768, 768]
location: [116862392, 589824]
distilbert.transformer.layer.3.attention.k_lin.bias:0:
dtype: s32
shape: [768]
location: [117452216, 3072]
/distilbert/transformer/layer.3/attention/k_lin/Transpose_output_0_quantized:0_min:
dtype: fp32
shape: [768]
location: [117455288, 3072]
/distilbert/transformer/layer.3/attention/k_lin/Transpose_output_0_quantized:0_max:
dtype: fp32
shape: [768]
location: [117458360, 3072]
/distilbert/transformer/layer.3/attention/k_lin/Add_output_0:0_min:
dtype: fp32
shape: [1]
location: [117461440, 4]
/distilbert/transformer/layer.3/attention/k_lin/Add_output_0:0_max:
dtype: fp32
shape: [1]
location: [117461444, 4]
/distilbert/transformer/layer.3/attention/q_lin/Transpose_output_0_quantized:0:
dtype: s8
shape: [768, 768]
location: [117461448, 589824]
distilbert.transformer.layer.3.attention.q_lin.bias:0:
dtype: s32
shape: [768]
location: [118051272, 3072]
/distilbert/transformer/layer.3/attention/q_lin/Transpose_output_0_quantized:0_min:
dtype: fp32
shape: [768]
location: [118054344, 3072]
/distilbert/transformer/layer.3/attention/q_lin/Transpose_output_0_quantized:0_max:
dtype: fp32
shape: [768]
location: [118057416, 3072]
/distilbert/transformer/layer.3/attention/q_lin/Add_output_0:0_min:
dtype: fp32
shape: [1]
location: [118060496, 4]
/distilbert/transformer/layer.3/attention/q_lin/Add_output_0:0_max:
dtype: fp32
shape: [1]
location: [118060500, 4]
/distilbert/transformer/layer.3/attention/v_lin/Transpose_output_0_quantized:0:
dtype: s8
shape: [768, 768]
location: [118060504, 589824]
distilbert.transformer.layer.3.attention.v_lin.bias:0:
dtype: s32
shape: [768]
location: [118650328, 3072]
/distilbert/transformer/layer.3/attention/v_lin/Transpose_output_0_quantized:0_min:
dtype: fp32
shape: [768]
location: [118653400, 3072]
/distilbert/transformer/layer.3/attention/v_lin/Transpose_output_0_quantized:0_max:
dtype: fp32
shape: [768]
location: [118656472, 3072]
/distilbert/transformer/layer.3/attention/Reshape_2_output_0:0_min:
dtype: fp32
shape: [1]
location: [118659576, 4]
/distilbert/transformer/layer.3/attention/Reshape_2_output_0:0_max:
dtype: fp32
shape: [1]
location: [118659580, 4]
/distilbert/transformer/layer.3/attention/Softmax_output_0:0_min:
dtype: fp32
shape: [1]
location: [118659568, 4]
/distilbert/transformer/layer.3/attention/Softmax_output_0:0_max:
dtype: fp32
shape: [1]
location: [118659572, 4]
/distilbert/transformer/layer.3/attention/Reshape_4_output_0:0_min:
dtype: fp32
shape: [1]
location: [119258632, 4]
/distilbert/transformer/layer.3/attention/Reshape_4_output_0:0_max:
dtype: fp32
shape: [1]
location: [119258636, 4]
/distilbert/transformer/layer.3/attention/out_lin/Transpose_output_0_quantized:0:
dtype: s8
shape: [768, 768]
location: [118659592, 589824]
distilbert.transformer.layer.3.attention.out_lin.bias:0:
dtype: s32
shape: [768]
location: [119249416, 3072]
/distilbert/transformer/layer.3/attention/out_lin/Transpose_output_0_quantized:0_min:
dtype: fp32
shape: [768]
location: [119252488, 3072]
/distilbert/transformer/layer.3/attention/out_lin/Transpose_output_0_quantized:0_max:
dtype: fp32
shape: [768]
location: [119255560, 3072]
/distilbert/transformer/layer.3/Add_output_0:0_min:
dtype: fp32
shape: [1]
location: [119258640, 4]
/distilbert/transformer/layer.3/Add_output_0:0_max:
dtype: fp32
shape: [1]
location: [119258644, 4]
distilbert.transformer.layer.3.sa_layer_norm.weight:0:
dtype: fp32
shape: [768]
location: [119258648, 3072]
distilbert.transformer.layer.3.sa_layer_norm.bias:0:
dtype: fp32
shape: [768]
location: [119261720, 3072]
/distilbert/transformer/layer.3/sa_layer_norm/Add_1_output_0:0_min:
dtype: fp32
shape: [1]
location: [121660960, 4]
/distilbert/transformer/layer.3/sa_layer_norm/Add_1_output_0:0_max:
dtype: fp32
shape: [1]
location: [121660964, 4]
/distilbert/transformer/layer.3/ffn/lin1/Transpose_output_0_quantized:0:
dtype: s8
shape: [3072, 768]
location: [119264800, 2359296]
distilbert.transformer.layer.3.ffn.lin1.bias:0:
dtype: s32
shape: [3072]
location: [121624096, 12288]
/distilbert/transformer/layer.3/ffn/lin1/Transpose_output_0_quantized:0_min:
dtype: fp32
shape: [3072]
location: [121636384, 12288]
/distilbert/transformer/layer.3/ffn/lin1/Transpose_output_0_quantized:0_max:
dtype: fp32
shape: [3072]
location: [121648672, 12288]
/distilbert/transformer/layer.3/ffn/activation/Mul_1_output_0:0_min:
dtype: fp32
shape: [1]
location: [124029488, 4]
/distilbert/transformer/layer.3/ffn/activation/Mul_1_output_0:0_max:
dtype: fp32
shape: [1]
location: [124029492, 4]
/distilbert/transformer/layer.3/ffn/lin2/Transpose_output_0_quantized:0:
dtype: s8
shape: [768, 3072]
location: [121660976, 2359296]
distilbert.transformer.layer.3.ffn.lin2.bias:0:
dtype: s32
shape: [768]
location: [124020272, 3072]
/distilbert/transformer/layer.3/ffn/lin2/Transpose_output_0_quantized:0_min:
dtype: fp32
shape: [768]
location: [124023344, 3072]
/distilbert/transformer/layer.3/ffn/lin2/Transpose_output_0_quantized:0_max:
dtype: fp32
shape: [768]
location: [124026416, 3072]
/distilbert/transformer/layer.3/Add_1_output_0:0_min:
dtype: fp32
shape: [1]
location: [124029496, 4]
/distilbert/transformer/layer.3/Add_1_output_0:0_max:
dtype: fp32
shape: [1]
location: [124029500, 4]
distilbert.transformer.layer.3.output_layer_norm.weight:0:
dtype: fp32
shape: [768]
location: [124029504, 3072]
distilbert.transformer.layer.3.output_layer_norm.bias:0:
dtype: fp32
shape: [768]
location: [124032576, 3072]
/distilbert/transformer/layer.3/output_layer_norm/Add_1_output_0:0_min:
dtype: fp32
shape: [1]
location: [125832808, 4]
/distilbert/transformer/layer.3/output_layer_norm/Add_1_output_0:0_max:
dtype: fp32
shape: [1]
location: [125832812, 4]
/distilbert/transformer/layer.4/attention/k_lin/Transpose_output_0_quantized:0:
dtype: s8
shape: [768, 768]
location: [124035656, 589824]
distilbert.transformer.layer.4.attention.k_lin.bias:0:
dtype: s32
shape: [768]
location: [124625480, 3072]
/distilbert/transformer/layer.4/attention/k_lin/Transpose_output_0_quantized:0_min:
dtype: fp32
shape: [768]
location: [124628552, 3072]
/distilbert/transformer/layer.4/attention/k_lin/Transpose_output_0_quantized:0_max:
dtype: fp32
shape: [768]
location: [124631624, 3072]
/distilbert/transformer/layer.4/attention/k_lin/Add_output_0:0_min:
dtype: fp32
shape: [1]
location: [124634704, 4]
/distilbert/transformer/layer.4/attention/k_lin/Add_output_0:0_max:
dtype: fp32
shape: [1]
location: [124634708, 4]
/distilbert/transformer/layer.4/attention/q_lin/Transpose_output_0_quantized:0:
dtype: s8
shape: [768, 768]
location: [124634712, 589824]
distilbert.transformer.layer.4.attention.q_lin.bias:0:
dtype: s32
shape: [768]
location: [125224536, 3072]
/distilbert/transformer/layer.4/attention/q_lin/Transpose_output_0_quantized:0_min:
dtype: fp32
shape: [768]
location: [125227608, 3072]
/distilbert/transformer/layer.4/attention/q_lin/Transpose_output_0_quantized:0_max:
dtype: fp32
shape: [768]
location: [125230680, 3072]
/distilbert/transformer/layer.4/attention/q_lin/Add_output_0:0_min:
dtype: fp32
shape: [1]
location: [125233760, 4]
/distilbert/transformer/layer.4/attention/q_lin/Add_output_0:0_max:
dtype: fp32
shape: [1]
location: [125233764, 4]
/distilbert/transformer/layer.4/attention/v_lin/Transpose_output_0_quantized:0:
dtype: s8
shape: [768, 768]
location: [125233768, 589824]
distilbert.transformer.layer.4.attention.v_lin.bias:0:
dtype: s32
shape: [768]
location: [125823592, 3072]
/distilbert/transformer/layer.4/attention/v_lin/Transpose_output_0_quantized:0_min:
dtype: fp32
shape: [768]
location: [125826664, 3072]
/distilbert/transformer/layer.4/attention/v_lin/Transpose_output_0_quantized:0_max:
dtype: fp32
shape: [768]
location: [125829736, 3072]
/distilbert/transformer/layer.4/attention/Reshape_2_output_0:0_min:
dtype: fp32
shape: [1]
location: [125832840, 4]
/distilbert/transformer/layer.4/attention/Reshape_2_output_0:0_max:
dtype: fp32
shape: [1]
location: [125832844, 4]
/distilbert/transformer/layer.4/attention/Softmax_output_0:0_min:
dtype: fp32
shape: [1]
location: [125832832, 4]
/distilbert/transformer/layer.4/attention/Softmax_output_0:0_max:
dtype: fp32
shape: [1]
location: [125832836, 4]
/distilbert/transformer/layer.4/attention/Reshape_4_output_0:0_min:
dtype: fp32
shape: [1]
location: [126431896, 4]
/distilbert/transformer/layer.4/attention/Reshape_4_output_0:0_max:
dtype: fp32
shape: [1]
location: [126431900, 4]
/distilbert/transformer/layer.4/attention/out_lin/Transpose_output_0_quantized:0:
dtype: s8
shape: [768, 768]
location: [125832856, 589824]
distilbert.transformer.layer.4.attention.out_lin.bias:0:
dtype: s32
shape: [768]
location: [126422680, 3072]
/distilbert/transformer/layer.4/attention/out_lin/Transpose_output_0_quantized:0_min:
dtype: fp32
shape: [768]
location: [126425752, 3072]
/distilbert/transformer/layer.4/attention/out_lin/Transpose_output_0_quantized:0_max:
dtype: fp32
shape: [768]
location: [126428824, 3072]
/distilbert/transformer/layer.4/Add_output_0:0_min:
dtype: fp32
shape: [1]
location: [126431904, 4]
/distilbert/transformer/layer.4/Add_output_0:0_max:
dtype: fp32
shape: [1]
location: [126431908, 4]
distilbert.transformer.layer.4.sa_layer_norm.weight:0:
dtype: fp32
shape: [768]
location: [126431912, 3072]
distilbert.transformer.layer.4.sa_layer_norm.bias:0:
dtype: fp32
shape: [768]
location: [126434984, 3072]
/distilbert/transformer/layer.4/sa_layer_norm/Add_1_output_0:0_min:
dtype: fp32
shape: [1]
location: [128834224, 4]
/distilbert/transformer/layer.4/sa_layer_norm/Add_1_output_0:0_max:
dtype: fp32
shape: [1]
location: [128834228, 4]
/distilbert/transformer/layer.4/ffn/lin1/Transpose_output_0_quantized:0:
dtype: s8
shape: [3072, 768]
location: [126438064, 2359296]
distilbert.transformer.layer.4.ffn.lin1.bias:0:
dtype: s32
shape: [3072]
location: [128797360, 12288]
/distilbert/transformer/layer.4/ffn/lin1/Transpose_output_0_quantized:0_min:
dtype: fp32
shape: [3072]
location: [128809648, 12288]
/distilbert/transformer/layer.4/ffn/lin1/Transpose_output_0_quantized:0_max:
dtype: fp32
shape: [3072]
location: [128821936, 12288]
/distilbert/transformer/layer.4/ffn/activation/Mul_1_output_0:0_min:
dtype: fp32
shape: [1]
location: [131202752, 4]
/distilbert/transformer/layer.4/ffn/activation/Mul_1_output_0:0_max:
dtype: fp32
shape: [1]
location: [131202756, 4]
/distilbert/transformer/layer.4/ffn/lin2/Transpose_output_0_quantized:0:
dtype: s8
shape: [768, 3072]
location: [128834240, 2359296]
distilbert.transformer.layer.4.ffn.lin2.bias:0:
dtype: s32
shape: [768]
location: [131193536, 3072]
/distilbert/transformer/layer.4/ffn/lin2/Transpose_output_0_quantized:0_min:
dtype: fp32
shape: [768]
location: [131196608, 3072]
/distilbert/transformer/layer.4/ffn/lin2/Transpose_output_0_quantized:0_max:
dtype: fp32
shape: [768]
location: [131199680, 3072]
/distilbert/transformer/layer.4/Add_1_output_0:0_min:
dtype: fp32
shape: [1]
location: [131202760, 4]
/distilbert/transformer/layer.4/Add_1_output_0:0_max:
dtype: fp32
shape: [1]
location: [131202764, 4]
distilbert.transformer.layer.4.output_layer_norm.weight:0:
dtype: fp32
shape: [768]
location: [131202768, 3072]
distilbert.transformer.layer.4.output_layer_norm.bias:0:
dtype: fp32
shape: [768]
location: [131205840, 3072]
/distilbert/transformer/layer.4/output_layer_norm/Add_1_output_0:0_min:
dtype: fp32
shape: [1]
location: [133006072, 4]
/distilbert/transformer/layer.4/output_layer_norm/Add_1_output_0:0_max:
dtype: fp32
shape: [1]
location: [133006076, 4]
/distilbert/transformer/layer.5/attention/k_lin/Transpose_output_0_quantized:0:
dtype: s8
shape: [768, 768]
location: [131208920, 589824]
distilbert.transformer.layer.5.attention.k_lin.bias:0:
dtype: s32
shape: [768]
location: [131798744, 3072]
/distilbert/transformer/layer.5/attention/k_lin/Transpose_output_0_quantized:0_min:
dtype: fp32
shape: [768]
location: [131801816, 3072]
/distilbert/transformer/layer.5/attention/k_lin/Transpose_output_0_quantized:0_max:
dtype: fp32
shape: [768]
location: [131804888, 3072]
/distilbert/transformer/layer.5/attention/k_lin/Add_output_0:0_min:
dtype: fp32
shape: [1]
location: [131807968, 4]
/distilbert/transformer/layer.5/attention/k_lin/Add_output_0:0_max:
dtype: fp32
shape: [1]
location: [131807972, 4]
/distilbert/transformer/layer.5/attention/q_lin/Transpose_output_0_quantized:0:
dtype: s8
shape: [768, 768]
location: [131807976, 589824]
distilbert.transformer.layer.5.attention.q_lin.bias:0:
dtype: s32
shape: [768]
location: [132397800, 3072]
/distilbert/transformer/layer.5/attention/q_lin/Transpose_output_0_quantized:0_min:
dtype: fp32
shape: [768]
location: [132400872, 3072]
/distilbert/transformer/layer.5/attention/q_lin/Transpose_output_0_quantized:0_max:
dtype: fp32
shape: [768]
location: [132403944, 3072]
/distilbert/transformer/layer.5/attention/q_lin/Add_output_0:0_min:
dtype: fp32
shape: [1]
location: [132407024, 4]
/distilbert/transformer/layer.5/attention/q_lin/Add_output_0:0_max:
dtype: fp32
shape: [1]
location: [132407028, 4]
/distilbert/transformer/layer.5/attention/v_lin/Transpose_output_0_quantized:0:
dtype: s8
shape: [768, 768]
location: [132407032, 589824]
distilbert.transformer.layer.5.attention.v_lin.bias:0:
dtype: s32
shape: [768]
location: [132996856, 3072]
/distilbert/transformer/layer.5/attention/v_lin/Transpose_output_0_quantized:0_min:
dtype: fp32
shape: [768]
location: [132999928, 3072]
/distilbert/transformer/layer.5/attention/v_lin/Transpose_output_0_quantized:0_max:
dtype: fp32
shape: [768]
location: [133003000, 3072]
/distilbert/transformer/layer.5/attention/Reshape_2_output_0:0_min:
dtype: fp32
shape: [1]
location: [133006104, 4]
/distilbert/transformer/layer.5/attention/Reshape_2_output_0:0_max:
dtype: fp32
shape: [1]
location: [133006108, 4]
/distilbert/transformer/layer.5/attention/Softmax_output_0:0_min:
dtype: fp32
shape: [1]
location: [133006096, 4]
/distilbert/transformer/layer.5/attention/Softmax_output_0:0_max:
dtype: fp32
shape: [1]
location: [133006100, 4]
/distilbert/transformer/layer.5/attention/Reshape_4_output_0:0_min:
dtype: fp32
shape: [1]
location: [133605160, 4]
/distilbert/transformer/layer.5/attention/Reshape_4_output_0:0_max:
dtype: fp32
shape: [1]
location: [133605164, 4]
/distilbert/transformer/layer.5/attention/out_lin/Transpose_output_0_quantized:0:
dtype: s8
shape: [768, 768]
location: [133006120, 589824]
distilbert.transformer.layer.5.attention.out_lin.bias:0:
dtype: s32
shape: [768]
location: [133595944, 3072]
/distilbert/transformer/layer.5/attention/out_lin/Transpose_output_0_quantized:0_min:
dtype: fp32
shape: [768]
location: [133599016, 3072]
/distilbert/transformer/layer.5/attention/out_lin/Transpose_output_0_quantized:0_max:
dtype: fp32
shape: [768]
location: [133602088, 3072]
/distilbert/transformer/layer.5/Add_output_0:0_min:
dtype: fp32
shape: [1]
location: [133605168, 4]
/distilbert/transformer/layer.5/Add_output_0:0_max:
dtype: fp32
shape: [1]
location: [133605172, 4]
distilbert.transformer.layer.5.sa_layer_norm.weight:0:
dtype: fp32
shape: [768]
location: [133605176, 3072]
distilbert.transformer.layer.5.sa_layer_norm.bias:0:
dtype: fp32
shape: [768]
location: [133608248, 3072]
/distilbert/transformer/layer.5/sa_layer_norm/Add_1_output_0:0_min:
dtype: fp32
shape: [1]
location: [136007488, 4]
/distilbert/transformer/layer.5/sa_layer_norm/Add_1_output_0:0_max:
dtype: fp32
shape: [1]
location: [136007492, 4]
/distilbert/transformer/layer.5/ffn/lin1/Transpose_output_0_quantized:0:
dtype: s8
shape: [3072, 768]
location: [133611328, 2359296]
distilbert.transformer.layer.5.ffn.lin1.bias:0:
dtype: s32
shape: [3072]
location: [135970624, 12288]
/distilbert/transformer/layer.5/ffn/lin1/Transpose_output_0_quantized:0_min:
dtype: fp32
shape: [3072]
location: [135982912, 12288]
/distilbert/transformer/layer.5/ffn/lin1/Transpose_output_0_quantized:0_max:
dtype: fp32
shape: [3072]
location: [135995200, 12288]
/distilbert/transformer/layer.5/ffn/activation/Mul_1_output_0:0_min:
dtype: fp32
shape: [1]
location: [138376016, 4]
/distilbert/transformer/layer.5/ffn/activation/Mul_1_output_0:0_max:
dtype: fp32
shape: [1]
location: [138376020, 4]
/distilbert/transformer/layer.5/ffn/lin2/Transpose_output_0_quantized:0:
dtype: s8
shape: [768, 3072]
location: [136007504, 2359296]
distilbert.transformer.layer.5.ffn.lin2.bias:0:
dtype: s32
shape: [768]
location: [138366800, 3072]
/distilbert/transformer/layer.5/ffn/lin2/Transpose_output_0_quantized:0_min:
dtype: fp32
shape: [768]
location: [138369872, 3072]
/distilbert/transformer/layer.5/ffn/lin2/Transpose_output_0_quantized:0_max:
dtype: fp32
shape: [768]
location: [138372944, 3072]
/distilbert/transformer/layer.5/Add_1_output_0:0_min:
dtype: fp32
shape: [1]
location: [138376024, 4]
/distilbert/transformer/layer.5/Add_1_output_0:0_max:
dtype: fp32
shape: [1]
location: [138376028, 4]
distilbert.transformer.layer.5.output_layer_norm.weight:0:
dtype: fp32
shape: [768]
location: [138376032, 3072]
distilbert.transformer.layer.5.output_layer_norm.bias:0:
dtype: fp32
shape: [768]
location: [138379104, 3072]
/distilbert/transformer/layer.5/output_layer_norm/Add_1_output_0:0_min:
dtype: fp32
shape: [1]
location: [138383728, 4]
/distilbert/transformer/layer.5/output_layer_norm/Add_1_output_0:0_max:
dtype: fp32
shape: [1]
location: [138383732, 4]
/qa_outputs/Transpose_output_0_quantized:0:
dtype: s8
shape: [768, 2]
location: [138382184, 1536]
qa_outputs.bias:0:
dtype: s32
shape: [2]
location: [138383720, 8]
/qa_outputs/Transpose_output_0_quantized:0_min:
dtype: fp32
shape: [2]
location: [138383736, 8]
/qa_outputs/Transpose_output_0_quantized:0_max:
dtype: fp32
shape: [2]
location: [138383744, 8]
/qa_outputs/Add_output_0:0_min:
dtype: fp32
shape: [1]
location: [138383752, 4]
/qa_outputs/Add_output_0:0_max:
dtype: fp32
shape: [1]
location: [138383756, 4]
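# Graph operators follow. Each operator references the weight and min/max
# calibration tensors declared above by name.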
padding_sequence:
type: PaddingSequence
input:
attention_mask:0: {}
output:
padding_sequence:0: {}
attr:
dst_shape: -1,12,0,-1
dims: 1
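# Embedding subgraph: the word-embedding Gather has the position embeddings fused
# in via append_op binary_add, followed by the embeddings LayerNorm.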
position_embeddings/after/reshape:
type: Reshape
input:
distilbert.embeddings.position_embeddings.weight:0: {}
input_ids:0: {}
output:
position_embeddings/after/reshape:0: {}
attr:
dst_shape: 1,-1,768
dims: 1
/distilbert/embeddings/position_embeddings/Gather:
type: Reshape
input:
position_embeddings/after/reshape:0: {}
output:
/distilbert/embeddings/position_embeddings/Gather_output_0:0: {}
attr:
dst_shape: 1,-1
word_embeddings/reshape:
type: Reshape
input:
input_ids:0: {}
output:
word_embeddings/reshape:0: {}
attr:
dst_shape: -1
/distilbert/embeddings/word_embeddings/Gather:
type: Gather
input:
word_embeddings/reshape:0: {}
distilbert.embeddings.word_embeddings.weight:0: {}
/distilbert/embeddings/position_embeddings/Gather_output_0:0: {}
input_ids:0: {}
output:
embeddings_add/reshape_2d:0: {}
attr:
axis: 0
batch_dims: 0
append_op: binary_add
reshape: -1,-1,768
reshape_dims: 0,1
mul: 1,2
/distilbert/embeddings/LayerNorm/Add_1:
type: LayerNorm
input:
embeddings_add/reshape_2d:0: {}
distilbert.embeddings.LayerNorm.weight:0: {}
distilbert.embeddings.LayerNorm.bias:0: {}
output:
/distilbert/embeddings/LayerNorm/Add_1_output_0:0: {}
attr:
epsilon: 9.999999960041972e-13
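# Transformer layer 0 follows: Quantize -> q/k/v InnerProducts -> attention Matmuls
# with Softmax -> out_lin InnerProduct (residual fused) -> LayerNorm -> FFN.
# Layers 1-5 repeat essentially the same operator pattern.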
/distilbert/transformer/layer.0/attention/k_lin/Add_quant_0_Reorder_Post_0:
type: Reorder
input:
/distilbert/embeddings/LayerNorm/Add_1_output_0:0: {}
output:
/distilbert/embeddings/LayerNorm/Add_1_output_0:0_reorder: {}
attr:
src_perm: 0,1
dst_perm: 1,0
/distilbert/transformer/layer.0/attention/k_lin/Add_quant_0:
type: Quantize
input:
/distilbert/embeddings/LayerNorm/Add_1_output_0:0_reorder: {}
/distilbert/embeddings/LayerNorm/Add_1_output_0:0_min: {}
/distilbert/embeddings/LayerNorm/Add_1_output_0:0_max: {}
output:
/distilbert/embeddings/LayerNorm/Add_1_output_0:0_quant: {}
attr:
output_dtype: u8
/distilbert/transformer/layer.0/attention/k_lin/Add:
type: InnerProduct
input:
/distilbert/transformer/layer.0/attention/k_lin/Transpose_output_0_quantized:0: {}
/distilbert/embeddings/LayerNorm/Add_1_output_0:0_quant: {}
distilbert.transformer.layer.0.attention.k_lin.bias:0: {}
/distilbert/transformer/layer.0/attention/k_lin/Transpose_output_0_quantized:0_min: {}
/distilbert/transformer/layer.0/attention/k_lin/Transpose_output_0_quantized:0_max: {}
/distilbert/embeddings/LayerNorm/Add_1_output_0:0_min: {}
/distilbert/embeddings/LayerNorm/Add_1_output_0:0_max: {}
/distilbert/transformer/layer.0/attention/k_lin/Add_output_0:0_min: {}
/distilbert/transformer/layer.0/attention/k_lin/Add_output_0:0_max: {}
input_ids:0: {}
output:
/distilbert/transformer/layer.0/attention/Reshape_1_output_0:0: {}
attr:
reshape: 12,64,-1, -1
reshape_dims: '0'
/distilbert/transformer/layer.0/attention/q_lin/Add:
type: InnerProduct
input:
/distilbert/transformer/layer.0/attention/q_lin/Transpose_output_0_quantized:0: {}
/distilbert/embeddings/LayerNorm/Add_1_output_0:0_quant: {}
distilbert.transformer.layer.0.attention.q_lin.bias:0: {}
/distilbert/transformer/layer.0/attention/q_lin/Transpose_output_0_quantized:0_min: {}
/distilbert/transformer/layer.0/attention/q_lin/Transpose_output_0_quantized:0_max: {}
/distilbert/embeddings/LayerNorm/Add_1_output_0:0_min: {}
/distilbert/embeddings/LayerNorm/Add_1_output_0:0_max: {}
/distilbert/transformer/layer.0/attention/q_lin/Add_output_0:0_min: {}
/distilbert/transformer/layer.0/attention/q_lin/Add_output_0:0_max: {}
input_ids:0: {}
output:
/distilbert/transformer/layer.0/attention/Reshape_output_0:0: {}
attr:
reshape: 12,64,-1, -1
reshape_dims: '0'
/distilbert/transformer/layer.0/attention/v_lin/Add:
type: InnerProduct
input:
/distilbert/transformer/layer.0/attention/v_lin/Transpose_output_0_quantized:0: {}
/distilbert/embeddings/LayerNorm/Add_1_output_0:0_quant: {}
distilbert.transformer.layer.0.attention.v_lin.bias:0: {}
/distilbert/transformer/layer.0/attention/v_lin/Transpose_output_0_quantized:0_min: {}
/distilbert/transformer/layer.0/attention/v_lin/Transpose_output_0_quantized:0_max: {}
/distilbert/embeddings/LayerNorm/Add_1_output_0:0_min: {}
/distilbert/embeddings/LayerNorm/Add_1_output_0:0_max: {}
/distilbert/transformer/layer.0/attention/Reshape_2_output_0:0_min: {}
/distilbert/transformer/layer.0/attention/Reshape_2_output_0:0_max: {}
input_ids:0: {}
output:
/distilbert/transformer/layer.0/attention/Reshape_2_output_0:0: {}
attr:
output_dtype: s8
reshape: 12,64,-1, -1
reshape_dims: '0'
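# Q.K^T attention matmul: output_scale 0.125 = 1/sqrt(64), the usual scaling for
# 12 heads of dimension 64; padding_sequence is fused in as an additive mask
# (append_op: binary_add).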
/distilbert/transformer/layer.0/attention/Where:
type: Matmul
input:
/distilbert/transformer/layer.0/attention/Reshape_output_0:0: {}
/distilbert/transformer/layer.0/attention/Reshape_1_output_0:0: {}
padding_sequence:0: {}
output:
/distilbert/transformer/layer.0/attention/Where_output_0:0: {}
attr:
src0_perm: 2,0,3,1
src1_perm: 2,0,1,3
output_scale: 0.125
format_any: false
append_op: binary_add
/distilbert/transformer/layer.0/attention/Softmax:
type: Softmax
input:
/distilbert/transformer/layer.0/attention/Where_output_0:0: {}
/distilbert/transformer/layer.0/attention/Softmax_output_0:0_min: {}
/distilbert/transformer/layer.0/attention/Softmax_output_0:0_max: {}
output:
/distilbert/transformer/layer.0/attention/Softmax_output_0:0: {}
attr:
output_dtype: u8
/distilbert/transformer/layer.0/attention/Transpose_3:
type: Matmul
input:
/distilbert/transformer/layer.0/attention/Softmax_output_0:0: {}
/distilbert/transformer/layer.0/attention/Reshape_2_output_0:0: {}
/distilbert/transformer/layer.0/attention/Softmax_output_0:0_min: {}
/distilbert/transformer/layer.0/attention/Softmax_output_0:0_max: {}
/distilbert/transformer/layer.0/attention/Reshape_2_output_0:0_min: {}
/distilbert/transformer/layer.0/attention/Reshape_2_output_0:0_max: {}
/distilbert/transformer/layer.0/attention/Reshape_4_output_0:0_min: {}
/distilbert/transformer/layer.0/attention/Reshape_4_output_0:0_max: {}
output:
/distilbert/transformer/layer.0/attention/Reshape_4_output_0:0: {}
attr:
src1_perm: 2,0,3,1
dst_perm: 1,3,0,2
output_dtype: u8
reshape: 768,-1
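# out_lin projection with the residual connection fused in: append_op sum adds the
# layer input (the pre-attention LayerNorm output) passed as an extra input.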
/distilbert/transformer/layer.0/Add:
type: InnerProduct
input:
/distilbert/transformer/layer.0/attention/out_lin/Transpose_output_0_quantized:0: {}
/distilbert/transformer/layer.0/attention/Reshape_4_output_0:0: {}
distilbert.transformer.layer.0.attention.out_lin.bias:0: {}
/distilbert/embeddings/LayerNorm/Add_1_output_0:0_reorder: {}
/distilbert/transformer/layer.0/attention/out_lin/Transpose_output_0_quantized:0_min: {}
/distilbert/transformer/layer.0/attention/out_lin/Transpose_output_0_quantized:0_max: {}
/distilbert/transformer/layer.0/attention/Reshape_4_output_0:0_min: {}
/distilbert/transformer/layer.0/attention/Reshape_4_output_0:0_max: {}
/distilbert/transformer/layer.0/Add_output_0:0_min: {}
/distilbert/transformer/layer.0/Add_output_0:0_max: {}
output:
/distilbert/transformer/layer.0/Add_output_0:0: {}
attr:
append_op: sum
/distilbert/transformer/layer.0/sa_layer_norm/Add_1:
type: LayerNorm
input:
/distilbert/transformer/layer.0/Add_output_0:0: {}
distilbert.transformer.layer.0.sa_layer_norm.weight:0: {}
distilbert.transformer.layer.0.sa_layer_norm.bias:0: {}
output:
/distilbert/transformer/layer.0/sa_layer_norm/Add_1_output_0:0: {}
attr:
epsilon: 9.999999960041972e-13
transpose_mode: 1, 0
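# FFN block: lin1 with GELU fused (append_op: gelu_tanh), lin2 with the residual
# fused (append_op: sum), followed by the output LayerNorm.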
/distilbert/transformer/layer.0/ffn/activation/Mul_1_quant_0:
type: Quantize
input:
/distilbert/transformer/layer.0/sa_layer_norm/Add_1_output_0:0: {}
/distilbert/transformer/layer.0/sa_layer_norm/Add_1_output_0:0_min: {}
/distilbert/transformer/layer.0/sa_layer_norm/Add_1_output_0:0_max: {}
output:
/distilbert/transformer/layer.0/sa_layer_norm/Add_1_output_0:0_quant: {}
attr:
output_dtype: u8
/distilbert/transformer/layer.0/ffn/activation/Mul_1:
type: InnerProduct
input:
/distilbert/transformer/layer.0/ffn/lin1/Transpose_output_0_quantized:0: {}
/distilbert/transformer/layer.0/sa_layer_norm/Add_1_output_0:0_quant: {}
distilbert.transformer.layer.0.ffn.lin1.bias:0: {}
/distilbert/transformer/layer.0/ffn/lin1/Transpose_output_0_quantized:0_min: {}
/distilbert/transformer/layer.0/ffn/lin1/Transpose_output_0_quantized:0_max: {}
/distilbert/transformer/layer.0/sa_layer_norm/Add_1_output_0:0_min: {}
/distilbert/transformer/layer.0/sa_layer_norm/Add_1_output_0:0_max: {}
/distilbert/transformer/layer.0/ffn/activation/Mul_1_output_0:0_min: {}
/distilbert/transformer/layer.0/ffn/activation/Mul_1_output_0:0_max: {}
output:
/distilbert/transformer/layer.0/ffn/activation/Mul_1_output_0:0: {}
attr:
append_op: gelu_tanh
output_dtype: u8
/distilbert/transformer/layer.0/Add_1:
type: InnerProduct
input:
/distilbert/transformer/layer.0/ffn/lin2/Transpose_output_0_quantized:0: {}
/distilbert/transformer/layer.0/ffn/activation/Mul_1_output_0:0: {}
distilbert.transformer.layer.0.ffn.lin2.bias:0: {}
/distilbert/transformer/layer.0/sa_layer_norm/Add_1_output_0:0: {}
/distilbert/transformer/layer.0/ffn/lin2/Transpose_output_0_quantized:0_min: {}
/distilbert/transformer/layer.0/ffn/lin2/Transpose_output_0_quantized:0_max: {}
/distilbert/transformer/layer.0/ffn/activation/Mul_1_output_0:0_min: {}
/distilbert/transformer/layer.0/ffn/activation/Mul_1_output_0:0_max: {}
/distilbert/transformer/layer.0/Add_1_output_0:0_min: {}
/distilbert/transformer/layer.0/Add_1_output_0:0_max: {}
output:
/distilbert/transformer/layer.0/Add_1_output_0:0: {}
attr:
append_op: sum
/distilbert/transformer/layer.0/output_layer_norm/Add_1:
type: LayerNorm
input:
/distilbert/transformer/layer.0/Add_1_output_0:0: {}
distilbert.transformer.layer.0.output_layer_norm.weight:0: {}
distilbert.transformer.layer.0.output_layer_norm.bias:0: {}
output:
/distilbert/transformer/layer.0/output_layer_norm/Add_1_output_0:0: {}
attr:
epsilon: 9.999999960041972e-13
transpose_mode: 1, 0
/distilbert/transformer/layer.1/attention/k_lin/Add_quant_0:
type: Quantize
input:
/distilbert/transformer/layer.0/output_layer_norm/Add_1_output_0:0: {}
/distilbert/transformer/layer.0/output_layer_norm/Add_1_output_0:0_min: {}
/distilbert/transformer/layer.0/output_layer_norm/Add_1_output_0:0_max: {}
output:
/distilbert/transformer/layer.0/output_layer_norm/Add_1_output_0:0_quant: {}
attr:
output_dtype: u8
/distilbert/transformer/layer.1/attention/k_lin/Add:
type: InnerProduct
input:
/distilbert/transformer/layer.1/attention/k_lin/Transpose_output_0_quantized:0: {}
/distilbert/transformer/layer.0/output_layer_norm/Add_1_output_0:0_quant: {}
distilbert.transformer.layer.1.attention.k_lin.bias:0: {}
/distilbert/transformer/layer.1/attention/k_lin/Transpose_output_0_quantized:0_min: {}
/distilbert/transformer/layer.1/attention/k_lin/Transpose_output_0_quantized:0_max: {}
/distilbert/transformer/layer.0/output_layer_norm/Add_1_output_0:0_min: {}
/distilbert/transformer/layer.0/output_layer_norm/Add_1_output_0:0_max: {}
/distilbert/transformer/layer.1/attention/k_lin/Add_output_0:0_min: {}
/distilbert/transformer/layer.1/attention/k_lin/Add_output_0:0_max: {}
input_ids:0: {}
output:
/distilbert/transformer/layer.1/attention/Reshape_1_output_0:0: {}
attr:
reshape: 12,64,-1, -1
reshape_dims: '0'
/distilbert/transformer/layer.1/attention/q_lin/Add:
type: InnerProduct
input:
/distilbert/transformer/layer.1/attention/q_lin/Transpose_output_0_quantized:0: {}
/distilbert/transformer/layer.0/output_layer_norm/Add_1_output_0:0_quant: {}
distilbert.transformer.layer.1.attention.q_lin.bias:0: {}
/distilbert/transformer/layer.1/attention/q_lin/Transpose_output_0_quantized:0_min: {}
/distilbert/transformer/layer.1/attention/q_lin/Transpose_output_0_quantized:0_max: {}
/distilbert/transformer/layer.0/output_layer_norm/Add_1_output_0:0_min: {}
/distilbert/transformer/layer.0/output_layer_norm/Add_1_output_0:0_max: {}
/distilbert/transformer/layer.1/attention/q_lin/Add_output_0:0_min: {}
/distilbert/transformer/layer.1/attention/q_lin/Add_output_0:0_max: {}
input_ids:0: {}
output:
/distilbert/transformer/layer.1/attention/Reshape_output_0:0: {}
attr:
reshape: 12,64,-1, -1
reshape_dims: '0'
/distilbert/transformer/layer.1/attention/v_lin/Add:
type: InnerProduct
input:
/distilbert/transformer/layer.1/attention/v_lin/Transpose_output_0_quantized:0: {}
/distilbert/transformer/layer.0/output_layer_norm/Add_1_output_0:0_quant: {}
distilbert.transformer.layer.1.attention.v_lin.bias:0: {}
/distilbert/transformer/layer.1/attention/v_lin/Transpose_output_0_quantized:0_min: {}
/distilbert/transformer/layer.1/attention/v_lin/Transpose_output_0_quantized:0_max: {}
/distilbert/transformer/layer.0/output_layer_norm/Add_1_output_0:0_min: {}
/distilbert/transformer/layer.0/output_layer_norm/Add_1_output_0:0_max: {}
/distilbert/transformer/layer.1/attention/Reshape_2_output_0:0_min: {}
/distilbert/transformer/layer.1/attention/Reshape_2_output_0:0_max: {}
input_ids:0: {}
output:
/distilbert/transformer/layer.1/attention/Reshape_2_output_0:0: {}
attr:
output_dtype: s8
reshape: 12,64,-1, -1
reshape_dims: '0'
/distilbert/transformer/layer.1/attention/Where:
type: Matmul
input:
/distilbert/transformer/layer.1/attention/Reshape_output_0:0: {}
/distilbert/transformer/layer.1/attention/Reshape_1_output_0:0: {}
padding_sequence:0: {}
output:
/distilbert/transformer/layer.1/attention/Where_output_0:0: {}
attr:
src0_perm: 2,0,3,1
src1_perm: 2,0,1,3
output_scale: 0.125
format_any: false
append_op: binary_add
/distilbert/transformer/layer.1/attention/Softmax:
type: Softmax
input:
/distilbert/transformer/layer.1/attention/Where_output_0:0: {}
/distilbert/transformer/layer.1/attention/Softmax_output_0:0_min: {}
/distilbert/transformer/layer.1/attention/Softmax_output_0:0_max: {}
output:
/distilbert/transformer/layer.1/attention/Softmax_output_0:0: {}
attr:
output_dtype: u8
/distilbert/transformer/layer.1/attention/Transpose_3:
type: Matmul
input:
/distilbert/transformer/layer.1/attention/Softmax_output_0:0: {}
/distilbert/transformer/layer.1/attention/Reshape_2_output_0:0: {}
/distilbert/transformer/layer.1/attention/Softmax_output_0:0_min: {}
/distilbert/transformer/layer.1/attention/Softmax_output_0:0_max: {}
/distilbert/transformer/layer.1/attention/Reshape_2_output_0:0_min: {}
/distilbert/transformer/layer.1/attention/Reshape_2_output_0:0_max: {}
/distilbert/transformer/layer.1/attention/Reshape_4_output_0:0_min: {}
/distilbert/transformer/layer.1/attention/Reshape_4_output_0:0_max: {}
output:
/distilbert/transformer/layer.1/attention/Reshape_4_output_0:0: {}
attr:
src1_perm: 2,0,3,1
dst_perm: 1,3,0,2
output_dtype: u8
reshape: 768,-1
/distilbert/transformer/layer.1/Add:
type: InnerProduct
input:
/distilbert/transformer/layer.1/attention/out_lin/Transpose_output_0_quantized:0: {}
/distilbert/transformer/layer.1/attention/Reshape_4_output_0:0: {}
distilbert.transformer.layer.1.attention.out_lin.bias:0: {}
/distilbert/transformer/layer.0/output_layer_norm/Add_1_output_0:0: {}
/distilbert/transformer/layer.1/attention/out_lin/Transpose_output_0_quantized:0_min: {}
/distilbert/transformer/layer.1/attention/out_lin/Transpose_output_0_quantized:0_max: {}
/distilbert/transformer/layer.1/attention/Reshape_4_output_0:0_min: {}
/distilbert/transformer/layer.1/attention/Reshape_4_output_0:0_max: {}
/distilbert/transformer/layer.1/Add_output_0:0_min: {}
/distilbert/transformer/layer.1/Add_output_0:0_max: {}
output:
/distilbert/transformer/layer.1/Add_output_0:0: {}
attr:
append_op: sum
/distilbert/transformer/layer.1/sa_layer_norm/Add_1:
type: LayerNorm
input:
/distilbert/transformer/layer.1/Add_output_0:0: {}
distilbert.transformer.layer.1.sa_layer_norm.weight:0: {}
distilbert.transformer.layer.1.sa_layer_norm.bias:0: {}
output:
/distilbert/transformer/layer.1/sa_layer_norm/Add_1_output_0:0: {}
attr:
epsilon: 9.999999960041972e-13
transpose_mode: 1, 0
/distilbert/transformer/layer.1/ffn/activation/Mul_1_quant_0:
type: Quantize
input:
/distilbert/transformer/layer.1/sa_layer_norm/Add_1_output_0:0: {}
/distilbert/transformer/layer.1/sa_layer_norm/Add_1_output_0:0_min: {}
/distilbert/transformer/layer.1/sa_layer_norm/Add_1_output_0:0_max: {}
output:
/distilbert/transformer/layer.1/sa_layer_norm/Add_1_output_0:0_quant: {}
attr:
output_dtype: u8
/distilbert/transformer/layer.1/ffn/activation/Mul_1:
type: InnerProduct
input:
/distilbert/transformer/layer.1/ffn/lin1/Transpose_output_0_quantized:0: {}
/distilbert/transformer/layer.1/sa_layer_norm/Add_1_output_0:0_quant: {}
distilbert.transformer.layer.1.ffn.lin1.bias:0: {}
/distilbert/transformer/layer.1/ffn/lin1/Transpose_output_0_quantized:0_min: {}
/distilbert/transformer/layer.1/ffn/lin1/Transpose_output_0_quantized:0_max: {}
/distilbert/transformer/layer.1/sa_layer_norm/Add_1_output_0:0_min: {}
/distilbert/transformer/layer.1/sa_layer_norm/Add_1_output_0:0_max: {}
/distilbert/transformer/layer.1/ffn/activation/Mul_1_output_0:0_min: {}
/distilbert/transformer/layer.1/ffn/activation/Mul_1_output_0:0_max: {}
output:
/distilbert/transformer/layer.1/ffn/activation/Mul_1_output_0:0: {}
attr:
append_op: gelu_tanh
output_dtype: u8
/distilbert/transformer/layer.1/Add_1:
type: InnerProduct
input:
/distilbert/transformer/layer.1/ffn/lin2/Transpose_output_0_quantized:0: {}
/distilbert/transformer/layer.1/ffn/activation/Mul_1_output_0:0: {}
distilbert.transformer.layer.1.ffn.lin2.bias:0: {}
/distilbert/transformer/layer.1/sa_layer_norm/Add_1_output_0:0: {}
/distilbert/transformer/layer.1/ffn/lin2/Transpose_output_0_quantized:0_min: {}
/distilbert/transformer/layer.1/ffn/lin2/Transpose_output_0_quantized:0_max: {}
/distilbert/transformer/layer.1/ffn/activation/Mul_1_output_0:0_min: {}
/distilbert/transformer/layer.1/ffn/activation/Mul_1_output_0:0_max: {}
/distilbert/transformer/layer.1/Add_1_output_0:0_min: {}
/distilbert/transformer/layer.1/Add_1_output_0:0_max: {}
output:
/distilbert/transformer/layer.1/Add_1_output_0:0: {}
attr:
append_op: sum
/distilbert/transformer/layer.1/output_layer_norm/Add_1:
type: LayerNorm
input:
/distilbert/transformer/layer.1/Add_1_output_0:0: {}
distilbert.transformer.layer.1.output_layer_norm.weight:0: {}
distilbert.transformer.layer.1.output_layer_norm.bias:0: {}
output:
/distilbert/transformer/layer.1/output_layer_norm/Add_1_output_0:0: {}
attr:
epsilon: 9.999999960041972e-13
transpose_mode: 1, 0
/distilbert/transformer/layer.2/attention/k_lin/Add_quant_0:
type: Quantize
input:
/distilbert/transformer/layer.1/output_layer_norm/Add_1_output_0:0: {}
/distilbert/transformer/layer.1/output_layer_norm/Add_1_output_0:0_min: {}
/distilbert/transformer/layer.1/output_layer_norm/Add_1_output_0:0_max: {}
output:
/distilbert/transformer/layer.1/output_layer_norm/Add_1_output_0:0_quant: {}
attr:
output_dtype: u8
/distilbert/transformer/layer.2/attention/k_lin/Add:
type: InnerProduct
input:
/distilbert/transformer/layer.2/attention/k_lin/Transpose_output_0_quantized:0: {}
/distilbert/transformer/layer.1/output_layer_norm/Add_1_output_0:0_quant: {}
distilbert.transformer.layer.2.attention.k_lin.bias:0: {}
/distilbert/transformer/layer.2/attention/k_lin/Transpose_output_0_quantized:0_min: {}
/distilbert/transformer/layer.2/attention/k_lin/Transpose_output_0_quantized:0_max: {}
/distilbert/transformer/layer.1/output_layer_norm/Add_1_output_0:0_min: {}
/distilbert/transformer/layer.1/output_layer_norm/Add_1_output_0:0_max: {}
/distilbert/transformer/layer.2/attention/k_lin/Add_output_0:0_min: {}
/distilbert/transformer/layer.2/attention/k_lin/Add_output_0:0_max: {}
input_ids:0: {}
output:
/distilbert/transformer/layer.2/attention/Reshape_1_output_0:0: {}
attr:
reshape: 12,64,-1, -1
reshape_dims: '0'
/distilbert/transformer/layer.2/attention/q_lin/Add:
type: InnerProduct
input:
/distilbert/transformer/layer.2/attention/q_lin/Transpose_output_0_quantized:0: {}
/distilbert/transformer/layer.1/output_layer_norm/Add_1_output_0:0_quant: {}
distilbert.transformer.layer.2.attention.q_lin.bias:0: {}
/distilbert/transformer/layer.2/attention/q_lin/Transpose_output_0_quantized:0_min: {}
/distilbert/transformer/layer.2/attention/q_lin/Transpose_output_0_quantized:0_max: {}
/distilbert/transformer/layer.1/output_layer_norm/Add_1_output_0:0_min: {}
/distilbert/transformer/layer.1/output_layer_norm/Add_1_output_0:0_max: {}
/distilbert/transformer/layer.2/attention/q_lin/Add_output_0:0_min: {}
/distilbert/transformer/layer.2/attention/q_lin/Add_output_0:0_max: {}
input_ids:0: {}
output:
/distilbert/transformer/layer.2/attention/Reshape_output_0:0: {}
attr:
reshape: 12,64,-1, -1
reshape_dims: '0'
/distilbert/transformer/layer.2/attention/v_lin/Add:
type: InnerProduct
input:
/distilbert/transformer/layer.2/attention/v_lin/Transpose_output_0_quantized:0: {}
/distilbert/transformer/layer.1/output_layer_norm/Add_1_output_0:0_quant: {}
distilbert.transformer.layer.2.attention.v_lin.bias:0: {}
/distilbert/transformer/layer.2/attention/v_lin/Transpose_output_0_quantized:0_min: {}
/distilbert/transformer/layer.2/attention/v_lin/Transpose_output_0_quantized:0_max: {}
/distilbert/transformer/layer.1/output_layer_norm/Add_1_output_0:0_min: {}
/distilbert/transformer/layer.1/output_layer_norm/Add_1_output_0:0_max: {}
/distilbert/transformer/layer.2/attention/Reshape_2_output_0:0_min: {}
/distilbert/transformer/layer.2/attention/Reshape_2_output_0:0_max: {}
input_ids:0: {}
output:
/distilbert/transformer/layer.2/attention/Reshape_2_output_0:0: {}
attr:
output_dtype: s8
reshape: 12,64,-1, -1
reshape_dims: '0'
/distilbert/transformer/layer.2/attention/Where:
type: Matmul
input:
/distilbert/transformer/layer.2/attention/Reshape_output_0:0: {}
/distilbert/transformer/layer.2/attention/Reshape_1_output_0:0: {}
padding_sequence:0: {}
output:
/distilbert/transformer/layer.2/attention/Where_output_0:0: {}
attr:
src0_perm: 2,0,3,1
src1_perm: 2,0,1,3
output_scale: 0.125
format_any: false
append_op: binary_add
/distilbert/transformer/layer.2/attention/Softmax:
type: Softmax
input:
/distilbert/transformer/layer.2/attention/Where_output_0:0: {}
/distilbert/transformer/layer.2/attention/Softmax_output_0:0_min: {}
/distilbert/transformer/layer.2/attention/Softmax_output_0:0_max: {}
output:
/distilbert/transformer/layer.2/attention/Softmax_output_0:0: {}
attr:
output_dtype: u8
/distilbert/transformer/layer.2/attention/Transpose_3:
type: Matmul
input:
/distilbert/transformer/layer.2/attention/Softmax_output_0:0: {}
/distilbert/transformer/layer.2/attention/Reshape_2_output_0:0: {}
/distilbert/transformer/layer.2/attention/Softmax_output_0:0_min: {}
/distilbert/transformer/layer.2/attention/Softmax_output_0:0_max: {}
/distilbert/transformer/layer.2/attention/Reshape_2_output_0:0_min: {}
/distilbert/transformer/layer.2/attention/Reshape_2_output_0:0_max: {}
/distilbert/transformer/layer.2/attention/Reshape_4_output_0:0_min: {}
/distilbert/transformer/layer.2/attention/Reshape_4_output_0:0_max: {}
output:
/distilbert/transformer/layer.2/attention/Reshape_4_output_0:0: {}
attr:
src1_perm: 2,0,3,1
dst_perm: 1,3,0,2
output_dtype: u8
reshape: 768,-1
/distilbert/transformer/layer.2/Add:
type: InnerProduct
input:
/distilbert/transformer/layer.2/attention/out_lin/Transpose_output_0_quantized:0: {}
/distilbert/transformer/layer.2/attention/Reshape_4_output_0:0: {}
distilbert.transformer.layer.2.attention.out_lin.bias:0: {}
/distilbert/transformer/layer.1/output_layer_norm/Add_1_output_0:0: {}
/distilbert/transformer/layer.2/attention/out_lin/Transpose_output_0_quantized:0_min: {}
/distilbert/transformer/layer.2/attention/out_lin/Transpose_output_0_quantized:0_max: {}
/distilbert/transformer/layer.2/attention/Reshape_4_output_0:0_min: {}
/distilbert/transformer/layer.2/attention/Reshape_4_output_0:0_max: {}
/distilbert/transformer/layer.2/Add_output_0:0_min: {}
/distilbert/transformer/layer.2/Add_output_0:0_max: {}
output:
/distilbert/transformer/layer.2/Add_output_0:0: {}
attr:
append_op: sum
/distilbert/transformer/layer.2/sa_layer_norm/Add_1:
type: LayerNorm
input:
/distilbert/transformer/layer.2/Add_output_0:0: {}
distilbert.transformer.layer.2.sa_layer_norm.weight:0: {}
distilbert.transformer.layer.2.sa_layer_norm.bias:0: {}
output:
/distilbert/transformer/layer.2/sa_layer_norm/Add_1_output_0:0: {}
attr:
epsilon: 9.999999960041972e-13
transpose_mode: 1, 0
/distilbert/transformer/layer.2/ffn/activation/Mul_1_quant_0:
type: Quantize
input:
/distilbert/transformer/layer.2/sa_layer_norm/Add_1_output_0:0: {}
/distilbert/transformer/layer.2/sa_layer_norm/Add_1_output_0:0_min: {}
/distilbert/transformer/layer.2/sa_layer_norm/Add_1_output_0:0_max: {}
output:
/distilbert/transformer/layer.2/sa_layer_norm/Add_1_output_0:0_quant: {}
attr:
output_dtype: u8
/distilbert/transformer/layer.2/ffn/activation/Mul_1:
type: InnerProduct
input:
/distilbert/transformer/layer.2/ffn/lin1/Transpose_output_0_quantized:0: {}
/distilbert/transformer/layer.2/sa_layer_norm/Add_1_output_0:0_quant: {}
distilbert.transformer.layer.2.ffn.lin1.bias:0: {}
/distilbert/transformer/layer.2/ffn/lin1/Transpose_output_0_quantized:0_min: {}
/distilbert/transformer/layer.2/ffn/lin1/Transpose_output_0_quantized:0_max: {}
/distilbert/transformer/layer.2/sa_layer_norm/Add_1_output_0:0_min: {}
/distilbert/transformer/layer.2/sa_layer_norm/Add_1_output_0:0_max: {}
/distilbert/transformer/layer.2/ffn/activation/Mul_1_output_0:0_min: {}
/distilbert/transformer/layer.2/ffn/activation/Mul_1_output_0:0_max: {}
output:
/distilbert/transformer/layer.2/ffn/activation/Mul_1_output_0:0: {}
attr:
append_op: gelu_tanh
output_dtype: u8
/distilbert/transformer/layer.2/Add_1:
type: InnerProduct
input:
/distilbert/transformer/layer.2/ffn/lin2/Transpose_output_0_quantized:0: {}
/distilbert/transformer/layer.2/ffn/activation/Mul_1_output_0:0: {}
distilbert.transformer.layer.2.ffn.lin2.bias:0: {}
/distilbert/transformer/layer.2/sa_layer_norm/Add_1_output_0:0: {}
/distilbert/transformer/layer.2/ffn/lin2/Transpose_output_0_quantized:0_min: {}
/distilbert/transformer/layer.2/ffn/lin2/Transpose_output_0_quantized:0_max: {}
/distilbert/transformer/layer.2/ffn/activation/Mul_1_output_0:0_min: {}
/distilbert/transformer/layer.2/ffn/activation/Mul_1_output_0:0_max: {}
/distilbert/transformer/layer.2/Add_1_output_0:0_min: {}
/distilbert/transformer/layer.2/Add_1_output_0:0_max: {}
output:
/distilbert/transformer/layer.2/Add_1_output_0:0: {}
attr:
append_op: sum
/distilbert/transformer/layer.2/output_layer_norm/Add_1:
type: LayerNorm
input:
/distilbert/transformer/layer.2/Add_1_output_0:0: {}
distilbert.transformer.layer.2.output_layer_norm.weight:0: {}
distilbert.transformer.layer.2.output_layer_norm.bias:0: {}
output:
/distilbert/transformer/layer.2/output_layer_norm/Add_1_output_0:0: {}
attr:
epsilon: 9.999999960041972e-13
transpose_mode: 1, 0
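# --- transformer layer 3 (same fused attention/FFN pattern, consuming layer 2's output_layer_norm) ---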
/distilbert/transformer/layer.3/attention/k_lin/Add_quant_0:
type: Quantize
input:
/distilbert/transformer/layer.2/output_layer_norm/Add_1_output_0:0: {}
/distilbert/transformer/layer.2/output_layer_norm/Add_1_output_0:0_min: {}
/distilbert/transformer/layer.2/output_layer_norm/Add_1_output_0:0_max: {}
output:
/distilbert/transformer/layer.2/output_layer_norm/Add_1_output_0:0_quant: {}
attr:
output_dtype: u8
/distilbert/transformer/layer.3/attention/k_lin/Add:
type: InnerProduct
input:
/distilbert/transformer/layer.3/attention/k_lin/Transpose_output_0_quantized:0: {}
/distilbert/transformer/layer.2/output_layer_norm/Add_1_output_0:0_quant: {}
distilbert.transformer.layer.3.attention.k_lin.bias:0: {}
/distilbert/transformer/layer.3/attention/k_lin/Transpose_output_0_quantized:0_min: {}
/distilbert/transformer/layer.3/attention/k_lin/Transpose_output_0_quantized:0_max: {}
/distilbert/transformer/layer.2/output_layer_norm/Add_1_output_0:0_min: {}
/distilbert/transformer/layer.2/output_layer_norm/Add_1_output_0:0_max: {}
/distilbert/transformer/layer.3/attention/k_lin/Add_output_0:0_min: {}
/distilbert/transformer/layer.3/attention/k_lin/Add_output_0:0_max: {}
input_ids:0: {}
output:
/distilbert/transformer/layer.3/attention/Reshape_1_output_0:0: {}
attr:
reshape: 12,64,-1, -1
reshape_dims: '0'
/distilbert/transformer/layer.3/attention/q_lin/Add:
type: InnerProduct
input:
/distilbert/transformer/layer.3/attention/q_lin/Transpose_output_0_quantized:0: {}
/distilbert/transformer/layer.2/output_layer_norm/Add_1_output_0:0_quant: {}
distilbert.transformer.layer.3.attention.q_lin.bias:0: {}
/distilbert/transformer/layer.3/attention/q_lin/Transpose_output_0_quantized:0_min: {}
/distilbert/transformer/layer.3/attention/q_lin/Transpose_output_0_quantized:0_max: {}
/distilbert/transformer/layer.2/output_layer_norm/Add_1_output_0:0_min: {}
/distilbert/transformer/layer.2/output_layer_norm/Add_1_output_0:0_max: {}
/distilbert/transformer/layer.3/attention/q_lin/Add_output_0:0_min: {}
/distilbert/transformer/layer.3/attention/q_lin/Add_output_0:0_max: {}
input_ids:0: {}
output:
/distilbert/transformer/layer.3/attention/Reshape_output_0:0: {}
attr:
reshape: 12,64,-1, -1
reshape_dims: '0'
/distilbert/transformer/layer.3/attention/v_lin/Add:
type: InnerProduct
input:
/distilbert/transformer/layer.3/attention/v_lin/Transpose_output_0_quantized:0: {}
/distilbert/transformer/layer.2/output_layer_norm/Add_1_output_0:0_quant: {}
distilbert.transformer.layer.3.attention.v_lin.bias:0: {}
/distilbert/transformer/layer.3/attention/v_lin/Transpose_output_0_quantized:0_min: {}
/distilbert/transformer/layer.3/attention/v_lin/Transpose_output_0_quantized:0_max: {}
/distilbert/transformer/layer.2/output_layer_norm/Add_1_output_0:0_min: {}
/distilbert/transformer/layer.2/output_layer_norm/Add_1_output_0:0_max: {}
/distilbert/transformer/layer.3/attention/Reshape_2_output_0:0_min: {}
/distilbert/transformer/layer.3/attention/Reshape_2_output_0:0_max: {}
input_ids:0: {}
output:
/distilbert/transformer/layer.3/attention/Reshape_2_output_0:0: {}
attr:
output_dtype: s8
reshape: 12,64,-1, -1
reshape_dims: '0'
/distilbert/transformer/layer.3/attention/Where:
type: Matmul
input:
/distilbert/transformer/layer.3/attention/Reshape_output_0:0: {}
/distilbert/transformer/layer.3/attention/Reshape_1_output_0:0: {}
padding_sequence:0: {}
output:
/distilbert/transformer/layer.3/attention/Where_output_0:0: {}
attr:
src0_perm: 2,0,3,1
src1_perm: 2,0,1,3
output_scale: 0.125
format_any: false
append_op: binary_add
/distilbert/transformer/layer.3/attention/Softmax:
type: Softmax
input:
/distilbert/transformer/layer.3/attention/Where_output_0:0: {}
/distilbert/transformer/layer.3/attention/Softmax_output_0:0_min: {}
/distilbert/transformer/layer.3/attention/Softmax_output_0:0_max: {}
output:
/distilbert/transformer/layer.3/attention/Softmax_output_0:0: {}
attr:
output_dtype: u8
/distilbert/transformer/layer.3/attention/Transpose_3:
type: Matmul
input:
/distilbert/transformer/layer.3/attention/Softmax_output_0:0: {}
/distilbert/transformer/layer.3/attention/Reshape_2_output_0:0: {}
/distilbert/transformer/layer.3/attention/Softmax_output_0:0_min: {}
/distilbert/transformer/layer.3/attention/Softmax_output_0:0_max: {}
/distilbert/transformer/layer.3/attention/Reshape_2_output_0:0_min: {}
/distilbert/transformer/layer.3/attention/Reshape_2_output_0:0_max: {}
/distilbert/transformer/layer.3/attention/Reshape_4_output_0:0_min: {}
/distilbert/transformer/layer.3/attention/Reshape_4_output_0:0_max: {}
output:
/distilbert/transformer/layer.3/attention/Reshape_4_output_0:0: {}
attr:
src1_perm: 2,0,3,1
dst_perm: 1,3,0,2
output_dtype: u8
reshape: 768,-1
/distilbert/transformer/layer.3/Add:
type: InnerProduct
input:
/distilbert/transformer/layer.3/attention/out_lin/Transpose_output_0_quantized:0: {}
/distilbert/transformer/layer.3/attention/Reshape_4_output_0:0: {}
distilbert.transformer.layer.3.attention.out_lin.bias:0: {}
/distilbert/transformer/layer.2/output_layer_norm/Add_1_output_0:0: {}
/distilbert/transformer/layer.3/attention/out_lin/Transpose_output_0_quantized:0_min: {}
/distilbert/transformer/layer.3/attention/out_lin/Transpose_output_0_quantized:0_max: {}
/distilbert/transformer/layer.3/attention/Reshape_4_output_0:0_min: {}
/distilbert/transformer/layer.3/attention/Reshape_4_output_0:0_max: {}
/distilbert/transformer/layer.3/Add_output_0:0_min: {}
/distilbert/transformer/layer.3/Add_output_0:0_max: {}
output:
/distilbert/transformer/layer.3/Add_output_0:0: {}
attr:
append_op: sum
/distilbert/transformer/layer.3/sa_layer_norm/Add_1:
type: LayerNorm
input:
/distilbert/transformer/layer.3/Add_output_0:0: {}
distilbert.transformer.layer.3.sa_layer_norm.weight:0: {}
distilbert.transformer.layer.3.sa_layer_norm.bias:0: {}
output:
/distilbert/transformer/layer.3/sa_layer_norm/Add_1_output_0:0: {}
attr:
epsilon: 9.999999960041972e-13
transpose_mode: 1, 0
/distilbert/transformer/layer.3/ffn/activation/Mul_1_quant_0:
type: Quantize
input:
/distilbert/transformer/layer.3/sa_layer_norm/Add_1_output_0:0: {}
/distilbert/transformer/layer.3/sa_layer_norm/Add_1_output_0:0_min: {}
/distilbert/transformer/layer.3/sa_layer_norm/Add_1_output_0:0_max: {}
output:
/distilbert/transformer/layer.3/sa_layer_norm/Add_1_output_0:0_quant: {}
attr:
output_dtype: u8
/distilbert/transformer/layer.3/ffn/activation/Mul_1:
type: InnerProduct
input:
/distilbert/transformer/layer.3/ffn/lin1/Transpose_output_0_quantized:0: {}
/distilbert/transformer/layer.3/sa_layer_norm/Add_1_output_0:0_quant: {}
distilbert.transformer.layer.3.ffn.lin1.bias:0: {}
/distilbert/transformer/layer.3/ffn/lin1/Transpose_output_0_quantized:0_min: {}
/distilbert/transformer/layer.3/ffn/lin1/Transpose_output_0_quantized:0_max: {}
/distilbert/transformer/layer.3/sa_layer_norm/Add_1_output_0:0_min: {}
/distilbert/transformer/layer.3/sa_layer_norm/Add_1_output_0:0_max: {}
/distilbert/transformer/layer.3/ffn/activation/Mul_1_output_0:0_min: {}
/distilbert/transformer/layer.3/ffn/activation/Mul_1_output_0:0_max: {}
output:
/distilbert/transformer/layer.3/ffn/activation/Mul_1_output_0:0: {}
attr:
append_op: gelu_tanh
output_dtype: u8
/distilbert/transformer/layer.3/Add_1:
type: InnerProduct
input:
/distilbert/transformer/layer.3/ffn/lin2/Transpose_output_0_quantized:0: {}
/distilbert/transformer/layer.3/ffn/activation/Mul_1_output_0:0: {}
distilbert.transformer.layer.3.ffn.lin2.bias:0: {}
/distilbert/transformer/layer.3/sa_layer_norm/Add_1_output_0:0: {}
/distilbert/transformer/layer.3/ffn/lin2/Transpose_output_0_quantized:0_min: {}
/distilbert/transformer/layer.3/ffn/lin2/Transpose_output_0_quantized:0_max: {}
/distilbert/transformer/layer.3/ffn/activation/Mul_1_output_0:0_min: {}
/distilbert/transformer/layer.3/ffn/activation/Mul_1_output_0:0_max: {}
/distilbert/transformer/layer.3/Add_1_output_0:0_min: {}
/distilbert/transformer/layer.3/Add_1_output_0:0_max: {}
output:
/distilbert/transformer/layer.3/Add_1_output_0:0: {}
attr:
append_op: sum
/distilbert/transformer/layer.3/output_layer_norm/Add_1:
type: LayerNorm
input:
/distilbert/transformer/layer.3/Add_1_output_0:0: {}
distilbert.transformer.layer.3.output_layer_norm.weight:0: {}
distilbert.transformer.layer.3.output_layer_norm.bias:0: {}
output:
/distilbert/transformer/layer.3/output_layer_norm/Add_1_output_0:0: {}
attr:
epsilon: 9.999999960041972e-13
transpose_mode: 1, 0
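# --- transformer layer 4 (same fused attention/FFN pattern, consuming layer 3's output_layer_norm) ---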
/distilbert/transformer/layer.4/attention/k_lin/Add_quant_0:
type: Quantize
input:
/distilbert/transformer/layer.3/output_layer_norm/Add_1_output_0:0: {}
/distilbert/transformer/layer.3/output_layer_norm/Add_1_output_0:0_min: {}
/distilbert/transformer/layer.3/output_layer_norm/Add_1_output_0:0_max: {}
output:
/distilbert/transformer/layer.3/output_layer_norm/Add_1_output_0:0_quant: {}
attr:
output_dtype: u8
/distilbert/transformer/layer.4/attention/k_lin/Add:
type: InnerProduct
input:
/distilbert/transformer/layer.4/attention/k_lin/Transpose_output_0_quantized:0: {}
/distilbert/transformer/layer.3/output_layer_norm/Add_1_output_0:0_quant: {}
distilbert.transformer.layer.4.attention.k_lin.bias:0: {}
/distilbert/transformer/layer.4/attention/k_lin/Transpose_output_0_quantized:0_min: {}
/distilbert/transformer/layer.4/attention/k_lin/Transpose_output_0_quantized:0_max: {}
/distilbert/transformer/layer.3/output_layer_norm/Add_1_output_0:0_min: {}
/distilbert/transformer/layer.3/output_layer_norm/Add_1_output_0:0_max: {}
/distilbert/transformer/layer.4/attention/k_lin/Add_output_0:0_min: {}
/distilbert/transformer/layer.4/attention/k_lin/Add_output_0:0_max: {}
input_ids:0: {}
output:
/distilbert/transformer/layer.4/attention/Reshape_1_output_0:0: {}
attr:
reshape: 12,64,-1, -1
reshape_dims: '0'
/distilbert/transformer/layer.4/attention/q_lin/Add:
type: InnerProduct
input:
/distilbert/transformer/layer.4/attention/q_lin/Transpose_output_0_quantized:0: {}
/distilbert/transformer/layer.3/output_layer_norm/Add_1_output_0:0_quant: {}
distilbert.transformer.layer.4.attention.q_lin.bias:0: {}
/distilbert/transformer/layer.4/attention/q_lin/Transpose_output_0_quantized:0_min: {}
/distilbert/transformer/layer.4/attention/q_lin/Transpose_output_0_quantized:0_max: {}
/distilbert/transformer/layer.3/output_layer_norm/Add_1_output_0:0_min: {}
/distilbert/transformer/layer.3/output_layer_norm/Add_1_output_0:0_max: {}
/distilbert/transformer/layer.4/attention/q_lin/Add_output_0:0_min: {}
/distilbert/transformer/layer.4/attention/q_lin/Add_output_0:0_max: {}
input_ids:0: {}
output:
/distilbert/transformer/layer.4/attention/Reshape_output_0:0: {}
attr:
reshape: 12,64,-1, -1
reshape_dims: '0'
/distilbert/transformer/layer.4/attention/v_lin/Add:
type: InnerProduct
input:
/distilbert/transformer/layer.4/attention/v_lin/Transpose_output_0_quantized:0: {}
/distilbert/transformer/layer.3/output_layer_norm/Add_1_output_0:0_quant: {}
distilbert.transformer.layer.4.attention.v_lin.bias:0: {}
/distilbert/transformer/layer.4/attention/v_lin/Transpose_output_0_quantized:0_min: {}
/distilbert/transformer/layer.4/attention/v_lin/Transpose_output_0_quantized:0_max: {}
/distilbert/transformer/layer.3/output_layer_norm/Add_1_output_0:0_min: {}
/distilbert/transformer/layer.3/output_layer_norm/Add_1_output_0:0_max: {}
/distilbert/transformer/layer.4/attention/Reshape_2_output_0:0_min: {}
/distilbert/transformer/layer.4/attention/Reshape_2_output_0:0_max: {}
input_ids:0: {}
output:
/distilbert/transformer/layer.4/attention/Reshape_2_output_0:0: {}
attr:
output_dtype: s8
reshape: 12,64,-1, -1
reshape_dims: '0'
/distilbert/transformer/layer.4/attention/Where:
type: Matmul
input:
/distilbert/transformer/layer.4/attention/Reshape_output_0:0: {}
/distilbert/transformer/layer.4/attention/Reshape_1_output_0:0: {}
padding_sequence:0: {}
output:
/distilbert/transformer/layer.4/attention/Where_output_0:0: {}
attr:
src0_perm: 2,0,3,1
src1_perm: 2,0,1,3
output_scale: 0.125
format_any: false
append_op: binary_add
/distilbert/transformer/layer.4/attention/Softmax:
type: Softmax
input:
/distilbert/transformer/layer.4/attention/Where_output_0:0: {}
/distilbert/transformer/layer.4/attention/Softmax_output_0:0_min: {}
/distilbert/transformer/layer.4/attention/Softmax_output_0:0_max: {}
output:
/distilbert/transformer/layer.4/attention/Softmax_output_0:0: {}
attr:
output_dtype: u8
/distilbert/transformer/layer.4/attention/Transpose_3:
type: Matmul
input:
/distilbert/transformer/layer.4/attention/Softmax_output_0:0: {}
/distilbert/transformer/layer.4/attention/Reshape_2_output_0:0: {}
/distilbert/transformer/layer.4/attention/Softmax_output_0:0_min: {}
/distilbert/transformer/layer.4/attention/Softmax_output_0:0_max: {}
/distilbert/transformer/layer.4/attention/Reshape_2_output_0:0_min: {}
/distilbert/transformer/layer.4/attention/Reshape_2_output_0:0_max: {}
/distilbert/transformer/layer.4/attention/Reshape_4_output_0:0_min: {}
/distilbert/transformer/layer.4/attention/Reshape_4_output_0:0_max: {}
output:
/distilbert/transformer/layer.4/attention/Reshape_4_output_0:0: {}
attr:
src1_perm: 2,0,3,1
dst_perm: 1,3,0,2
output_dtype: u8
reshape: 768,-1
/distilbert/transformer/layer.4/Add:
type: InnerProduct
input:
/distilbert/transformer/layer.4/attention/out_lin/Transpose_output_0_quantized:0: {}
/distilbert/transformer/layer.4/attention/Reshape_4_output_0:0: {}
distilbert.transformer.layer.4.attention.out_lin.bias:0: {}
/distilbert/transformer/layer.3/output_layer_norm/Add_1_output_0:0: {}
/distilbert/transformer/layer.4/attention/out_lin/Transpose_output_0_quantized:0_min: {}
/distilbert/transformer/layer.4/attention/out_lin/Transpose_output_0_quantized:0_max: {}
/distilbert/transformer/layer.4/attention/Reshape_4_output_0:0_min: {}
/distilbert/transformer/layer.4/attention/Reshape_4_output_0:0_max: {}
/distilbert/transformer/layer.4/Add_output_0:0_min: {}
/distilbert/transformer/layer.4/Add_output_0:0_max: {}
output:
/distilbert/transformer/layer.4/Add_output_0:0: {}
attr:
append_op: sum
/distilbert/transformer/layer.4/sa_layer_norm/Add_1:
type: LayerNorm
input:
/distilbert/transformer/layer.4/Add_output_0:0: {}
distilbert.transformer.layer.4.sa_layer_norm.weight:0: {}
distilbert.transformer.layer.4.sa_layer_norm.bias:0: {}
output:
/distilbert/transformer/layer.4/sa_layer_norm/Add_1_output_0:0: {}
attr:
epsilon: 9.999999960041972e-13
transpose_mode: 1, 0
/distilbert/transformer/layer.4/ffn/activation/Mul_1_quant_0:
type: Quantize
input:
/distilbert/transformer/layer.4/sa_layer_norm/Add_1_output_0:0: {}
/distilbert/transformer/layer.4/sa_layer_norm/Add_1_output_0:0_min: {}
/distilbert/transformer/layer.4/sa_layer_norm/Add_1_output_0:0_max: {}
output:
/distilbert/transformer/layer.4/sa_layer_norm/Add_1_output_0:0_quant: {}
attr:
output_dtype: u8
/distilbert/transformer/layer.4/ffn/activation/Mul_1:
type: InnerProduct
input:
/distilbert/transformer/layer.4/ffn/lin1/Transpose_output_0_quantized:0: {}
/distilbert/transformer/layer.4/sa_layer_norm/Add_1_output_0:0_quant: {}
distilbert.transformer.layer.4.ffn.lin1.bias:0: {}
/distilbert/transformer/layer.4/ffn/lin1/Transpose_output_0_quantized:0_min: {}
/distilbert/transformer/layer.4/ffn/lin1/Transpose_output_0_quantized:0_max: {}
/distilbert/transformer/layer.4/sa_layer_norm/Add_1_output_0:0_min: {}
/distilbert/transformer/layer.4/sa_layer_norm/Add_1_output_0:0_max: {}
/distilbert/transformer/layer.4/ffn/activation/Mul_1_output_0:0_min: {}
/distilbert/transformer/layer.4/ffn/activation/Mul_1_output_0:0_max: {}
output:
/distilbert/transformer/layer.4/ffn/activation/Mul_1_output_0:0: {}
attr:
append_op: gelu_tanh
output_dtype: u8
/distilbert/transformer/layer.4/Add_1:
type: InnerProduct
input:
/distilbert/transformer/layer.4/ffn/lin2/Transpose_output_0_quantized:0: {}
/distilbert/transformer/layer.4/ffn/activation/Mul_1_output_0:0: {}
distilbert.transformer.layer.4.ffn.lin2.bias:0: {}
/distilbert/transformer/layer.4/sa_layer_norm/Add_1_output_0:0: {}
/distilbert/transformer/layer.4/ffn/lin2/Transpose_output_0_quantized:0_min: {}
/distilbert/transformer/layer.4/ffn/lin2/Transpose_output_0_quantized:0_max: {}
/distilbert/transformer/layer.4/ffn/activation/Mul_1_output_0:0_min: {}
/distilbert/transformer/layer.4/ffn/activation/Mul_1_output_0:0_max: {}
/distilbert/transformer/layer.4/Add_1_output_0:0_min: {}
/distilbert/transformer/layer.4/Add_1_output_0:0_max: {}
output:
/distilbert/transformer/layer.4/Add_1_output_0:0: {}
attr:
append_op: sum
/distilbert/transformer/layer.4/output_layer_norm/Add_1:
type: LayerNorm
input:
/distilbert/transformer/layer.4/Add_1_output_0:0: {}
distilbert.transformer.layer.4.output_layer_norm.weight:0: {}
distilbert.transformer.layer.4.output_layer_norm.bias:0: {}
output:
/distilbert/transformer/layer.4/output_layer_norm/Add_1_output_0:0: {}
attr:
epsilon: 9.999999960041972e-13
transpose_mode: 1, 0
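# --- transformer layer 5 (final encoder layer; its tail differs slightly, see the Reorder node below) ---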
/distilbert/transformer/layer.5/attention/k_lin/Add_quant_0:
type: Quantize
input:
/distilbert/transformer/layer.4/output_layer_norm/Add_1_output_0:0: {}
/distilbert/transformer/layer.4/output_layer_norm/Add_1_output_0:0_min: {}
/distilbert/transformer/layer.4/output_layer_norm/Add_1_output_0:0_max: {}
output:
/distilbert/transformer/layer.4/output_layer_norm/Add_1_output_0:0_quant: {}
attr:
output_dtype: u8
/distilbert/transformer/layer.5/attention/k_lin/Add:
type: InnerProduct
input:
/distilbert/transformer/layer.5/attention/k_lin/Transpose_output_0_quantized:0: {}
/distilbert/transformer/layer.4/output_layer_norm/Add_1_output_0:0_quant: {}
distilbert.transformer.layer.5.attention.k_lin.bias:0: {}
/distilbert/transformer/layer.5/attention/k_lin/Transpose_output_0_quantized:0_min: {}
/distilbert/transformer/layer.5/attention/k_lin/Transpose_output_0_quantized:0_max: {}
/distilbert/transformer/layer.4/output_layer_norm/Add_1_output_0:0_min: {}
/distilbert/transformer/layer.4/output_layer_norm/Add_1_output_0:0_max: {}
/distilbert/transformer/layer.5/attention/k_lin/Add_output_0:0_min: {}
/distilbert/transformer/layer.5/attention/k_lin/Add_output_0:0_max: {}
input_ids:0: {}
output:
/distilbert/transformer/layer.5/attention/Reshape_1_output_0:0: {}
attr:
reshape: 12,64,-1, -1
reshape_dims: '0'
/distilbert/transformer/layer.5/attention/q_lin/Add:
type: InnerProduct
input:
/distilbert/transformer/layer.5/attention/q_lin/Transpose_output_0_quantized:0: {}
/distilbert/transformer/layer.4/output_layer_norm/Add_1_output_0:0_quant: {}
distilbert.transformer.layer.5.attention.q_lin.bias:0: {}
/distilbert/transformer/layer.5/attention/q_lin/Transpose_output_0_quantized:0_min: {}
/distilbert/transformer/layer.5/attention/q_lin/Transpose_output_0_quantized:0_max: {}
/distilbert/transformer/layer.4/output_layer_norm/Add_1_output_0:0_min: {}
/distilbert/transformer/layer.4/output_layer_norm/Add_1_output_0:0_max: {}
/distilbert/transformer/layer.5/attention/q_lin/Add_output_0:0_min: {}
/distilbert/transformer/layer.5/attention/q_lin/Add_output_0:0_max: {}
input_ids:0: {}
output:
/distilbert/transformer/layer.5/attention/Reshape_output_0:0: {}
attr:
reshape: 12,64,-1, -1
reshape_dims: '0'
/distilbert/transformer/layer.5/attention/v_lin/Add:
type: InnerProduct
input:
/distilbert/transformer/layer.5/attention/v_lin/Transpose_output_0_quantized:0: {}
/distilbert/transformer/layer.4/output_layer_norm/Add_1_output_0:0_quant: {}
distilbert.transformer.layer.5.attention.v_lin.bias:0: {}
/distilbert/transformer/layer.5/attention/v_lin/Transpose_output_0_quantized:0_min: {}
/distilbert/transformer/layer.5/attention/v_lin/Transpose_output_0_quantized:0_max: {}
/distilbert/transformer/layer.4/output_layer_norm/Add_1_output_0:0_min: {}
/distilbert/transformer/layer.4/output_layer_norm/Add_1_output_0:0_max: {}
/distilbert/transformer/layer.5/attention/Reshape_2_output_0:0_min: {}
/distilbert/transformer/layer.5/attention/Reshape_2_output_0:0_max: {}
input_ids:0: {}
output:
/distilbert/transformer/layer.5/attention/Reshape_2_output_0:0: {}
attr:
output_dtype: s8
reshape: 12,64,-1, -1
reshape_dims: '0'
/distilbert/transformer/layer.5/attention/Where:
type: Matmul
input:
/distilbert/transformer/layer.5/attention/Reshape_output_0:0: {}
/distilbert/transformer/layer.5/attention/Reshape_1_output_0:0: {}
padding_sequence:0: {}
output:
/distilbert/transformer/layer.5/attention/Where_output_0:0: {}
attr:
src0_perm: 2,0,3,1
src1_perm: 2,0,1,3
output_scale: 0.125
format_any: false
append_op: binary_add
/distilbert/transformer/layer.5/attention/Softmax:
type: Softmax
input:
/distilbert/transformer/layer.5/attention/Where_output_0:0: {}
/distilbert/transformer/layer.5/attention/Softmax_output_0:0_min: {}
/distilbert/transformer/layer.5/attention/Softmax_output_0:0_max: {}
output:
/distilbert/transformer/layer.5/attention/Softmax_output_0:0: {}
attr:
output_dtype: u8
/distilbert/transformer/layer.5/attention/Transpose_3:
type: Matmul
input:
/distilbert/transformer/layer.5/attention/Softmax_output_0:0: {}
/distilbert/transformer/layer.5/attention/Reshape_2_output_0:0: {}
/distilbert/transformer/layer.5/attention/Softmax_output_0:0_min: {}
/distilbert/transformer/layer.5/attention/Softmax_output_0:0_max: {}
/distilbert/transformer/layer.5/attention/Reshape_2_output_0:0_min: {}
/distilbert/transformer/layer.5/attention/Reshape_2_output_0:0_max: {}
/distilbert/transformer/layer.5/attention/Reshape_4_output_0:0_min: {}
/distilbert/transformer/layer.5/attention/Reshape_4_output_0:0_max: {}
output:
/distilbert/transformer/layer.5/attention/Reshape_4_output_0:0: {}
attr:
src1_perm: 2,0,3,1
dst_perm: 1,3,0,2
output_dtype: u8
reshape: 768,-1
/distilbert/transformer/layer.5/Add:
type: InnerProduct
input:
/distilbert/transformer/layer.5/attention/out_lin/Transpose_output_0_quantized:0: {}
/distilbert/transformer/layer.5/attention/Reshape_4_output_0:0: {}
distilbert.transformer.layer.5.attention.out_lin.bias:0: {}
/distilbert/transformer/layer.4/output_layer_norm/Add_1_output_0:0: {}
/distilbert/transformer/layer.5/attention/out_lin/Transpose_output_0_quantized:0_min: {}
/distilbert/transformer/layer.5/attention/out_lin/Transpose_output_0_quantized:0_max: {}
/distilbert/transformer/layer.5/attention/Reshape_4_output_0:0_min: {}
/distilbert/transformer/layer.5/attention/Reshape_4_output_0:0_max: {}
/distilbert/transformer/layer.5/Add_output_0:0_min: {}
/distilbert/transformer/layer.5/Add_output_0:0_max: {}
output:
/distilbert/transformer/layer.5/Add_output_0:0: {}
attr:
append_op: sum
/distilbert/transformer/layer.5/sa_layer_norm/Add_1:
type: LayerNorm
input:
/distilbert/transformer/layer.5/Add_output_0:0: {}
distilbert.transformer.layer.5.sa_layer_norm.weight:0: {}
distilbert.transformer.layer.5.sa_layer_norm.bias:0: {}
output:
/distilbert/transformer/layer.5/sa_layer_norm/Add_1_output_0:0: {}
attr:
epsilon: 9.999999960041972e-13
transpose_mode: 1, 0
/distilbert/transformer/layer.5/ffn/activation/Mul_1_quant_0:
type: Quantize
input:
/distilbert/transformer/layer.5/sa_layer_norm/Add_1_output_0:0: {}
/distilbert/transformer/layer.5/sa_layer_norm/Add_1_output_0:0_min: {}
/distilbert/transformer/layer.5/sa_layer_norm/Add_1_output_0:0_max: {}
output:
/distilbert/transformer/layer.5/sa_layer_norm/Add_1_output_0:0_quant: {}
attr:
output_dtype: u8
/distilbert/transformer/layer.5/ffn/activation/Mul_1:
type: InnerProduct
input:
/distilbert/transformer/layer.5/ffn/lin1/Transpose_output_0_quantized:0: {}
/distilbert/transformer/layer.5/sa_layer_norm/Add_1_output_0:0_quant: {}
distilbert.transformer.layer.5.ffn.lin1.bias:0: {}
/distilbert/transformer/layer.5/ffn/lin1/Transpose_output_0_quantized:0_min: {}
/distilbert/transformer/layer.5/ffn/lin1/Transpose_output_0_quantized:0_max: {}
/distilbert/transformer/layer.5/sa_layer_norm/Add_1_output_0:0_min: {}
/distilbert/transformer/layer.5/sa_layer_norm/Add_1_output_0:0_max: {}
/distilbert/transformer/layer.5/ffn/activation/Mul_1_output_0:0_min: {}
/distilbert/transformer/layer.5/ffn/activation/Mul_1_output_0:0_max: {}
output:
/distilbert/transformer/layer.5/ffn/activation/Mul_1_output_0:0: {}
attr:
append_op: gelu_tanh
output_dtype: u8
/distilbert/transformer/layer.5/Add_1:
type: InnerProduct
input:
/distilbert/transformer/layer.5/ffn/lin2/Transpose_output_0_quantized:0: {}
/distilbert/transformer/layer.5/ffn/activation/Mul_1_output_0:0: {}
distilbert.transformer.layer.5.ffn.lin2.bias:0: {}
/distilbert/transformer/layer.5/sa_layer_norm/Add_1_output_0:0: {}
/distilbert/transformer/layer.5/ffn/lin2/Transpose_output_0_quantized:0_min: {}
/distilbert/transformer/layer.5/ffn/lin2/Transpose_output_0_quantized:0_max: {}
/distilbert/transformer/layer.5/ffn/activation/Mul_1_output_0:0_min: {}
/distilbert/transformer/layer.5/ffn/activation/Mul_1_output_0:0_max: {}
/distilbert/transformer/layer.5/Add_1_output_0:0_min: {}
/distilbert/transformer/layer.5/Add_1_output_0:0_max: {}
output:
/distilbert/transformer/layer.5/Add_1_output_0:0: {}
attr:
append_op: sum
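# The fused ops above evidently run on a transposed, feature-major layout; this
# Reorder permutes dims (0,1) -> (1,0) to recover the original orientation before
# the final output_layer_norm, which (unlike the per-layer LayerNorms) carries no
# transpose_mode attribute.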
/distilbert/transformer/layer.5/Add_1_Reorder_Recover:
type: Reorder
input:
/distilbert/transformer/layer.5/Add_1_output_0:0: {}
output:
/distilbert/transformer/layer.5/Add_1_output_0:0_recover: {}
attr:
src_perm: 0,1
dst_perm: 1,0
/distilbert/transformer/layer.5/output_layer_norm/Add_1:
type: LayerNorm
input:
/distilbert/transformer/layer.5/Add_1_output_0:0_recover: {}
distilbert.transformer.layer.5.output_layer_norm.weight:0: {}
distilbert.transformer.layer.5.output_layer_norm.bias:0: {}
output:
/distilbert/transformer/layer.5/output_layer_norm/Add_1_output_0:0: {}
attr:
epsilon: 9.999999960041972e-13
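# QA head: re-quantize the final hidden states to u8, apply the qa_outputs
# InnerProduct, and reshape to -1,-1,2 so the last axis holds what are presumably
# the start/end span logits of the question-answering output.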
/qa_outputs/Add_quant_0:
type: Quantize
input:
/distilbert/transformer/layer.5/output_layer_norm/Add_1_output_0:0: {}
/distilbert/transformer/layer.5/output_layer_norm/Add_1_output_0:0_min: {}
/distilbert/transformer/layer.5/output_layer_norm/Add_1_output_0:0_max: {}
output:
/distilbert/transformer/layer.5/output_layer_norm/Add_1_output_0:0_quant: {}
attr:
output_dtype: u8
/qa_outputs/Add:
type: InnerProduct
input:
/distilbert/transformer/layer.5/output_layer_norm/Add_1_output_0:0_quant: {}
/qa_outputs/Transpose_output_0_quantized:0: {}
qa_outputs.bias:0: {}
/distilbert/transformer/layer.5/output_layer_norm/Add_1_output_0:0_min: {}
/distilbert/transformer/layer.5/output_layer_norm/Add_1_output_0:0_max: {}
/qa_outputs/Transpose_output_0_quantized:0_min: {}
/qa_outputs/Transpose_output_0_quantized:0_max: {}
/qa_outputs/Add_output_0:0_min: {}
/qa_outputs/Add_output_0:0_max: {}
input_ids:0: {}
output:
logits: {}
attr:
src1_perm: 1,0
reshape: -1,-1,2
reshape_dims: 0,1
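# Graph output node: exposes the logits tensor produced by the QA head.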
output_data:
type: Output
input:
logits: {}