diff --git "a/ggml-small-encoder.mlmodelc/model.mil" "b/ggml-small-encoder.mlmodelc/model.mil" deleted file mode 100644--- "a/ggml-small-encoder.mlmodelc/model.mil" +++ /dev/null @@ -1,747 +0,0 @@ -program(1.0) -[buildInfo = dict, tensor>({{"coremlc-component-MIL", "4.28.4"}, {"coremlc-version", "1436.100.10"}})] -{ - func main(tensor logmel_data) { - tensor var_32 = const()[name = tensor("op_32"), val = tensor(1)]; - tensor var_40 = const()[name = tensor("op_40"), val = tensor([1])]; - tensor var_42 = const()[name = tensor("op_42"), val = tensor([1])]; - tensor var_44_pad_type_0 = const()[name = tensor("op_44_pad_type_0"), val = tensor("custom")]; - tensor var_44_pad_0 = const()[name = tensor("op_44_pad_0"), val = tensor([1, 1])]; - tensor logmel_data_to_fp16_dtype_0 = const()[name = tensor("logmel_data_to_fp16_dtype_0"), val = tensor("fp16")]; - tensor weight_3_to_fp16 = const()[name = tensor("weight_3_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64)))]; - tensor bias_3_to_fp16 = const()[name = tensor("bias_3_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(368768)))]; - tensor cast_367 = cast(dtype = logmel_data_to_fp16_dtype_0, x = logmel_data); - tensor var_44_cast = conv(bias = bias_3_to_fp16, dilations = var_42, groups = var_32, pad = var_44_pad_0, pad_type = var_44_pad_type_0, strides = var_40, weight = weight_3_to_fp16, x = cast_367); - tensor input_1_mode_0 = const()[name = tensor("input_1_mode_0"), val = tensor("EXACT")]; - tensor input_1_cast = gelu(mode = input_1_mode_0, x = var_44_cast); - tensor var_48 = const()[name = tensor("op_48"), val = tensor(1)]; - tensor var_57 = const()[name = tensor("op_57"), val = tensor([2])]; - tensor var_59 = const()[name = tensor("op_59"), val = tensor([1])]; - tensor var_61_pad_type_0 = const()[name = tensor("op_61_pad_type_0"), val = tensor("custom")]; - tensor var_61_pad_0 = const()[name = tensor("op_61_pad_0"), val = tensor([1, 1])]; - tensor weight_7_to_fp16 = const()[name = tensor("weight_7_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(370368)))]; - tensor bias_7_to_fp16 = const()[name = tensor("bias_7_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3909376)))]; - tensor var_61_cast = conv(bias = bias_7_to_fp16, dilations = var_59, groups = var_48, pad = var_61_pad_0, pad_type = var_61_pad_type_0, strides = var_57, weight = weight_7_to_fp16, x = input_1_cast); - tensor x_3_mode_0 = const()[name = tensor("x_3_mode_0"), val = tensor("EXACT")]; - tensor x_3_cast = gelu(mode = x_3_mode_0, x = var_61_cast); - tensor var_66 = const()[name = tensor("op_66"), val = tensor([0, 2, 1])]; - tensor positional_embedding_to_fp16 = const()[name = tensor("positional_embedding_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(3910976)))]; - tensor transpose_96 = transpose(perm = var_66, x = x_3_cast); - tensor var_69_cast = add(x = transpose_96, y = positional_embedding_to_fp16); - tensor var_82 = const()[name = tensor("op_82"), val = tensor(-1)]; - tensor var_99_axes_0 = const()[name = tensor("op_99_axes_0"), val = tensor([-1])]; - tensor blocks_0_attn_ln_weight_to_fp16 = const()[name = tensor("blocks_0_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6215040)))]; - tensor blocks_0_attn_ln_bias_to_fp16 = const()[name = tensor("blocks_0_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6216640)))]; - tensor var_88_to_fp16 = const()[name = tensor("op_88_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_99_cast = layer_norm(axes = var_99_axes_0, beta = blocks_0_attn_ln_bias_to_fp16, epsilon = var_88_to_fp16, gamma = blocks_0_attn_ln_weight_to_fp16, x = var_69_cast); - tensor var_110_to_fp16 = const()[name = tensor("op_110_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6218240)))]; - tensor var_111_to_fp16 = const()[name = tensor("op_111_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7397952)))]; - tensor q_1_cast = linear(bias = var_111_to_fp16, weight = var_110_to_fp16, x = var_99_cast); - tensor var_114_to_fp16 = const()[name = tensor("op_114_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(7399552)))]; - tensor k_1_bias_0_to_fp16 = const()[name = tensor("k_1_bias_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8579264)))]; - tensor k_1_cast = linear(bias = k_1_bias_0_to_fp16, weight = var_114_to_fp16, x = var_99_cast); - tensor var_118_to_fp16 = const()[name = tensor("op_118_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(8580864)))]; - tensor var_119_to_fp16 = const()[name = tensor("op_119_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9760576)))]; - tensor v_1_cast = linear(bias = var_119_to_fp16, weight = var_118_to_fp16, x = var_99_cast); - tensor var_127 = const()[name = tensor("op_127"), val = tensor([1, 1500, 12, -1])]; - tensor var_128_cast = reshape(shape = var_127, x = q_1_cast); - tensor const_84_to_fp16 = const()[name = tensor("const_84_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; - tensor q_3_cast = mul(x = var_128_cast, y = const_84_to_fp16); - tensor var_134 = const()[name = tensor("op_134"), val = tensor([1, 1500, 12, -1])]; - tensor var_135_cast = reshape(shape = var_134, x = k_1_cast); - tensor const_85_to_fp16 = const()[name = tensor("const_85_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; - tensor k_3_cast = mul(x = var_135_cast, y = const_85_to_fp16); - tensor var_141 = const()[name = tensor("op_141"), val = tensor([1, 1500, 12, -1])]; - tensor var_142_cast = reshape(shape = var_141, x = v_1_cast); - tensor var_143 = const()[name = tensor("op_143"), val = tensor([0, 2, 1, 3])]; - tensor qk_1_transpose_x_0 = const()[name = tensor("qk_1_transpose_x_0"), val = tensor(false)]; - tensor qk_1_transpose_y_0 = const()[name = tensor("qk_1_transpose_y_0"), val = tensor(false)]; - tensor transpose_24_perm_0 = const()[name = tensor("transpose_24_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor transpose_25_perm_0 = const()[name = tensor("transpose_25_perm_0"), val = tensor([0, 2, 3, 1])]; - tensor transpose_93 = transpose(perm = transpose_25_perm_0, x = k_3_cast); - tensor transpose_94 = transpose(perm = transpose_24_perm_0, x = q_3_cast); - tensor qk_1_cast = matmul(transpose_x = qk_1_transpose_x_0, transpose_y = qk_1_transpose_y_0, x = transpose_94, y = transpose_93); - tensor var_147_cast = softmax(axis = var_82, x = qk_1_cast); - tensor var_149_transpose_x_0 = const()[name = tensor("op_149_transpose_x_0"), val = tensor(false)]; - tensor var_149_transpose_y_0 = const()[name = tensor("op_149_transpose_y_0"), val = tensor(false)]; - tensor transpose_95 = transpose(perm = var_143, x = var_142_cast); - tensor var_149_cast = matmul(transpose_x = var_149_transpose_x_0, transpose_y = var_149_transpose_y_0, x = var_147_cast, y = transpose_95); - tensor var_150 = const()[name = tensor("op_150"), val = tensor([0, 2, 1, 3])]; - tensor concat_0 = const()[name = tensor("concat_0"), val = tensor([1, 1500, 768])]; - tensor transpose_92 = transpose(perm = var_150, x = var_149_cast); - tensor x_11_cast = reshape(shape = concat_0, x = transpose_92); - tensor var_155_to_fp16 = const()[name = tensor("op_155_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(9762176)))]; - tensor var_156_to_fp16 = const()[name = tensor("op_156_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(10941888)))]; - tensor var_157_cast = linear(bias = var_156_to_fp16, weight = var_155_to_fp16, x = x_11_cast); - tensor x_13_cast = add(x = var_69_cast, y = var_157_cast); - tensor var_163_axes_0 = const()[name = tensor("op_163_axes_0"), val = tensor([-1])]; - tensor blocks_0_mlp_ln_weight_to_fp16 = const()[name = tensor("blocks_0_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(10943488)))]; - tensor blocks_0_mlp_ln_bias_to_fp16 = const()[name = tensor("blocks_0_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(10945088)))]; - tensor var_163_cast = layer_norm(axes = var_163_axes_0, beta = blocks_0_mlp_ln_bias_to_fp16, epsilon = var_88_to_fp16, gamma = blocks_0_mlp_ln_weight_to_fp16, x = x_13_cast); - tensor var_172_to_fp16 = const()[name = tensor("op_172_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(10946688)))]; - tensor var_173_to_fp16 = const()[name = tensor("op_173_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(15665344)))]; - tensor input_9_cast = linear(bias = var_173_to_fp16, weight = var_172_to_fp16, x = var_163_cast); - tensor x_17_mode_0 = const()[name = tensor("x_17_mode_0"), val = tensor("EXACT")]; - tensor x_17_cast = gelu(mode = x_17_mode_0, x = input_9_cast); - tensor var_178_to_fp16 = const()[name = tensor("op_178_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(15671552)))]; - tensor var_179_to_fp16 = const()[name = tensor("op_179_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20390208)))]; - tensor var_180_cast = linear(bias = var_179_to_fp16, weight = var_178_to_fp16, x = x_17_cast); - tensor x_19_cast = add(x = x_13_cast, y = var_180_cast); - tensor var_189 = const()[name = tensor("op_189"), val = tensor(-1)]; - tensor var_206_axes_0 = const()[name = tensor("op_206_axes_0"), val = tensor([-1])]; - tensor blocks_1_attn_ln_weight_to_fp16 = const()[name = tensor("blocks_1_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20391808)))]; - tensor blocks_1_attn_ln_bias_to_fp16 = const()[name = tensor("blocks_1_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20393408)))]; - tensor var_195_to_fp16 = const()[name = tensor("op_195_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_206_cast = layer_norm(axes = var_206_axes_0, beta = blocks_1_attn_ln_bias_to_fp16, epsilon = var_195_to_fp16, gamma = blocks_1_attn_ln_weight_to_fp16, x = x_19_cast); - tensor var_217_to_fp16 = const()[name = tensor("op_217_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20395008)))]; - tensor var_218_to_fp16 = const()[name = tensor("op_218_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(21574720)))]; - tensor q_5_cast = linear(bias = var_218_to_fp16, weight = var_217_to_fp16, x = var_206_cast); - tensor var_221_to_fp16 = const()[name = tensor("op_221_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(21576320)))]; - tensor k_5_bias_0_to_fp16 = const()[name = tensor("k_5_bias_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(22756032)))]; - tensor k_5_cast = linear(bias = k_5_bias_0_to_fp16, weight = var_221_to_fp16, x = var_206_cast); - tensor var_225_to_fp16 = const()[name = tensor("op_225_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(22757632)))]; - tensor var_226_to_fp16 = const()[name = tensor("op_226_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(23937344)))]; - tensor v_5_cast = linear(bias = var_226_to_fp16, weight = var_225_to_fp16, x = var_206_cast); - tensor var_234 = const()[name = tensor("op_234"), val = tensor([1, 1500, 12, -1])]; - tensor var_235_cast = reshape(shape = var_234, x = q_5_cast); - tensor const_86_to_fp16 = const()[name = tensor("const_86_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; - tensor q_7_cast = mul(x = var_235_cast, y = const_86_to_fp16); - tensor var_241 = const()[name = tensor("op_241"), val = tensor([1, 1500, 12, -1])]; - tensor var_242_cast = reshape(shape = var_241, x = k_5_cast); - tensor const_87_to_fp16 = const()[name = tensor("const_87_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; - tensor k_7_cast = mul(x = var_242_cast, y = const_87_to_fp16); - tensor var_248 = const()[name = tensor("op_248"), val = tensor([1, 1500, 12, -1])]; - tensor var_249_cast = reshape(shape = var_248, x = v_5_cast); - tensor var_250 = const()[name = tensor("op_250"), val = tensor([0, 2, 1, 3])]; - tensor qk_3_transpose_x_0 = const()[name = tensor("qk_3_transpose_x_0"), val = tensor(false)]; - tensor qk_3_transpose_y_0 = const()[name = tensor("qk_3_transpose_y_0"), val = tensor(false)]; - tensor transpose_26_perm_0 = const()[name = tensor("transpose_26_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor transpose_27_perm_0 = const()[name = tensor("transpose_27_perm_0"), val = tensor([0, 2, 3, 1])]; - tensor transpose_89 = transpose(perm = transpose_27_perm_0, x = k_7_cast); - tensor transpose_90 = transpose(perm = transpose_26_perm_0, x = q_7_cast); - tensor qk_3_cast = matmul(transpose_x = qk_3_transpose_x_0, transpose_y = qk_3_transpose_y_0, x = transpose_90, y = transpose_89); - tensor var_254_cast = softmax(axis = var_189, x = qk_3_cast); - tensor var_256_transpose_x_0 = const()[name = tensor("op_256_transpose_x_0"), val = tensor(false)]; - tensor var_256_transpose_y_0 = const()[name = tensor("op_256_transpose_y_0"), val = tensor(false)]; - tensor transpose_91 = transpose(perm = var_250, x = var_249_cast); - tensor var_256_cast = matmul(transpose_x = var_256_transpose_x_0, transpose_y = var_256_transpose_y_0, x = var_254_cast, y = transpose_91); - tensor var_257 = const()[name = tensor("op_257"), val = tensor([0, 2, 1, 3])]; - tensor concat_1 = const()[name = tensor("concat_1"), val = tensor([1, 1500, 768])]; - tensor transpose_88 = transpose(perm = var_257, x = var_256_cast); - tensor x_23_cast = reshape(shape = concat_1, x = transpose_88); - tensor var_262_to_fp16 = const()[name = tensor("op_262_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(23938944)))]; - tensor var_263_to_fp16 = const()[name = tensor("op_263_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25118656)))]; - tensor var_264_cast = linear(bias = var_263_to_fp16, weight = var_262_to_fp16, x = x_23_cast); - tensor x_25_cast = add(x = x_19_cast, y = var_264_cast); - tensor var_270_axes_0 = const()[name = tensor("op_270_axes_0"), val = tensor([-1])]; - tensor blocks_1_mlp_ln_weight_to_fp16 = const()[name = tensor("blocks_1_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25120256)))]; - tensor blocks_1_mlp_ln_bias_to_fp16 = const()[name = tensor("blocks_1_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25121856)))]; - tensor var_270_cast = layer_norm(axes = var_270_axes_0, beta = blocks_1_mlp_ln_bias_to_fp16, epsilon = var_195_to_fp16, gamma = blocks_1_mlp_ln_weight_to_fp16, x = x_25_cast); - tensor var_279_to_fp16 = const()[name = tensor("op_279_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25123456)))]; - tensor var_280_to_fp16 = const()[name = tensor("op_280_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(29842112)))]; - tensor input_17_cast = linear(bias = var_280_to_fp16, weight = var_279_to_fp16, x = var_270_cast); - tensor x_29_mode_0 = const()[name = tensor("x_29_mode_0"), val = tensor("EXACT")]; - tensor x_29_cast = gelu(mode = x_29_mode_0, x = input_17_cast); - tensor var_285_to_fp16 = const()[name = tensor("op_285_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(29848320)))]; - tensor var_286_to_fp16 = const()[name = tensor("op_286_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(34566976)))]; - tensor var_287_cast = linear(bias = var_286_to_fp16, weight = var_285_to_fp16, x = x_29_cast); - tensor x_31_cast = add(x = x_25_cast, y = var_287_cast); - tensor var_296 = const()[name = tensor("op_296"), val = tensor(-1)]; - tensor var_313_axes_0 = const()[name = tensor("op_313_axes_0"), val = tensor([-1])]; - tensor blocks_2_attn_ln_weight_to_fp16 = const()[name = tensor("blocks_2_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(34568576)))]; - tensor blocks_2_attn_ln_bias_to_fp16 = const()[name = tensor("blocks_2_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(34570176)))]; - tensor var_302_to_fp16 = const()[name = tensor("op_302_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_313_cast = layer_norm(axes = var_313_axes_0, beta = blocks_2_attn_ln_bias_to_fp16, epsilon = var_302_to_fp16, gamma = blocks_2_attn_ln_weight_to_fp16, x = x_31_cast); - tensor var_324_to_fp16 = const()[name = tensor("op_324_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(34571776)))]; - tensor var_325_to_fp16 = const()[name = tensor("op_325_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(35751488)))]; - tensor q_9_cast = linear(bias = var_325_to_fp16, weight = var_324_to_fp16, x = var_313_cast); - tensor var_328_to_fp16 = const()[name = tensor("op_328_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(35753088)))]; - tensor k_9_bias_0_to_fp16 = const()[name = tensor("k_9_bias_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(36932800)))]; - tensor k_9_cast = linear(bias = k_9_bias_0_to_fp16, weight = var_328_to_fp16, x = var_313_cast); - tensor var_332_to_fp16 = const()[name = tensor("op_332_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(36934400)))]; - tensor var_333_to_fp16 = const()[name = tensor("op_333_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(38114112)))]; - tensor v_9_cast = linear(bias = var_333_to_fp16, weight = var_332_to_fp16, x = var_313_cast); - tensor var_341 = const()[name = tensor("op_341"), val = tensor([1, 1500, 12, -1])]; - tensor var_342_cast = reshape(shape = var_341, x = q_9_cast); - tensor const_88_to_fp16 = const()[name = tensor("const_88_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; - tensor q_11_cast = mul(x = var_342_cast, y = const_88_to_fp16); - tensor var_348 = const()[name = tensor("op_348"), val = tensor([1, 1500, 12, -1])]; - tensor var_349_cast = reshape(shape = var_348, x = k_9_cast); - tensor const_89_to_fp16 = const()[name = tensor("const_89_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; - tensor k_11_cast = mul(x = var_349_cast, y = const_89_to_fp16); - tensor var_355 = const()[name = tensor("op_355"), val = tensor([1, 1500, 12, -1])]; - tensor var_356_cast = reshape(shape = var_355, x = v_9_cast); - tensor var_357 = const()[name = tensor("op_357"), val = tensor([0, 2, 1, 3])]; - tensor qk_5_transpose_x_0 = const()[name = tensor("qk_5_transpose_x_0"), val = tensor(false)]; - tensor qk_5_transpose_y_0 = const()[name = tensor("qk_5_transpose_y_0"), val = tensor(false)]; - tensor transpose_28_perm_0 = const()[name = tensor("transpose_28_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor transpose_29_perm_0 = const()[name = tensor("transpose_29_perm_0"), val = tensor([0, 2, 3, 1])]; - tensor transpose_85 = transpose(perm = transpose_29_perm_0, x = k_11_cast); - tensor transpose_86 = transpose(perm = transpose_28_perm_0, x = q_11_cast); - tensor qk_5_cast = matmul(transpose_x = qk_5_transpose_x_0, transpose_y = qk_5_transpose_y_0, x = transpose_86, y = transpose_85); - tensor var_361_cast = softmax(axis = var_296, x = qk_5_cast); - tensor var_363_transpose_x_0 = const()[name = tensor("op_363_transpose_x_0"), val = tensor(false)]; - tensor var_363_transpose_y_0 = const()[name = tensor("op_363_transpose_y_0"), val = tensor(false)]; - tensor transpose_87 = transpose(perm = var_357, x = var_356_cast); - tensor var_363_cast = matmul(transpose_x = var_363_transpose_x_0, transpose_y = var_363_transpose_y_0, x = var_361_cast, y = transpose_87); - tensor var_364 = const()[name = tensor("op_364"), val = tensor([0, 2, 1, 3])]; - tensor concat_2 = const()[name = tensor("concat_2"), val = tensor([1, 1500, 768])]; - tensor transpose_84 = transpose(perm = var_364, x = var_363_cast); - tensor x_35_cast = reshape(shape = concat_2, x = transpose_84); - tensor var_369_to_fp16 = const()[name = tensor("op_369_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(38115712)))]; - tensor var_370_to_fp16 = const()[name = tensor("op_370_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39295424)))]; - tensor var_371_cast = linear(bias = var_370_to_fp16, weight = var_369_to_fp16, x = x_35_cast); - tensor x_37_cast = add(x = x_31_cast, y = var_371_cast); - tensor var_377_axes_0 = const()[name = tensor("op_377_axes_0"), val = tensor([-1])]; - tensor blocks_2_mlp_ln_weight_to_fp16 = const()[name = tensor("blocks_2_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39297024)))]; - tensor blocks_2_mlp_ln_bias_to_fp16 = const()[name = tensor("blocks_2_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39298624)))]; - tensor var_377_cast = layer_norm(axes = var_377_axes_0, beta = blocks_2_mlp_ln_bias_to_fp16, epsilon = var_302_to_fp16, gamma = blocks_2_mlp_ln_weight_to_fp16, x = x_37_cast); - tensor var_386_to_fp16 = const()[name = tensor("op_386_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(39300224)))]; - tensor var_387_to_fp16 = const()[name = tensor("op_387_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44018880)))]; - tensor input_25_cast = linear(bias = var_387_to_fp16, weight = var_386_to_fp16, x = var_377_cast); - tensor x_41_mode_0 = const()[name = tensor("x_41_mode_0"), val = tensor("EXACT")]; - tensor x_41_cast = gelu(mode = x_41_mode_0, x = input_25_cast); - tensor var_392_to_fp16 = const()[name = tensor("op_392_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(44025088)))]; - tensor var_393_to_fp16 = const()[name = tensor("op_393_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(48743744)))]; - tensor var_394_cast = linear(bias = var_393_to_fp16, weight = var_392_to_fp16, x = x_41_cast); - tensor x_43_cast = add(x = x_37_cast, y = var_394_cast); - tensor var_403 = const()[name = tensor("op_403"), val = tensor(-1)]; - tensor var_420_axes_0 = const()[name = tensor("op_420_axes_0"), val = tensor([-1])]; - tensor blocks_3_attn_ln_weight_to_fp16 = const()[name = tensor("blocks_3_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(48745344)))]; - tensor blocks_3_attn_ln_bias_to_fp16 = const()[name = tensor("blocks_3_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(48746944)))]; - tensor var_409_to_fp16 = const()[name = tensor("op_409_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_420_cast = layer_norm(axes = var_420_axes_0, beta = blocks_3_attn_ln_bias_to_fp16, epsilon = var_409_to_fp16, gamma = blocks_3_attn_ln_weight_to_fp16, x = x_43_cast); - tensor var_431_to_fp16 = const()[name = tensor("op_431_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(48748544)))]; - tensor var_432_to_fp16 = const()[name = tensor("op_432_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(49928256)))]; - tensor q_13_cast = linear(bias = var_432_to_fp16, weight = var_431_to_fp16, x = var_420_cast); - tensor var_435_to_fp16 = const()[name = tensor("op_435_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(49929856)))]; - tensor k_13_bias_0_to_fp16 = const()[name = tensor("k_13_bias_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(51109568)))]; - tensor k_13_cast = linear(bias = k_13_bias_0_to_fp16, weight = var_435_to_fp16, x = var_420_cast); - tensor var_439_to_fp16 = const()[name = tensor("op_439_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(51111168)))]; - tensor var_440_to_fp16 = const()[name = tensor("op_440_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(52290880)))]; - tensor v_13_cast = linear(bias = var_440_to_fp16, weight = var_439_to_fp16, x = var_420_cast); - tensor var_448 = const()[name = tensor("op_448"), val = tensor([1, 1500, 12, -1])]; - tensor var_449_cast = reshape(shape = var_448, x = q_13_cast); - tensor const_90_to_fp16 = const()[name = tensor("const_90_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; - tensor q_15_cast = mul(x = var_449_cast, y = const_90_to_fp16); - tensor var_455 = const()[name = tensor("op_455"), val = tensor([1, 1500, 12, -1])]; - tensor var_456_cast = reshape(shape = var_455, x = k_13_cast); - tensor const_91_to_fp16 = const()[name = tensor("const_91_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; - tensor k_15_cast = mul(x = var_456_cast, y = const_91_to_fp16); - tensor var_462 = const()[name = tensor("op_462"), val = tensor([1, 1500, 12, -1])]; - tensor var_463_cast = reshape(shape = var_462, x = v_13_cast); - tensor var_464 = const()[name = tensor("op_464"), val = tensor([0, 2, 1, 3])]; - tensor qk_7_transpose_x_0 = const()[name = tensor("qk_7_transpose_x_0"), val = tensor(false)]; - tensor qk_7_transpose_y_0 = const()[name = tensor("qk_7_transpose_y_0"), val = tensor(false)]; - tensor transpose_30_perm_0 = const()[name = tensor("transpose_30_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor transpose_31_perm_0 = const()[name = tensor("transpose_31_perm_0"), val = tensor([0, 2, 3, 1])]; - tensor transpose_81 = transpose(perm = transpose_31_perm_0, x = k_15_cast); - tensor transpose_82 = transpose(perm = transpose_30_perm_0, x = q_15_cast); - tensor qk_7_cast = matmul(transpose_x = qk_7_transpose_x_0, transpose_y = qk_7_transpose_y_0, x = transpose_82, y = transpose_81); - tensor var_468_cast = softmax(axis = var_403, x = qk_7_cast); - tensor var_470_transpose_x_0 = const()[name = tensor("op_470_transpose_x_0"), val = tensor(false)]; - tensor var_470_transpose_y_0 = const()[name = tensor("op_470_transpose_y_0"), val = tensor(false)]; - tensor transpose_83 = transpose(perm = var_464, x = var_463_cast); - tensor var_470_cast = matmul(transpose_x = var_470_transpose_x_0, transpose_y = var_470_transpose_y_0, x = var_468_cast, y = transpose_83); - tensor var_471 = const()[name = tensor("op_471"), val = tensor([0, 2, 1, 3])]; - tensor concat_3 = const()[name = tensor("concat_3"), val = tensor([1, 1500, 768])]; - tensor transpose_80 = transpose(perm = var_471, x = var_470_cast); - tensor x_47_cast = reshape(shape = concat_3, x = transpose_80); - tensor var_476_to_fp16 = const()[name = tensor("op_476_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(52292480)))]; - tensor var_477_to_fp16 = const()[name = tensor("op_477_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53472192)))]; - tensor var_478_cast = linear(bias = var_477_to_fp16, weight = var_476_to_fp16, x = x_47_cast); - tensor x_49_cast = add(x = x_43_cast, y = var_478_cast); - tensor var_484_axes_0 = const()[name = tensor("op_484_axes_0"), val = tensor([-1])]; - tensor blocks_3_mlp_ln_weight_to_fp16 = const()[name = tensor("blocks_3_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53473792)))]; - tensor blocks_3_mlp_ln_bias_to_fp16 = const()[name = tensor("blocks_3_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53475392)))]; - tensor var_484_cast = layer_norm(axes = var_484_axes_0, beta = blocks_3_mlp_ln_bias_to_fp16, epsilon = var_409_to_fp16, gamma = blocks_3_mlp_ln_weight_to_fp16, x = x_49_cast); - tensor var_493_to_fp16 = const()[name = tensor("op_493_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53476992)))]; - tensor var_494_to_fp16 = const()[name = tensor("op_494_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(58195648)))]; - tensor input_33_cast = linear(bias = var_494_to_fp16, weight = var_493_to_fp16, x = var_484_cast); - tensor x_53_mode_0 = const()[name = tensor("x_53_mode_0"), val = tensor("EXACT")]; - tensor x_53_cast = gelu(mode = x_53_mode_0, x = input_33_cast); - tensor var_499_to_fp16 = const()[name = tensor("op_499_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(58201856)))]; - tensor var_500_to_fp16 = const()[name = tensor("op_500_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(62920512)))]; - tensor var_501_cast = linear(bias = var_500_to_fp16, weight = var_499_to_fp16, x = x_53_cast); - tensor x_55_cast = add(x = x_49_cast, y = var_501_cast); - tensor var_510 = const()[name = tensor("op_510"), val = tensor(-1)]; - tensor var_527_axes_0 = const()[name = tensor("op_527_axes_0"), val = tensor([-1])]; - tensor blocks_4_attn_ln_weight_to_fp16 = const()[name = tensor("blocks_4_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(62922112)))]; - tensor blocks_4_attn_ln_bias_to_fp16 = const()[name = tensor("blocks_4_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(62923712)))]; - tensor var_516_to_fp16 = const()[name = tensor("op_516_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_527_cast = layer_norm(axes = var_527_axes_0, beta = blocks_4_attn_ln_bias_to_fp16, epsilon = var_516_to_fp16, gamma = blocks_4_attn_ln_weight_to_fp16, x = x_55_cast); - tensor var_538_to_fp16 = const()[name = tensor("op_538_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(62925312)))]; - tensor var_539_to_fp16 = const()[name = tensor("op_539_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64105024)))]; - tensor q_17_cast = linear(bias = var_539_to_fp16, weight = var_538_to_fp16, x = var_527_cast); - tensor var_542_to_fp16 = const()[name = tensor("op_542_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64106624)))]; - tensor k_17_bias_0_to_fp16 = const()[name = tensor("k_17_bias_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(65286336)))]; - tensor k_17_cast = linear(bias = k_17_bias_0_to_fp16, weight = var_542_to_fp16, x = var_527_cast); - tensor var_546_to_fp16 = const()[name = tensor("op_546_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(65287936)))]; - tensor var_547_to_fp16 = const()[name = tensor("op_547_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66467648)))]; - tensor v_17_cast = linear(bias = var_547_to_fp16, weight = var_546_to_fp16, x = var_527_cast); - tensor var_555 = const()[name = tensor("op_555"), val = tensor([1, 1500, 12, -1])]; - tensor var_556_cast = reshape(shape = var_555, x = q_17_cast); - tensor const_92_to_fp16 = const()[name = tensor("const_92_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; - tensor q_19_cast = mul(x = var_556_cast, y = const_92_to_fp16); - tensor var_562 = const()[name = tensor("op_562"), val = tensor([1, 1500, 12, -1])]; - tensor var_563_cast = reshape(shape = var_562, x = k_17_cast); - tensor const_93_to_fp16 = const()[name = tensor("const_93_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; - tensor k_19_cast = mul(x = var_563_cast, y = const_93_to_fp16); - tensor var_569 = const()[name = tensor("op_569"), val = tensor([1, 1500, 12, -1])]; - tensor var_570_cast = reshape(shape = var_569, x = v_17_cast); - tensor var_571 = const()[name = tensor("op_571"), val = tensor([0, 2, 1, 3])]; - tensor qk_9_transpose_x_0 = const()[name = tensor("qk_9_transpose_x_0"), val = tensor(false)]; - tensor qk_9_transpose_y_0 = const()[name = tensor("qk_9_transpose_y_0"), val = tensor(false)]; - tensor transpose_32_perm_0 = const()[name = tensor("transpose_32_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor transpose_33_perm_0 = const()[name = tensor("transpose_33_perm_0"), val = tensor([0, 2, 3, 1])]; - tensor transpose_77 = transpose(perm = transpose_33_perm_0, x = k_19_cast); - tensor transpose_78 = transpose(perm = transpose_32_perm_0, x = q_19_cast); - tensor qk_9_cast = matmul(transpose_x = qk_9_transpose_x_0, transpose_y = qk_9_transpose_y_0, x = transpose_78, y = transpose_77); - tensor var_575_cast = softmax(axis = var_510, x = qk_9_cast); - tensor var_577_transpose_x_0 = const()[name = tensor("op_577_transpose_x_0"), val = tensor(false)]; - tensor var_577_transpose_y_0 = const()[name = tensor("op_577_transpose_y_0"), val = tensor(false)]; - tensor transpose_79 = transpose(perm = var_571, x = var_570_cast); - tensor var_577_cast = matmul(transpose_x = var_577_transpose_x_0, transpose_y = var_577_transpose_y_0, x = var_575_cast, y = transpose_79); - tensor var_578 = const()[name = tensor("op_578"), val = tensor([0, 2, 1, 3])]; - tensor concat_4 = const()[name = tensor("concat_4"), val = tensor([1, 1500, 768])]; - tensor transpose_76 = transpose(perm = var_578, x = var_577_cast); - tensor x_59_cast = reshape(shape = concat_4, x = transpose_76); - tensor var_583_to_fp16 = const()[name = tensor("op_583_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66469248)))]; - tensor var_584_to_fp16 = const()[name = tensor("op_584_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(67648960)))]; - tensor var_585_cast = linear(bias = var_584_to_fp16, weight = var_583_to_fp16, x = x_59_cast); - tensor x_61_cast = add(x = x_55_cast, y = var_585_cast); - tensor var_591_axes_0 = const()[name = tensor("op_591_axes_0"), val = tensor([-1])]; - tensor blocks_4_mlp_ln_weight_to_fp16 = const()[name = tensor("blocks_4_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(67650560)))]; - tensor blocks_4_mlp_ln_bias_to_fp16 = const()[name = tensor("blocks_4_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(67652160)))]; - tensor var_591_cast = layer_norm(axes = var_591_axes_0, beta = blocks_4_mlp_ln_bias_to_fp16, epsilon = var_516_to_fp16, gamma = blocks_4_mlp_ln_weight_to_fp16, x = x_61_cast); - tensor var_600_to_fp16 = const()[name = tensor("op_600_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(67653760)))]; - tensor var_601_to_fp16 = const()[name = tensor("op_601_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(72372416)))]; - tensor input_41_cast = linear(bias = var_601_to_fp16, weight = var_600_to_fp16, x = var_591_cast); - tensor x_65_mode_0 = const()[name = tensor("x_65_mode_0"), val = tensor("EXACT")]; - tensor x_65_cast = gelu(mode = x_65_mode_0, x = input_41_cast); - tensor var_606_to_fp16 = const()[name = tensor("op_606_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(72378624)))]; - tensor var_607_to_fp16 = const()[name = tensor("op_607_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(77097280)))]; - tensor var_608_cast = linear(bias = var_607_to_fp16, weight = var_606_to_fp16, x = x_65_cast); - tensor x_67_cast = add(x = x_61_cast, y = var_608_cast); - tensor var_617 = const()[name = tensor("op_617"), val = tensor(-1)]; - tensor var_634_axes_0 = const()[name = tensor("op_634_axes_0"), val = tensor([-1])]; - tensor blocks_5_attn_ln_weight_to_fp16 = const()[name = tensor("blocks_5_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(77098880)))]; - tensor blocks_5_attn_ln_bias_to_fp16 = const()[name = tensor("blocks_5_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(77100480)))]; - tensor var_623_to_fp16 = const()[name = tensor("op_623_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_634_cast = layer_norm(axes = var_634_axes_0, beta = blocks_5_attn_ln_bias_to_fp16, epsilon = var_623_to_fp16, gamma = blocks_5_attn_ln_weight_to_fp16, x = x_67_cast); - tensor var_645_to_fp16 = const()[name = tensor("op_645_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(77102080)))]; - tensor var_646_to_fp16 = const()[name = tensor("op_646_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78281792)))]; - tensor q_21_cast = linear(bias = var_646_to_fp16, weight = var_645_to_fp16, x = var_634_cast); - tensor var_649_to_fp16 = const()[name = tensor("op_649_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(78283392)))]; - tensor k_21_bias_0_to_fp16 = const()[name = tensor("k_21_bias_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(79463104)))]; - tensor k_21_cast = linear(bias = k_21_bias_0_to_fp16, weight = var_649_to_fp16, x = var_634_cast); - tensor var_653_to_fp16 = const()[name = tensor("op_653_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(79464704)))]; - tensor var_654_to_fp16 = const()[name = tensor("op_654_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(80644416)))]; - tensor v_21_cast = linear(bias = var_654_to_fp16, weight = var_653_to_fp16, x = var_634_cast); - tensor var_662 = const()[name = tensor("op_662"), val = tensor([1, 1500, 12, -1])]; - tensor var_663_cast = reshape(shape = var_662, x = q_21_cast); - tensor const_94_to_fp16 = const()[name = tensor("const_94_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; - tensor q_23_cast = mul(x = var_663_cast, y = const_94_to_fp16); - tensor var_669 = const()[name = tensor("op_669"), val = tensor([1, 1500, 12, -1])]; - tensor var_670_cast = reshape(shape = var_669, x = k_21_cast); - tensor const_95_to_fp16 = const()[name = tensor("const_95_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; - tensor k_23_cast = mul(x = var_670_cast, y = const_95_to_fp16); - tensor var_676 = const()[name = tensor("op_676"), val = tensor([1, 1500, 12, -1])]; - tensor var_677_cast = reshape(shape = var_676, x = v_21_cast); - tensor var_678 = const()[name = tensor("op_678"), val = tensor([0, 2, 1, 3])]; - tensor qk_11_transpose_x_0 = const()[name = tensor("qk_11_transpose_x_0"), val = tensor(false)]; - tensor qk_11_transpose_y_0 = const()[name = tensor("qk_11_transpose_y_0"), val = tensor(false)]; - tensor transpose_34_perm_0 = const()[name = tensor("transpose_34_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor transpose_35_perm_0 = const()[name = tensor("transpose_35_perm_0"), val = tensor([0, 2, 3, 1])]; - tensor transpose_73 = transpose(perm = transpose_35_perm_0, x = k_23_cast); - tensor transpose_74 = transpose(perm = transpose_34_perm_0, x = q_23_cast); - tensor qk_11_cast = matmul(transpose_x = qk_11_transpose_x_0, transpose_y = qk_11_transpose_y_0, x = transpose_74, y = transpose_73); - tensor var_682_cast = softmax(axis = var_617, x = qk_11_cast); - tensor var_684_transpose_x_0 = const()[name = tensor("op_684_transpose_x_0"), val = tensor(false)]; - tensor var_684_transpose_y_0 = const()[name = tensor("op_684_transpose_y_0"), val = tensor(false)]; - tensor transpose_75 = transpose(perm = var_678, x = var_677_cast); - tensor var_684_cast = matmul(transpose_x = var_684_transpose_x_0, transpose_y = var_684_transpose_y_0, x = var_682_cast, y = transpose_75); - tensor var_685 = const()[name = tensor("op_685"), val = tensor([0, 2, 1, 3])]; - tensor concat_5 = const()[name = tensor("concat_5"), val = tensor([1, 1500, 768])]; - tensor transpose_72 = transpose(perm = var_685, x = var_684_cast); - tensor x_71_cast = reshape(shape = concat_5, x = transpose_72); - tensor var_690_to_fp16 = const()[name = tensor("op_690_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(80646016)))]; - tensor var_691_to_fp16 = const()[name = tensor("op_691_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81825728)))]; - tensor var_692_cast = linear(bias = var_691_to_fp16, weight = var_690_to_fp16, x = x_71_cast); - tensor x_73_cast = add(x = x_67_cast, y = var_692_cast); - tensor var_698_axes_0 = const()[name = tensor("op_698_axes_0"), val = tensor([-1])]; - tensor blocks_5_mlp_ln_weight_to_fp16 = const()[name = tensor("blocks_5_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81827328)))]; - tensor blocks_5_mlp_ln_bias_to_fp16 = const()[name = tensor("blocks_5_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81828928)))]; - tensor var_698_cast = layer_norm(axes = var_698_axes_0, beta = blocks_5_mlp_ln_bias_to_fp16, epsilon = var_623_to_fp16, gamma = blocks_5_mlp_ln_weight_to_fp16, x = x_73_cast); - tensor var_707_to_fp16 = const()[name = tensor("op_707_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(81830528)))]; - tensor var_708_to_fp16 = const()[name = tensor("op_708_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(86549184)))]; - tensor input_49_cast = linear(bias = var_708_to_fp16, weight = var_707_to_fp16, x = var_698_cast); - tensor x_77_mode_0 = const()[name = tensor("x_77_mode_0"), val = tensor("EXACT")]; - tensor x_77_cast = gelu(mode = x_77_mode_0, x = input_49_cast); - tensor var_713_to_fp16 = const()[name = tensor("op_713_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(86555392)))]; - tensor var_714_to_fp16 = const()[name = tensor("op_714_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(91274048)))]; - tensor var_715_cast = linear(bias = var_714_to_fp16, weight = var_713_to_fp16, x = x_77_cast); - tensor x_79_cast = add(x = x_73_cast, y = var_715_cast); - tensor var_724 = const()[name = tensor("op_724"), val = tensor(-1)]; - tensor var_741_axes_0 = const()[name = tensor("op_741_axes_0"), val = tensor([-1])]; - tensor blocks_6_attn_ln_weight_to_fp16 = const()[name = tensor("blocks_6_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(91275648)))]; - tensor blocks_6_attn_ln_bias_to_fp16 = const()[name = tensor("blocks_6_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(91277248)))]; - tensor var_730_to_fp16 = const()[name = tensor("op_730_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_741_cast = layer_norm(axes = var_741_axes_0, beta = blocks_6_attn_ln_bias_to_fp16, epsilon = var_730_to_fp16, gamma = blocks_6_attn_ln_weight_to_fp16, x = x_79_cast); - tensor var_752_to_fp16 = const()[name = tensor("op_752_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(91278848)))]; - tensor var_753_to_fp16 = const()[name = tensor("op_753_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(92458560)))]; - tensor q_25_cast = linear(bias = var_753_to_fp16, weight = var_752_to_fp16, x = var_741_cast); - tensor var_756_to_fp16 = const()[name = tensor("op_756_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(92460160)))]; - tensor k_25_bias_0_to_fp16 = const()[name = tensor("k_25_bias_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93639872)))]; - tensor k_25_cast = linear(bias = k_25_bias_0_to_fp16, weight = var_756_to_fp16, x = var_741_cast); - tensor var_760_to_fp16 = const()[name = tensor("op_760_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93641472)))]; - tensor var_761_to_fp16 = const()[name = tensor("op_761_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(94821184)))]; - tensor v_25_cast = linear(bias = var_761_to_fp16, weight = var_760_to_fp16, x = var_741_cast); - tensor var_769 = const()[name = tensor("op_769"), val = tensor([1, 1500, 12, -1])]; - tensor var_770_cast = reshape(shape = var_769, x = q_25_cast); - tensor const_96_to_fp16 = const()[name = tensor("const_96_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; - tensor q_27_cast = mul(x = var_770_cast, y = const_96_to_fp16); - tensor var_776 = const()[name = tensor("op_776"), val = tensor([1, 1500, 12, -1])]; - tensor var_777_cast = reshape(shape = var_776, x = k_25_cast); - tensor const_97_to_fp16 = const()[name = tensor("const_97_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; - tensor k_27_cast = mul(x = var_777_cast, y = const_97_to_fp16); - tensor var_783 = const()[name = tensor("op_783"), val = tensor([1, 1500, 12, -1])]; - tensor var_784_cast = reshape(shape = var_783, x = v_25_cast); - tensor var_785 = const()[name = tensor("op_785"), val = tensor([0, 2, 1, 3])]; - tensor qk_13_transpose_x_0 = const()[name = tensor("qk_13_transpose_x_0"), val = tensor(false)]; - tensor qk_13_transpose_y_0 = const()[name = tensor("qk_13_transpose_y_0"), val = tensor(false)]; - tensor transpose_36_perm_0 = const()[name = tensor("transpose_36_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor transpose_37_perm_0 = const()[name = tensor("transpose_37_perm_0"), val = tensor([0, 2, 3, 1])]; - tensor transpose_69 = transpose(perm = transpose_37_perm_0, x = k_27_cast); - tensor transpose_70 = transpose(perm = transpose_36_perm_0, x = q_27_cast); - tensor qk_13_cast = matmul(transpose_x = qk_13_transpose_x_0, transpose_y = qk_13_transpose_y_0, x = transpose_70, y = transpose_69); - tensor var_789_cast = softmax(axis = var_724, x = qk_13_cast); - tensor var_791_transpose_x_0 = const()[name = tensor("op_791_transpose_x_0"), val = tensor(false)]; - tensor var_791_transpose_y_0 = const()[name = tensor("op_791_transpose_y_0"), val = tensor(false)]; - tensor transpose_71 = transpose(perm = var_785, x = var_784_cast); - tensor var_791_cast = matmul(transpose_x = var_791_transpose_x_0, transpose_y = var_791_transpose_y_0, x = var_789_cast, y = transpose_71); - tensor var_792 = const()[name = tensor("op_792"), val = tensor([0, 2, 1, 3])]; - tensor concat_6 = const()[name = tensor("concat_6"), val = tensor([1, 1500, 768])]; - tensor transpose_68 = transpose(perm = var_792, x = var_791_cast); - tensor x_83_cast = reshape(shape = concat_6, x = transpose_68); - tensor var_797_to_fp16 = const()[name = tensor("op_797_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(94822784)))]; - tensor var_798_to_fp16 = const()[name = tensor("op_798_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(96002496)))]; - tensor var_799_cast = linear(bias = var_798_to_fp16, weight = var_797_to_fp16, x = x_83_cast); - tensor x_85_cast = add(x = x_79_cast, y = var_799_cast); - tensor var_805_axes_0 = const()[name = tensor("op_805_axes_0"), val = tensor([-1])]; - tensor blocks_6_mlp_ln_weight_to_fp16 = const()[name = tensor("blocks_6_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(96004096)))]; - tensor blocks_6_mlp_ln_bias_to_fp16 = const()[name = tensor("blocks_6_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(96005696)))]; - tensor var_805_cast = layer_norm(axes = var_805_axes_0, beta = blocks_6_mlp_ln_bias_to_fp16, epsilon = var_730_to_fp16, gamma = blocks_6_mlp_ln_weight_to_fp16, x = x_85_cast); - tensor var_814_to_fp16 = const()[name = tensor("op_814_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(96007296)))]; - tensor var_815_to_fp16 = const()[name = tensor("op_815_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(100725952)))]; - tensor input_57_cast = linear(bias = var_815_to_fp16, weight = var_814_to_fp16, x = var_805_cast); - tensor x_89_mode_0 = const()[name = tensor("x_89_mode_0"), val = tensor("EXACT")]; - tensor x_89_cast = gelu(mode = x_89_mode_0, x = input_57_cast); - tensor var_820_to_fp16 = const()[name = tensor("op_820_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(100732160)))]; - tensor var_821_to_fp16 = const()[name = tensor("op_821_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(105450816)))]; - tensor var_822_cast = linear(bias = var_821_to_fp16, weight = var_820_to_fp16, x = x_89_cast); - tensor x_91_cast = add(x = x_85_cast, y = var_822_cast); - tensor var_831 = const()[name = tensor("op_831"), val = tensor(-1)]; - tensor var_848_axes_0 = const()[name = tensor("op_848_axes_0"), val = tensor([-1])]; - tensor blocks_7_attn_ln_weight_to_fp16 = const()[name = tensor("blocks_7_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(105452416)))]; - tensor blocks_7_attn_ln_bias_to_fp16 = const()[name = tensor("blocks_7_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(105454016)))]; - tensor var_837_to_fp16 = const()[name = tensor("op_837_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_848_cast = layer_norm(axes = var_848_axes_0, beta = blocks_7_attn_ln_bias_to_fp16, epsilon = var_837_to_fp16, gamma = blocks_7_attn_ln_weight_to_fp16, x = x_91_cast); - tensor var_859_to_fp16 = const()[name = tensor("op_859_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(105455616)))]; - tensor var_860_to_fp16 = const()[name = tensor("op_860_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(106635328)))]; - tensor q_29_cast = linear(bias = var_860_to_fp16, weight = var_859_to_fp16, x = var_848_cast); - tensor var_863_to_fp16 = const()[name = tensor("op_863_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(106636928)))]; - tensor k_29_bias_0_to_fp16 = const()[name = tensor("k_29_bias_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(107816640)))]; - tensor k_29_cast = linear(bias = k_29_bias_0_to_fp16, weight = var_863_to_fp16, x = var_848_cast); - tensor var_867_to_fp16 = const()[name = tensor("op_867_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(107818240)))]; - tensor var_868_to_fp16 = const()[name = tensor("op_868_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(108997952)))]; - tensor v_29_cast = linear(bias = var_868_to_fp16, weight = var_867_to_fp16, x = var_848_cast); - tensor var_876 = const()[name = tensor("op_876"), val = tensor([1, 1500, 12, -1])]; - tensor var_877_cast = reshape(shape = var_876, x = q_29_cast); - tensor const_98_to_fp16 = const()[name = tensor("const_98_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; - tensor q_31_cast = mul(x = var_877_cast, y = const_98_to_fp16); - tensor var_883 = const()[name = tensor("op_883"), val = tensor([1, 1500, 12, -1])]; - tensor var_884_cast = reshape(shape = var_883, x = k_29_cast); - tensor const_99_to_fp16 = const()[name = tensor("const_99_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; - tensor k_31_cast = mul(x = var_884_cast, y = const_99_to_fp16); - tensor var_890 = const()[name = tensor("op_890"), val = tensor([1, 1500, 12, -1])]; - tensor var_891_cast = reshape(shape = var_890, x = v_29_cast); - tensor var_892 = const()[name = tensor("op_892"), val = tensor([0, 2, 1, 3])]; - tensor qk_15_transpose_x_0 = const()[name = tensor("qk_15_transpose_x_0"), val = tensor(false)]; - tensor qk_15_transpose_y_0 = const()[name = tensor("qk_15_transpose_y_0"), val = tensor(false)]; - tensor transpose_38_perm_0 = const()[name = tensor("transpose_38_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor transpose_39_perm_0 = const()[name = tensor("transpose_39_perm_0"), val = tensor([0, 2, 3, 1])]; - tensor transpose_65 = transpose(perm = transpose_39_perm_0, x = k_31_cast); - tensor transpose_66 = transpose(perm = transpose_38_perm_0, x = q_31_cast); - tensor qk_15_cast = matmul(transpose_x = qk_15_transpose_x_0, transpose_y = qk_15_transpose_y_0, x = transpose_66, y = transpose_65); - tensor var_896_cast = softmax(axis = var_831, x = qk_15_cast); - tensor var_898_transpose_x_0 = const()[name = tensor("op_898_transpose_x_0"), val = tensor(false)]; - tensor var_898_transpose_y_0 = const()[name = tensor("op_898_transpose_y_0"), val = tensor(false)]; - tensor transpose_67 = transpose(perm = var_892, x = var_891_cast); - tensor var_898_cast = matmul(transpose_x = var_898_transpose_x_0, transpose_y = var_898_transpose_y_0, x = var_896_cast, y = transpose_67); - tensor var_899 = const()[name = tensor("op_899"), val = tensor([0, 2, 1, 3])]; - tensor concat_7 = const()[name = tensor("concat_7"), val = tensor([1, 1500, 768])]; - tensor transpose_64 = transpose(perm = var_899, x = var_898_cast); - tensor x_95_cast = reshape(shape = concat_7, x = transpose_64); - tensor var_904_to_fp16 = const()[name = tensor("op_904_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(108999552)))]; - tensor var_905_to_fp16 = const()[name = tensor("op_905_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110179264)))]; - tensor var_906_cast = linear(bias = var_905_to_fp16, weight = var_904_to_fp16, x = x_95_cast); - tensor x_97_cast = add(x = x_91_cast, y = var_906_cast); - tensor var_912_axes_0 = const()[name = tensor("op_912_axes_0"), val = tensor([-1])]; - tensor blocks_7_mlp_ln_weight_to_fp16 = const()[name = tensor("blocks_7_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110180864)))]; - tensor blocks_7_mlp_ln_bias_to_fp16 = const()[name = tensor("blocks_7_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110182464)))]; - tensor var_912_cast = layer_norm(axes = var_912_axes_0, beta = blocks_7_mlp_ln_bias_to_fp16, epsilon = var_837_to_fp16, gamma = blocks_7_mlp_ln_weight_to_fp16, x = x_97_cast); - tensor var_921_to_fp16 = const()[name = tensor("op_921_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(110184064)))]; - tensor var_922_to_fp16 = const()[name = tensor("op_922_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(114902720)))]; - tensor input_65_cast = linear(bias = var_922_to_fp16, weight = var_921_to_fp16, x = var_912_cast); - tensor x_101_mode_0 = const()[name = tensor("x_101_mode_0"), val = tensor("EXACT")]; - tensor x_101_cast = gelu(mode = x_101_mode_0, x = input_65_cast); - tensor var_927_to_fp16 = const()[name = tensor("op_927_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(114908928)))]; - tensor var_928_to_fp16 = const()[name = tensor("op_928_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119627584)))]; - tensor var_929_cast = linear(bias = var_928_to_fp16, weight = var_927_to_fp16, x = x_101_cast); - tensor x_103_cast = add(x = x_97_cast, y = var_929_cast); - tensor var_938 = const()[name = tensor("op_938"), val = tensor(-1)]; - tensor var_955_axes_0 = const()[name = tensor("op_955_axes_0"), val = tensor([-1])]; - tensor blocks_8_attn_ln_weight_to_fp16 = const()[name = tensor("blocks_8_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119629184)))]; - tensor blocks_8_attn_ln_bias_to_fp16 = const()[name = tensor("blocks_8_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119630784)))]; - tensor var_944_to_fp16 = const()[name = tensor("op_944_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_955_cast = layer_norm(axes = var_955_axes_0, beta = blocks_8_attn_ln_bias_to_fp16, epsilon = var_944_to_fp16, gamma = blocks_8_attn_ln_weight_to_fp16, x = x_103_cast); - tensor var_966_to_fp16 = const()[name = tensor("op_966_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119632384)))]; - tensor var_967_to_fp16 = const()[name = tensor("op_967_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(120812096)))]; - tensor q_33_cast = linear(bias = var_967_to_fp16, weight = var_966_to_fp16, x = var_955_cast); - tensor var_970_to_fp16 = const()[name = tensor("op_970_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(120813696)))]; - tensor k_33_bias_0_to_fp16 = const()[name = tensor("k_33_bias_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(121993408)))]; - tensor k_33_cast = linear(bias = k_33_bias_0_to_fp16, weight = var_970_to_fp16, x = var_955_cast); - tensor var_974_to_fp16 = const()[name = tensor("op_974_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(121995008)))]; - tensor var_975_to_fp16 = const()[name = tensor("op_975_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(123174720)))]; - tensor v_33_cast = linear(bias = var_975_to_fp16, weight = var_974_to_fp16, x = var_955_cast); - tensor var_983 = const()[name = tensor("op_983"), val = tensor([1, 1500, 12, -1])]; - tensor var_984_cast = reshape(shape = var_983, x = q_33_cast); - tensor const_100_to_fp16 = const()[name = tensor("const_100_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; - tensor q_35_cast = mul(x = var_984_cast, y = const_100_to_fp16); - tensor var_990 = const()[name = tensor("op_990"), val = tensor([1, 1500, 12, -1])]; - tensor var_991_cast = reshape(shape = var_990, x = k_33_cast); - tensor const_101_to_fp16 = const()[name = tensor("const_101_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; - tensor k_35_cast = mul(x = var_991_cast, y = const_101_to_fp16); - tensor var_997 = const()[name = tensor("op_997"), val = tensor([1, 1500, 12, -1])]; - tensor var_998_cast = reshape(shape = var_997, x = v_33_cast); - tensor var_999 = const()[name = tensor("op_999"), val = tensor([0, 2, 1, 3])]; - tensor qk_17_transpose_x_0 = const()[name = tensor("qk_17_transpose_x_0"), val = tensor(false)]; - tensor qk_17_transpose_y_0 = const()[name = tensor("qk_17_transpose_y_0"), val = tensor(false)]; - tensor transpose_40_perm_0 = const()[name = tensor("transpose_40_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor transpose_41_perm_0 = const()[name = tensor("transpose_41_perm_0"), val = tensor([0, 2, 3, 1])]; - tensor transpose_61 = transpose(perm = transpose_41_perm_0, x = k_35_cast); - tensor transpose_62 = transpose(perm = transpose_40_perm_0, x = q_35_cast); - tensor qk_17_cast = matmul(transpose_x = qk_17_transpose_x_0, transpose_y = qk_17_transpose_y_0, x = transpose_62, y = transpose_61); - tensor var_1003_cast = softmax(axis = var_938, x = qk_17_cast); - tensor var_1005_transpose_x_0 = const()[name = tensor("op_1005_transpose_x_0"), val = tensor(false)]; - tensor var_1005_transpose_y_0 = const()[name = tensor("op_1005_transpose_y_0"), val = tensor(false)]; - tensor transpose_63 = transpose(perm = var_999, x = var_998_cast); - tensor var_1005_cast = matmul(transpose_x = var_1005_transpose_x_0, transpose_y = var_1005_transpose_y_0, x = var_1003_cast, y = transpose_63); - tensor var_1006 = const()[name = tensor("op_1006"), val = tensor([0, 2, 1, 3])]; - tensor concat_8 = const()[name = tensor("concat_8"), val = tensor([1, 1500, 768])]; - tensor transpose_60 = transpose(perm = var_1006, x = var_1005_cast); - tensor x_107_cast = reshape(shape = concat_8, x = transpose_60); - tensor var_1011_to_fp16 = const()[name = tensor("op_1011_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(123176320)))]; - tensor var_1012_to_fp16 = const()[name = tensor("op_1012_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(124356032)))]; - tensor var_1013_cast = linear(bias = var_1012_to_fp16, weight = var_1011_to_fp16, x = x_107_cast); - tensor x_109_cast = add(x = x_103_cast, y = var_1013_cast); - tensor var_1019_axes_0 = const()[name = tensor("op_1019_axes_0"), val = tensor([-1])]; - tensor blocks_8_mlp_ln_weight_to_fp16 = const()[name = tensor("blocks_8_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(124357632)))]; - tensor blocks_8_mlp_ln_bias_to_fp16 = const()[name = tensor("blocks_8_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(124359232)))]; - tensor var_1019_cast = layer_norm(axes = var_1019_axes_0, beta = blocks_8_mlp_ln_bias_to_fp16, epsilon = var_944_to_fp16, gamma = blocks_8_mlp_ln_weight_to_fp16, x = x_109_cast); - tensor var_1028_to_fp16 = const()[name = tensor("op_1028_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(124360832)))]; - tensor var_1029_to_fp16 = const()[name = tensor("op_1029_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(129079488)))]; - tensor input_73_cast = linear(bias = var_1029_to_fp16, weight = var_1028_to_fp16, x = var_1019_cast); - tensor x_113_mode_0 = const()[name = tensor("x_113_mode_0"), val = tensor("EXACT")]; - tensor x_113_cast = gelu(mode = x_113_mode_0, x = input_73_cast); - tensor var_1034_to_fp16 = const()[name = tensor("op_1034_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(129085696)))]; - tensor var_1035_to_fp16 = const()[name = tensor("op_1035_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(133804352)))]; - tensor var_1036_cast = linear(bias = var_1035_to_fp16, weight = var_1034_to_fp16, x = x_113_cast); - tensor x_115_cast = add(x = x_109_cast, y = var_1036_cast); - tensor var_1045 = const()[name = tensor("op_1045"), val = tensor(-1)]; - tensor var_1062_axes_0 = const()[name = tensor("op_1062_axes_0"), val = tensor([-1])]; - tensor blocks_9_attn_ln_weight_to_fp16 = const()[name = tensor("blocks_9_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(133805952)))]; - tensor blocks_9_attn_ln_bias_to_fp16 = const()[name = tensor("blocks_9_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(133807552)))]; - tensor var_1051_to_fp16 = const()[name = tensor("op_1051_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_1062_cast = layer_norm(axes = var_1062_axes_0, beta = blocks_9_attn_ln_bias_to_fp16, epsilon = var_1051_to_fp16, gamma = blocks_9_attn_ln_weight_to_fp16, x = x_115_cast); - tensor var_1073_to_fp16 = const()[name = tensor("op_1073_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(133809152)))]; - tensor var_1074_to_fp16 = const()[name = tensor("op_1074_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134988864)))]; - tensor q_37_cast = linear(bias = var_1074_to_fp16, weight = var_1073_to_fp16, x = var_1062_cast); - tensor var_1077_to_fp16 = const()[name = tensor("op_1077_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(134990464)))]; - tensor k_37_bias_0_to_fp16 = const()[name = tensor("k_37_bias_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(136170176)))]; - tensor k_37_cast = linear(bias = k_37_bias_0_to_fp16, weight = var_1077_to_fp16, x = var_1062_cast); - tensor var_1081_to_fp16 = const()[name = tensor("op_1081_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(136171776)))]; - tensor var_1082_to_fp16 = const()[name = tensor("op_1082_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(137351488)))]; - tensor v_37_cast = linear(bias = var_1082_to_fp16, weight = var_1081_to_fp16, x = var_1062_cast); - tensor var_1090 = const()[name = tensor("op_1090"), val = tensor([1, 1500, 12, -1])]; - tensor var_1091_cast = reshape(shape = var_1090, x = q_37_cast); - tensor const_102_to_fp16 = const()[name = tensor("const_102_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; - tensor q_39_cast = mul(x = var_1091_cast, y = const_102_to_fp16); - tensor var_1097 = const()[name = tensor("op_1097"), val = tensor([1, 1500, 12, -1])]; - tensor var_1098_cast = reshape(shape = var_1097, x = k_37_cast); - tensor const_103_to_fp16 = const()[name = tensor("const_103_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; - tensor k_39_cast = mul(x = var_1098_cast, y = const_103_to_fp16); - tensor var_1104 = const()[name = tensor("op_1104"), val = tensor([1, 1500, 12, -1])]; - tensor var_1105_cast = reshape(shape = var_1104, x = v_37_cast); - tensor var_1106 = const()[name = tensor("op_1106"), val = tensor([0, 2, 1, 3])]; - tensor qk_19_transpose_x_0 = const()[name = tensor("qk_19_transpose_x_0"), val = tensor(false)]; - tensor qk_19_transpose_y_0 = const()[name = tensor("qk_19_transpose_y_0"), val = tensor(false)]; - tensor transpose_42_perm_0 = const()[name = tensor("transpose_42_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor transpose_43_perm_0 = const()[name = tensor("transpose_43_perm_0"), val = tensor([0, 2, 3, 1])]; - tensor transpose_57 = transpose(perm = transpose_43_perm_0, x = k_39_cast); - tensor transpose_58 = transpose(perm = transpose_42_perm_0, x = q_39_cast); - tensor qk_19_cast = matmul(transpose_x = qk_19_transpose_x_0, transpose_y = qk_19_transpose_y_0, x = transpose_58, y = transpose_57); - tensor var_1110_cast = softmax(axis = var_1045, x = qk_19_cast); - tensor var_1112_transpose_x_0 = const()[name = tensor("op_1112_transpose_x_0"), val = tensor(false)]; - tensor var_1112_transpose_y_0 = const()[name = tensor("op_1112_transpose_y_0"), val = tensor(false)]; - tensor transpose_59 = transpose(perm = var_1106, x = var_1105_cast); - tensor var_1112_cast = matmul(transpose_x = var_1112_transpose_x_0, transpose_y = var_1112_transpose_y_0, x = var_1110_cast, y = transpose_59); - tensor var_1113 = const()[name = tensor("op_1113"), val = tensor([0, 2, 1, 3])]; - tensor concat_9 = const()[name = tensor("concat_9"), val = tensor([1, 1500, 768])]; - tensor transpose_56 = transpose(perm = var_1113, x = var_1112_cast); - tensor x_119_cast = reshape(shape = concat_9, x = transpose_56); - tensor var_1118_to_fp16 = const()[name = tensor("op_1118_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(137353088)))]; - tensor var_1119_to_fp16 = const()[name = tensor("op_1119_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(138532800)))]; - tensor var_1120_cast = linear(bias = var_1119_to_fp16, weight = var_1118_to_fp16, x = x_119_cast); - tensor x_121_cast = add(x = x_115_cast, y = var_1120_cast); - tensor var_1126_axes_0 = const()[name = tensor("op_1126_axes_0"), val = tensor([-1])]; - tensor blocks_9_mlp_ln_weight_to_fp16 = const()[name = tensor("blocks_9_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(138534400)))]; - tensor blocks_9_mlp_ln_bias_to_fp16 = const()[name = tensor("blocks_9_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(138536000)))]; - tensor var_1126_cast = layer_norm(axes = var_1126_axes_0, beta = blocks_9_mlp_ln_bias_to_fp16, epsilon = var_1051_to_fp16, gamma = blocks_9_mlp_ln_weight_to_fp16, x = x_121_cast); - tensor var_1135_to_fp16 = const()[name = tensor("op_1135_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(138537600)))]; - tensor var_1136_to_fp16 = const()[name = tensor("op_1136_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(143256256)))]; - tensor input_81_cast = linear(bias = var_1136_to_fp16, weight = var_1135_to_fp16, x = var_1126_cast); - tensor x_125_mode_0 = const()[name = tensor("x_125_mode_0"), val = tensor("EXACT")]; - tensor x_125_cast = gelu(mode = x_125_mode_0, x = input_81_cast); - tensor var_1141_to_fp16 = const()[name = tensor("op_1141_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(143262464)))]; - tensor var_1142_to_fp16 = const()[name = tensor("op_1142_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(147981120)))]; - tensor var_1143_cast = linear(bias = var_1142_to_fp16, weight = var_1141_to_fp16, x = x_125_cast); - tensor x_127_cast = add(x = x_121_cast, y = var_1143_cast); - tensor var_1152 = const()[name = tensor("op_1152"), val = tensor(-1)]; - tensor var_1169_axes_0 = const()[name = tensor("op_1169_axes_0"), val = tensor([-1])]; - tensor blocks_10_attn_ln_weight_to_fp16 = const()[name = tensor("blocks_10_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(147982720)))]; - tensor blocks_10_attn_ln_bias_to_fp16 = const()[name = tensor("blocks_10_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(147984320)))]; - tensor var_1158_to_fp16 = const()[name = tensor("op_1158_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_1169_cast = layer_norm(axes = var_1169_axes_0, beta = blocks_10_attn_ln_bias_to_fp16, epsilon = var_1158_to_fp16, gamma = blocks_10_attn_ln_weight_to_fp16, x = x_127_cast); - tensor var_1180_to_fp16 = const()[name = tensor("op_1180_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(147985920)))]; - tensor var_1181_to_fp16 = const()[name = tensor("op_1181_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(149165632)))]; - tensor q_41_cast = linear(bias = var_1181_to_fp16, weight = var_1180_to_fp16, x = var_1169_cast); - tensor var_1184_to_fp16 = const()[name = tensor("op_1184_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(149167232)))]; - tensor k_41_bias_0_to_fp16 = const()[name = tensor("k_41_bias_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(150346944)))]; - tensor k_41_cast = linear(bias = k_41_bias_0_to_fp16, weight = var_1184_to_fp16, x = var_1169_cast); - tensor var_1188_to_fp16 = const()[name = tensor("op_1188_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(150348544)))]; - tensor var_1189_to_fp16 = const()[name = tensor("op_1189_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(151528256)))]; - tensor v_41_cast = linear(bias = var_1189_to_fp16, weight = var_1188_to_fp16, x = var_1169_cast); - tensor var_1197 = const()[name = tensor("op_1197"), val = tensor([1, 1500, 12, -1])]; - tensor var_1198_cast = reshape(shape = var_1197, x = q_41_cast); - tensor const_104_to_fp16 = const()[name = tensor("const_104_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; - tensor q_43_cast = mul(x = var_1198_cast, y = const_104_to_fp16); - tensor var_1204 = const()[name = tensor("op_1204"), val = tensor([1, 1500, 12, -1])]; - tensor var_1205_cast = reshape(shape = var_1204, x = k_41_cast); - tensor const_105_to_fp16 = const()[name = tensor("const_105_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; - tensor k_43_cast = mul(x = var_1205_cast, y = const_105_to_fp16); - tensor var_1211 = const()[name = tensor("op_1211"), val = tensor([1, 1500, 12, -1])]; - tensor var_1212_cast = reshape(shape = var_1211, x = v_41_cast); - tensor var_1213 = const()[name = tensor("op_1213"), val = tensor([0, 2, 1, 3])]; - tensor qk_21_transpose_x_0 = const()[name = tensor("qk_21_transpose_x_0"), val = tensor(false)]; - tensor qk_21_transpose_y_0 = const()[name = tensor("qk_21_transpose_y_0"), val = tensor(false)]; - tensor transpose_44_perm_0 = const()[name = tensor("transpose_44_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor transpose_45_perm_0 = const()[name = tensor("transpose_45_perm_0"), val = tensor([0, 2, 3, 1])]; - tensor transpose_53 = transpose(perm = transpose_45_perm_0, x = k_43_cast); - tensor transpose_54 = transpose(perm = transpose_44_perm_0, x = q_43_cast); - tensor qk_21_cast = matmul(transpose_x = qk_21_transpose_x_0, transpose_y = qk_21_transpose_y_0, x = transpose_54, y = transpose_53); - tensor var_1217_cast = softmax(axis = var_1152, x = qk_21_cast); - tensor var_1219_transpose_x_0 = const()[name = tensor("op_1219_transpose_x_0"), val = tensor(false)]; - tensor var_1219_transpose_y_0 = const()[name = tensor("op_1219_transpose_y_0"), val = tensor(false)]; - tensor transpose_55 = transpose(perm = var_1213, x = var_1212_cast); - tensor var_1219_cast = matmul(transpose_x = var_1219_transpose_x_0, transpose_y = var_1219_transpose_y_0, x = var_1217_cast, y = transpose_55); - tensor var_1220 = const()[name = tensor("op_1220"), val = tensor([0, 2, 1, 3])]; - tensor concat_10 = const()[name = tensor("concat_10"), val = tensor([1, 1500, 768])]; - tensor transpose_52 = transpose(perm = var_1220, x = var_1219_cast); - tensor x_131_cast = reshape(shape = concat_10, x = transpose_52); - tensor var_1225_to_fp16 = const()[name = tensor("op_1225_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(151529856)))]; - tensor var_1226_to_fp16 = const()[name = tensor("op_1226_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(152709568)))]; - tensor var_1227_cast = linear(bias = var_1226_to_fp16, weight = var_1225_to_fp16, x = x_131_cast); - tensor x_133_cast = add(x = x_127_cast, y = var_1227_cast); - tensor var_1233_axes_0 = const()[name = tensor("op_1233_axes_0"), val = tensor([-1])]; - tensor blocks_10_mlp_ln_weight_to_fp16 = const()[name = tensor("blocks_10_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(152711168)))]; - tensor blocks_10_mlp_ln_bias_to_fp16 = const()[name = tensor("blocks_10_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(152712768)))]; - tensor var_1233_cast = layer_norm(axes = var_1233_axes_0, beta = blocks_10_mlp_ln_bias_to_fp16, epsilon = var_1158_to_fp16, gamma = blocks_10_mlp_ln_weight_to_fp16, x = x_133_cast); - tensor var_1242_to_fp16 = const()[name = tensor("op_1242_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(152714368)))]; - tensor var_1243_to_fp16 = const()[name = tensor("op_1243_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(157433024)))]; - tensor input_89_cast = linear(bias = var_1243_to_fp16, weight = var_1242_to_fp16, x = var_1233_cast); - tensor x_137_mode_0 = const()[name = tensor("x_137_mode_0"), val = tensor("EXACT")]; - tensor x_137_cast = gelu(mode = x_137_mode_0, x = input_89_cast); - tensor var_1248_to_fp16 = const()[name = tensor("op_1248_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(157439232)))]; - tensor var_1249_to_fp16 = const()[name = tensor("op_1249_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(162157888)))]; - tensor var_1250_cast = linear(bias = var_1249_to_fp16, weight = var_1248_to_fp16, x = x_137_cast); - tensor x_139_cast = add(x = x_133_cast, y = var_1250_cast); - tensor var_1259 = const()[name = tensor("op_1259"), val = tensor(-1)]; - tensor var_1276_axes_0 = const()[name = tensor("op_1276_axes_0"), val = tensor([-1])]; - tensor blocks_11_attn_ln_weight_to_fp16 = const()[name = tensor("blocks_11_attn_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(162159488)))]; - tensor blocks_11_attn_ln_bias_to_fp16 = const()[name = tensor("blocks_11_attn_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(162161088)))]; - tensor var_1265_to_fp16 = const()[name = tensor("op_1265_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_1276_cast = layer_norm(axes = var_1276_axes_0, beta = blocks_11_attn_ln_bias_to_fp16, epsilon = var_1265_to_fp16, gamma = blocks_11_attn_ln_weight_to_fp16, x = x_139_cast); - tensor var_1287_to_fp16 = const()[name = tensor("op_1287_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(162162688)))]; - tensor var_1288_to_fp16 = const()[name = tensor("op_1288_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(163342400)))]; - tensor q_45_cast = linear(bias = var_1288_to_fp16, weight = var_1287_to_fp16, x = var_1276_cast); - tensor var_1291_to_fp16 = const()[name = tensor("op_1291_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(163344000)))]; - tensor k_45_bias_0_to_fp16 = const()[name = tensor("k_45_bias_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(164523712)))]; - tensor k_45_cast = linear(bias = k_45_bias_0_to_fp16, weight = var_1291_to_fp16, x = var_1276_cast); - tensor var_1295_to_fp16 = const()[name = tensor("op_1295_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(164525312)))]; - tensor var_1296_to_fp16 = const()[name = tensor("op_1296_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(165705024)))]; - tensor v_45_cast = linear(bias = var_1296_to_fp16, weight = var_1295_to_fp16, x = var_1276_cast); - tensor var_1304 = const()[name = tensor("op_1304"), val = tensor([1, 1500, 12, -1])]; - tensor var_1305_cast = reshape(shape = var_1304, x = q_45_cast); - tensor const_106_to_fp16 = const()[name = tensor("const_106_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; - tensor q_cast = mul(x = var_1305_cast, y = const_106_to_fp16); - tensor var_1311 = const()[name = tensor("op_1311"), val = tensor([1, 1500, 12, -1])]; - tensor var_1312_cast = reshape(shape = var_1311, x = k_45_cast); - tensor const_107_to_fp16 = const()[name = tensor("const_107_to_fp16"), val = tensor([[[[0x1.6ap-2]]]])]; - tensor k_cast = mul(x = var_1312_cast, y = const_107_to_fp16); - tensor var_1318 = const()[name = tensor("op_1318"), val = tensor([1, 1500, 12, -1])]; - tensor var_1319_cast = reshape(shape = var_1318, x = v_45_cast); - tensor var_1320 = const()[name = tensor("op_1320"), val = tensor([0, 2, 1, 3])]; - tensor qk_transpose_x_0 = const()[name = tensor("qk_transpose_x_0"), val = tensor(false)]; - tensor qk_transpose_y_0 = const()[name = tensor("qk_transpose_y_0"), val = tensor(false)]; - tensor transpose_46_perm_0 = const()[name = tensor("transpose_46_perm_0"), val = tensor([0, 2, 1, 3])]; - tensor transpose_47_perm_0 = const()[name = tensor("transpose_47_perm_0"), val = tensor([0, 2, 3, 1])]; - tensor transpose_49 = transpose(perm = transpose_47_perm_0, x = k_cast); - tensor transpose_50 = transpose(perm = transpose_46_perm_0, x = q_cast); - tensor qk_cast = matmul(transpose_x = qk_transpose_x_0, transpose_y = qk_transpose_y_0, x = transpose_50, y = transpose_49); - tensor var_1324_cast = softmax(axis = var_1259, x = qk_cast); - tensor var_1326_transpose_x_0 = const()[name = tensor("op_1326_transpose_x_0"), val = tensor(false)]; - tensor var_1326_transpose_y_0 = const()[name = tensor("op_1326_transpose_y_0"), val = tensor(false)]; - tensor transpose_51 = transpose(perm = var_1320, x = var_1319_cast); - tensor var_1326_cast = matmul(transpose_x = var_1326_transpose_x_0, transpose_y = var_1326_transpose_y_0, x = var_1324_cast, y = transpose_51); - tensor var_1327 = const()[name = tensor("op_1327"), val = tensor([0, 2, 1, 3])]; - tensor concat_11 = const()[name = tensor("concat_11"), val = tensor([1, 1500, 768])]; - tensor transpose_48 = transpose(perm = var_1327, x = var_1326_cast); - tensor x_143_cast = reshape(shape = concat_11, x = transpose_48); - tensor var_1332_to_fp16 = const()[name = tensor("op_1332_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(165706624)))]; - tensor var_1333_to_fp16 = const()[name = tensor("op_1333_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(166886336)))]; - tensor var_1334_cast = linear(bias = var_1333_to_fp16, weight = var_1332_to_fp16, x = x_143_cast); - tensor x_145_cast = add(x = x_139_cast, y = var_1334_cast); - tensor var_1340_axes_0 = const()[name = tensor("op_1340_axes_0"), val = tensor([-1])]; - tensor blocks_11_mlp_ln_weight_to_fp16 = const()[name = tensor("blocks_11_mlp_ln_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(166887936)))]; - tensor blocks_11_mlp_ln_bias_to_fp16 = const()[name = tensor("blocks_11_mlp_ln_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(166889536)))]; - tensor var_1340_cast = layer_norm(axes = var_1340_axes_0, beta = blocks_11_mlp_ln_bias_to_fp16, epsilon = var_1265_to_fp16, gamma = blocks_11_mlp_ln_weight_to_fp16, x = x_145_cast); - tensor var_1349_to_fp16 = const()[name = tensor("op_1349_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(166891136)))]; - tensor var_1350_to_fp16 = const()[name = tensor("op_1350_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(171609792)))]; - tensor input_97_cast = linear(bias = var_1350_to_fp16, weight = var_1349_to_fp16, x = var_1340_cast); - tensor x_149_mode_0 = const()[name = tensor("x_149_mode_0"), val = tensor("EXACT")]; - tensor x_149_cast = gelu(mode = x_149_mode_0, x = input_97_cast); - tensor var_1355_to_fp16 = const()[name = tensor("op_1355_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(171616000)))]; - tensor var_1356_to_fp16 = const()[name = tensor("op_1356_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(176334656)))]; - tensor var_1357_cast = linear(bias = var_1356_to_fp16, weight = var_1355_to_fp16, x = x_149_cast); - tensor x_cast = add(x = x_145_cast, y = var_1357_cast); - tensor var_1370_axes_0 = const()[name = tensor("op_1370_axes_0"), val = tensor([-1])]; - tensor ln_post_weight_to_fp16 = const()[name = tensor("ln_post_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(176336256)))]; - tensor ln_post_bias_to_fp16 = const()[name = tensor("ln_post_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(176337856)))]; - tensor var_1361_to_fp16 = const()[name = tensor("op_1361_to_fp16"), val = tensor(0x1.5p-17)]; - tensor var_1370_cast = layer_norm(axes = var_1370_axes_0, beta = ln_post_bias_to_fp16, epsilon = var_1361_to_fp16, gamma = ln_post_weight_to_fp16, x = x_cast); - tensor var_1370_cast_to_fp32_dtype_0 = const()[name = tensor("op_1370_cast_to_fp32_dtype_0"), val = tensor("fp32")]; - tensor output = cast(dtype = var_1370_cast_to_fp32_dtype_0, x = var_1370_cast); - } -> (output); -} \ No newline at end of file