diff --git "a/openai_whisper-large-v2_turbo/AudioEncoder.mlmodelc/model.mil" "b/openai_whisper-large-v2_turbo/AudioEncoder.mlmodelc/model.mil" new file mode 100644--- /dev/null +++ "b/openai_whisper-large-v2_turbo/AudioEncoder.mlmodelc/model.mil" @@ -0,0 +1,39790 @@ +program(1.0) +[buildInfo = dict, tensor>({{"coremlc-component-MIL", "5.33.5"}, {"coremlc-version", "1877.40.3"}, {"coremltools-component-torch", "2.2.1"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "7.1"}})] +{ + func main(tensor melspectrogram_features) { + tensor var_90 = const()[name = tensor("op_90"), val = tensor([1, 1])]; + tensor var_96 = const()[name = tensor("op_96"), val = tensor([1, 1])]; + tensor var_101 = const()[name = tensor("op_101"), val = tensor(1)]; + tensor var_106_pad_type_0 = const()[name = tensor("op_106_pad_type_0"), val = tensor("custom")]; + tensor var_106_pad_0 = const()[name = tensor("op_106_pad_0"), val = tensor([0, 0, 1, 1])]; + tensor var_81_to_fp16 = const()[name = tensor("op_81_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64)))]; + tensor var_87_to_fp16 = const()[name = tensor("op_87_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(614528)))]; + tensor var_106_cast_fp16 = conv(bias = var_87_to_fp16, dilations = var_96, groups = var_101, pad = var_106_pad_0, pad_type = var_106_pad_type_0, strides = var_90, weight = var_81_to_fp16, x = melspectrogram_features)[name = tensor("op_106_cast_fp16")]; + tensor hidden_states_1_mode_0 = const()[name = tensor("hidden_states_1_mode_0"), val = tensor("EXACT")]; + tensor hidden_states_1_cast_fp16 = gelu(mode = hidden_states_1_mode_0, x = var_106_cast_fp16)[name = tensor("hidden_states_1_cast_fp16")]; + tensor var_130 = const()[name = tensor("op_130"), val = tensor([2, 2])]; + tensor var_136 = const()[name = tensor("op_136"), val = tensor([1, 1])]; + tensor var_141 = const()[name = tensor("op_141"), val = tensor(1)]; + tensor var_146_pad_type_0 = const()[name = tensor("op_146_pad_type_0"), val = tensor("custom")]; + tensor var_146_pad_0 = const()[name = tensor("op_146_pad_0"), val = tensor([0, 0, 1, 1])]; + tensor var_121_to_fp16 = const()[name = tensor("op_121_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(617152)))]; + tensor var_127_to_fp16 = const()[name = tensor("op_127_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(10447616)))]; + tensor var_146_cast_fp16 = conv(bias = var_127_to_fp16, dilations = var_136, groups = var_141, pad = var_146_pad_0, pad_type = var_146_pad_type_0, strides = var_130, weight = var_121_to_fp16, x = hidden_states_1_cast_fp16)[name = tensor("op_146_cast_fp16")]; + tensor hidden_states_3_mode_0 = const()[name = tensor("hidden_states_3_mode_0"), val = tensor("EXACT")]; + tensor hidden_states_3_cast_fp16 = gelu(mode = hidden_states_3_mode_0, x = var_146_cast_fp16)[name = tensor("hidden_states_3_cast_fp16")]; + tensor var_164_to_fp16 = const()[name = tensor("op_164_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(10450240)))]; + tensor inputs_1_cast_fp16 = add(x = hidden_states_3_cast_fp16, y = var_164_to_fp16)[name = tensor("inputs_1_cast_fp16")]; + tensor var_174 = const()[name = tensor("op_174"), val = tensor(3)]; + tensor var_199 = const()[name = tensor("op_199"), val = tensor(1)]; + tensor var_200 = const()[name = tensor("op_200"), val = tensor(true)]; + tensor var_210 = const()[name = tensor("op_210"), val = tensor([1])]; + tensor channels_mean_1_cast_fp16 = reduce_mean(axes = var_210, keep_dims = var_200, x = inputs_1_cast_fp16)[name = tensor("channels_mean_1_cast_fp16")]; + tensor zero_mean_1_cast_fp16 = sub(x = inputs_1_cast_fp16, y = channels_mean_1_cast_fp16)[name = tensor("zero_mean_1_cast_fp16")]; + tensor zero_mean_sq_1_cast_fp16 = mul(x = zero_mean_1_cast_fp16, y = zero_mean_1_cast_fp16)[name = tensor("zero_mean_sq_1_cast_fp16")]; + tensor var_214 = const()[name = tensor("op_214"), val = tensor([1])]; + tensor var_215_cast_fp16 = reduce_mean(axes = var_214, keep_dims = var_200, x = zero_mean_sq_1_cast_fp16)[name = tensor("op_215_cast_fp16")]; + tensor var_216_to_fp16 = const()[name = tensor("op_216_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_217_cast_fp16 = add(x = var_215_cast_fp16, y = var_216_to_fp16)[name = tensor("op_217_cast_fp16")]; + tensor denom_1_epsilon_0_to_fp16 = const()[name = tensor("denom_1_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_1_cast_fp16 = rsqrt(epsilon = denom_1_epsilon_0_to_fp16, x = var_217_cast_fp16)[name = tensor("denom_1_cast_fp16")]; + tensor out_1_cast_fp16 = mul(x = zero_mean_1_cast_fp16, y = denom_1_cast_fp16)[name = tensor("out_1_cast_fp16")]; + tensor obj_1_mean_0_to_fp16 = const()[name = tensor("obj_1_mean_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(14290304)))]; + tensor obj_1_variance_0_to_fp16 = const()[name = tensor("obj_1_variance_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(14292928)))]; + tensor obj_1_gamma_0_to_fp16 = const()[name = tensor("obj_1_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(14295552)))]; + tensor obj_1_beta_0_to_fp16 = const()[name = tensor("obj_1_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(14298176)))]; + tensor obj_1_epsilon_0_to_fp16 = const()[name = tensor("obj_1_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_1_cast_fp16 = batch_norm(beta = obj_1_beta_0_to_fp16, epsilon = obj_1_epsilon_0_to_fp16, gamma = obj_1_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_1_cast_fp16)[name = tensor("obj_1_cast_fp16")]; + tensor var_232 = const()[name = tensor("op_232"), val = tensor([1, 1])]; + tensor var_234 = const()[name = tensor("op_234"), val = tensor([1, 1])]; + tensor query_1_pad_type_0 = const()[name = tensor("query_1_pad_type_0"), val = tensor("custom")]; + tensor query_1_pad_0 = const()[name = tensor("query_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_0_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_0_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(14300800)))]; + tensor layers_0_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_0_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17577664)))]; + tensor query_1_cast_fp16 = conv(bias = layers_0_self_attn_q_proj_bias_to_fp16, dilations = var_234, groups = var_199, pad = query_1_pad_0, pad_type = query_1_pad_type_0, strides = var_232, weight = layers_0_self_attn_q_proj_weight_to_fp16, x = obj_1_cast_fp16)[name = tensor("query_1_cast_fp16")]; + tensor var_238 = const()[name = tensor("op_238"), val = tensor([1, 1])]; + tensor var_240 = const()[name = tensor("op_240"), val = tensor([1, 1])]; + tensor key_1_pad_type_0 = const()[name = tensor("key_1_pad_type_0"), val = tensor("custom")]; + tensor key_1_pad_0 = const()[name = tensor("key_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_0_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_0_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(17580288)))]; + tensor key_1_cast_fp16 = conv(dilations = var_240, groups = var_199, pad = key_1_pad_0, pad_type = key_1_pad_type_0, strides = var_238, weight = layers_0_self_attn_k_proj_weight_to_fp16, x = obj_1_cast_fp16)[name = tensor("key_1_cast_fp16")]; + tensor var_245 = const()[name = tensor("op_245"), val = tensor([1, 1])]; + tensor var_247 = const()[name = tensor("op_247"), val = tensor([1, 1])]; + tensor value_1_pad_type_0 = const()[name = tensor("value_1_pad_type_0"), val = tensor("custom")]; + tensor value_1_pad_0 = const()[name = tensor("value_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_0_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_0_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(20857152)))]; + tensor layers_0_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_0_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(24134016)))]; + tensor value_1_cast_fp16 = conv(bias = layers_0_self_attn_v_proj_bias_to_fp16, dilations = var_247, groups = var_199, pad = value_1_pad_0, pad_type = value_1_pad_type_0, strides = var_245, weight = layers_0_self_attn_v_proj_weight_to_fp16, x = obj_1_cast_fp16)[name = tensor("value_1_cast_fp16")]; + tensor var_254_begin_0 = const()[name = tensor("op_254_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_254_end_0 = const()[name = tensor("op_254_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_254_end_mask_0 = const()[name = tensor("op_254_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_254_cast_fp16 = slice_by_index(begin = var_254_begin_0, end = var_254_end_0, end_mask = var_254_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_254_cast_fp16")]; + tensor var_258_begin_0 = const()[name = tensor("op_258_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_258_end_0 = const()[name = tensor("op_258_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_258_end_mask_0 = const()[name = tensor("op_258_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_258_cast_fp16 = slice_by_index(begin = var_258_begin_0, end = var_258_end_0, end_mask = var_258_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_258_cast_fp16")]; + tensor var_262_begin_0 = const()[name = tensor("op_262_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_262_end_0 = const()[name = tensor("op_262_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_262_end_mask_0 = const()[name = tensor("op_262_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_262_cast_fp16 = slice_by_index(begin = var_262_begin_0, end = var_262_end_0, end_mask = var_262_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_262_cast_fp16")]; + tensor var_266_begin_0 = const()[name = tensor("op_266_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_266_end_0 = const()[name = tensor("op_266_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_266_end_mask_0 = const()[name = tensor("op_266_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_266_cast_fp16 = slice_by_index(begin = var_266_begin_0, end = var_266_end_0, end_mask = var_266_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_266_cast_fp16")]; + tensor var_270_begin_0 = const()[name = tensor("op_270_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_270_end_0 = const()[name = tensor("op_270_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_270_end_mask_0 = const()[name = tensor("op_270_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_270_cast_fp16 = slice_by_index(begin = var_270_begin_0, end = var_270_end_0, end_mask = var_270_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_270_cast_fp16")]; + tensor var_274_begin_0 = const()[name = tensor("op_274_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_274_end_0 = const()[name = tensor("op_274_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_274_end_mask_0 = const()[name = tensor("op_274_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_274_cast_fp16 = slice_by_index(begin = var_274_begin_0, end = var_274_end_0, end_mask = var_274_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_274_cast_fp16")]; + tensor var_278_begin_0 = const()[name = tensor("op_278_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_278_end_0 = const()[name = tensor("op_278_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_278_end_mask_0 = const()[name = tensor("op_278_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_278_cast_fp16 = slice_by_index(begin = var_278_begin_0, end = var_278_end_0, end_mask = var_278_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_278_cast_fp16")]; + tensor var_282_begin_0 = const()[name = tensor("op_282_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_282_end_0 = const()[name = tensor("op_282_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_282_end_mask_0 = const()[name = tensor("op_282_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_282_cast_fp16 = slice_by_index(begin = var_282_begin_0, end = var_282_end_0, end_mask = var_282_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_282_cast_fp16")]; + tensor var_286_begin_0 = const()[name = tensor("op_286_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_286_end_0 = const()[name = tensor("op_286_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_286_end_mask_0 = const()[name = tensor("op_286_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_286_cast_fp16 = slice_by_index(begin = var_286_begin_0, end = var_286_end_0, end_mask = var_286_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_286_cast_fp16")]; + tensor var_290_begin_0 = const()[name = tensor("op_290_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_290_end_0 = const()[name = tensor("op_290_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_290_end_mask_0 = const()[name = tensor("op_290_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_290_cast_fp16 = slice_by_index(begin = var_290_begin_0, end = var_290_end_0, end_mask = var_290_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_290_cast_fp16")]; + tensor var_294_begin_0 = const()[name = tensor("op_294_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_294_end_0 = const()[name = tensor("op_294_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_294_end_mask_0 = const()[name = tensor("op_294_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_294_cast_fp16 = slice_by_index(begin = var_294_begin_0, end = var_294_end_0, end_mask = var_294_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_294_cast_fp16")]; + tensor var_298_begin_0 = const()[name = tensor("op_298_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_298_end_0 = const()[name = tensor("op_298_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_298_end_mask_0 = const()[name = tensor("op_298_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_298_cast_fp16 = slice_by_index(begin = var_298_begin_0, end = var_298_end_0, end_mask = var_298_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_298_cast_fp16")]; + tensor var_302_begin_0 = const()[name = tensor("op_302_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_302_end_0 = const()[name = tensor("op_302_end_0"), val = tensor([1, 832, 1, 1500])]; + tensor var_302_end_mask_0 = const()[name = tensor("op_302_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_302_cast_fp16 = slice_by_index(begin = var_302_begin_0, end = var_302_end_0, end_mask = var_302_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_302_cast_fp16")]; + tensor var_306_begin_0 = const()[name = tensor("op_306_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_306_end_0 = const()[name = tensor("op_306_end_0"), val = tensor([1, 896, 1, 1500])]; + tensor var_306_end_mask_0 = const()[name = tensor("op_306_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_306_cast_fp16 = slice_by_index(begin = var_306_begin_0, end = var_306_end_0, end_mask = var_306_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_306_cast_fp16")]; + tensor var_310_begin_0 = const()[name = tensor("op_310_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_310_end_0 = const()[name = tensor("op_310_end_0"), val = tensor([1, 960, 1, 1500])]; + tensor var_310_end_mask_0 = const()[name = tensor("op_310_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_310_cast_fp16 = slice_by_index(begin = var_310_begin_0, end = var_310_end_0, end_mask = var_310_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_310_cast_fp16")]; + tensor var_314_begin_0 = const()[name = tensor("op_314_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_314_end_0 = const()[name = tensor("op_314_end_0"), val = tensor([1, 1024, 1, 1500])]; + tensor var_314_end_mask_0 = const()[name = tensor("op_314_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_314_cast_fp16 = slice_by_index(begin = var_314_begin_0, end = var_314_end_0, end_mask = var_314_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_314_cast_fp16")]; + tensor var_318_begin_0 = const()[name = tensor("op_318_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_318_end_0 = const()[name = tensor("op_318_end_0"), val = tensor([1, 1088, 1, 1500])]; + tensor var_318_end_mask_0 = const()[name = tensor("op_318_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_318_cast_fp16 = slice_by_index(begin = var_318_begin_0, end = var_318_end_0, end_mask = var_318_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_318_cast_fp16")]; + tensor var_322_begin_0 = const()[name = tensor("op_322_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_322_end_0 = const()[name = tensor("op_322_end_0"), val = tensor([1, 1152, 1, 1500])]; + tensor var_322_end_mask_0 = const()[name = tensor("op_322_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_322_cast_fp16 = slice_by_index(begin = var_322_begin_0, end = var_322_end_0, end_mask = var_322_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_322_cast_fp16")]; + tensor var_326_begin_0 = const()[name = tensor("op_326_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_326_end_0 = const()[name = tensor("op_326_end_0"), val = tensor([1, 1216, 1, 1500])]; + tensor var_326_end_mask_0 = const()[name = tensor("op_326_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_326_cast_fp16 = slice_by_index(begin = var_326_begin_0, end = var_326_end_0, end_mask = var_326_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_326_cast_fp16")]; + tensor var_330_begin_0 = const()[name = tensor("op_330_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_330_end_0 = const()[name = tensor("op_330_end_0"), val = tensor([1, 1280, 1, 1500])]; + tensor var_330_end_mask_0 = const()[name = tensor("op_330_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_330_cast_fp16 = slice_by_index(begin = var_330_begin_0, end = var_330_end_0, end_mask = var_330_end_mask_0, x = query_1_cast_fp16)[name = tensor("op_330_cast_fp16")]; + tensor var_339_begin_0 = const()[name = tensor("op_339_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_339_end_0 = const()[name = tensor("op_339_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_339_end_mask_0 = const()[name = tensor("op_339_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_339_cast_fp16 = slice_by_index(begin = var_339_begin_0, end = var_339_end_0, end_mask = var_339_end_mask_0, x = var_254_cast_fp16)[name = tensor("op_339_cast_fp16")]; + tensor var_346_begin_0 = const()[name = tensor("op_346_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_346_end_0 = const()[name = tensor("op_346_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_346_end_mask_0 = const()[name = tensor("op_346_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_346_cast_fp16 = slice_by_index(begin = var_346_begin_0, end = var_346_end_0, end_mask = var_346_end_mask_0, x = var_254_cast_fp16)[name = tensor("op_346_cast_fp16")]; + tensor var_353_begin_0 = const()[name = tensor("op_353_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_353_end_0 = const()[name = tensor("op_353_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_353_end_mask_0 = const()[name = tensor("op_353_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_353_cast_fp16 = slice_by_index(begin = var_353_begin_0, end = var_353_end_0, end_mask = var_353_end_mask_0, x = var_254_cast_fp16)[name = tensor("op_353_cast_fp16")]; + tensor var_360_begin_0 = const()[name = tensor("op_360_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_360_end_0 = const()[name = tensor("op_360_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_360_end_mask_0 = const()[name = tensor("op_360_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_360_cast_fp16 = slice_by_index(begin = var_360_begin_0, end = var_360_end_0, end_mask = var_360_end_mask_0, x = var_254_cast_fp16)[name = tensor("op_360_cast_fp16")]; + tensor var_367_begin_0 = const()[name = tensor("op_367_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_367_end_0 = const()[name = tensor("op_367_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_367_end_mask_0 = const()[name = tensor("op_367_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_367_cast_fp16 = slice_by_index(begin = var_367_begin_0, end = var_367_end_0, end_mask = var_367_end_mask_0, x = var_258_cast_fp16)[name = tensor("op_367_cast_fp16")]; + tensor var_374_begin_0 = const()[name = tensor("op_374_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_374_end_0 = const()[name = tensor("op_374_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_374_end_mask_0 = const()[name = tensor("op_374_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_374_cast_fp16 = slice_by_index(begin = var_374_begin_0, end = var_374_end_0, end_mask = var_374_end_mask_0, x = var_258_cast_fp16)[name = tensor("op_374_cast_fp16")]; + tensor var_381_begin_0 = const()[name = tensor("op_381_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_381_end_0 = const()[name = tensor("op_381_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_381_end_mask_0 = const()[name = tensor("op_381_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_381_cast_fp16 = slice_by_index(begin = var_381_begin_0, end = var_381_end_0, end_mask = var_381_end_mask_0, x = var_258_cast_fp16)[name = tensor("op_381_cast_fp16")]; + tensor var_388_begin_0 = const()[name = tensor("op_388_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_388_end_0 = const()[name = tensor("op_388_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_388_end_mask_0 = const()[name = tensor("op_388_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_388_cast_fp16 = slice_by_index(begin = var_388_begin_0, end = var_388_end_0, end_mask = var_388_end_mask_0, x = var_258_cast_fp16)[name = tensor("op_388_cast_fp16")]; + tensor var_395_begin_0 = const()[name = tensor("op_395_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_395_end_0 = const()[name = tensor("op_395_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_395_end_mask_0 = const()[name = tensor("op_395_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_395_cast_fp16 = slice_by_index(begin = var_395_begin_0, end = var_395_end_0, end_mask = var_395_end_mask_0, x = var_262_cast_fp16)[name = tensor("op_395_cast_fp16")]; + tensor var_402_begin_0 = const()[name = tensor("op_402_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_402_end_0 = const()[name = tensor("op_402_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_402_end_mask_0 = const()[name = tensor("op_402_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_402_cast_fp16 = slice_by_index(begin = var_402_begin_0, end = var_402_end_0, end_mask = var_402_end_mask_0, x = var_262_cast_fp16)[name = tensor("op_402_cast_fp16")]; + tensor var_409_begin_0 = const()[name = tensor("op_409_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_409_end_0 = const()[name = tensor("op_409_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_409_end_mask_0 = const()[name = tensor("op_409_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_409_cast_fp16 = slice_by_index(begin = var_409_begin_0, end = var_409_end_0, end_mask = var_409_end_mask_0, x = var_262_cast_fp16)[name = tensor("op_409_cast_fp16")]; + tensor var_416_begin_0 = const()[name = tensor("op_416_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_416_end_0 = const()[name = tensor("op_416_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_416_end_mask_0 = const()[name = tensor("op_416_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_416_cast_fp16 = slice_by_index(begin = var_416_begin_0, end = var_416_end_0, end_mask = var_416_end_mask_0, x = var_262_cast_fp16)[name = tensor("op_416_cast_fp16")]; + tensor var_423_begin_0 = const()[name = tensor("op_423_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_423_end_0 = const()[name = tensor("op_423_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_423_end_mask_0 = const()[name = tensor("op_423_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_423_cast_fp16 = slice_by_index(begin = var_423_begin_0, end = var_423_end_0, end_mask = var_423_end_mask_0, x = var_266_cast_fp16)[name = tensor("op_423_cast_fp16")]; + tensor var_430_begin_0 = const()[name = tensor("op_430_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_430_end_0 = const()[name = tensor("op_430_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_430_end_mask_0 = const()[name = tensor("op_430_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_430_cast_fp16 = slice_by_index(begin = var_430_begin_0, end = var_430_end_0, end_mask = var_430_end_mask_0, x = var_266_cast_fp16)[name = tensor("op_430_cast_fp16")]; + tensor var_437_begin_0 = const()[name = tensor("op_437_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_437_end_0 = const()[name = tensor("op_437_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_437_end_mask_0 = const()[name = tensor("op_437_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_437_cast_fp16 = slice_by_index(begin = var_437_begin_0, end = var_437_end_0, end_mask = var_437_end_mask_0, x = var_266_cast_fp16)[name = tensor("op_437_cast_fp16")]; + tensor var_444_begin_0 = const()[name = tensor("op_444_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_444_end_0 = const()[name = tensor("op_444_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_444_end_mask_0 = const()[name = tensor("op_444_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_444_cast_fp16 = slice_by_index(begin = var_444_begin_0, end = var_444_end_0, end_mask = var_444_end_mask_0, x = var_266_cast_fp16)[name = tensor("op_444_cast_fp16")]; + tensor var_451_begin_0 = const()[name = tensor("op_451_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_451_end_0 = const()[name = tensor("op_451_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_451_end_mask_0 = const()[name = tensor("op_451_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_451_cast_fp16 = slice_by_index(begin = var_451_begin_0, end = var_451_end_0, end_mask = var_451_end_mask_0, x = var_270_cast_fp16)[name = tensor("op_451_cast_fp16")]; + tensor var_458_begin_0 = const()[name = tensor("op_458_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_458_end_0 = const()[name = tensor("op_458_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_458_end_mask_0 = const()[name = tensor("op_458_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_458_cast_fp16 = slice_by_index(begin = var_458_begin_0, end = var_458_end_0, end_mask = var_458_end_mask_0, x = var_270_cast_fp16)[name = tensor("op_458_cast_fp16")]; + tensor var_465_begin_0 = const()[name = tensor("op_465_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_465_end_0 = const()[name = tensor("op_465_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_465_end_mask_0 = const()[name = tensor("op_465_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_465_cast_fp16 = slice_by_index(begin = var_465_begin_0, end = var_465_end_0, end_mask = var_465_end_mask_0, x = var_270_cast_fp16)[name = tensor("op_465_cast_fp16")]; + tensor var_472_begin_0 = const()[name = tensor("op_472_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_472_end_0 = const()[name = tensor("op_472_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_472_end_mask_0 = const()[name = tensor("op_472_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_472_cast_fp16 = slice_by_index(begin = var_472_begin_0, end = var_472_end_0, end_mask = var_472_end_mask_0, x = var_270_cast_fp16)[name = tensor("op_472_cast_fp16")]; + tensor var_479_begin_0 = const()[name = tensor("op_479_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_479_end_0 = const()[name = tensor("op_479_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_479_end_mask_0 = const()[name = tensor("op_479_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_479_cast_fp16 = slice_by_index(begin = var_479_begin_0, end = var_479_end_0, end_mask = var_479_end_mask_0, x = var_274_cast_fp16)[name = tensor("op_479_cast_fp16")]; + tensor var_486_begin_0 = const()[name = tensor("op_486_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_486_end_0 = const()[name = tensor("op_486_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_486_end_mask_0 = const()[name = tensor("op_486_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_486_cast_fp16 = slice_by_index(begin = var_486_begin_0, end = var_486_end_0, end_mask = var_486_end_mask_0, x = var_274_cast_fp16)[name = tensor("op_486_cast_fp16")]; + tensor var_493_begin_0 = const()[name = tensor("op_493_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_493_end_0 = const()[name = tensor("op_493_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_493_end_mask_0 = const()[name = tensor("op_493_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_493_cast_fp16 = slice_by_index(begin = var_493_begin_0, end = var_493_end_0, end_mask = var_493_end_mask_0, x = var_274_cast_fp16)[name = tensor("op_493_cast_fp16")]; + tensor var_500_begin_0 = const()[name = tensor("op_500_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_500_end_0 = const()[name = tensor("op_500_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_500_end_mask_0 = const()[name = tensor("op_500_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_500_cast_fp16 = slice_by_index(begin = var_500_begin_0, end = var_500_end_0, end_mask = var_500_end_mask_0, x = var_274_cast_fp16)[name = tensor("op_500_cast_fp16")]; + tensor var_507_begin_0 = const()[name = tensor("op_507_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_507_end_0 = const()[name = tensor("op_507_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_507_end_mask_0 = const()[name = tensor("op_507_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_507_cast_fp16 = slice_by_index(begin = var_507_begin_0, end = var_507_end_0, end_mask = var_507_end_mask_0, x = var_278_cast_fp16)[name = tensor("op_507_cast_fp16")]; + tensor var_514_begin_0 = const()[name = tensor("op_514_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_514_end_0 = const()[name = tensor("op_514_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_514_end_mask_0 = const()[name = tensor("op_514_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_514_cast_fp16 = slice_by_index(begin = var_514_begin_0, end = var_514_end_0, end_mask = var_514_end_mask_0, x = var_278_cast_fp16)[name = tensor("op_514_cast_fp16")]; + tensor var_521_begin_0 = const()[name = tensor("op_521_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_521_end_0 = const()[name = tensor("op_521_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_521_end_mask_0 = const()[name = tensor("op_521_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_521_cast_fp16 = slice_by_index(begin = var_521_begin_0, end = var_521_end_0, end_mask = var_521_end_mask_0, x = var_278_cast_fp16)[name = tensor("op_521_cast_fp16")]; + tensor var_528_begin_0 = const()[name = tensor("op_528_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_528_end_0 = const()[name = tensor("op_528_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_528_end_mask_0 = const()[name = tensor("op_528_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_528_cast_fp16 = slice_by_index(begin = var_528_begin_0, end = var_528_end_0, end_mask = var_528_end_mask_0, x = var_278_cast_fp16)[name = tensor("op_528_cast_fp16")]; + tensor var_535_begin_0 = const()[name = tensor("op_535_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_535_end_0 = const()[name = tensor("op_535_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_535_end_mask_0 = const()[name = tensor("op_535_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_535_cast_fp16 = slice_by_index(begin = var_535_begin_0, end = var_535_end_0, end_mask = var_535_end_mask_0, x = var_282_cast_fp16)[name = tensor("op_535_cast_fp16")]; + tensor var_542_begin_0 = const()[name = tensor("op_542_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_542_end_0 = const()[name = tensor("op_542_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_542_end_mask_0 = const()[name = tensor("op_542_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_542_cast_fp16 = slice_by_index(begin = var_542_begin_0, end = var_542_end_0, end_mask = var_542_end_mask_0, x = var_282_cast_fp16)[name = tensor("op_542_cast_fp16")]; + tensor var_549_begin_0 = const()[name = tensor("op_549_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_549_end_0 = const()[name = tensor("op_549_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_549_end_mask_0 = const()[name = tensor("op_549_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_549_cast_fp16 = slice_by_index(begin = var_549_begin_0, end = var_549_end_0, end_mask = var_549_end_mask_0, x = var_282_cast_fp16)[name = tensor("op_549_cast_fp16")]; + tensor var_556_begin_0 = const()[name = tensor("op_556_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_556_end_0 = const()[name = tensor("op_556_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_556_end_mask_0 = const()[name = tensor("op_556_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_556_cast_fp16 = slice_by_index(begin = var_556_begin_0, end = var_556_end_0, end_mask = var_556_end_mask_0, x = var_282_cast_fp16)[name = tensor("op_556_cast_fp16")]; + tensor var_563_begin_0 = const()[name = tensor("op_563_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_563_end_0 = const()[name = tensor("op_563_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_563_end_mask_0 = const()[name = tensor("op_563_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_563_cast_fp16 = slice_by_index(begin = var_563_begin_0, end = var_563_end_0, end_mask = var_563_end_mask_0, x = var_286_cast_fp16)[name = tensor("op_563_cast_fp16")]; + tensor var_570_begin_0 = const()[name = tensor("op_570_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_570_end_0 = const()[name = tensor("op_570_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_570_end_mask_0 = const()[name = tensor("op_570_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_570_cast_fp16 = slice_by_index(begin = var_570_begin_0, end = var_570_end_0, end_mask = var_570_end_mask_0, x = var_286_cast_fp16)[name = tensor("op_570_cast_fp16")]; + tensor var_577_begin_0 = const()[name = tensor("op_577_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_577_end_0 = const()[name = tensor("op_577_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_577_end_mask_0 = const()[name = tensor("op_577_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_577_cast_fp16 = slice_by_index(begin = var_577_begin_0, end = var_577_end_0, end_mask = var_577_end_mask_0, x = var_286_cast_fp16)[name = tensor("op_577_cast_fp16")]; + tensor var_584_begin_0 = const()[name = tensor("op_584_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_584_end_0 = const()[name = tensor("op_584_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_584_end_mask_0 = const()[name = tensor("op_584_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_584_cast_fp16 = slice_by_index(begin = var_584_begin_0, end = var_584_end_0, end_mask = var_584_end_mask_0, x = var_286_cast_fp16)[name = tensor("op_584_cast_fp16")]; + tensor var_591_begin_0 = const()[name = tensor("op_591_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_591_end_0 = const()[name = tensor("op_591_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_591_end_mask_0 = const()[name = tensor("op_591_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_591_cast_fp16 = slice_by_index(begin = var_591_begin_0, end = var_591_end_0, end_mask = var_591_end_mask_0, x = var_290_cast_fp16)[name = tensor("op_591_cast_fp16")]; + tensor var_598_begin_0 = const()[name = tensor("op_598_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_598_end_0 = const()[name = tensor("op_598_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_598_end_mask_0 = const()[name = tensor("op_598_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_598_cast_fp16 = slice_by_index(begin = var_598_begin_0, end = var_598_end_0, end_mask = var_598_end_mask_0, x = var_290_cast_fp16)[name = tensor("op_598_cast_fp16")]; + tensor var_605_begin_0 = const()[name = tensor("op_605_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_605_end_0 = const()[name = tensor("op_605_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_605_end_mask_0 = const()[name = tensor("op_605_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_605_cast_fp16 = slice_by_index(begin = var_605_begin_0, end = var_605_end_0, end_mask = var_605_end_mask_0, x = var_290_cast_fp16)[name = tensor("op_605_cast_fp16")]; + tensor var_612_begin_0 = const()[name = tensor("op_612_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_612_end_0 = const()[name = tensor("op_612_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_612_end_mask_0 = const()[name = tensor("op_612_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_612_cast_fp16 = slice_by_index(begin = var_612_begin_0, end = var_612_end_0, end_mask = var_612_end_mask_0, x = var_290_cast_fp16)[name = tensor("op_612_cast_fp16")]; + tensor var_619_begin_0 = const()[name = tensor("op_619_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_619_end_0 = const()[name = tensor("op_619_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_619_end_mask_0 = const()[name = tensor("op_619_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_619_cast_fp16 = slice_by_index(begin = var_619_begin_0, end = var_619_end_0, end_mask = var_619_end_mask_0, x = var_294_cast_fp16)[name = tensor("op_619_cast_fp16")]; + tensor var_626_begin_0 = const()[name = tensor("op_626_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_626_end_0 = const()[name = tensor("op_626_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_626_end_mask_0 = const()[name = tensor("op_626_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_626_cast_fp16 = slice_by_index(begin = var_626_begin_0, end = var_626_end_0, end_mask = var_626_end_mask_0, x = var_294_cast_fp16)[name = tensor("op_626_cast_fp16")]; + tensor var_633_begin_0 = const()[name = tensor("op_633_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_633_end_0 = const()[name = tensor("op_633_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_633_end_mask_0 = const()[name = tensor("op_633_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_633_cast_fp16 = slice_by_index(begin = var_633_begin_0, end = var_633_end_0, end_mask = var_633_end_mask_0, x = var_294_cast_fp16)[name = tensor("op_633_cast_fp16")]; + tensor var_640_begin_0 = const()[name = tensor("op_640_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_640_end_0 = const()[name = tensor("op_640_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_640_end_mask_0 = const()[name = tensor("op_640_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_640_cast_fp16 = slice_by_index(begin = var_640_begin_0, end = var_640_end_0, end_mask = var_640_end_mask_0, x = var_294_cast_fp16)[name = tensor("op_640_cast_fp16")]; + tensor var_647_begin_0 = const()[name = tensor("op_647_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_647_end_0 = const()[name = tensor("op_647_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_647_end_mask_0 = const()[name = tensor("op_647_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_647_cast_fp16 = slice_by_index(begin = var_647_begin_0, end = var_647_end_0, end_mask = var_647_end_mask_0, x = var_298_cast_fp16)[name = tensor("op_647_cast_fp16")]; + tensor var_654_begin_0 = const()[name = tensor("op_654_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_654_end_0 = const()[name = tensor("op_654_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_654_end_mask_0 = const()[name = tensor("op_654_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_654_cast_fp16 = slice_by_index(begin = var_654_begin_0, end = var_654_end_0, end_mask = var_654_end_mask_0, x = var_298_cast_fp16)[name = tensor("op_654_cast_fp16")]; + tensor var_661_begin_0 = const()[name = tensor("op_661_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_661_end_0 = const()[name = tensor("op_661_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_661_end_mask_0 = const()[name = tensor("op_661_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_661_cast_fp16 = slice_by_index(begin = var_661_begin_0, end = var_661_end_0, end_mask = var_661_end_mask_0, x = var_298_cast_fp16)[name = tensor("op_661_cast_fp16")]; + tensor var_668_begin_0 = const()[name = tensor("op_668_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_668_end_0 = const()[name = tensor("op_668_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_668_end_mask_0 = const()[name = tensor("op_668_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_668_cast_fp16 = slice_by_index(begin = var_668_begin_0, end = var_668_end_0, end_mask = var_668_end_mask_0, x = var_298_cast_fp16)[name = tensor("op_668_cast_fp16")]; + tensor var_675_begin_0 = const()[name = tensor("op_675_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_675_end_0 = const()[name = tensor("op_675_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_675_end_mask_0 = const()[name = tensor("op_675_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_675_cast_fp16 = slice_by_index(begin = var_675_begin_0, end = var_675_end_0, end_mask = var_675_end_mask_0, x = var_302_cast_fp16)[name = tensor("op_675_cast_fp16")]; + tensor var_682_begin_0 = const()[name = tensor("op_682_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_682_end_0 = const()[name = tensor("op_682_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_682_end_mask_0 = const()[name = tensor("op_682_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_682_cast_fp16 = slice_by_index(begin = var_682_begin_0, end = var_682_end_0, end_mask = var_682_end_mask_0, x = var_302_cast_fp16)[name = tensor("op_682_cast_fp16")]; + tensor var_689_begin_0 = const()[name = tensor("op_689_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_689_end_0 = const()[name = tensor("op_689_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_689_end_mask_0 = const()[name = tensor("op_689_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_689_cast_fp16 = slice_by_index(begin = var_689_begin_0, end = var_689_end_0, end_mask = var_689_end_mask_0, x = var_302_cast_fp16)[name = tensor("op_689_cast_fp16")]; + tensor var_696_begin_0 = const()[name = tensor("op_696_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_696_end_0 = const()[name = tensor("op_696_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_696_end_mask_0 = const()[name = tensor("op_696_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_696_cast_fp16 = slice_by_index(begin = var_696_begin_0, end = var_696_end_0, end_mask = var_696_end_mask_0, x = var_302_cast_fp16)[name = tensor("op_696_cast_fp16")]; + tensor var_703_begin_0 = const()[name = tensor("op_703_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_703_end_0 = const()[name = tensor("op_703_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_703_end_mask_0 = const()[name = tensor("op_703_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_703_cast_fp16 = slice_by_index(begin = var_703_begin_0, end = var_703_end_0, end_mask = var_703_end_mask_0, x = var_306_cast_fp16)[name = tensor("op_703_cast_fp16")]; + tensor var_710_begin_0 = const()[name = tensor("op_710_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_710_end_0 = const()[name = tensor("op_710_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_710_end_mask_0 = const()[name = tensor("op_710_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_710_cast_fp16 = slice_by_index(begin = var_710_begin_0, end = var_710_end_0, end_mask = var_710_end_mask_0, x = var_306_cast_fp16)[name = tensor("op_710_cast_fp16")]; + tensor var_717_begin_0 = const()[name = tensor("op_717_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_717_end_0 = const()[name = tensor("op_717_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_717_end_mask_0 = const()[name = tensor("op_717_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_717_cast_fp16 = slice_by_index(begin = var_717_begin_0, end = var_717_end_0, end_mask = var_717_end_mask_0, x = var_306_cast_fp16)[name = tensor("op_717_cast_fp16")]; + tensor var_724_begin_0 = const()[name = tensor("op_724_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_724_end_0 = const()[name = tensor("op_724_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_724_end_mask_0 = const()[name = tensor("op_724_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_724_cast_fp16 = slice_by_index(begin = var_724_begin_0, end = var_724_end_0, end_mask = var_724_end_mask_0, x = var_306_cast_fp16)[name = tensor("op_724_cast_fp16")]; + tensor var_731_begin_0 = const()[name = tensor("op_731_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_731_end_0 = const()[name = tensor("op_731_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_731_end_mask_0 = const()[name = tensor("op_731_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_731_cast_fp16 = slice_by_index(begin = var_731_begin_0, end = var_731_end_0, end_mask = var_731_end_mask_0, x = var_310_cast_fp16)[name = tensor("op_731_cast_fp16")]; + tensor var_738_begin_0 = const()[name = tensor("op_738_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_738_end_0 = const()[name = tensor("op_738_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_738_end_mask_0 = const()[name = tensor("op_738_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_738_cast_fp16 = slice_by_index(begin = var_738_begin_0, end = var_738_end_0, end_mask = var_738_end_mask_0, x = var_310_cast_fp16)[name = tensor("op_738_cast_fp16")]; + tensor var_745_begin_0 = const()[name = tensor("op_745_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_745_end_0 = const()[name = tensor("op_745_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_745_end_mask_0 = const()[name = tensor("op_745_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_745_cast_fp16 = slice_by_index(begin = var_745_begin_0, end = var_745_end_0, end_mask = var_745_end_mask_0, x = var_310_cast_fp16)[name = tensor("op_745_cast_fp16")]; + tensor var_752_begin_0 = const()[name = tensor("op_752_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_752_end_0 = const()[name = tensor("op_752_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_752_end_mask_0 = const()[name = tensor("op_752_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_752_cast_fp16 = slice_by_index(begin = var_752_begin_0, end = var_752_end_0, end_mask = var_752_end_mask_0, x = var_310_cast_fp16)[name = tensor("op_752_cast_fp16")]; + tensor var_759_begin_0 = const()[name = tensor("op_759_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_759_end_0 = const()[name = tensor("op_759_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_759_end_mask_0 = const()[name = tensor("op_759_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_759_cast_fp16 = slice_by_index(begin = var_759_begin_0, end = var_759_end_0, end_mask = var_759_end_mask_0, x = var_314_cast_fp16)[name = tensor("op_759_cast_fp16")]; + tensor var_766_begin_0 = const()[name = tensor("op_766_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_766_end_0 = const()[name = tensor("op_766_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_766_end_mask_0 = const()[name = tensor("op_766_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_766_cast_fp16 = slice_by_index(begin = var_766_begin_0, end = var_766_end_0, end_mask = var_766_end_mask_0, x = var_314_cast_fp16)[name = tensor("op_766_cast_fp16")]; + tensor var_773_begin_0 = const()[name = tensor("op_773_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_773_end_0 = const()[name = tensor("op_773_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_773_end_mask_0 = const()[name = tensor("op_773_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_773_cast_fp16 = slice_by_index(begin = var_773_begin_0, end = var_773_end_0, end_mask = var_773_end_mask_0, x = var_314_cast_fp16)[name = tensor("op_773_cast_fp16")]; + tensor var_780_begin_0 = const()[name = tensor("op_780_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_780_end_0 = const()[name = tensor("op_780_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_780_end_mask_0 = const()[name = tensor("op_780_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_780_cast_fp16 = slice_by_index(begin = var_780_begin_0, end = var_780_end_0, end_mask = var_780_end_mask_0, x = var_314_cast_fp16)[name = tensor("op_780_cast_fp16")]; + tensor var_787_begin_0 = const()[name = tensor("op_787_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_787_end_0 = const()[name = tensor("op_787_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_787_end_mask_0 = const()[name = tensor("op_787_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_787_cast_fp16 = slice_by_index(begin = var_787_begin_0, end = var_787_end_0, end_mask = var_787_end_mask_0, x = var_318_cast_fp16)[name = tensor("op_787_cast_fp16")]; + tensor var_794_begin_0 = const()[name = tensor("op_794_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_794_end_0 = const()[name = tensor("op_794_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_794_end_mask_0 = const()[name = tensor("op_794_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_794_cast_fp16 = slice_by_index(begin = var_794_begin_0, end = var_794_end_0, end_mask = var_794_end_mask_0, x = var_318_cast_fp16)[name = tensor("op_794_cast_fp16")]; + tensor var_801_begin_0 = const()[name = tensor("op_801_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_801_end_0 = const()[name = tensor("op_801_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_801_end_mask_0 = const()[name = tensor("op_801_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_801_cast_fp16 = slice_by_index(begin = var_801_begin_0, end = var_801_end_0, end_mask = var_801_end_mask_0, x = var_318_cast_fp16)[name = tensor("op_801_cast_fp16")]; + tensor var_808_begin_0 = const()[name = tensor("op_808_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_808_end_0 = const()[name = tensor("op_808_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_808_end_mask_0 = const()[name = tensor("op_808_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_808_cast_fp16 = slice_by_index(begin = var_808_begin_0, end = var_808_end_0, end_mask = var_808_end_mask_0, x = var_318_cast_fp16)[name = tensor("op_808_cast_fp16")]; + tensor var_815_begin_0 = const()[name = tensor("op_815_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_815_end_0 = const()[name = tensor("op_815_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_815_end_mask_0 = const()[name = tensor("op_815_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_815_cast_fp16 = slice_by_index(begin = var_815_begin_0, end = var_815_end_0, end_mask = var_815_end_mask_0, x = var_322_cast_fp16)[name = tensor("op_815_cast_fp16")]; + tensor var_822_begin_0 = const()[name = tensor("op_822_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_822_end_0 = const()[name = tensor("op_822_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_822_end_mask_0 = const()[name = tensor("op_822_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_822_cast_fp16 = slice_by_index(begin = var_822_begin_0, end = var_822_end_0, end_mask = var_822_end_mask_0, x = var_322_cast_fp16)[name = tensor("op_822_cast_fp16")]; + tensor var_829_begin_0 = const()[name = tensor("op_829_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_829_end_0 = const()[name = tensor("op_829_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_829_end_mask_0 = const()[name = tensor("op_829_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_829_cast_fp16 = slice_by_index(begin = var_829_begin_0, end = var_829_end_0, end_mask = var_829_end_mask_0, x = var_322_cast_fp16)[name = tensor("op_829_cast_fp16")]; + tensor var_836_begin_0 = const()[name = tensor("op_836_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_836_end_0 = const()[name = tensor("op_836_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_836_end_mask_0 = const()[name = tensor("op_836_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_836_cast_fp16 = slice_by_index(begin = var_836_begin_0, end = var_836_end_0, end_mask = var_836_end_mask_0, x = var_322_cast_fp16)[name = tensor("op_836_cast_fp16")]; + tensor var_843_begin_0 = const()[name = tensor("op_843_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_843_end_0 = const()[name = tensor("op_843_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_843_end_mask_0 = const()[name = tensor("op_843_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_843_cast_fp16 = slice_by_index(begin = var_843_begin_0, end = var_843_end_0, end_mask = var_843_end_mask_0, x = var_326_cast_fp16)[name = tensor("op_843_cast_fp16")]; + tensor var_850_begin_0 = const()[name = tensor("op_850_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_850_end_0 = const()[name = tensor("op_850_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_850_end_mask_0 = const()[name = tensor("op_850_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_850_cast_fp16 = slice_by_index(begin = var_850_begin_0, end = var_850_end_0, end_mask = var_850_end_mask_0, x = var_326_cast_fp16)[name = tensor("op_850_cast_fp16")]; + tensor var_857_begin_0 = const()[name = tensor("op_857_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_857_end_0 = const()[name = tensor("op_857_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_857_end_mask_0 = const()[name = tensor("op_857_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_857_cast_fp16 = slice_by_index(begin = var_857_begin_0, end = var_857_end_0, end_mask = var_857_end_mask_0, x = var_326_cast_fp16)[name = tensor("op_857_cast_fp16")]; + tensor var_864_begin_0 = const()[name = tensor("op_864_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_864_end_0 = const()[name = tensor("op_864_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_864_end_mask_0 = const()[name = tensor("op_864_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_864_cast_fp16 = slice_by_index(begin = var_864_begin_0, end = var_864_end_0, end_mask = var_864_end_mask_0, x = var_326_cast_fp16)[name = tensor("op_864_cast_fp16")]; + tensor var_871_begin_0 = const()[name = tensor("op_871_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_871_end_0 = const()[name = tensor("op_871_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_871_end_mask_0 = const()[name = tensor("op_871_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_871_cast_fp16 = slice_by_index(begin = var_871_begin_0, end = var_871_end_0, end_mask = var_871_end_mask_0, x = var_330_cast_fp16)[name = tensor("op_871_cast_fp16")]; + tensor var_878_begin_0 = const()[name = tensor("op_878_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_878_end_0 = const()[name = tensor("op_878_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_878_end_mask_0 = const()[name = tensor("op_878_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_878_cast_fp16 = slice_by_index(begin = var_878_begin_0, end = var_878_end_0, end_mask = var_878_end_mask_0, x = var_330_cast_fp16)[name = tensor("op_878_cast_fp16")]; + tensor var_885_begin_0 = const()[name = tensor("op_885_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_885_end_0 = const()[name = tensor("op_885_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_885_end_mask_0 = const()[name = tensor("op_885_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_885_cast_fp16 = slice_by_index(begin = var_885_begin_0, end = var_885_end_0, end_mask = var_885_end_mask_0, x = var_330_cast_fp16)[name = tensor("op_885_cast_fp16")]; + tensor var_892_begin_0 = const()[name = tensor("op_892_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_892_end_0 = const()[name = tensor("op_892_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_892_end_mask_0 = const()[name = tensor("op_892_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_892_cast_fp16 = slice_by_index(begin = var_892_begin_0, end = var_892_end_0, end_mask = var_892_end_mask_0, x = var_330_cast_fp16)[name = tensor("op_892_cast_fp16")]; + tensor k_1_perm_0 = const()[name = tensor("k_1_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_897_begin_0 = const()[name = tensor("op_897_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_897_end_0 = const()[name = tensor("op_897_end_0"), val = tensor([1, 1500, 1, 64])]; + tensor var_897_end_mask_0 = const()[name = tensor("op_897_end_mask_0"), val = tensor([true, true, true, false])]; + tensor transpose_31 = transpose(perm = k_1_perm_0, x = key_1_cast_fp16)[name = tensor("transpose_31")]; + tensor var_897_cast_fp16 = slice_by_index(begin = var_897_begin_0, end = var_897_end_0, end_mask = var_897_end_mask_0, x = transpose_31)[name = tensor("op_897_cast_fp16")]; + tensor var_901_begin_0 = const()[name = tensor("op_901_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor var_901_end_0 = const()[name = tensor("op_901_end_0"), val = tensor([1, 1500, 1, 128])]; + tensor var_901_end_mask_0 = const()[name = tensor("op_901_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_901_cast_fp16 = slice_by_index(begin = var_901_begin_0, end = var_901_end_0, end_mask = var_901_end_mask_0, x = transpose_31)[name = tensor("op_901_cast_fp16")]; + tensor var_905_begin_0 = const()[name = tensor("op_905_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_905_end_0 = const()[name = tensor("op_905_end_0"), val = tensor([1, 1500, 1, 192])]; + tensor var_905_end_mask_0 = const()[name = tensor("op_905_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_905_cast_fp16 = slice_by_index(begin = var_905_begin_0, end = var_905_end_0, end_mask = var_905_end_mask_0, x = transpose_31)[name = tensor("op_905_cast_fp16")]; + tensor var_909_begin_0 = const()[name = tensor("op_909_begin_0"), val = tensor([0, 0, 0, 192])]; + tensor var_909_end_0 = const()[name = tensor("op_909_end_0"), val = tensor([1, 1500, 1, 256])]; + tensor var_909_end_mask_0 = const()[name = tensor("op_909_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_909_cast_fp16 = slice_by_index(begin = var_909_begin_0, end = var_909_end_0, end_mask = var_909_end_mask_0, x = transpose_31)[name = tensor("op_909_cast_fp16")]; + tensor var_913_begin_0 = const()[name = tensor("op_913_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_913_end_0 = const()[name = tensor("op_913_end_0"), val = tensor([1, 1500, 1, 320])]; + tensor var_913_end_mask_0 = const()[name = tensor("op_913_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_913_cast_fp16 = slice_by_index(begin = var_913_begin_0, end = var_913_end_0, end_mask = var_913_end_mask_0, x = transpose_31)[name = tensor("op_913_cast_fp16")]; + tensor var_917_begin_0 = const()[name = tensor("op_917_begin_0"), val = tensor([0, 0, 0, 320])]; + tensor var_917_end_0 = const()[name = tensor("op_917_end_0"), val = tensor([1, 1500, 1, 384])]; + tensor var_917_end_mask_0 = const()[name = tensor("op_917_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_917_cast_fp16 = slice_by_index(begin = var_917_begin_0, end = var_917_end_0, end_mask = var_917_end_mask_0, x = transpose_31)[name = tensor("op_917_cast_fp16")]; + tensor var_921_begin_0 = const()[name = tensor("op_921_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_921_end_0 = const()[name = tensor("op_921_end_0"), val = tensor([1, 1500, 1, 448])]; + tensor var_921_end_mask_0 = const()[name = tensor("op_921_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_921_cast_fp16 = slice_by_index(begin = var_921_begin_0, end = var_921_end_0, end_mask = var_921_end_mask_0, x = transpose_31)[name = tensor("op_921_cast_fp16")]; + tensor var_925_begin_0 = const()[name = tensor("op_925_begin_0"), val = tensor([0, 0, 0, 448])]; + tensor var_925_end_0 = const()[name = tensor("op_925_end_0"), val = tensor([1, 1500, 1, 512])]; + tensor var_925_end_mask_0 = const()[name = tensor("op_925_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_925_cast_fp16 = slice_by_index(begin = var_925_begin_0, end = var_925_end_0, end_mask = var_925_end_mask_0, x = transpose_31)[name = tensor("op_925_cast_fp16")]; + tensor var_929_begin_0 = const()[name = tensor("op_929_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_929_end_0 = const()[name = tensor("op_929_end_0"), val = tensor([1, 1500, 1, 576])]; + tensor var_929_end_mask_0 = const()[name = tensor("op_929_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_929_cast_fp16 = slice_by_index(begin = var_929_begin_0, end = var_929_end_0, end_mask = var_929_end_mask_0, x = transpose_31)[name = tensor("op_929_cast_fp16")]; + tensor var_933_begin_0 = const()[name = tensor("op_933_begin_0"), val = tensor([0, 0, 0, 576])]; + tensor var_933_end_0 = const()[name = tensor("op_933_end_0"), val = tensor([1, 1500, 1, 640])]; + tensor var_933_end_mask_0 = const()[name = tensor("op_933_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_933_cast_fp16 = slice_by_index(begin = var_933_begin_0, end = var_933_end_0, end_mask = var_933_end_mask_0, x = transpose_31)[name = tensor("op_933_cast_fp16")]; + tensor var_937_begin_0 = const()[name = tensor("op_937_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_937_end_0 = const()[name = tensor("op_937_end_0"), val = tensor([1, 1500, 1, 704])]; + tensor var_937_end_mask_0 = const()[name = tensor("op_937_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_937_cast_fp16 = slice_by_index(begin = var_937_begin_0, end = var_937_end_0, end_mask = var_937_end_mask_0, x = transpose_31)[name = tensor("op_937_cast_fp16")]; + tensor var_941_begin_0 = const()[name = tensor("op_941_begin_0"), val = tensor([0, 0, 0, 704])]; + tensor var_941_end_0 = const()[name = tensor("op_941_end_0"), val = tensor([1, 1500, 1, 768])]; + tensor var_941_end_mask_0 = const()[name = tensor("op_941_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_941_cast_fp16 = slice_by_index(begin = var_941_begin_0, end = var_941_end_0, end_mask = var_941_end_mask_0, x = transpose_31)[name = tensor("op_941_cast_fp16")]; + tensor var_945_begin_0 = const()[name = tensor("op_945_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_945_end_0 = const()[name = tensor("op_945_end_0"), val = tensor([1, 1500, 1, 832])]; + tensor var_945_end_mask_0 = const()[name = tensor("op_945_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_945_cast_fp16 = slice_by_index(begin = var_945_begin_0, end = var_945_end_0, end_mask = var_945_end_mask_0, x = transpose_31)[name = tensor("op_945_cast_fp16")]; + tensor var_949_begin_0 = const()[name = tensor("op_949_begin_0"), val = tensor([0, 0, 0, 832])]; + tensor var_949_end_0 = const()[name = tensor("op_949_end_0"), val = tensor([1, 1500, 1, 896])]; + tensor var_949_end_mask_0 = const()[name = tensor("op_949_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_949_cast_fp16 = slice_by_index(begin = var_949_begin_0, end = var_949_end_0, end_mask = var_949_end_mask_0, x = transpose_31)[name = tensor("op_949_cast_fp16")]; + tensor var_953_begin_0 = const()[name = tensor("op_953_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_953_end_0 = const()[name = tensor("op_953_end_0"), val = tensor([1, 1500, 1, 960])]; + tensor var_953_end_mask_0 = const()[name = tensor("op_953_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_953_cast_fp16 = slice_by_index(begin = var_953_begin_0, end = var_953_end_0, end_mask = var_953_end_mask_0, x = transpose_31)[name = tensor("op_953_cast_fp16")]; + tensor var_957_begin_0 = const()[name = tensor("op_957_begin_0"), val = tensor([0, 0, 0, 960])]; + tensor var_957_end_0 = const()[name = tensor("op_957_end_0"), val = tensor([1, 1500, 1, 1024])]; + tensor var_957_end_mask_0 = const()[name = tensor("op_957_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_957_cast_fp16 = slice_by_index(begin = var_957_begin_0, end = var_957_end_0, end_mask = var_957_end_mask_0, x = transpose_31)[name = tensor("op_957_cast_fp16")]; + tensor var_961_begin_0 = const()[name = tensor("op_961_begin_0"), val = tensor([0, 0, 0, 1024])]; + tensor var_961_end_0 = const()[name = tensor("op_961_end_0"), val = tensor([1, 1500, 1, 1088])]; + tensor var_961_end_mask_0 = const()[name = tensor("op_961_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_961_cast_fp16 = slice_by_index(begin = var_961_begin_0, end = var_961_end_0, end_mask = var_961_end_mask_0, x = transpose_31)[name = tensor("op_961_cast_fp16")]; + tensor var_965_begin_0 = const()[name = tensor("op_965_begin_0"), val = tensor([0, 0, 0, 1088])]; + tensor var_965_end_0 = const()[name = tensor("op_965_end_0"), val = tensor([1, 1500, 1, 1152])]; + tensor var_965_end_mask_0 = const()[name = tensor("op_965_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_965_cast_fp16 = slice_by_index(begin = var_965_begin_0, end = var_965_end_0, end_mask = var_965_end_mask_0, x = transpose_31)[name = tensor("op_965_cast_fp16")]; + tensor var_969_begin_0 = const()[name = tensor("op_969_begin_0"), val = tensor([0, 0, 0, 1152])]; + tensor var_969_end_0 = const()[name = tensor("op_969_end_0"), val = tensor([1, 1500, 1, 1216])]; + tensor var_969_end_mask_0 = const()[name = tensor("op_969_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_969_cast_fp16 = slice_by_index(begin = var_969_begin_0, end = var_969_end_0, end_mask = var_969_end_mask_0, x = transpose_31)[name = tensor("op_969_cast_fp16")]; + tensor var_973_begin_0 = const()[name = tensor("op_973_begin_0"), val = tensor([0, 0, 0, 1216])]; + tensor var_973_end_0 = const()[name = tensor("op_973_end_0"), val = tensor([1, 1500, 1, 1280])]; + tensor var_973_end_mask_0 = const()[name = tensor("op_973_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_973_cast_fp16 = slice_by_index(begin = var_973_begin_0, end = var_973_end_0, end_mask = var_973_end_mask_0, x = transpose_31)[name = tensor("op_973_cast_fp16")]; + tensor var_975_begin_0 = const()[name = tensor("op_975_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_975_end_0 = const()[name = tensor("op_975_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_975_end_mask_0 = const()[name = tensor("op_975_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_975_cast_fp16 = slice_by_index(begin = var_975_begin_0, end = var_975_end_0, end_mask = var_975_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_975_cast_fp16")]; + tensor var_979_begin_0 = const()[name = tensor("op_979_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_979_end_0 = const()[name = tensor("op_979_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_979_end_mask_0 = const()[name = tensor("op_979_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_979_cast_fp16 = slice_by_index(begin = var_979_begin_0, end = var_979_end_0, end_mask = var_979_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_979_cast_fp16")]; + tensor var_983_begin_0 = const()[name = tensor("op_983_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_983_end_0 = const()[name = tensor("op_983_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_983_end_mask_0 = const()[name = tensor("op_983_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_983_cast_fp16 = slice_by_index(begin = var_983_begin_0, end = var_983_end_0, end_mask = var_983_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_983_cast_fp16")]; + tensor var_987_begin_0 = const()[name = tensor("op_987_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_987_end_0 = const()[name = tensor("op_987_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_987_end_mask_0 = const()[name = tensor("op_987_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_987_cast_fp16 = slice_by_index(begin = var_987_begin_0, end = var_987_end_0, end_mask = var_987_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_987_cast_fp16")]; + tensor var_991_begin_0 = const()[name = tensor("op_991_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_991_end_0 = const()[name = tensor("op_991_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_991_end_mask_0 = const()[name = tensor("op_991_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_991_cast_fp16 = slice_by_index(begin = var_991_begin_0, end = var_991_end_0, end_mask = var_991_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_991_cast_fp16")]; + tensor var_995_begin_0 = const()[name = tensor("op_995_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_995_end_0 = const()[name = tensor("op_995_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_995_end_mask_0 = const()[name = tensor("op_995_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_995_cast_fp16 = slice_by_index(begin = var_995_begin_0, end = var_995_end_0, end_mask = var_995_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_995_cast_fp16")]; + tensor var_999_begin_0 = const()[name = tensor("op_999_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_999_end_0 = const()[name = tensor("op_999_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_999_end_mask_0 = const()[name = tensor("op_999_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_999_cast_fp16 = slice_by_index(begin = var_999_begin_0, end = var_999_end_0, end_mask = var_999_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_999_cast_fp16")]; + tensor var_1003_begin_0 = const()[name = tensor("op_1003_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_1003_end_0 = const()[name = tensor("op_1003_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_1003_end_mask_0 = const()[name = tensor("op_1003_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1003_cast_fp16 = slice_by_index(begin = var_1003_begin_0, end = var_1003_end_0, end_mask = var_1003_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_1003_cast_fp16")]; + tensor var_1007_begin_0 = const()[name = tensor("op_1007_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_1007_end_0 = const()[name = tensor("op_1007_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_1007_end_mask_0 = const()[name = tensor("op_1007_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1007_cast_fp16 = slice_by_index(begin = var_1007_begin_0, end = var_1007_end_0, end_mask = var_1007_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_1007_cast_fp16")]; + tensor var_1011_begin_0 = const()[name = tensor("op_1011_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_1011_end_0 = const()[name = tensor("op_1011_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_1011_end_mask_0 = const()[name = tensor("op_1011_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1011_cast_fp16 = slice_by_index(begin = var_1011_begin_0, end = var_1011_end_0, end_mask = var_1011_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_1011_cast_fp16")]; + tensor var_1015_begin_0 = const()[name = tensor("op_1015_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_1015_end_0 = const()[name = tensor("op_1015_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_1015_end_mask_0 = const()[name = tensor("op_1015_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1015_cast_fp16 = slice_by_index(begin = var_1015_begin_0, end = var_1015_end_0, end_mask = var_1015_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_1015_cast_fp16")]; + tensor var_1019_begin_0 = const()[name = tensor("op_1019_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_1019_end_0 = const()[name = tensor("op_1019_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_1019_end_mask_0 = const()[name = tensor("op_1019_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1019_cast_fp16 = slice_by_index(begin = var_1019_begin_0, end = var_1019_end_0, end_mask = var_1019_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_1019_cast_fp16")]; + tensor var_1023_begin_0 = const()[name = tensor("op_1023_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_1023_end_0 = const()[name = tensor("op_1023_end_0"), val = tensor([1, 832, 1, 1500])]; + tensor var_1023_end_mask_0 = const()[name = tensor("op_1023_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1023_cast_fp16 = slice_by_index(begin = var_1023_begin_0, end = var_1023_end_0, end_mask = var_1023_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_1023_cast_fp16")]; + tensor var_1027_begin_0 = const()[name = tensor("op_1027_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_1027_end_0 = const()[name = tensor("op_1027_end_0"), val = tensor([1, 896, 1, 1500])]; + tensor var_1027_end_mask_0 = const()[name = tensor("op_1027_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1027_cast_fp16 = slice_by_index(begin = var_1027_begin_0, end = var_1027_end_0, end_mask = var_1027_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_1027_cast_fp16")]; + tensor var_1031_begin_0 = const()[name = tensor("op_1031_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_1031_end_0 = const()[name = tensor("op_1031_end_0"), val = tensor([1, 960, 1, 1500])]; + tensor var_1031_end_mask_0 = const()[name = tensor("op_1031_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1031_cast_fp16 = slice_by_index(begin = var_1031_begin_0, end = var_1031_end_0, end_mask = var_1031_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_1031_cast_fp16")]; + tensor var_1035_begin_0 = const()[name = tensor("op_1035_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_1035_end_0 = const()[name = tensor("op_1035_end_0"), val = tensor([1, 1024, 1, 1500])]; + tensor var_1035_end_mask_0 = const()[name = tensor("op_1035_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1035_cast_fp16 = slice_by_index(begin = var_1035_begin_0, end = var_1035_end_0, end_mask = var_1035_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_1035_cast_fp16")]; + tensor var_1039_begin_0 = const()[name = tensor("op_1039_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_1039_end_0 = const()[name = tensor("op_1039_end_0"), val = tensor([1, 1088, 1, 1500])]; + tensor var_1039_end_mask_0 = const()[name = tensor("op_1039_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1039_cast_fp16 = slice_by_index(begin = var_1039_begin_0, end = var_1039_end_0, end_mask = var_1039_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_1039_cast_fp16")]; + tensor var_1043_begin_0 = const()[name = tensor("op_1043_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_1043_end_0 = const()[name = tensor("op_1043_end_0"), val = tensor([1, 1152, 1, 1500])]; + tensor var_1043_end_mask_0 = const()[name = tensor("op_1043_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1043_cast_fp16 = slice_by_index(begin = var_1043_begin_0, end = var_1043_end_0, end_mask = var_1043_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_1043_cast_fp16")]; + tensor var_1047_begin_0 = const()[name = tensor("op_1047_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_1047_end_0 = const()[name = tensor("op_1047_end_0"), val = tensor([1, 1216, 1, 1500])]; + tensor var_1047_end_mask_0 = const()[name = tensor("op_1047_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1047_cast_fp16 = slice_by_index(begin = var_1047_begin_0, end = var_1047_end_0, end_mask = var_1047_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_1047_cast_fp16")]; + tensor var_1051_begin_0 = const()[name = tensor("op_1051_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_1051_end_0 = const()[name = tensor("op_1051_end_0"), val = tensor([1, 1280, 1, 1500])]; + tensor var_1051_end_mask_0 = const()[name = tensor("op_1051_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1051_cast_fp16 = slice_by_index(begin = var_1051_begin_0, end = var_1051_end_0, end_mask = var_1051_end_mask_0, x = value_1_cast_fp16)[name = tensor("op_1051_cast_fp16")]; + tensor var_1055_equation_0 = const()[name = tensor("op_1055_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1055_cast_fp16 = einsum(equation = var_1055_equation_0, values = (var_897_cast_fp16, var_339_cast_fp16))[name = tensor("op_1055_cast_fp16")]; + tensor var_1056_to_fp16 = const()[name = tensor("op_1056_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1_cast_fp16 = mul(x = var_1055_cast_fp16, y = var_1056_to_fp16)[name = tensor("aw_chunk_1_cast_fp16")]; + tensor var_1059_equation_0 = const()[name = tensor("op_1059_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1059_cast_fp16 = einsum(equation = var_1059_equation_0, values = (var_897_cast_fp16, var_346_cast_fp16))[name = tensor("op_1059_cast_fp16")]; + tensor var_1060_to_fp16 = const()[name = tensor("op_1060_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3_cast_fp16 = mul(x = var_1059_cast_fp16, y = var_1060_to_fp16)[name = tensor("aw_chunk_3_cast_fp16")]; + tensor var_1063_equation_0 = const()[name = tensor("op_1063_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1063_cast_fp16 = einsum(equation = var_1063_equation_0, values = (var_897_cast_fp16, var_353_cast_fp16))[name = tensor("op_1063_cast_fp16")]; + tensor var_1064_to_fp16 = const()[name = tensor("op_1064_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_5_cast_fp16 = mul(x = var_1063_cast_fp16, y = var_1064_to_fp16)[name = tensor("aw_chunk_5_cast_fp16")]; + tensor var_1067_equation_0 = const()[name = tensor("op_1067_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1067_cast_fp16 = einsum(equation = var_1067_equation_0, values = (var_897_cast_fp16, var_360_cast_fp16))[name = tensor("op_1067_cast_fp16")]; + tensor var_1068_to_fp16 = const()[name = tensor("op_1068_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_7_cast_fp16 = mul(x = var_1067_cast_fp16, y = var_1068_to_fp16)[name = tensor("aw_chunk_7_cast_fp16")]; + tensor var_1071_equation_0 = const()[name = tensor("op_1071_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1071_cast_fp16 = einsum(equation = var_1071_equation_0, values = (var_901_cast_fp16, var_367_cast_fp16))[name = tensor("op_1071_cast_fp16")]; + tensor var_1072_to_fp16 = const()[name = tensor("op_1072_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_9_cast_fp16 = mul(x = var_1071_cast_fp16, y = var_1072_to_fp16)[name = tensor("aw_chunk_9_cast_fp16")]; + tensor var_1075_equation_0 = const()[name = tensor("op_1075_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1075_cast_fp16 = einsum(equation = var_1075_equation_0, values = (var_901_cast_fp16, var_374_cast_fp16))[name = tensor("op_1075_cast_fp16")]; + tensor var_1076_to_fp16 = const()[name = tensor("op_1076_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_11_cast_fp16 = mul(x = var_1075_cast_fp16, y = var_1076_to_fp16)[name = tensor("aw_chunk_11_cast_fp16")]; + tensor var_1079_equation_0 = const()[name = tensor("op_1079_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1079_cast_fp16 = einsum(equation = var_1079_equation_0, values = (var_901_cast_fp16, var_381_cast_fp16))[name = tensor("op_1079_cast_fp16")]; + tensor var_1080_to_fp16 = const()[name = tensor("op_1080_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_13_cast_fp16 = mul(x = var_1079_cast_fp16, y = var_1080_to_fp16)[name = tensor("aw_chunk_13_cast_fp16")]; + tensor var_1083_equation_0 = const()[name = tensor("op_1083_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1083_cast_fp16 = einsum(equation = var_1083_equation_0, values = (var_901_cast_fp16, var_388_cast_fp16))[name = tensor("op_1083_cast_fp16")]; + tensor var_1084_to_fp16 = const()[name = tensor("op_1084_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_15_cast_fp16 = mul(x = var_1083_cast_fp16, y = var_1084_to_fp16)[name = tensor("aw_chunk_15_cast_fp16")]; + tensor var_1087_equation_0 = const()[name = tensor("op_1087_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1087_cast_fp16 = einsum(equation = var_1087_equation_0, values = (var_905_cast_fp16, var_395_cast_fp16))[name = tensor("op_1087_cast_fp16")]; + tensor var_1088_to_fp16 = const()[name = tensor("op_1088_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_17_cast_fp16 = mul(x = var_1087_cast_fp16, y = var_1088_to_fp16)[name = tensor("aw_chunk_17_cast_fp16")]; + tensor var_1091_equation_0 = const()[name = tensor("op_1091_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1091_cast_fp16 = einsum(equation = var_1091_equation_0, values = (var_905_cast_fp16, var_402_cast_fp16))[name = tensor("op_1091_cast_fp16")]; + tensor var_1092_to_fp16 = const()[name = tensor("op_1092_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_19_cast_fp16 = mul(x = var_1091_cast_fp16, y = var_1092_to_fp16)[name = tensor("aw_chunk_19_cast_fp16")]; + tensor var_1095_equation_0 = const()[name = tensor("op_1095_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1095_cast_fp16 = einsum(equation = var_1095_equation_0, values = (var_905_cast_fp16, var_409_cast_fp16))[name = tensor("op_1095_cast_fp16")]; + tensor var_1096_to_fp16 = const()[name = tensor("op_1096_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_21_cast_fp16 = mul(x = var_1095_cast_fp16, y = var_1096_to_fp16)[name = tensor("aw_chunk_21_cast_fp16")]; + tensor var_1099_equation_0 = const()[name = tensor("op_1099_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1099_cast_fp16 = einsum(equation = var_1099_equation_0, values = (var_905_cast_fp16, var_416_cast_fp16))[name = tensor("op_1099_cast_fp16")]; + tensor var_1100_to_fp16 = const()[name = tensor("op_1100_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_23_cast_fp16 = mul(x = var_1099_cast_fp16, y = var_1100_to_fp16)[name = tensor("aw_chunk_23_cast_fp16")]; + tensor var_1103_equation_0 = const()[name = tensor("op_1103_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1103_cast_fp16 = einsum(equation = var_1103_equation_0, values = (var_909_cast_fp16, var_423_cast_fp16))[name = tensor("op_1103_cast_fp16")]; + tensor var_1104_to_fp16 = const()[name = tensor("op_1104_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_25_cast_fp16 = mul(x = var_1103_cast_fp16, y = var_1104_to_fp16)[name = tensor("aw_chunk_25_cast_fp16")]; + tensor var_1107_equation_0 = const()[name = tensor("op_1107_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1107_cast_fp16 = einsum(equation = var_1107_equation_0, values = (var_909_cast_fp16, var_430_cast_fp16))[name = tensor("op_1107_cast_fp16")]; + tensor var_1108_to_fp16 = const()[name = tensor("op_1108_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_27_cast_fp16 = mul(x = var_1107_cast_fp16, y = var_1108_to_fp16)[name = tensor("aw_chunk_27_cast_fp16")]; + tensor var_1111_equation_0 = const()[name = tensor("op_1111_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1111_cast_fp16 = einsum(equation = var_1111_equation_0, values = (var_909_cast_fp16, var_437_cast_fp16))[name = tensor("op_1111_cast_fp16")]; + tensor var_1112_to_fp16 = const()[name = tensor("op_1112_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_29_cast_fp16 = mul(x = var_1111_cast_fp16, y = var_1112_to_fp16)[name = tensor("aw_chunk_29_cast_fp16")]; + tensor var_1115_equation_0 = const()[name = tensor("op_1115_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1115_cast_fp16 = einsum(equation = var_1115_equation_0, values = (var_909_cast_fp16, var_444_cast_fp16))[name = tensor("op_1115_cast_fp16")]; + tensor var_1116_to_fp16 = const()[name = tensor("op_1116_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_31_cast_fp16 = mul(x = var_1115_cast_fp16, y = var_1116_to_fp16)[name = tensor("aw_chunk_31_cast_fp16")]; + tensor var_1119_equation_0 = const()[name = tensor("op_1119_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1119_cast_fp16 = einsum(equation = var_1119_equation_0, values = (var_913_cast_fp16, var_451_cast_fp16))[name = tensor("op_1119_cast_fp16")]; + tensor var_1120_to_fp16 = const()[name = tensor("op_1120_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_33_cast_fp16 = mul(x = var_1119_cast_fp16, y = var_1120_to_fp16)[name = tensor("aw_chunk_33_cast_fp16")]; + tensor var_1123_equation_0 = const()[name = tensor("op_1123_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1123_cast_fp16 = einsum(equation = var_1123_equation_0, values = (var_913_cast_fp16, var_458_cast_fp16))[name = tensor("op_1123_cast_fp16")]; + tensor var_1124_to_fp16 = const()[name = tensor("op_1124_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_35_cast_fp16 = mul(x = var_1123_cast_fp16, y = var_1124_to_fp16)[name = tensor("aw_chunk_35_cast_fp16")]; + tensor var_1127_equation_0 = const()[name = tensor("op_1127_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1127_cast_fp16 = einsum(equation = var_1127_equation_0, values = (var_913_cast_fp16, var_465_cast_fp16))[name = tensor("op_1127_cast_fp16")]; + tensor var_1128_to_fp16 = const()[name = tensor("op_1128_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_37_cast_fp16 = mul(x = var_1127_cast_fp16, y = var_1128_to_fp16)[name = tensor("aw_chunk_37_cast_fp16")]; + tensor var_1131_equation_0 = const()[name = tensor("op_1131_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1131_cast_fp16 = einsum(equation = var_1131_equation_0, values = (var_913_cast_fp16, var_472_cast_fp16))[name = tensor("op_1131_cast_fp16")]; + tensor var_1132_to_fp16 = const()[name = tensor("op_1132_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_39_cast_fp16 = mul(x = var_1131_cast_fp16, y = var_1132_to_fp16)[name = tensor("aw_chunk_39_cast_fp16")]; + tensor var_1135_equation_0 = const()[name = tensor("op_1135_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1135_cast_fp16 = einsum(equation = var_1135_equation_0, values = (var_917_cast_fp16, var_479_cast_fp16))[name = tensor("op_1135_cast_fp16")]; + tensor var_1136_to_fp16 = const()[name = tensor("op_1136_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_41_cast_fp16 = mul(x = var_1135_cast_fp16, y = var_1136_to_fp16)[name = tensor("aw_chunk_41_cast_fp16")]; + tensor var_1139_equation_0 = const()[name = tensor("op_1139_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1139_cast_fp16 = einsum(equation = var_1139_equation_0, values = (var_917_cast_fp16, var_486_cast_fp16))[name = tensor("op_1139_cast_fp16")]; + tensor var_1140_to_fp16 = const()[name = tensor("op_1140_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_43_cast_fp16 = mul(x = var_1139_cast_fp16, y = var_1140_to_fp16)[name = tensor("aw_chunk_43_cast_fp16")]; + tensor var_1143_equation_0 = const()[name = tensor("op_1143_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1143_cast_fp16 = einsum(equation = var_1143_equation_0, values = (var_917_cast_fp16, var_493_cast_fp16))[name = tensor("op_1143_cast_fp16")]; + tensor var_1144_to_fp16 = const()[name = tensor("op_1144_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_45_cast_fp16 = mul(x = var_1143_cast_fp16, y = var_1144_to_fp16)[name = tensor("aw_chunk_45_cast_fp16")]; + tensor var_1147_equation_0 = const()[name = tensor("op_1147_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1147_cast_fp16 = einsum(equation = var_1147_equation_0, values = (var_917_cast_fp16, var_500_cast_fp16))[name = tensor("op_1147_cast_fp16")]; + tensor var_1148_to_fp16 = const()[name = tensor("op_1148_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_47_cast_fp16 = mul(x = var_1147_cast_fp16, y = var_1148_to_fp16)[name = tensor("aw_chunk_47_cast_fp16")]; + tensor var_1151_equation_0 = const()[name = tensor("op_1151_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1151_cast_fp16 = einsum(equation = var_1151_equation_0, values = (var_921_cast_fp16, var_507_cast_fp16))[name = tensor("op_1151_cast_fp16")]; + tensor var_1152_to_fp16 = const()[name = tensor("op_1152_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_49_cast_fp16 = mul(x = var_1151_cast_fp16, y = var_1152_to_fp16)[name = tensor("aw_chunk_49_cast_fp16")]; + tensor var_1155_equation_0 = const()[name = tensor("op_1155_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1155_cast_fp16 = einsum(equation = var_1155_equation_0, values = (var_921_cast_fp16, var_514_cast_fp16))[name = tensor("op_1155_cast_fp16")]; + tensor var_1156_to_fp16 = const()[name = tensor("op_1156_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_51_cast_fp16 = mul(x = var_1155_cast_fp16, y = var_1156_to_fp16)[name = tensor("aw_chunk_51_cast_fp16")]; + tensor var_1159_equation_0 = const()[name = tensor("op_1159_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1159_cast_fp16 = einsum(equation = var_1159_equation_0, values = (var_921_cast_fp16, var_521_cast_fp16))[name = tensor("op_1159_cast_fp16")]; + tensor var_1160_to_fp16 = const()[name = tensor("op_1160_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_53_cast_fp16 = mul(x = var_1159_cast_fp16, y = var_1160_to_fp16)[name = tensor("aw_chunk_53_cast_fp16")]; + tensor var_1163_equation_0 = const()[name = tensor("op_1163_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1163_cast_fp16 = einsum(equation = var_1163_equation_0, values = (var_921_cast_fp16, var_528_cast_fp16))[name = tensor("op_1163_cast_fp16")]; + tensor var_1164_to_fp16 = const()[name = tensor("op_1164_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_55_cast_fp16 = mul(x = var_1163_cast_fp16, y = var_1164_to_fp16)[name = tensor("aw_chunk_55_cast_fp16")]; + tensor var_1167_equation_0 = const()[name = tensor("op_1167_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1167_cast_fp16 = einsum(equation = var_1167_equation_0, values = (var_925_cast_fp16, var_535_cast_fp16))[name = tensor("op_1167_cast_fp16")]; + tensor var_1168_to_fp16 = const()[name = tensor("op_1168_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_57_cast_fp16 = mul(x = var_1167_cast_fp16, y = var_1168_to_fp16)[name = tensor("aw_chunk_57_cast_fp16")]; + tensor var_1171_equation_0 = const()[name = tensor("op_1171_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1171_cast_fp16 = einsum(equation = var_1171_equation_0, values = (var_925_cast_fp16, var_542_cast_fp16))[name = tensor("op_1171_cast_fp16")]; + tensor var_1172_to_fp16 = const()[name = tensor("op_1172_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_59_cast_fp16 = mul(x = var_1171_cast_fp16, y = var_1172_to_fp16)[name = tensor("aw_chunk_59_cast_fp16")]; + tensor var_1175_equation_0 = const()[name = tensor("op_1175_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1175_cast_fp16 = einsum(equation = var_1175_equation_0, values = (var_925_cast_fp16, var_549_cast_fp16))[name = tensor("op_1175_cast_fp16")]; + tensor var_1176_to_fp16 = const()[name = tensor("op_1176_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_61_cast_fp16 = mul(x = var_1175_cast_fp16, y = var_1176_to_fp16)[name = tensor("aw_chunk_61_cast_fp16")]; + tensor var_1179_equation_0 = const()[name = tensor("op_1179_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1179_cast_fp16 = einsum(equation = var_1179_equation_0, values = (var_925_cast_fp16, var_556_cast_fp16))[name = tensor("op_1179_cast_fp16")]; + tensor var_1180_to_fp16 = const()[name = tensor("op_1180_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_63_cast_fp16 = mul(x = var_1179_cast_fp16, y = var_1180_to_fp16)[name = tensor("aw_chunk_63_cast_fp16")]; + tensor var_1183_equation_0 = const()[name = tensor("op_1183_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1183_cast_fp16 = einsum(equation = var_1183_equation_0, values = (var_929_cast_fp16, var_563_cast_fp16))[name = tensor("op_1183_cast_fp16")]; + tensor var_1184_to_fp16 = const()[name = tensor("op_1184_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_65_cast_fp16 = mul(x = var_1183_cast_fp16, y = var_1184_to_fp16)[name = tensor("aw_chunk_65_cast_fp16")]; + tensor var_1187_equation_0 = const()[name = tensor("op_1187_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1187_cast_fp16 = einsum(equation = var_1187_equation_0, values = (var_929_cast_fp16, var_570_cast_fp16))[name = tensor("op_1187_cast_fp16")]; + tensor var_1188_to_fp16 = const()[name = tensor("op_1188_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_67_cast_fp16 = mul(x = var_1187_cast_fp16, y = var_1188_to_fp16)[name = tensor("aw_chunk_67_cast_fp16")]; + tensor var_1191_equation_0 = const()[name = tensor("op_1191_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1191_cast_fp16 = einsum(equation = var_1191_equation_0, values = (var_929_cast_fp16, var_577_cast_fp16))[name = tensor("op_1191_cast_fp16")]; + tensor var_1192_to_fp16 = const()[name = tensor("op_1192_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_69_cast_fp16 = mul(x = var_1191_cast_fp16, y = var_1192_to_fp16)[name = tensor("aw_chunk_69_cast_fp16")]; + tensor var_1195_equation_0 = const()[name = tensor("op_1195_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1195_cast_fp16 = einsum(equation = var_1195_equation_0, values = (var_929_cast_fp16, var_584_cast_fp16))[name = tensor("op_1195_cast_fp16")]; + tensor var_1196_to_fp16 = const()[name = tensor("op_1196_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_71_cast_fp16 = mul(x = var_1195_cast_fp16, y = var_1196_to_fp16)[name = tensor("aw_chunk_71_cast_fp16")]; + tensor var_1199_equation_0 = const()[name = tensor("op_1199_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1199_cast_fp16 = einsum(equation = var_1199_equation_0, values = (var_933_cast_fp16, var_591_cast_fp16))[name = tensor("op_1199_cast_fp16")]; + tensor var_1200_to_fp16 = const()[name = tensor("op_1200_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_73_cast_fp16 = mul(x = var_1199_cast_fp16, y = var_1200_to_fp16)[name = tensor("aw_chunk_73_cast_fp16")]; + tensor var_1203_equation_0 = const()[name = tensor("op_1203_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1203_cast_fp16 = einsum(equation = var_1203_equation_0, values = (var_933_cast_fp16, var_598_cast_fp16))[name = tensor("op_1203_cast_fp16")]; + tensor var_1204_to_fp16 = const()[name = tensor("op_1204_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_75_cast_fp16 = mul(x = var_1203_cast_fp16, y = var_1204_to_fp16)[name = tensor("aw_chunk_75_cast_fp16")]; + tensor var_1207_equation_0 = const()[name = tensor("op_1207_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1207_cast_fp16 = einsum(equation = var_1207_equation_0, values = (var_933_cast_fp16, var_605_cast_fp16))[name = tensor("op_1207_cast_fp16")]; + tensor var_1208_to_fp16 = const()[name = tensor("op_1208_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_77_cast_fp16 = mul(x = var_1207_cast_fp16, y = var_1208_to_fp16)[name = tensor("aw_chunk_77_cast_fp16")]; + tensor var_1211_equation_0 = const()[name = tensor("op_1211_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1211_cast_fp16 = einsum(equation = var_1211_equation_0, values = (var_933_cast_fp16, var_612_cast_fp16))[name = tensor("op_1211_cast_fp16")]; + tensor var_1212_to_fp16 = const()[name = tensor("op_1212_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_79_cast_fp16 = mul(x = var_1211_cast_fp16, y = var_1212_to_fp16)[name = tensor("aw_chunk_79_cast_fp16")]; + tensor var_1215_equation_0 = const()[name = tensor("op_1215_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1215_cast_fp16 = einsum(equation = var_1215_equation_0, values = (var_937_cast_fp16, var_619_cast_fp16))[name = tensor("op_1215_cast_fp16")]; + tensor var_1216_to_fp16 = const()[name = tensor("op_1216_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_81_cast_fp16 = mul(x = var_1215_cast_fp16, y = var_1216_to_fp16)[name = tensor("aw_chunk_81_cast_fp16")]; + tensor var_1219_equation_0 = const()[name = tensor("op_1219_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1219_cast_fp16 = einsum(equation = var_1219_equation_0, values = (var_937_cast_fp16, var_626_cast_fp16))[name = tensor("op_1219_cast_fp16")]; + tensor var_1220_to_fp16 = const()[name = tensor("op_1220_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_83_cast_fp16 = mul(x = var_1219_cast_fp16, y = var_1220_to_fp16)[name = tensor("aw_chunk_83_cast_fp16")]; + tensor var_1223_equation_0 = const()[name = tensor("op_1223_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1223_cast_fp16 = einsum(equation = var_1223_equation_0, values = (var_937_cast_fp16, var_633_cast_fp16))[name = tensor("op_1223_cast_fp16")]; + tensor var_1224_to_fp16 = const()[name = tensor("op_1224_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_85_cast_fp16 = mul(x = var_1223_cast_fp16, y = var_1224_to_fp16)[name = tensor("aw_chunk_85_cast_fp16")]; + tensor var_1227_equation_0 = const()[name = tensor("op_1227_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1227_cast_fp16 = einsum(equation = var_1227_equation_0, values = (var_937_cast_fp16, var_640_cast_fp16))[name = tensor("op_1227_cast_fp16")]; + tensor var_1228_to_fp16 = const()[name = tensor("op_1228_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_87_cast_fp16 = mul(x = var_1227_cast_fp16, y = var_1228_to_fp16)[name = tensor("aw_chunk_87_cast_fp16")]; + tensor var_1231_equation_0 = const()[name = tensor("op_1231_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1231_cast_fp16 = einsum(equation = var_1231_equation_0, values = (var_941_cast_fp16, var_647_cast_fp16))[name = tensor("op_1231_cast_fp16")]; + tensor var_1232_to_fp16 = const()[name = tensor("op_1232_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_89_cast_fp16 = mul(x = var_1231_cast_fp16, y = var_1232_to_fp16)[name = tensor("aw_chunk_89_cast_fp16")]; + tensor var_1235_equation_0 = const()[name = tensor("op_1235_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1235_cast_fp16 = einsum(equation = var_1235_equation_0, values = (var_941_cast_fp16, var_654_cast_fp16))[name = tensor("op_1235_cast_fp16")]; + tensor var_1236_to_fp16 = const()[name = tensor("op_1236_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_91_cast_fp16 = mul(x = var_1235_cast_fp16, y = var_1236_to_fp16)[name = tensor("aw_chunk_91_cast_fp16")]; + tensor var_1239_equation_0 = const()[name = tensor("op_1239_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1239_cast_fp16 = einsum(equation = var_1239_equation_0, values = (var_941_cast_fp16, var_661_cast_fp16))[name = tensor("op_1239_cast_fp16")]; + tensor var_1240_to_fp16 = const()[name = tensor("op_1240_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_93_cast_fp16 = mul(x = var_1239_cast_fp16, y = var_1240_to_fp16)[name = tensor("aw_chunk_93_cast_fp16")]; + tensor var_1243_equation_0 = const()[name = tensor("op_1243_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1243_cast_fp16 = einsum(equation = var_1243_equation_0, values = (var_941_cast_fp16, var_668_cast_fp16))[name = tensor("op_1243_cast_fp16")]; + tensor var_1244_to_fp16 = const()[name = tensor("op_1244_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_95_cast_fp16 = mul(x = var_1243_cast_fp16, y = var_1244_to_fp16)[name = tensor("aw_chunk_95_cast_fp16")]; + tensor var_1247_equation_0 = const()[name = tensor("op_1247_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1247_cast_fp16 = einsum(equation = var_1247_equation_0, values = (var_945_cast_fp16, var_675_cast_fp16))[name = tensor("op_1247_cast_fp16")]; + tensor var_1248_to_fp16 = const()[name = tensor("op_1248_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_97_cast_fp16 = mul(x = var_1247_cast_fp16, y = var_1248_to_fp16)[name = tensor("aw_chunk_97_cast_fp16")]; + tensor var_1251_equation_0 = const()[name = tensor("op_1251_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1251_cast_fp16 = einsum(equation = var_1251_equation_0, values = (var_945_cast_fp16, var_682_cast_fp16))[name = tensor("op_1251_cast_fp16")]; + tensor var_1252_to_fp16 = const()[name = tensor("op_1252_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_99_cast_fp16 = mul(x = var_1251_cast_fp16, y = var_1252_to_fp16)[name = tensor("aw_chunk_99_cast_fp16")]; + tensor var_1255_equation_0 = const()[name = tensor("op_1255_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1255_cast_fp16 = einsum(equation = var_1255_equation_0, values = (var_945_cast_fp16, var_689_cast_fp16))[name = tensor("op_1255_cast_fp16")]; + tensor var_1256_to_fp16 = const()[name = tensor("op_1256_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_101_cast_fp16 = mul(x = var_1255_cast_fp16, y = var_1256_to_fp16)[name = tensor("aw_chunk_101_cast_fp16")]; + tensor var_1259_equation_0 = const()[name = tensor("op_1259_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1259_cast_fp16 = einsum(equation = var_1259_equation_0, values = (var_945_cast_fp16, var_696_cast_fp16))[name = tensor("op_1259_cast_fp16")]; + tensor var_1260_to_fp16 = const()[name = tensor("op_1260_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_103_cast_fp16 = mul(x = var_1259_cast_fp16, y = var_1260_to_fp16)[name = tensor("aw_chunk_103_cast_fp16")]; + tensor var_1263_equation_0 = const()[name = tensor("op_1263_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1263_cast_fp16 = einsum(equation = var_1263_equation_0, values = (var_949_cast_fp16, var_703_cast_fp16))[name = tensor("op_1263_cast_fp16")]; + tensor var_1264_to_fp16 = const()[name = tensor("op_1264_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_105_cast_fp16 = mul(x = var_1263_cast_fp16, y = var_1264_to_fp16)[name = tensor("aw_chunk_105_cast_fp16")]; + tensor var_1267_equation_0 = const()[name = tensor("op_1267_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1267_cast_fp16 = einsum(equation = var_1267_equation_0, values = (var_949_cast_fp16, var_710_cast_fp16))[name = tensor("op_1267_cast_fp16")]; + tensor var_1268_to_fp16 = const()[name = tensor("op_1268_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_107_cast_fp16 = mul(x = var_1267_cast_fp16, y = var_1268_to_fp16)[name = tensor("aw_chunk_107_cast_fp16")]; + tensor var_1271_equation_0 = const()[name = tensor("op_1271_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1271_cast_fp16 = einsum(equation = var_1271_equation_0, values = (var_949_cast_fp16, var_717_cast_fp16))[name = tensor("op_1271_cast_fp16")]; + tensor var_1272_to_fp16 = const()[name = tensor("op_1272_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_109_cast_fp16 = mul(x = var_1271_cast_fp16, y = var_1272_to_fp16)[name = tensor("aw_chunk_109_cast_fp16")]; + tensor var_1275_equation_0 = const()[name = tensor("op_1275_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1275_cast_fp16 = einsum(equation = var_1275_equation_0, values = (var_949_cast_fp16, var_724_cast_fp16))[name = tensor("op_1275_cast_fp16")]; + tensor var_1276_to_fp16 = const()[name = tensor("op_1276_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_111_cast_fp16 = mul(x = var_1275_cast_fp16, y = var_1276_to_fp16)[name = tensor("aw_chunk_111_cast_fp16")]; + tensor var_1279_equation_0 = const()[name = tensor("op_1279_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1279_cast_fp16 = einsum(equation = var_1279_equation_0, values = (var_953_cast_fp16, var_731_cast_fp16))[name = tensor("op_1279_cast_fp16")]; + tensor var_1280_to_fp16 = const()[name = tensor("op_1280_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_113_cast_fp16 = mul(x = var_1279_cast_fp16, y = var_1280_to_fp16)[name = tensor("aw_chunk_113_cast_fp16")]; + tensor var_1283_equation_0 = const()[name = tensor("op_1283_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1283_cast_fp16 = einsum(equation = var_1283_equation_0, values = (var_953_cast_fp16, var_738_cast_fp16))[name = tensor("op_1283_cast_fp16")]; + tensor var_1284_to_fp16 = const()[name = tensor("op_1284_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_115_cast_fp16 = mul(x = var_1283_cast_fp16, y = var_1284_to_fp16)[name = tensor("aw_chunk_115_cast_fp16")]; + tensor var_1287_equation_0 = const()[name = tensor("op_1287_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1287_cast_fp16 = einsum(equation = var_1287_equation_0, values = (var_953_cast_fp16, var_745_cast_fp16))[name = tensor("op_1287_cast_fp16")]; + tensor var_1288_to_fp16 = const()[name = tensor("op_1288_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_117_cast_fp16 = mul(x = var_1287_cast_fp16, y = var_1288_to_fp16)[name = tensor("aw_chunk_117_cast_fp16")]; + tensor var_1291_equation_0 = const()[name = tensor("op_1291_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1291_cast_fp16 = einsum(equation = var_1291_equation_0, values = (var_953_cast_fp16, var_752_cast_fp16))[name = tensor("op_1291_cast_fp16")]; + tensor var_1292_to_fp16 = const()[name = tensor("op_1292_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_119_cast_fp16 = mul(x = var_1291_cast_fp16, y = var_1292_to_fp16)[name = tensor("aw_chunk_119_cast_fp16")]; + tensor var_1295_equation_0 = const()[name = tensor("op_1295_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1295_cast_fp16 = einsum(equation = var_1295_equation_0, values = (var_957_cast_fp16, var_759_cast_fp16))[name = tensor("op_1295_cast_fp16")]; + tensor var_1296_to_fp16 = const()[name = tensor("op_1296_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_121_cast_fp16 = mul(x = var_1295_cast_fp16, y = var_1296_to_fp16)[name = tensor("aw_chunk_121_cast_fp16")]; + tensor var_1299_equation_0 = const()[name = tensor("op_1299_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1299_cast_fp16 = einsum(equation = var_1299_equation_0, values = (var_957_cast_fp16, var_766_cast_fp16))[name = tensor("op_1299_cast_fp16")]; + tensor var_1300_to_fp16 = const()[name = tensor("op_1300_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_123_cast_fp16 = mul(x = var_1299_cast_fp16, y = var_1300_to_fp16)[name = tensor("aw_chunk_123_cast_fp16")]; + tensor var_1303_equation_0 = const()[name = tensor("op_1303_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1303_cast_fp16 = einsum(equation = var_1303_equation_0, values = (var_957_cast_fp16, var_773_cast_fp16))[name = tensor("op_1303_cast_fp16")]; + tensor var_1304_to_fp16 = const()[name = tensor("op_1304_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_125_cast_fp16 = mul(x = var_1303_cast_fp16, y = var_1304_to_fp16)[name = tensor("aw_chunk_125_cast_fp16")]; + tensor var_1307_equation_0 = const()[name = tensor("op_1307_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1307_cast_fp16 = einsum(equation = var_1307_equation_0, values = (var_957_cast_fp16, var_780_cast_fp16))[name = tensor("op_1307_cast_fp16")]; + tensor var_1308_to_fp16 = const()[name = tensor("op_1308_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_127_cast_fp16 = mul(x = var_1307_cast_fp16, y = var_1308_to_fp16)[name = tensor("aw_chunk_127_cast_fp16")]; + tensor var_1311_equation_0 = const()[name = tensor("op_1311_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1311_cast_fp16 = einsum(equation = var_1311_equation_0, values = (var_961_cast_fp16, var_787_cast_fp16))[name = tensor("op_1311_cast_fp16")]; + tensor var_1312_to_fp16 = const()[name = tensor("op_1312_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_129_cast_fp16 = mul(x = var_1311_cast_fp16, y = var_1312_to_fp16)[name = tensor("aw_chunk_129_cast_fp16")]; + tensor var_1315_equation_0 = const()[name = tensor("op_1315_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1315_cast_fp16 = einsum(equation = var_1315_equation_0, values = (var_961_cast_fp16, var_794_cast_fp16))[name = tensor("op_1315_cast_fp16")]; + tensor var_1316_to_fp16 = const()[name = tensor("op_1316_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_131_cast_fp16 = mul(x = var_1315_cast_fp16, y = var_1316_to_fp16)[name = tensor("aw_chunk_131_cast_fp16")]; + tensor var_1319_equation_0 = const()[name = tensor("op_1319_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1319_cast_fp16 = einsum(equation = var_1319_equation_0, values = (var_961_cast_fp16, var_801_cast_fp16))[name = tensor("op_1319_cast_fp16")]; + tensor var_1320_to_fp16 = const()[name = tensor("op_1320_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_133_cast_fp16 = mul(x = var_1319_cast_fp16, y = var_1320_to_fp16)[name = tensor("aw_chunk_133_cast_fp16")]; + tensor var_1323_equation_0 = const()[name = tensor("op_1323_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1323_cast_fp16 = einsum(equation = var_1323_equation_0, values = (var_961_cast_fp16, var_808_cast_fp16))[name = tensor("op_1323_cast_fp16")]; + tensor var_1324_to_fp16 = const()[name = tensor("op_1324_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_135_cast_fp16 = mul(x = var_1323_cast_fp16, y = var_1324_to_fp16)[name = tensor("aw_chunk_135_cast_fp16")]; + tensor var_1327_equation_0 = const()[name = tensor("op_1327_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1327_cast_fp16 = einsum(equation = var_1327_equation_0, values = (var_965_cast_fp16, var_815_cast_fp16))[name = tensor("op_1327_cast_fp16")]; + tensor var_1328_to_fp16 = const()[name = tensor("op_1328_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_137_cast_fp16 = mul(x = var_1327_cast_fp16, y = var_1328_to_fp16)[name = tensor("aw_chunk_137_cast_fp16")]; + tensor var_1331_equation_0 = const()[name = tensor("op_1331_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1331_cast_fp16 = einsum(equation = var_1331_equation_0, values = (var_965_cast_fp16, var_822_cast_fp16))[name = tensor("op_1331_cast_fp16")]; + tensor var_1332_to_fp16 = const()[name = tensor("op_1332_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_139_cast_fp16 = mul(x = var_1331_cast_fp16, y = var_1332_to_fp16)[name = tensor("aw_chunk_139_cast_fp16")]; + tensor var_1335_equation_0 = const()[name = tensor("op_1335_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1335_cast_fp16 = einsum(equation = var_1335_equation_0, values = (var_965_cast_fp16, var_829_cast_fp16))[name = tensor("op_1335_cast_fp16")]; + tensor var_1336_to_fp16 = const()[name = tensor("op_1336_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_141_cast_fp16 = mul(x = var_1335_cast_fp16, y = var_1336_to_fp16)[name = tensor("aw_chunk_141_cast_fp16")]; + tensor var_1339_equation_0 = const()[name = tensor("op_1339_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1339_cast_fp16 = einsum(equation = var_1339_equation_0, values = (var_965_cast_fp16, var_836_cast_fp16))[name = tensor("op_1339_cast_fp16")]; + tensor var_1340_to_fp16 = const()[name = tensor("op_1340_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_143_cast_fp16 = mul(x = var_1339_cast_fp16, y = var_1340_to_fp16)[name = tensor("aw_chunk_143_cast_fp16")]; + tensor var_1343_equation_0 = const()[name = tensor("op_1343_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1343_cast_fp16 = einsum(equation = var_1343_equation_0, values = (var_969_cast_fp16, var_843_cast_fp16))[name = tensor("op_1343_cast_fp16")]; + tensor var_1344_to_fp16 = const()[name = tensor("op_1344_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_145_cast_fp16 = mul(x = var_1343_cast_fp16, y = var_1344_to_fp16)[name = tensor("aw_chunk_145_cast_fp16")]; + tensor var_1347_equation_0 = const()[name = tensor("op_1347_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1347_cast_fp16 = einsum(equation = var_1347_equation_0, values = (var_969_cast_fp16, var_850_cast_fp16))[name = tensor("op_1347_cast_fp16")]; + tensor var_1348_to_fp16 = const()[name = tensor("op_1348_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_147_cast_fp16 = mul(x = var_1347_cast_fp16, y = var_1348_to_fp16)[name = tensor("aw_chunk_147_cast_fp16")]; + tensor var_1351_equation_0 = const()[name = tensor("op_1351_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1351_cast_fp16 = einsum(equation = var_1351_equation_0, values = (var_969_cast_fp16, var_857_cast_fp16))[name = tensor("op_1351_cast_fp16")]; + tensor var_1352_to_fp16 = const()[name = tensor("op_1352_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_149_cast_fp16 = mul(x = var_1351_cast_fp16, y = var_1352_to_fp16)[name = tensor("aw_chunk_149_cast_fp16")]; + tensor var_1355_equation_0 = const()[name = tensor("op_1355_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1355_cast_fp16 = einsum(equation = var_1355_equation_0, values = (var_969_cast_fp16, var_864_cast_fp16))[name = tensor("op_1355_cast_fp16")]; + tensor var_1356_to_fp16 = const()[name = tensor("op_1356_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_151_cast_fp16 = mul(x = var_1355_cast_fp16, y = var_1356_to_fp16)[name = tensor("aw_chunk_151_cast_fp16")]; + tensor var_1359_equation_0 = const()[name = tensor("op_1359_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1359_cast_fp16 = einsum(equation = var_1359_equation_0, values = (var_973_cast_fp16, var_871_cast_fp16))[name = tensor("op_1359_cast_fp16")]; + tensor var_1360_to_fp16 = const()[name = tensor("op_1360_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_153_cast_fp16 = mul(x = var_1359_cast_fp16, y = var_1360_to_fp16)[name = tensor("aw_chunk_153_cast_fp16")]; + tensor var_1363_equation_0 = const()[name = tensor("op_1363_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1363_cast_fp16 = einsum(equation = var_1363_equation_0, values = (var_973_cast_fp16, var_878_cast_fp16))[name = tensor("op_1363_cast_fp16")]; + tensor var_1364_to_fp16 = const()[name = tensor("op_1364_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_155_cast_fp16 = mul(x = var_1363_cast_fp16, y = var_1364_to_fp16)[name = tensor("aw_chunk_155_cast_fp16")]; + tensor var_1367_equation_0 = const()[name = tensor("op_1367_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1367_cast_fp16 = einsum(equation = var_1367_equation_0, values = (var_973_cast_fp16, var_885_cast_fp16))[name = tensor("op_1367_cast_fp16")]; + tensor var_1368_to_fp16 = const()[name = tensor("op_1368_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_157_cast_fp16 = mul(x = var_1367_cast_fp16, y = var_1368_to_fp16)[name = tensor("aw_chunk_157_cast_fp16")]; + tensor var_1371_equation_0 = const()[name = tensor("op_1371_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1371_cast_fp16 = einsum(equation = var_1371_equation_0, values = (var_973_cast_fp16, var_892_cast_fp16))[name = tensor("op_1371_cast_fp16")]; + tensor var_1372_to_fp16 = const()[name = tensor("op_1372_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_159_cast_fp16 = mul(x = var_1371_cast_fp16, y = var_1372_to_fp16)[name = tensor("aw_chunk_159_cast_fp16")]; + tensor var_1374_cast_fp16 = softmax(axis = var_199, x = aw_chunk_1_cast_fp16)[name = tensor("op_1374_cast_fp16")]; + tensor var_1375_cast_fp16 = softmax(axis = var_199, x = aw_chunk_3_cast_fp16)[name = tensor("op_1375_cast_fp16")]; + tensor var_1376_cast_fp16 = softmax(axis = var_199, x = aw_chunk_5_cast_fp16)[name = tensor("op_1376_cast_fp16")]; + tensor var_1377_cast_fp16 = softmax(axis = var_199, x = aw_chunk_7_cast_fp16)[name = tensor("op_1377_cast_fp16")]; + tensor var_1378_cast_fp16 = softmax(axis = var_199, x = aw_chunk_9_cast_fp16)[name = tensor("op_1378_cast_fp16")]; + tensor var_1379_cast_fp16 = softmax(axis = var_199, x = aw_chunk_11_cast_fp16)[name = tensor("op_1379_cast_fp16")]; + tensor var_1380_cast_fp16 = softmax(axis = var_199, x = aw_chunk_13_cast_fp16)[name = tensor("op_1380_cast_fp16")]; + tensor var_1381_cast_fp16 = softmax(axis = var_199, x = aw_chunk_15_cast_fp16)[name = tensor("op_1381_cast_fp16")]; + tensor var_1382_cast_fp16 = softmax(axis = var_199, x = aw_chunk_17_cast_fp16)[name = tensor("op_1382_cast_fp16")]; + tensor var_1383_cast_fp16 = softmax(axis = var_199, x = aw_chunk_19_cast_fp16)[name = tensor("op_1383_cast_fp16")]; + tensor var_1384_cast_fp16 = softmax(axis = var_199, x = aw_chunk_21_cast_fp16)[name = tensor("op_1384_cast_fp16")]; + tensor var_1385_cast_fp16 = softmax(axis = var_199, x = aw_chunk_23_cast_fp16)[name = tensor("op_1385_cast_fp16")]; + tensor var_1386_cast_fp16 = softmax(axis = var_199, x = aw_chunk_25_cast_fp16)[name = tensor("op_1386_cast_fp16")]; + tensor var_1387_cast_fp16 = softmax(axis = var_199, x = aw_chunk_27_cast_fp16)[name = tensor("op_1387_cast_fp16")]; + tensor var_1388_cast_fp16 = softmax(axis = var_199, x = aw_chunk_29_cast_fp16)[name = tensor("op_1388_cast_fp16")]; + tensor var_1389_cast_fp16 = softmax(axis = var_199, x = aw_chunk_31_cast_fp16)[name = tensor("op_1389_cast_fp16")]; + tensor var_1390_cast_fp16 = softmax(axis = var_199, x = aw_chunk_33_cast_fp16)[name = tensor("op_1390_cast_fp16")]; + tensor var_1391_cast_fp16 = softmax(axis = var_199, x = aw_chunk_35_cast_fp16)[name = tensor("op_1391_cast_fp16")]; + tensor var_1392_cast_fp16 = softmax(axis = var_199, x = aw_chunk_37_cast_fp16)[name = tensor("op_1392_cast_fp16")]; + tensor var_1393_cast_fp16 = softmax(axis = var_199, x = aw_chunk_39_cast_fp16)[name = tensor("op_1393_cast_fp16")]; + tensor var_1394_cast_fp16 = softmax(axis = var_199, x = aw_chunk_41_cast_fp16)[name = tensor("op_1394_cast_fp16")]; + tensor var_1395_cast_fp16 = softmax(axis = var_199, x = aw_chunk_43_cast_fp16)[name = tensor("op_1395_cast_fp16")]; + tensor var_1396_cast_fp16 = softmax(axis = var_199, x = aw_chunk_45_cast_fp16)[name = tensor("op_1396_cast_fp16")]; + tensor var_1397_cast_fp16 = softmax(axis = var_199, x = aw_chunk_47_cast_fp16)[name = tensor("op_1397_cast_fp16")]; + tensor var_1398_cast_fp16 = softmax(axis = var_199, x = aw_chunk_49_cast_fp16)[name = tensor("op_1398_cast_fp16")]; + tensor var_1399_cast_fp16 = softmax(axis = var_199, x = aw_chunk_51_cast_fp16)[name = tensor("op_1399_cast_fp16")]; + tensor var_1400_cast_fp16 = softmax(axis = var_199, x = aw_chunk_53_cast_fp16)[name = tensor("op_1400_cast_fp16")]; + tensor var_1401_cast_fp16 = softmax(axis = var_199, x = aw_chunk_55_cast_fp16)[name = tensor("op_1401_cast_fp16")]; + tensor var_1402_cast_fp16 = softmax(axis = var_199, x = aw_chunk_57_cast_fp16)[name = tensor("op_1402_cast_fp16")]; + tensor var_1403_cast_fp16 = softmax(axis = var_199, x = aw_chunk_59_cast_fp16)[name = tensor("op_1403_cast_fp16")]; + tensor var_1404_cast_fp16 = softmax(axis = var_199, x = aw_chunk_61_cast_fp16)[name = tensor("op_1404_cast_fp16")]; + tensor var_1405_cast_fp16 = softmax(axis = var_199, x = aw_chunk_63_cast_fp16)[name = tensor("op_1405_cast_fp16")]; + tensor var_1406_cast_fp16 = softmax(axis = var_199, x = aw_chunk_65_cast_fp16)[name = tensor("op_1406_cast_fp16")]; + tensor var_1407_cast_fp16 = softmax(axis = var_199, x = aw_chunk_67_cast_fp16)[name = tensor("op_1407_cast_fp16")]; + tensor var_1408_cast_fp16 = softmax(axis = var_199, x = aw_chunk_69_cast_fp16)[name = tensor("op_1408_cast_fp16")]; + tensor var_1409_cast_fp16 = softmax(axis = var_199, x = aw_chunk_71_cast_fp16)[name = tensor("op_1409_cast_fp16")]; + tensor var_1410_cast_fp16 = softmax(axis = var_199, x = aw_chunk_73_cast_fp16)[name = tensor("op_1410_cast_fp16")]; + tensor var_1411_cast_fp16 = softmax(axis = var_199, x = aw_chunk_75_cast_fp16)[name = tensor("op_1411_cast_fp16")]; + tensor var_1412_cast_fp16 = softmax(axis = var_199, x = aw_chunk_77_cast_fp16)[name = tensor("op_1412_cast_fp16")]; + tensor var_1413_cast_fp16 = softmax(axis = var_199, x = aw_chunk_79_cast_fp16)[name = tensor("op_1413_cast_fp16")]; + tensor var_1414_cast_fp16 = softmax(axis = var_199, x = aw_chunk_81_cast_fp16)[name = tensor("op_1414_cast_fp16")]; + tensor var_1415_cast_fp16 = softmax(axis = var_199, x = aw_chunk_83_cast_fp16)[name = tensor("op_1415_cast_fp16")]; + tensor var_1416_cast_fp16 = softmax(axis = var_199, x = aw_chunk_85_cast_fp16)[name = tensor("op_1416_cast_fp16")]; + tensor var_1417_cast_fp16 = softmax(axis = var_199, x = aw_chunk_87_cast_fp16)[name = tensor("op_1417_cast_fp16")]; + tensor var_1418_cast_fp16 = softmax(axis = var_199, x = aw_chunk_89_cast_fp16)[name = tensor("op_1418_cast_fp16")]; + tensor var_1419_cast_fp16 = softmax(axis = var_199, x = aw_chunk_91_cast_fp16)[name = tensor("op_1419_cast_fp16")]; + tensor var_1420_cast_fp16 = softmax(axis = var_199, x = aw_chunk_93_cast_fp16)[name = tensor("op_1420_cast_fp16")]; + tensor var_1421_cast_fp16 = softmax(axis = var_199, x = aw_chunk_95_cast_fp16)[name = tensor("op_1421_cast_fp16")]; + tensor var_1422_cast_fp16 = softmax(axis = var_199, x = aw_chunk_97_cast_fp16)[name = tensor("op_1422_cast_fp16")]; + tensor var_1423_cast_fp16 = softmax(axis = var_199, x = aw_chunk_99_cast_fp16)[name = tensor("op_1423_cast_fp16")]; + tensor var_1424_cast_fp16 = softmax(axis = var_199, x = aw_chunk_101_cast_fp16)[name = tensor("op_1424_cast_fp16")]; + tensor var_1425_cast_fp16 = softmax(axis = var_199, x = aw_chunk_103_cast_fp16)[name = tensor("op_1425_cast_fp16")]; + tensor var_1426_cast_fp16 = softmax(axis = var_199, x = aw_chunk_105_cast_fp16)[name = tensor("op_1426_cast_fp16")]; + tensor var_1427_cast_fp16 = softmax(axis = var_199, x = aw_chunk_107_cast_fp16)[name = tensor("op_1427_cast_fp16")]; + tensor var_1428_cast_fp16 = softmax(axis = var_199, x = aw_chunk_109_cast_fp16)[name = tensor("op_1428_cast_fp16")]; + tensor var_1429_cast_fp16 = softmax(axis = var_199, x = aw_chunk_111_cast_fp16)[name = tensor("op_1429_cast_fp16")]; + tensor var_1430_cast_fp16 = softmax(axis = var_199, x = aw_chunk_113_cast_fp16)[name = tensor("op_1430_cast_fp16")]; + tensor var_1431_cast_fp16 = softmax(axis = var_199, x = aw_chunk_115_cast_fp16)[name = tensor("op_1431_cast_fp16")]; + tensor var_1432_cast_fp16 = softmax(axis = var_199, x = aw_chunk_117_cast_fp16)[name = tensor("op_1432_cast_fp16")]; + tensor var_1433_cast_fp16 = softmax(axis = var_199, x = aw_chunk_119_cast_fp16)[name = tensor("op_1433_cast_fp16")]; + tensor var_1434_cast_fp16 = softmax(axis = var_199, x = aw_chunk_121_cast_fp16)[name = tensor("op_1434_cast_fp16")]; + tensor var_1435_cast_fp16 = softmax(axis = var_199, x = aw_chunk_123_cast_fp16)[name = tensor("op_1435_cast_fp16")]; + tensor var_1436_cast_fp16 = softmax(axis = var_199, x = aw_chunk_125_cast_fp16)[name = tensor("op_1436_cast_fp16")]; + tensor var_1437_cast_fp16 = softmax(axis = var_199, x = aw_chunk_127_cast_fp16)[name = tensor("op_1437_cast_fp16")]; + tensor var_1438_cast_fp16 = softmax(axis = var_199, x = aw_chunk_129_cast_fp16)[name = tensor("op_1438_cast_fp16")]; + tensor var_1439_cast_fp16 = softmax(axis = var_199, x = aw_chunk_131_cast_fp16)[name = tensor("op_1439_cast_fp16")]; + tensor var_1440_cast_fp16 = softmax(axis = var_199, x = aw_chunk_133_cast_fp16)[name = tensor("op_1440_cast_fp16")]; + tensor var_1441_cast_fp16 = softmax(axis = var_199, x = aw_chunk_135_cast_fp16)[name = tensor("op_1441_cast_fp16")]; + tensor var_1442_cast_fp16 = softmax(axis = var_199, x = aw_chunk_137_cast_fp16)[name = tensor("op_1442_cast_fp16")]; + tensor var_1443_cast_fp16 = softmax(axis = var_199, x = aw_chunk_139_cast_fp16)[name = tensor("op_1443_cast_fp16")]; + tensor var_1444_cast_fp16 = softmax(axis = var_199, x = aw_chunk_141_cast_fp16)[name = tensor("op_1444_cast_fp16")]; + tensor var_1445_cast_fp16 = softmax(axis = var_199, x = aw_chunk_143_cast_fp16)[name = tensor("op_1445_cast_fp16")]; + tensor var_1446_cast_fp16 = softmax(axis = var_199, x = aw_chunk_145_cast_fp16)[name = tensor("op_1446_cast_fp16")]; + tensor var_1447_cast_fp16 = softmax(axis = var_199, x = aw_chunk_147_cast_fp16)[name = tensor("op_1447_cast_fp16")]; + tensor var_1448_cast_fp16 = softmax(axis = var_199, x = aw_chunk_149_cast_fp16)[name = tensor("op_1448_cast_fp16")]; + tensor var_1449_cast_fp16 = softmax(axis = var_199, x = aw_chunk_151_cast_fp16)[name = tensor("op_1449_cast_fp16")]; + tensor var_1450_cast_fp16 = softmax(axis = var_199, x = aw_chunk_153_cast_fp16)[name = tensor("op_1450_cast_fp16")]; + tensor var_1451_cast_fp16 = softmax(axis = var_199, x = aw_chunk_155_cast_fp16)[name = tensor("op_1451_cast_fp16")]; + tensor var_1452_cast_fp16 = softmax(axis = var_199, x = aw_chunk_157_cast_fp16)[name = tensor("op_1452_cast_fp16")]; + tensor var_1453_cast_fp16 = softmax(axis = var_199, x = aw_chunk_159_cast_fp16)[name = tensor("op_1453_cast_fp16")]; + tensor var_1455_equation_0 = const()[name = tensor("op_1455_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1455_cast_fp16 = einsum(equation = var_1455_equation_0, values = (var_975_cast_fp16, var_1374_cast_fp16))[name = tensor("op_1455_cast_fp16")]; + tensor var_1457_equation_0 = const()[name = tensor("op_1457_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1457_cast_fp16 = einsum(equation = var_1457_equation_0, values = (var_975_cast_fp16, var_1375_cast_fp16))[name = tensor("op_1457_cast_fp16")]; + tensor var_1459_equation_0 = const()[name = tensor("op_1459_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1459_cast_fp16 = einsum(equation = var_1459_equation_0, values = (var_975_cast_fp16, var_1376_cast_fp16))[name = tensor("op_1459_cast_fp16")]; + tensor var_1461_equation_0 = const()[name = tensor("op_1461_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1461_cast_fp16 = einsum(equation = var_1461_equation_0, values = (var_975_cast_fp16, var_1377_cast_fp16))[name = tensor("op_1461_cast_fp16")]; + tensor var_1463_equation_0 = const()[name = tensor("op_1463_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1463_cast_fp16 = einsum(equation = var_1463_equation_0, values = (var_979_cast_fp16, var_1378_cast_fp16))[name = tensor("op_1463_cast_fp16")]; + tensor var_1465_equation_0 = const()[name = tensor("op_1465_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1465_cast_fp16 = einsum(equation = var_1465_equation_0, values = (var_979_cast_fp16, var_1379_cast_fp16))[name = tensor("op_1465_cast_fp16")]; + tensor var_1467_equation_0 = const()[name = tensor("op_1467_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1467_cast_fp16 = einsum(equation = var_1467_equation_0, values = (var_979_cast_fp16, var_1380_cast_fp16))[name = tensor("op_1467_cast_fp16")]; + tensor var_1469_equation_0 = const()[name = tensor("op_1469_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1469_cast_fp16 = einsum(equation = var_1469_equation_0, values = (var_979_cast_fp16, var_1381_cast_fp16))[name = tensor("op_1469_cast_fp16")]; + tensor var_1471_equation_0 = const()[name = tensor("op_1471_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1471_cast_fp16 = einsum(equation = var_1471_equation_0, values = (var_983_cast_fp16, var_1382_cast_fp16))[name = tensor("op_1471_cast_fp16")]; + tensor var_1473_equation_0 = const()[name = tensor("op_1473_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1473_cast_fp16 = einsum(equation = var_1473_equation_0, values = (var_983_cast_fp16, var_1383_cast_fp16))[name = tensor("op_1473_cast_fp16")]; + tensor var_1475_equation_0 = const()[name = tensor("op_1475_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1475_cast_fp16 = einsum(equation = var_1475_equation_0, values = (var_983_cast_fp16, var_1384_cast_fp16))[name = tensor("op_1475_cast_fp16")]; + tensor var_1477_equation_0 = const()[name = tensor("op_1477_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1477_cast_fp16 = einsum(equation = var_1477_equation_0, values = (var_983_cast_fp16, var_1385_cast_fp16))[name = tensor("op_1477_cast_fp16")]; + tensor var_1479_equation_0 = const()[name = tensor("op_1479_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1479_cast_fp16 = einsum(equation = var_1479_equation_0, values = (var_987_cast_fp16, var_1386_cast_fp16))[name = tensor("op_1479_cast_fp16")]; + tensor var_1481_equation_0 = const()[name = tensor("op_1481_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1481_cast_fp16 = einsum(equation = var_1481_equation_0, values = (var_987_cast_fp16, var_1387_cast_fp16))[name = tensor("op_1481_cast_fp16")]; + tensor var_1483_equation_0 = const()[name = tensor("op_1483_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1483_cast_fp16 = einsum(equation = var_1483_equation_0, values = (var_987_cast_fp16, var_1388_cast_fp16))[name = tensor("op_1483_cast_fp16")]; + tensor var_1485_equation_0 = const()[name = tensor("op_1485_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1485_cast_fp16 = einsum(equation = var_1485_equation_0, values = (var_987_cast_fp16, var_1389_cast_fp16))[name = tensor("op_1485_cast_fp16")]; + tensor var_1487_equation_0 = const()[name = tensor("op_1487_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1487_cast_fp16 = einsum(equation = var_1487_equation_0, values = (var_991_cast_fp16, var_1390_cast_fp16))[name = tensor("op_1487_cast_fp16")]; + tensor var_1489_equation_0 = const()[name = tensor("op_1489_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1489_cast_fp16 = einsum(equation = var_1489_equation_0, values = (var_991_cast_fp16, var_1391_cast_fp16))[name = tensor("op_1489_cast_fp16")]; + tensor var_1491_equation_0 = const()[name = tensor("op_1491_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1491_cast_fp16 = einsum(equation = var_1491_equation_0, values = (var_991_cast_fp16, var_1392_cast_fp16))[name = tensor("op_1491_cast_fp16")]; + tensor var_1493_equation_0 = const()[name = tensor("op_1493_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1493_cast_fp16 = einsum(equation = var_1493_equation_0, values = (var_991_cast_fp16, var_1393_cast_fp16))[name = tensor("op_1493_cast_fp16")]; + tensor var_1495_equation_0 = const()[name = tensor("op_1495_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1495_cast_fp16 = einsum(equation = var_1495_equation_0, values = (var_995_cast_fp16, var_1394_cast_fp16))[name = tensor("op_1495_cast_fp16")]; + tensor var_1497_equation_0 = const()[name = tensor("op_1497_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1497_cast_fp16 = einsum(equation = var_1497_equation_0, values = (var_995_cast_fp16, var_1395_cast_fp16))[name = tensor("op_1497_cast_fp16")]; + tensor var_1499_equation_0 = const()[name = tensor("op_1499_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1499_cast_fp16 = einsum(equation = var_1499_equation_0, values = (var_995_cast_fp16, var_1396_cast_fp16))[name = tensor("op_1499_cast_fp16")]; + tensor var_1501_equation_0 = const()[name = tensor("op_1501_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1501_cast_fp16 = einsum(equation = var_1501_equation_0, values = (var_995_cast_fp16, var_1397_cast_fp16))[name = tensor("op_1501_cast_fp16")]; + tensor var_1503_equation_0 = const()[name = tensor("op_1503_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1503_cast_fp16 = einsum(equation = var_1503_equation_0, values = (var_999_cast_fp16, var_1398_cast_fp16))[name = tensor("op_1503_cast_fp16")]; + tensor var_1505_equation_0 = const()[name = tensor("op_1505_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1505_cast_fp16 = einsum(equation = var_1505_equation_0, values = (var_999_cast_fp16, var_1399_cast_fp16))[name = tensor("op_1505_cast_fp16")]; + tensor var_1507_equation_0 = const()[name = tensor("op_1507_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1507_cast_fp16 = einsum(equation = var_1507_equation_0, values = (var_999_cast_fp16, var_1400_cast_fp16))[name = tensor("op_1507_cast_fp16")]; + tensor var_1509_equation_0 = const()[name = tensor("op_1509_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1509_cast_fp16 = einsum(equation = var_1509_equation_0, values = (var_999_cast_fp16, var_1401_cast_fp16))[name = tensor("op_1509_cast_fp16")]; + tensor var_1511_equation_0 = const()[name = tensor("op_1511_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1511_cast_fp16 = einsum(equation = var_1511_equation_0, values = (var_1003_cast_fp16, var_1402_cast_fp16))[name = tensor("op_1511_cast_fp16")]; + tensor var_1513_equation_0 = const()[name = tensor("op_1513_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1513_cast_fp16 = einsum(equation = var_1513_equation_0, values = (var_1003_cast_fp16, var_1403_cast_fp16))[name = tensor("op_1513_cast_fp16")]; + tensor var_1515_equation_0 = const()[name = tensor("op_1515_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1515_cast_fp16 = einsum(equation = var_1515_equation_0, values = (var_1003_cast_fp16, var_1404_cast_fp16))[name = tensor("op_1515_cast_fp16")]; + tensor var_1517_equation_0 = const()[name = tensor("op_1517_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1517_cast_fp16 = einsum(equation = var_1517_equation_0, values = (var_1003_cast_fp16, var_1405_cast_fp16))[name = tensor("op_1517_cast_fp16")]; + tensor var_1519_equation_0 = const()[name = tensor("op_1519_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1519_cast_fp16 = einsum(equation = var_1519_equation_0, values = (var_1007_cast_fp16, var_1406_cast_fp16))[name = tensor("op_1519_cast_fp16")]; + tensor var_1521_equation_0 = const()[name = tensor("op_1521_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1521_cast_fp16 = einsum(equation = var_1521_equation_0, values = (var_1007_cast_fp16, var_1407_cast_fp16))[name = tensor("op_1521_cast_fp16")]; + tensor var_1523_equation_0 = const()[name = tensor("op_1523_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1523_cast_fp16 = einsum(equation = var_1523_equation_0, values = (var_1007_cast_fp16, var_1408_cast_fp16))[name = tensor("op_1523_cast_fp16")]; + tensor var_1525_equation_0 = const()[name = tensor("op_1525_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1525_cast_fp16 = einsum(equation = var_1525_equation_0, values = (var_1007_cast_fp16, var_1409_cast_fp16))[name = tensor("op_1525_cast_fp16")]; + tensor var_1527_equation_0 = const()[name = tensor("op_1527_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1527_cast_fp16 = einsum(equation = var_1527_equation_0, values = (var_1011_cast_fp16, var_1410_cast_fp16))[name = tensor("op_1527_cast_fp16")]; + tensor var_1529_equation_0 = const()[name = tensor("op_1529_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1529_cast_fp16 = einsum(equation = var_1529_equation_0, values = (var_1011_cast_fp16, var_1411_cast_fp16))[name = tensor("op_1529_cast_fp16")]; + tensor var_1531_equation_0 = const()[name = tensor("op_1531_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1531_cast_fp16 = einsum(equation = var_1531_equation_0, values = (var_1011_cast_fp16, var_1412_cast_fp16))[name = tensor("op_1531_cast_fp16")]; + tensor var_1533_equation_0 = const()[name = tensor("op_1533_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1533_cast_fp16 = einsum(equation = var_1533_equation_0, values = (var_1011_cast_fp16, var_1413_cast_fp16))[name = tensor("op_1533_cast_fp16")]; + tensor var_1535_equation_0 = const()[name = tensor("op_1535_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1535_cast_fp16 = einsum(equation = var_1535_equation_0, values = (var_1015_cast_fp16, var_1414_cast_fp16))[name = tensor("op_1535_cast_fp16")]; + tensor var_1537_equation_0 = const()[name = tensor("op_1537_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1537_cast_fp16 = einsum(equation = var_1537_equation_0, values = (var_1015_cast_fp16, var_1415_cast_fp16))[name = tensor("op_1537_cast_fp16")]; + tensor var_1539_equation_0 = const()[name = tensor("op_1539_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1539_cast_fp16 = einsum(equation = var_1539_equation_0, values = (var_1015_cast_fp16, var_1416_cast_fp16))[name = tensor("op_1539_cast_fp16")]; + tensor var_1541_equation_0 = const()[name = tensor("op_1541_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1541_cast_fp16 = einsum(equation = var_1541_equation_0, values = (var_1015_cast_fp16, var_1417_cast_fp16))[name = tensor("op_1541_cast_fp16")]; + tensor var_1543_equation_0 = const()[name = tensor("op_1543_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1543_cast_fp16 = einsum(equation = var_1543_equation_0, values = (var_1019_cast_fp16, var_1418_cast_fp16))[name = tensor("op_1543_cast_fp16")]; + tensor var_1545_equation_0 = const()[name = tensor("op_1545_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1545_cast_fp16 = einsum(equation = var_1545_equation_0, values = (var_1019_cast_fp16, var_1419_cast_fp16))[name = tensor("op_1545_cast_fp16")]; + tensor var_1547_equation_0 = const()[name = tensor("op_1547_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1547_cast_fp16 = einsum(equation = var_1547_equation_0, values = (var_1019_cast_fp16, var_1420_cast_fp16))[name = tensor("op_1547_cast_fp16")]; + tensor var_1549_equation_0 = const()[name = tensor("op_1549_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1549_cast_fp16 = einsum(equation = var_1549_equation_0, values = (var_1019_cast_fp16, var_1421_cast_fp16))[name = tensor("op_1549_cast_fp16")]; + tensor var_1551_equation_0 = const()[name = tensor("op_1551_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1551_cast_fp16 = einsum(equation = var_1551_equation_0, values = (var_1023_cast_fp16, var_1422_cast_fp16))[name = tensor("op_1551_cast_fp16")]; + tensor var_1553_equation_0 = const()[name = tensor("op_1553_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1553_cast_fp16 = einsum(equation = var_1553_equation_0, values = (var_1023_cast_fp16, var_1423_cast_fp16))[name = tensor("op_1553_cast_fp16")]; + tensor var_1555_equation_0 = const()[name = tensor("op_1555_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1555_cast_fp16 = einsum(equation = var_1555_equation_0, values = (var_1023_cast_fp16, var_1424_cast_fp16))[name = tensor("op_1555_cast_fp16")]; + tensor var_1557_equation_0 = const()[name = tensor("op_1557_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1557_cast_fp16 = einsum(equation = var_1557_equation_0, values = (var_1023_cast_fp16, var_1425_cast_fp16))[name = tensor("op_1557_cast_fp16")]; + tensor var_1559_equation_0 = const()[name = tensor("op_1559_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1559_cast_fp16 = einsum(equation = var_1559_equation_0, values = (var_1027_cast_fp16, var_1426_cast_fp16))[name = tensor("op_1559_cast_fp16")]; + tensor var_1561_equation_0 = const()[name = tensor("op_1561_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1561_cast_fp16 = einsum(equation = var_1561_equation_0, values = (var_1027_cast_fp16, var_1427_cast_fp16))[name = tensor("op_1561_cast_fp16")]; + tensor var_1563_equation_0 = const()[name = tensor("op_1563_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1563_cast_fp16 = einsum(equation = var_1563_equation_0, values = (var_1027_cast_fp16, var_1428_cast_fp16))[name = tensor("op_1563_cast_fp16")]; + tensor var_1565_equation_0 = const()[name = tensor("op_1565_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1565_cast_fp16 = einsum(equation = var_1565_equation_0, values = (var_1027_cast_fp16, var_1429_cast_fp16))[name = tensor("op_1565_cast_fp16")]; + tensor var_1567_equation_0 = const()[name = tensor("op_1567_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1567_cast_fp16 = einsum(equation = var_1567_equation_0, values = (var_1031_cast_fp16, var_1430_cast_fp16))[name = tensor("op_1567_cast_fp16")]; + tensor var_1569_equation_0 = const()[name = tensor("op_1569_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1569_cast_fp16 = einsum(equation = var_1569_equation_0, values = (var_1031_cast_fp16, var_1431_cast_fp16))[name = tensor("op_1569_cast_fp16")]; + tensor var_1571_equation_0 = const()[name = tensor("op_1571_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1571_cast_fp16 = einsum(equation = var_1571_equation_0, values = (var_1031_cast_fp16, var_1432_cast_fp16))[name = tensor("op_1571_cast_fp16")]; + tensor var_1573_equation_0 = const()[name = tensor("op_1573_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1573_cast_fp16 = einsum(equation = var_1573_equation_0, values = (var_1031_cast_fp16, var_1433_cast_fp16))[name = tensor("op_1573_cast_fp16")]; + tensor var_1575_equation_0 = const()[name = tensor("op_1575_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1575_cast_fp16 = einsum(equation = var_1575_equation_0, values = (var_1035_cast_fp16, var_1434_cast_fp16))[name = tensor("op_1575_cast_fp16")]; + tensor var_1577_equation_0 = const()[name = tensor("op_1577_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1577_cast_fp16 = einsum(equation = var_1577_equation_0, values = (var_1035_cast_fp16, var_1435_cast_fp16))[name = tensor("op_1577_cast_fp16")]; + tensor var_1579_equation_0 = const()[name = tensor("op_1579_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1579_cast_fp16 = einsum(equation = var_1579_equation_0, values = (var_1035_cast_fp16, var_1436_cast_fp16))[name = tensor("op_1579_cast_fp16")]; + tensor var_1581_equation_0 = const()[name = tensor("op_1581_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1581_cast_fp16 = einsum(equation = var_1581_equation_0, values = (var_1035_cast_fp16, var_1437_cast_fp16))[name = tensor("op_1581_cast_fp16")]; + tensor var_1583_equation_0 = const()[name = tensor("op_1583_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1583_cast_fp16 = einsum(equation = var_1583_equation_0, values = (var_1039_cast_fp16, var_1438_cast_fp16))[name = tensor("op_1583_cast_fp16")]; + tensor var_1585_equation_0 = const()[name = tensor("op_1585_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1585_cast_fp16 = einsum(equation = var_1585_equation_0, values = (var_1039_cast_fp16, var_1439_cast_fp16))[name = tensor("op_1585_cast_fp16")]; + tensor var_1587_equation_0 = const()[name = tensor("op_1587_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1587_cast_fp16 = einsum(equation = var_1587_equation_0, values = (var_1039_cast_fp16, var_1440_cast_fp16))[name = tensor("op_1587_cast_fp16")]; + tensor var_1589_equation_0 = const()[name = tensor("op_1589_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1589_cast_fp16 = einsum(equation = var_1589_equation_0, values = (var_1039_cast_fp16, var_1441_cast_fp16))[name = tensor("op_1589_cast_fp16")]; + tensor var_1591_equation_0 = const()[name = tensor("op_1591_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1591_cast_fp16 = einsum(equation = var_1591_equation_0, values = (var_1043_cast_fp16, var_1442_cast_fp16))[name = tensor("op_1591_cast_fp16")]; + tensor var_1593_equation_0 = const()[name = tensor("op_1593_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1593_cast_fp16 = einsum(equation = var_1593_equation_0, values = (var_1043_cast_fp16, var_1443_cast_fp16))[name = tensor("op_1593_cast_fp16")]; + tensor var_1595_equation_0 = const()[name = tensor("op_1595_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1595_cast_fp16 = einsum(equation = var_1595_equation_0, values = (var_1043_cast_fp16, var_1444_cast_fp16))[name = tensor("op_1595_cast_fp16")]; + tensor var_1597_equation_0 = const()[name = tensor("op_1597_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1597_cast_fp16 = einsum(equation = var_1597_equation_0, values = (var_1043_cast_fp16, var_1445_cast_fp16))[name = tensor("op_1597_cast_fp16")]; + tensor var_1599_equation_0 = const()[name = tensor("op_1599_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1599_cast_fp16 = einsum(equation = var_1599_equation_0, values = (var_1047_cast_fp16, var_1446_cast_fp16))[name = tensor("op_1599_cast_fp16")]; + tensor var_1601_equation_0 = const()[name = tensor("op_1601_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1601_cast_fp16 = einsum(equation = var_1601_equation_0, values = (var_1047_cast_fp16, var_1447_cast_fp16))[name = tensor("op_1601_cast_fp16")]; + tensor var_1603_equation_0 = const()[name = tensor("op_1603_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1603_cast_fp16 = einsum(equation = var_1603_equation_0, values = (var_1047_cast_fp16, var_1448_cast_fp16))[name = tensor("op_1603_cast_fp16")]; + tensor var_1605_equation_0 = const()[name = tensor("op_1605_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1605_cast_fp16 = einsum(equation = var_1605_equation_0, values = (var_1047_cast_fp16, var_1449_cast_fp16))[name = tensor("op_1605_cast_fp16")]; + tensor var_1607_equation_0 = const()[name = tensor("op_1607_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1607_cast_fp16 = einsum(equation = var_1607_equation_0, values = (var_1051_cast_fp16, var_1450_cast_fp16))[name = tensor("op_1607_cast_fp16")]; + tensor var_1609_equation_0 = const()[name = tensor("op_1609_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1609_cast_fp16 = einsum(equation = var_1609_equation_0, values = (var_1051_cast_fp16, var_1451_cast_fp16))[name = tensor("op_1609_cast_fp16")]; + tensor var_1611_equation_0 = const()[name = tensor("op_1611_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1611_cast_fp16 = einsum(equation = var_1611_equation_0, values = (var_1051_cast_fp16, var_1452_cast_fp16))[name = tensor("op_1611_cast_fp16")]; + tensor var_1613_equation_0 = const()[name = tensor("op_1613_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1613_cast_fp16 = einsum(equation = var_1613_equation_0, values = (var_1051_cast_fp16, var_1453_cast_fp16))[name = tensor("op_1613_cast_fp16")]; + tensor var_1615_interleave_0 = const()[name = tensor("op_1615_interleave_0"), val = tensor(false)]; + tensor var_1615_cast_fp16 = concat(axis = var_174, interleave = var_1615_interleave_0, values = (var_1455_cast_fp16, var_1457_cast_fp16, var_1459_cast_fp16, var_1461_cast_fp16))[name = tensor("op_1615_cast_fp16")]; + tensor var_1617_interleave_0 = const()[name = tensor("op_1617_interleave_0"), val = tensor(false)]; + tensor var_1617_cast_fp16 = concat(axis = var_174, interleave = var_1617_interleave_0, values = (var_1463_cast_fp16, var_1465_cast_fp16, var_1467_cast_fp16, var_1469_cast_fp16))[name = tensor("op_1617_cast_fp16")]; + tensor var_1619_interleave_0 = const()[name = tensor("op_1619_interleave_0"), val = tensor(false)]; + tensor var_1619_cast_fp16 = concat(axis = var_174, interleave = var_1619_interleave_0, values = (var_1471_cast_fp16, var_1473_cast_fp16, var_1475_cast_fp16, var_1477_cast_fp16))[name = tensor("op_1619_cast_fp16")]; + tensor var_1621_interleave_0 = const()[name = tensor("op_1621_interleave_0"), val = tensor(false)]; + tensor var_1621_cast_fp16 = concat(axis = var_174, interleave = var_1621_interleave_0, values = (var_1479_cast_fp16, var_1481_cast_fp16, var_1483_cast_fp16, var_1485_cast_fp16))[name = tensor("op_1621_cast_fp16")]; + tensor var_1623_interleave_0 = const()[name = tensor("op_1623_interleave_0"), val = tensor(false)]; + tensor var_1623_cast_fp16 = concat(axis = var_174, interleave = var_1623_interleave_0, values = (var_1487_cast_fp16, var_1489_cast_fp16, var_1491_cast_fp16, var_1493_cast_fp16))[name = tensor("op_1623_cast_fp16")]; + tensor var_1625_interleave_0 = const()[name = tensor("op_1625_interleave_0"), val = tensor(false)]; + tensor var_1625_cast_fp16 = concat(axis = var_174, interleave = var_1625_interleave_0, values = (var_1495_cast_fp16, var_1497_cast_fp16, var_1499_cast_fp16, var_1501_cast_fp16))[name = tensor("op_1625_cast_fp16")]; + tensor var_1627_interleave_0 = const()[name = tensor("op_1627_interleave_0"), val = tensor(false)]; + tensor var_1627_cast_fp16 = concat(axis = var_174, interleave = var_1627_interleave_0, values = (var_1503_cast_fp16, var_1505_cast_fp16, var_1507_cast_fp16, var_1509_cast_fp16))[name = tensor("op_1627_cast_fp16")]; + tensor var_1629_interleave_0 = const()[name = tensor("op_1629_interleave_0"), val = tensor(false)]; + tensor var_1629_cast_fp16 = concat(axis = var_174, interleave = var_1629_interleave_0, values = (var_1511_cast_fp16, var_1513_cast_fp16, var_1515_cast_fp16, var_1517_cast_fp16))[name = tensor("op_1629_cast_fp16")]; + tensor var_1631_interleave_0 = const()[name = tensor("op_1631_interleave_0"), val = tensor(false)]; + tensor var_1631_cast_fp16 = concat(axis = var_174, interleave = var_1631_interleave_0, values = (var_1519_cast_fp16, var_1521_cast_fp16, var_1523_cast_fp16, var_1525_cast_fp16))[name = tensor("op_1631_cast_fp16")]; + tensor var_1633_interleave_0 = const()[name = tensor("op_1633_interleave_0"), val = tensor(false)]; + tensor var_1633_cast_fp16 = concat(axis = var_174, interleave = var_1633_interleave_0, values = (var_1527_cast_fp16, var_1529_cast_fp16, var_1531_cast_fp16, var_1533_cast_fp16))[name = tensor("op_1633_cast_fp16")]; + tensor var_1635_interleave_0 = const()[name = tensor("op_1635_interleave_0"), val = tensor(false)]; + tensor var_1635_cast_fp16 = concat(axis = var_174, interleave = var_1635_interleave_0, values = (var_1535_cast_fp16, var_1537_cast_fp16, var_1539_cast_fp16, var_1541_cast_fp16))[name = tensor("op_1635_cast_fp16")]; + tensor var_1637_interleave_0 = const()[name = tensor("op_1637_interleave_0"), val = tensor(false)]; + tensor var_1637_cast_fp16 = concat(axis = var_174, interleave = var_1637_interleave_0, values = (var_1543_cast_fp16, var_1545_cast_fp16, var_1547_cast_fp16, var_1549_cast_fp16))[name = tensor("op_1637_cast_fp16")]; + tensor var_1639_interleave_0 = const()[name = tensor("op_1639_interleave_0"), val = tensor(false)]; + tensor var_1639_cast_fp16 = concat(axis = var_174, interleave = var_1639_interleave_0, values = (var_1551_cast_fp16, var_1553_cast_fp16, var_1555_cast_fp16, var_1557_cast_fp16))[name = tensor("op_1639_cast_fp16")]; + tensor var_1641_interleave_0 = const()[name = tensor("op_1641_interleave_0"), val = tensor(false)]; + tensor var_1641_cast_fp16 = concat(axis = var_174, interleave = var_1641_interleave_0, values = (var_1559_cast_fp16, var_1561_cast_fp16, var_1563_cast_fp16, var_1565_cast_fp16))[name = tensor("op_1641_cast_fp16")]; + tensor var_1643_interleave_0 = const()[name = tensor("op_1643_interleave_0"), val = tensor(false)]; + tensor var_1643_cast_fp16 = concat(axis = var_174, interleave = var_1643_interleave_0, values = (var_1567_cast_fp16, var_1569_cast_fp16, var_1571_cast_fp16, var_1573_cast_fp16))[name = tensor("op_1643_cast_fp16")]; + tensor var_1645_interleave_0 = const()[name = tensor("op_1645_interleave_0"), val = tensor(false)]; + tensor var_1645_cast_fp16 = concat(axis = var_174, interleave = var_1645_interleave_0, values = (var_1575_cast_fp16, var_1577_cast_fp16, var_1579_cast_fp16, var_1581_cast_fp16))[name = tensor("op_1645_cast_fp16")]; + tensor var_1647_interleave_0 = const()[name = tensor("op_1647_interleave_0"), val = tensor(false)]; + tensor var_1647_cast_fp16 = concat(axis = var_174, interleave = var_1647_interleave_0, values = (var_1583_cast_fp16, var_1585_cast_fp16, var_1587_cast_fp16, var_1589_cast_fp16))[name = tensor("op_1647_cast_fp16")]; + tensor var_1649_interleave_0 = const()[name = tensor("op_1649_interleave_0"), val = tensor(false)]; + tensor var_1649_cast_fp16 = concat(axis = var_174, interleave = var_1649_interleave_0, values = (var_1591_cast_fp16, var_1593_cast_fp16, var_1595_cast_fp16, var_1597_cast_fp16))[name = tensor("op_1649_cast_fp16")]; + tensor var_1651_interleave_0 = const()[name = tensor("op_1651_interleave_0"), val = tensor(false)]; + tensor var_1651_cast_fp16 = concat(axis = var_174, interleave = var_1651_interleave_0, values = (var_1599_cast_fp16, var_1601_cast_fp16, var_1603_cast_fp16, var_1605_cast_fp16))[name = tensor("op_1651_cast_fp16")]; + tensor var_1653_interleave_0 = const()[name = tensor("op_1653_interleave_0"), val = tensor(false)]; + tensor var_1653_cast_fp16 = concat(axis = var_174, interleave = var_1653_interleave_0, values = (var_1607_cast_fp16, var_1609_cast_fp16, var_1611_cast_fp16, var_1613_cast_fp16))[name = tensor("op_1653_cast_fp16")]; + tensor input_1_interleave_0 = const()[name = tensor("input_1_interleave_0"), val = tensor(false)]; + tensor input_1_cast_fp16 = concat(axis = var_199, interleave = input_1_interleave_0, values = (var_1615_cast_fp16, var_1617_cast_fp16, var_1619_cast_fp16, var_1621_cast_fp16, var_1623_cast_fp16, var_1625_cast_fp16, var_1627_cast_fp16, var_1629_cast_fp16, var_1631_cast_fp16, var_1633_cast_fp16, var_1635_cast_fp16, var_1637_cast_fp16, var_1639_cast_fp16, var_1641_cast_fp16, var_1643_cast_fp16, var_1645_cast_fp16, var_1647_cast_fp16, var_1649_cast_fp16, var_1651_cast_fp16, var_1653_cast_fp16))[name = tensor("input_1_cast_fp16")]; + tensor var_1658 = const()[name = tensor("op_1658"), val = tensor([1, 1])]; + tensor var_1660 = const()[name = tensor("op_1660"), val = tensor([1, 1])]; + tensor obj_3_pad_type_0 = const()[name = tensor("obj_3_pad_type_0"), val = tensor("custom")]; + tensor obj_3_pad_0 = const()[name = tensor("obj_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_0_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_0_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(24136640)))]; + tensor layers_0_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_0_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(27413504)))]; + tensor obj_3_cast_fp16 = conv(bias = layers_0_self_attn_o_proj_bias_to_fp16, dilations = var_1660, groups = var_199, pad = obj_3_pad_0, pad_type = obj_3_pad_type_0, strides = var_1658, weight = layers_0_self_attn_o_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor("obj_3_cast_fp16")]; + tensor inputs_3_cast_fp16 = add(x = inputs_1_cast_fp16, y = obj_3_cast_fp16)[name = tensor("inputs_3_cast_fp16")]; + tensor var_1666 = const()[name = tensor("op_1666"), val = tensor([1])]; + tensor channels_mean_3_cast_fp16 = reduce_mean(axes = var_1666, keep_dims = var_200, x = inputs_3_cast_fp16)[name = tensor("channels_mean_3_cast_fp16")]; + tensor zero_mean_3_cast_fp16 = sub(x = inputs_3_cast_fp16, y = channels_mean_3_cast_fp16)[name = tensor("zero_mean_3_cast_fp16")]; + tensor zero_mean_sq_3_cast_fp16 = mul(x = zero_mean_3_cast_fp16, y = zero_mean_3_cast_fp16)[name = tensor("zero_mean_sq_3_cast_fp16")]; + tensor var_1670 = const()[name = tensor("op_1670"), val = tensor([1])]; + tensor var_1671_cast_fp16 = reduce_mean(axes = var_1670, keep_dims = var_200, x = zero_mean_sq_3_cast_fp16)[name = tensor("op_1671_cast_fp16")]; + tensor var_1672_to_fp16 = const()[name = tensor("op_1672_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_1673_cast_fp16 = add(x = var_1671_cast_fp16, y = var_1672_to_fp16)[name = tensor("op_1673_cast_fp16")]; + tensor denom_3_epsilon_0_to_fp16 = const()[name = tensor("denom_3_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_3_cast_fp16 = rsqrt(epsilon = denom_3_epsilon_0_to_fp16, x = var_1673_cast_fp16)[name = tensor("denom_3_cast_fp16")]; + tensor out_3_cast_fp16 = mul(x = zero_mean_3_cast_fp16, y = denom_3_cast_fp16)[name = tensor("out_3_cast_fp16")]; + tensor input_3_gamma_0_to_fp16 = const()[name = tensor("input_3_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(27416128)))]; + tensor input_3_beta_0_to_fp16 = const()[name = tensor("input_3_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(27418752)))]; + tensor input_3_epsilon_0_to_fp16 = const()[name = tensor("input_3_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_3_cast_fp16 = batch_norm(beta = input_3_beta_0_to_fp16, epsilon = input_3_epsilon_0_to_fp16, gamma = input_3_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_3_cast_fp16)[name = tensor("input_3_cast_fp16")]; + tensor var_1684 = const()[name = tensor("op_1684"), val = tensor([1, 1])]; + tensor var_1686 = const()[name = tensor("op_1686"), val = tensor([1, 1])]; + tensor input_5_pad_type_0 = const()[name = tensor("input_5_pad_type_0"), val = tensor("custom")]; + tensor input_5_pad_0 = const()[name = tensor("input_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_0_fc1_weight_to_fp16 = const()[name = tensor("layers_0_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(27421376)))]; + tensor layers_0_fc1_bias_to_fp16 = const()[name = tensor("layers_0_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40528640)))]; + tensor input_5_cast_fp16 = conv(bias = layers_0_fc1_bias_to_fp16, dilations = var_1686, groups = var_199, pad = input_5_pad_0, pad_type = input_5_pad_type_0, strides = var_1684, weight = layers_0_fc1_weight_to_fp16, x = input_3_cast_fp16)[name = tensor("input_5_cast_fp16")]; + tensor input_7_mode_0 = const()[name = tensor("input_7_mode_0"), val = tensor("EXACT")]; + tensor input_7_cast_fp16 = gelu(mode = input_7_mode_0, x = input_5_cast_fp16)[name = tensor("input_7_cast_fp16")]; + tensor var_1692 = const()[name = tensor("op_1692"), val = tensor([1, 1])]; + tensor var_1694 = const()[name = tensor("op_1694"), val = tensor([1, 1])]; + tensor hidden_states_5_pad_type_0 = const()[name = tensor("hidden_states_5_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_5_pad_0 = const()[name = tensor("hidden_states_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_0_fc2_weight_to_fp16 = const()[name = tensor("layers_0_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(40538944)))]; + tensor layers_0_fc2_bias_to_fp16 = const()[name = tensor("layers_0_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53646208)))]; + tensor hidden_states_5_cast_fp16 = conv(bias = layers_0_fc2_bias_to_fp16, dilations = var_1694, groups = var_199, pad = hidden_states_5_pad_0, pad_type = hidden_states_5_pad_type_0, strides = var_1692, weight = layers_0_fc2_weight_to_fp16, x = input_7_cast_fp16)[name = tensor("hidden_states_5_cast_fp16")]; + tensor inputs_5_cast_fp16 = add(x = inputs_3_cast_fp16, y = hidden_states_5_cast_fp16)[name = tensor("inputs_5_cast_fp16")]; + tensor var_1701 = const()[name = tensor("op_1701"), val = tensor(3)]; + tensor var_1726 = const()[name = tensor("op_1726"), val = tensor(1)]; + tensor var_1727 = const()[name = tensor("op_1727"), val = tensor(true)]; + tensor var_1737 = const()[name = tensor("op_1737"), val = tensor([1])]; + tensor channels_mean_5_cast_fp16 = reduce_mean(axes = var_1737, keep_dims = var_1727, x = inputs_5_cast_fp16)[name = tensor("channels_mean_5_cast_fp16")]; + tensor zero_mean_5_cast_fp16 = sub(x = inputs_5_cast_fp16, y = channels_mean_5_cast_fp16)[name = tensor("zero_mean_5_cast_fp16")]; + tensor zero_mean_sq_5_cast_fp16 = mul(x = zero_mean_5_cast_fp16, y = zero_mean_5_cast_fp16)[name = tensor("zero_mean_sq_5_cast_fp16")]; + tensor var_1741 = const()[name = tensor("op_1741"), val = tensor([1])]; + tensor var_1742_cast_fp16 = reduce_mean(axes = var_1741, keep_dims = var_1727, x = zero_mean_sq_5_cast_fp16)[name = tensor("op_1742_cast_fp16")]; + tensor var_1743_to_fp16 = const()[name = tensor("op_1743_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_1744_cast_fp16 = add(x = var_1742_cast_fp16, y = var_1743_to_fp16)[name = tensor("op_1744_cast_fp16")]; + tensor denom_5_epsilon_0_to_fp16 = const()[name = tensor("denom_5_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_5_cast_fp16 = rsqrt(epsilon = denom_5_epsilon_0_to_fp16, x = var_1744_cast_fp16)[name = tensor("denom_5_cast_fp16")]; + tensor out_5_cast_fp16 = mul(x = zero_mean_5_cast_fp16, y = denom_5_cast_fp16)[name = tensor("out_5_cast_fp16")]; + tensor obj_5_gamma_0_to_fp16 = const()[name = tensor("obj_5_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53648832)))]; + tensor obj_5_beta_0_to_fp16 = const()[name = tensor("obj_5_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53651456)))]; + tensor obj_5_epsilon_0_to_fp16 = const()[name = tensor("obj_5_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_5_cast_fp16 = batch_norm(beta = obj_5_beta_0_to_fp16, epsilon = obj_5_epsilon_0_to_fp16, gamma = obj_5_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_5_cast_fp16)[name = tensor("obj_5_cast_fp16")]; + tensor var_1759 = const()[name = tensor("op_1759"), val = tensor([1, 1])]; + tensor var_1761 = const()[name = tensor("op_1761"), val = tensor([1, 1])]; + tensor query_3_pad_type_0 = const()[name = tensor("query_3_pad_type_0"), val = tensor("custom")]; + tensor query_3_pad_0 = const()[name = tensor("query_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_1_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_1_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(53654080)))]; + tensor layers_1_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_1_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(56930944)))]; + tensor query_3_cast_fp16 = conv(bias = layers_1_self_attn_q_proj_bias_to_fp16, dilations = var_1761, groups = var_1726, pad = query_3_pad_0, pad_type = query_3_pad_type_0, strides = var_1759, weight = layers_1_self_attn_q_proj_weight_to_fp16, x = obj_5_cast_fp16)[name = tensor("query_3_cast_fp16")]; + tensor var_1765 = const()[name = tensor("op_1765"), val = tensor([1, 1])]; + tensor var_1767 = const()[name = tensor("op_1767"), val = tensor([1, 1])]; + tensor key_3_pad_type_0 = const()[name = tensor("key_3_pad_type_0"), val = tensor("custom")]; + tensor key_3_pad_0 = const()[name = tensor("key_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_1_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_1_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(56933568)))]; + tensor key_3_cast_fp16 = conv(dilations = var_1767, groups = var_1726, pad = key_3_pad_0, pad_type = key_3_pad_type_0, strides = var_1765, weight = layers_1_self_attn_k_proj_weight_to_fp16, x = obj_5_cast_fp16)[name = tensor("key_3_cast_fp16")]; + tensor var_1772 = const()[name = tensor("op_1772"), val = tensor([1, 1])]; + tensor var_1774 = const()[name = tensor("op_1774"), val = tensor([1, 1])]; + tensor value_3_pad_type_0 = const()[name = tensor("value_3_pad_type_0"), val = tensor("custom")]; + tensor value_3_pad_0 = const()[name = tensor("value_3_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_1_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_1_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(60210432)))]; + tensor layers_1_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_1_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(63487296)))]; + tensor value_3_cast_fp16 = conv(bias = layers_1_self_attn_v_proj_bias_to_fp16, dilations = var_1774, groups = var_1726, pad = value_3_pad_0, pad_type = value_3_pad_type_0, strides = var_1772, weight = layers_1_self_attn_v_proj_weight_to_fp16, x = obj_5_cast_fp16)[name = tensor("value_3_cast_fp16")]; + tensor var_1781_begin_0 = const()[name = tensor("op_1781_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1781_end_0 = const()[name = tensor("op_1781_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_1781_end_mask_0 = const()[name = tensor("op_1781_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1781_cast_fp16 = slice_by_index(begin = var_1781_begin_0, end = var_1781_end_0, end_mask = var_1781_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_1781_cast_fp16")]; + tensor var_1785_begin_0 = const()[name = tensor("op_1785_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_1785_end_0 = const()[name = tensor("op_1785_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_1785_end_mask_0 = const()[name = tensor("op_1785_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1785_cast_fp16 = slice_by_index(begin = var_1785_begin_0, end = var_1785_end_0, end_mask = var_1785_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_1785_cast_fp16")]; + tensor var_1789_begin_0 = const()[name = tensor("op_1789_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_1789_end_0 = const()[name = tensor("op_1789_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_1789_end_mask_0 = const()[name = tensor("op_1789_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1789_cast_fp16 = slice_by_index(begin = var_1789_begin_0, end = var_1789_end_0, end_mask = var_1789_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_1789_cast_fp16")]; + tensor var_1793_begin_0 = const()[name = tensor("op_1793_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_1793_end_0 = const()[name = tensor("op_1793_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_1793_end_mask_0 = const()[name = tensor("op_1793_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1793_cast_fp16 = slice_by_index(begin = var_1793_begin_0, end = var_1793_end_0, end_mask = var_1793_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_1793_cast_fp16")]; + tensor var_1797_begin_0 = const()[name = tensor("op_1797_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_1797_end_0 = const()[name = tensor("op_1797_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_1797_end_mask_0 = const()[name = tensor("op_1797_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1797_cast_fp16 = slice_by_index(begin = var_1797_begin_0, end = var_1797_end_0, end_mask = var_1797_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_1797_cast_fp16")]; + tensor var_1801_begin_0 = const()[name = tensor("op_1801_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_1801_end_0 = const()[name = tensor("op_1801_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_1801_end_mask_0 = const()[name = tensor("op_1801_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1801_cast_fp16 = slice_by_index(begin = var_1801_begin_0, end = var_1801_end_0, end_mask = var_1801_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_1801_cast_fp16")]; + tensor var_1805_begin_0 = const()[name = tensor("op_1805_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_1805_end_0 = const()[name = tensor("op_1805_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_1805_end_mask_0 = const()[name = tensor("op_1805_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1805_cast_fp16 = slice_by_index(begin = var_1805_begin_0, end = var_1805_end_0, end_mask = var_1805_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_1805_cast_fp16")]; + tensor var_1809_begin_0 = const()[name = tensor("op_1809_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_1809_end_0 = const()[name = tensor("op_1809_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_1809_end_mask_0 = const()[name = tensor("op_1809_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1809_cast_fp16 = slice_by_index(begin = var_1809_begin_0, end = var_1809_end_0, end_mask = var_1809_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_1809_cast_fp16")]; + tensor var_1813_begin_0 = const()[name = tensor("op_1813_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_1813_end_0 = const()[name = tensor("op_1813_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_1813_end_mask_0 = const()[name = tensor("op_1813_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1813_cast_fp16 = slice_by_index(begin = var_1813_begin_0, end = var_1813_end_0, end_mask = var_1813_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_1813_cast_fp16")]; + tensor var_1817_begin_0 = const()[name = tensor("op_1817_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_1817_end_0 = const()[name = tensor("op_1817_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_1817_end_mask_0 = const()[name = tensor("op_1817_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1817_cast_fp16 = slice_by_index(begin = var_1817_begin_0, end = var_1817_end_0, end_mask = var_1817_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_1817_cast_fp16")]; + tensor var_1821_begin_0 = const()[name = tensor("op_1821_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_1821_end_0 = const()[name = tensor("op_1821_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_1821_end_mask_0 = const()[name = tensor("op_1821_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1821_cast_fp16 = slice_by_index(begin = var_1821_begin_0, end = var_1821_end_0, end_mask = var_1821_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_1821_cast_fp16")]; + tensor var_1825_begin_0 = const()[name = tensor("op_1825_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_1825_end_0 = const()[name = tensor("op_1825_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_1825_end_mask_0 = const()[name = tensor("op_1825_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1825_cast_fp16 = slice_by_index(begin = var_1825_begin_0, end = var_1825_end_0, end_mask = var_1825_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_1825_cast_fp16")]; + tensor var_1829_begin_0 = const()[name = tensor("op_1829_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_1829_end_0 = const()[name = tensor("op_1829_end_0"), val = tensor([1, 832, 1, 1500])]; + tensor var_1829_end_mask_0 = const()[name = tensor("op_1829_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1829_cast_fp16 = slice_by_index(begin = var_1829_begin_0, end = var_1829_end_0, end_mask = var_1829_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_1829_cast_fp16")]; + tensor var_1833_begin_0 = const()[name = tensor("op_1833_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_1833_end_0 = const()[name = tensor("op_1833_end_0"), val = tensor([1, 896, 1, 1500])]; + tensor var_1833_end_mask_0 = const()[name = tensor("op_1833_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1833_cast_fp16 = slice_by_index(begin = var_1833_begin_0, end = var_1833_end_0, end_mask = var_1833_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_1833_cast_fp16")]; + tensor var_1837_begin_0 = const()[name = tensor("op_1837_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_1837_end_0 = const()[name = tensor("op_1837_end_0"), val = tensor([1, 960, 1, 1500])]; + tensor var_1837_end_mask_0 = const()[name = tensor("op_1837_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1837_cast_fp16 = slice_by_index(begin = var_1837_begin_0, end = var_1837_end_0, end_mask = var_1837_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_1837_cast_fp16")]; + tensor var_1841_begin_0 = const()[name = tensor("op_1841_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_1841_end_0 = const()[name = tensor("op_1841_end_0"), val = tensor([1, 1024, 1, 1500])]; + tensor var_1841_end_mask_0 = const()[name = tensor("op_1841_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1841_cast_fp16 = slice_by_index(begin = var_1841_begin_0, end = var_1841_end_0, end_mask = var_1841_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_1841_cast_fp16")]; + tensor var_1845_begin_0 = const()[name = tensor("op_1845_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_1845_end_0 = const()[name = tensor("op_1845_end_0"), val = tensor([1, 1088, 1, 1500])]; + tensor var_1845_end_mask_0 = const()[name = tensor("op_1845_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1845_cast_fp16 = slice_by_index(begin = var_1845_begin_0, end = var_1845_end_0, end_mask = var_1845_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_1845_cast_fp16")]; + tensor var_1849_begin_0 = const()[name = tensor("op_1849_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_1849_end_0 = const()[name = tensor("op_1849_end_0"), val = tensor([1, 1152, 1, 1500])]; + tensor var_1849_end_mask_0 = const()[name = tensor("op_1849_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1849_cast_fp16 = slice_by_index(begin = var_1849_begin_0, end = var_1849_end_0, end_mask = var_1849_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_1849_cast_fp16")]; + tensor var_1853_begin_0 = const()[name = tensor("op_1853_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_1853_end_0 = const()[name = tensor("op_1853_end_0"), val = tensor([1, 1216, 1, 1500])]; + tensor var_1853_end_mask_0 = const()[name = tensor("op_1853_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1853_cast_fp16 = slice_by_index(begin = var_1853_begin_0, end = var_1853_end_0, end_mask = var_1853_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_1853_cast_fp16")]; + tensor var_1857_begin_0 = const()[name = tensor("op_1857_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_1857_end_0 = const()[name = tensor("op_1857_end_0"), val = tensor([1, 1280, 1, 1500])]; + tensor var_1857_end_mask_0 = const()[name = tensor("op_1857_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1857_cast_fp16 = slice_by_index(begin = var_1857_begin_0, end = var_1857_end_0, end_mask = var_1857_end_mask_0, x = query_3_cast_fp16)[name = tensor("op_1857_cast_fp16")]; + tensor var_1866_begin_0 = const()[name = tensor("op_1866_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1866_end_0 = const()[name = tensor("op_1866_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_1866_end_mask_0 = const()[name = tensor("op_1866_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1866_cast_fp16 = slice_by_index(begin = var_1866_begin_0, end = var_1866_end_0, end_mask = var_1866_end_mask_0, x = var_1781_cast_fp16)[name = tensor("op_1866_cast_fp16")]; + tensor var_1873_begin_0 = const()[name = tensor("op_1873_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_1873_end_0 = const()[name = tensor("op_1873_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_1873_end_mask_0 = const()[name = tensor("op_1873_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1873_cast_fp16 = slice_by_index(begin = var_1873_begin_0, end = var_1873_end_0, end_mask = var_1873_end_mask_0, x = var_1781_cast_fp16)[name = tensor("op_1873_cast_fp16")]; + tensor var_1880_begin_0 = const()[name = tensor("op_1880_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_1880_end_0 = const()[name = tensor("op_1880_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_1880_end_mask_0 = const()[name = tensor("op_1880_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1880_cast_fp16 = slice_by_index(begin = var_1880_begin_0, end = var_1880_end_0, end_mask = var_1880_end_mask_0, x = var_1781_cast_fp16)[name = tensor("op_1880_cast_fp16")]; + tensor var_1887_begin_0 = const()[name = tensor("op_1887_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_1887_end_0 = const()[name = tensor("op_1887_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_1887_end_mask_0 = const()[name = tensor("op_1887_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1887_cast_fp16 = slice_by_index(begin = var_1887_begin_0, end = var_1887_end_0, end_mask = var_1887_end_mask_0, x = var_1781_cast_fp16)[name = tensor("op_1887_cast_fp16")]; + tensor var_1894_begin_0 = const()[name = tensor("op_1894_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1894_end_0 = const()[name = tensor("op_1894_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_1894_end_mask_0 = const()[name = tensor("op_1894_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1894_cast_fp16 = slice_by_index(begin = var_1894_begin_0, end = var_1894_end_0, end_mask = var_1894_end_mask_0, x = var_1785_cast_fp16)[name = tensor("op_1894_cast_fp16")]; + tensor var_1901_begin_0 = const()[name = tensor("op_1901_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_1901_end_0 = const()[name = tensor("op_1901_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_1901_end_mask_0 = const()[name = tensor("op_1901_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1901_cast_fp16 = slice_by_index(begin = var_1901_begin_0, end = var_1901_end_0, end_mask = var_1901_end_mask_0, x = var_1785_cast_fp16)[name = tensor("op_1901_cast_fp16")]; + tensor var_1908_begin_0 = const()[name = tensor("op_1908_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_1908_end_0 = const()[name = tensor("op_1908_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_1908_end_mask_0 = const()[name = tensor("op_1908_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1908_cast_fp16 = slice_by_index(begin = var_1908_begin_0, end = var_1908_end_0, end_mask = var_1908_end_mask_0, x = var_1785_cast_fp16)[name = tensor("op_1908_cast_fp16")]; + tensor var_1915_begin_0 = const()[name = tensor("op_1915_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_1915_end_0 = const()[name = tensor("op_1915_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_1915_end_mask_0 = const()[name = tensor("op_1915_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1915_cast_fp16 = slice_by_index(begin = var_1915_begin_0, end = var_1915_end_0, end_mask = var_1915_end_mask_0, x = var_1785_cast_fp16)[name = tensor("op_1915_cast_fp16")]; + tensor var_1922_begin_0 = const()[name = tensor("op_1922_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1922_end_0 = const()[name = tensor("op_1922_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_1922_end_mask_0 = const()[name = tensor("op_1922_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1922_cast_fp16 = slice_by_index(begin = var_1922_begin_0, end = var_1922_end_0, end_mask = var_1922_end_mask_0, x = var_1789_cast_fp16)[name = tensor("op_1922_cast_fp16")]; + tensor var_1929_begin_0 = const()[name = tensor("op_1929_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_1929_end_0 = const()[name = tensor("op_1929_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_1929_end_mask_0 = const()[name = tensor("op_1929_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1929_cast_fp16 = slice_by_index(begin = var_1929_begin_0, end = var_1929_end_0, end_mask = var_1929_end_mask_0, x = var_1789_cast_fp16)[name = tensor("op_1929_cast_fp16")]; + tensor var_1936_begin_0 = const()[name = tensor("op_1936_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_1936_end_0 = const()[name = tensor("op_1936_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_1936_end_mask_0 = const()[name = tensor("op_1936_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1936_cast_fp16 = slice_by_index(begin = var_1936_begin_0, end = var_1936_end_0, end_mask = var_1936_end_mask_0, x = var_1789_cast_fp16)[name = tensor("op_1936_cast_fp16")]; + tensor var_1943_begin_0 = const()[name = tensor("op_1943_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_1943_end_0 = const()[name = tensor("op_1943_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_1943_end_mask_0 = const()[name = tensor("op_1943_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1943_cast_fp16 = slice_by_index(begin = var_1943_begin_0, end = var_1943_end_0, end_mask = var_1943_end_mask_0, x = var_1789_cast_fp16)[name = tensor("op_1943_cast_fp16")]; + tensor var_1950_begin_0 = const()[name = tensor("op_1950_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1950_end_0 = const()[name = tensor("op_1950_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_1950_end_mask_0 = const()[name = tensor("op_1950_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1950_cast_fp16 = slice_by_index(begin = var_1950_begin_0, end = var_1950_end_0, end_mask = var_1950_end_mask_0, x = var_1793_cast_fp16)[name = tensor("op_1950_cast_fp16")]; + tensor var_1957_begin_0 = const()[name = tensor("op_1957_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_1957_end_0 = const()[name = tensor("op_1957_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_1957_end_mask_0 = const()[name = tensor("op_1957_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1957_cast_fp16 = slice_by_index(begin = var_1957_begin_0, end = var_1957_end_0, end_mask = var_1957_end_mask_0, x = var_1793_cast_fp16)[name = tensor("op_1957_cast_fp16")]; + tensor var_1964_begin_0 = const()[name = tensor("op_1964_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_1964_end_0 = const()[name = tensor("op_1964_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_1964_end_mask_0 = const()[name = tensor("op_1964_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1964_cast_fp16 = slice_by_index(begin = var_1964_begin_0, end = var_1964_end_0, end_mask = var_1964_end_mask_0, x = var_1793_cast_fp16)[name = tensor("op_1964_cast_fp16")]; + tensor var_1971_begin_0 = const()[name = tensor("op_1971_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_1971_end_0 = const()[name = tensor("op_1971_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_1971_end_mask_0 = const()[name = tensor("op_1971_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1971_cast_fp16 = slice_by_index(begin = var_1971_begin_0, end = var_1971_end_0, end_mask = var_1971_end_mask_0, x = var_1793_cast_fp16)[name = tensor("op_1971_cast_fp16")]; + tensor var_1978_begin_0 = const()[name = tensor("op_1978_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1978_end_0 = const()[name = tensor("op_1978_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_1978_end_mask_0 = const()[name = tensor("op_1978_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1978_cast_fp16 = slice_by_index(begin = var_1978_begin_0, end = var_1978_end_0, end_mask = var_1978_end_mask_0, x = var_1797_cast_fp16)[name = tensor("op_1978_cast_fp16")]; + tensor var_1985_begin_0 = const()[name = tensor("op_1985_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_1985_end_0 = const()[name = tensor("op_1985_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_1985_end_mask_0 = const()[name = tensor("op_1985_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1985_cast_fp16 = slice_by_index(begin = var_1985_begin_0, end = var_1985_end_0, end_mask = var_1985_end_mask_0, x = var_1797_cast_fp16)[name = tensor("op_1985_cast_fp16")]; + tensor var_1992_begin_0 = const()[name = tensor("op_1992_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_1992_end_0 = const()[name = tensor("op_1992_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_1992_end_mask_0 = const()[name = tensor("op_1992_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1992_cast_fp16 = slice_by_index(begin = var_1992_begin_0, end = var_1992_end_0, end_mask = var_1992_end_mask_0, x = var_1797_cast_fp16)[name = tensor("op_1992_cast_fp16")]; + tensor var_1999_begin_0 = const()[name = tensor("op_1999_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_1999_end_0 = const()[name = tensor("op_1999_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_1999_end_mask_0 = const()[name = tensor("op_1999_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1999_cast_fp16 = slice_by_index(begin = var_1999_begin_0, end = var_1999_end_0, end_mask = var_1999_end_mask_0, x = var_1797_cast_fp16)[name = tensor("op_1999_cast_fp16")]; + tensor var_2006_begin_0 = const()[name = tensor("op_2006_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2006_end_0 = const()[name = tensor("op_2006_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_2006_end_mask_0 = const()[name = tensor("op_2006_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2006_cast_fp16 = slice_by_index(begin = var_2006_begin_0, end = var_2006_end_0, end_mask = var_2006_end_mask_0, x = var_1801_cast_fp16)[name = tensor("op_2006_cast_fp16")]; + tensor var_2013_begin_0 = const()[name = tensor("op_2013_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_2013_end_0 = const()[name = tensor("op_2013_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_2013_end_mask_0 = const()[name = tensor("op_2013_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2013_cast_fp16 = slice_by_index(begin = var_2013_begin_0, end = var_2013_end_0, end_mask = var_2013_end_mask_0, x = var_1801_cast_fp16)[name = tensor("op_2013_cast_fp16")]; + tensor var_2020_begin_0 = const()[name = tensor("op_2020_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_2020_end_0 = const()[name = tensor("op_2020_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_2020_end_mask_0 = const()[name = tensor("op_2020_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2020_cast_fp16 = slice_by_index(begin = var_2020_begin_0, end = var_2020_end_0, end_mask = var_2020_end_mask_0, x = var_1801_cast_fp16)[name = tensor("op_2020_cast_fp16")]; + tensor var_2027_begin_0 = const()[name = tensor("op_2027_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_2027_end_0 = const()[name = tensor("op_2027_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_2027_end_mask_0 = const()[name = tensor("op_2027_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2027_cast_fp16 = slice_by_index(begin = var_2027_begin_0, end = var_2027_end_0, end_mask = var_2027_end_mask_0, x = var_1801_cast_fp16)[name = tensor("op_2027_cast_fp16")]; + tensor var_2034_begin_0 = const()[name = tensor("op_2034_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2034_end_0 = const()[name = tensor("op_2034_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_2034_end_mask_0 = const()[name = tensor("op_2034_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2034_cast_fp16 = slice_by_index(begin = var_2034_begin_0, end = var_2034_end_0, end_mask = var_2034_end_mask_0, x = var_1805_cast_fp16)[name = tensor("op_2034_cast_fp16")]; + tensor var_2041_begin_0 = const()[name = tensor("op_2041_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_2041_end_0 = const()[name = tensor("op_2041_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_2041_end_mask_0 = const()[name = tensor("op_2041_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2041_cast_fp16 = slice_by_index(begin = var_2041_begin_0, end = var_2041_end_0, end_mask = var_2041_end_mask_0, x = var_1805_cast_fp16)[name = tensor("op_2041_cast_fp16")]; + tensor var_2048_begin_0 = const()[name = tensor("op_2048_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_2048_end_0 = const()[name = tensor("op_2048_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_2048_end_mask_0 = const()[name = tensor("op_2048_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2048_cast_fp16 = slice_by_index(begin = var_2048_begin_0, end = var_2048_end_0, end_mask = var_2048_end_mask_0, x = var_1805_cast_fp16)[name = tensor("op_2048_cast_fp16")]; + tensor var_2055_begin_0 = const()[name = tensor("op_2055_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_2055_end_0 = const()[name = tensor("op_2055_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_2055_end_mask_0 = const()[name = tensor("op_2055_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2055_cast_fp16 = slice_by_index(begin = var_2055_begin_0, end = var_2055_end_0, end_mask = var_2055_end_mask_0, x = var_1805_cast_fp16)[name = tensor("op_2055_cast_fp16")]; + tensor var_2062_begin_0 = const()[name = tensor("op_2062_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2062_end_0 = const()[name = tensor("op_2062_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_2062_end_mask_0 = const()[name = tensor("op_2062_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2062_cast_fp16 = slice_by_index(begin = var_2062_begin_0, end = var_2062_end_0, end_mask = var_2062_end_mask_0, x = var_1809_cast_fp16)[name = tensor("op_2062_cast_fp16")]; + tensor var_2069_begin_0 = const()[name = tensor("op_2069_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_2069_end_0 = const()[name = tensor("op_2069_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_2069_end_mask_0 = const()[name = tensor("op_2069_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2069_cast_fp16 = slice_by_index(begin = var_2069_begin_0, end = var_2069_end_0, end_mask = var_2069_end_mask_0, x = var_1809_cast_fp16)[name = tensor("op_2069_cast_fp16")]; + tensor var_2076_begin_0 = const()[name = tensor("op_2076_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_2076_end_0 = const()[name = tensor("op_2076_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_2076_end_mask_0 = const()[name = tensor("op_2076_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2076_cast_fp16 = slice_by_index(begin = var_2076_begin_0, end = var_2076_end_0, end_mask = var_2076_end_mask_0, x = var_1809_cast_fp16)[name = tensor("op_2076_cast_fp16")]; + tensor var_2083_begin_0 = const()[name = tensor("op_2083_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_2083_end_0 = const()[name = tensor("op_2083_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_2083_end_mask_0 = const()[name = tensor("op_2083_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2083_cast_fp16 = slice_by_index(begin = var_2083_begin_0, end = var_2083_end_0, end_mask = var_2083_end_mask_0, x = var_1809_cast_fp16)[name = tensor("op_2083_cast_fp16")]; + tensor var_2090_begin_0 = const()[name = tensor("op_2090_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2090_end_0 = const()[name = tensor("op_2090_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_2090_end_mask_0 = const()[name = tensor("op_2090_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2090_cast_fp16 = slice_by_index(begin = var_2090_begin_0, end = var_2090_end_0, end_mask = var_2090_end_mask_0, x = var_1813_cast_fp16)[name = tensor("op_2090_cast_fp16")]; + tensor var_2097_begin_0 = const()[name = tensor("op_2097_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_2097_end_0 = const()[name = tensor("op_2097_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_2097_end_mask_0 = const()[name = tensor("op_2097_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2097_cast_fp16 = slice_by_index(begin = var_2097_begin_0, end = var_2097_end_0, end_mask = var_2097_end_mask_0, x = var_1813_cast_fp16)[name = tensor("op_2097_cast_fp16")]; + tensor var_2104_begin_0 = const()[name = tensor("op_2104_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_2104_end_0 = const()[name = tensor("op_2104_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_2104_end_mask_0 = const()[name = tensor("op_2104_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2104_cast_fp16 = slice_by_index(begin = var_2104_begin_0, end = var_2104_end_0, end_mask = var_2104_end_mask_0, x = var_1813_cast_fp16)[name = tensor("op_2104_cast_fp16")]; + tensor var_2111_begin_0 = const()[name = tensor("op_2111_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_2111_end_0 = const()[name = tensor("op_2111_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_2111_end_mask_0 = const()[name = tensor("op_2111_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2111_cast_fp16 = slice_by_index(begin = var_2111_begin_0, end = var_2111_end_0, end_mask = var_2111_end_mask_0, x = var_1813_cast_fp16)[name = tensor("op_2111_cast_fp16")]; + tensor var_2118_begin_0 = const()[name = tensor("op_2118_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2118_end_0 = const()[name = tensor("op_2118_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_2118_end_mask_0 = const()[name = tensor("op_2118_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2118_cast_fp16 = slice_by_index(begin = var_2118_begin_0, end = var_2118_end_0, end_mask = var_2118_end_mask_0, x = var_1817_cast_fp16)[name = tensor("op_2118_cast_fp16")]; + tensor var_2125_begin_0 = const()[name = tensor("op_2125_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_2125_end_0 = const()[name = tensor("op_2125_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_2125_end_mask_0 = const()[name = tensor("op_2125_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2125_cast_fp16 = slice_by_index(begin = var_2125_begin_0, end = var_2125_end_0, end_mask = var_2125_end_mask_0, x = var_1817_cast_fp16)[name = tensor("op_2125_cast_fp16")]; + tensor var_2132_begin_0 = const()[name = tensor("op_2132_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_2132_end_0 = const()[name = tensor("op_2132_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_2132_end_mask_0 = const()[name = tensor("op_2132_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2132_cast_fp16 = slice_by_index(begin = var_2132_begin_0, end = var_2132_end_0, end_mask = var_2132_end_mask_0, x = var_1817_cast_fp16)[name = tensor("op_2132_cast_fp16")]; + tensor var_2139_begin_0 = const()[name = tensor("op_2139_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_2139_end_0 = const()[name = tensor("op_2139_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_2139_end_mask_0 = const()[name = tensor("op_2139_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2139_cast_fp16 = slice_by_index(begin = var_2139_begin_0, end = var_2139_end_0, end_mask = var_2139_end_mask_0, x = var_1817_cast_fp16)[name = tensor("op_2139_cast_fp16")]; + tensor var_2146_begin_0 = const()[name = tensor("op_2146_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2146_end_0 = const()[name = tensor("op_2146_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_2146_end_mask_0 = const()[name = tensor("op_2146_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2146_cast_fp16 = slice_by_index(begin = var_2146_begin_0, end = var_2146_end_0, end_mask = var_2146_end_mask_0, x = var_1821_cast_fp16)[name = tensor("op_2146_cast_fp16")]; + tensor var_2153_begin_0 = const()[name = tensor("op_2153_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_2153_end_0 = const()[name = tensor("op_2153_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_2153_end_mask_0 = const()[name = tensor("op_2153_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2153_cast_fp16 = slice_by_index(begin = var_2153_begin_0, end = var_2153_end_0, end_mask = var_2153_end_mask_0, x = var_1821_cast_fp16)[name = tensor("op_2153_cast_fp16")]; + tensor var_2160_begin_0 = const()[name = tensor("op_2160_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_2160_end_0 = const()[name = tensor("op_2160_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_2160_end_mask_0 = const()[name = tensor("op_2160_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2160_cast_fp16 = slice_by_index(begin = var_2160_begin_0, end = var_2160_end_0, end_mask = var_2160_end_mask_0, x = var_1821_cast_fp16)[name = tensor("op_2160_cast_fp16")]; + tensor var_2167_begin_0 = const()[name = tensor("op_2167_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_2167_end_0 = const()[name = tensor("op_2167_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_2167_end_mask_0 = const()[name = tensor("op_2167_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2167_cast_fp16 = slice_by_index(begin = var_2167_begin_0, end = var_2167_end_0, end_mask = var_2167_end_mask_0, x = var_1821_cast_fp16)[name = tensor("op_2167_cast_fp16")]; + tensor var_2174_begin_0 = const()[name = tensor("op_2174_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2174_end_0 = const()[name = tensor("op_2174_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_2174_end_mask_0 = const()[name = tensor("op_2174_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2174_cast_fp16 = slice_by_index(begin = var_2174_begin_0, end = var_2174_end_0, end_mask = var_2174_end_mask_0, x = var_1825_cast_fp16)[name = tensor("op_2174_cast_fp16")]; + tensor var_2181_begin_0 = const()[name = tensor("op_2181_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_2181_end_0 = const()[name = tensor("op_2181_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_2181_end_mask_0 = const()[name = tensor("op_2181_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2181_cast_fp16 = slice_by_index(begin = var_2181_begin_0, end = var_2181_end_0, end_mask = var_2181_end_mask_0, x = var_1825_cast_fp16)[name = tensor("op_2181_cast_fp16")]; + tensor var_2188_begin_0 = const()[name = tensor("op_2188_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_2188_end_0 = const()[name = tensor("op_2188_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_2188_end_mask_0 = const()[name = tensor("op_2188_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2188_cast_fp16 = slice_by_index(begin = var_2188_begin_0, end = var_2188_end_0, end_mask = var_2188_end_mask_0, x = var_1825_cast_fp16)[name = tensor("op_2188_cast_fp16")]; + tensor var_2195_begin_0 = const()[name = tensor("op_2195_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_2195_end_0 = const()[name = tensor("op_2195_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_2195_end_mask_0 = const()[name = tensor("op_2195_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2195_cast_fp16 = slice_by_index(begin = var_2195_begin_0, end = var_2195_end_0, end_mask = var_2195_end_mask_0, x = var_1825_cast_fp16)[name = tensor("op_2195_cast_fp16")]; + tensor var_2202_begin_0 = const()[name = tensor("op_2202_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2202_end_0 = const()[name = tensor("op_2202_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_2202_end_mask_0 = const()[name = tensor("op_2202_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2202_cast_fp16 = slice_by_index(begin = var_2202_begin_0, end = var_2202_end_0, end_mask = var_2202_end_mask_0, x = var_1829_cast_fp16)[name = tensor("op_2202_cast_fp16")]; + tensor var_2209_begin_0 = const()[name = tensor("op_2209_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_2209_end_0 = const()[name = tensor("op_2209_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_2209_end_mask_0 = const()[name = tensor("op_2209_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2209_cast_fp16 = slice_by_index(begin = var_2209_begin_0, end = var_2209_end_0, end_mask = var_2209_end_mask_0, x = var_1829_cast_fp16)[name = tensor("op_2209_cast_fp16")]; + tensor var_2216_begin_0 = const()[name = tensor("op_2216_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_2216_end_0 = const()[name = tensor("op_2216_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_2216_end_mask_0 = const()[name = tensor("op_2216_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2216_cast_fp16 = slice_by_index(begin = var_2216_begin_0, end = var_2216_end_0, end_mask = var_2216_end_mask_0, x = var_1829_cast_fp16)[name = tensor("op_2216_cast_fp16")]; + tensor var_2223_begin_0 = const()[name = tensor("op_2223_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_2223_end_0 = const()[name = tensor("op_2223_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_2223_end_mask_0 = const()[name = tensor("op_2223_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2223_cast_fp16 = slice_by_index(begin = var_2223_begin_0, end = var_2223_end_0, end_mask = var_2223_end_mask_0, x = var_1829_cast_fp16)[name = tensor("op_2223_cast_fp16")]; + tensor var_2230_begin_0 = const()[name = tensor("op_2230_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2230_end_0 = const()[name = tensor("op_2230_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_2230_end_mask_0 = const()[name = tensor("op_2230_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2230_cast_fp16 = slice_by_index(begin = var_2230_begin_0, end = var_2230_end_0, end_mask = var_2230_end_mask_0, x = var_1833_cast_fp16)[name = tensor("op_2230_cast_fp16")]; + tensor var_2237_begin_0 = const()[name = tensor("op_2237_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_2237_end_0 = const()[name = tensor("op_2237_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_2237_end_mask_0 = const()[name = tensor("op_2237_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2237_cast_fp16 = slice_by_index(begin = var_2237_begin_0, end = var_2237_end_0, end_mask = var_2237_end_mask_0, x = var_1833_cast_fp16)[name = tensor("op_2237_cast_fp16")]; + tensor var_2244_begin_0 = const()[name = tensor("op_2244_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_2244_end_0 = const()[name = tensor("op_2244_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_2244_end_mask_0 = const()[name = tensor("op_2244_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2244_cast_fp16 = slice_by_index(begin = var_2244_begin_0, end = var_2244_end_0, end_mask = var_2244_end_mask_0, x = var_1833_cast_fp16)[name = tensor("op_2244_cast_fp16")]; + tensor var_2251_begin_0 = const()[name = tensor("op_2251_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_2251_end_0 = const()[name = tensor("op_2251_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_2251_end_mask_0 = const()[name = tensor("op_2251_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2251_cast_fp16 = slice_by_index(begin = var_2251_begin_0, end = var_2251_end_0, end_mask = var_2251_end_mask_0, x = var_1833_cast_fp16)[name = tensor("op_2251_cast_fp16")]; + tensor var_2258_begin_0 = const()[name = tensor("op_2258_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2258_end_0 = const()[name = tensor("op_2258_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_2258_end_mask_0 = const()[name = tensor("op_2258_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2258_cast_fp16 = slice_by_index(begin = var_2258_begin_0, end = var_2258_end_0, end_mask = var_2258_end_mask_0, x = var_1837_cast_fp16)[name = tensor("op_2258_cast_fp16")]; + tensor var_2265_begin_0 = const()[name = tensor("op_2265_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_2265_end_0 = const()[name = tensor("op_2265_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_2265_end_mask_0 = const()[name = tensor("op_2265_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2265_cast_fp16 = slice_by_index(begin = var_2265_begin_0, end = var_2265_end_0, end_mask = var_2265_end_mask_0, x = var_1837_cast_fp16)[name = tensor("op_2265_cast_fp16")]; + tensor var_2272_begin_0 = const()[name = tensor("op_2272_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_2272_end_0 = const()[name = tensor("op_2272_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_2272_end_mask_0 = const()[name = tensor("op_2272_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2272_cast_fp16 = slice_by_index(begin = var_2272_begin_0, end = var_2272_end_0, end_mask = var_2272_end_mask_0, x = var_1837_cast_fp16)[name = tensor("op_2272_cast_fp16")]; + tensor var_2279_begin_0 = const()[name = tensor("op_2279_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_2279_end_0 = const()[name = tensor("op_2279_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_2279_end_mask_0 = const()[name = tensor("op_2279_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2279_cast_fp16 = slice_by_index(begin = var_2279_begin_0, end = var_2279_end_0, end_mask = var_2279_end_mask_0, x = var_1837_cast_fp16)[name = tensor("op_2279_cast_fp16")]; + tensor var_2286_begin_0 = const()[name = tensor("op_2286_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2286_end_0 = const()[name = tensor("op_2286_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_2286_end_mask_0 = const()[name = tensor("op_2286_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2286_cast_fp16 = slice_by_index(begin = var_2286_begin_0, end = var_2286_end_0, end_mask = var_2286_end_mask_0, x = var_1841_cast_fp16)[name = tensor("op_2286_cast_fp16")]; + tensor var_2293_begin_0 = const()[name = tensor("op_2293_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_2293_end_0 = const()[name = tensor("op_2293_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_2293_end_mask_0 = const()[name = tensor("op_2293_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2293_cast_fp16 = slice_by_index(begin = var_2293_begin_0, end = var_2293_end_0, end_mask = var_2293_end_mask_0, x = var_1841_cast_fp16)[name = tensor("op_2293_cast_fp16")]; + tensor var_2300_begin_0 = const()[name = tensor("op_2300_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_2300_end_0 = const()[name = tensor("op_2300_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_2300_end_mask_0 = const()[name = tensor("op_2300_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2300_cast_fp16 = slice_by_index(begin = var_2300_begin_0, end = var_2300_end_0, end_mask = var_2300_end_mask_0, x = var_1841_cast_fp16)[name = tensor("op_2300_cast_fp16")]; + tensor var_2307_begin_0 = const()[name = tensor("op_2307_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_2307_end_0 = const()[name = tensor("op_2307_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_2307_end_mask_0 = const()[name = tensor("op_2307_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2307_cast_fp16 = slice_by_index(begin = var_2307_begin_0, end = var_2307_end_0, end_mask = var_2307_end_mask_0, x = var_1841_cast_fp16)[name = tensor("op_2307_cast_fp16")]; + tensor var_2314_begin_0 = const()[name = tensor("op_2314_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2314_end_0 = const()[name = tensor("op_2314_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_2314_end_mask_0 = const()[name = tensor("op_2314_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2314_cast_fp16 = slice_by_index(begin = var_2314_begin_0, end = var_2314_end_0, end_mask = var_2314_end_mask_0, x = var_1845_cast_fp16)[name = tensor("op_2314_cast_fp16")]; + tensor var_2321_begin_0 = const()[name = tensor("op_2321_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_2321_end_0 = const()[name = tensor("op_2321_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_2321_end_mask_0 = const()[name = tensor("op_2321_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2321_cast_fp16 = slice_by_index(begin = var_2321_begin_0, end = var_2321_end_0, end_mask = var_2321_end_mask_0, x = var_1845_cast_fp16)[name = tensor("op_2321_cast_fp16")]; + tensor var_2328_begin_0 = const()[name = tensor("op_2328_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_2328_end_0 = const()[name = tensor("op_2328_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_2328_end_mask_0 = const()[name = tensor("op_2328_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2328_cast_fp16 = slice_by_index(begin = var_2328_begin_0, end = var_2328_end_0, end_mask = var_2328_end_mask_0, x = var_1845_cast_fp16)[name = tensor("op_2328_cast_fp16")]; + tensor var_2335_begin_0 = const()[name = tensor("op_2335_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_2335_end_0 = const()[name = tensor("op_2335_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_2335_end_mask_0 = const()[name = tensor("op_2335_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2335_cast_fp16 = slice_by_index(begin = var_2335_begin_0, end = var_2335_end_0, end_mask = var_2335_end_mask_0, x = var_1845_cast_fp16)[name = tensor("op_2335_cast_fp16")]; + tensor var_2342_begin_0 = const()[name = tensor("op_2342_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2342_end_0 = const()[name = tensor("op_2342_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_2342_end_mask_0 = const()[name = tensor("op_2342_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2342_cast_fp16 = slice_by_index(begin = var_2342_begin_0, end = var_2342_end_0, end_mask = var_2342_end_mask_0, x = var_1849_cast_fp16)[name = tensor("op_2342_cast_fp16")]; + tensor var_2349_begin_0 = const()[name = tensor("op_2349_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_2349_end_0 = const()[name = tensor("op_2349_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_2349_end_mask_0 = const()[name = tensor("op_2349_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2349_cast_fp16 = slice_by_index(begin = var_2349_begin_0, end = var_2349_end_0, end_mask = var_2349_end_mask_0, x = var_1849_cast_fp16)[name = tensor("op_2349_cast_fp16")]; + tensor var_2356_begin_0 = const()[name = tensor("op_2356_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_2356_end_0 = const()[name = tensor("op_2356_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_2356_end_mask_0 = const()[name = tensor("op_2356_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2356_cast_fp16 = slice_by_index(begin = var_2356_begin_0, end = var_2356_end_0, end_mask = var_2356_end_mask_0, x = var_1849_cast_fp16)[name = tensor("op_2356_cast_fp16")]; + tensor var_2363_begin_0 = const()[name = tensor("op_2363_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_2363_end_0 = const()[name = tensor("op_2363_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_2363_end_mask_0 = const()[name = tensor("op_2363_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2363_cast_fp16 = slice_by_index(begin = var_2363_begin_0, end = var_2363_end_0, end_mask = var_2363_end_mask_0, x = var_1849_cast_fp16)[name = tensor("op_2363_cast_fp16")]; + tensor var_2370_begin_0 = const()[name = tensor("op_2370_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2370_end_0 = const()[name = tensor("op_2370_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_2370_end_mask_0 = const()[name = tensor("op_2370_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2370_cast_fp16 = slice_by_index(begin = var_2370_begin_0, end = var_2370_end_0, end_mask = var_2370_end_mask_0, x = var_1853_cast_fp16)[name = tensor("op_2370_cast_fp16")]; + tensor var_2377_begin_0 = const()[name = tensor("op_2377_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_2377_end_0 = const()[name = tensor("op_2377_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_2377_end_mask_0 = const()[name = tensor("op_2377_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2377_cast_fp16 = slice_by_index(begin = var_2377_begin_0, end = var_2377_end_0, end_mask = var_2377_end_mask_0, x = var_1853_cast_fp16)[name = tensor("op_2377_cast_fp16")]; + tensor var_2384_begin_0 = const()[name = tensor("op_2384_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_2384_end_0 = const()[name = tensor("op_2384_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_2384_end_mask_0 = const()[name = tensor("op_2384_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2384_cast_fp16 = slice_by_index(begin = var_2384_begin_0, end = var_2384_end_0, end_mask = var_2384_end_mask_0, x = var_1853_cast_fp16)[name = tensor("op_2384_cast_fp16")]; + tensor var_2391_begin_0 = const()[name = tensor("op_2391_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_2391_end_0 = const()[name = tensor("op_2391_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_2391_end_mask_0 = const()[name = tensor("op_2391_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2391_cast_fp16 = slice_by_index(begin = var_2391_begin_0, end = var_2391_end_0, end_mask = var_2391_end_mask_0, x = var_1853_cast_fp16)[name = tensor("op_2391_cast_fp16")]; + tensor var_2398_begin_0 = const()[name = tensor("op_2398_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2398_end_0 = const()[name = tensor("op_2398_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_2398_end_mask_0 = const()[name = tensor("op_2398_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2398_cast_fp16 = slice_by_index(begin = var_2398_begin_0, end = var_2398_end_0, end_mask = var_2398_end_mask_0, x = var_1857_cast_fp16)[name = tensor("op_2398_cast_fp16")]; + tensor var_2405_begin_0 = const()[name = tensor("op_2405_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_2405_end_0 = const()[name = tensor("op_2405_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_2405_end_mask_0 = const()[name = tensor("op_2405_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2405_cast_fp16 = slice_by_index(begin = var_2405_begin_0, end = var_2405_end_0, end_mask = var_2405_end_mask_0, x = var_1857_cast_fp16)[name = tensor("op_2405_cast_fp16")]; + tensor var_2412_begin_0 = const()[name = tensor("op_2412_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_2412_end_0 = const()[name = tensor("op_2412_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_2412_end_mask_0 = const()[name = tensor("op_2412_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2412_cast_fp16 = slice_by_index(begin = var_2412_begin_0, end = var_2412_end_0, end_mask = var_2412_end_mask_0, x = var_1857_cast_fp16)[name = tensor("op_2412_cast_fp16")]; + tensor var_2419_begin_0 = const()[name = tensor("op_2419_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_2419_end_0 = const()[name = tensor("op_2419_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_2419_end_mask_0 = const()[name = tensor("op_2419_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2419_cast_fp16 = slice_by_index(begin = var_2419_begin_0, end = var_2419_end_0, end_mask = var_2419_end_mask_0, x = var_1857_cast_fp16)[name = tensor("op_2419_cast_fp16")]; + tensor k_3_perm_0 = const()[name = tensor("k_3_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_2424_begin_0 = const()[name = tensor("op_2424_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2424_end_0 = const()[name = tensor("op_2424_end_0"), val = tensor([1, 1500, 1, 64])]; + tensor var_2424_end_mask_0 = const()[name = tensor("op_2424_end_mask_0"), val = tensor([true, true, true, false])]; + tensor transpose_30 = transpose(perm = k_3_perm_0, x = key_3_cast_fp16)[name = tensor("transpose_30")]; + tensor var_2424_cast_fp16 = slice_by_index(begin = var_2424_begin_0, end = var_2424_end_0, end_mask = var_2424_end_mask_0, x = transpose_30)[name = tensor("op_2424_cast_fp16")]; + tensor var_2428_begin_0 = const()[name = tensor("op_2428_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor var_2428_end_0 = const()[name = tensor("op_2428_end_0"), val = tensor([1, 1500, 1, 128])]; + tensor var_2428_end_mask_0 = const()[name = tensor("op_2428_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2428_cast_fp16 = slice_by_index(begin = var_2428_begin_0, end = var_2428_end_0, end_mask = var_2428_end_mask_0, x = transpose_30)[name = tensor("op_2428_cast_fp16")]; + tensor var_2432_begin_0 = const()[name = tensor("op_2432_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_2432_end_0 = const()[name = tensor("op_2432_end_0"), val = tensor([1, 1500, 1, 192])]; + tensor var_2432_end_mask_0 = const()[name = tensor("op_2432_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2432_cast_fp16 = slice_by_index(begin = var_2432_begin_0, end = var_2432_end_0, end_mask = var_2432_end_mask_0, x = transpose_30)[name = tensor("op_2432_cast_fp16")]; + tensor var_2436_begin_0 = const()[name = tensor("op_2436_begin_0"), val = tensor([0, 0, 0, 192])]; + tensor var_2436_end_0 = const()[name = tensor("op_2436_end_0"), val = tensor([1, 1500, 1, 256])]; + tensor var_2436_end_mask_0 = const()[name = tensor("op_2436_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2436_cast_fp16 = slice_by_index(begin = var_2436_begin_0, end = var_2436_end_0, end_mask = var_2436_end_mask_0, x = transpose_30)[name = tensor("op_2436_cast_fp16")]; + tensor var_2440_begin_0 = const()[name = tensor("op_2440_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_2440_end_0 = const()[name = tensor("op_2440_end_0"), val = tensor([1, 1500, 1, 320])]; + tensor var_2440_end_mask_0 = const()[name = tensor("op_2440_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2440_cast_fp16 = slice_by_index(begin = var_2440_begin_0, end = var_2440_end_0, end_mask = var_2440_end_mask_0, x = transpose_30)[name = tensor("op_2440_cast_fp16")]; + tensor var_2444_begin_0 = const()[name = tensor("op_2444_begin_0"), val = tensor([0, 0, 0, 320])]; + tensor var_2444_end_0 = const()[name = tensor("op_2444_end_0"), val = tensor([1, 1500, 1, 384])]; + tensor var_2444_end_mask_0 = const()[name = tensor("op_2444_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2444_cast_fp16 = slice_by_index(begin = var_2444_begin_0, end = var_2444_end_0, end_mask = var_2444_end_mask_0, x = transpose_30)[name = tensor("op_2444_cast_fp16")]; + tensor var_2448_begin_0 = const()[name = tensor("op_2448_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_2448_end_0 = const()[name = tensor("op_2448_end_0"), val = tensor([1, 1500, 1, 448])]; + tensor var_2448_end_mask_0 = const()[name = tensor("op_2448_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2448_cast_fp16 = slice_by_index(begin = var_2448_begin_0, end = var_2448_end_0, end_mask = var_2448_end_mask_0, x = transpose_30)[name = tensor("op_2448_cast_fp16")]; + tensor var_2452_begin_0 = const()[name = tensor("op_2452_begin_0"), val = tensor([0, 0, 0, 448])]; + tensor var_2452_end_0 = const()[name = tensor("op_2452_end_0"), val = tensor([1, 1500, 1, 512])]; + tensor var_2452_end_mask_0 = const()[name = tensor("op_2452_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2452_cast_fp16 = slice_by_index(begin = var_2452_begin_0, end = var_2452_end_0, end_mask = var_2452_end_mask_0, x = transpose_30)[name = tensor("op_2452_cast_fp16")]; + tensor var_2456_begin_0 = const()[name = tensor("op_2456_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_2456_end_0 = const()[name = tensor("op_2456_end_0"), val = tensor([1, 1500, 1, 576])]; + tensor var_2456_end_mask_0 = const()[name = tensor("op_2456_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2456_cast_fp16 = slice_by_index(begin = var_2456_begin_0, end = var_2456_end_0, end_mask = var_2456_end_mask_0, x = transpose_30)[name = tensor("op_2456_cast_fp16")]; + tensor var_2460_begin_0 = const()[name = tensor("op_2460_begin_0"), val = tensor([0, 0, 0, 576])]; + tensor var_2460_end_0 = const()[name = tensor("op_2460_end_0"), val = tensor([1, 1500, 1, 640])]; + tensor var_2460_end_mask_0 = const()[name = tensor("op_2460_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2460_cast_fp16 = slice_by_index(begin = var_2460_begin_0, end = var_2460_end_0, end_mask = var_2460_end_mask_0, x = transpose_30)[name = tensor("op_2460_cast_fp16")]; + tensor var_2464_begin_0 = const()[name = tensor("op_2464_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_2464_end_0 = const()[name = tensor("op_2464_end_0"), val = tensor([1, 1500, 1, 704])]; + tensor var_2464_end_mask_0 = const()[name = tensor("op_2464_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2464_cast_fp16 = slice_by_index(begin = var_2464_begin_0, end = var_2464_end_0, end_mask = var_2464_end_mask_0, x = transpose_30)[name = tensor("op_2464_cast_fp16")]; + tensor var_2468_begin_0 = const()[name = tensor("op_2468_begin_0"), val = tensor([0, 0, 0, 704])]; + tensor var_2468_end_0 = const()[name = tensor("op_2468_end_0"), val = tensor([1, 1500, 1, 768])]; + tensor var_2468_end_mask_0 = const()[name = tensor("op_2468_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2468_cast_fp16 = slice_by_index(begin = var_2468_begin_0, end = var_2468_end_0, end_mask = var_2468_end_mask_0, x = transpose_30)[name = tensor("op_2468_cast_fp16")]; + tensor var_2472_begin_0 = const()[name = tensor("op_2472_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_2472_end_0 = const()[name = tensor("op_2472_end_0"), val = tensor([1, 1500, 1, 832])]; + tensor var_2472_end_mask_0 = const()[name = tensor("op_2472_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2472_cast_fp16 = slice_by_index(begin = var_2472_begin_0, end = var_2472_end_0, end_mask = var_2472_end_mask_0, x = transpose_30)[name = tensor("op_2472_cast_fp16")]; + tensor var_2476_begin_0 = const()[name = tensor("op_2476_begin_0"), val = tensor([0, 0, 0, 832])]; + tensor var_2476_end_0 = const()[name = tensor("op_2476_end_0"), val = tensor([1, 1500, 1, 896])]; + tensor var_2476_end_mask_0 = const()[name = tensor("op_2476_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2476_cast_fp16 = slice_by_index(begin = var_2476_begin_0, end = var_2476_end_0, end_mask = var_2476_end_mask_0, x = transpose_30)[name = tensor("op_2476_cast_fp16")]; + tensor var_2480_begin_0 = const()[name = tensor("op_2480_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_2480_end_0 = const()[name = tensor("op_2480_end_0"), val = tensor([1, 1500, 1, 960])]; + tensor var_2480_end_mask_0 = const()[name = tensor("op_2480_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2480_cast_fp16 = slice_by_index(begin = var_2480_begin_0, end = var_2480_end_0, end_mask = var_2480_end_mask_0, x = transpose_30)[name = tensor("op_2480_cast_fp16")]; + tensor var_2484_begin_0 = const()[name = tensor("op_2484_begin_0"), val = tensor([0, 0, 0, 960])]; + tensor var_2484_end_0 = const()[name = tensor("op_2484_end_0"), val = tensor([1, 1500, 1, 1024])]; + tensor var_2484_end_mask_0 = const()[name = tensor("op_2484_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2484_cast_fp16 = slice_by_index(begin = var_2484_begin_0, end = var_2484_end_0, end_mask = var_2484_end_mask_0, x = transpose_30)[name = tensor("op_2484_cast_fp16")]; + tensor var_2488_begin_0 = const()[name = tensor("op_2488_begin_0"), val = tensor([0, 0, 0, 1024])]; + tensor var_2488_end_0 = const()[name = tensor("op_2488_end_0"), val = tensor([1, 1500, 1, 1088])]; + tensor var_2488_end_mask_0 = const()[name = tensor("op_2488_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2488_cast_fp16 = slice_by_index(begin = var_2488_begin_0, end = var_2488_end_0, end_mask = var_2488_end_mask_0, x = transpose_30)[name = tensor("op_2488_cast_fp16")]; + tensor var_2492_begin_0 = const()[name = tensor("op_2492_begin_0"), val = tensor([0, 0, 0, 1088])]; + tensor var_2492_end_0 = const()[name = tensor("op_2492_end_0"), val = tensor([1, 1500, 1, 1152])]; + tensor var_2492_end_mask_0 = const()[name = tensor("op_2492_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2492_cast_fp16 = slice_by_index(begin = var_2492_begin_0, end = var_2492_end_0, end_mask = var_2492_end_mask_0, x = transpose_30)[name = tensor("op_2492_cast_fp16")]; + tensor var_2496_begin_0 = const()[name = tensor("op_2496_begin_0"), val = tensor([0, 0, 0, 1152])]; + tensor var_2496_end_0 = const()[name = tensor("op_2496_end_0"), val = tensor([1, 1500, 1, 1216])]; + tensor var_2496_end_mask_0 = const()[name = tensor("op_2496_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2496_cast_fp16 = slice_by_index(begin = var_2496_begin_0, end = var_2496_end_0, end_mask = var_2496_end_mask_0, x = transpose_30)[name = tensor("op_2496_cast_fp16")]; + tensor var_2500_begin_0 = const()[name = tensor("op_2500_begin_0"), val = tensor([0, 0, 0, 1216])]; + tensor var_2500_end_0 = const()[name = tensor("op_2500_end_0"), val = tensor([1, 1500, 1, 1280])]; + tensor var_2500_end_mask_0 = const()[name = tensor("op_2500_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_2500_cast_fp16 = slice_by_index(begin = var_2500_begin_0, end = var_2500_end_0, end_mask = var_2500_end_mask_0, x = transpose_30)[name = tensor("op_2500_cast_fp16")]; + tensor var_2502_begin_0 = const()[name = tensor("op_2502_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_2502_end_0 = const()[name = tensor("op_2502_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_2502_end_mask_0 = const()[name = tensor("op_2502_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2502_cast_fp16 = slice_by_index(begin = var_2502_begin_0, end = var_2502_end_0, end_mask = var_2502_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_2502_cast_fp16")]; + tensor var_2506_begin_0 = const()[name = tensor("op_2506_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_2506_end_0 = const()[name = tensor("op_2506_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_2506_end_mask_0 = const()[name = tensor("op_2506_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2506_cast_fp16 = slice_by_index(begin = var_2506_begin_0, end = var_2506_end_0, end_mask = var_2506_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_2506_cast_fp16")]; + tensor var_2510_begin_0 = const()[name = tensor("op_2510_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_2510_end_0 = const()[name = tensor("op_2510_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_2510_end_mask_0 = const()[name = tensor("op_2510_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2510_cast_fp16 = slice_by_index(begin = var_2510_begin_0, end = var_2510_end_0, end_mask = var_2510_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_2510_cast_fp16")]; + tensor var_2514_begin_0 = const()[name = tensor("op_2514_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_2514_end_0 = const()[name = tensor("op_2514_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_2514_end_mask_0 = const()[name = tensor("op_2514_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2514_cast_fp16 = slice_by_index(begin = var_2514_begin_0, end = var_2514_end_0, end_mask = var_2514_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_2514_cast_fp16")]; + tensor var_2518_begin_0 = const()[name = tensor("op_2518_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_2518_end_0 = const()[name = tensor("op_2518_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_2518_end_mask_0 = const()[name = tensor("op_2518_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2518_cast_fp16 = slice_by_index(begin = var_2518_begin_0, end = var_2518_end_0, end_mask = var_2518_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_2518_cast_fp16")]; + tensor var_2522_begin_0 = const()[name = tensor("op_2522_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_2522_end_0 = const()[name = tensor("op_2522_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_2522_end_mask_0 = const()[name = tensor("op_2522_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2522_cast_fp16 = slice_by_index(begin = var_2522_begin_0, end = var_2522_end_0, end_mask = var_2522_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_2522_cast_fp16")]; + tensor var_2526_begin_0 = const()[name = tensor("op_2526_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_2526_end_0 = const()[name = tensor("op_2526_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_2526_end_mask_0 = const()[name = tensor("op_2526_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2526_cast_fp16 = slice_by_index(begin = var_2526_begin_0, end = var_2526_end_0, end_mask = var_2526_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_2526_cast_fp16")]; + tensor var_2530_begin_0 = const()[name = tensor("op_2530_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_2530_end_0 = const()[name = tensor("op_2530_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_2530_end_mask_0 = const()[name = tensor("op_2530_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2530_cast_fp16 = slice_by_index(begin = var_2530_begin_0, end = var_2530_end_0, end_mask = var_2530_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_2530_cast_fp16")]; + tensor var_2534_begin_0 = const()[name = tensor("op_2534_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_2534_end_0 = const()[name = tensor("op_2534_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_2534_end_mask_0 = const()[name = tensor("op_2534_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2534_cast_fp16 = slice_by_index(begin = var_2534_begin_0, end = var_2534_end_0, end_mask = var_2534_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_2534_cast_fp16")]; + tensor var_2538_begin_0 = const()[name = tensor("op_2538_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_2538_end_0 = const()[name = tensor("op_2538_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_2538_end_mask_0 = const()[name = tensor("op_2538_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2538_cast_fp16 = slice_by_index(begin = var_2538_begin_0, end = var_2538_end_0, end_mask = var_2538_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_2538_cast_fp16")]; + tensor var_2542_begin_0 = const()[name = tensor("op_2542_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_2542_end_0 = const()[name = tensor("op_2542_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_2542_end_mask_0 = const()[name = tensor("op_2542_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2542_cast_fp16 = slice_by_index(begin = var_2542_begin_0, end = var_2542_end_0, end_mask = var_2542_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_2542_cast_fp16")]; + tensor var_2546_begin_0 = const()[name = tensor("op_2546_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_2546_end_0 = const()[name = tensor("op_2546_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_2546_end_mask_0 = const()[name = tensor("op_2546_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2546_cast_fp16 = slice_by_index(begin = var_2546_begin_0, end = var_2546_end_0, end_mask = var_2546_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_2546_cast_fp16")]; + tensor var_2550_begin_0 = const()[name = tensor("op_2550_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_2550_end_0 = const()[name = tensor("op_2550_end_0"), val = tensor([1, 832, 1, 1500])]; + tensor var_2550_end_mask_0 = const()[name = tensor("op_2550_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2550_cast_fp16 = slice_by_index(begin = var_2550_begin_0, end = var_2550_end_0, end_mask = var_2550_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_2550_cast_fp16")]; + tensor var_2554_begin_0 = const()[name = tensor("op_2554_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_2554_end_0 = const()[name = tensor("op_2554_end_0"), val = tensor([1, 896, 1, 1500])]; + tensor var_2554_end_mask_0 = const()[name = tensor("op_2554_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2554_cast_fp16 = slice_by_index(begin = var_2554_begin_0, end = var_2554_end_0, end_mask = var_2554_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_2554_cast_fp16")]; + tensor var_2558_begin_0 = const()[name = tensor("op_2558_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_2558_end_0 = const()[name = tensor("op_2558_end_0"), val = tensor([1, 960, 1, 1500])]; + tensor var_2558_end_mask_0 = const()[name = tensor("op_2558_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2558_cast_fp16 = slice_by_index(begin = var_2558_begin_0, end = var_2558_end_0, end_mask = var_2558_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_2558_cast_fp16")]; + tensor var_2562_begin_0 = const()[name = tensor("op_2562_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_2562_end_0 = const()[name = tensor("op_2562_end_0"), val = tensor([1, 1024, 1, 1500])]; + tensor var_2562_end_mask_0 = const()[name = tensor("op_2562_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2562_cast_fp16 = slice_by_index(begin = var_2562_begin_0, end = var_2562_end_0, end_mask = var_2562_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_2562_cast_fp16")]; + tensor var_2566_begin_0 = const()[name = tensor("op_2566_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_2566_end_0 = const()[name = tensor("op_2566_end_0"), val = tensor([1, 1088, 1, 1500])]; + tensor var_2566_end_mask_0 = const()[name = tensor("op_2566_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2566_cast_fp16 = slice_by_index(begin = var_2566_begin_0, end = var_2566_end_0, end_mask = var_2566_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_2566_cast_fp16")]; + tensor var_2570_begin_0 = const()[name = tensor("op_2570_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_2570_end_0 = const()[name = tensor("op_2570_end_0"), val = tensor([1, 1152, 1, 1500])]; + tensor var_2570_end_mask_0 = const()[name = tensor("op_2570_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2570_cast_fp16 = slice_by_index(begin = var_2570_begin_0, end = var_2570_end_0, end_mask = var_2570_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_2570_cast_fp16")]; + tensor var_2574_begin_0 = const()[name = tensor("op_2574_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_2574_end_0 = const()[name = tensor("op_2574_end_0"), val = tensor([1, 1216, 1, 1500])]; + tensor var_2574_end_mask_0 = const()[name = tensor("op_2574_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2574_cast_fp16 = slice_by_index(begin = var_2574_begin_0, end = var_2574_end_0, end_mask = var_2574_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_2574_cast_fp16")]; + tensor var_2578_begin_0 = const()[name = tensor("op_2578_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_2578_end_0 = const()[name = tensor("op_2578_end_0"), val = tensor([1, 1280, 1, 1500])]; + tensor var_2578_end_mask_0 = const()[name = tensor("op_2578_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_2578_cast_fp16 = slice_by_index(begin = var_2578_begin_0, end = var_2578_end_0, end_mask = var_2578_end_mask_0, x = value_3_cast_fp16)[name = tensor("op_2578_cast_fp16")]; + tensor var_2582_equation_0 = const()[name = tensor("op_2582_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2582_cast_fp16 = einsum(equation = var_2582_equation_0, values = (var_2424_cast_fp16, var_1866_cast_fp16))[name = tensor("op_2582_cast_fp16")]; + tensor var_2583_to_fp16 = const()[name = tensor("op_2583_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_161_cast_fp16 = mul(x = var_2582_cast_fp16, y = var_2583_to_fp16)[name = tensor("aw_chunk_161_cast_fp16")]; + tensor var_2586_equation_0 = const()[name = tensor("op_2586_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2586_cast_fp16 = einsum(equation = var_2586_equation_0, values = (var_2424_cast_fp16, var_1873_cast_fp16))[name = tensor("op_2586_cast_fp16")]; + tensor var_2587_to_fp16 = const()[name = tensor("op_2587_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_163_cast_fp16 = mul(x = var_2586_cast_fp16, y = var_2587_to_fp16)[name = tensor("aw_chunk_163_cast_fp16")]; + tensor var_2590_equation_0 = const()[name = tensor("op_2590_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2590_cast_fp16 = einsum(equation = var_2590_equation_0, values = (var_2424_cast_fp16, var_1880_cast_fp16))[name = tensor("op_2590_cast_fp16")]; + tensor var_2591_to_fp16 = const()[name = tensor("op_2591_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_165_cast_fp16 = mul(x = var_2590_cast_fp16, y = var_2591_to_fp16)[name = tensor("aw_chunk_165_cast_fp16")]; + tensor var_2594_equation_0 = const()[name = tensor("op_2594_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2594_cast_fp16 = einsum(equation = var_2594_equation_0, values = (var_2424_cast_fp16, var_1887_cast_fp16))[name = tensor("op_2594_cast_fp16")]; + tensor var_2595_to_fp16 = const()[name = tensor("op_2595_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_167_cast_fp16 = mul(x = var_2594_cast_fp16, y = var_2595_to_fp16)[name = tensor("aw_chunk_167_cast_fp16")]; + tensor var_2598_equation_0 = const()[name = tensor("op_2598_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2598_cast_fp16 = einsum(equation = var_2598_equation_0, values = (var_2428_cast_fp16, var_1894_cast_fp16))[name = tensor("op_2598_cast_fp16")]; + tensor var_2599_to_fp16 = const()[name = tensor("op_2599_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_169_cast_fp16 = mul(x = var_2598_cast_fp16, y = var_2599_to_fp16)[name = tensor("aw_chunk_169_cast_fp16")]; + tensor var_2602_equation_0 = const()[name = tensor("op_2602_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2602_cast_fp16 = einsum(equation = var_2602_equation_0, values = (var_2428_cast_fp16, var_1901_cast_fp16))[name = tensor("op_2602_cast_fp16")]; + tensor var_2603_to_fp16 = const()[name = tensor("op_2603_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_171_cast_fp16 = mul(x = var_2602_cast_fp16, y = var_2603_to_fp16)[name = tensor("aw_chunk_171_cast_fp16")]; + tensor var_2606_equation_0 = const()[name = tensor("op_2606_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2606_cast_fp16 = einsum(equation = var_2606_equation_0, values = (var_2428_cast_fp16, var_1908_cast_fp16))[name = tensor("op_2606_cast_fp16")]; + tensor var_2607_to_fp16 = const()[name = tensor("op_2607_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_173_cast_fp16 = mul(x = var_2606_cast_fp16, y = var_2607_to_fp16)[name = tensor("aw_chunk_173_cast_fp16")]; + tensor var_2610_equation_0 = const()[name = tensor("op_2610_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2610_cast_fp16 = einsum(equation = var_2610_equation_0, values = (var_2428_cast_fp16, var_1915_cast_fp16))[name = tensor("op_2610_cast_fp16")]; + tensor var_2611_to_fp16 = const()[name = tensor("op_2611_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_175_cast_fp16 = mul(x = var_2610_cast_fp16, y = var_2611_to_fp16)[name = tensor("aw_chunk_175_cast_fp16")]; + tensor var_2614_equation_0 = const()[name = tensor("op_2614_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2614_cast_fp16 = einsum(equation = var_2614_equation_0, values = (var_2432_cast_fp16, var_1922_cast_fp16))[name = tensor("op_2614_cast_fp16")]; + tensor var_2615_to_fp16 = const()[name = tensor("op_2615_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_177_cast_fp16 = mul(x = var_2614_cast_fp16, y = var_2615_to_fp16)[name = tensor("aw_chunk_177_cast_fp16")]; + tensor var_2618_equation_0 = const()[name = tensor("op_2618_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2618_cast_fp16 = einsum(equation = var_2618_equation_0, values = (var_2432_cast_fp16, var_1929_cast_fp16))[name = tensor("op_2618_cast_fp16")]; + tensor var_2619_to_fp16 = const()[name = tensor("op_2619_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_179_cast_fp16 = mul(x = var_2618_cast_fp16, y = var_2619_to_fp16)[name = tensor("aw_chunk_179_cast_fp16")]; + tensor var_2622_equation_0 = const()[name = tensor("op_2622_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2622_cast_fp16 = einsum(equation = var_2622_equation_0, values = (var_2432_cast_fp16, var_1936_cast_fp16))[name = tensor("op_2622_cast_fp16")]; + tensor var_2623_to_fp16 = const()[name = tensor("op_2623_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_181_cast_fp16 = mul(x = var_2622_cast_fp16, y = var_2623_to_fp16)[name = tensor("aw_chunk_181_cast_fp16")]; + tensor var_2626_equation_0 = const()[name = tensor("op_2626_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2626_cast_fp16 = einsum(equation = var_2626_equation_0, values = (var_2432_cast_fp16, var_1943_cast_fp16))[name = tensor("op_2626_cast_fp16")]; + tensor var_2627_to_fp16 = const()[name = tensor("op_2627_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_183_cast_fp16 = mul(x = var_2626_cast_fp16, y = var_2627_to_fp16)[name = tensor("aw_chunk_183_cast_fp16")]; + tensor var_2630_equation_0 = const()[name = tensor("op_2630_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2630_cast_fp16 = einsum(equation = var_2630_equation_0, values = (var_2436_cast_fp16, var_1950_cast_fp16))[name = tensor("op_2630_cast_fp16")]; + tensor var_2631_to_fp16 = const()[name = tensor("op_2631_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_185_cast_fp16 = mul(x = var_2630_cast_fp16, y = var_2631_to_fp16)[name = tensor("aw_chunk_185_cast_fp16")]; + tensor var_2634_equation_0 = const()[name = tensor("op_2634_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2634_cast_fp16 = einsum(equation = var_2634_equation_0, values = (var_2436_cast_fp16, var_1957_cast_fp16))[name = tensor("op_2634_cast_fp16")]; + tensor var_2635_to_fp16 = const()[name = tensor("op_2635_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_187_cast_fp16 = mul(x = var_2634_cast_fp16, y = var_2635_to_fp16)[name = tensor("aw_chunk_187_cast_fp16")]; + tensor var_2638_equation_0 = const()[name = tensor("op_2638_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2638_cast_fp16 = einsum(equation = var_2638_equation_0, values = (var_2436_cast_fp16, var_1964_cast_fp16))[name = tensor("op_2638_cast_fp16")]; + tensor var_2639_to_fp16 = const()[name = tensor("op_2639_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_189_cast_fp16 = mul(x = var_2638_cast_fp16, y = var_2639_to_fp16)[name = tensor("aw_chunk_189_cast_fp16")]; + tensor var_2642_equation_0 = const()[name = tensor("op_2642_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2642_cast_fp16 = einsum(equation = var_2642_equation_0, values = (var_2436_cast_fp16, var_1971_cast_fp16))[name = tensor("op_2642_cast_fp16")]; + tensor var_2643_to_fp16 = const()[name = tensor("op_2643_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_191_cast_fp16 = mul(x = var_2642_cast_fp16, y = var_2643_to_fp16)[name = tensor("aw_chunk_191_cast_fp16")]; + tensor var_2646_equation_0 = const()[name = tensor("op_2646_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2646_cast_fp16 = einsum(equation = var_2646_equation_0, values = (var_2440_cast_fp16, var_1978_cast_fp16))[name = tensor("op_2646_cast_fp16")]; + tensor var_2647_to_fp16 = const()[name = tensor("op_2647_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_193_cast_fp16 = mul(x = var_2646_cast_fp16, y = var_2647_to_fp16)[name = tensor("aw_chunk_193_cast_fp16")]; + tensor var_2650_equation_0 = const()[name = tensor("op_2650_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2650_cast_fp16 = einsum(equation = var_2650_equation_0, values = (var_2440_cast_fp16, var_1985_cast_fp16))[name = tensor("op_2650_cast_fp16")]; + tensor var_2651_to_fp16 = const()[name = tensor("op_2651_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_195_cast_fp16 = mul(x = var_2650_cast_fp16, y = var_2651_to_fp16)[name = tensor("aw_chunk_195_cast_fp16")]; + tensor var_2654_equation_0 = const()[name = tensor("op_2654_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2654_cast_fp16 = einsum(equation = var_2654_equation_0, values = (var_2440_cast_fp16, var_1992_cast_fp16))[name = tensor("op_2654_cast_fp16")]; + tensor var_2655_to_fp16 = const()[name = tensor("op_2655_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_197_cast_fp16 = mul(x = var_2654_cast_fp16, y = var_2655_to_fp16)[name = tensor("aw_chunk_197_cast_fp16")]; + tensor var_2658_equation_0 = const()[name = tensor("op_2658_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2658_cast_fp16 = einsum(equation = var_2658_equation_0, values = (var_2440_cast_fp16, var_1999_cast_fp16))[name = tensor("op_2658_cast_fp16")]; + tensor var_2659_to_fp16 = const()[name = tensor("op_2659_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_199_cast_fp16 = mul(x = var_2658_cast_fp16, y = var_2659_to_fp16)[name = tensor("aw_chunk_199_cast_fp16")]; + tensor var_2662_equation_0 = const()[name = tensor("op_2662_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2662_cast_fp16 = einsum(equation = var_2662_equation_0, values = (var_2444_cast_fp16, var_2006_cast_fp16))[name = tensor("op_2662_cast_fp16")]; + tensor var_2663_to_fp16 = const()[name = tensor("op_2663_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_201_cast_fp16 = mul(x = var_2662_cast_fp16, y = var_2663_to_fp16)[name = tensor("aw_chunk_201_cast_fp16")]; + tensor var_2666_equation_0 = const()[name = tensor("op_2666_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2666_cast_fp16 = einsum(equation = var_2666_equation_0, values = (var_2444_cast_fp16, var_2013_cast_fp16))[name = tensor("op_2666_cast_fp16")]; + tensor var_2667_to_fp16 = const()[name = tensor("op_2667_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_203_cast_fp16 = mul(x = var_2666_cast_fp16, y = var_2667_to_fp16)[name = tensor("aw_chunk_203_cast_fp16")]; + tensor var_2670_equation_0 = const()[name = tensor("op_2670_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2670_cast_fp16 = einsum(equation = var_2670_equation_0, values = (var_2444_cast_fp16, var_2020_cast_fp16))[name = tensor("op_2670_cast_fp16")]; + tensor var_2671_to_fp16 = const()[name = tensor("op_2671_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_205_cast_fp16 = mul(x = var_2670_cast_fp16, y = var_2671_to_fp16)[name = tensor("aw_chunk_205_cast_fp16")]; + tensor var_2674_equation_0 = const()[name = tensor("op_2674_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2674_cast_fp16 = einsum(equation = var_2674_equation_0, values = (var_2444_cast_fp16, var_2027_cast_fp16))[name = tensor("op_2674_cast_fp16")]; + tensor var_2675_to_fp16 = const()[name = tensor("op_2675_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_207_cast_fp16 = mul(x = var_2674_cast_fp16, y = var_2675_to_fp16)[name = tensor("aw_chunk_207_cast_fp16")]; + tensor var_2678_equation_0 = const()[name = tensor("op_2678_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2678_cast_fp16 = einsum(equation = var_2678_equation_0, values = (var_2448_cast_fp16, var_2034_cast_fp16))[name = tensor("op_2678_cast_fp16")]; + tensor var_2679_to_fp16 = const()[name = tensor("op_2679_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_209_cast_fp16 = mul(x = var_2678_cast_fp16, y = var_2679_to_fp16)[name = tensor("aw_chunk_209_cast_fp16")]; + tensor var_2682_equation_0 = const()[name = tensor("op_2682_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2682_cast_fp16 = einsum(equation = var_2682_equation_0, values = (var_2448_cast_fp16, var_2041_cast_fp16))[name = tensor("op_2682_cast_fp16")]; + tensor var_2683_to_fp16 = const()[name = tensor("op_2683_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_211_cast_fp16 = mul(x = var_2682_cast_fp16, y = var_2683_to_fp16)[name = tensor("aw_chunk_211_cast_fp16")]; + tensor var_2686_equation_0 = const()[name = tensor("op_2686_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2686_cast_fp16 = einsum(equation = var_2686_equation_0, values = (var_2448_cast_fp16, var_2048_cast_fp16))[name = tensor("op_2686_cast_fp16")]; + tensor var_2687_to_fp16 = const()[name = tensor("op_2687_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_213_cast_fp16 = mul(x = var_2686_cast_fp16, y = var_2687_to_fp16)[name = tensor("aw_chunk_213_cast_fp16")]; + tensor var_2690_equation_0 = const()[name = tensor("op_2690_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2690_cast_fp16 = einsum(equation = var_2690_equation_0, values = (var_2448_cast_fp16, var_2055_cast_fp16))[name = tensor("op_2690_cast_fp16")]; + tensor var_2691_to_fp16 = const()[name = tensor("op_2691_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_215_cast_fp16 = mul(x = var_2690_cast_fp16, y = var_2691_to_fp16)[name = tensor("aw_chunk_215_cast_fp16")]; + tensor var_2694_equation_0 = const()[name = tensor("op_2694_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2694_cast_fp16 = einsum(equation = var_2694_equation_0, values = (var_2452_cast_fp16, var_2062_cast_fp16))[name = tensor("op_2694_cast_fp16")]; + tensor var_2695_to_fp16 = const()[name = tensor("op_2695_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_217_cast_fp16 = mul(x = var_2694_cast_fp16, y = var_2695_to_fp16)[name = tensor("aw_chunk_217_cast_fp16")]; + tensor var_2698_equation_0 = const()[name = tensor("op_2698_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2698_cast_fp16 = einsum(equation = var_2698_equation_0, values = (var_2452_cast_fp16, var_2069_cast_fp16))[name = tensor("op_2698_cast_fp16")]; + tensor var_2699_to_fp16 = const()[name = tensor("op_2699_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_219_cast_fp16 = mul(x = var_2698_cast_fp16, y = var_2699_to_fp16)[name = tensor("aw_chunk_219_cast_fp16")]; + tensor var_2702_equation_0 = const()[name = tensor("op_2702_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2702_cast_fp16 = einsum(equation = var_2702_equation_0, values = (var_2452_cast_fp16, var_2076_cast_fp16))[name = tensor("op_2702_cast_fp16")]; + tensor var_2703_to_fp16 = const()[name = tensor("op_2703_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_221_cast_fp16 = mul(x = var_2702_cast_fp16, y = var_2703_to_fp16)[name = tensor("aw_chunk_221_cast_fp16")]; + tensor var_2706_equation_0 = const()[name = tensor("op_2706_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2706_cast_fp16 = einsum(equation = var_2706_equation_0, values = (var_2452_cast_fp16, var_2083_cast_fp16))[name = tensor("op_2706_cast_fp16")]; + tensor var_2707_to_fp16 = const()[name = tensor("op_2707_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_223_cast_fp16 = mul(x = var_2706_cast_fp16, y = var_2707_to_fp16)[name = tensor("aw_chunk_223_cast_fp16")]; + tensor var_2710_equation_0 = const()[name = tensor("op_2710_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2710_cast_fp16 = einsum(equation = var_2710_equation_0, values = (var_2456_cast_fp16, var_2090_cast_fp16))[name = tensor("op_2710_cast_fp16")]; + tensor var_2711_to_fp16 = const()[name = tensor("op_2711_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_225_cast_fp16 = mul(x = var_2710_cast_fp16, y = var_2711_to_fp16)[name = tensor("aw_chunk_225_cast_fp16")]; + tensor var_2714_equation_0 = const()[name = tensor("op_2714_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2714_cast_fp16 = einsum(equation = var_2714_equation_0, values = (var_2456_cast_fp16, var_2097_cast_fp16))[name = tensor("op_2714_cast_fp16")]; + tensor var_2715_to_fp16 = const()[name = tensor("op_2715_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_227_cast_fp16 = mul(x = var_2714_cast_fp16, y = var_2715_to_fp16)[name = tensor("aw_chunk_227_cast_fp16")]; + tensor var_2718_equation_0 = const()[name = tensor("op_2718_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2718_cast_fp16 = einsum(equation = var_2718_equation_0, values = (var_2456_cast_fp16, var_2104_cast_fp16))[name = tensor("op_2718_cast_fp16")]; + tensor var_2719_to_fp16 = const()[name = tensor("op_2719_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_229_cast_fp16 = mul(x = var_2718_cast_fp16, y = var_2719_to_fp16)[name = tensor("aw_chunk_229_cast_fp16")]; + tensor var_2722_equation_0 = const()[name = tensor("op_2722_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2722_cast_fp16 = einsum(equation = var_2722_equation_0, values = (var_2456_cast_fp16, var_2111_cast_fp16))[name = tensor("op_2722_cast_fp16")]; + tensor var_2723_to_fp16 = const()[name = tensor("op_2723_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_231_cast_fp16 = mul(x = var_2722_cast_fp16, y = var_2723_to_fp16)[name = tensor("aw_chunk_231_cast_fp16")]; + tensor var_2726_equation_0 = const()[name = tensor("op_2726_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2726_cast_fp16 = einsum(equation = var_2726_equation_0, values = (var_2460_cast_fp16, var_2118_cast_fp16))[name = tensor("op_2726_cast_fp16")]; + tensor var_2727_to_fp16 = const()[name = tensor("op_2727_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_233_cast_fp16 = mul(x = var_2726_cast_fp16, y = var_2727_to_fp16)[name = tensor("aw_chunk_233_cast_fp16")]; + tensor var_2730_equation_0 = const()[name = tensor("op_2730_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2730_cast_fp16 = einsum(equation = var_2730_equation_0, values = (var_2460_cast_fp16, var_2125_cast_fp16))[name = tensor("op_2730_cast_fp16")]; + tensor var_2731_to_fp16 = const()[name = tensor("op_2731_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_235_cast_fp16 = mul(x = var_2730_cast_fp16, y = var_2731_to_fp16)[name = tensor("aw_chunk_235_cast_fp16")]; + tensor var_2734_equation_0 = const()[name = tensor("op_2734_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2734_cast_fp16 = einsum(equation = var_2734_equation_0, values = (var_2460_cast_fp16, var_2132_cast_fp16))[name = tensor("op_2734_cast_fp16")]; + tensor var_2735_to_fp16 = const()[name = tensor("op_2735_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_237_cast_fp16 = mul(x = var_2734_cast_fp16, y = var_2735_to_fp16)[name = tensor("aw_chunk_237_cast_fp16")]; + tensor var_2738_equation_0 = const()[name = tensor("op_2738_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2738_cast_fp16 = einsum(equation = var_2738_equation_0, values = (var_2460_cast_fp16, var_2139_cast_fp16))[name = tensor("op_2738_cast_fp16")]; + tensor var_2739_to_fp16 = const()[name = tensor("op_2739_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_239_cast_fp16 = mul(x = var_2738_cast_fp16, y = var_2739_to_fp16)[name = tensor("aw_chunk_239_cast_fp16")]; + tensor var_2742_equation_0 = const()[name = tensor("op_2742_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2742_cast_fp16 = einsum(equation = var_2742_equation_0, values = (var_2464_cast_fp16, var_2146_cast_fp16))[name = tensor("op_2742_cast_fp16")]; + tensor var_2743_to_fp16 = const()[name = tensor("op_2743_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_241_cast_fp16 = mul(x = var_2742_cast_fp16, y = var_2743_to_fp16)[name = tensor("aw_chunk_241_cast_fp16")]; + tensor var_2746_equation_0 = const()[name = tensor("op_2746_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2746_cast_fp16 = einsum(equation = var_2746_equation_0, values = (var_2464_cast_fp16, var_2153_cast_fp16))[name = tensor("op_2746_cast_fp16")]; + tensor var_2747_to_fp16 = const()[name = tensor("op_2747_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_243_cast_fp16 = mul(x = var_2746_cast_fp16, y = var_2747_to_fp16)[name = tensor("aw_chunk_243_cast_fp16")]; + tensor var_2750_equation_0 = const()[name = tensor("op_2750_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2750_cast_fp16 = einsum(equation = var_2750_equation_0, values = (var_2464_cast_fp16, var_2160_cast_fp16))[name = tensor("op_2750_cast_fp16")]; + tensor var_2751_to_fp16 = const()[name = tensor("op_2751_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_245_cast_fp16 = mul(x = var_2750_cast_fp16, y = var_2751_to_fp16)[name = tensor("aw_chunk_245_cast_fp16")]; + tensor var_2754_equation_0 = const()[name = tensor("op_2754_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2754_cast_fp16 = einsum(equation = var_2754_equation_0, values = (var_2464_cast_fp16, var_2167_cast_fp16))[name = tensor("op_2754_cast_fp16")]; + tensor var_2755_to_fp16 = const()[name = tensor("op_2755_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_247_cast_fp16 = mul(x = var_2754_cast_fp16, y = var_2755_to_fp16)[name = tensor("aw_chunk_247_cast_fp16")]; + tensor var_2758_equation_0 = const()[name = tensor("op_2758_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2758_cast_fp16 = einsum(equation = var_2758_equation_0, values = (var_2468_cast_fp16, var_2174_cast_fp16))[name = tensor("op_2758_cast_fp16")]; + tensor var_2759_to_fp16 = const()[name = tensor("op_2759_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_249_cast_fp16 = mul(x = var_2758_cast_fp16, y = var_2759_to_fp16)[name = tensor("aw_chunk_249_cast_fp16")]; + tensor var_2762_equation_0 = const()[name = tensor("op_2762_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2762_cast_fp16 = einsum(equation = var_2762_equation_0, values = (var_2468_cast_fp16, var_2181_cast_fp16))[name = tensor("op_2762_cast_fp16")]; + tensor var_2763_to_fp16 = const()[name = tensor("op_2763_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_251_cast_fp16 = mul(x = var_2762_cast_fp16, y = var_2763_to_fp16)[name = tensor("aw_chunk_251_cast_fp16")]; + tensor var_2766_equation_0 = const()[name = tensor("op_2766_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2766_cast_fp16 = einsum(equation = var_2766_equation_0, values = (var_2468_cast_fp16, var_2188_cast_fp16))[name = tensor("op_2766_cast_fp16")]; + tensor var_2767_to_fp16 = const()[name = tensor("op_2767_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_253_cast_fp16 = mul(x = var_2766_cast_fp16, y = var_2767_to_fp16)[name = tensor("aw_chunk_253_cast_fp16")]; + tensor var_2770_equation_0 = const()[name = tensor("op_2770_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2770_cast_fp16 = einsum(equation = var_2770_equation_0, values = (var_2468_cast_fp16, var_2195_cast_fp16))[name = tensor("op_2770_cast_fp16")]; + tensor var_2771_to_fp16 = const()[name = tensor("op_2771_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_255_cast_fp16 = mul(x = var_2770_cast_fp16, y = var_2771_to_fp16)[name = tensor("aw_chunk_255_cast_fp16")]; + tensor var_2774_equation_0 = const()[name = tensor("op_2774_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2774_cast_fp16 = einsum(equation = var_2774_equation_0, values = (var_2472_cast_fp16, var_2202_cast_fp16))[name = tensor("op_2774_cast_fp16")]; + tensor var_2775_to_fp16 = const()[name = tensor("op_2775_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_257_cast_fp16 = mul(x = var_2774_cast_fp16, y = var_2775_to_fp16)[name = tensor("aw_chunk_257_cast_fp16")]; + tensor var_2778_equation_0 = const()[name = tensor("op_2778_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2778_cast_fp16 = einsum(equation = var_2778_equation_0, values = (var_2472_cast_fp16, var_2209_cast_fp16))[name = tensor("op_2778_cast_fp16")]; + tensor var_2779_to_fp16 = const()[name = tensor("op_2779_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_259_cast_fp16 = mul(x = var_2778_cast_fp16, y = var_2779_to_fp16)[name = tensor("aw_chunk_259_cast_fp16")]; + tensor var_2782_equation_0 = const()[name = tensor("op_2782_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2782_cast_fp16 = einsum(equation = var_2782_equation_0, values = (var_2472_cast_fp16, var_2216_cast_fp16))[name = tensor("op_2782_cast_fp16")]; + tensor var_2783_to_fp16 = const()[name = tensor("op_2783_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_261_cast_fp16 = mul(x = var_2782_cast_fp16, y = var_2783_to_fp16)[name = tensor("aw_chunk_261_cast_fp16")]; + tensor var_2786_equation_0 = const()[name = tensor("op_2786_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2786_cast_fp16 = einsum(equation = var_2786_equation_0, values = (var_2472_cast_fp16, var_2223_cast_fp16))[name = tensor("op_2786_cast_fp16")]; + tensor var_2787_to_fp16 = const()[name = tensor("op_2787_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_263_cast_fp16 = mul(x = var_2786_cast_fp16, y = var_2787_to_fp16)[name = tensor("aw_chunk_263_cast_fp16")]; + tensor var_2790_equation_0 = const()[name = tensor("op_2790_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2790_cast_fp16 = einsum(equation = var_2790_equation_0, values = (var_2476_cast_fp16, var_2230_cast_fp16))[name = tensor("op_2790_cast_fp16")]; + tensor var_2791_to_fp16 = const()[name = tensor("op_2791_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_265_cast_fp16 = mul(x = var_2790_cast_fp16, y = var_2791_to_fp16)[name = tensor("aw_chunk_265_cast_fp16")]; + tensor var_2794_equation_0 = const()[name = tensor("op_2794_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2794_cast_fp16 = einsum(equation = var_2794_equation_0, values = (var_2476_cast_fp16, var_2237_cast_fp16))[name = tensor("op_2794_cast_fp16")]; + tensor var_2795_to_fp16 = const()[name = tensor("op_2795_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_267_cast_fp16 = mul(x = var_2794_cast_fp16, y = var_2795_to_fp16)[name = tensor("aw_chunk_267_cast_fp16")]; + tensor var_2798_equation_0 = const()[name = tensor("op_2798_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2798_cast_fp16 = einsum(equation = var_2798_equation_0, values = (var_2476_cast_fp16, var_2244_cast_fp16))[name = tensor("op_2798_cast_fp16")]; + tensor var_2799_to_fp16 = const()[name = tensor("op_2799_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_269_cast_fp16 = mul(x = var_2798_cast_fp16, y = var_2799_to_fp16)[name = tensor("aw_chunk_269_cast_fp16")]; + tensor var_2802_equation_0 = const()[name = tensor("op_2802_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2802_cast_fp16 = einsum(equation = var_2802_equation_0, values = (var_2476_cast_fp16, var_2251_cast_fp16))[name = tensor("op_2802_cast_fp16")]; + tensor var_2803_to_fp16 = const()[name = tensor("op_2803_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_271_cast_fp16 = mul(x = var_2802_cast_fp16, y = var_2803_to_fp16)[name = tensor("aw_chunk_271_cast_fp16")]; + tensor var_2806_equation_0 = const()[name = tensor("op_2806_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2806_cast_fp16 = einsum(equation = var_2806_equation_0, values = (var_2480_cast_fp16, var_2258_cast_fp16))[name = tensor("op_2806_cast_fp16")]; + tensor var_2807_to_fp16 = const()[name = tensor("op_2807_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_273_cast_fp16 = mul(x = var_2806_cast_fp16, y = var_2807_to_fp16)[name = tensor("aw_chunk_273_cast_fp16")]; + tensor var_2810_equation_0 = const()[name = tensor("op_2810_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2810_cast_fp16 = einsum(equation = var_2810_equation_0, values = (var_2480_cast_fp16, var_2265_cast_fp16))[name = tensor("op_2810_cast_fp16")]; + tensor var_2811_to_fp16 = const()[name = tensor("op_2811_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_275_cast_fp16 = mul(x = var_2810_cast_fp16, y = var_2811_to_fp16)[name = tensor("aw_chunk_275_cast_fp16")]; + tensor var_2814_equation_0 = const()[name = tensor("op_2814_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2814_cast_fp16 = einsum(equation = var_2814_equation_0, values = (var_2480_cast_fp16, var_2272_cast_fp16))[name = tensor("op_2814_cast_fp16")]; + tensor var_2815_to_fp16 = const()[name = tensor("op_2815_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_277_cast_fp16 = mul(x = var_2814_cast_fp16, y = var_2815_to_fp16)[name = tensor("aw_chunk_277_cast_fp16")]; + tensor var_2818_equation_0 = const()[name = tensor("op_2818_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2818_cast_fp16 = einsum(equation = var_2818_equation_0, values = (var_2480_cast_fp16, var_2279_cast_fp16))[name = tensor("op_2818_cast_fp16")]; + tensor var_2819_to_fp16 = const()[name = tensor("op_2819_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_279_cast_fp16 = mul(x = var_2818_cast_fp16, y = var_2819_to_fp16)[name = tensor("aw_chunk_279_cast_fp16")]; + tensor var_2822_equation_0 = const()[name = tensor("op_2822_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2822_cast_fp16 = einsum(equation = var_2822_equation_0, values = (var_2484_cast_fp16, var_2286_cast_fp16))[name = tensor("op_2822_cast_fp16")]; + tensor var_2823_to_fp16 = const()[name = tensor("op_2823_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_281_cast_fp16 = mul(x = var_2822_cast_fp16, y = var_2823_to_fp16)[name = tensor("aw_chunk_281_cast_fp16")]; + tensor var_2826_equation_0 = const()[name = tensor("op_2826_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2826_cast_fp16 = einsum(equation = var_2826_equation_0, values = (var_2484_cast_fp16, var_2293_cast_fp16))[name = tensor("op_2826_cast_fp16")]; + tensor var_2827_to_fp16 = const()[name = tensor("op_2827_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_283_cast_fp16 = mul(x = var_2826_cast_fp16, y = var_2827_to_fp16)[name = tensor("aw_chunk_283_cast_fp16")]; + tensor var_2830_equation_0 = const()[name = tensor("op_2830_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2830_cast_fp16 = einsum(equation = var_2830_equation_0, values = (var_2484_cast_fp16, var_2300_cast_fp16))[name = tensor("op_2830_cast_fp16")]; + tensor var_2831_to_fp16 = const()[name = tensor("op_2831_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_285_cast_fp16 = mul(x = var_2830_cast_fp16, y = var_2831_to_fp16)[name = tensor("aw_chunk_285_cast_fp16")]; + tensor var_2834_equation_0 = const()[name = tensor("op_2834_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2834_cast_fp16 = einsum(equation = var_2834_equation_0, values = (var_2484_cast_fp16, var_2307_cast_fp16))[name = tensor("op_2834_cast_fp16")]; + tensor var_2835_to_fp16 = const()[name = tensor("op_2835_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_287_cast_fp16 = mul(x = var_2834_cast_fp16, y = var_2835_to_fp16)[name = tensor("aw_chunk_287_cast_fp16")]; + tensor var_2838_equation_0 = const()[name = tensor("op_2838_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2838_cast_fp16 = einsum(equation = var_2838_equation_0, values = (var_2488_cast_fp16, var_2314_cast_fp16))[name = tensor("op_2838_cast_fp16")]; + tensor var_2839_to_fp16 = const()[name = tensor("op_2839_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_289_cast_fp16 = mul(x = var_2838_cast_fp16, y = var_2839_to_fp16)[name = tensor("aw_chunk_289_cast_fp16")]; + tensor var_2842_equation_0 = const()[name = tensor("op_2842_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2842_cast_fp16 = einsum(equation = var_2842_equation_0, values = (var_2488_cast_fp16, var_2321_cast_fp16))[name = tensor("op_2842_cast_fp16")]; + tensor var_2843_to_fp16 = const()[name = tensor("op_2843_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_291_cast_fp16 = mul(x = var_2842_cast_fp16, y = var_2843_to_fp16)[name = tensor("aw_chunk_291_cast_fp16")]; + tensor var_2846_equation_0 = const()[name = tensor("op_2846_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2846_cast_fp16 = einsum(equation = var_2846_equation_0, values = (var_2488_cast_fp16, var_2328_cast_fp16))[name = tensor("op_2846_cast_fp16")]; + tensor var_2847_to_fp16 = const()[name = tensor("op_2847_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_293_cast_fp16 = mul(x = var_2846_cast_fp16, y = var_2847_to_fp16)[name = tensor("aw_chunk_293_cast_fp16")]; + tensor var_2850_equation_0 = const()[name = tensor("op_2850_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2850_cast_fp16 = einsum(equation = var_2850_equation_0, values = (var_2488_cast_fp16, var_2335_cast_fp16))[name = tensor("op_2850_cast_fp16")]; + tensor var_2851_to_fp16 = const()[name = tensor("op_2851_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_295_cast_fp16 = mul(x = var_2850_cast_fp16, y = var_2851_to_fp16)[name = tensor("aw_chunk_295_cast_fp16")]; + tensor var_2854_equation_0 = const()[name = tensor("op_2854_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2854_cast_fp16 = einsum(equation = var_2854_equation_0, values = (var_2492_cast_fp16, var_2342_cast_fp16))[name = tensor("op_2854_cast_fp16")]; + tensor var_2855_to_fp16 = const()[name = tensor("op_2855_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_297_cast_fp16 = mul(x = var_2854_cast_fp16, y = var_2855_to_fp16)[name = tensor("aw_chunk_297_cast_fp16")]; + tensor var_2858_equation_0 = const()[name = tensor("op_2858_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2858_cast_fp16 = einsum(equation = var_2858_equation_0, values = (var_2492_cast_fp16, var_2349_cast_fp16))[name = tensor("op_2858_cast_fp16")]; + tensor var_2859_to_fp16 = const()[name = tensor("op_2859_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_299_cast_fp16 = mul(x = var_2858_cast_fp16, y = var_2859_to_fp16)[name = tensor("aw_chunk_299_cast_fp16")]; + tensor var_2862_equation_0 = const()[name = tensor("op_2862_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2862_cast_fp16 = einsum(equation = var_2862_equation_0, values = (var_2492_cast_fp16, var_2356_cast_fp16))[name = tensor("op_2862_cast_fp16")]; + tensor var_2863_to_fp16 = const()[name = tensor("op_2863_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_301_cast_fp16 = mul(x = var_2862_cast_fp16, y = var_2863_to_fp16)[name = tensor("aw_chunk_301_cast_fp16")]; + tensor var_2866_equation_0 = const()[name = tensor("op_2866_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2866_cast_fp16 = einsum(equation = var_2866_equation_0, values = (var_2492_cast_fp16, var_2363_cast_fp16))[name = tensor("op_2866_cast_fp16")]; + tensor var_2867_to_fp16 = const()[name = tensor("op_2867_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_303_cast_fp16 = mul(x = var_2866_cast_fp16, y = var_2867_to_fp16)[name = tensor("aw_chunk_303_cast_fp16")]; + tensor var_2870_equation_0 = const()[name = tensor("op_2870_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2870_cast_fp16 = einsum(equation = var_2870_equation_0, values = (var_2496_cast_fp16, var_2370_cast_fp16))[name = tensor("op_2870_cast_fp16")]; + tensor var_2871_to_fp16 = const()[name = tensor("op_2871_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_305_cast_fp16 = mul(x = var_2870_cast_fp16, y = var_2871_to_fp16)[name = tensor("aw_chunk_305_cast_fp16")]; + tensor var_2874_equation_0 = const()[name = tensor("op_2874_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2874_cast_fp16 = einsum(equation = var_2874_equation_0, values = (var_2496_cast_fp16, var_2377_cast_fp16))[name = tensor("op_2874_cast_fp16")]; + tensor var_2875_to_fp16 = const()[name = tensor("op_2875_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_307_cast_fp16 = mul(x = var_2874_cast_fp16, y = var_2875_to_fp16)[name = tensor("aw_chunk_307_cast_fp16")]; + tensor var_2878_equation_0 = const()[name = tensor("op_2878_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2878_cast_fp16 = einsum(equation = var_2878_equation_0, values = (var_2496_cast_fp16, var_2384_cast_fp16))[name = tensor("op_2878_cast_fp16")]; + tensor var_2879_to_fp16 = const()[name = tensor("op_2879_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_309_cast_fp16 = mul(x = var_2878_cast_fp16, y = var_2879_to_fp16)[name = tensor("aw_chunk_309_cast_fp16")]; + tensor var_2882_equation_0 = const()[name = tensor("op_2882_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2882_cast_fp16 = einsum(equation = var_2882_equation_0, values = (var_2496_cast_fp16, var_2391_cast_fp16))[name = tensor("op_2882_cast_fp16")]; + tensor var_2883_to_fp16 = const()[name = tensor("op_2883_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_311_cast_fp16 = mul(x = var_2882_cast_fp16, y = var_2883_to_fp16)[name = tensor("aw_chunk_311_cast_fp16")]; + tensor var_2886_equation_0 = const()[name = tensor("op_2886_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2886_cast_fp16 = einsum(equation = var_2886_equation_0, values = (var_2500_cast_fp16, var_2398_cast_fp16))[name = tensor("op_2886_cast_fp16")]; + tensor var_2887_to_fp16 = const()[name = tensor("op_2887_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_313_cast_fp16 = mul(x = var_2886_cast_fp16, y = var_2887_to_fp16)[name = tensor("aw_chunk_313_cast_fp16")]; + tensor var_2890_equation_0 = const()[name = tensor("op_2890_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2890_cast_fp16 = einsum(equation = var_2890_equation_0, values = (var_2500_cast_fp16, var_2405_cast_fp16))[name = tensor("op_2890_cast_fp16")]; + tensor var_2891_to_fp16 = const()[name = tensor("op_2891_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_315_cast_fp16 = mul(x = var_2890_cast_fp16, y = var_2891_to_fp16)[name = tensor("aw_chunk_315_cast_fp16")]; + tensor var_2894_equation_0 = const()[name = tensor("op_2894_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2894_cast_fp16 = einsum(equation = var_2894_equation_0, values = (var_2500_cast_fp16, var_2412_cast_fp16))[name = tensor("op_2894_cast_fp16")]; + tensor var_2895_to_fp16 = const()[name = tensor("op_2895_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_317_cast_fp16 = mul(x = var_2894_cast_fp16, y = var_2895_to_fp16)[name = tensor("aw_chunk_317_cast_fp16")]; + tensor var_2898_equation_0 = const()[name = tensor("op_2898_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_2898_cast_fp16 = einsum(equation = var_2898_equation_0, values = (var_2500_cast_fp16, var_2419_cast_fp16))[name = tensor("op_2898_cast_fp16")]; + tensor var_2899_to_fp16 = const()[name = tensor("op_2899_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_319_cast_fp16 = mul(x = var_2898_cast_fp16, y = var_2899_to_fp16)[name = tensor("aw_chunk_319_cast_fp16")]; + tensor var_2901_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_161_cast_fp16)[name = tensor("op_2901_cast_fp16")]; + tensor var_2902_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_163_cast_fp16)[name = tensor("op_2902_cast_fp16")]; + tensor var_2903_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_165_cast_fp16)[name = tensor("op_2903_cast_fp16")]; + tensor var_2904_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_167_cast_fp16)[name = tensor("op_2904_cast_fp16")]; + tensor var_2905_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_169_cast_fp16)[name = tensor("op_2905_cast_fp16")]; + tensor var_2906_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_171_cast_fp16)[name = tensor("op_2906_cast_fp16")]; + tensor var_2907_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_173_cast_fp16)[name = tensor("op_2907_cast_fp16")]; + tensor var_2908_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_175_cast_fp16)[name = tensor("op_2908_cast_fp16")]; + tensor var_2909_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_177_cast_fp16)[name = tensor("op_2909_cast_fp16")]; + tensor var_2910_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_179_cast_fp16)[name = tensor("op_2910_cast_fp16")]; + tensor var_2911_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_181_cast_fp16)[name = tensor("op_2911_cast_fp16")]; + tensor var_2912_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_183_cast_fp16)[name = tensor("op_2912_cast_fp16")]; + tensor var_2913_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_185_cast_fp16)[name = tensor("op_2913_cast_fp16")]; + tensor var_2914_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_187_cast_fp16)[name = tensor("op_2914_cast_fp16")]; + tensor var_2915_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_189_cast_fp16)[name = tensor("op_2915_cast_fp16")]; + tensor var_2916_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_191_cast_fp16)[name = tensor("op_2916_cast_fp16")]; + tensor var_2917_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_193_cast_fp16)[name = tensor("op_2917_cast_fp16")]; + tensor var_2918_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_195_cast_fp16)[name = tensor("op_2918_cast_fp16")]; + tensor var_2919_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_197_cast_fp16)[name = tensor("op_2919_cast_fp16")]; + tensor var_2920_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_199_cast_fp16)[name = tensor("op_2920_cast_fp16")]; + tensor var_2921_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_201_cast_fp16)[name = tensor("op_2921_cast_fp16")]; + tensor var_2922_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_203_cast_fp16)[name = tensor("op_2922_cast_fp16")]; + tensor var_2923_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_205_cast_fp16)[name = tensor("op_2923_cast_fp16")]; + tensor var_2924_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_207_cast_fp16)[name = tensor("op_2924_cast_fp16")]; + tensor var_2925_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_209_cast_fp16)[name = tensor("op_2925_cast_fp16")]; + tensor var_2926_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_211_cast_fp16)[name = tensor("op_2926_cast_fp16")]; + tensor var_2927_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_213_cast_fp16)[name = tensor("op_2927_cast_fp16")]; + tensor var_2928_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_215_cast_fp16)[name = tensor("op_2928_cast_fp16")]; + tensor var_2929_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_217_cast_fp16)[name = tensor("op_2929_cast_fp16")]; + tensor var_2930_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_219_cast_fp16)[name = tensor("op_2930_cast_fp16")]; + tensor var_2931_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_221_cast_fp16)[name = tensor("op_2931_cast_fp16")]; + tensor var_2932_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_223_cast_fp16)[name = tensor("op_2932_cast_fp16")]; + tensor var_2933_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_225_cast_fp16)[name = tensor("op_2933_cast_fp16")]; + tensor var_2934_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_227_cast_fp16)[name = tensor("op_2934_cast_fp16")]; + tensor var_2935_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_229_cast_fp16)[name = tensor("op_2935_cast_fp16")]; + tensor var_2936_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_231_cast_fp16)[name = tensor("op_2936_cast_fp16")]; + tensor var_2937_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_233_cast_fp16)[name = tensor("op_2937_cast_fp16")]; + tensor var_2938_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_235_cast_fp16)[name = tensor("op_2938_cast_fp16")]; + tensor var_2939_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_237_cast_fp16)[name = tensor("op_2939_cast_fp16")]; + tensor var_2940_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_239_cast_fp16)[name = tensor("op_2940_cast_fp16")]; + tensor var_2941_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_241_cast_fp16)[name = tensor("op_2941_cast_fp16")]; + tensor var_2942_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_243_cast_fp16)[name = tensor("op_2942_cast_fp16")]; + tensor var_2943_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_245_cast_fp16)[name = tensor("op_2943_cast_fp16")]; + tensor var_2944_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_247_cast_fp16)[name = tensor("op_2944_cast_fp16")]; + tensor var_2945_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_249_cast_fp16)[name = tensor("op_2945_cast_fp16")]; + tensor var_2946_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_251_cast_fp16)[name = tensor("op_2946_cast_fp16")]; + tensor var_2947_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_253_cast_fp16)[name = tensor("op_2947_cast_fp16")]; + tensor var_2948_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_255_cast_fp16)[name = tensor("op_2948_cast_fp16")]; + tensor var_2949_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_257_cast_fp16)[name = tensor("op_2949_cast_fp16")]; + tensor var_2950_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_259_cast_fp16)[name = tensor("op_2950_cast_fp16")]; + tensor var_2951_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_261_cast_fp16)[name = tensor("op_2951_cast_fp16")]; + tensor var_2952_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_263_cast_fp16)[name = tensor("op_2952_cast_fp16")]; + tensor var_2953_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_265_cast_fp16)[name = tensor("op_2953_cast_fp16")]; + tensor var_2954_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_267_cast_fp16)[name = tensor("op_2954_cast_fp16")]; + tensor var_2955_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_269_cast_fp16)[name = tensor("op_2955_cast_fp16")]; + tensor var_2956_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_271_cast_fp16)[name = tensor("op_2956_cast_fp16")]; + tensor var_2957_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_273_cast_fp16)[name = tensor("op_2957_cast_fp16")]; + tensor var_2958_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_275_cast_fp16)[name = tensor("op_2958_cast_fp16")]; + tensor var_2959_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_277_cast_fp16)[name = tensor("op_2959_cast_fp16")]; + tensor var_2960_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_279_cast_fp16)[name = tensor("op_2960_cast_fp16")]; + tensor var_2961_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_281_cast_fp16)[name = tensor("op_2961_cast_fp16")]; + tensor var_2962_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_283_cast_fp16)[name = tensor("op_2962_cast_fp16")]; + tensor var_2963_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_285_cast_fp16)[name = tensor("op_2963_cast_fp16")]; + tensor var_2964_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_287_cast_fp16)[name = tensor("op_2964_cast_fp16")]; + tensor var_2965_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_289_cast_fp16)[name = tensor("op_2965_cast_fp16")]; + tensor var_2966_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_291_cast_fp16)[name = tensor("op_2966_cast_fp16")]; + tensor var_2967_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_293_cast_fp16)[name = tensor("op_2967_cast_fp16")]; + tensor var_2968_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_295_cast_fp16)[name = tensor("op_2968_cast_fp16")]; + tensor var_2969_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_297_cast_fp16)[name = tensor("op_2969_cast_fp16")]; + tensor var_2970_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_299_cast_fp16)[name = tensor("op_2970_cast_fp16")]; + tensor var_2971_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_301_cast_fp16)[name = tensor("op_2971_cast_fp16")]; + tensor var_2972_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_303_cast_fp16)[name = tensor("op_2972_cast_fp16")]; + tensor var_2973_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_305_cast_fp16)[name = tensor("op_2973_cast_fp16")]; + tensor var_2974_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_307_cast_fp16)[name = tensor("op_2974_cast_fp16")]; + tensor var_2975_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_309_cast_fp16)[name = tensor("op_2975_cast_fp16")]; + tensor var_2976_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_311_cast_fp16)[name = tensor("op_2976_cast_fp16")]; + tensor var_2977_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_313_cast_fp16)[name = tensor("op_2977_cast_fp16")]; + tensor var_2978_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_315_cast_fp16)[name = tensor("op_2978_cast_fp16")]; + tensor var_2979_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_317_cast_fp16)[name = tensor("op_2979_cast_fp16")]; + tensor var_2980_cast_fp16 = softmax(axis = var_1726, x = aw_chunk_319_cast_fp16)[name = tensor("op_2980_cast_fp16")]; + tensor var_2982_equation_0 = const()[name = tensor("op_2982_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2982_cast_fp16 = einsum(equation = var_2982_equation_0, values = (var_2502_cast_fp16, var_2901_cast_fp16))[name = tensor("op_2982_cast_fp16")]; + tensor var_2984_equation_0 = const()[name = tensor("op_2984_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2984_cast_fp16 = einsum(equation = var_2984_equation_0, values = (var_2502_cast_fp16, var_2902_cast_fp16))[name = tensor("op_2984_cast_fp16")]; + tensor var_2986_equation_0 = const()[name = tensor("op_2986_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2986_cast_fp16 = einsum(equation = var_2986_equation_0, values = (var_2502_cast_fp16, var_2903_cast_fp16))[name = tensor("op_2986_cast_fp16")]; + tensor var_2988_equation_0 = const()[name = tensor("op_2988_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2988_cast_fp16 = einsum(equation = var_2988_equation_0, values = (var_2502_cast_fp16, var_2904_cast_fp16))[name = tensor("op_2988_cast_fp16")]; + tensor var_2990_equation_0 = const()[name = tensor("op_2990_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2990_cast_fp16 = einsum(equation = var_2990_equation_0, values = (var_2506_cast_fp16, var_2905_cast_fp16))[name = tensor("op_2990_cast_fp16")]; + tensor var_2992_equation_0 = const()[name = tensor("op_2992_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2992_cast_fp16 = einsum(equation = var_2992_equation_0, values = (var_2506_cast_fp16, var_2906_cast_fp16))[name = tensor("op_2992_cast_fp16")]; + tensor var_2994_equation_0 = const()[name = tensor("op_2994_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2994_cast_fp16 = einsum(equation = var_2994_equation_0, values = (var_2506_cast_fp16, var_2907_cast_fp16))[name = tensor("op_2994_cast_fp16")]; + tensor var_2996_equation_0 = const()[name = tensor("op_2996_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2996_cast_fp16 = einsum(equation = var_2996_equation_0, values = (var_2506_cast_fp16, var_2908_cast_fp16))[name = tensor("op_2996_cast_fp16")]; + tensor var_2998_equation_0 = const()[name = tensor("op_2998_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_2998_cast_fp16 = einsum(equation = var_2998_equation_0, values = (var_2510_cast_fp16, var_2909_cast_fp16))[name = tensor("op_2998_cast_fp16")]; + tensor var_3000_equation_0 = const()[name = tensor("op_3000_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3000_cast_fp16 = einsum(equation = var_3000_equation_0, values = (var_2510_cast_fp16, var_2910_cast_fp16))[name = tensor("op_3000_cast_fp16")]; + tensor var_3002_equation_0 = const()[name = tensor("op_3002_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3002_cast_fp16 = einsum(equation = var_3002_equation_0, values = (var_2510_cast_fp16, var_2911_cast_fp16))[name = tensor("op_3002_cast_fp16")]; + tensor var_3004_equation_0 = const()[name = tensor("op_3004_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3004_cast_fp16 = einsum(equation = var_3004_equation_0, values = (var_2510_cast_fp16, var_2912_cast_fp16))[name = tensor("op_3004_cast_fp16")]; + tensor var_3006_equation_0 = const()[name = tensor("op_3006_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3006_cast_fp16 = einsum(equation = var_3006_equation_0, values = (var_2514_cast_fp16, var_2913_cast_fp16))[name = tensor("op_3006_cast_fp16")]; + tensor var_3008_equation_0 = const()[name = tensor("op_3008_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3008_cast_fp16 = einsum(equation = var_3008_equation_0, values = (var_2514_cast_fp16, var_2914_cast_fp16))[name = tensor("op_3008_cast_fp16")]; + tensor var_3010_equation_0 = const()[name = tensor("op_3010_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3010_cast_fp16 = einsum(equation = var_3010_equation_0, values = (var_2514_cast_fp16, var_2915_cast_fp16))[name = tensor("op_3010_cast_fp16")]; + tensor var_3012_equation_0 = const()[name = tensor("op_3012_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3012_cast_fp16 = einsum(equation = var_3012_equation_0, values = (var_2514_cast_fp16, var_2916_cast_fp16))[name = tensor("op_3012_cast_fp16")]; + tensor var_3014_equation_0 = const()[name = tensor("op_3014_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3014_cast_fp16 = einsum(equation = var_3014_equation_0, values = (var_2518_cast_fp16, var_2917_cast_fp16))[name = tensor("op_3014_cast_fp16")]; + tensor var_3016_equation_0 = const()[name = tensor("op_3016_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3016_cast_fp16 = einsum(equation = var_3016_equation_0, values = (var_2518_cast_fp16, var_2918_cast_fp16))[name = tensor("op_3016_cast_fp16")]; + tensor var_3018_equation_0 = const()[name = tensor("op_3018_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3018_cast_fp16 = einsum(equation = var_3018_equation_0, values = (var_2518_cast_fp16, var_2919_cast_fp16))[name = tensor("op_3018_cast_fp16")]; + tensor var_3020_equation_0 = const()[name = tensor("op_3020_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3020_cast_fp16 = einsum(equation = var_3020_equation_0, values = (var_2518_cast_fp16, var_2920_cast_fp16))[name = tensor("op_3020_cast_fp16")]; + tensor var_3022_equation_0 = const()[name = tensor("op_3022_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3022_cast_fp16 = einsum(equation = var_3022_equation_0, values = (var_2522_cast_fp16, var_2921_cast_fp16))[name = tensor("op_3022_cast_fp16")]; + tensor var_3024_equation_0 = const()[name = tensor("op_3024_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3024_cast_fp16 = einsum(equation = var_3024_equation_0, values = (var_2522_cast_fp16, var_2922_cast_fp16))[name = tensor("op_3024_cast_fp16")]; + tensor var_3026_equation_0 = const()[name = tensor("op_3026_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3026_cast_fp16 = einsum(equation = var_3026_equation_0, values = (var_2522_cast_fp16, var_2923_cast_fp16))[name = tensor("op_3026_cast_fp16")]; + tensor var_3028_equation_0 = const()[name = tensor("op_3028_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3028_cast_fp16 = einsum(equation = var_3028_equation_0, values = (var_2522_cast_fp16, var_2924_cast_fp16))[name = tensor("op_3028_cast_fp16")]; + tensor var_3030_equation_0 = const()[name = tensor("op_3030_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3030_cast_fp16 = einsum(equation = var_3030_equation_0, values = (var_2526_cast_fp16, var_2925_cast_fp16))[name = tensor("op_3030_cast_fp16")]; + tensor var_3032_equation_0 = const()[name = tensor("op_3032_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3032_cast_fp16 = einsum(equation = var_3032_equation_0, values = (var_2526_cast_fp16, var_2926_cast_fp16))[name = tensor("op_3032_cast_fp16")]; + tensor var_3034_equation_0 = const()[name = tensor("op_3034_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3034_cast_fp16 = einsum(equation = var_3034_equation_0, values = (var_2526_cast_fp16, var_2927_cast_fp16))[name = tensor("op_3034_cast_fp16")]; + tensor var_3036_equation_0 = const()[name = tensor("op_3036_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3036_cast_fp16 = einsum(equation = var_3036_equation_0, values = (var_2526_cast_fp16, var_2928_cast_fp16))[name = tensor("op_3036_cast_fp16")]; + tensor var_3038_equation_0 = const()[name = tensor("op_3038_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3038_cast_fp16 = einsum(equation = var_3038_equation_0, values = (var_2530_cast_fp16, var_2929_cast_fp16))[name = tensor("op_3038_cast_fp16")]; + tensor var_3040_equation_0 = const()[name = tensor("op_3040_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3040_cast_fp16 = einsum(equation = var_3040_equation_0, values = (var_2530_cast_fp16, var_2930_cast_fp16))[name = tensor("op_3040_cast_fp16")]; + tensor var_3042_equation_0 = const()[name = tensor("op_3042_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3042_cast_fp16 = einsum(equation = var_3042_equation_0, values = (var_2530_cast_fp16, var_2931_cast_fp16))[name = tensor("op_3042_cast_fp16")]; + tensor var_3044_equation_0 = const()[name = tensor("op_3044_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3044_cast_fp16 = einsum(equation = var_3044_equation_0, values = (var_2530_cast_fp16, var_2932_cast_fp16))[name = tensor("op_3044_cast_fp16")]; + tensor var_3046_equation_0 = const()[name = tensor("op_3046_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3046_cast_fp16 = einsum(equation = var_3046_equation_0, values = (var_2534_cast_fp16, var_2933_cast_fp16))[name = tensor("op_3046_cast_fp16")]; + tensor var_3048_equation_0 = const()[name = tensor("op_3048_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3048_cast_fp16 = einsum(equation = var_3048_equation_0, values = (var_2534_cast_fp16, var_2934_cast_fp16))[name = tensor("op_3048_cast_fp16")]; + tensor var_3050_equation_0 = const()[name = tensor("op_3050_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3050_cast_fp16 = einsum(equation = var_3050_equation_0, values = (var_2534_cast_fp16, var_2935_cast_fp16))[name = tensor("op_3050_cast_fp16")]; + tensor var_3052_equation_0 = const()[name = tensor("op_3052_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3052_cast_fp16 = einsum(equation = var_3052_equation_0, values = (var_2534_cast_fp16, var_2936_cast_fp16))[name = tensor("op_3052_cast_fp16")]; + tensor var_3054_equation_0 = const()[name = tensor("op_3054_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3054_cast_fp16 = einsum(equation = var_3054_equation_0, values = (var_2538_cast_fp16, var_2937_cast_fp16))[name = tensor("op_3054_cast_fp16")]; + tensor var_3056_equation_0 = const()[name = tensor("op_3056_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3056_cast_fp16 = einsum(equation = var_3056_equation_0, values = (var_2538_cast_fp16, var_2938_cast_fp16))[name = tensor("op_3056_cast_fp16")]; + tensor var_3058_equation_0 = const()[name = tensor("op_3058_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3058_cast_fp16 = einsum(equation = var_3058_equation_0, values = (var_2538_cast_fp16, var_2939_cast_fp16))[name = tensor("op_3058_cast_fp16")]; + tensor var_3060_equation_0 = const()[name = tensor("op_3060_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3060_cast_fp16 = einsum(equation = var_3060_equation_0, values = (var_2538_cast_fp16, var_2940_cast_fp16))[name = tensor("op_3060_cast_fp16")]; + tensor var_3062_equation_0 = const()[name = tensor("op_3062_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3062_cast_fp16 = einsum(equation = var_3062_equation_0, values = (var_2542_cast_fp16, var_2941_cast_fp16))[name = tensor("op_3062_cast_fp16")]; + tensor var_3064_equation_0 = const()[name = tensor("op_3064_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3064_cast_fp16 = einsum(equation = var_3064_equation_0, values = (var_2542_cast_fp16, var_2942_cast_fp16))[name = tensor("op_3064_cast_fp16")]; + tensor var_3066_equation_0 = const()[name = tensor("op_3066_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3066_cast_fp16 = einsum(equation = var_3066_equation_0, values = (var_2542_cast_fp16, var_2943_cast_fp16))[name = tensor("op_3066_cast_fp16")]; + tensor var_3068_equation_0 = const()[name = tensor("op_3068_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3068_cast_fp16 = einsum(equation = var_3068_equation_0, values = (var_2542_cast_fp16, var_2944_cast_fp16))[name = tensor("op_3068_cast_fp16")]; + tensor var_3070_equation_0 = const()[name = tensor("op_3070_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3070_cast_fp16 = einsum(equation = var_3070_equation_0, values = (var_2546_cast_fp16, var_2945_cast_fp16))[name = tensor("op_3070_cast_fp16")]; + tensor var_3072_equation_0 = const()[name = tensor("op_3072_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3072_cast_fp16 = einsum(equation = var_3072_equation_0, values = (var_2546_cast_fp16, var_2946_cast_fp16))[name = tensor("op_3072_cast_fp16")]; + tensor var_3074_equation_0 = const()[name = tensor("op_3074_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3074_cast_fp16 = einsum(equation = var_3074_equation_0, values = (var_2546_cast_fp16, var_2947_cast_fp16))[name = tensor("op_3074_cast_fp16")]; + tensor var_3076_equation_0 = const()[name = tensor("op_3076_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3076_cast_fp16 = einsum(equation = var_3076_equation_0, values = (var_2546_cast_fp16, var_2948_cast_fp16))[name = tensor("op_3076_cast_fp16")]; + tensor var_3078_equation_0 = const()[name = tensor("op_3078_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3078_cast_fp16 = einsum(equation = var_3078_equation_0, values = (var_2550_cast_fp16, var_2949_cast_fp16))[name = tensor("op_3078_cast_fp16")]; + tensor var_3080_equation_0 = const()[name = tensor("op_3080_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3080_cast_fp16 = einsum(equation = var_3080_equation_0, values = (var_2550_cast_fp16, var_2950_cast_fp16))[name = tensor("op_3080_cast_fp16")]; + tensor var_3082_equation_0 = const()[name = tensor("op_3082_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3082_cast_fp16 = einsum(equation = var_3082_equation_0, values = (var_2550_cast_fp16, var_2951_cast_fp16))[name = tensor("op_3082_cast_fp16")]; + tensor var_3084_equation_0 = const()[name = tensor("op_3084_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3084_cast_fp16 = einsum(equation = var_3084_equation_0, values = (var_2550_cast_fp16, var_2952_cast_fp16))[name = tensor("op_3084_cast_fp16")]; + tensor var_3086_equation_0 = const()[name = tensor("op_3086_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3086_cast_fp16 = einsum(equation = var_3086_equation_0, values = (var_2554_cast_fp16, var_2953_cast_fp16))[name = tensor("op_3086_cast_fp16")]; + tensor var_3088_equation_0 = const()[name = tensor("op_3088_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3088_cast_fp16 = einsum(equation = var_3088_equation_0, values = (var_2554_cast_fp16, var_2954_cast_fp16))[name = tensor("op_3088_cast_fp16")]; + tensor var_3090_equation_0 = const()[name = tensor("op_3090_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3090_cast_fp16 = einsum(equation = var_3090_equation_0, values = (var_2554_cast_fp16, var_2955_cast_fp16))[name = tensor("op_3090_cast_fp16")]; + tensor var_3092_equation_0 = const()[name = tensor("op_3092_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3092_cast_fp16 = einsum(equation = var_3092_equation_0, values = (var_2554_cast_fp16, var_2956_cast_fp16))[name = tensor("op_3092_cast_fp16")]; + tensor var_3094_equation_0 = const()[name = tensor("op_3094_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3094_cast_fp16 = einsum(equation = var_3094_equation_0, values = (var_2558_cast_fp16, var_2957_cast_fp16))[name = tensor("op_3094_cast_fp16")]; + tensor var_3096_equation_0 = const()[name = tensor("op_3096_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3096_cast_fp16 = einsum(equation = var_3096_equation_0, values = (var_2558_cast_fp16, var_2958_cast_fp16))[name = tensor("op_3096_cast_fp16")]; + tensor var_3098_equation_0 = const()[name = tensor("op_3098_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3098_cast_fp16 = einsum(equation = var_3098_equation_0, values = (var_2558_cast_fp16, var_2959_cast_fp16))[name = tensor("op_3098_cast_fp16")]; + tensor var_3100_equation_0 = const()[name = tensor("op_3100_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3100_cast_fp16 = einsum(equation = var_3100_equation_0, values = (var_2558_cast_fp16, var_2960_cast_fp16))[name = tensor("op_3100_cast_fp16")]; + tensor var_3102_equation_0 = const()[name = tensor("op_3102_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3102_cast_fp16 = einsum(equation = var_3102_equation_0, values = (var_2562_cast_fp16, var_2961_cast_fp16))[name = tensor("op_3102_cast_fp16")]; + tensor var_3104_equation_0 = const()[name = tensor("op_3104_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3104_cast_fp16 = einsum(equation = var_3104_equation_0, values = (var_2562_cast_fp16, var_2962_cast_fp16))[name = tensor("op_3104_cast_fp16")]; + tensor var_3106_equation_0 = const()[name = tensor("op_3106_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3106_cast_fp16 = einsum(equation = var_3106_equation_0, values = (var_2562_cast_fp16, var_2963_cast_fp16))[name = tensor("op_3106_cast_fp16")]; + tensor var_3108_equation_0 = const()[name = tensor("op_3108_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3108_cast_fp16 = einsum(equation = var_3108_equation_0, values = (var_2562_cast_fp16, var_2964_cast_fp16))[name = tensor("op_3108_cast_fp16")]; + tensor var_3110_equation_0 = const()[name = tensor("op_3110_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3110_cast_fp16 = einsum(equation = var_3110_equation_0, values = (var_2566_cast_fp16, var_2965_cast_fp16))[name = tensor("op_3110_cast_fp16")]; + tensor var_3112_equation_0 = const()[name = tensor("op_3112_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3112_cast_fp16 = einsum(equation = var_3112_equation_0, values = (var_2566_cast_fp16, var_2966_cast_fp16))[name = tensor("op_3112_cast_fp16")]; + tensor var_3114_equation_0 = const()[name = tensor("op_3114_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3114_cast_fp16 = einsum(equation = var_3114_equation_0, values = (var_2566_cast_fp16, var_2967_cast_fp16))[name = tensor("op_3114_cast_fp16")]; + tensor var_3116_equation_0 = const()[name = tensor("op_3116_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3116_cast_fp16 = einsum(equation = var_3116_equation_0, values = (var_2566_cast_fp16, var_2968_cast_fp16))[name = tensor("op_3116_cast_fp16")]; + tensor var_3118_equation_0 = const()[name = tensor("op_3118_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3118_cast_fp16 = einsum(equation = var_3118_equation_0, values = (var_2570_cast_fp16, var_2969_cast_fp16))[name = tensor("op_3118_cast_fp16")]; + tensor var_3120_equation_0 = const()[name = tensor("op_3120_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3120_cast_fp16 = einsum(equation = var_3120_equation_0, values = (var_2570_cast_fp16, var_2970_cast_fp16))[name = tensor("op_3120_cast_fp16")]; + tensor var_3122_equation_0 = const()[name = tensor("op_3122_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3122_cast_fp16 = einsum(equation = var_3122_equation_0, values = (var_2570_cast_fp16, var_2971_cast_fp16))[name = tensor("op_3122_cast_fp16")]; + tensor var_3124_equation_0 = const()[name = tensor("op_3124_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3124_cast_fp16 = einsum(equation = var_3124_equation_0, values = (var_2570_cast_fp16, var_2972_cast_fp16))[name = tensor("op_3124_cast_fp16")]; + tensor var_3126_equation_0 = const()[name = tensor("op_3126_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3126_cast_fp16 = einsum(equation = var_3126_equation_0, values = (var_2574_cast_fp16, var_2973_cast_fp16))[name = tensor("op_3126_cast_fp16")]; + tensor var_3128_equation_0 = const()[name = tensor("op_3128_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3128_cast_fp16 = einsum(equation = var_3128_equation_0, values = (var_2574_cast_fp16, var_2974_cast_fp16))[name = tensor("op_3128_cast_fp16")]; + tensor var_3130_equation_0 = const()[name = tensor("op_3130_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3130_cast_fp16 = einsum(equation = var_3130_equation_0, values = (var_2574_cast_fp16, var_2975_cast_fp16))[name = tensor("op_3130_cast_fp16")]; + tensor var_3132_equation_0 = const()[name = tensor("op_3132_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3132_cast_fp16 = einsum(equation = var_3132_equation_0, values = (var_2574_cast_fp16, var_2976_cast_fp16))[name = tensor("op_3132_cast_fp16")]; + tensor var_3134_equation_0 = const()[name = tensor("op_3134_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3134_cast_fp16 = einsum(equation = var_3134_equation_0, values = (var_2578_cast_fp16, var_2977_cast_fp16))[name = tensor("op_3134_cast_fp16")]; + tensor var_3136_equation_0 = const()[name = tensor("op_3136_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3136_cast_fp16 = einsum(equation = var_3136_equation_0, values = (var_2578_cast_fp16, var_2978_cast_fp16))[name = tensor("op_3136_cast_fp16")]; + tensor var_3138_equation_0 = const()[name = tensor("op_3138_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3138_cast_fp16 = einsum(equation = var_3138_equation_0, values = (var_2578_cast_fp16, var_2979_cast_fp16))[name = tensor("op_3138_cast_fp16")]; + tensor var_3140_equation_0 = const()[name = tensor("op_3140_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_3140_cast_fp16 = einsum(equation = var_3140_equation_0, values = (var_2578_cast_fp16, var_2980_cast_fp16))[name = tensor("op_3140_cast_fp16")]; + tensor var_3142_interleave_0 = const()[name = tensor("op_3142_interleave_0"), val = tensor(false)]; + tensor var_3142_cast_fp16 = concat(axis = var_1701, interleave = var_3142_interleave_0, values = (var_2982_cast_fp16, var_2984_cast_fp16, var_2986_cast_fp16, var_2988_cast_fp16))[name = tensor("op_3142_cast_fp16")]; + tensor var_3144_interleave_0 = const()[name = tensor("op_3144_interleave_0"), val = tensor(false)]; + tensor var_3144_cast_fp16 = concat(axis = var_1701, interleave = var_3144_interleave_0, values = (var_2990_cast_fp16, var_2992_cast_fp16, var_2994_cast_fp16, var_2996_cast_fp16))[name = tensor("op_3144_cast_fp16")]; + tensor var_3146_interleave_0 = const()[name = tensor("op_3146_interleave_0"), val = tensor(false)]; + tensor var_3146_cast_fp16 = concat(axis = var_1701, interleave = var_3146_interleave_0, values = (var_2998_cast_fp16, var_3000_cast_fp16, var_3002_cast_fp16, var_3004_cast_fp16))[name = tensor("op_3146_cast_fp16")]; + tensor var_3148_interleave_0 = const()[name = tensor("op_3148_interleave_0"), val = tensor(false)]; + tensor var_3148_cast_fp16 = concat(axis = var_1701, interleave = var_3148_interleave_0, values = (var_3006_cast_fp16, var_3008_cast_fp16, var_3010_cast_fp16, var_3012_cast_fp16))[name = tensor("op_3148_cast_fp16")]; + tensor var_3150_interleave_0 = const()[name = tensor("op_3150_interleave_0"), val = tensor(false)]; + tensor var_3150_cast_fp16 = concat(axis = var_1701, interleave = var_3150_interleave_0, values = (var_3014_cast_fp16, var_3016_cast_fp16, var_3018_cast_fp16, var_3020_cast_fp16))[name = tensor("op_3150_cast_fp16")]; + tensor var_3152_interleave_0 = const()[name = tensor("op_3152_interleave_0"), val = tensor(false)]; + tensor var_3152_cast_fp16 = concat(axis = var_1701, interleave = var_3152_interleave_0, values = (var_3022_cast_fp16, var_3024_cast_fp16, var_3026_cast_fp16, var_3028_cast_fp16))[name = tensor("op_3152_cast_fp16")]; + tensor var_3154_interleave_0 = const()[name = tensor("op_3154_interleave_0"), val = tensor(false)]; + tensor var_3154_cast_fp16 = concat(axis = var_1701, interleave = var_3154_interleave_0, values = (var_3030_cast_fp16, var_3032_cast_fp16, var_3034_cast_fp16, var_3036_cast_fp16))[name = tensor("op_3154_cast_fp16")]; + tensor var_3156_interleave_0 = const()[name = tensor("op_3156_interleave_0"), val = tensor(false)]; + tensor var_3156_cast_fp16 = concat(axis = var_1701, interleave = var_3156_interleave_0, values = (var_3038_cast_fp16, var_3040_cast_fp16, var_3042_cast_fp16, var_3044_cast_fp16))[name = tensor("op_3156_cast_fp16")]; + tensor var_3158_interleave_0 = const()[name = tensor("op_3158_interleave_0"), val = tensor(false)]; + tensor var_3158_cast_fp16 = concat(axis = var_1701, interleave = var_3158_interleave_0, values = (var_3046_cast_fp16, var_3048_cast_fp16, var_3050_cast_fp16, var_3052_cast_fp16))[name = tensor("op_3158_cast_fp16")]; + tensor var_3160_interleave_0 = const()[name = tensor("op_3160_interleave_0"), val = tensor(false)]; + tensor var_3160_cast_fp16 = concat(axis = var_1701, interleave = var_3160_interleave_0, values = (var_3054_cast_fp16, var_3056_cast_fp16, var_3058_cast_fp16, var_3060_cast_fp16))[name = tensor("op_3160_cast_fp16")]; + tensor var_3162_interleave_0 = const()[name = tensor("op_3162_interleave_0"), val = tensor(false)]; + tensor var_3162_cast_fp16 = concat(axis = var_1701, interleave = var_3162_interleave_0, values = (var_3062_cast_fp16, var_3064_cast_fp16, var_3066_cast_fp16, var_3068_cast_fp16))[name = tensor("op_3162_cast_fp16")]; + tensor var_3164_interleave_0 = const()[name = tensor("op_3164_interleave_0"), val = tensor(false)]; + tensor var_3164_cast_fp16 = concat(axis = var_1701, interleave = var_3164_interleave_0, values = (var_3070_cast_fp16, var_3072_cast_fp16, var_3074_cast_fp16, var_3076_cast_fp16))[name = tensor("op_3164_cast_fp16")]; + tensor var_3166_interleave_0 = const()[name = tensor("op_3166_interleave_0"), val = tensor(false)]; + tensor var_3166_cast_fp16 = concat(axis = var_1701, interleave = var_3166_interleave_0, values = (var_3078_cast_fp16, var_3080_cast_fp16, var_3082_cast_fp16, var_3084_cast_fp16))[name = tensor("op_3166_cast_fp16")]; + tensor var_3168_interleave_0 = const()[name = tensor("op_3168_interleave_0"), val = tensor(false)]; + tensor var_3168_cast_fp16 = concat(axis = var_1701, interleave = var_3168_interleave_0, values = (var_3086_cast_fp16, var_3088_cast_fp16, var_3090_cast_fp16, var_3092_cast_fp16))[name = tensor("op_3168_cast_fp16")]; + tensor var_3170_interleave_0 = const()[name = tensor("op_3170_interleave_0"), val = tensor(false)]; + tensor var_3170_cast_fp16 = concat(axis = var_1701, interleave = var_3170_interleave_0, values = (var_3094_cast_fp16, var_3096_cast_fp16, var_3098_cast_fp16, var_3100_cast_fp16))[name = tensor("op_3170_cast_fp16")]; + tensor var_3172_interleave_0 = const()[name = tensor("op_3172_interleave_0"), val = tensor(false)]; + tensor var_3172_cast_fp16 = concat(axis = var_1701, interleave = var_3172_interleave_0, values = (var_3102_cast_fp16, var_3104_cast_fp16, var_3106_cast_fp16, var_3108_cast_fp16))[name = tensor("op_3172_cast_fp16")]; + tensor var_3174_interleave_0 = const()[name = tensor("op_3174_interleave_0"), val = tensor(false)]; + tensor var_3174_cast_fp16 = concat(axis = var_1701, interleave = var_3174_interleave_0, values = (var_3110_cast_fp16, var_3112_cast_fp16, var_3114_cast_fp16, var_3116_cast_fp16))[name = tensor("op_3174_cast_fp16")]; + tensor var_3176_interleave_0 = const()[name = tensor("op_3176_interleave_0"), val = tensor(false)]; + tensor var_3176_cast_fp16 = concat(axis = var_1701, interleave = var_3176_interleave_0, values = (var_3118_cast_fp16, var_3120_cast_fp16, var_3122_cast_fp16, var_3124_cast_fp16))[name = tensor("op_3176_cast_fp16")]; + tensor var_3178_interleave_0 = const()[name = tensor("op_3178_interleave_0"), val = tensor(false)]; + tensor var_3178_cast_fp16 = concat(axis = var_1701, interleave = var_3178_interleave_0, values = (var_3126_cast_fp16, var_3128_cast_fp16, var_3130_cast_fp16, var_3132_cast_fp16))[name = tensor("op_3178_cast_fp16")]; + tensor var_3180_interleave_0 = const()[name = tensor("op_3180_interleave_0"), val = tensor(false)]; + tensor var_3180_cast_fp16 = concat(axis = var_1701, interleave = var_3180_interleave_0, values = (var_3134_cast_fp16, var_3136_cast_fp16, var_3138_cast_fp16, var_3140_cast_fp16))[name = tensor("op_3180_cast_fp16")]; + tensor input_9_interleave_0 = const()[name = tensor("input_9_interleave_0"), val = tensor(false)]; + tensor input_9_cast_fp16 = concat(axis = var_1726, interleave = input_9_interleave_0, values = (var_3142_cast_fp16, var_3144_cast_fp16, var_3146_cast_fp16, var_3148_cast_fp16, var_3150_cast_fp16, var_3152_cast_fp16, var_3154_cast_fp16, var_3156_cast_fp16, var_3158_cast_fp16, var_3160_cast_fp16, var_3162_cast_fp16, var_3164_cast_fp16, var_3166_cast_fp16, var_3168_cast_fp16, var_3170_cast_fp16, var_3172_cast_fp16, var_3174_cast_fp16, var_3176_cast_fp16, var_3178_cast_fp16, var_3180_cast_fp16))[name = tensor("input_9_cast_fp16")]; + tensor var_3185 = const()[name = tensor("op_3185"), val = tensor([1, 1])]; + tensor var_3187 = const()[name = tensor("op_3187"), val = tensor([1, 1])]; + tensor obj_7_pad_type_0 = const()[name = tensor("obj_7_pad_type_0"), val = tensor("custom")]; + tensor obj_7_pad_0 = const()[name = tensor("obj_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_1_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_1_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(63489920)))]; + tensor layers_1_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_1_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66766784)))]; + tensor obj_7_cast_fp16 = conv(bias = layers_1_self_attn_o_proj_bias_to_fp16, dilations = var_3187, groups = var_1726, pad = obj_7_pad_0, pad_type = obj_7_pad_type_0, strides = var_3185, weight = layers_1_self_attn_o_proj_weight_to_fp16, x = input_9_cast_fp16)[name = tensor("obj_7_cast_fp16")]; + tensor inputs_7_cast_fp16 = add(x = inputs_5_cast_fp16, y = obj_7_cast_fp16)[name = tensor("inputs_7_cast_fp16")]; + tensor var_3193 = const()[name = tensor("op_3193"), val = tensor([1])]; + tensor channels_mean_7_cast_fp16 = reduce_mean(axes = var_3193, keep_dims = var_1727, x = inputs_7_cast_fp16)[name = tensor("channels_mean_7_cast_fp16")]; + tensor zero_mean_7_cast_fp16 = sub(x = inputs_7_cast_fp16, y = channels_mean_7_cast_fp16)[name = tensor("zero_mean_7_cast_fp16")]; + tensor zero_mean_sq_7_cast_fp16 = mul(x = zero_mean_7_cast_fp16, y = zero_mean_7_cast_fp16)[name = tensor("zero_mean_sq_7_cast_fp16")]; + tensor var_3197 = const()[name = tensor("op_3197"), val = tensor([1])]; + tensor var_3198_cast_fp16 = reduce_mean(axes = var_3197, keep_dims = var_1727, x = zero_mean_sq_7_cast_fp16)[name = tensor("op_3198_cast_fp16")]; + tensor var_3199_to_fp16 = const()[name = tensor("op_3199_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_3200_cast_fp16 = add(x = var_3198_cast_fp16, y = var_3199_to_fp16)[name = tensor("op_3200_cast_fp16")]; + tensor denom_7_epsilon_0_to_fp16 = const()[name = tensor("denom_7_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_7_cast_fp16 = rsqrt(epsilon = denom_7_epsilon_0_to_fp16, x = var_3200_cast_fp16)[name = tensor("denom_7_cast_fp16")]; + tensor out_7_cast_fp16 = mul(x = zero_mean_7_cast_fp16, y = denom_7_cast_fp16)[name = tensor("out_7_cast_fp16")]; + tensor input_11_gamma_0_to_fp16 = const()[name = tensor("input_11_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66769408)))]; + tensor input_11_beta_0_to_fp16 = const()[name = tensor("input_11_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66772032)))]; + tensor input_11_epsilon_0_to_fp16 = const()[name = tensor("input_11_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_11_cast_fp16 = batch_norm(beta = input_11_beta_0_to_fp16, epsilon = input_11_epsilon_0_to_fp16, gamma = input_11_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_7_cast_fp16)[name = tensor("input_11_cast_fp16")]; + tensor var_3211 = const()[name = tensor("op_3211"), val = tensor([1, 1])]; + tensor var_3213 = const()[name = tensor("op_3213"), val = tensor([1, 1])]; + tensor input_13_pad_type_0 = const()[name = tensor("input_13_pad_type_0"), val = tensor("custom")]; + tensor input_13_pad_0 = const()[name = tensor("input_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_1_fc1_weight_to_fp16 = const()[name = tensor("layers_1_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(66774656)))]; + tensor layers_1_fc1_bias_to_fp16 = const()[name = tensor("layers_1_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(79881920)))]; + tensor input_13_cast_fp16 = conv(bias = layers_1_fc1_bias_to_fp16, dilations = var_3213, groups = var_1726, pad = input_13_pad_0, pad_type = input_13_pad_type_0, strides = var_3211, weight = layers_1_fc1_weight_to_fp16, x = input_11_cast_fp16)[name = tensor("input_13_cast_fp16")]; + tensor input_15_mode_0 = const()[name = tensor("input_15_mode_0"), val = tensor("EXACT")]; + tensor input_15_cast_fp16 = gelu(mode = input_15_mode_0, x = input_13_cast_fp16)[name = tensor("input_15_cast_fp16")]; + tensor var_3219 = const()[name = tensor("op_3219"), val = tensor([1, 1])]; + tensor var_3221 = const()[name = tensor("op_3221"), val = tensor([1, 1])]; + tensor hidden_states_7_pad_type_0 = const()[name = tensor("hidden_states_7_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_7_pad_0 = const()[name = tensor("hidden_states_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_1_fc2_weight_to_fp16 = const()[name = tensor("layers_1_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(79892224)))]; + tensor layers_1_fc2_bias_to_fp16 = const()[name = tensor("layers_1_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(92999488)))]; + tensor hidden_states_7_cast_fp16 = conv(bias = layers_1_fc2_bias_to_fp16, dilations = var_3221, groups = var_1726, pad = hidden_states_7_pad_0, pad_type = hidden_states_7_pad_type_0, strides = var_3219, weight = layers_1_fc2_weight_to_fp16, x = input_15_cast_fp16)[name = tensor("hidden_states_7_cast_fp16")]; + tensor inputs_9_cast_fp16 = add(x = inputs_7_cast_fp16, y = hidden_states_7_cast_fp16)[name = tensor("inputs_9_cast_fp16")]; + tensor var_3228 = const()[name = tensor("op_3228"), val = tensor(3)]; + tensor var_3253 = const()[name = tensor("op_3253"), val = tensor(1)]; + tensor var_3254 = const()[name = tensor("op_3254"), val = tensor(true)]; + tensor var_3264 = const()[name = tensor("op_3264"), val = tensor([1])]; + tensor channels_mean_9_cast_fp16 = reduce_mean(axes = var_3264, keep_dims = var_3254, x = inputs_9_cast_fp16)[name = tensor("channels_mean_9_cast_fp16")]; + tensor zero_mean_9_cast_fp16 = sub(x = inputs_9_cast_fp16, y = channels_mean_9_cast_fp16)[name = tensor("zero_mean_9_cast_fp16")]; + tensor zero_mean_sq_9_cast_fp16 = mul(x = zero_mean_9_cast_fp16, y = zero_mean_9_cast_fp16)[name = tensor("zero_mean_sq_9_cast_fp16")]; + tensor var_3268 = const()[name = tensor("op_3268"), val = tensor([1])]; + tensor var_3269_cast_fp16 = reduce_mean(axes = var_3268, keep_dims = var_3254, x = zero_mean_sq_9_cast_fp16)[name = tensor("op_3269_cast_fp16")]; + tensor var_3270_to_fp16 = const()[name = tensor("op_3270_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_3271_cast_fp16 = add(x = var_3269_cast_fp16, y = var_3270_to_fp16)[name = tensor("op_3271_cast_fp16")]; + tensor denom_9_epsilon_0_to_fp16 = const()[name = tensor("denom_9_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_9_cast_fp16 = rsqrt(epsilon = denom_9_epsilon_0_to_fp16, x = var_3271_cast_fp16)[name = tensor("denom_9_cast_fp16")]; + tensor out_9_cast_fp16 = mul(x = zero_mean_9_cast_fp16, y = denom_9_cast_fp16)[name = tensor("out_9_cast_fp16")]; + tensor obj_9_gamma_0_to_fp16 = const()[name = tensor("obj_9_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93002112)))]; + tensor obj_9_beta_0_to_fp16 = const()[name = tensor("obj_9_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93004736)))]; + tensor obj_9_epsilon_0_to_fp16 = const()[name = tensor("obj_9_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_9_cast_fp16 = batch_norm(beta = obj_9_beta_0_to_fp16, epsilon = obj_9_epsilon_0_to_fp16, gamma = obj_9_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_9_cast_fp16)[name = tensor("obj_9_cast_fp16")]; + tensor var_3286 = const()[name = tensor("op_3286"), val = tensor([1, 1])]; + tensor var_3288 = const()[name = tensor("op_3288"), val = tensor([1, 1])]; + tensor query_5_pad_type_0 = const()[name = tensor("query_5_pad_type_0"), val = tensor("custom")]; + tensor query_5_pad_0 = const()[name = tensor("query_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_2_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_2_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(93007360)))]; + tensor layers_2_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_2_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(96284224)))]; + tensor query_5_cast_fp16 = conv(bias = layers_2_self_attn_q_proj_bias_to_fp16, dilations = var_3288, groups = var_3253, pad = query_5_pad_0, pad_type = query_5_pad_type_0, strides = var_3286, weight = layers_2_self_attn_q_proj_weight_to_fp16, x = obj_9_cast_fp16)[name = tensor("query_5_cast_fp16")]; + tensor var_3292 = const()[name = tensor("op_3292"), val = tensor([1, 1])]; + tensor var_3294 = const()[name = tensor("op_3294"), val = tensor([1, 1])]; + tensor key_5_pad_type_0 = const()[name = tensor("key_5_pad_type_0"), val = tensor("custom")]; + tensor key_5_pad_0 = const()[name = tensor("key_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_2_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_2_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(96286848)))]; + tensor key_5_cast_fp16 = conv(dilations = var_3294, groups = var_3253, pad = key_5_pad_0, pad_type = key_5_pad_type_0, strides = var_3292, weight = layers_2_self_attn_k_proj_weight_to_fp16, x = obj_9_cast_fp16)[name = tensor("key_5_cast_fp16")]; + tensor var_3299 = const()[name = tensor("op_3299"), val = tensor([1, 1])]; + tensor var_3301 = const()[name = tensor("op_3301"), val = tensor([1, 1])]; + tensor value_5_pad_type_0 = const()[name = tensor("value_5_pad_type_0"), val = tensor("custom")]; + tensor value_5_pad_0 = const()[name = tensor("value_5_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_2_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_2_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(99563712)))]; + tensor layers_2_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_2_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(102840576)))]; + tensor value_5_cast_fp16 = conv(bias = layers_2_self_attn_v_proj_bias_to_fp16, dilations = var_3301, groups = var_3253, pad = value_5_pad_0, pad_type = value_5_pad_type_0, strides = var_3299, weight = layers_2_self_attn_v_proj_weight_to_fp16, x = obj_9_cast_fp16)[name = tensor("value_5_cast_fp16")]; + tensor var_3308_begin_0 = const()[name = tensor("op_3308_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3308_end_0 = const()[name = tensor("op_3308_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_3308_end_mask_0 = const()[name = tensor("op_3308_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3308_cast_fp16 = slice_by_index(begin = var_3308_begin_0, end = var_3308_end_0, end_mask = var_3308_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_3308_cast_fp16")]; + tensor var_3312_begin_0 = const()[name = tensor("op_3312_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_3312_end_0 = const()[name = tensor("op_3312_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_3312_end_mask_0 = const()[name = tensor("op_3312_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3312_cast_fp16 = slice_by_index(begin = var_3312_begin_0, end = var_3312_end_0, end_mask = var_3312_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_3312_cast_fp16")]; + tensor var_3316_begin_0 = const()[name = tensor("op_3316_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_3316_end_0 = const()[name = tensor("op_3316_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_3316_end_mask_0 = const()[name = tensor("op_3316_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3316_cast_fp16 = slice_by_index(begin = var_3316_begin_0, end = var_3316_end_0, end_mask = var_3316_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_3316_cast_fp16")]; + tensor var_3320_begin_0 = const()[name = tensor("op_3320_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_3320_end_0 = const()[name = tensor("op_3320_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_3320_end_mask_0 = const()[name = tensor("op_3320_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3320_cast_fp16 = slice_by_index(begin = var_3320_begin_0, end = var_3320_end_0, end_mask = var_3320_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_3320_cast_fp16")]; + tensor var_3324_begin_0 = const()[name = tensor("op_3324_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_3324_end_0 = const()[name = tensor("op_3324_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_3324_end_mask_0 = const()[name = tensor("op_3324_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3324_cast_fp16 = slice_by_index(begin = var_3324_begin_0, end = var_3324_end_0, end_mask = var_3324_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_3324_cast_fp16")]; + tensor var_3328_begin_0 = const()[name = tensor("op_3328_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_3328_end_0 = const()[name = tensor("op_3328_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_3328_end_mask_0 = const()[name = tensor("op_3328_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3328_cast_fp16 = slice_by_index(begin = var_3328_begin_0, end = var_3328_end_0, end_mask = var_3328_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_3328_cast_fp16")]; + tensor var_3332_begin_0 = const()[name = tensor("op_3332_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_3332_end_0 = const()[name = tensor("op_3332_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_3332_end_mask_0 = const()[name = tensor("op_3332_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3332_cast_fp16 = slice_by_index(begin = var_3332_begin_0, end = var_3332_end_0, end_mask = var_3332_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_3332_cast_fp16")]; + tensor var_3336_begin_0 = const()[name = tensor("op_3336_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_3336_end_0 = const()[name = tensor("op_3336_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_3336_end_mask_0 = const()[name = tensor("op_3336_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3336_cast_fp16 = slice_by_index(begin = var_3336_begin_0, end = var_3336_end_0, end_mask = var_3336_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_3336_cast_fp16")]; + tensor var_3340_begin_0 = const()[name = tensor("op_3340_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_3340_end_0 = const()[name = tensor("op_3340_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_3340_end_mask_0 = const()[name = tensor("op_3340_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3340_cast_fp16 = slice_by_index(begin = var_3340_begin_0, end = var_3340_end_0, end_mask = var_3340_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_3340_cast_fp16")]; + tensor var_3344_begin_0 = const()[name = tensor("op_3344_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_3344_end_0 = const()[name = tensor("op_3344_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_3344_end_mask_0 = const()[name = tensor("op_3344_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3344_cast_fp16 = slice_by_index(begin = var_3344_begin_0, end = var_3344_end_0, end_mask = var_3344_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_3344_cast_fp16")]; + tensor var_3348_begin_0 = const()[name = tensor("op_3348_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_3348_end_0 = const()[name = tensor("op_3348_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_3348_end_mask_0 = const()[name = tensor("op_3348_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3348_cast_fp16 = slice_by_index(begin = var_3348_begin_0, end = var_3348_end_0, end_mask = var_3348_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_3348_cast_fp16")]; + tensor var_3352_begin_0 = const()[name = tensor("op_3352_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_3352_end_0 = const()[name = tensor("op_3352_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_3352_end_mask_0 = const()[name = tensor("op_3352_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3352_cast_fp16 = slice_by_index(begin = var_3352_begin_0, end = var_3352_end_0, end_mask = var_3352_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_3352_cast_fp16")]; + tensor var_3356_begin_0 = const()[name = tensor("op_3356_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_3356_end_0 = const()[name = tensor("op_3356_end_0"), val = tensor([1, 832, 1, 1500])]; + tensor var_3356_end_mask_0 = const()[name = tensor("op_3356_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3356_cast_fp16 = slice_by_index(begin = var_3356_begin_0, end = var_3356_end_0, end_mask = var_3356_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_3356_cast_fp16")]; + tensor var_3360_begin_0 = const()[name = tensor("op_3360_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_3360_end_0 = const()[name = tensor("op_3360_end_0"), val = tensor([1, 896, 1, 1500])]; + tensor var_3360_end_mask_0 = const()[name = tensor("op_3360_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3360_cast_fp16 = slice_by_index(begin = var_3360_begin_0, end = var_3360_end_0, end_mask = var_3360_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_3360_cast_fp16")]; + tensor var_3364_begin_0 = const()[name = tensor("op_3364_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_3364_end_0 = const()[name = tensor("op_3364_end_0"), val = tensor([1, 960, 1, 1500])]; + tensor var_3364_end_mask_0 = const()[name = tensor("op_3364_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3364_cast_fp16 = slice_by_index(begin = var_3364_begin_0, end = var_3364_end_0, end_mask = var_3364_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_3364_cast_fp16")]; + tensor var_3368_begin_0 = const()[name = tensor("op_3368_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_3368_end_0 = const()[name = tensor("op_3368_end_0"), val = tensor([1, 1024, 1, 1500])]; + tensor var_3368_end_mask_0 = const()[name = tensor("op_3368_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3368_cast_fp16 = slice_by_index(begin = var_3368_begin_0, end = var_3368_end_0, end_mask = var_3368_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_3368_cast_fp16")]; + tensor var_3372_begin_0 = const()[name = tensor("op_3372_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_3372_end_0 = const()[name = tensor("op_3372_end_0"), val = tensor([1, 1088, 1, 1500])]; + tensor var_3372_end_mask_0 = const()[name = tensor("op_3372_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3372_cast_fp16 = slice_by_index(begin = var_3372_begin_0, end = var_3372_end_0, end_mask = var_3372_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_3372_cast_fp16")]; + tensor var_3376_begin_0 = const()[name = tensor("op_3376_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_3376_end_0 = const()[name = tensor("op_3376_end_0"), val = tensor([1, 1152, 1, 1500])]; + tensor var_3376_end_mask_0 = const()[name = tensor("op_3376_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3376_cast_fp16 = slice_by_index(begin = var_3376_begin_0, end = var_3376_end_0, end_mask = var_3376_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_3376_cast_fp16")]; + tensor var_3380_begin_0 = const()[name = tensor("op_3380_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_3380_end_0 = const()[name = tensor("op_3380_end_0"), val = tensor([1, 1216, 1, 1500])]; + tensor var_3380_end_mask_0 = const()[name = tensor("op_3380_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3380_cast_fp16 = slice_by_index(begin = var_3380_begin_0, end = var_3380_end_0, end_mask = var_3380_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_3380_cast_fp16")]; + tensor var_3384_begin_0 = const()[name = tensor("op_3384_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_3384_end_0 = const()[name = tensor("op_3384_end_0"), val = tensor([1, 1280, 1, 1500])]; + tensor var_3384_end_mask_0 = const()[name = tensor("op_3384_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_3384_cast_fp16 = slice_by_index(begin = var_3384_begin_0, end = var_3384_end_0, end_mask = var_3384_end_mask_0, x = query_5_cast_fp16)[name = tensor("op_3384_cast_fp16")]; + tensor var_3393_begin_0 = const()[name = tensor("op_3393_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3393_end_0 = const()[name = tensor("op_3393_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_3393_end_mask_0 = const()[name = tensor("op_3393_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3393_cast_fp16 = slice_by_index(begin = var_3393_begin_0, end = var_3393_end_0, end_mask = var_3393_end_mask_0, x = var_3308_cast_fp16)[name = tensor("op_3393_cast_fp16")]; + tensor var_3400_begin_0 = const()[name = tensor("op_3400_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_3400_end_0 = const()[name = tensor("op_3400_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_3400_end_mask_0 = const()[name = tensor("op_3400_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3400_cast_fp16 = slice_by_index(begin = var_3400_begin_0, end = var_3400_end_0, end_mask = var_3400_end_mask_0, x = var_3308_cast_fp16)[name = tensor("op_3400_cast_fp16")]; + tensor var_3407_begin_0 = const()[name = tensor("op_3407_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_3407_end_0 = const()[name = tensor("op_3407_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_3407_end_mask_0 = const()[name = tensor("op_3407_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3407_cast_fp16 = slice_by_index(begin = var_3407_begin_0, end = var_3407_end_0, end_mask = var_3407_end_mask_0, x = var_3308_cast_fp16)[name = tensor("op_3407_cast_fp16")]; + tensor var_3414_begin_0 = const()[name = tensor("op_3414_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_3414_end_0 = const()[name = tensor("op_3414_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_3414_end_mask_0 = const()[name = tensor("op_3414_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3414_cast_fp16 = slice_by_index(begin = var_3414_begin_0, end = var_3414_end_0, end_mask = var_3414_end_mask_0, x = var_3308_cast_fp16)[name = tensor("op_3414_cast_fp16")]; + tensor var_3421_begin_0 = const()[name = tensor("op_3421_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3421_end_0 = const()[name = tensor("op_3421_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_3421_end_mask_0 = const()[name = tensor("op_3421_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3421_cast_fp16 = slice_by_index(begin = var_3421_begin_0, end = var_3421_end_0, end_mask = var_3421_end_mask_0, x = var_3312_cast_fp16)[name = tensor("op_3421_cast_fp16")]; + tensor var_3428_begin_0 = const()[name = tensor("op_3428_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_3428_end_0 = const()[name = tensor("op_3428_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_3428_end_mask_0 = const()[name = tensor("op_3428_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3428_cast_fp16 = slice_by_index(begin = var_3428_begin_0, end = var_3428_end_0, end_mask = var_3428_end_mask_0, x = var_3312_cast_fp16)[name = tensor("op_3428_cast_fp16")]; + tensor var_3435_begin_0 = const()[name = tensor("op_3435_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_3435_end_0 = const()[name = tensor("op_3435_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_3435_end_mask_0 = const()[name = tensor("op_3435_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3435_cast_fp16 = slice_by_index(begin = var_3435_begin_0, end = var_3435_end_0, end_mask = var_3435_end_mask_0, x = var_3312_cast_fp16)[name = tensor("op_3435_cast_fp16")]; + tensor var_3442_begin_0 = const()[name = tensor("op_3442_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_3442_end_0 = const()[name = tensor("op_3442_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_3442_end_mask_0 = const()[name = tensor("op_3442_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3442_cast_fp16 = slice_by_index(begin = var_3442_begin_0, end = var_3442_end_0, end_mask = var_3442_end_mask_0, x = var_3312_cast_fp16)[name = tensor("op_3442_cast_fp16")]; + tensor var_3449_begin_0 = const()[name = tensor("op_3449_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3449_end_0 = const()[name = tensor("op_3449_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_3449_end_mask_0 = const()[name = tensor("op_3449_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3449_cast_fp16 = slice_by_index(begin = var_3449_begin_0, end = var_3449_end_0, end_mask = var_3449_end_mask_0, x = var_3316_cast_fp16)[name = tensor("op_3449_cast_fp16")]; + tensor var_3456_begin_0 = const()[name = tensor("op_3456_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_3456_end_0 = const()[name = tensor("op_3456_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_3456_end_mask_0 = const()[name = tensor("op_3456_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3456_cast_fp16 = slice_by_index(begin = var_3456_begin_0, end = var_3456_end_0, end_mask = var_3456_end_mask_0, x = var_3316_cast_fp16)[name = tensor("op_3456_cast_fp16")]; + tensor var_3463_begin_0 = const()[name = tensor("op_3463_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_3463_end_0 = const()[name = tensor("op_3463_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_3463_end_mask_0 = const()[name = tensor("op_3463_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3463_cast_fp16 = slice_by_index(begin = var_3463_begin_0, end = var_3463_end_0, end_mask = var_3463_end_mask_0, x = var_3316_cast_fp16)[name = tensor("op_3463_cast_fp16")]; + tensor var_3470_begin_0 = const()[name = tensor("op_3470_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_3470_end_0 = const()[name = tensor("op_3470_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_3470_end_mask_0 = const()[name = tensor("op_3470_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3470_cast_fp16 = slice_by_index(begin = var_3470_begin_0, end = var_3470_end_0, end_mask = var_3470_end_mask_0, x = var_3316_cast_fp16)[name = tensor("op_3470_cast_fp16")]; + tensor var_3477_begin_0 = const()[name = tensor("op_3477_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3477_end_0 = const()[name = tensor("op_3477_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_3477_end_mask_0 = const()[name = tensor("op_3477_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3477_cast_fp16 = slice_by_index(begin = var_3477_begin_0, end = var_3477_end_0, end_mask = var_3477_end_mask_0, x = var_3320_cast_fp16)[name = tensor("op_3477_cast_fp16")]; + tensor var_3484_begin_0 = const()[name = tensor("op_3484_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_3484_end_0 = const()[name = tensor("op_3484_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_3484_end_mask_0 = const()[name = tensor("op_3484_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3484_cast_fp16 = slice_by_index(begin = var_3484_begin_0, end = var_3484_end_0, end_mask = var_3484_end_mask_0, x = var_3320_cast_fp16)[name = tensor("op_3484_cast_fp16")]; + tensor var_3491_begin_0 = const()[name = tensor("op_3491_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_3491_end_0 = const()[name = tensor("op_3491_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_3491_end_mask_0 = const()[name = tensor("op_3491_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3491_cast_fp16 = slice_by_index(begin = var_3491_begin_0, end = var_3491_end_0, end_mask = var_3491_end_mask_0, x = var_3320_cast_fp16)[name = tensor("op_3491_cast_fp16")]; + tensor var_3498_begin_0 = const()[name = tensor("op_3498_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_3498_end_0 = const()[name = tensor("op_3498_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_3498_end_mask_0 = const()[name = tensor("op_3498_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3498_cast_fp16 = slice_by_index(begin = var_3498_begin_0, end = var_3498_end_0, end_mask = var_3498_end_mask_0, x = var_3320_cast_fp16)[name = tensor("op_3498_cast_fp16")]; + tensor var_3505_begin_0 = const()[name = tensor("op_3505_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3505_end_0 = const()[name = tensor("op_3505_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_3505_end_mask_0 = const()[name = tensor("op_3505_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3505_cast_fp16 = slice_by_index(begin = var_3505_begin_0, end = var_3505_end_0, end_mask = var_3505_end_mask_0, x = var_3324_cast_fp16)[name = tensor("op_3505_cast_fp16")]; + tensor var_3512_begin_0 = const()[name = tensor("op_3512_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_3512_end_0 = const()[name = tensor("op_3512_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_3512_end_mask_0 = const()[name = tensor("op_3512_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3512_cast_fp16 = slice_by_index(begin = var_3512_begin_0, end = var_3512_end_0, end_mask = var_3512_end_mask_0, x = var_3324_cast_fp16)[name = tensor("op_3512_cast_fp16")]; + tensor var_3519_begin_0 = const()[name = tensor("op_3519_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_3519_end_0 = const()[name = tensor("op_3519_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_3519_end_mask_0 = const()[name = tensor("op_3519_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3519_cast_fp16 = slice_by_index(begin = var_3519_begin_0, end = var_3519_end_0, end_mask = var_3519_end_mask_0, x = var_3324_cast_fp16)[name = tensor("op_3519_cast_fp16")]; + tensor var_3526_begin_0 = const()[name = tensor("op_3526_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_3526_end_0 = const()[name = tensor("op_3526_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_3526_end_mask_0 = const()[name = tensor("op_3526_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3526_cast_fp16 = slice_by_index(begin = var_3526_begin_0, end = var_3526_end_0, end_mask = var_3526_end_mask_0, x = var_3324_cast_fp16)[name = tensor("op_3526_cast_fp16")]; + tensor var_3533_begin_0 = const()[name = tensor("op_3533_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3533_end_0 = const()[name = tensor("op_3533_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_3533_end_mask_0 = const()[name = tensor("op_3533_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3533_cast_fp16 = slice_by_index(begin = var_3533_begin_0, end = var_3533_end_0, end_mask = var_3533_end_mask_0, x = var_3328_cast_fp16)[name = tensor("op_3533_cast_fp16")]; + tensor var_3540_begin_0 = const()[name = tensor("op_3540_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_3540_end_0 = const()[name = tensor("op_3540_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_3540_end_mask_0 = const()[name = tensor("op_3540_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3540_cast_fp16 = slice_by_index(begin = var_3540_begin_0, end = var_3540_end_0, end_mask = var_3540_end_mask_0, x = var_3328_cast_fp16)[name = tensor("op_3540_cast_fp16")]; + tensor var_3547_begin_0 = const()[name = tensor("op_3547_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_3547_end_0 = const()[name = tensor("op_3547_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_3547_end_mask_0 = const()[name = tensor("op_3547_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3547_cast_fp16 = slice_by_index(begin = var_3547_begin_0, end = var_3547_end_0, end_mask = var_3547_end_mask_0, x = var_3328_cast_fp16)[name = tensor("op_3547_cast_fp16")]; + tensor var_3554_begin_0 = const()[name = tensor("op_3554_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_3554_end_0 = const()[name = tensor("op_3554_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_3554_end_mask_0 = const()[name = tensor("op_3554_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3554_cast_fp16 = slice_by_index(begin = var_3554_begin_0, end = var_3554_end_0, end_mask = var_3554_end_mask_0, x = var_3328_cast_fp16)[name = tensor("op_3554_cast_fp16")]; + tensor var_3561_begin_0 = const()[name = tensor("op_3561_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3561_end_0 = const()[name = tensor("op_3561_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_3561_end_mask_0 = const()[name = tensor("op_3561_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3561_cast_fp16 = slice_by_index(begin = var_3561_begin_0, end = var_3561_end_0, end_mask = var_3561_end_mask_0, x = var_3332_cast_fp16)[name = tensor("op_3561_cast_fp16")]; + tensor var_3568_begin_0 = const()[name = tensor("op_3568_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_3568_end_0 = const()[name = tensor("op_3568_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_3568_end_mask_0 = const()[name = tensor("op_3568_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3568_cast_fp16 = slice_by_index(begin = var_3568_begin_0, end = var_3568_end_0, end_mask = var_3568_end_mask_0, x = var_3332_cast_fp16)[name = tensor("op_3568_cast_fp16")]; + tensor var_3575_begin_0 = const()[name = tensor("op_3575_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_3575_end_0 = const()[name = tensor("op_3575_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_3575_end_mask_0 = const()[name = tensor("op_3575_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3575_cast_fp16 = slice_by_index(begin = var_3575_begin_0, end = var_3575_end_0, end_mask = var_3575_end_mask_0, x = var_3332_cast_fp16)[name = tensor("op_3575_cast_fp16")]; + tensor var_3582_begin_0 = const()[name = tensor("op_3582_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_3582_end_0 = const()[name = tensor("op_3582_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_3582_end_mask_0 = const()[name = tensor("op_3582_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3582_cast_fp16 = slice_by_index(begin = var_3582_begin_0, end = var_3582_end_0, end_mask = var_3582_end_mask_0, x = var_3332_cast_fp16)[name = tensor("op_3582_cast_fp16")]; + tensor var_3589_begin_0 = const()[name = tensor("op_3589_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3589_end_0 = const()[name = tensor("op_3589_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_3589_end_mask_0 = const()[name = tensor("op_3589_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3589_cast_fp16 = slice_by_index(begin = var_3589_begin_0, end = var_3589_end_0, end_mask = var_3589_end_mask_0, x = var_3336_cast_fp16)[name = tensor("op_3589_cast_fp16")]; + tensor var_3596_begin_0 = const()[name = tensor("op_3596_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_3596_end_0 = const()[name = tensor("op_3596_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_3596_end_mask_0 = const()[name = tensor("op_3596_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3596_cast_fp16 = slice_by_index(begin = var_3596_begin_0, end = var_3596_end_0, end_mask = var_3596_end_mask_0, x = var_3336_cast_fp16)[name = tensor("op_3596_cast_fp16")]; + tensor var_3603_begin_0 = const()[name = tensor("op_3603_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_3603_end_0 = const()[name = tensor("op_3603_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_3603_end_mask_0 = const()[name = tensor("op_3603_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3603_cast_fp16 = slice_by_index(begin = var_3603_begin_0, end = var_3603_end_0, end_mask = var_3603_end_mask_0, x = var_3336_cast_fp16)[name = tensor("op_3603_cast_fp16")]; + tensor var_3610_begin_0 = const()[name = tensor("op_3610_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_3610_end_0 = const()[name = tensor("op_3610_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_3610_end_mask_0 = const()[name = tensor("op_3610_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3610_cast_fp16 = slice_by_index(begin = var_3610_begin_0, end = var_3610_end_0, end_mask = var_3610_end_mask_0, x = var_3336_cast_fp16)[name = tensor("op_3610_cast_fp16")]; + tensor var_3617_begin_0 = const()[name = tensor("op_3617_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3617_end_0 = const()[name = tensor("op_3617_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_3617_end_mask_0 = const()[name = tensor("op_3617_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3617_cast_fp16 = slice_by_index(begin = var_3617_begin_0, end = var_3617_end_0, end_mask = var_3617_end_mask_0, x = var_3340_cast_fp16)[name = tensor("op_3617_cast_fp16")]; + tensor var_3624_begin_0 = const()[name = tensor("op_3624_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_3624_end_0 = const()[name = tensor("op_3624_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_3624_end_mask_0 = const()[name = tensor("op_3624_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3624_cast_fp16 = slice_by_index(begin = var_3624_begin_0, end = var_3624_end_0, end_mask = var_3624_end_mask_0, x = var_3340_cast_fp16)[name = tensor("op_3624_cast_fp16")]; + tensor var_3631_begin_0 = const()[name = tensor("op_3631_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_3631_end_0 = const()[name = tensor("op_3631_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_3631_end_mask_0 = const()[name = tensor("op_3631_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3631_cast_fp16 = slice_by_index(begin = var_3631_begin_0, end = var_3631_end_0, end_mask = var_3631_end_mask_0, x = var_3340_cast_fp16)[name = tensor("op_3631_cast_fp16")]; + tensor var_3638_begin_0 = const()[name = tensor("op_3638_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_3638_end_0 = const()[name = tensor("op_3638_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_3638_end_mask_0 = const()[name = tensor("op_3638_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3638_cast_fp16 = slice_by_index(begin = var_3638_begin_0, end = var_3638_end_0, end_mask = var_3638_end_mask_0, x = var_3340_cast_fp16)[name = tensor("op_3638_cast_fp16")]; + tensor var_3645_begin_0 = const()[name = tensor("op_3645_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3645_end_0 = const()[name = tensor("op_3645_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_3645_end_mask_0 = const()[name = tensor("op_3645_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3645_cast_fp16 = slice_by_index(begin = var_3645_begin_0, end = var_3645_end_0, end_mask = var_3645_end_mask_0, x = var_3344_cast_fp16)[name = tensor("op_3645_cast_fp16")]; + tensor var_3652_begin_0 = const()[name = tensor("op_3652_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_3652_end_0 = const()[name = tensor("op_3652_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_3652_end_mask_0 = const()[name = tensor("op_3652_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3652_cast_fp16 = slice_by_index(begin = var_3652_begin_0, end = var_3652_end_0, end_mask = var_3652_end_mask_0, x = var_3344_cast_fp16)[name = tensor("op_3652_cast_fp16")]; + tensor var_3659_begin_0 = const()[name = tensor("op_3659_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_3659_end_0 = const()[name = tensor("op_3659_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_3659_end_mask_0 = const()[name = tensor("op_3659_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3659_cast_fp16 = slice_by_index(begin = var_3659_begin_0, end = var_3659_end_0, end_mask = var_3659_end_mask_0, x = var_3344_cast_fp16)[name = tensor("op_3659_cast_fp16")]; + tensor var_3666_begin_0 = const()[name = tensor("op_3666_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_3666_end_0 = const()[name = tensor("op_3666_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_3666_end_mask_0 = const()[name = tensor("op_3666_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3666_cast_fp16 = slice_by_index(begin = var_3666_begin_0, end = var_3666_end_0, end_mask = var_3666_end_mask_0, x = var_3344_cast_fp16)[name = tensor("op_3666_cast_fp16")]; + tensor var_3673_begin_0 = const()[name = tensor("op_3673_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3673_end_0 = const()[name = tensor("op_3673_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_3673_end_mask_0 = const()[name = tensor("op_3673_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3673_cast_fp16 = slice_by_index(begin = var_3673_begin_0, end = var_3673_end_0, end_mask = var_3673_end_mask_0, x = var_3348_cast_fp16)[name = tensor("op_3673_cast_fp16")]; + tensor var_3680_begin_0 = const()[name = tensor("op_3680_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_3680_end_0 = const()[name = tensor("op_3680_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_3680_end_mask_0 = const()[name = tensor("op_3680_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3680_cast_fp16 = slice_by_index(begin = var_3680_begin_0, end = var_3680_end_0, end_mask = var_3680_end_mask_0, x = var_3348_cast_fp16)[name = tensor("op_3680_cast_fp16")]; + tensor var_3687_begin_0 = const()[name = tensor("op_3687_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_3687_end_0 = const()[name = tensor("op_3687_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_3687_end_mask_0 = const()[name = tensor("op_3687_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3687_cast_fp16 = slice_by_index(begin = var_3687_begin_0, end = var_3687_end_0, end_mask = var_3687_end_mask_0, x = var_3348_cast_fp16)[name = tensor("op_3687_cast_fp16")]; + tensor var_3694_begin_0 = const()[name = tensor("op_3694_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_3694_end_0 = const()[name = tensor("op_3694_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_3694_end_mask_0 = const()[name = tensor("op_3694_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3694_cast_fp16 = slice_by_index(begin = var_3694_begin_0, end = var_3694_end_0, end_mask = var_3694_end_mask_0, x = var_3348_cast_fp16)[name = tensor("op_3694_cast_fp16")]; + tensor var_3701_begin_0 = const()[name = tensor("op_3701_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3701_end_0 = const()[name = tensor("op_3701_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_3701_end_mask_0 = const()[name = tensor("op_3701_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3701_cast_fp16 = slice_by_index(begin = var_3701_begin_0, end = var_3701_end_0, end_mask = var_3701_end_mask_0, x = var_3352_cast_fp16)[name = tensor("op_3701_cast_fp16")]; + tensor var_3708_begin_0 = const()[name = tensor("op_3708_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_3708_end_0 = const()[name = tensor("op_3708_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_3708_end_mask_0 = const()[name = tensor("op_3708_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3708_cast_fp16 = slice_by_index(begin = var_3708_begin_0, end = var_3708_end_0, end_mask = var_3708_end_mask_0, x = var_3352_cast_fp16)[name = tensor("op_3708_cast_fp16")]; + tensor var_3715_begin_0 = const()[name = tensor("op_3715_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_3715_end_0 = const()[name = tensor("op_3715_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_3715_end_mask_0 = const()[name = tensor("op_3715_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3715_cast_fp16 = slice_by_index(begin = var_3715_begin_0, end = var_3715_end_0, end_mask = var_3715_end_mask_0, x = var_3352_cast_fp16)[name = tensor("op_3715_cast_fp16")]; + tensor var_3722_begin_0 = const()[name = tensor("op_3722_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_3722_end_0 = const()[name = tensor("op_3722_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_3722_end_mask_0 = const()[name = tensor("op_3722_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3722_cast_fp16 = slice_by_index(begin = var_3722_begin_0, end = var_3722_end_0, end_mask = var_3722_end_mask_0, x = var_3352_cast_fp16)[name = tensor("op_3722_cast_fp16")]; + tensor var_3729_begin_0 = const()[name = tensor("op_3729_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3729_end_0 = const()[name = tensor("op_3729_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_3729_end_mask_0 = const()[name = tensor("op_3729_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3729_cast_fp16 = slice_by_index(begin = var_3729_begin_0, end = var_3729_end_0, end_mask = var_3729_end_mask_0, x = var_3356_cast_fp16)[name = tensor("op_3729_cast_fp16")]; + tensor var_3736_begin_0 = const()[name = tensor("op_3736_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_3736_end_0 = const()[name = tensor("op_3736_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_3736_end_mask_0 = const()[name = tensor("op_3736_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3736_cast_fp16 = slice_by_index(begin = var_3736_begin_0, end = var_3736_end_0, end_mask = var_3736_end_mask_0, x = var_3356_cast_fp16)[name = tensor("op_3736_cast_fp16")]; + tensor var_3743_begin_0 = const()[name = tensor("op_3743_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_3743_end_0 = const()[name = tensor("op_3743_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_3743_end_mask_0 = const()[name = tensor("op_3743_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3743_cast_fp16 = slice_by_index(begin = var_3743_begin_0, end = var_3743_end_0, end_mask = var_3743_end_mask_0, x = var_3356_cast_fp16)[name = tensor("op_3743_cast_fp16")]; + tensor var_3750_begin_0 = const()[name = tensor("op_3750_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_3750_end_0 = const()[name = tensor("op_3750_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_3750_end_mask_0 = const()[name = tensor("op_3750_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3750_cast_fp16 = slice_by_index(begin = var_3750_begin_0, end = var_3750_end_0, end_mask = var_3750_end_mask_0, x = var_3356_cast_fp16)[name = tensor("op_3750_cast_fp16")]; + tensor var_3757_begin_0 = const()[name = tensor("op_3757_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3757_end_0 = const()[name = tensor("op_3757_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_3757_end_mask_0 = const()[name = tensor("op_3757_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3757_cast_fp16 = slice_by_index(begin = var_3757_begin_0, end = var_3757_end_0, end_mask = var_3757_end_mask_0, x = var_3360_cast_fp16)[name = tensor("op_3757_cast_fp16")]; + tensor var_3764_begin_0 = const()[name = tensor("op_3764_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_3764_end_0 = const()[name = tensor("op_3764_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_3764_end_mask_0 = const()[name = tensor("op_3764_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3764_cast_fp16 = slice_by_index(begin = var_3764_begin_0, end = var_3764_end_0, end_mask = var_3764_end_mask_0, x = var_3360_cast_fp16)[name = tensor("op_3764_cast_fp16")]; + tensor var_3771_begin_0 = const()[name = tensor("op_3771_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_3771_end_0 = const()[name = tensor("op_3771_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_3771_end_mask_0 = const()[name = tensor("op_3771_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3771_cast_fp16 = slice_by_index(begin = var_3771_begin_0, end = var_3771_end_0, end_mask = var_3771_end_mask_0, x = var_3360_cast_fp16)[name = tensor("op_3771_cast_fp16")]; + tensor var_3778_begin_0 = const()[name = tensor("op_3778_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_3778_end_0 = const()[name = tensor("op_3778_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_3778_end_mask_0 = const()[name = tensor("op_3778_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3778_cast_fp16 = slice_by_index(begin = var_3778_begin_0, end = var_3778_end_0, end_mask = var_3778_end_mask_0, x = var_3360_cast_fp16)[name = tensor("op_3778_cast_fp16")]; + tensor var_3785_begin_0 = const()[name = tensor("op_3785_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3785_end_0 = const()[name = tensor("op_3785_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_3785_end_mask_0 = const()[name = tensor("op_3785_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3785_cast_fp16 = slice_by_index(begin = var_3785_begin_0, end = var_3785_end_0, end_mask = var_3785_end_mask_0, x = var_3364_cast_fp16)[name = tensor("op_3785_cast_fp16")]; + tensor var_3792_begin_0 = const()[name = tensor("op_3792_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_3792_end_0 = const()[name = tensor("op_3792_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_3792_end_mask_0 = const()[name = tensor("op_3792_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3792_cast_fp16 = slice_by_index(begin = var_3792_begin_0, end = var_3792_end_0, end_mask = var_3792_end_mask_0, x = var_3364_cast_fp16)[name = tensor("op_3792_cast_fp16")]; + tensor var_3799_begin_0 = const()[name = tensor("op_3799_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_3799_end_0 = const()[name = tensor("op_3799_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_3799_end_mask_0 = const()[name = tensor("op_3799_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3799_cast_fp16 = slice_by_index(begin = var_3799_begin_0, end = var_3799_end_0, end_mask = var_3799_end_mask_0, x = var_3364_cast_fp16)[name = tensor("op_3799_cast_fp16")]; + tensor var_3806_begin_0 = const()[name = tensor("op_3806_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_3806_end_0 = const()[name = tensor("op_3806_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_3806_end_mask_0 = const()[name = tensor("op_3806_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3806_cast_fp16 = slice_by_index(begin = var_3806_begin_0, end = var_3806_end_0, end_mask = var_3806_end_mask_0, x = var_3364_cast_fp16)[name = tensor("op_3806_cast_fp16")]; + tensor var_3813_begin_0 = const()[name = tensor("op_3813_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3813_end_0 = const()[name = tensor("op_3813_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_3813_end_mask_0 = const()[name = tensor("op_3813_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3813_cast_fp16 = slice_by_index(begin = var_3813_begin_0, end = var_3813_end_0, end_mask = var_3813_end_mask_0, x = var_3368_cast_fp16)[name = tensor("op_3813_cast_fp16")]; + tensor var_3820_begin_0 = const()[name = tensor("op_3820_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_3820_end_0 = const()[name = tensor("op_3820_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_3820_end_mask_0 = const()[name = tensor("op_3820_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3820_cast_fp16 = slice_by_index(begin = var_3820_begin_0, end = var_3820_end_0, end_mask = var_3820_end_mask_0, x = var_3368_cast_fp16)[name = tensor("op_3820_cast_fp16")]; + tensor var_3827_begin_0 = const()[name = tensor("op_3827_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_3827_end_0 = const()[name = tensor("op_3827_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_3827_end_mask_0 = const()[name = tensor("op_3827_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3827_cast_fp16 = slice_by_index(begin = var_3827_begin_0, end = var_3827_end_0, end_mask = var_3827_end_mask_0, x = var_3368_cast_fp16)[name = tensor("op_3827_cast_fp16")]; + tensor var_3834_begin_0 = const()[name = tensor("op_3834_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_3834_end_0 = const()[name = tensor("op_3834_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_3834_end_mask_0 = const()[name = tensor("op_3834_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3834_cast_fp16 = slice_by_index(begin = var_3834_begin_0, end = var_3834_end_0, end_mask = var_3834_end_mask_0, x = var_3368_cast_fp16)[name = tensor("op_3834_cast_fp16")]; + tensor var_3841_begin_0 = const()[name = tensor("op_3841_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3841_end_0 = const()[name = tensor("op_3841_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_3841_end_mask_0 = const()[name = tensor("op_3841_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3841_cast_fp16 = slice_by_index(begin = var_3841_begin_0, end = var_3841_end_0, end_mask = var_3841_end_mask_0, x = var_3372_cast_fp16)[name = tensor("op_3841_cast_fp16")]; + tensor var_3848_begin_0 = const()[name = tensor("op_3848_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_3848_end_0 = const()[name = tensor("op_3848_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_3848_end_mask_0 = const()[name = tensor("op_3848_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3848_cast_fp16 = slice_by_index(begin = var_3848_begin_0, end = var_3848_end_0, end_mask = var_3848_end_mask_0, x = var_3372_cast_fp16)[name = tensor("op_3848_cast_fp16")]; + tensor var_3855_begin_0 = const()[name = tensor("op_3855_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_3855_end_0 = const()[name = tensor("op_3855_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_3855_end_mask_0 = const()[name = tensor("op_3855_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3855_cast_fp16 = slice_by_index(begin = var_3855_begin_0, end = var_3855_end_0, end_mask = var_3855_end_mask_0, x = var_3372_cast_fp16)[name = tensor("op_3855_cast_fp16")]; + tensor var_3862_begin_0 = const()[name = tensor("op_3862_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_3862_end_0 = const()[name = tensor("op_3862_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_3862_end_mask_0 = const()[name = tensor("op_3862_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3862_cast_fp16 = slice_by_index(begin = var_3862_begin_0, end = var_3862_end_0, end_mask = var_3862_end_mask_0, x = var_3372_cast_fp16)[name = tensor("op_3862_cast_fp16")]; + tensor var_3869_begin_0 = const()[name = tensor("op_3869_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3869_end_0 = const()[name = tensor("op_3869_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_3869_end_mask_0 = const()[name = tensor("op_3869_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3869_cast_fp16 = slice_by_index(begin = var_3869_begin_0, end = var_3869_end_0, end_mask = var_3869_end_mask_0, x = var_3376_cast_fp16)[name = tensor("op_3869_cast_fp16")]; + tensor var_3876_begin_0 = const()[name = tensor("op_3876_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_3876_end_0 = const()[name = tensor("op_3876_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_3876_end_mask_0 = const()[name = tensor("op_3876_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3876_cast_fp16 = slice_by_index(begin = var_3876_begin_0, end = var_3876_end_0, end_mask = var_3876_end_mask_0, x = var_3376_cast_fp16)[name = tensor("op_3876_cast_fp16")]; + tensor var_3883_begin_0 = const()[name = tensor("op_3883_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_3883_end_0 = const()[name = tensor("op_3883_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_3883_end_mask_0 = const()[name = tensor("op_3883_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3883_cast_fp16 = slice_by_index(begin = var_3883_begin_0, end = var_3883_end_0, end_mask = var_3883_end_mask_0, x = var_3376_cast_fp16)[name = tensor("op_3883_cast_fp16")]; + tensor var_3890_begin_0 = const()[name = tensor("op_3890_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_3890_end_0 = const()[name = tensor("op_3890_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_3890_end_mask_0 = const()[name = tensor("op_3890_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3890_cast_fp16 = slice_by_index(begin = var_3890_begin_0, end = var_3890_end_0, end_mask = var_3890_end_mask_0, x = var_3376_cast_fp16)[name = tensor("op_3890_cast_fp16")]; + tensor var_3897_begin_0 = const()[name = tensor("op_3897_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3897_end_0 = const()[name = tensor("op_3897_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_3897_end_mask_0 = const()[name = tensor("op_3897_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3897_cast_fp16 = slice_by_index(begin = var_3897_begin_0, end = var_3897_end_0, end_mask = var_3897_end_mask_0, x = var_3380_cast_fp16)[name = tensor("op_3897_cast_fp16")]; + tensor var_3904_begin_0 = const()[name = tensor("op_3904_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_3904_end_0 = const()[name = tensor("op_3904_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_3904_end_mask_0 = const()[name = tensor("op_3904_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3904_cast_fp16 = slice_by_index(begin = var_3904_begin_0, end = var_3904_end_0, end_mask = var_3904_end_mask_0, x = var_3380_cast_fp16)[name = tensor("op_3904_cast_fp16")]; + tensor var_3911_begin_0 = const()[name = tensor("op_3911_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_3911_end_0 = const()[name = tensor("op_3911_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_3911_end_mask_0 = const()[name = tensor("op_3911_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3911_cast_fp16 = slice_by_index(begin = var_3911_begin_0, end = var_3911_end_0, end_mask = var_3911_end_mask_0, x = var_3380_cast_fp16)[name = tensor("op_3911_cast_fp16")]; + tensor var_3918_begin_0 = const()[name = tensor("op_3918_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_3918_end_0 = const()[name = tensor("op_3918_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_3918_end_mask_0 = const()[name = tensor("op_3918_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3918_cast_fp16 = slice_by_index(begin = var_3918_begin_0, end = var_3918_end_0, end_mask = var_3918_end_mask_0, x = var_3380_cast_fp16)[name = tensor("op_3918_cast_fp16")]; + tensor var_3925_begin_0 = const()[name = tensor("op_3925_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3925_end_0 = const()[name = tensor("op_3925_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_3925_end_mask_0 = const()[name = tensor("op_3925_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3925_cast_fp16 = slice_by_index(begin = var_3925_begin_0, end = var_3925_end_0, end_mask = var_3925_end_mask_0, x = var_3384_cast_fp16)[name = tensor("op_3925_cast_fp16")]; + tensor var_3932_begin_0 = const()[name = tensor("op_3932_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_3932_end_0 = const()[name = tensor("op_3932_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_3932_end_mask_0 = const()[name = tensor("op_3932_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3932_cast_fp16 = slice_by_index(begin = var_3932_begin_0, end = var_3932_end_0, end_mask = var_3932_end_mask_0, x = var_3384_cast_fp16)[name = tensor("op_3932_cast_fp16")]; + tensor var_3939_begin_0 = const()[name = tensor("op_3939_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_3939_end_0 = const()[name = tensor("op_3939_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_3939_end_mask_0 = const()[name = tensor("op_3939_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3939_cast_fp16 = slice_by_index(begin = var_3939_begin_0, end = var_3939_end_0, end_mask = var_3939_end_mask_0, x = var_3384_cast_fp16)[name = tensor("op_3939_cast_fp16")]; + tensor var_3946_begin_0 = const()[name = tensor("op_3946_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_3946_end_0 = const()[name = tensor("op_3946_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_3946_end_mask_0 = const()[name = tensor("op_3946_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3946_cast_fp16 = slice_by_index(begin = var_3946_begin_0, end = var_3946_end_0, end_mask = var_3946_end_mask_0, x = var_3384_cast_fp16)[name = tensor("op_3946_cast_fp16")]; + tensor k_5_perm_0 = const()[name = tensor("k_5_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_3951_begin_0 = const()[name = tensor("op_3951_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_3951_end_0 = const()[name = tensor("op_3951_end_0"), val = tensor([1, 1500, 1, 64])]; + tensor var_3951_end_mask_0 = const()[name = tensor("op_3951_end_mask_0"), val = tensor([true, true, true, false])]; + tensor transpose_29 = transpose(perm = k_5_perm_0, x = key_5_cast_fp16)[name = tensor("transpose_29")]; + tensor var_3951_cast_fp16 = slice_by_index(begin = var_3951_begin_0, end = var_3951_end_0, end_mask = var_3951_end_mask_0, x = transpose_29)[name = tensor("op_3951_cast_fp16")]; + tensor var_3955_begin_0 = const()[name = tensor("op_3955_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor var_3955_end_0 = const()[name = tensor("op_3955_end_0"), val = tensor([1, 1500, 1, 128])]; + tensor var_3955_end_mask_0 = const()[name = tensor("op_3955_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3955_cast_fp16 = slice_by_index(begin = var_3955_begin_0, end = var_3955_end_0, end_mask = var_3955_end_mask_0, x = transpose_29)[name = tensor("op_3955_cast_fp16")]; + tensor var_3959_begin_0 = const()[name = tensor("op_3959_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_3959_end_0 = const()[name = tensor("op_3959_end_0"), val = tensor([1, 1500, 1, 192])]; + tensor var_3959_end_mask_0 = const()[name = tensor("op_3959_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3959_cast_fp16 = slice_by_index(begin = var_3959_begin_0, end = var_3959_end_0, end_mask = var_3959_end_mask_0, x = transpose_29)[name = tensor("op_3959_cast_fp16")]; + tensor var_3963_begin_0 = const()[name = tensor("op_3963_begin_0"), val = tensor([0, 0, 0, 192])]; + tensor var_3963_end_0 = const()[name = tensor("op_3963_end_0"), val = tensor([1, 1500, 1, 256])]; + tensor var_3963_end_mask_0 = const()[name = tensor("op_3963_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3963_cast_fp16 = slice_by_index(begin = var_3963_begin_0, end = var_3963_end_0, end_mask = var_3963_end_mask_0, x = transpose_29)[name = tensor("op_3963_cast_fp16")]; + tensor var_3967_begin_0 = const()[name = tensor("op_3967_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_3967_end_0 = const()[name = tensor("op_3967_end_0"), val = tensor([1, 1500, 1, 320])]; + tensor var_3967_end_mask_0 = const()[name = tensor("op_3967_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3967_cast_fp16 = slice_by_index(begin = var_3967_begin_0, end = var_3967_end_0, end_mask = var_3967_end_mask_0, x = transpose_29)[name = tensor("op_3967_cast_fp16")]; + tensor var_3971_begin_0 = const()[name = tensor("op_3971_begin_0"), val = tensor([0, 0, 0, 320])]; + tensor var_3971_end_0 = const()[name = tensor("op_3971_end_0"), val = tensor([1, 1500, 1, 384])]; + tensor var_3971_end_mask_0 = const()[name = tensor("op_3971_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3971_cast_fp16 = slice_by_index(begin = var_3971_begin_0, end = var_3971_end_0, end_mask = var_3971_end_mask_0, x = transpose_29)[name = tensor("op_3971_cast_fp16")]; + tensor var_3975_begin_0 = const()[name = tensor("op_3975_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_3975_end_0 = const()[name = tensor("op_3975_end_0"), val = tensor([1, 1500, 1, 448])]; + tensor var_3975_end_mask_0 = const()[name = tensor("op_3975_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3975_cast_fp16 = slice_by_index(begin = var_3975_begin_0, end = var_3975_end_0, end_mask = var_3975_end_mask_0, x = transpose_29)[name = tensor("op_3975_cast_fp16")]; + tensor var_3979_begin_0 = const()[name = tensor("op_3979_begin_0"), val = tensor([0, 0, 0, 448])]; + tensor var_3979_end_0 = const()[name = tensor("op_3979_end_0"), val = tensor([1, 1500, 1, 512])]; + tensor var_3979_end_mask_0 = const()[name = tensor("op_3979_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3979_cast_fp16 = slice_by_index(begin = var_3979_begin_0, end = var_3979_end_0, end_mask = var_3979_end_mask_0, x = transpose_29)[name = tensor("op_3979_cast_fp16")]; + tensor var_3983_begin_0 = const()[name = tensor("op_3983_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_3983_end_0 = const()[name = tensor("op_3983_end_0"), val = tensor([1, 1500, 1, 576])]; + tensor var_3983_end_mask_0 = const()[name = tensor("op_3983_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3983_cast_fp16 = slice_by_index(begin = var_3983_begin_0, end = var_3983_end_0, end_mask = var_3983_end_mask_0, x = transpose_29)[name = tensor("op_3983_cast_fp16")]; + tensor var_3987_begin_0 = const()[name = tensor("op_3987_begin_0"), val = tensor([0, 0, 0, 576])]; + tensor var_3987_end_0 = const()[name = tensor("op_3987_end_0"), val = tensor([1, 1500, 1, 640])]; + tensor var_3987_end_mask_0 = const()[name = tensor("op_3987_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3987_cast_fp16 = slice_by_index(begin = var_3987_begin_0, end = var_3987_end_0, end_mask = var_3987_end_mask_0, x = transpose_29)[name = tensor("op_3987_cast_fp16")]; + tensor var_3991_begin_0 = const()[name = tensor("op_3991_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_3991_end_0 = const()[name = tensor("op_3991_end_0"), val = tensor([1, 1500, 1, 704])]; + tensor var_3991_end_mask_0 = const()[name = tensor("op_3991_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3991_cast_fp16 = slice_by_index(begin = var_3991_begin_0, end = var_3991_end_0, end_mask = var_3991_end_mask_0, x = transpose_29)[name = tensor("op_3991_cast_fp16")]; + tensor var_3995_begin_0 = const()[name = tensor("op_3995_begin_0"), val = tensor([0, 0, 0, 704])]; + tensor var_3995_end_0 = const()[name = tensor("op_3995_end_0"), val = tensor([1, 1500, 1, 768])]; + tensor var_3995_end_mask_0 = const()[name = tensor("op_3995_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3995_cast_fp16 = slice_by_index(begin = var_3995_begin_0, end = var_3995_end_0, end_mask = var_3995_end_mask_0, x = transpose_29)[name = tensor("op_3995_cast_fp16")]; + tensor var_3999_begin_0 = const()[name = tensor("op_3999_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_3999_end_0 = const()[name = tensor("op_3999_end_0"), val = tensor([1, 1500, 1, 832])]; + tensor var_3999_end_mask_0 = const()[name = tensor("op_3999_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_3999_cast_fp16 = slice_by_index(begin = var_3999_begin_0, end = var_3999_end_0, end_mask = var_3999_end_mask_0, x = transpose_29)[name = tensor("op_3999_cast_fp16")]; + tensor var_4003_begin_0 = const()[name = tensor("op_4003_begin_0"), val = tensor([0, 0, 0, 832])]; + tensor var_4003_end_0 = const()[name = tensor("op_4003_end_0"), val = tensor([1, 1500, 1, 896])]; + tensor var_4003_end_mask_0 = const()[name = tensor("op_4003_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4003_cast_fp16 = slice_by_index(begin = var_4003_begin_0, end = var_4003_end_0, end_mask = var_4003_end_mask_0, x = transpose_29)[name = tensor("op_4003_cast_fp16")]; + tensor var_4007_begin_0 = const()[name = tensor("op_4007_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_4007_end_0 = const()[name = tensor("op_4007_end_0"), val = tensor([1, 1500, 1, 960])]; + tensor var_4007_end_mask_0 = const()[name = tensor("op_4007_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4007_cast_fp16 = slice_by_index(begin = var_4007_begin_0, end = var_4007_end_0, end_mask = var_4007_end_mask_0, x = transpose_29)[name = tensor("op_4007_cast_fp16")]; + tensor var_4011_begin_0 = const()[name = tensor("op_4011_begin_0"), val = tensor([0, 0, 0, 960])]; + tensor var_4011_end_0 = const()[name = tensor("op_4011_end_0"), val = tensor([1, 1500, 1, 1024])]; + tensor var_4011_end_mask_0 = const()[name = tensor("op_4011_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4011_cast_fp16 = slice_by_index(begin = var_4011_begin_0, end = var_4011_end_0, end_mask = var_4011_end_mask_0, x = transpose_29)[name = tensor("op_4011_cast_fp16")]; + tensor var_4015_begin_0 = const()[name = tensor("op_4015_begin_0"), val = tensor([0, 0, 0, 1024])]; + tensor var_4015_end_0 = const()[name = tensor("op_4015_end_0"), val = tensor([1, 1500, 1, 1088])]; + tensor var_4015_end_mask_0 = const()[name = tensor("op_4015_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4015_cast_fp16 = slice_by_index(begin = var_4015_begin_0, end = var_4015_end_0, end_mask = var_4015_end_mask_0, x = transpose_29)[name = tensor("op_4015_cast_fp16")]; + tensor var_4019_begin_0 = const()[name = tensor("op_4019_begin_0"), val = tensor([0, 0, 0, 1088])]; + tensor var_4019_end_0 = const()[name = tensor("op_4019_end_0"), val = tensor([1, 1500, 1, 1152])]; + tensor var_4019_end_mask_0 = const()[name = tensor("op_4019_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4019_cast_fp16 = slice_by_index(begin = var_4019_begin_0, end = var_4019_end_0, end_mask = var_4019_end_mask_0, x = transpose_29)[name = tensor("op_4019_cast_fp16")]; + tensor var_4023_begin_0 = const()[name = tensor("op_4023_begin_0"), val = tensor([0, 0, 0, 1152])]; + tensor var_4023_end_0 = const()[name = tensor("op_4023_end_0"), val = tensor([1, 1500, 1, 1216])]; + tensor var_4023_end_mask_0 = const()[name = tensor("op_4023_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4023_cast_fp16 = slice_by_index(begin = var_4023_begin_0, end = var_4023_end_0, end_mask = var_4023_end_mask_0, x = transpose_29)[name = tensor("op_4023_cast_fp16")]; + tensor var_4027_begin_0 = const()[name = tensor("op_4027_begin_0"), val = tensor([0, 0, 0, 1216])]; + tensor var_4027_end_0 = const()[name = tensor("op_4027_end_0"), val = tensor([1, 1500, 1, 1280])]; + tensor var_4027_end_mask_0 = const()[name = tensor("op_4027_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4027_cast_fp16 = slice_by_index(begin = var_4027_begin_0, end = var_4027_end_0, end_mask = var_4027_end_mask_0, x = transpose_29)[name = tensor("op_4027_cast_fp16")]; + tensor var_4029_begin_0 = const()[name = tensor("op_4029_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4029_end_0 = const()[name = tensor("op_4029_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_4029_end_mask_0 = const()[name = tensor("op_4029_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4029_cast_fp16 = slice_by_index(begin = var_4029_begin_0, end = var_4029_end_0, end_mask = var_4029_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_4029_cast_fp16")]; + tensor var_4033_begin_0 = const()[name = tensor("op_4033_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_4033_end_0 = const()[name = tensor("op_4033_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_4033_end_mask_0 = const()[name = tensor("op_4033_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4033_cast_fp16 = slice_by_index(begin = var_4033_begin_0, end = var_4033_end_0, end_mask = var_4033_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_4033_cast_fp16")]; + tensor var_4037_begin_0 = const()[name = tensor("op_4037_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_4037_end_0 = const()[name = tensor("op_4037_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_4037_end_mask_0 = const()[name = tensor("op_4037_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4037_cast_fp16 = slice_by_index(begin = var_4037_begin_0, end = var_4037_end_0, end_mask = var_4037_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_4037_cast_fp16")]; + tensor var_4041_begin_0 = const()[name = tensor("op_4041_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_4041_end_0 = const()[name = tensor("op_4041_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_4041_end_mask_0 = const()[name = tensor("op_4041_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4041_cast_fp16 = slice_by_index(begin = var_4041_begin_0, end = var_4041_end_0, end_mask = var_4041_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_4041_cast_fp16")]; + tensor var_4045_begin_0 = const()[name = tensor("op_4045_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_4045_end_0 = const()[name = tensor("op_4045_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_4045_end_mask_0 = const()[name = tensor("op_4045_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4045_cast_fp16 = slice_by_index(begin = var_4045_begin_0, end = var_4045_end_0, end_mask = var_4045_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_4045_cast_fp16")]; + tensor var_4049_begin_0 = const()[name = tensor("op_4049_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_4049_end_0 = const()[name = tensor("op_4049_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_4049_end_mask_0 = const()[name = tensor("op_4049_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4049_cast_fp16 = slice_by_index(begin = var_4049_begin_0, end = var_4049_end_0, end_mask = var_4049_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_4049_cast_fp16")]; + tensor var_4053_begin_0 = const()[name = tensor("op_4053_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_4053_end_0 = const()[name = tensor("op_4053_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_4053_end_mask_0 = const()[name = tensor("op_4053_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4053_cast_fp16 = slice_by_index(begin = var_4053_begin_0, end = var_4053_end_0, end_mask = var_4053_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_4053_cast_fp16")]; + tensor var_4057_begin_0 = const()[name = tensor("op_4057_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_4057_end_0 = const()[name = tensor("op_4057_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_4057_end_mask_0 = const()[name = tensor("op_4057_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4057_cast_fp16 = slice_by_index(begin = var_4057_begin_0, end = var_4057_end_0, end_mask = var_4057_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_4057_cast_fp16")]; + tensor var_4061_begin_0 = const()[name = tensor("op_4061_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_4061_end_0 = const()[name = tensor("op_4061_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_4061_end_mask_0 = const()[name = tensor("op_4061_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4061_cast_fp16 = slice_by_index(begin = var_4061_begin_0, end = var_4061_end_0, end_mask = var_4061_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_4061_cast_fp16")]; + tensor var_4065_begin_0 = const()[name = tensor("op_4065_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_4065_end_0 = const()[name = tensor("op_4065_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_4065_end_mask_0 = const()[name = tensor("op_4065_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4065_cast_fp16 = slice_by_index(begin = var_4065_begin_0, end = var_4065_end_0, end_mask = var_4065_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_4065_cast_fp16")]; + tensor var_4069_begin_0 = const()[name = tensor("op_4069_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_4069_end_0 = const()[name = tensor("op_4069_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_4069_end_mask_0 = const()[name = tensor("op_4069_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4069_cast_fp16 = slice_by_index(begin = var_4069_begin_0, end = var_4069_end_0, end_mask = var_4069_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_4069_cast_fp16")]; + tensor var_4073_begin_0 = const()[name = tensor("op_4073_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_4073_end_0 = const()[name = tensor("op_4073_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_4073_end_mask_0 = const()[name = tensor("op_4073_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4073_cast_fp16 = slice_by_index(begin = var_4073_begin_0, end = var_4073_end_0, end_mask = var_4073_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_4073_cast_fp16")]; + tensor var_4077_begin_0 = const()[name = tensor("op_4077_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_4077_end_0 = const()[name = tensor("op_4077_end_0"), val = tensor([1, 832, 1, 1500])]; + tensor var_4077_end_mask_0 = const()[name = tensor("op_4077_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4077_cast_fp16 = slice_by_index(begin = var_4077_begin_0, end = var_4077_end_0, end_mask = var_4077_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_4077_cast_fp16")]; + tensor var_4081_begin_0 = const()[name = tensor("op_4081_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_4081_end_0 = const()[name = tensor("op_4081_end_0"), val = tensor([1, 896, 1, 1500])]; + tensor var_4081_end_mask_0 = const()[name = tensor("op_4081_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4081_cast_fp16 = slice_by_index(begin = var_4081_begin_0, end = var_4081_end_0, end_mask = var_4081_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_4081_cast_fp16")]; + tensor var_4085_begin_0 = const()[name = tensor("op_4085_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_4085_end_0 = const()[name = tensor("op_4085_end_0"), val = tensor([1, 960, 1, 1500])]; + tensor var_4085_end_mask_0 = const()[name = tensor("op_4085_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4085_cast_fp16 = slice_by_index(begin = var_4085_begin_0, end = var_4085_end_0, end_mask = var_4085_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_4085_cast_fp16")]; + tensor var_4089_begin_0 = const()[name = tensor("op_4089_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_4089_end_0 = const()[name = tensor("op_4089_end_0"), val = tensor([1, 1024, 1, 1500])]; + tensor var_4089_end_mask_0 = const()[name = tensor("op_4089_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4089_cast_fp16 = slice_by_index(begin = var_4089_begin_0, end = var_4089_end_0, end_mask = var_4089_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_4089_cast_fp16")]; + tensor var_4093_begin_0 = const()[name = tensor("op_4093_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_4093_end_0 = const()[name = tensor("op_4093_end_0"), val = tensor([1, 1088, 1, 1500])]; + tensor var_4093_end_mask_0 = const()[name = tensor("op_4093_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4093_cast_fp16 = slice_by_index(begin = var_4093_begin_0, end = var_4093_end_0, end_mask = var_4093_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_4093_cast_fp16")]; + tensor var_4097_begin_0 = const()[name = tensor("op_4097_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_4097_end_0 = const()[name = tensor("op_4097_end_0"), val = tensor([1, 1152, 1, 1500])]; + tensor var_4097_end_mask_0 = const()[name = tensor("op_4097_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4097_cast_fp16 = slice_by_index(begin = var_4097_begin_0, end = var_4097_end_0, end_mask = var_4097_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_4097_cast_fp16")]; + tensor var_4101_begin_0 = const()[name = tensor("op_4101_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_4101_end_0 = const()[name = tensor("op_4101_end_0"), val = tensor([1, 1216, 1, 1500])]; + tensor var_4101_end_mask_0 = const()[name = tensor("op_4101_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4101_cast_fp16 = slice_by_index(begin = var_4101_begin_0, end = var_4101_end_0, end_mask = var_4101_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_4101_cast_fp16")]; + tensor var_4105_begin_0 = const()[name = tensor("op_4105_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_4105_end_0 = const()[name = tensor("op_4105_end_0"), val = tensor([1, 1280, 1, 1500])]; + tensor var_4105_end_mask_0 = const()[name = tensor("op_4105_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4105_cast_fp16 = slice_by_index(begin = var_4105_begin_0, end = var_4105_end_0, end_mask = var_4105_end_mask_0, x = value_5_cast_fp16)[name = tensor("op_4105_cast_fp16")]; + tensor var_4109_equation_0 = const()[name = tensor("op_4109_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4109_cast_fp16 = einsum(equation = var_4109_equation_0, values = (var_3951_cast_fp16, var_3393_cast_fp16))[name = tensor("op_4109_cast_fp16")]; + tensor var_4110_to_fp16 = const()[name = tensor("op_4110_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_321_cast_fp16 = mul(x = var_4109_cast_fp16, y = var_4110_to_fp16)[name = tensor("aw_chunk_321_cast_fp16")]; + tensor var_4113_equation_0 = const()[name = tensor("op_4113_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4113_cast_fp16 = einsum(equation = var_4113_equation_0, values = (var_3951_cast_fp16, var_3400_cast_fp16))[name = tensor("op_4113_cast_fp16")]; + tensor var_4114_to_fp16 = const()[name = tensor("op_4114_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_323_cast_fp16 = mul(x = var_4113_cast_fp16, y = var_4114_to_fp16)[name = tensor("aw_chunk_323_cast_fp16")]; + tensor var_4117_equation_0 = const()[name = tensor("op_4117_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4117_cast_fp16 = einsum(equation = var_4117_equation_0, values = (var_3951_cast_fp16, var_3407_cast_fp16))[name = tensor("op_4117_cast_fp16")]; + tensor var_4118_to_fp16 = const()[name = tensor("op_4118_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_325_cast_fp16 = mul(x = var_4117_cast_fp16, y = var_4118_to_fp16)[name = tensor("aw_chunk_325_cast_fp16")]; + tensor var_4121_equation_0 = const()[name = tensor("op_4121_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4121_cast_fp16 = einsum(equation = var_4121_equation_0, values = (var_3951_cast_fp16, var_3414_cast_fp16))[name = tensor("op_4121_cast_fp16")]; + tensor var_4122_to_fp16 = const()[name = tensor("op_4122_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_327_cast_fp16 = mul(x = var_4121_cast_fp16, y = var_4122_to_fp16)[name = tensor("aw_chunk_327_cast_fp16")]; + tensor var_4125_equation_0 = const()[name = tensor("op_4125_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4125_cast_fp16 = einsum(equation = var_4125_equation_0, values = (var_3955_cast_fp16, var_3421_cast_fp16))[name = tensor("op_4125_cast_fp16")]; + tensor var_4126_to_fp16 = const()[name = tensor("op_4126_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_329_cast_fp16 = mul(x = var_4125_cast_fp16, y = var_4126_to_fp16)[name = tensor("aw_chunk_329_cast_fp16")]; + tensor var_4129_equation_0 = const()[name = tensor("op_4129_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4129_cast_fp16 = einsum(equation = var_4129_equation_0, values = (var_3955_cast_fp16, var_3428_cast_fp16))[name = tensor("op_4129_cast_fp16")]; + tensor var_4130_to_fp16 = const()[name = tensor("op_4130_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_331_cast_fp16 = mul(x = var_4129_cast_fp16, y = var_4130_to_fp16)[name = tensor("aw_chunk_331_cast_fp16")]; + tensor var_4133_equation_0 = const()[name = tensor("op_4133_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4133_cast_fp16 = einsum(equation = var_4133_equation_0, values = (var_3955_cast_fp16, var_3435_cast_fp16))[name = tensor("op_4133_cast_fp16")]; + tensor var_4134_to_fp16 = const()[name = tensor("op_4134_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_333_cast_fp16 = mul(x = var_4133_cast_fp16, y = var_4134_to_fp16)[name = tensor("aw_chunk_333_cast_fp16")]; + tensor var_4137_equation_0 = const()[name = tensor("op_4137_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4137_cast_fp16 = einsum(equation = var_4137_equation_0, values = (var_3955_cast_fp16, var_3442_cast_fp16))[name = tensor("op_4137_cast_fp16")]; + tensor var_4138_to_fp16 = const()[name = tensor("op_4138_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_335_cast_fp16 = mul(x = var_4137_cast_fp16, y = var_4138_to_fp16)[name = tensor("aw_chunk_335_cast_fp16")]; + tensor var_4141_equation_0 = const()[name = tensor("op_4141_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4141_cast_fp16 = einsum(equation = var_4141_equation_0, values = (var_3959_cast_fp16, var_3449_cast_fp16))[name = tensor("op_4141_cast_fp16")]; + tensor var_4142_to_fp16 = const()[name = tensor("op_4142_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_337_cast_fp16 = mul(x = var_4141_cast_fp16, y = var_4142_to_fp16)[name = tensor("aw_chunk_337_cast_fp16")]; + tensor var_4145_equation_0 = const()[name = tensor("op_4145_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4145_cast_fp16 = einsum(equation = var_4145_equation_0, values = (var_3959_cast_fp16, var_3456_cast_fp16))[name = tensor("op_4145_cast_fp16")]; + tensor var_4146_to_fp16 = const()[name = tensor("op_4146_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_339_cast_fp16 = mul(x = var_4145_cast_fp16, y = var_4146_to_fp16)[name = tensor("aw_chunk_339_cast_fp16")]; + tensor var_4149_equation_0 = const()[name = tensor("op_4149_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4149_cast_fp16 = einsum(equation = var_4149_equation_0, values = (var_3959_cast_fp16, var_3463_cast_fp16))[name = tensor("op_4149_cast_fp16")]; + tensor var_4150_to_fp16 = const()[name = tensor("op_4150_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_341_cast_fp16 = mul(x = var_4149_cast_fp16, y = var_4150_to_fp16)[name = tensor("aw_chunk_341_cast_fp16")]; + tensor var_4153_equation_0 = const()[name = tensor("op_4153_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4153_cast_fp16 = einsum(equation = var_4153_equation_0, values = (var_3959_cast_fp16, var_3470_cast_fp16))[name = tensor("op_4153_cast_fp16")]; + tensor var_4154_to_fp16 = const()[name = tensor("op_4154_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_343_cast_fp16 = mul(x = var_4153_cast_fp16, y = var_4154_to_fp16)[name = tensor("aw_chunk_343_cast_fp16")]; + tensor var_4157_equation_0 = const()[name = tensor("op_4157_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4157_cast_fp16 = einsum(equation = var_4157_equation_0, values = (var_3963_cast_fp16, var_3477_cast_fp16))[name = tensor("op_4157_cast_fp16")]; + tensor var_4158_to_fp16 = const()[name = tensor("op_4158_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_345_cast_fp16 = mul(x = var_4157_cast_fp16, y = var_4158_to_fp16)[name = tensor("aw_chunk_345_cast_fp16")]; + tensor var_4161_equation_0 = const()[name = tensor("op_4161_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4161_cast_fp16 = einsum(equation = var_4161_equation_0, values = (var_3963_cast_fp16, var_3484_cast_fp16))[name = tensor("op_4161_cast_fp16")]; + tensor var_4162_to_fp16 = const()[name = tensor("op_4162_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_347_cast_fp16 = mul(x = var_4161_cast_fp16, y = var_4162_to_fp16)[name = tensor("aw_chunk_347_cast_fp16")]; + tensor var_4165_equation_0 = const()[name = tensor("op_4165_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4165_cast_fp16 = einsum(equation = var_4165_equation_0, values = (var_3963_cast_fp16, var_3491_cast_fp16))[name = tensor("op_4165_cast_fp16")]; + tensor var_4166_to_fp16 = const()[name = tensor("op_4166_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_349_cast_fp16 = mul(x = var_4165_cast_fp16, y = var_4166_to_fp16)[name = tensor("aw_chunk_349_cast_fp16")]; + tensor var_4169_equation_0 = const()[name = tensor("op_4169_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4169_cast_fp16 = einsum(equation = var_4169_equation_0, values = (var_3963_cast_fp16, var_3498_cast_fp16))[name = tensor("op_4169_cast_fp16")]; + tensor var_4170_to_fp16 = const()[name = tensor("op_4170_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_351_cast_fp16 = mul(x = var_4169_cast_fp16, y = var_4170_to_fp16)[name = tensor("aw_chunk_351_cast_fp16")]; + tensor var_4173_equation_0 = const()[name = tensor("op_4173_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4173_cast_fp16 = einsum(equation = var_4173_equation_0, values = (var_3967_cast_fp16, var_3505_cast_fp16))[name = tensor("op_4173_cast_fp16")]; + tensor var_4174_to_fp16 = const()[name = tensor("op_4174_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_353_cast_fp16 = mul(x = var_4173_cast_fp16, y = var_4174_to_fp16)[name = tensor("aw_chunk_353_cast_fp16")]; + tensor var_4177_equation_0 = const()[name = tensor("op_4177_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4177_cast_fp16 = einsum(equation = var_4177_equation_0, values = (var_3967_cast_fp16, var_3512_cast_fp16))[name = tensor("op_4177_cast_fp16")]; + tensor var_4178_to_fp16 = const()[name = tensor("op_4178_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_355_cast_fp16 = mul(x = var_4177_cast_fp16, y = var_4178_to_fp16)[name = tensor("aw_chunk_355_cast_fp16")]; + tensor var_4181_equation_0 = const()[name = tensor("op_4181_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4181_cast_fp16 = einsum(equation = var_4181_equation_0, values = (var_3967_cast_fp16, var_3519_cast_fp16))[name = tensor("op_4181_cast_fp16")]; + tensor var_4182_to_fp16 = const()[name = tensor("op_4182_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_357_cast_fp16 = mul(x = var_4181_cast_fp16, y = var_4182_to_fp16)[name = tensor("aw_chunk_357_cast_fp16")]; + tensor var_4185_equation_0 = const()[name = tensor("op_4185_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4185_cast_fp16 = einsum(equation = var_4185_equation_0, values = (var_3967_cast_fp16, var_3526_cast_fp16))[name = tensor("op_4185_cast_fp16")]; + tensor var_4186_to_fp16 = const()[name = tensor("op_4186_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_359_cast_fp16 = mul(x = var_4185_cast_fp16, y = var_4186_to_fp16)[name = tensor("aw_chunk_359_cast_fp16")]; + tensor var_4189_equation_0 = const()[name = tensor("op_4189_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4189_cast_fp16 = einsum(equation = var_4189_equation_0, values = (var_3971_cast_fp16, var_3533_cast_fp16))[name = tensor("op_4189_cast_fp16")]; + tensor var_4190_to_fp16 = const()[name = tensor("op_4190_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_361_cast_fp16 = mul(x = var_4189_cast_fp16, y = var_4190_to_fp16)[name = tensor("aw_chunk_361_cast_fp16")]; + tensor var_4193_equation_0 = const()[name = tensor("op_4193_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4193_cast_fp16 = einsum(equation = var_4193_equation_0, values = (var_3971_cast_fp16, var_3540_cast_fp16))[name = tensor("op_4193_cast_fp16")]; + tensor var_4194_to_fp16 = const()[name = tensor("op_4194_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_363_cast_fp16 = mul(x = var_4193_cast_fp16, y = var_4194_to_fp16)[name = tensor("aw_chunk_363_cast_fp16")]; + tensor var_4197_equation_0 = const()[name = tensor("op_4197_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4197_cast_fp16 = einsum(equation = var_4197_equation_0, values = (var_3971_cast_fp16, var_3547_cast_fp16))[name = tensor("op_4197_cast_fp16")]; + tensor var_4198_to_fp16 = const()[name = tensor("op_4198_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_365_cast_fp16 = mul(x = var_4197_cast_fp16, y = var_4198_to_fp16)[name = tensor("aw_chunk_365_cast_fp16")]; + tensor var_4201_equation_0 = const()[name = tensor("op_4201_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4201_cast_fp16 = einsum(equation = var_4201_equation_0, values = (var_3971_cast_fp16, var_3554_cast_fp16))[name = tensor("op_4201_cast_fp16")]; + tensor var_4202_to_fp16 = const()[name = tensor("op_4202_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_367_cast_fp16 = mul(x = var_4201_cast_fp16, y = var_4202_to_fp16)[name = tensor("aw_chunk_367_cast_fp16")]; + tensor var_4205_equation_0 = const()[name = tensor("op_4205_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4205_cast_fp16 = einsum(equation = var_4205_equation_0, values = (var_3975_cast_fp16, var_3561_cast_fp16))[name = tensor("op_4205_cast_fp16")]; + tensor var_4206_to_fp16 = const()[name = tensor("op_4206_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_369_cast_fp16 = mul(x = var_4205_cast_fp16, y = var_4206_to_fp16)[name = tensor("aw_chunk_369_cast_fp16")]; + tensor var_4209_equation_0 = const()[name = tensor("op_4209_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4209_cast_fp16 = einsum(equation = var_4209_equation_0, values = (var_3975_cast_fp16, var_3568_cast_fp16))[name = tensor("op_4209_cast_fp16")]; + tensor var_4210_to_fp16 = const()[name = tensor("op_4210_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_371_cast_fp16 = mul(x = var_4209_cast_fp16, y = var_4210_to_fp16)[name = tensor("aw_chunk_371_cast_fp16")]; + tensor var_4213_equation_0 = const()[name = tensor("op_4213_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4213_cast_fp16 = einsum(equation = var_4213_equation_0, values = (var_3975_cast_fp16, var_3575_cast_fp16))[name = tensor("op_4213_cast_fp16")]; + tensor var_4214_to_fp16 = const()[name = tensor("op_4214_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_373_cast_fp16 = mul(x = var_4213_cast_fp16, y = var_4214_to_fp16)[name = tensor("aw_chunk_373_cast_fp16")]; + tensor var_4217_equation_0 = const()[name = tensor("op_4217_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4217_cast_fp16 = einsum(equation = var_4217_equation_0, values = (var_3975_cast_fp16, var_3582_cast_fp16))[name = tensor("op_4217_cast_fp16")]; + tensor var_4218_to_fp16 = const()[name = tensor("op_4218_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_375_cast_fp16 = mul(x = var_4217_cast_fp16, y = var_4218_to_fp16)[name = tensor("aw_chunk_375_cast_fp16")]; + tensor var_4221_equation_0 = const()[name = tensor("op_4221_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4221_cast_fp16 = einsum(equation = var_4221_equation_0, values = (var_3979_cast_fp16, var_3589_cast_fp16))[name = tensor("op_4221_cast_fp16")]; + tensor var_4222_to_fp16 = const()[name = tensor("op_4222_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_377_cast_fp16 = mul(x = var_4221_cast_fp16, y = var_4222_to_fp16)[name = tensor("aw_chunk_377_cast_fp16")]; + tensor var_4225_equation_0 = const()[name = tensor("op_4225_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4225_cast_fp16 = einsum(equation = var_4225_equation_0, values = (var_3979_cast_fp16, var_3596_cast_fp16))[name = tensor("op_4225_cast_fp16")]; + tensor var_4226_to_fp16 = const()[name = tensor("op_4226_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_379_cast_fp16 = mul(x = var_4225_cast_fp16, y = var_4226_to_fp16)[name = tensor("aw_chunk_379_cast_fp16")]; + tensor var_4229_equation_0 = const()[name = tensor("op_4229_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4229_cast_fp16 = einsum(equation = var_4229_equation_0, values = (var_3979_cast_fp16, var_3603_cast_fp16))[name = tensor("op_4229_cast_fp16")]; + tensor var_4230_to_fp16 = const()[name = tensor("op_4230_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_381_cast_fp16 = mul(x = var_4229_cast_fp16, y = var_4230_to_fp16)[name = tensor("aw_chunk_381_cast_fp16")]; + tensor var_4233_equation_0 = const()[name = tensor("op_4233_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4233_cast_fp16 = einsum(equation = var_4233_equation_0, values = (var_3979_cast_fp16, var_3610_cast_fp16))[name = tensor("op_4233_cast_fp16")]; + tensor var_4234_to_fp16 = const()[name = tensor("op_4234_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_383_cast_fp16 = mul(x = var_4233_cast_fp16, y = var_4234_to_fp16)[name = tensor("aw_chunk_383_cast_fp16")]; + tensor var_4237_equation_0 = const()[name = tensor("op_4237_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4237_cast_fp16 = einsum(equation = var_4237_equation_0, values = (var_3983_cast_fp16, var_3617_cast_fp16))[name = tensor("op_4237_cast_fp16")]; + tensor var_4238_to_fp16 = const()[name = tensor("op_4238_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_385_cast_fp16 = mul(x = var_4237_cast_fp16, y = var_4238_to_fp16)[name = tensor("aw_chunk_385_cast_fp16")]; + tensor var_4241_equation_0 = const()[name = tensor("op_4241_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4241_cast_fp16 = einsum(equation = var_4241_equation_0, values = (var_3983_cast_fp16, var_3624_cast_fp16))[name = tensor("op_4241_cast_fp16")]; + tensor var_4242_to_fp16 = const()[name = tensor("op_4242_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_387_cast_fp16 = mul(x = var_4241_cast_fp16, y = var_4242_to_fp16)[name = tensor("aw_chunk_387_cast_fp16")]; + tensor var_4245_equation_0 = const()[name = tensor("op_4245_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4245_cast_fp16 = einsum(equation = var_4245_equation_0, values = (var_3983_cast_fp16, var_3631_cast_fp16))[name = tensor("op_4245_cast_fp16")]; + tensor var_4246_to_fp16 = const()[name = tensor("op_4246_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_389_cast_fp16 = mul(x = var_4245_cast_fp16, y = var_4246_to_fp16)[name = tensor("aw_chunk_389_cast_fp16")]; + tensor var_4249_equation_0 = const()[name = tensor("op_4249_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4249_cast_fp16 = einsum(equation = var_4249_equation_0, values = (var_3983_cast_fp16, var_3638_cast_fp16))[name = tensor("op_4249_cast_fp16")]; + tensor var_4250_to_fp16 = const()[name = tensor("op_4250_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_391_cast_fp16 = mul(x = var_4249_cast_fp16, y = var_4250_to_fp16)[name = tensor("aw_chunk_391_cast_fp16")]; + tensor var_4253_equation_0 = const()[name = tensor("op_4253_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4253_cast_fp16 = einsum(equation = var_4253_equation_0, values = (var_3987_cast_fp16, var_3645_cast_fp16))[name = tensor("op_4253_cast_fp16")]; + tensor var_4254_to_fp16 = const()[name = tensor("op_4254_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_393_cast_fp16 = mul(x = var_4253_cast_fp16, y = var_4254_to_fp16)[name = tensor("aw_chunk_393_cast_fp16")]; + tensor var_4257_equation_0 = const()[name = tensor("op_4257_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4257_cast_fp16 = einsum(equation = var_4257_equation_0, values = (var_3987_cast_fp16, var_3652_cast_fp16))[name = tensor("op_4257_cast_fp16")]; + tensor var_4258_to_fp16 = const()[name = tensor("op_4258_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_395_cast_fp16 = mul(x = var_4257_cast_fp16, y = var_4258_to_fp16)[name = tensor("aw_chunk_395_cast_fp16")]; + tensor var_4261_equation_0 = const()[name = tensor("op_4261_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4261_cast_fp16 = einsum(equation = var_4261_equation_0, values = (var_3987_cast_fp16, var_3659_cast_fp16))[name = tensor("op_4261_cast_fp16")]; + tensor var_4262_to_fp16 = const()[name = tensor("op_4262_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_397_cast_fp16 = mul(x = var_4261_cast_fp16, y = var_4262_to_fp16)[name = tensor("aw_chunk_397_cast_fp16")]; + tensor var_4265_equation_0 = const()[name = tensor("op_4265_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4265_cast_fp16 = einsum(equation = var_4265_equation_0, values = (var_3987_cast_fp16, var_3666_cast_fp16))[name = tensor("op_4265_cast_fp16")]; + tensor var_4266_to_fp16 = const()[name = tensor("op_4266_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_399_cast_fp16 = mul(x = var_4265_cast_fp16, y = var_4266_to_fp16)[name = tensor("aw_chunk_399_cast_fp16")]; + tensor var_4269_equation_0 = const()[name = tensor("op_4269_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4269_cast_fp16 = einsum(equation = var_4269_equation_0, values = (var_3991_cast_fp16, var_3673_cast_fp16))[name = tensor("op_4269_cast_fp16")]; + tensor var_4270_to_fp16 = const()[name = tensor("op_4270_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_401_cast_fp16 = mul(x = var_4269_cast_fp16, y = var_4270_to_fp16)[name = tensor("aw_chunk_401_cast_fp16")]; + tensor var_4273_equation_0 = const()[name = tensor("op_4273_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4273_cast_fp16 = einsum(equation = var_4273_equation_0, values = (var_3991_cast_fp16, var_3680_cast_fp16))[name = tensor("op_4273_cast_fp16")]; + tensor var_4274_to_fp16 = const()[name = tensor("op_4274_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_403_cast_fp16 = mul(x = var_4273_cast_fp16, y = var_4274_to_fp16)[name = tensor("aw_chunk_403_cast_fp16")]; + tensor var_4277_equation_0 = const()[name = tensor("op_4277_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4277_cast_fp16 = einsum(equation = var_4277_equation_0, values = (var_3991_cast_fp16, var_3687_cast_fp16))[name = tensor("op_4277_cast_fp16")]; + tensor var_4278_to_fp16 = const()[name = tensor("op_4278_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_405_cast_fp16 = mul(x = var_4277_cast_fp16, y = var_4278_to_fp16)[name = tensor("aw_chunk_405_cast_fp16")]; + tensor var_4281_equation_0 = const()[name = tensor("op_4281_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4281_cast_fp16 = einsum(equation = var_4281_equation_0, values = (var_3991_cast_fp16, var_3694_cast_fp16))[name = tensor("op_4281_cast_fp16")]; + tensor var_4282_to_fp16 = const()[name = tensor("op_4282_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_407_cast_fp16 = mul(x = var_4281_cast_fp16, y = var_4282_to_fp16)[name = tensor("aw_chunk_407_cast_fp16")]; + tensor var_4285_equation_0 = const()[name = tensor("op_4285_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4285_cast_fp16 = einsum(equation = var_4285_equation_0, values = (var_3995_cast_fp16, var_3701_cast_fp16))[name = tensor("op_4285_cast_fp16")]; + tensor var_4286_to_fp16 = const()[name = tensor("op_4286_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_409_cast_fp16 = mul(x = var_4285_cast_fp16, y = var_4286_to_fp16)[name = tensor("aw_chunk_409_cast_fp16")]; + tensor var_4289_equation_0 = const()[name = tensor("op_4289_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4289_cast_fp16 = einsum(equation = var_4289_equation_0, values = (var_3995_cast_fp16, var_3708_cast_fp16))[name = tensor("op_4289_cast_fp16")]; + tensor var_4290_to_fp16 = const()[name = tensor("op_4290_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_411_cast_fp16 = mul(x = var_4289_cast_fp16, y = var_4290_to_fp16)[name = tensor("aw_chunk_411_cast_fp16")]; + tensor var_4293_equation_0 = const()[name = tensor("op_4293_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4293_cast_fp16 = einsum(equation = var_4293_equation_0, values = (var_3995_cast_fp16, var_3715_cast_fp16))[name = tensor("op_4293_cast_fp16")]; + tensor var_4294_to_fp16 = const()[name = tensor("op_4294_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_413_cast_fp16 = mul(x = var_4293_cast_fp16, y = var_4294_to_fp16)[name = tensor("aw_chunk_413_cast_fp16")]; + tensor var_4297_equation_0 = const()[name = tensor("op_4297_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4297_cast_fp16 = einsum(equation = var_4297_equation_0, values = (var_3995_cast_fp16, var_3722_cast_fp16))[name = tensor("op_4297_cast_fp16")]; + tensor var_4298_to_fp16 = const()[name = tensor("op_4298_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_415_cast_fp16 = mul(x = var_4297_cast_fp16, y = var_4298_to_fp16)[name = tensor("aw_chunk_415_cast_fp16")]; + tensor var_4301_equation_0 = const()[name = tensor("op_4301_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4301_cast_fp16 = einsum(equation = var_4301_equation_0, values = (var_3999_cast_fp16, var_3729_cast_fp16))[name = tensor("op_4301_cast_fp16")]; + tensor var_4302_to_fp16 = const()[name = tensor("op_4302_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_417_cast_fp16 = mul(x = var_4301_cast_fp16, y = var_4302_to_fp16)[name = tensor("aw_chunk_417_cast_fp16")]; + tensor var_4305_equation_0 = const()[name = tensor("op_4305_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4305_cast_fp16 = einsum(equation = var_4305_equation_0, values = (var_3999_cast_fp16, var_3736_cast_fp16))[name = tensor("op_4305_cast_fp16")]; + tensor var_4306_to_fp16 = const()[name = tensor("op_4306_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_419_cast_fp16 = mul(x = var_4305_cast_fp16, y = var_4306_to_fp16)[name = tensor("aw_chunk_419_cast_fp16")]; + tensor var_4309_equation_0 = const()[name = tensor("op_4309_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4309_cast_fp16 = einsum(equation = var_4309_equation_0, values = (var_3999_cast_fp16, var_3743_cast_fp16))[name = tensor("op_4309_cast_fp16")]; + tensor var_4310_to_fp16 = const()[name = tensor("op_4310_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_421_cast_fp16 = mul(x = var_4309_cast_fp16, y = var_4310_to_fp16)[name = tensor("aw_chunk_421_cast_fp16")]; + tensor var_4313_equation_0 = const()[name = tensor("op_4313_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4313_cast_fp16 = einsum(equation = var_4313_equation_0, values = (var_3999_cast_fp16, var_3750_cast_fp16))[name = tensor("op_4313_cast_fp16")]; + tensor var_4314_to_fp16 = const()[name = tensor("op_4314_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_423_cast_fp16 = mul(x = var_4313_cast_fp16, y = var_4314_to_fp16)[name = tensor("aw_chunk_423_cast_fp16")]; + tensor var_4317_equation_0 = const()[name = tensor("op_4317_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4317_cast_fp16 = einsum(equation = var_4317_equation_0, values = (var_4003_cast_fp16, var_3757_cast_fp16))[name = tensor("op_4317_cast_fp16")]; + tensor var_4318_to_fp16 = const()[name = tensor("op_4318_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_425_cast_fp16 = mul(x = var_4317_cast_fp16, y = var_4318_to_fp16)[name = tensor("aw_chunk_425_cast_fp16")]; + tensor var_4321_equation_0 = const()[name = tensor("op_4321_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4321_cast_fp16 = einsum(equation = var_4321_equation_0, values = (var_4003_cast_fp16, var_3764_cast_fp16))[name = tensor("op_4321_cast_fp16")]; + tensor var_4322_to_fp16 = const()[name = tensor("op_4322_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_427_cast_fp16 = mul(x = var_4321_cast_fp16, y = var_4322_to_fp16)[name = tensor("aw_chunk_427_cast_fp16")]; + tensor var_4325_equation_0 = const()[name = tensor("op_4325_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4325_cast_fp16 = einsum(equation = var_4325_equation_0, values = (var_4003_cast_fp16, var_3771_cast_fp16))[name = tensor("op_4325_cast_fp16")]; + tensor var_4326_to_fp16 = const()[name = tensor("op_4326_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_429_cast_fp16 = mul(x = var_4325_cast_fp16, y = var_4326_to_fp16)[name = tensor("aw_chunk_429_cast_fp16")]; + tensor var_4329_equation_0 = const()[name = tensor("op_4329_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4329_cast_fp16 = einsum(equation = var_4329_equation_0, values = (var_4003_cast_fp16, var_3778_cast_fp16))[name = tensor("op_4329_cast_fp16")]; + tensor var_4330_to_fp16 = const()[name = tensor("op_4330_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_431_cast_fp16 = mul(x = var_4329_cast_fp16, y = var_4330_to_fp16)[name = tensor("aw_chunk_431_cast_fp16")]; + tensor var_4333_equation_0 = const()[name = tensor("op_4333_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4333_cast_fp16 = einsum(equation = var_4333_equation_0, values = (var_4007_cast_fp16, var_3785_cast_fp16))[name = tensor("op_4333_cast_fp16")]; + tensor var_4334_to_fp16 = const()[name = tensor("op_4334_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_433_cast_fp16 = mul(x = var_4333_cast_fp16, y = var_4334_to_fp16)[name = tensor("aw_chunk_433_cast_fp16")]; + tensor var_4337_equation_0 = const()[name = tensor("op_4337_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4337_cast_fp16 = einsum(equation = var_4337_equation_0, values = (var_4007_cast_fp16, var_3792_cast_fp16))[name = tensor("op_4337_cast_fp16")]; + tensor var_4338_to_fp16 = const()[name = tensor("op_4338_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_435_cast_fp16 = mul(x = var_4337_cast_fp16, y = var_4338_to_fp16)[name = tensor("aw_chunk_435_cast_fp16")]; + tensor var_4341_equation_0 = const()[name = tensor("op_4341_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4341_cast_fp16 = einsum(equation = var_4341_equation_0, values = (var_4007_cast_fp16, var_3799_cast_fp16))[name = tensor("op_4341_cast_fp16")]; + tensor var_4342_to_fp16 = const()[name = tensor("op_4342_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_437_cast_fp16 = mul(x = var_4341_cast_fp16, y = var_4342_to_fp16)[name = tensor("aw_chunk_437_cast_fp16")]; + tensor var_4345_equation_0 = const()[name = tensor("op_4345_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4345_cast_fp16 = einsum(equation = var_4345_equation_0, values = (var_4007_cast_fp16, var_3806_cast_fp16))[name = tensor("op_4345_cast_fp16")]; + tensor var_4346_to_fp16 = const()[name = tensor("op_4346_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_439_cast_fp16 = mul(x = var_4345_cast_fp16, y = var_4346_to_fp16)[name = tensor("aw_chunk_439_cast_fp16")]; + tensor var_4349_equation_0 = const()[name = tensor("op_4349_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4349_cast_fp16 = einsum(equation = var_4349_equation_0, values = (var_4011_cast_fp16, var_3813_cast_fp16))[name = tensor("op_4349_cast_fp16")]; + tensor var_4350_to_fp16 = const()[name = tensor("op_4350_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_441_cast_fp16 = mul(x = var_4349_cast_fp16, y = var_4350_to_fp16)[name = tensor("aw_chunk_441_cast_fp16")]; + tensor var_4353_equation_0 = const()[name = tensor("op_4353_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4353_cast_fp16 = einsum(equation = var_4353_equation_0, values = (var_4011_cast_fp16, var_3820_cast_fp16))[name = tensor("op_4353_cast_fp16")]; + tensor var_4354_to_fp16 = const()[name = tensor("op_4354_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_443_cast_fp16 = mul(x = var_4353_cast_fp16, y = var_4354_to_fp16)[name = tensor("aw_chunk_443_cast_fp16")]; + tensor var_4357_equation_0 = const()[name = tensor("op_4357_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4357_cast_fp16 = einsum(equation = var_4357_equation_0, values = (var_4011_cast_fp16, var_3827_cast_fp16))[name = tensor("op_4357_cast_fp16")]; + tensor var_4358_to_fp16 = const()[name = tensor("op_4358_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_445_cast_fp16 = mul(x = var_4357_cast_fp16, y = var_4358_to_fp16)[name = tensor("aw_chunk_445_cast_fp16")]; + tensor var_4361_equation_0 = const()[name = tensor("op_4361_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4361_cast_fp16 = einsum(equation = var_4361_equation_0, values = (var_4011_cast_fp16, var_3834_cast_fp16))[name = tensor("op_4361_cast_fp16")]; + tensor var_4362_to_fp16 = const()[name = tensor("op_4362_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_447_cast_fp16 = mul(x = var_4361_cast_fp16, y = var_4362_to_fp16)[name = tensor("aw_chunk_447_cast_fp16")]; + tensor var_4365_equation_0 = const()[name = tensor("op_4365_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4365_cast_fp16 = einsum(equation = var_4365_equation_0, values = (var_4015_cast_fp16, var_3841_cast_fp16))[name = tensor("op_4365_cast_fp16")]; + tensor var_4366_to_fp16 = const()[name = tensor("op_4366_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_449_cast_fp16 = mul(x = var_4365_cast_fp16, y = var_4366_to_fp16)[name = tensor("aw_chunk_449_cast_fp16")]; + tensor var_4369_equation_0 = const()[name = tensor("op_4369_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4369_cast_fp16 = einsum(equation = var_4369_equation_0, values = (var_4015_cast_fp16, var_3848_cast_fp16))[name = tensor("op_4369_cast_fp16")]; + tensor var_4370_to_fp16 = const()[name = tensor("op_4370_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_451_cast_fp16 = mul(x = var_4369_cast_fp16, y = var_4370_to_fp16)[name = tensor("aw_chunk_451_cast_fp16")]; + tensor var_4373_equation_0 = const()[name = tensor("op_4373_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4373_cast_fp16 = einsum(equation = var_4373_equation_0, values = (var_4015_cast_fp16, var_3855_cast_fp16))[name = tensor("op_4373_cast_fp16")]; + tensor var_4374_to_fp16 = const()[name = tensor("op_4374_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_453_cast_fp16 = mul(x = var_4373_cast_fp16, y = var_4374_to_fp16)[name = tensor("aw_chunk_453_cast_fp16")]; + tensor var_4377_equation_0 = const()[name = tensor("op_4377_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4377_cast_fp16 = einsum(equation = var_4377_equation_0, values = (var_4015_cast_fp16, var_3862_cast_fp16))[name = tensor("op_4377_cast_fp16")]; + tensor var_4378_to_fp16 = const()[name = tensor("op_4378_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_455_cast_fp16 = mul(x = var_4377_cast_fp16, y = var_4378_to_fp16)[name = tensor("aw_chunk_455_cast_fp16")]; + tensor var_4381_equation_0 = const()[name = tensor("op_4381_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4381_cast_fp16 = einsum(equation = var_4381_equation_0, values = (var_4019_cast_fp16, var_3869_cast_fp16))[name = tensor("op_4381_cast_fp16")]; + tensor var_4382_to_fp16 = const()[name = tensor("op_4382_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_457_cast_fp16 = mul(x = var_4381_cast_fp16, y = var_4382_to_fp16)[name = tensor("aw_chunk_457_cast_fp16")]; + tensor var_4385_equation_0 = const()[name = tensor("op_4385_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4385_cast_fp16 = einsum(equation = var_4385_equation_0, values = (var_4019_cast_fp16, var_3876_cast_fp16))[name = tensor("op_4385_cast_fp16")]; + tensor var_4386_to_fp16 = const()[name = tensor("op_4386_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_459_cast_fp16 = mul(x = var_4385_cast_fp16, y = var_4386_to_fp16)[name = tensor("aw_chunk_459_cast_fp16")]; + tensor var_4389_equation_0 = const()[name = tensor("op_4389_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4389_cast_fp16 = einsum(equation = var_4389_equation_0, values = (var_4019_cast_fp16, var_3883_cast_fp16))[name = tensor("op_4389_cast_fp16")]; + tensor var_4390_to_fp16 = const()[name = tensor("op_4390_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_461_cast_fp16 = mul(x = var_4389_cast_fp16, y = var_4390_to_fp16)[name = tensor("aw_chunk_461_cast_fp16")]; + tensor var_4393_equation_0 = const()[name = tensor("op_4393_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4393_cast_fp16 = einsum(equation = var_4393_equation_0, values = (var_4019_cast_fp16, var_3890_cast_fp16))[name = tensor("op_4393_cast_fp16")]; + tensor var_4394_to_fp16 = const()[name = tensor("op_4394_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_463_cast_fp16 = mul(x = var_4393_cast_fp16, y = var_4394_to_fp16)[name = tensor("aw_chunk_463_cast_fp16")]; + tensor var_4397_equation_0 = const()[name = tensor("op_4397_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4397_cast_fp16 = einsum(equation = var_4397_equation_0, values = (var_4023_cast_fp16, var_3897_cast_fp16))[name = tensor("op_4397_cast_fp16")]; + tensor var_4398_to_fp16 = const()[name = tensor("op_4398_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_465_cast_fp16 = mul(x = var_4397_cast_fp16, y = var_4398_to_fp16)[name = tensor("aw_chunk_465_cast_fp16")]; + tensor var_4401_equation_0 = const()[name = tensor("op_4401_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4401_cast_fp16 = einsum(equation = var_4401_equation_0, values = (var_4023_cast_fp16, var_3904_cast_fp16))[name = tensor("op_4401_cast_fp16")]; + tensor var_4402_to_fp16 = const()[name = tensor("op_4402_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_467_cast_fp16 = mul(x = var_4401_cast_fp16, y = var_4402_to_fp16)[name = tensor("aw_chunk_467_cast_fp16")]; + tensor var_4405_equation_0 = const()[name = tensor("op_4405_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4405_cast_fp16 = einsum(equation = var_4405_equation_0, values = (var_4023_cast_fp16, var_3911_cast_fp16))[name = tensor("op_4405_cast_fp16")]; + tensor var_4406_to_fp16 = const()[name = tensor("op_4406_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_469_cast_fp16 = mul(x = var_4405_cast_fp16, y = var_4406_to_fp16)[name = tensor("aw_chunk_469_cast_fp16")]; + tensor var_4409_equation_0 = const()[name = tensor("op_4409_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4409_cast_fp16 = einsum(equation = var_4409_equation_0, values = (var_4023_cast_fp16, var_3918_cast_fp16))[name = tensor("op_4409_cast_fp16")]; + tensor var_4410_to_fp16 = const()[name = tensor("op_4410_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_471_cast_fp16 = mul(x = var_4409_cast_fp16, y = var_4410_to_fp16)[name = tensor("aw_chunk_471_cast_fp16")]; + tensor var_4413_equation_0 = const()[name = tensor("op_4413_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4413_cast_fp16 = einsum(equation = var_4413_equation_0, values = (var_4027_cast_fp16, var_3925_cast_fp16))[name = tensor("op_4413_cast_fp16")]; + tensor var_4414_to_fp16 = const()[name = tensor("op_4414_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_473_cast_fp16 = mul(x = var_4413_cast_fp16, y = var_4414_to_fp16)[name = tensor("aw_chunk_473_cast_fp16")]; + tensor var_4417_equation_0 = const()[name = tensor("op_4417_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4417_cast_fp16 = einsum(equation = var_4417_equation_0, values = (var_4027_cast_fp16, var_3932_cast_fp16))[name = tensor("op_4417_cast_fp16")]; + tensor var_4418_to_fp16 = const()[name = tensor("op_4418_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_475_cast_fp16 = mul(x = var_4417_cast_fp16, y = var_4418_to_fp16)[name = tensor("aw_chunk_475_cast_fp16")]; + tensor var_4421_equation_0 = const()[name = tensor("op_4421_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4421_cast_fp16 = einsum(equation = var_4421_equation_0, values = (var_4027_cast_fp16, var_3939_cast_fp16))[name = tensor("op_4421_cast_fp16")]; + tensor var_4422_to_fp16 = const()[name = tensor("op_4422_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_477_cast_fp16 = mul(x = var_4421_cast_fp16, y = var_4422_to_fp16)[name = tensor("aw_chunk_477_cast_fp16")]; + tensor var_4425_equation_0 = const()[name = tensor("op_4425_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_4425_cast_fp16 = einsum(equation = var_4425_equation_0, values = (var_4027_cast_fp16, var_3946_cast_fp16))[name = tensor("op_4425_cast_fp16")]; + tensor var_4426_to_fp16 = const()[name = tensor("op_4426_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_479_cast_fp16 = mul(x = var_4425_cast_fp16, y = var_4426_to_fp16)[name = tensor("aw_chunk_479_cast_fp16")]; + tensor var_4428_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_321_cast_fp16)[name = tensor("op_4428_cast_fp16")]; + tensor var_4429_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_323_cast_fp16)[name = tensor("op_4429_cast_fp16")]; + tensor var_4430_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_325_cast_fp16)[name = tensor("op_4430_cast_fp16")]; + tensor var_4431_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_327_cast_fp16)[name = tensor("op_4431_cast_fp16")]; + tensor var_4432_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_329_cast_fp16)[name = tensor("op_4432_cast_fp16")]; + tensor var_4433_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_331_cast_fp16)[name = tensor("op_4433_cast_fp16")]; + tensor var_4434_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_333_cast_fp16)[name = tensor("op_4434_cast_fp16")]; + tensor var_4435_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_335_cast_fp16)[name = tensor("op_4435_cast_fp16")]; + tensor var_4436_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_337_cast_fp16)[name = tensor("op_4436_cast_fp16")]; + tensor var_4437_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_339_cast_fp16)[name = tensor("op_4437_cast_fp16")]; + tensor var_4438_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_341_cast_fp16)[name = tensor("op_4438_cast_fp16")]; + tensor var_4439_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_343_cast_fp16)[name = tensor("op_4439_cast_fp16")]; + tensor var_4440_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_345_cast_fp16)[name = tensor("op_4440_cast_fp16")]; + tensor var_4441_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_347_cast_fp16)[name = tensor("op_4441_cast_fp16")]; + tensor var_4442_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_349_cast_fp16)[name = tensor("op_4442_cast_fp16")]; + tensor var_4443_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_351_cast_fp16)[name = tensor("op_4443_cast_fp16")]; + tensor var_4444_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_353_cast_fp16)[name = tensor("op_4444_cast_fp16")]; + tensor var_4445_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_355_cast_fp16)[name = tensor("op_4445_cast_fp16")]; + tensor var_4446_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_357_cast_fp16)[name = tensor("op_4446_cast_fp16")]; + tensor var_4447_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_359_cast_fp16)[name = tensor("op_4447_cast_fp16")]; + tensor var_4448_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_361_cast_fp16)[name = tensor("op_4448_cast_fp16")]; + tensor var_4449_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_363_cast_fp16)[name = tensor("op_4449_cast_fp16")]; + tensor var_4450_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_365_cast_fp16)[name = tensor("op_4450_cast_fp16")]; + tensor var_4451_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_367_cast_fp16)[name = tensor("op_4451_cast_fp16")]; + tensor var_4452_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_369_cast_fp16)[name = tensor("op_4452_cast_fp16")]; + tensor var_4453_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_371_cast_fp16)[name = tensor("op_4453_cast_fp16")]; + tensor var_4454_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_373_cast_fp16)[name = tensor("op_4454_cast_fp16")]; + tensor var_4455_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_375_cast_fp16)[name = tensor("op_4455_cast_fp16")]; + tensor var_4456_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_377_cast_fp16)[name = tensor("op_4456_cast_fp16")]; + tensor var_4457_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_379_cast_fp16)[name = tensor("op_4457_cast_fp16")]; + tensor var_4458_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_381_cast_fp16)[name = tensor("op_4458_cast_fp16")]; + tensor var_4459_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_383_cast_fp16)[name = tensor("op_4459_cast_fp16")]; + tensor var_4460_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_385_cast_fp16)[name = tensor("op_4460_cast_fp16")]; + tensor var_4461_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_387_cast_fp16)[name = tensor("op_4461_cast_fp16")]; + tensor var_4462_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_389_cast_fp16)[name = tensor("op_4462_cast_fp16")]; + tensor var_4463_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_391_cast_fp16)[name = tensor("op_4463_cast_fp16")]; + tensor var_4464_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_393_cast_fp16)[name = tensor("op_4464_cast_fp16")]; + tensor var_4465_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_395_cast_fp16)[name = tensor("op_4465_cast_fp16")]; + tensor var_4466_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_397_cast_fp16)[name = tensor("op_4466_cast_fp16")]; + tensor var_4467_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_399_cast_fp16)[name = tensor("op_4467_cast_fp16")]; + tensor var_4468_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_401_cast_fp16)[name = tensor("op_4468_cast_fp16")]; + tensor var_4469_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_403_cast_fp16)[name = tensor("op_4469_cast_fp16")]; + tensor var_4470_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_405_cast_fp16)[name = tensor("op_4470_cast_fp16")]; + tensor var_4471_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_407_cast_fp16)[name = tensor("op_4471_cast_fp16")]; + tensor var_4472_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_409_cast_fp16)[name = tensor("op_4472_cast_fp16")]; + tensor var_4473_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_411_cast_fp16)[name = tensor("op_4473_cast_fp16")]; + tensor var_4474_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_413_cast_fp16)[name = tensor("op_4474_cast_fp16")]; + tensor var_4475_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_415_cast_fp16)[name = tensor("op_4475_cast_fp16")]; + tensor var_4476_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_417_cast_fp16)[name = tensor("op_4476_cast_fp16")]; + tensor var_4477_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_419_cast_fp16)[name = tensor("op_4477_cast_fp16")]; + tensor var_4478_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_421_cast_fp16)[name = tensor("op_4478_cast_fp16")]; + tensor var_4479_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_423_cast_fp16)[name = tensor("op_4479_cast_fp16")]; + tensor var_4480_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_425_cast_fp16)[name = tensor("op_4480_cast_fp16")]; + tensor var_4481_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_427_cast_fp16)[name = tensor("op_4481_cast_fp16")]; + tensor var_4482_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_429_cast_fp16)[name = tensor("op_4482_cast_fp16")]; + tensor var_4483_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_431_cast_fp16)[name = tensor("op_4483_cast_fp16")]; + tensor var_4484_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_433_cast_fp16)[name = tensor("op_4484_cast_fp16")]; + tensor var_4485_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_435_cast_fp16)[name = tensor("op_4485_cast_fp16")]; + tensor var_4486_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_437_cast_fp16)[name = tensor("op_4486_cast_fp16")]; + tensor var_4487_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_439_cast_fp16)[name = tensor("op_4487_cast_fp16")]; + tensor var_4488_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_441_cast_fp16)[name = tensor("op_4488_cast_fp16")]; + tensor var_4489_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_443_cast_fp16)[name = tensor("op_4489_cast_fp16")]; + tensor var_4490_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_445_cast_fp16)[name = tensor("op_4490_cast_fp16")]; + tensor var_4491_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_447_cast_fp16)[name = tensor("op_4491_cast_fp16")]; + tensor var_4492_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_449_cast_fp16)[name = tensor("op_4492_cast_fp16")]; + tensor var_4493_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_451_cast_fp16)[name = tensor("op_4493_cast_fp16")]; + tensor var_4494_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_453_cast_fp16)[name = tensor("op_4494_cast_fp16")]; + tensor var_4495_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_455_cast_fp16)[name = tensor("op_4495_cast_fp16")]; + tensor var_4496_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_457_cast_fp16)[name = tensor("op_4496_cast_fp16")]; + tensor var_4497_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_459_cast_fp16)[name = tensor("op_4497_cast_fp16")]; + tensor var_4498_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_461_cast_fp16)[name = tensor("op_4498_cast_fp16")]; + tensor var_4499_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_463_cast_fp16)[name = tensor("op_4499_cast_fp16")]; + tensor var_4500_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_465_cast_fp16)[name = tensor("op_4500_cast_fp16")]; + tensor var_4501_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_467_cast_fp16)[name = tensor("op_4501_cast_fp16")]; + tensor var_4502_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_469_cast_fp16)[name = tensor("op_4502_cast_fp16")]; + tensor var_4503_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_471_cast_fp16)[name = tensor("op_4503_cast_fp16")]; + tensor var_4504_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_473_cast_fp16)[name = tensor("op_4504_cast_fp16")]; + tensor var_4505_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_475_cast_fp16)[name = tensor("op_4505_cast_fp16")]; + tensor var_4506_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_477_cast_fp16)[name = tensor("op_4506_cast_fp16")]; + tensor var_4507_cast_fp16 = softmax(axis = var_3253, x = aw_chunk_479_cast_fp16)[name = tensor("op_4507_cast_fp16")]; + tensor var_4509_equation_0 = const()[name = tensor("op_4509_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4509_cast_fp16 = einsum(equation = var_4509_equation_0, values = (var_4029_cast_fp16, var_4428_cast_fp16))[name = tensor("op_4509_cast_fp16")]; + tensor var_4511_equation_0 = const()[name = tensor("op_4511_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4511_cast_fp16 = einsum(equation = var_4511_equation_0, values = (var_4029_cast_fp16, var_4429_cast_fp16))[name = tensor("op_4511_cast_fp16")]; + tensor var_4513_equation_0 = const()[name = tensor("op_4513_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4513_cast_fp16 = einsum(equation = var_4513_equation_0, values = (var_4029_cast_fp16, var_4430_cast_fp16))[name = tensor("op_4513_cast_fp16")]; + tensor var_4515_equation_0 = const()[name = tensor("op_4515_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4515_cast_fp16 = einsum(equation = var_4515_equation_0, values = (var_4029_cast_fp16, var_4431_cast_fp16))[name = tensor("op_4515_cast_fp16")]; + tensor var_4517_equation_0 = const()[name = tensor("op_4517_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4517_cast_fp16 = einsum(equation = var_4517_equation_0, values = (var_4033_cast_fp16, var_4432_cast_fp16))[name = tensor("op_4517_cast_fp16")]; + tensor var_4519_equation_0 = const()[name = tensor("op_4519_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4519_cast_fp16 = einsum(equation = var_4519_equation_0, values = (var_4033_cast_fp16, var_4433_cast_fp16))[name = tensor("op_4519_cast_fp16")]; + tensor var_4521_equation_0 = const()[name = tensor("op_4521_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4521_cast_fp16 = einsum(equation = var_4521_equation_0, values = (var_4033_cast_fp16, var_4434_cast_fp16))[name = tensor("op_4521_cast_fp16")]; + tensor var_4523_equation_0 = const()[name = tensor("op_4523_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4523_cast_fp16 = einsum(equation = var_4523_equation_0, values = (var_4033_cast_fp16, var_4435_cast_fp16))[name = tensor("op_4523_cast_fp16")]; + tensor var_4525_equation_0 = const()[name = tensor("op_4525_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4525_cast_fp16 = einsum(equation = var_4525_equation_0, values = (var_4037_cast_fp16, var_4436_cast_fp16))[name = tensor("op_4525_cast_fp16")]; + tensor var_4527_equation_0 = const()[name = tensor("op_4527_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4527_cast_fp16 = einsum(equation = var_4527_equation_0, values = (var_4037_cast_fp16, var_4437_cast_fp16))[name = tensor("op_4527_cast_fp16")]; + tensor var_4529_equation_0 = const()[name = tensor("op_4529_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4529_cast_fp16 = einsum(equation = var_4529_equation_0, values = (var_4037_cast_fp16, var_4438_cast_fp16))[name = tensor("op_4529_cast_fp16")]; + tensor var_4531_equation_0 = const()[name = tensor("op_4531_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4531_cast_fp16 = einsum(equation = var_4531_equation_0, values = (var_4037_cast_fp16, var_4439_cast_fp16))[name = tensor("op_4531_cast_fp16")]; + tensor var_4533_equation_0 = const()[name = tensor("op_4533_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4533_cast_fp16 = einsum(equation = var_4533_equation_0, values = (var_4041_cast_fp16, var_4440_cast_fp16))[name = tensor("op_4533_cast_fp16")]; + tensor var_4535_equation_0 = const()[name = tensor("op_4535_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4535_cast_fp16 = einsum(equation = var_4535_equation_0, values = (var_4041_cast_fp16, var_4441_cast_fp16))[name = tensor("op_4535_cast_fp16")]; + tensor var_4537_equation_0 = const()[name = tensor("op_4537_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4537_cast_fp16 = einsum(equation = var_4537_equation_0, values = (var_4041_cast_fp16, var_4442_cast_fp16))[name = tensor("op_4537_cast_fp16")]; + tensor var_4539_equation_0 = const()[name = tensor("op_4539_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4539_cast_fp16 = einsum(equation = var_4539_equation_0, values = (var_4041_cast_fp16, var_4443_cast_fp16))[name = tensor("op_4539_cast_fp16")]; + tensor var_4541_equation_0 = const()[name = tensor("op_4541_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4541_cast_fp16 = einsum(equation = var_4541_equation_0, values = (var_4045_cast_fp16, var_4444_cast_fp16))[name = tensor("op_4541_cast_fp16")]; + tensor var_4543_equation_0 = const()[name = tensor("op_4543_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4543_cast_fp16 = einsum(equation = var_4543_equation_0, values = (var_4045_cast_fp16, var_4445_cast_fp16))[name = tensor("op_4543_cast_fp16")]; + tensor var_4545_equation_0 = const()[name = tensor("op_4545_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4545_cast_fp16 = einsum(equation = var_4545_equation_0, values = (var_4045_cast_fp16, var_4446_cast_fp16))[name = tensor("op_4545_cast_fp16")]; + tensor var_4547_equation_0 = const()[name = tensor("op_4547_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4547_cast_fp16 = einsum(equation = var_4547_equation_0, values = (var_4045_cast_fp16, var_4447_cast_fp16))[name = tensor("op_4547_cast_fp16")]; + tensor var_4549_equation_0 = const()[name = tensor("op_4549_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4549_cast_fp16 = einsum(equation = var_4549_equation_0, values = (var_4049_cast_fp16, var_4448_cast_fp16))[name = tensor("op_4549_cast_fp16")]; + tensor var_4551_equation_0 = const()[name = tensor("op_4551_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4551_cast_fp16 = einsum(equation = var_4551_equation_0, values = (var_4049_cast_fp16, var_4449_cast_fp16))[name = tensor("op_4551_cast_fp16")]; + tensor var_4553_equation_0 = const()[name = tensor("op_4553_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4553_cast_fp16 = einsum(equation = var_4553_equation_0, values = (var_4049_cast_fp16, var_4450_cast_fp16))[name = tensor("op_4553_cast_fp16")]; + tensor var_4555_equation_0 = const()[name = tensor("op_4555_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4555_cast_fp16 = einsum(equation = var_4555_equation_0, values = (var_4049_cast_fp16, var_4451_cast_fp16))[name = tensor("op_4555_cast_fp16")]; + tensor var_4557_equation_0 = const()[name = tensor("op_4557_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4557_cast_fp16 = einsum(equation = var_4557_equation_0, values = (var_4053_cast_fp16, var_4452_cast_fp16))[name = tensor("op_4557_cast_fp16")]; + tensor var_4559_equation_0 = const()[name = tensor("op_4559_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4559_cast_fp16 = einsum(equation = var_4559_equation_0, values = (var_4053_cast_fp16, var_4453_cast_fp16))[name = tensor("op_4559_cast_fp16")]; + tensor var_4561_equation_0 = const()[name = tensor("op_4561_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4561_cast_fp16 = einsum(equation = var_4561_equation_0, values = (var_4053_cast_fp16, var_4454_cast_fp16))[name = tensor("op_4561_cast_fp16")]; + tensor var_4563_equation_0 = const()[name = tensor("op_4563_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4563_cast_fp16 = einsum(equation = var_4563_equation_0, values = (var_4053_cast_fp16, var_4455_cast_fp16))[name = tensor("op_4563_cast_fp16")]; + tensor var_4565_equation_0 = const()[name = tensor("op_4565_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4565_cast_fp16 = einsum(equation = var_4565_equation_0, values = (var_4057_cast_fp16, var_4456_cast_fp16))[name = tensor("op_4565_cast_fp16")]; + tensor var_4567_equation_0 = const()[name = tensor("op_4567_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4567_cast_fp16 = einsum(equation = var_4567_equation_0, values = (var_4057_cast_fp16, var_4457_cast_fp16))[name = tensor("op_4567_cast_fp16")]; + tensor var_4569_equation_0 = const()[name = tensor("op_4569_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4569_cast_fp16 = einsum(equation = var_4569_equation_0, values = (var_4057_cast_fp16, var_4458_cast_fp16))[name = tensor("op_4569_cast_fp16")]; + tensor var_4571_equation_0 = const()[name = tensor("op_4571_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4571_cast_fp16 = einsum(equation = var_4571_equation_0, values = (var_4057_cast_fp16, var_4459_cast_fp16))[name = tensor("op_4571_cast_fp16")]; + tensor var_4573_equation_0 = const()[name = tensor("op_4573_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4573_cast_fp16 = einsum(equation = var_4573_equation_0, values = (var_4061_cast_fp16, var_4460_cast_fp16))[name = tensor("op_4573_cast_fp16")]; + tensor var_4575_equation_0 = const()[name = tensor("op_4575_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4575_cast_fp16 = einsum(equation = var_4575_equation_0, values = (var_4061_cast_fp16, var_4461_cast_fp16))[name = tensor("op_4575_cast_fp16")]; + tensor var_4577_equation_0 = const()[name = tensor("op_4577_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4577_cast_fp16 = einsum(equation = var_4577_equation_0, values = (var_4061_cast_fp16, var_4462_cast_fp16))[name = tensor("op_4577_cast_fp16")]; + tensor var_4579_equation_0 = const()[name = tensor("op_4579_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4579_cast_fp16 = einsum(equation = var_4579_equation_0, values = (var_4061_cast_fp16, var_4463_cast_fp16))[name = tensor("op_4579_cast_fp16")]; + tensor var_4581_equation_0 = const()[name = tensor("op_4581_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4581_cast_fp16 = einsum(equation = var_4581_equation_0, values = (var_4065_cast_fp16, var_4464_cast_fp16))[name = tensor("op_4581_cast_fp16")]; + tensor var_4583_equation_0 = const()[name = tensor("op_4583_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4583_cast_fp16 = einsum(equation = var_4583_equation_0, values = (var_4065_cast_fp16, var_4465_cast_fp16))[name = tensor("op_4583_cast_fp16")]; + tensor var_4585_equation_0 = const()[name = tensor("op_4585_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4585_cast_fp16 = einsum(equation = var_4585_equation_0, values = (var_4065_cast_fp16, var_4466_cast_fp16))[name = tensor("op_4585_cast_fp16")]; + tensor var_4587_equation_0 = const()[name = tensor("op_4587_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4587_cast_fp16 = einsum(equation = var_4587_equation_0, values = (var_4065_cast_fp16, var_4467_cast_fp16))[name = tensor("op_4587_cast_fp16")]; + tensor var_4589_equation_0 = const()[name = tensor("op_4589_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4589_cast_fp16 = einsum(equation = var_4589_equation_0, values = (var_4069_cast_fp16, var_4468_cast_fp16))[name = tensor("op_4589_cast_fp16")]; + tensor var_4591_equation_0 = const()[name = tensor("op_4591_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4591_cast_fp16 = einsum(equation = var_4591_equation_0, values = (var_4069_cast_fp16, var_4469_cast_fp16))[name = tensor("op_4591_cast_fp16")]; + tensor var_4593_equation_0 = const()[name = tensor("op_4593_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4593_cast_fp16 = einsum(equation = var_4593_equation_0, values = (var_4069_cast_fp16, var_4470_cast_fp16))[name = tensor("op_4593_cast_fp16")]; + tensor var_4595_equation_0 = const()[name = tensor("op_4595_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4595_cast_fp16 = einsum(equation = var_4595_equation_0, values = (var_4069_cast_fp16, var_4471_cast_fp16))[name = tensor("op_4595_cast_fp16")]; + tensor var_4597_equation_0 = const()[name = tensor("op_4597_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4597_cast_fp16 = einsum(equation = var_4597_equation_0, values = (var_4073_cast_fp16, var_4472_cast_fp16))[name = tensor("op_4597_cast_fp16")]; + tensor var_4599_equation_0 = const()[name = tensor("op_4599_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4599_cast_fp16 = einsum(equation = var_4599_equation_0, values = (var_4073_cast_fp16, var_4473_cast_fp16))[name = tensor("op_4599_cast_fp16")]; + tensor var_4601_equation_0 = const()[name = tensor("op_4601_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4601_cast_fp16 = einsum(equation = var_4601_equation_0, values = (var_4073_cast_fp16, var_4474_cast_fp16))[name = tensor("op_4601_cast_fp16")]; + tensor var_4603_equation_0 = const()[name = tensor("op_4603_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4603_cast_fp16 = einsum(equation = var_4603_equation_0, values = (var_4073_cast_fp16, var_4475_cast_fp16))[name = tensor("op_4603_cast_fp16")]; + tensor var_4605_equation_0 = const()[name = tensor("op_4605_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4605_cast_fp16 = einsum(equation = var_4605_equation_0, values = (var_4077_cast_fp16, var_4476_cast_fp16))[name = tensor("op_4605_cast_fp16")]; + tensor var_4607_equation_0 = const()[name = tensor("op_4607_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4607_cast_fp16 = einsum(equation = var_4607_equation_0, values = (var_4077_cast_fp16, var_4477_cast_fp16))[name = tensor("op_4607_cast_fp16")]; + tensor var_4609_equation_0 = const()[name = tensor("op_4609_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4609_cast_fp16 = einsum(equation = var_4609_equation_0, values = (var_4077_cast_fp16, var_4478_cast_fp16))[name = tensor("op_4609_cast_fp16")]; + tensor var_4611_equation_0 = const()[name = tensor("op_4611_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4611_cast_fp16 = einsum(equation = var_4611_equation_0, values = (var_4077_cast_fp16, var_4479_cast_fp16))[name = tensor("op_4611_cast_fp16")]; + tensor var_4613_equation_0 = const()[name = tensor("op_4613_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4613_cast_fp16 = einsum(equation = var_4613_equation_0, values = (var_4081_cast_fp16, var_4480_cast_fp16))[name = tensor("op_4613_cast_fp16")]; + tensor var_4615_equation_0 = const()[name = tensor("op_4615_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4615_cast_fp16 = einsum(equation = var_4615_equation_0, values = (var_4081_cast_fp16, var_4481_cast_fp16))[name = tensor("op_4615_cast_fp16")]; + tensor var_4617_equation_0 = const()[name = tensor("op_4617_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4617_cast_fp16 = einsum(equation = var_4617_equation_0, values = (var_4081_cast_fp16, var_4482_cast_fp16))[name = tensor("op_4617_cast_fp16")]; + tensor var_4619_equation_0 = const()[name = tensor("op_4619_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4619_cast_fp16 = einsum(equation = var_4619_equation_0, values = (var_4081_cast_fp16, var_4483_cast_fp16))[name = tensor("op_4619_cast_fp16")]; + tensor var_4621_equation_0 = const()[name = tensor("op_4621_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4621_cast_fp16 = einsum(equation = var_4621_equation_0, values = (var_4085_cast_fp16, var_4484_cast_fp16))[name = tensor("op_4621_cast_fp16")]; + tensor var_4623_equation_0 = const()[name = tensor("op_4623_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4623_cast_fp16 = einsum(equation = var_4623_equation_0, values = (var_4085_cast_fp16, var_4485_cast_fp16))[name = tensor("op_4623_cast_fp16")]; + tensor var_4625_equation_0 = const()[name = tensor("op_4625_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4625_cast_fp16 = einsum(equation = var_4625_equation_0, values = (var_4085_cast_fp16, var_4486_cast_fp16))[name = tensor("op_4625_cast_fp16")]; + tensor var_4627_equation_0 = const()[name = tensor("op_4627_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4627_cast_fp16 = einsum(equation = var_4627_equation_0, values = (var_4085_cast_fp16, var_4487_cast_fp16))[name = tensor("op_4627_cast_fp16")]; + tensor var_4629_equation_0 = const()[name = tensor("op_4629_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4629_cast_fp16 = einsum(equation = var_4629_equation_0, values = (var_4089_cast_fp16, var_4488_cast_fp16))[name = tensor("op_4629_cast_fp16")]; + tensor var_4631_equation_0 = const()[name = tensor("op_4631_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4631_cast_fp16 = einsum(equation = var_4631_equation_0, values = (var_4089_cast_fp16, var_4489_cast_fp16))[name = tensor("op_4631_cast_fp16")]; + tensor var_4633_equation_0 = const()[name = tensor("op_4633_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4633_cast_fp16 = einsum(equation = var_4633_equation_0, values = (var_4089_cast_fp16, var_4490_cast_fp16))[name = tensor("op_4633_cast_fp16")]; + tensor var_4635_equation_0 = const()[name = tensor("op_4635_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4635_cast_fp16 = einsum(equation = var_4635_equation_0, values = (var_4089_cast_fp16, var_4491_cast_fp16))[name = tensor("op_4635_cast_fp16")]; + tensor var_4637_equation_0 = const()[name = tensor("op_4637_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4637_cast_fp16 = einsum(equation = var_4637_equation_0, values = (var_4093_cast_fp16, var_4492_cast_fp16))[name = tensor("op_4637_cast_fp16")]; + tensor var_4639_equation_0 = const()[name = tensor("op_4639_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4639_cast_fp16 = einsum(equation = var_4639_equation_0, values = (var_4093_cast_fp16, var_4493_cast_fp16))[name = tensor("op_4639_cast_fp16")]; + tensor var_4641_equation_0 = const()[name = tensor("op_4641_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4641_cast_fp16 = einsum(equation = var_4641_equation_0, values = (var_4093_cast_fp16, var_4494_cast_fp16))[name = tensor("op_4641_cast_fp16")]; + tensor var_4643_equation_0 = const()[name = tensor("op_4643_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4643_cast_fp16 = einsum(equation = var_4643_equation_0, values = (var_4093_cast_fp16, var_4495_cast_fp16))[name = tensor("op_4643_cast_fp16")]; + tensor var_4645_equation_0 = const()[name = tensor("op_4645_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4645_cast_fp16 = einsum(equation = var_4645_equation_0, values = (var_4097_cast_fp16, var_4496_cast_fp16))[name = tensor("op_4645_cast_fp16")]; + tensor var_4647_equation_0 = const()[name = tensor("op_4647_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4647_cast_fp16 = einsum(equation = var_4647_equation_0, values = (var_4097_cast_fp16, var_4497_cast_fp16))[name = tensor("op_4647_cast_fp16")]; + tensor var_4649_equation_0 = const()[name = tensor("op_4649_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4649_cast_fp16 = einsum(equation = var_4649_equation_0, values = (var_4097_cast_fp16, var_4498_cast_fp16))[name = tensor("op_4649_cast_fp16")]; + tensor var_4651_equation_0 = const()[name = tensor("op_4651_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4651_cast_fp16 = einsum(equation = var_4651_equation_0, values = (var_4097_cast_fp16, var_4499_cast_fp16))[name = tensor("op_4651_cast_fp16")]; + tensor var_4653_equation_0 = const()[name = tensor("op_4653_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4653_cast_fp16 = einsum(equation = var_4653_equation_0, values = (var_4101_cast_fp16, var_4500_cast_fp16))[name = tensor("op_4653_cast_fp16")]; + tensor var_4655_equation_0 = const()[name = tensor("op_4655_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4655_cast_fp16 = einsum(equation = var_4655_equation_0, values = (var_4101_cast_fp16, var_4501_cast_fp16))[name = tensor("op_4655_cast_fp16")]; + tensor var_4657_equation_0 = const()[name = tensor("op_4657_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4657_cast_fp16 = einsum(equation = var_4657_equation_0, values = (var_4101_cast_fp16, var_4502_cast_fp16))[name = tensor("op_4657_cast_fp16")]; + tensor var_4659_equation_0 = const()[name = tensor("op_4659_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4659_cast_fp16 = einsum(equation = var_4659_equation_0, values = (var_4101_cast_fp16, var_4503_cast_fp16))[name = tensor("op_4659_cast_fp16")]; + tensor var_4661_equation_0 = const()[name = tensor("op_4661_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4661_cast_fp16 = einsum(equation = var_4661_equation_0, values = (var_4105_cast_fp16, var_4504_cast_fp16))[name = tensor("op_4661_cast_fp16")]; + tensor var_4663_equation_0 = const()[name = tensor("op_4663_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4663_cast_fp16 = einsum(equation = var_4663_equation_0, values = (var_4105_cast_fp16, var_4505_cast_fp16))[name = tensor("op_4663_cast_fp16")]; + tensor var_4665_equation_0 = const()[name = tensor("op_4665_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4665_cast_fp16 = einsum(equation = var_4665_equation_0, values = (var_4105_cast_fp16, var_4506_cast_fp16))[name = tensor("op_4665_cast_fp16")]; + tensor var_4667_equation_0 = const()[name = tensor("op_4667_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_4667_cast_fp16 = einsum(equation = var_4667_equation_0, values = (var_4105_cast_fp16, var_4507_cast_fp16))[name = tensor("op_4667_cast_fp16")]; + tensor var_4669_interleave_0 = const()[name = tensor("op_4669_interleave_0"), val = tensor(false)]; + tensor var_4669_cast_fp16 = concat(axis = var_3228, interleave = var_4669_interleave_0, values = (var_4509_cast_fp16, var_4511_cast_fp16, var_4513_cast_fp16, var_4515_cast_fp16))[name = tensor("op_4669_cast_fp16")]; + tensor var_4671_interleave_0 = const()[name = tensor("op_4671_interleave_0"), val = tensor(false)]; + tensor var_4671_cast_fp16 = concat(axis = var_3228, interleave = var_4671_interleave_0, values = (var_4517_cast_fp16, var_4519_cast_fp16, var_4521_cast_fp16, var_4523_cast_fp16))[name = tensor("op_4671_cast_fp16")]; + tensor var_4673_interleave_0 = const()[name = tensor("op_4673_interleave_0"), val = tensor(false)]; + tensor var_4673_cast_fp16 = concat(axis = var_3228, interleave = var_4673_interleave_0, values = (var_4525_cast_fp16, var_4527_cast_fp16, var_4529_cast_fp16, var_4531_cast_fp16))[name = tensor("op_4673_cast_fp16")]; + tensor var_4675_interleave_0 = const()[name = tensor("op_4675_interleave_0"), val = tensor(false)]; + tensor var_4675_cast_fp16 = concat(axis = var_3228, interleave = var_4675_interleave_0, values = (var_4533_cast_fp16, var_4535_cast_fp16, var_4537_cast_fp16, var_4539_cast_fp16))[name = tensor("op_4675_cast_fp16")]; + tensor var_4677_interleave_0 = const()[name = tensor("op_4677_interleave_0"), val = tensor(false)]; + tensor var_4677_cast_fp16 = concat(axis = var_3228, interleave = var_4677_interleave_0, values = (var_4541_cast_fp16, var_4543_cast_fp16, var_4545_cast_fp16, var_4547_cast_fp16))[name = tensor("op_4677_cast_fp16")]; + tensor var_4679_interleave_0 = const()[name = tensor("op_4679_interleave_0"), val = tensor(false)]; + tensor var_4679_cast_fp16 = concat(axis = var_3228, interleave = var_4679_interleave_0, values = (var_4549_cast_fp16, var_4551_cast_fp16, var_4553_cast_fp16, var_4555_cast_fp16))[name = tensor("op_4679_cast_fp16")]; + tensor var_4681_interleave_0 = const()[name = tensor("op_4681_interleave_0"), val = tensor(false)]; + tensor var_4681_cast_fp16 = concat(axis = var_3228, interleave = var_4681_interleave_0, values = (var_4557_cast_fp16, var_4559_cast_fp16, var_4561_cast_fp16, var_4563_cast_fp16))[name = tensor("op_4681_cast_fp16")]; + tensor var_4683_interleave_0 = const()[name = tensor("op_4683_interleave_0"), val = tensor(false)]; + tensor var_4683_cast_fp16 = concat(axis = var_3228, interleave = var_4683_interleave_0, values = (var_4565_cast_fp16, var_4567_cast_fp16, var_4569_cast_fp16, var_4571_cast_fp16))[name = tensor("op_4683_cast_fp16")]; + tensor var_4685_interleave_0 = const()[name = tensor("op_4685_interleave_0"), val = tensor(false)]; + tensor var_4685_cast_fp16 = concat(axis = var_3228, interleave = var_4685_interleave_0, values = (var_4573_cast_fp16, var_4575_cast_fp16, var_4577_cast_fp16, var_4579_cast_fp16))[name = tensor("op_4685_cast_fp16")]; + tensor var_4687_interleave_0 = const()[name = tensor("op_4687_interleave_0"), val = tensor(false)]; + tensor var_4687_cast_fp16 = concat(axis = var_3228, interleave = var_4687_interleave_0, values = (var_4581_cast_fp16, var_4583_cast_fp16, var_4585_cast_fp16, var_4587_cast_fp16))[name = tensor("op_4687_cast_fp16")]; + tensor var_4689_interleave_0 = const()[name = tensor("op_4689_interleave_0"), val = tensor(false)]; + tensor var_4689_cast_fp16 = concat(axis = var_3228, interleave = var_4689_interleave_0, values = (var_4589_cast_fp16, var_4591_cast_fp16, var_4593_cast_fp16, var_4595_cast_fp16))[name = tensor("op_4689_cast_fp16")]; + tensor var_4691_interleave_0 = const()[name = tensor("op_4691_interleave_0"), val = tensor(false)]; + tensor var_4691_cast_fp16 = concat(axis = var_3228, interleave = var_4691_interleave_0, values = (var_4597_cast_fp16, var_4599_cast_fp16, var_4601_cast_fp16, var_4603_cast_fp16))[name = tensor("op_4691_cast_fp16")]; + tensor var_4693_interleave_0 = const()[name = tensor("op_4693_interleave_0"), val = tensor(false)]; + tensor var_4693_cast_fp16 = concat(axis = var_3228, interleave = var_4693_interleave_0, values = (var_4605_cast_fp16, var_4607_cast_fp16, var_4609_cast_fp16, var_4611_cast_fp16))[name = tensor("op_4693_cast_fp16")]; + tensor var_4695_interleave_0 = const()[name = tensor("op_4695_interleave_0"), val = tensor(false)]; + tensor var_4695_cast_fp16 = concat(axis = var_3228, interleave = var_4695_interleave_0, values = (var_4613_cast_fp16, var_4615_cast_fp16, var_4617_cast_fp16, var_4619_cast_fp16))[name = tensor("op_4695_cast_fp16")]; + tensor var_4697_interleave_0 = const()[name = tensor("op_4697_interleave_0"), val = tensor(false)]; + tensor var_4697_cast_fp16 = concat(axis = var_3228, interleave = var_4697_interleave_0, values = (var_4621_cast_fp16, var_4623_cast_fp16, var_4625_cast_fp16, var_4627_cast_fp16))[name = tensor("op_4697_cast_fp16")]; + tensor var_4699_interleave_0 = const()[name = tensor("op_4699_interleave_0"), val = tensor(false)]; + tensor var_4699_cast_fp16 = concat(axis = var_3228, interleave = var_4699_interleave_0, values = (var_4629_cast_fp16, var_4631_cast_fp16, var_4633_cast_fp16, var_4635_cast_fp16))[name = tensor("op_4699_cast_fp16")]; + tensor var_4701_interleave_0 = const()[name = tensor("op_4701_interleave_0"), val = tensor(false)]; + tensor var_4701_cast_fp16 = concat(axis = var_3228, interleave = var_4701_interleave_0, values = (var_4637_cast_fp16, var_4639_cast_fp16, var_4641_cast_fp16, var_4643_cast_fp16))[name = tensor("op_4701_cast_fp16")]; + tensor var_4703_interleave_0 = const()[name = tensor("op_4703_interleave_0"), val = tensor(false)]; + tensor var_4703_cast_fp16 = concat(axis = var_3228, interleave = var_4703_interleave_0, values = (var_4645_cast_fp16, var_4647_cast_fp16, var_4649_cast_fp16, var_4651_cast_fp16))[name = tensor("op_4703_cast_fp16")]; + tensor var_4705_interleave_0 = const()[name = tensor("op_4705_interleave_0"), val = tensor(false)]; + tensor var_4705_cast_fp16 = concat(axis = var_3228, interleave = var_4705_interleave_0, values = (var_4653_cast_fp16, var_4655_cast_fp16, var_4657_cast_fp16, var_4659_cast_fp16))[name = tensor("op_4705_cast_fp16")]; + tensor var_4707_interleave_0 = const()[name = tensor("op_4707_interleave_0"), val = tensor(false)]; + tensor var_4707_cast_fp16 = concat(axis = var_3228, interleave = var_4707_interleave_0, values = (var_4661_cast_fp16, var_4663_cast_fp16, var_4665_cast_fp16, var_4667_cast_fp16))[name = tensor("op_4707_cast_fp16")]; + tensor input_17_interleave_0 = const()[name = tensor("input_17_interleave_0"), val = tensor(false)]; + tensor input_17_cast_fp16 = concat(axis = var_3253, interleave = input_17_interleave_0, values = (var_4669_cast_fp16, var_4671_cast_fp16, var_4673_cast_fp16, var_4675_cast_fp16, var_4677_cast_fp16, var_4679_cast_fp16, var_4681_cast_fp16, var_4683_cast_fp16, var_4685_cast_fp16, var_4687_cast_fp16, var_4689_cast_fp16, var_4691_cast_fp16, var_4693_cast_fp16, var_4695_cast_fp16, var_4697_cast_fp16, var_4699_cast_fp16, var_4701_cast_fp16, var_4703_cast_fp16, var_4705_cast_fp16, var_4707_cast_fp16))[name = tensor("input_17_cast_fp16")]; + tensor var_4712 = const()[name = tensor("op_4712"), val = tensor([1, 1])]; + tensor var_4714 = const()[name = tensor("op_4714"), val = tensor([1, 1])]; + tensor obj_11_pad_type_0 = const()[name = tensor("obj_11_pad_type_0"), val = tensor("custom")]; + tensor obj_11_pad_0 = const()[name = tensor("obj_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_2_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_2_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(102843200)))]; + tensor layers_2_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_2_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(106120064)))]; + tensor obj_11_cast_fp16 = conv(bias = layers_2_self_attn_o_proj_bias_to_fp16, dilations = var_4714, groups = var_3253, pad = obj_11_pad_0, pad_type = obj_11_pad_type_0, strides = var_4712, weight = layers_2_self_attn_o_proj_weight_to_fp16, x = input_17_cast_fp16)[name = tensor("obj_11_cast_fp16")]; + tensor inputs_11_cast_fp16 = add(x = inputs_9_cast_fp16, y = obj_11_cast_fp16)[name = tensor("inputs_11_cast_fp16")]; + tensor var_4720 = const()[name = tensor("op_4720"), val = tensor([1])]; + tensor channels_mean_11_cast_fp16 = reduce_mean(axes = var_4720, keep_dims = var_3254, x = inputs_11_cast_fp16)[name = tensor("channels_mean_11_cast_fp16")]; + tensor zero_mean_11_cast_fp16 = sub(x = inputs_11_cast_fp16, y = channels_mean_11_cast_fp16)[name = tensor("zero_mean_11_cast_fp16")]; + tensor zero_mean_sq_11_cast_fp16 = mul(x = zero_mean_11_cast_fp16, y = zero_mean_11_cast_fp16)[name = tensor("zero_mean_sq_11_cast_fp16")]; + tensor var_4724 = const()[name = tensor("op_4724"), val = tensor([1])]; + tensor var_4725_cast_fp16 = reduce_mean(axes = var_4724, keep_dims = var_3254, x = zero_mean_sq_11_cast_fp16)[name = tensor("op_4725_cast_fp16")]; + tensor var_4726_to_fp16 = const()[name = tensor("op_4726_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_4727_cast_fp16 = add(x = var_4725_cast_fp16, y = var_4726_to_fp16)[name = tensor("op_4727_cast_fp16")]; + tensor denom_11_epsilon_0_to_fp16 = const()[name = tensor("denom_11_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_11_cast_fp16 = rsqrt(epsilon = denom_11_epsilon_0_to_fp16, x = var_4727_cast_fp16)[name = tensor("denom_11_cast_fp16")]; + tensor out_11_cast_fp16 = mul(x = zero_mean_11_cast_fp16, y = denom_11_cast_fp16)[name = tensor("out_11_cast_fp16")]; + tensor input_19_gamma_0_to_fp16 = const()[name = tensor("input_19_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(106122688)))]; + tensor input_19_beta_0_to_fp16 = const()[name = tensor("input_19_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(106125312)))]; + tensor input_19_epsilon_0_to_fp16 = const()[name = tensor("input_19_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_19_cast_fp16 = batch_norm(beta = input_19_beta_0_to_fp16, epsilon = input_19_epsilon_0_to_fp16, gamma = input_19_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_11_cast_fp16)[name = tensor("input_19_cast_fp16")]; + tensor var_4738 = const()[name = tensor("op_4738"), val = tensor([1, 1])]; + tensor var_4740 = const()[name = tensor("op_4740"), val = tensor([1, 1])]; + tensor input_21_pad_type_0 = const()[name = tensor("input_21_pad_type_0"), val = tensor("custom")]; + tensor input_21_pad_0 = const()[name = tensor("input_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_2_fc1_weight_to_fp16 = const()[name = tensor("layers_2_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(106127936)))]; + tensor layers_2_fc1_bias_to_fp16 = const()[name = tensor("layers_2_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119235200)))]; + tensor input_21_cast_fp16 = conv(bias = layers_2_fc1_bias_to_fp16, dilations = var_4740, groups = var_3253, pad = input_21_pad_0, pad_type = input_21_pad_type_0, strides = var_4738, weight = layers_2_fc1_weight_to_fp16, x = input_19_cast_fp16)[name = tensor("input_21_cast_fp16")]; + tensor input_23_mode_0 = const()[name = tensor("input_23_mode_0"), val = tensor("EXACT")]; + tensor input_23_cast_fp16 = gelu(mode = input_23_mode_0, x = input_21_cast_fp16)[name = tensor("input_23_cast_fp16")]; + tensor var_4746 = const()[name = tensor("op_4746"), val = tensor([1, 1])]; + tensor var_4748 = const()[name = tensor("op_4748"), val = tensor([1, 1])]; + tensor hidden_states_9_pad_type_0 = const()[name = tensor("hidden_states_9_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_9_pad_0 = const()[name = tensor("hidden_states_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_2_fc2_weight_to_fp16 = const()[name = tensor("layers_2_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(119245504)))]; + tensor layers_2_fc2_bias_to_fp16 = const()[name = tensor("layers_2_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(132352768)))]; + tensor hidden_states_9_cast_fp16 = conv(bias = layers_2_fc2_bias_to_fp16, dilations = var_4748, groups = var_3253, pad = hidden_states_9_pad_0, pad_type = hidden_states_9_pad_type_0, strides = var_4746, weight = layers_2_fc2_weight_to_fp16, x = input_23_cast_fp16)[name = tensor("hidden_states_9_cast_fp16")]; + tensor inputs_13_cast_fp16 = add(x = inputs_11_cast_fp16, y = hidden_states_9_cast_fp16)[name = tensor("inputs_13_cast_fp16")]; + tensor var_4755 = const()[name = tensor("op_4755"), val = tensor(3)]; + tensor var_4780 = const()[name = tensor("op_4780"), val = tensor(1)]; + tensor var_4781 = const()[name = tensor("op_4781"), val = tensor(true)]; + tensor var_4791 = const()[name = tensor("op_4791"), val = tensor([1])]; + tensor channels_mean_13_cast_fp16 = reduce_mean(axes = var_4791, keep_dims = var_4781, x = inputs_13_cast_fp16)[name = tensor("channels_mean_13_cast_fp16")]; + tensor zero_mean_13_cast_fp16 = sub(x = inputs_13_cast_fp16, y = channels_mean_13_cast_fp16)[name = tensor("zero_mean_13_cast_fp16")]; + tensor zero_mean_sq_13_cast_fp16 = mul(x = zero_mean_13_cast_fp16, y = zero_mean_13_cast_fp16)[name = tensor("zero_mean_sq_13_cast_fp16")]; + tensor var_4795 = const()[name = tensor("op_4795"), val = tensor([1])]; + tensor var_4796_cast_fp16 = reduce_mean(axes = var_4795, keep_dims = var_4781, x = zero_mean_sq_13_cast_fp16)[name = tensor("op_4796_cast_fp16")]; + tensor var_4797_to_fp16 = const()[name = tensor("op_4797_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_4798_cast_fp16 = add(x = var_4796_cast_fp16, y = var_4797_to_fp16)[name = tensor("op_4798_cast_fp16")]; + tensor denom_13_epsilon_0_to_fp16 = const()[name = tensor("denom_13_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_13_cast_fp16 = rsqrt(epsilon = denom_13_epsilon_0_to_fp16, x = var_4798_cast_fp16)[name = tensor("denom_13_cast_fp16")]; + tensor out_13_cast_fp16 = mul(x = zero_mean_13_cast_fp16, y = denom_13_cast_fp16)[name = tensor("out_13_cast_fp16")]; + tensor obj_13_gamma_0_to_fp16 = const()[name = tensor("obj_13_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(132355392)))]; + tensor obj_13_beta_0_to_fp16 = const()[name = tensor("obj_13_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(132358016)))]; + tensor obj_13_epsilon_0_to_fp16 = const()[name = tensor("obj_13_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_13_cast_fp16 = batch_norm(beta = obj_13_beta_0_to_fp16, epsilon = obj_13_epsilon_0_to_fp16, gamma = obj_13_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_13_cast_fp16)[name = tensor("obj_13_cast_fp16")]; + tensor var_4813 = const()[name = tensor("op_4813"), val = tensor([1, 1])]; + tensor var_4815 = const()[name = tensor("op_4815"), val = tensor([1, 1])]; + tensor query_7_pad_type_0 = const()[name = tensor("query_7_pad_type_0"), val = tensor("custom")]; + tensor query_7_pad_0 = const()[name = tensor("query_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_3_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_3_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(132360640)))]; + tensor layers_3_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_3_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(135637504)))]; + tensor query_7_cast_fp16 = conv(bias = layers_3_self_attn_q_proj_bias_to_fp16, dilations = var_4815, groups = var_4780, pad = query_7_pad_0, pad_type = query_7_pad_type_0, strides = var_4813, weight = layers_3_self_attn_q_proj_weight_to_fp16, x = obj_13_cast_fp16)[name = tensor("query_7_cast_fp16")]; + tensor var_4819 = const()[name = tensor("op_4819"), val = tensor([1, 1])]; + tensor var_4821 = const()[name = tensor("op_4821"), val = tensor([1, 1])]; + tensor key_7_pad_type_0 = const()[name = tensor("key_7_pad_type_0"), val = tensor("custom")]; + tensor key_7_pad_0 = const()[name = tensor("key_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_3_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_3_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(135640128)))]; + tensor key_7_cast_fp16 = conv(dilations = var_4821, groups = var_4780, pad = key_7_pad_0, pad_type = key_7_pad_type_0, strides = var_4819, weight = layers_3_self_attn_k_proj_weight_to_fp16, x = obj_13_cast_fp16)[name = tensor("key_7_cast_fp16")]; + tensor var_4826 = const()[name = tensor("op_4826"), val = tensor([1, 1])]; + tensor var_4828 = const()[name = tensor("op_4828"), val = tensor([1, 1])]; + tensor value_7_pad_type_0 = const()[name = tensor("value_7_pad_type_0"), val = tensor("custom")]; + tensor value_7_pad_0 = const()[name = tensor("value_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_3_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_3_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(138916992)))]; + tensor layers_3_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_3_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142193856)))]; + tensor value_7_cast_fp16 = conv(bias = layers_3_self_attn_v_proj_bias_to_fp16, dilations = var_4828, groups = var_4780, pad = value_7_pad_0, pad_type = value_7_pad_type_0, strides = var_4826, weight = layers_3_self_attn_v_proj_weight_to_fp16, x = obj_13_cast_fp16)[name = tensor("value_7_cast_fp16")]; + tensor var_4835_begin_0 = const()[name = tensor("op_4835_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4835_end_0 = const()[name = tensor("op_4835_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_4835_end_mask_0 = const()[name = tensor("op_4835_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4835_cast_fp16 = slice_by_index(begin = var_4835_begin_0, end = var_4835_end_0, end_mask = var_4835_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_4835_cast_fp16")]; + tensor var_4839_begin_0 = const()[name = tensor("op_4839_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_4839_end_0 = const()[name = tensor("op_4839_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_4839_end_mask_0 = const()[name = tensor("op_4839_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4839_cast_fp16 = slice_by_index(begin = var_4839_begin_0, end = var_4839_end_0, end_mask = var_4839_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_4839_cast_fp16")]; + tensor var_4843_begin_0 = const()[name = tensor("op_4843_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_4843_end_0 = const()[name = tensor("op_4843_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_4843_end_mask_0 = const()[name = tensor("op_4843_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4843_cast_fp16 = slice_by_index(begin = var_4843_begin_0, end = var_4843_end_0, end_mask = var_4843_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_4843_cast_fp16")]; + tensor var_4847_begin_0 = const()[name = tensor("op_4847_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_4847_end_0 = const()[name = tensor("op_4847_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_4847_end_mask_0 = const()[name = tensor("op_4847_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4847_cast_fp16 = slice_by_index(begin = var_4847_begin_0, end = var_4847_end_0, end_mask = var_4847_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_4847_cast_fp16")]; + tensor var_4851_begin_0 = const()[name = tensor("op_4851_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_4851_end_0 = const()[name = tensor("op_4851_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_4851_end_mask_0 = const()[name = tensor("op_4851_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4851_cast_fp16 = slice_by_index(begin = var_4851_begin_0, end = var_4851_end_0, end_mask = var_4851_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_4851_cast_fp16")]; + tensor var_4855_begin_0 = const()[name = tensor("op_4855_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_4855_end_0 = const()[name = tensor("op_4855_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_4855_end_mask_0 = const()[name = tensor("op_4855_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4855_cast_fp16 = slice_by_index(begin = var_4855_begin_0, end = var_4855_end_0, end_mask = var_4855_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_4855_cast_fp16")]; + tensor var_4859_begin_0 = const()[name = tensor("op_4859_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_4859_end_0 = const()[name = tensor("op_4859_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_4859_end_mask_0 = const()[name = tensor("op_4859_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4859_cast_fp16 = slice_by_index(begin = var_4859_begin_0, end = var_4859_end_0, end_mask = var_4859_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_4859_cast_fp16")]; + tensor var_4863_begin_0 = const()[name = tensor("op_4863_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_4863_end_0 = const()[name = tensor("op_4863_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_4863_end_mask_0 = const()[name = tensor("op_4863_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4863_cast_fp16 = slice_by_index(begin = var_4863_begin_0, end = var_4863_end_0, end_mask = var_4863_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_4863_cast_fp16")]; + tensor var_4867_begin_0 = const()[name = tensor("op_4867_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_4867_end_0 = const()[name = tensor("op_4867_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_4867_end_mask_0 = const()[name = tensor("op_4867_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4867_cast_fp16 = slice_by_index(begin = var_4867_begin_0, end = var_4867_end_0, end_mask = var_4867_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_4867_cast_fp16")]; + tensor var_4871_begin_0 = const()[name = tensor("op_4871_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_4871_end_0 = const()[name = tensor("op_4871_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_4871_end_mask_0 = const()[name = tensor("op_4871_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4871_cast_fp16 = slice_by_index(begin = var_4871_begin_0, end = var_4871_end_0, end_mask = var_4871_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_4871_cast_fp16")]; + tensor var_4875_begin_0 = const()[name = tensor("op_4875_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_4875_end_0 = const()[name = tensor("op_4875_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_4875_end_mask_0 = const()[name = tensor("op_4875_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4875_cast_fp16 = slice_by_index(begin = var_4875_begin_0, end = var_4875_end_0, end_mask = var_4875_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_4875_cast_fp16")]; + tensor var_4879_begin_0 = const()[name = tensor("op_4879_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_4879_end_0 = const()[name = tensor("op_4879_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_4879_end_mask_0 = const()[name = tensor("op_4879_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4879_cast_fp16 = slice_by_index(begin = var_4879_begin_0, end = var_4879_end_0, end_mask = var_4879_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_4879_cast_fp16")]; + tensor var_4883_begin_0 = const()[name = tensor("op_4883_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_4883_end_0 = const()[name = tensor("op_4883_end_0"), val = tensor([1, 832, 1, 1500])]; + tensor var_4883_end_mask_0 = const()[name = tensor("op_4883_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4883_cast_fp16 = slice_by_index(begin = var_4883_begin_0, end = var_4883_end_0, end_mask = var_4883_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_4883_cast_fp16")]; + tensor var_4887_begin_0 = const()[name = tensor("op_4887_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_4887_end_0 = const()[name = tensor("op_4887_end_0"), val = tensor([1, 896, 1, 1500])]; + tensor var_4887_end_mask_0 = const()[name = tensor("op_4887_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4887_cast_fp16 = slice_by_index(begin = var_4887_begin_0, end = var_4887_end_0, end_mask = var_4887_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_4887_cast_fp16")]; + tensor var_4891_begin_0 = const()[name = tensor("op_4891_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_4891_end_0 = const()[name = tensor("op_4891_end_0"), val = tensor([1, 960, 1, 1500])]; + tensor var_4891_end_mask_0 = const()[name = tensor("op_4891_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4891_cast_fp16 = slice_by_index(begin = var_4891_begin_0, end = var_4891_end_0, end_mask = var_4891_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_4891_cast_fp16")]; + tensor var_4895_begin_0 = const()[name = tensor("op_4895_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_4895_end_0 = const()[name = tensor("op_4895_end_0"), val = tensor([1, 1024, 1, 1500])]; + tensor var_4895_end_mask_0 = const()[name = tensor("op_4895_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4895_cast_fp16 = slice_by_index(begin = var_4895_begin_0, end = var_4895_end_0, end_mask = var_4895_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_4895_cast_fp16")]; + tensor var_4899_begin_0 = const()[name = tensor("op_4899_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_4899_end_0 = const()[name = tensor("op_4899_end_0"), val = tensor([1, 1088, 1, 1500])]; + tensor var_4899_end_mask_0 = const()[name = tensor("op_4899_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4899_cast_fp16 = slice_by_index(begin = var_4899_begin_0, end = var_4899_end_0, end_mask = var_4899_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_4899_cast_fp16")]; + tensor var_4903_begin_0 = const()[name = tensor("op_4903_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_4903_end_0 = const()[name = tensor("op_4903_end_0"), val = tensor([1, 1152, 1, 1500])]; + tensor var_4903_end_mask_0 = const()[name = tensor("op_4903_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4903_cast_fp16 = slice_by_index(begin = var_4903_begin_0, end = var_4903_end_0, end_mask = var_4903_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_4903_cast_fp16")]; + tensor var_4907_begin_0 = const()[name = tensor("op_4907_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_4907_end_0 = const()[name = tensor("op_4907_end_0"), val = tensor([1, 1216, 1, 1500])]; + tensor var_4907_end_mask_0 = const()[name = tensor("op_4907_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4907_cast_fp16 = slice_by_index(begin = var_4907_begin_0, end = var_4907_end_0, end_mask = var_4907_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_4907_cast_fp16")]; + tensor var_4911_begin_0 = const()[name = tensor("op_4911_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_4911_end_0 = const()[name = tensor("op_4911_end_0"), val = tensor([1, 1280, 1, 1500])]; + tensor var_4911_end_mask_0 = const()[name = tensor("op_4911_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_4911_cast_fp16 = slice_by_index(begin = var_4911_begin_0, end = var_4911_end_0, end_mask = var_4911_end_mask_0, x = query_7_cast_fp16)[name = tensor("op_4911_cast_fp16")]; + tensor var_4920_begin_0 = const()[name = tensor("op_4920_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4920_end_0 = const()[name = tensor("op_4920_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_4920_end_mask_0 = const()[name = tensor("op_4920_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4920_cast_fp16 = slice_by_index(begin = var_4920_begin_0, end = var_4920_end_0, end_mask = var_4920_end_mask_0, x = var_4835_cast_fp16)[name = tensor("op_4920_cast_fp16")]; + tensor var_4927_begin_0 = const()[name = tensor("op_4927_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_4927_end_0 = const()[name = tensor("op_4927_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_4927_end_mask_0 = const()[name = tensor("op_4927_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4927_cast_fp16 = slice_by_index(begin = var_4927_begin_0, end = var_4927_end_0, end_mask = var_4927_end_mask_0, x = var_4835_cast_fp16)[name = tensor("op_4927_cast_fp16")]; + tensor var_4934_begin_0 = const()[name = tensor("op_4934_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_4934_end_0 = const()[name = tensor("op_4934_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_4934_end_mask_0 = const()[name = tensor("op_4934_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4934_cast_fp16 = slice_by_index(begin = var_4934_begin_0, end = var_4934_end_0, end_mask = var_4934_end_mask_0, x = var_4835_cast_fp16)[name = tensor("op_4934_cast_fp16")]; + tensor var_4941_begin_0 = const()[name = tensor("op_4941_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_4941_end_0 = const()[name = tensor("op_4941_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_4941_end_mask_0 = const()[name = tensor("op_4941_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4941_cast_fp16 = slice_by_index(begin = var_4941_begin_0, end = var_4941_end_0, end_mask = var_4941_end_mask_0, x = var_4835_cast_fp16)[name = tensor("op_4941_cast_fp16")]; + tensor var_4948_begin_0 = const()[name = tensor("op_4948_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4948_end_0 = const()[name = tensor("op_4948_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_4948_end_mask_0 = const()[name = tensor("op_4948_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4948_cast_fp16 = slice_by_index(begin = var_4948_begin_0, end = var_4948_end_0, end_mask = var_4948_end_mask_0, x = var_4839_cast_fp16)[name = tensor("op_4948_cast_fp16")]; + tensor var_4955_begin_0 = const()[name = tensor("op_4955_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_4955_end_0 = const()[name = tensor("op_4955_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_4955_end_mask_0 = const()[name = tensor("op_4955_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4955_cast_fp16 = slice_by_index(begin = var_4955_begin_0, end = var_4955_end_0, end_mask = var_4955_end_mask_0, x = var_4839_cast_fp16)[name = tensor("op_4955_cast_fp16")]; + tensor var_4962_begin_0 = const()[name = tensor("op_4962_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_4962_end_0 = const()[name = tensor("op_4962_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_4962_end_mask_0 = const()[name = tensor("op_4962_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4962_cast_fp16 = slice_by_index(begin = var_4962_begin_0, end = var_4962_end_0, end_mask = var_4962_end_mask_0, x = var_4839_cast_fp16)[name = tensor("op_4962_cast_fp16")]; + tensor var_4969_begin_0 = const()[name = tensor("op_4969_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_4969_end_0 = const()[name = tensor("op_4969_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_4969_end_mask_0 = const()[name = tensor("op_4969_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4969_cast_fp16 = slice_by_index(begin = var_4969_begin_0, end = var_4969_end_0, end_mask = var_4969_end_mask_0, x = var_4839_cast_fp16)[name = tensor("op_4969_cast_fp16")]; + tensor var_4976_begin_0 = const()[name = tensor("op_4976_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_4976_end_0 = const()[name = tensor("op_4976_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_4976_end_mask_0 = const()[name = tensor("op_4976_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4976_cast_fp16 = slice_by_index(begin = var_4976_begin_0, end = var_4976_end_0, end_mask = var_4976_end_mask_0, x = var_4843_cast_fp16)[name = tensor("op_4976_cast_fp16")]; + tensor var_4983_begin_0 = const()[name = tensor("op_4983_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_4983_end_0 = const()[name = tensor("op_4983_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_4983_end_mask_0 = const()[name = tensor("op_4983_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4983_cast_fp16 = slice_by_index(begin = var_4983_begin_0, end = var_4983_end_0, end_mask = var_4983_end_mask_0, x = var_4843_cast_fp16)[name = tensor("op_4983_cast_fp16")]; + tensor var_4990_begin_0 = const()[name = tensor("op_4990_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_4990_end_0 = const()[name = tensor("op_4990_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_4990_end_mask_0 = const()[name = tensor("op_4990_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4990_cast_fp16 = slice_by_index(begin = var_4990_begin_0, end = var_4990_end_0, end_mask = var_4990_end_mask_0, x = var_4843_cast_fp16)[name = tensor("op_4990_cast_fp16")]; + tensor var_4997_begin_0 = const()[name = tensor("op_4997_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_4997_end_0 = const()[name = tensor("op_4997_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_4997_end_mask_0 = const()[name = tensor("op_4997_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_4997_cast_fp16 = slice_by_index(begin = var_4997_begin_0, end = var_4997_end_0, end_mask = var_4997_end_mask_0, x = var_4843_cast_fp16)[name = tensor("op_4997_cast_fp16")]; + tensor var_5004_begin_0 = const()[name = tensor("op_5004_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5004_end_0 = const()[name = tensor("op_5004_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_5004_end_mask_0 = const()[name = tensor("op_5004_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5004_cast_fp16 = slice_by_index(begin = var_5004_begin_0, end = var_5004_end_0, end_mask = var_5004_end_mask_0, x = var_4847_cast_fp16)[name = tensor("op_5004_cast_fp16")]; + tensor var_5011_begin_0 = const()[name = tensor("op_5011_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_5011_end_0 = const()[name = tensor("op_5011_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_5011_end_mask_0 = const()[name = tensor("op_5011_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5011_cast_fp16 = slice_by_index(begin = var_5011_begin_0, end = var_5011_end_0, end_mask = var_5011_end_mask_0, x = var_4847_cast_fp16)[name = tensor("op_5011_cast_fp16")]; + tensor var_5018_begin_0 = const()[name = tensor("op_5018_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_5018_end_0 = const()[name = tensor("op_5018_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_5018_end_mask_0 = const()[name = tensor("op_5018_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5018_cast_fp16 = slice_by_index(begin = var_5018_begin_0, end = var_5018_end_0, end_mask = var_5018_end_mask_0, x = var_4847_cast_fp16)[name = tensor("op_5018_cast_fp16")]; + tensor var_5025_begin_0 = const()[name = tensor("op_5025_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_5025_end_0 = const()[name = tensor("op_5025_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_5025_end_mask_0 = const()[name = tensor("op_5025_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5025_cast_fp16 = slice_by_index(begin = var_5025_begin_0, end = var_5025_end_0, end_mask = var_5025_end_mask_0, x = var_4847_cast_fp16)[name = tensor("op_5025_cast_fp16")]; + tensor var_5032_begin_0 = const()[name = tensor("op_5032_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5032_end_0 = const()[name = tensor("op_5032_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_5032_end_mask_0 = const()[name = tensor("op_5032_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5032_cast_fp16 = slice_by_index(begin = var_5032_begin_0, end = var_5032_end_0, end_mask = var_5032_end_mask_0, x = var_4851_cast_fp16)[name = tensor("op_5032_cast_fp16")]; + tensor var_5039_begin_0 = const()[name = tensor("op_5039_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_5039_end_0 = const()[name = tensor("op_5039_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_5039_end_mask_0 = const()[name = tensor("op_5039_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5039_cast_fp16 = slice_by_index(begin = var_5039_begin_0, end = var_5039_end_0, end_mask = var_5039_end_mask_0, x = var_4851_cast_fp16)[name = tensor("op_5039_cast_fp16")]; + tensor var_5046_begin_0 = const()[name = tensor("op_5046_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_5046_end_0 = const()[name = tensor("op_5046_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_5046_end_mask_0 = const()[name = tensor("op_5046_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5046_cast_fp16 = slice_by_index(begin = var_5046_begin_0, end = var_5046_end_0, end_mask = var_5046_end_mask_0, x = var_4851_cast_fp16)[name = tensor("op_5046_cast_fp16")]; + tensor var_5053_begin_0 = const()[name = tensor("op_5053_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_5053_end_0 = const()[name = tensor("op_5053_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_5053_end_mask_0 = const()[name = tensor("op_5053_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5053_cast_fp16 = slice_by_index(begin = var_5053_begin_0, end = var_5053_end_0, end_mask = var_5053_end_mask_0, x = var_4851_cast_fp16)[name = tensor("op_5053_cast_fp16")]; + tensor var_5060_begin_0 = const()[name = tensor("op_5060_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5060_end_0 = const()[name = tensor("op_5060_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_5060_end_mask_0 = const()[name = tensor("op_5060_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5060_cast_fp16 = slice_by_index(begin = var_5060_begin_0, end = var_5060_end_0, end_mask = var_5060_end_mask_0, x = var_4855_cast_fp16)[name = tensor("op_5060_cast_fp16")]; + tensor var_5067_begin_0 = const()[name = tensor("op_5067_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_5067_end_0 = const()[name = tensor("op_5067_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_5067_end_mask_0 = const()[name = tensor("op_5067_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5067_cast_fp16 = slice_by_index(begin = var_5067_begin_0, end = var_5067_end_0, end_mask = var_5067_end_mask_0, x = var_4855_cast_fp16)[name = tensor("op_5067_cast_fp16")]; + tensor var_5074_begin_0 = const()[name = tensor("op_5074_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_5074_end_0 = const()[name = tensor("op_5074_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_5074_end_mask_0 = const()[name = tensor("op_5074_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5074_cast_fp16 = slice_by_index(begin = var_5074_begin_0, end = var_5074_end_0, end_mask = var_5074_end_mask_0, x = var_4855_cast_fp16)[name = tensor("op_5074_cast_fp16")]; + tensor var_5081_begin_0 = const()[name = tensor("op_5081_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_5081_end_0 = const()[name = tensor("op_5081_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_5081_end_mask_0 = const()[name = tensor("op_5081_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5081_cast_fp16 = slice_by_index(begin = var_5081_begin_0, end = var_5081_end_0, end_mask = var_5081_end_mask_0, x = var_4855_cast_fp16)[name = tensor("op_5081_cast_fp16")]; + tensor var_5088_begin_0 = const()[name = tensor("op_5088_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5088_end_0 = const()[name = tensor("op_5088_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_5088_end_mask_0 = const()[name = tensor("op_5088_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5088_cast_fp16 = slice_by_index(begin = var_5088_begin_0, end = var_5088_end_0, end_mask = var_5088_end_mask_0, x = var_4859_cast_fp16)[name = tensor("op_5088_cast_fp16")]; + tensor var_5095_begin_0 = const()[name = tensor("op_5095_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_5095_end_0 = const()[name = tensor("op_5095_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_5095_end_mask_0 = const()[name = tensor("op_5095_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5095_cast_fp16 = slice_by_index(begin = var_5095_begin_0, end = var_5095_end_0, end_mask = var_5095_end_mask_0, x = var_4859_cast_fp16)[name = tensor("op_5095_cast_fp16")]; + tensor var_5102_begin_0 = const()[name = tensor("op_5102_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_5102_end_0 = const()[name = tensor("op_5102_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_5102_end_mask_0 = const()[name = tensor("op_5102_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5102_cast_fp16 = slice_by_index(begin = var_5102_begin_0, end = var_5102_end_0, end_mask = var_5102_end_mask_0, x = var_4859_cast_fp16)[name = tensor("op_5102_cast_fp16")]; + tensor var_5109_begin_0 = const()[name = tensor("op_5109_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_5109_end_0 = const()[name = tensor("op_5109_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_5109_end_mask_0 = const()[name = tensor("op_5109_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5109_cast_fp16 = slice_by_index(begin = var_5109_begin_0, end = var_5109_end_0, end_mask = var_5109_end_mask_0, x = var_4859_cast_fp16)[name = tensor("op_5109_cast_fp16")]; + tensor var_5116_begin_0 = const()[name = tensor("op_5116_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5116_end_0 = const()[name = tensor("op_5116_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_5116_end_mask_0 = const()[name = tensor("op_5116_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5116_cast_fp16 = slice_by_index(begin = var_5116_begin_0, end = var_5116_end_0, end_mask = var_5116_end_mask_0, x = var_4863_cast_fp16)[name = tensor("op_5116_cast_fp16")]; + tensor var_5123_begin_0 = const()[name = tensor("op_5123_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_5123_end_0 = const()[name = tensor("op_5123_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_5123_end_mask_0 = const()[name = tensor("op_5123_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5123_cast_fp16 = slice_by_index(begin = var_5123_begin_0, end = var_5123_end_0, end_mask = var_5123_end_mask_0, x = var_4863_cast_fp16)[name = tensor("op_5123_cast_fp16")]; + tensor var_5130_begin_0 = const()[name = tensor("op_5130_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_5130_end_0 = const()[name = tensor("op_5130_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_5130_end_mask_0 = const()[name = tensor("op_5130_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5130_cast_fp16 = slice_by_index(begin = var_5130_begin_0, end = var_5130_end_0, end_mask = var_5130_end_mask_0, x = var_4863_cast_fp16)[name = tensor("op_5130_cast_fp16")]; + tensor var_5137_begin_0 = const()[name = tensor("op_5137_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_5137_end_0 = const()[name = tensor("op_5137_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_5137_end_mask_0 = const()[name = tensor("op_5137_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5137_cast_fp16 = slice_by_index(begin = var_5137_begin_0, end = var_5137_end_0, end_mask = var_5137_end_mask_0, x = var_4863_cast_fp16)[name = tensor("op_5137_cast_fp16")]; + tensor var_5144_begin_0 = const()[name = tensor("op_5144_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5144_end_0 = const()[name = tensor("op_5144_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_5144_end_mask_0 = const()[name = tensor("op_5144_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5144_cast_fp16 = slice_by_index(begin = var_5144_begin_0, end = var_5144_end_0, end_mask = var_5144_end_mask_0, x = var_4867_cast_fp16)[name = tensor("op_5144_cast_fp16")]; + tensor var_5151_begin_0 = const()[name = tensor("op_5151_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_5151_end_0 = const()[name = tensor("op_5151_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_5151_end_mask_0 = const()[name = tensor("op_5151_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5151_cast_fp16 = slice_by_index(begin = var_5151_begin_0, end = var_5151_end_0, end_mask = var_5151_end_mask_0, x = var_4867_cast_fp16)[name = tensor("op_5151_cast_fp16")]; + tensor var_5158_begin_0 = const()[name = tensor("op_5158_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_5158_end_0 = const()[name = tensor("op_5158_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_5158_end_mask_0 = const()[name = tensor("op_5158_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5158_cast_fp16 = slice_by_index(begin = var_5158_begin_0, end = var_5158_end_0, end_mask = var_5158_end_mask_0, x = var_4867_cast_fp16)[name = tensor("op_5158_cast_fp16")]; + tensor var_5165_begin_0 = const()[name = tensor("op_5165_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_5165_end_0 = const()[name = tensor("op_5165_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_5165_end_mask_0 = const()[name = tensor("op_5165_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5165_cast_fp16 = slice_by_index(begin = var_5165_begin_0, end = var_5165_end_0, end_mask = var_5165_end_mask_0, x = var_4867_cast_fp16)[name = tensor("op_5165_cast_fp16")]; + tensor var_5172_begin_0 = const()[name = tensor("op_5172_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5172_end_0 = const()[name = tensor("op_5172_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_5172_end_mask_0 = const()[name = tensor("op_5172_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5172_cast_fp16 = slice_by_index(begin = var_5172_begin_0, end = var_5172_end_0, end_mask = var_5172_end_mask_0, x = var_4871_cast_fp16)[name = tensor("op_5172_cast_fp16")]; + tensor var_5179_begin_0 = const()[name = tensor("op_5179_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_5179_end_0 = const()[name = tensor("op_5179_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_5179_end_mask_0 = const()[name = tensor("op_5179_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5179_cast_fp16 = slice_by_index(begin = var_5179_begin_0, end = var_5179_end_0, end_mask = var_5179_end_mask_0, x = var_4871_cast_fp16)[name = tensor("op_5179_cast_fp16")]; + tensor var_5186_begin_0 = const()[name = tensor("op_5186_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_5186_end_0 = const()[name = tensor("op_5186_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_5186_end_mask_0 = const()[name = tensor("op_5186_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5186_cast_fp16 = slice_by_index(begin = var_5186_begin_0, end = var_5186_end_0, end_mask = var_5186_end_mask_0, x = var_4871_cast_fp16)[name = tensor("op_5186_cast_fp16")]; + tensor var_5193_begin_0 = const()[name = tensor("op_5193_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_5193_end_0 = const()[name = tensor("op_5193_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_5193_end_mask_0 = const()[name = tensor("op_5193_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5193_cast_fp16 = slice_by_index(begin = var_5193_begin_0, end = var_5193_end_0, end_mask = var_5193_end_mask_0, x = var_4871_cast_fp16)[name = tensor("op_5193_cast_fp16")]; + tensor var_5200_begin_0 = const()[name = tensor("op_5200_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5200_end_0 = const()[name = tensor("op_5200_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_5200_end_mask_0 = const()[name = tensor("op_5200_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5200_cast_fp16 = slice_by_index(begin = var_5200_begin_0, end = var_5200_end_0, end_mask = var_5200_end_mask_0, x = var_4875_cast_fp16)[name = tensor("op_5200_cast_fp16")]; + tensor var_5207_begin_0 = const()[name = tensor("op_5207_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_5207_end_0 = const()[name = tensor("op_5207_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_5207_end_mask_0 = const()[name = tensor("op_5207_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5207_cast_fp16 = slice_by_index(begin = var_5207_begin_0, end = var_5207_end_0, end_mask = var_5207_end_mask_0, x = var_4875_cast_fp16)[name = tensor("op_5207_cast_fp16")]; + tensor var_5214_begin_0 = const()[name = tensor("op_5214_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_5214_end_0 = const()[name = tensor("op_5214_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_5214_end_mask_0 = const()[name = tensor("op_5214_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5214_cast_fp16 = slice_by_index(begin = var_5214_begin_0, end = var_5214_end_0, end_mask = var_5214_end_mask_0, x = var_4875_cast_fp16)[name = tensor("op_5214_cast_fp16")]; + tensor var_5221_begin_0 = const()[name = tensor("op_5221_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_5221_end_0 = const()[name = tensor("op_5221_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_5221_end_mask_0 = const()[name = tensor("op_5221_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5221_cast_fp16 = slice_by_index(begin = var_5221_begin_0, end = var_5221_end_0, end_mask = var_5221_end_mask_0, x = var_4875_cast_fp16)[name = tensor("op_5221_cast_fp16")]; + tensor var_5228_begin_0 = const()[name = tensor("op_5228_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5228_end_0 = const()[name = tensor("op_5228_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_5228_end_mask_0 = const()[name = tensor("op_5228_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5228_cast_fp16 = slice_by_index(begin = var_5228_begin_0, end = var_5228_end_0, end_mask = var_5228_end_mask_0, x = var_4879_cast_fp16)[name = tensor("op_5228_cast_fp16")]; + tensor var_5235_begin_0 = const()[name = tensor("op_5235_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_5235_end_0 = const()[name = tensor("op_5235_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_5235_end_mask_0 = const()[name = tensor("op_5235_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5235_cast_fp16 = slice_by_index(begin = var_5235_begin_0, end = var_5235_end_0, end_mask = var_5235_end_mask_0, x = var_4879_cast_fp16)[name = tensor("op_5235_cast_fp16")]; + tensor var_5242_begin_0 = const()[name = tensor("op_5242_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_5242_end_0 = const()[name = tensor("op_5242_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_5242_end_mask_0 = const()[name = tensor("op_5242_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5242_cast_fp16 = slice_by_index(begin = var_5242_begin_0, end = var_5242_end_0, end_mask = var_5242_end_mask_0, x = var_4879_cast_fp16)[name = tensor("op_5242_cast_fp16")]; + tensor var_5249_begin_0 = const()[name = tensor("op_5249_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_5249_end_0 = const()[name = tensor("op_5249_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_5249_end_mask_0 = const()[name = tensor("op_5249_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5249_cast_fp16 = slice_by_index(begin = var_5249_begin_0, end = var_5249_end_0, end_mask = var_5249_end_mask_0, x = var_4879_cast_fp16)[name = tensor("op_5249_cast_fp16")]; + tensor var_5256_begin_0 = const()[name = tensor("op_5256_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5256_end_0 = const()[name = tensor("op_5256_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_5256_end_mask_0 = const()[name = tensor("op_5256_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5256_cast_fp16 = slice_by_index(begin = var_5256_begin_0, end = var_5256_end_0, end_mask = var_5256_end_mask_0, x = var_4883_cast_fp16)[name = tensor("op_5256_cast_fp16")]; + tensor var_5263_begin_0 = const()[name = tensor("op_5263_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_5263_end_0 = const()[name = tensor("op_5263_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_5263_end_mask_0 = const()[name = tensor("op_5263_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5263_cast_fp16 = slice_by_index(begin = var_5263_begin_0, end = var_5263_end_0, end_mask = var_5263_end_mask_0, x = var_4883_cast_fp16)[name = tensor("op_5263_cast_fp16")]; + tensor var_5270_begin_0 = const()[name = tensor("op_5270_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_5270_end_0 = const()[name = tensor("op_5270_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_5270_end_mask_0 = const()[name = tensor("op_5270_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5270_cast_fp16 = slice_by_index(begin = var_5270_begin_0, end = var_5270_end_0, end_mask = var_5270_end_mask_0, x = var_4883_cast_fp16)[name = tensor("op_5270_cast_fp16")]; + tensor var_5277_begin_0 = const()[name = tensor("op_5277_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_5277_end_0 = const()[name = tensor("op_5277_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_5277_end_mask_0 = const()[name = tensor("op_5277_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5277_cast_fp16 = slice_by_index(begin = var_5277_begin_0, end = var_5277_end_0, end_mask = var_5277_end_mask_0, x = var_4883_cast_fp16)[name = tensor("op_5277_cast_fp16")]; + tensor var_5284_begin_0 = const()[name = tensor("op_5284_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5284_end_0 = const()[name = tensor("op_5284_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_5284_end_mask_0 = const()[name = tensor("op_5284_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5284_cast_fp16 = slice_by_index(begin = var_5284_begin_0, end = var_5284_end_0, end_mask = var_5284_end_mask_0, x = var_4887_cast_fp16)[name = tensor("op_5284_cast_fp16")]; + tensor var_5291_begin_0 = const()[name = tensor("op_5291_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_5291_end_0 = const()[name = tensor("op_5291_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_5291_end_mask_0 = const()[name = tensor("op_5291_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5291_cast_fp16 = slice_by_index(begin = var_5291_begin_0, end = var_5291_end_0, end_mask = var_5291_end_mask_0, x = var_4887_cast_fp16)[name = tensor("op_5291_cast_fp16")]; + tensor var_5298_begin_0 = const()[name = tensor("op_5298_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_5298_end_0 = const()[name = tensor("op_5298_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_5298_end_mask_0 = const()[name = tensor("op_5298_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5298_cast_fp16 = slice_by_index(begin = var_5298_begin_0, end = var_5298_end_0, end_mask = var_5298_end_mask_0, x = var_4887_cast_fp16)[name = tensor("op_5298_cast_fp16")]; + tensor var_5305_begin_0 = const()[name = tensor("op_5305_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_5305_end_0 = const()[name = tensor("op_5305_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_5305_end_mask_0 = const()[name = tensor("op_5305_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5305_cast_fp16 = slice_by_index(begin = var_5305_begin_0, end = var_5305_end_0, end_mask = var_5305_end_mask_0, x = var_4887_cast_fp16)[name = tensor("op_5305_cast_fp16")]; + tensor var_5312_begin_0 = const()[name = tensor("op_5312_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5312_end_0 = const()[name = tensor("op_5312_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_5312_end_mask_0 = const()[name = tensor("op_5312_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5312_cast_fp16 = slice_by_index(begin = var_5312_begin_0, end = var_5312_end_0, end_mask = var_5312_end_mask_0, x = var_4891_cast_fp16)[name = tensor("op_5312_cast_fp16")]; + tensor var_5319_begin_0 = const()[name = tensor("op_5319_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_5319_end_0 = const()[name = tensor("op_5319_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_5319_end_mask_0 = const()[name = tensor("op_5319_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5319_cast_fp16 = slice_by_index(begin = var_5319_begin_0, end = var_5319_end_0, end_mask = var_5319_end_mask_0, x = var_4891_cast_fp16)[name = tensor("op_5319_cast_fp16")]; + tensor var_5326_begin_0 = const()[name = tensor("op_5326_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_5326_end_0 = const()[name = tensor("op_5326_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_5326_end_mask_0 = const()[name = tensor("op_5326_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5326_cast_fp16 = slice_by_index(begin = var_5326_begin_0, end = var_5326_end_0, end_mask = var_5326_end_mask_0, x = var_4891_cast_fp16)[name = tensor("op_5326_cast_fp16")]; + tensor var_5333_begin_0 = const()[name = tensor("op_5333_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_5333_end_0 = const()[name = tensor("op_5333_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_5333_end_mask_0 = const()[name = tensor("op_5333_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5333_cast_fp16 = slice_by_index(begin = var_5333_begin_0, end = var_5333_end_0, end_mask = var_5333_end_mask_0, x = var_4891_cast_fp16)[name = tensor("op_5333_cast_fp16")]; + tensor var_5340_begin_0 = const()[name = tensor("op_5340_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5340_end_0 = const()[name = tensor("op_5340_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_5340_end_mask_0 = const()[name = tensor("op_5340_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5340_cast_fp16 = slice_by_index(begin = var_5340_begin_0, end = var_5340_end_0, end_mask = var_5340_end_mask_0, x = var_4895_cast_fp16)[name = tensor("op_5340_cast_fp16")]; + tensor var_5347_begin_0 = const()[name = tensor("op_5347_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_5347_end_0 = const()[name = tensor("op_5347_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_5347_end_mask_0 = const()[name = tensor("op_5347_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5347_cast_fp16 = slice_by_index(begin = var_5347_begin_0, end = var_5347_end_0, end_mask = var_5347_end_mask_0, x = var_4895_cast_fp16)[name = tensor("op_5347_cast_fp16")]; + tensor var_5354_begin_0 = const()[name = tensor("op_5354_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_5354_end_0 = const()[name = tensor("op_5354_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_5354_end_mask_0 = const()[name = tensor("op_5354_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5354_cast_fp16 = slice_by_index(begin = var_5354_begin_0, end = var_5354_end_0, end_mask = var_5354_end_mask_0, x = var_4895_cast_fp16)[name = tensor("op_5354_cast_fp16")]; + tensor var_5361_begin_0 = const()[name = tensor("op_5361_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_5361_end_0 = const()[name = tensor("op_5361_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_5361_end_mask_0 = const()[name = tensor("op_5361_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5361_cast_fp16 = slice_by_index(begin = var_5361_begin_0, end = var_5361_end_0, end_mask = var_5361_end_mask_0, x = var_4895_cast_fp16)[name = tensor("op_5361_cast_fp16")]; + tensor var_5368_begin_0 = const()[name = tensor("op_5368_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5368_end_0 = const()[name = tensor("op_5368_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_5368_end_mask_0 = const()[name = tensor("op_5368_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5368_cast_fp16 = slice_by_index(begin = var_5368_begin_0, end = var_5368_end_0, end_mask = var_5368_end_mask_0, x = var_4899_cast_fp16)[name = tensor("op_5368_cast_fp16")]; + tensor var_5375_begin_0 = const()[name = tensor("op_5375_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_5375_end_0 = const()[name = tensor("op_5375_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_5375_end_mask_0 = const()[name = tensor("op_5375_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5375_cast_fp16 = slice_by_index(begin = var_5375_begin_0, end = var_5375_end_0, end_mask = var_5375_end_mask_0, x = var_4899_cast_fp16)[name = tensor("op_5375_cast_fp16")]; + tensor var_5382_begin_0 = const()[name = tensor("op_5382_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_5382_end_0 = const()[name = tensor("op_5382_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_5382_end_mask_0 = const()[name = tensor("op_5382_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5382_cast_fp16 = slice_by_index(begin = var_5382_begin_0, end = var_5382_end_0, end_mask = var_5382_end_mask_0, x = var_4899_cast_fp16)[name = tensor("op_5382_cast_fp16")]; + tensor var_5389_begin_0 = const()[name = tensor("op_5389_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_5389_end_0 = const()[name = tensor("op_5389_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_5389_end_mask_0 = const()[name = tensor("op_5389_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5389_cast_fp16 = slice_by_index(begin = var_5389_begin_0, end = var_5389_end_0, end_mask = var_5389_end_mask_0, x = var_4899_cast_fp16)[name = tensor("op_5389_cast_fp16")]; + tensor var_5396_begin_0 = const()[name = tensor("op_5396_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5396_end_0 = const()[name = tensor("op_5396_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_5396_end_mask_0 = const()[name = tensor("op_5396_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5396_cast_fp16 = slice_by_index(begin = var_5396_begin_0, end = var_5396_end_0, end_mask = var_5396_end_mask_0, x = var_4903_cast_fp16)[name = tensor("op_5396_cast_fp16")]; + tensor var_5403_begin_0 = const()[name = tensor("op_5403_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_5403_end_0 = const()[name = tensor("op_5403_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_5403_end_mask_0 = const()[name = tensor("op_5403_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5403_cast_fp16 = slice_by_index(begin = var_5403_begin_0, end = var_5403_end_0, end_mask = var_5403_end_mask_0, x = var_4903_cast_fp16)[name = tensor("op_5403_cast_fp16")]; + tensor var_5410_begin_0 = const()[name = tensor("op_5410_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_5410_end_0 = const()[name = tensor("op_5410_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_5410_end_mask_0 = const()[name = tensor("op_5410_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5410_cast_fp16 = slice_by_index(begin = var_5410_begin_0, end = var_5410_end_0, end_mask = var_5410_end_mask_0, x = var_4903_cast_fp16)[name = tensor("op_5410_cast_fp16")]; + tensor var_5417_begin_0 = const()[name = tensor("op_5417_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_5417_end_0 = const()[name = tensor("op_5417_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_5417_end_mask_0 = const()[name = tensor("op_5417_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5417_cast_fp16 = slice_by_index(begin = var_5417_begin_0, end = var_5417_end_0, end_mask = var_5417_end_mask_0, x = var_4903_cast_fp16)[name = tensor("op_5417_cast_fp16")]; + tensor var_5424_begin_0 = const()[name = tensor("op_5424_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5424_end_0 = const()[name = tensor("op_5424_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_5424_end_mask_0 = const()[name = tensor("op_5424_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5424_cast_fp16 = slice_by_index(begin = var_5424_begin_0, end = var_5424_end_0, end_mask = var_5424_end_mask_0, x = var_4907_cast_fp16)[name = tensor("op_5424_cast_fp16")]; + tensor var_5431_begin_0 = const()[name = tensor("op_5431_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_5431_end_0 = const()[name = tensor("op_5431_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_5431_end_mask_0 = const()[name = tensor("op_5431_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5431_cast_fp16 = slice_by_index(begin = var_5431_begin_0, end = var_5431_end_0, end_mask = var_5431_end_mask_0, x = var_4907_cast_fp16)[name = tensor("op_5431_cast_fp16")]; + tensor var_5438_begin_0 = const()[name = tensor("op_5438_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_5438_end_0 = const()[name = tensor("op_5438_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_5438_end_mask_0 = const()[name = tensor("op_5438_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5438_cast_fp16 = slice_by_index(begin = var_5438_begin_0, end = var_5438_end_0, end_mask = var_5438_end_mask_0, x = var_4907_cast_fp16)[name = tensor("op_5438_cast_fp16")]; + tensor var_5445_begin_0 = const()[name = tensor("op_5445_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_5445_end_0 = const()[name = tensor("op_5445_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_5445_end_mask_0 = const()[name = tensor("op_5445_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5445_cast_fp16 = slice_by_index(begin = var_5445_begin_0, end = var_5445_end_0, end_mask = var_5445_end_mask_0, x = var_4907_cast_fp16)[name = tensor("op_5445_cast_fp16")]; + tensor var_5452_begin_0 = const()[name = tensor("op_5452_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5452_end_0 = const()[name = tensor("op_5452_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_5452_end_mask_0 = const()[name = tensor("op_5452_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5452_cast_fp16 = slice_by_index(begin = var_5452_begin_0, end = var_5452_end_0, end_mask = var_5452_end_mask_0, x = var_4911_cast_fp16)[name = tensor("op_5452_cast_fp16")]; + tensor var_5459_begin_0 = const()[name = tensor("op_5459_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_5459_end_0 = const()[name = tensor("op_5459_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_5459_end_mask_0 = const()[name = tensor("op_5459_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5459_cast_fp16 = slice_by_index(begin = var_5459_begin_0, end = var_5459_end_0, end_mask = var_5459_end_mask_0, x = var_4911_cast_fp16)[name = tensor("op_5459_cast_fp16")]; + tensor var_5466_begin_0 = const()[name = tensor("op_5466_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_5466_end_0 = const()[name = tensor("op_5466_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_5466_end_mask_0 = const()[name = tensor("op_5466_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5466_cast_fp16 = slice_by_index(begin = var_5466_begin_0, end = var_5466_end_0, end_mask = var_5466_end_mask_0, x = var_4911_cast_fp16)[name = tensor("op_5466_cast_fp16")]; + tensor var_5473_begin_0 = const()[name = tensor("op_5473_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_5473_end_0 = const()[name = tensor("op_5473_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_5473_end_mask_0 = const()[name = tensor("op_5473_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5473_cast_fp16 = slice_by_index(begin = var_5473_begin_0, end = var_5473_end_0, end_mask = var_5473_end_mask_0, x = var_4911_cast_fp16)[name = tensor("op_5473_cast_fp16")]; + tensor k_7_perm_0 = const()[name = tensor("k_7_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_5478_begin_0 = const()[name = tensor("op_5478_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5478_end_0 = const()[name = tensor("op_5478_end_0"), val = tensor([1, 1500, 1, 64])]; + tensor var_5478_end_mask_0 = const()[name = tensor("op_5478_end_mask_0"), val = tensor([true, true, true, false])]; + tensor transpose_28 = transpose(perm = k_7_perm_0, x = key_7_cast_fp16)[name = tensor("transpose_28")]; + tensor var_5478_cast_fp16 = slice_by_index(begin = var_5478_begin_0, end = var_5478_end_0, end_mask = var_5478_end_mask_0, x = transpose_28)[name = tensor("op_5478_cast_fp16")]; + tensor var_5482_begin_0 = const()[name = tensor("op_5482_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor var_5482_end_0 = const()[name = tensor("op_5482_end_0"), val = tensor([1, 1500, 1, 128])]; + tensor var_5482_end_mask_0 = const()[name = tensor("op_5482_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5482_cast_fp16 = slice_by_index(begin = var_5482_begin_0, end = var_5482_end_0, end_mask = var_5482_end_mask_0, x = transpose_28)[name = tensor("op_5482_cast_fp16")]; + tensor var_5486_begin_0 = const()[name = tensor("op_5486_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_5486_end_0 = const()[name = tensor("op_5486_end_0"), val = tensor([1, 1500, 1, 192])]; + tensor var_5486_end_mask_0 = const()[name = tensor("op_5486_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5486_cast_fp16 = slice_by_index(begin = var_5486_begin_0, end = var_5486_end_0, end_mask = var_5486_end_mask_0, x = transpose_28)[name = tensor("op_5486_cast_fp16")]; + tensor var_5490_begin_0 = const()[name = tensor("op_5490_begin_0"), val = tensor([0, 0, 0, 192])]; + tensor var_5490_end_0 = const()[name = tensor("op_5490_end_0"), val = tensor([1, 1500, 1, 256])]; + tensor var_5490_end_mask_0 = const()[name = tensor("op_5490_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5490_cast_fp16 = slice_by_index(begin = var_5490_begin_0, end = var_5490_end_0, end_mask = var_5490_end_mask_0, x = transpose_28)[name = tensor("op_5490_cast_fp16")]; + tensor var_5494_begin_0 = const()[name = tensor("op_5494_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_5494_end_0 = const()[name = tensor("op_5494_end_0"), val = tensor([1, 1500, 1, 320])]; + tensor var_5494_end_mask_0 = const()[name = tensor("op_5494_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5494_cast_fp16 = slice_by_index(begin = var_5494_begin_0, end = var_5494_end_0, end_mask = var_5494_end_mask_0, x = transpose_28)[name = tensor("op_5494_cast_fp16")]; + tensor var_5498_begin_0 = const()[name = tensor("op_5498_begin_0"), val = tensor([0, 0, 0, 320])]; + tensor var_5498_end_0 = const()[name = tensor("op_5498_end_0"), val = tensor([1, 1500, 1, 384])]; + tensor var_5498_end_mask_0 = const()[name = tensor("op_5498_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5498_cast_fp16 = slice_by_index(begin = var_5498_begin_0, end = var_5498_end_0, end_mask = var_5498_end_mask_0, x = transpose_28)[name = tensor("op_5498_cast_fp16")]; + tensor var_5502_begin_0 = const()[name = tensor("op_5502_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_5502_end_0 = const()[name = tensor("op_5502_end_0"), val = tensor([1, 1500, 1, 448])]; + tensor var_5502_end_mask_0 = const()[name = tensor("op_5502_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5502_cast_fp16 = slice_by_index(begin = var_5502_begin_0, end = var_5502_end_0, end_mask = var_5502_end_mask_0, x = transpose_28)[name = tensor("op_5502_cast_fp16")]; + tensor var_5506_begin_0 = const()[name = tensor("op_5506_begin_0"), val = tensor([0, 0, 0, 448])]; + tensor var_5506_end_0 = const()[name = tensor("op_5506_end_0"), val = tensor([1, 1500, 1, 512])]; + tensor var_5506_end_mask_0 = const()[name = tensor("op_5506_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5506_cast_fp16 = slice_by_index(begin = var_5506_begin_0, end = var_5506_end_0, end_mask = var_5506_end_mask_0, x = transpose_28)[name = tensor("op_5506_cast_fp16")]; + tensor var_5510_begin_0 = const()[name = tensor("op_5510_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_5510_end_0 = const()[name = tensor("op_5510_end_0"), val = tensor([1, 1500, 1, 576])]; + tensor var_5510_end_mask_0 = const()[name = tensor("op_5510_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5510_cast_fp16 = slice_by_index(begin = var_5510_begin_0, end = var_5510_end_0, end_mask = var_5510_end_mask_0, x = transpose_28)[name = tensor("op_5510_cast_fp16")]; + tensor var_5514_begin_0 = const()[name = tensor("op_5514_begin_0"), val = tensor([0, 0, 0, 576])]; + tensor var_5514_end_0 = const()[name = tensor("op_5514_end_0"), val = tensor([1, 1500, 1, 640])]; + tensor var_5514_end_mask_0 = const()[name = tensor("op_5514_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5514_cast_fp16 = slice_by_index(begin = var_5514_begin_0, end = var_5514_end_0, end_mask = var_5514_end_mask_0, x = transpose_28)[name = tensor("op_5514_cast_fp16")]; + tensor var_5518_begin_0 = const()[name = tensor("op_5518_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_5518_end_0 = const()[name = tensor("op_5518_end_0"), val = tensor([1, 1500, 1, 704])]; + tensor var_5518_end_mask_0 = const()[name = tensor("op_5518_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5518_cast_fp16 = slice_by_index(begin = var_5518_begin_0, end = var_5518_end_0, end_mask = var_5518_end_mask_0, x = transpose_28)[name = tensor("op_5518_cast_fp16")]; + tensor var_5522_begin_0 = const()[name = tensor("op_5522_begin_0"), val = tensor([0, 0, 0, 704])]; + tensor var_5522_end_0 = const()[name = tensor("op_5522_end_0"), val = tensor([1, 1500, 1, 768])]; + tensor var_5522_end_mask_0 = const()[name = tensor("op_5522_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5522_cast_fp16 = slice_by_index(begin = var_5522_begin_0, end = var_5522_end_0, end_mask = var_5522_end_mask_0, x = transpose_28)[name = tensor("op_5522_cast_fp16")]; + tensor var_5526_begin_0 = const()[name = tensor("op_5526_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_5526_end_0 = const()[name = tensor("op_5526_end_0"), val = tensor([1, 1500, 1, 832])]; + tensor var_5526_end_mask_0 = const()[name = tensor("op_5526_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5526_cast_fp16 = slice_by_index(begin = var_5526_begin_0, end = var_5526_end_0, end_mask = var_5526_end_mask_0, x = transpose_28)[name = tensor("op_5526_cast_fp16")]; + tensor var_5530_begin_0 = const()[name = tensor("op_5530_begin_0"), val = tensor([0, 0, 0, 832])]; + tensor var_5530_end_0 = const()[name = tensor("op_5530_end_0"), val = tensor([1, 1500, 1, 896])]; + tensor var_5530_end_mask_0 = const()[name = tensor("op_5530_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5530_cast_fp16 = slice_by_index(begin = var_5530_begin_0, end = var_5530_end_0, end_mask = var_5530_end_mask_0, x = transpose_28)[name = tensor("op_5530_cast_fp16")]; + tensor var_5534_begin_0 = const()[name = tensor("op_5534_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_5534_end_0 = const()[name = tensor("op_5534_end_0"), val = tensor([1, 1500, 1, 960])]; + tensor var_5534_end_mask_0 = const()[name = tensor("op_5534_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5534_cast_fp16 = slice_by_index(begin = var_5534_begin_0, end = var_5534_end_0, end_mask = var_5534_end_mask_0, x = transpose_28)[name = tensor("op_5534_cast_fp16")]; + tensor var_5538_begin_0 = const()[name = tensor("op_5538_begin_0"), val = tensor([0, 0, 0, 960])]; + tensor var_5538_end_0 = const()[name = tensor("op_5538_end_0"), val = tensor([1, 1500, 1, 1024])]; + tensor var_5538_end_mask_0 = const()[name = tensor("op_5538_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5538_cast_fp16 = slice_by_index(begin = var_5538_begin_0, end = var_5538_end_0, end_mask = var_5538_end_mask_0, x = transpose_28)[name = tensor("op_5538_cast_fp16")]; + tensor var_5542_begin_0 = const()[name = tensor("op_5542_begin_0"), val = tensor([0, 0, 0, 1024])]; + tensor var_5542_end_0 = const()[name = tensor("op_5542_end_0"), val = tensor([1, 1500, 1, 1088])]; + tensor var_5542_end_mask_0 = const()[name = tensor("op_5542_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5542_cast_fp16 = slice_by_index(begin = var_5542_begin_0, end = var_5542_end_0, end_mask = var_5542_end_mask_0, x = transpose_28)[name = tensor("op_5542_cast_fp16")]; + tensor var_5546_begin_0 = const()[name = tensor("op_5546_begin_0"), val = tensor([0, 0, 0, 1088])]; + tensor var_5546_end_0 = const()[name = tensor("op_5546_end_0"), val = tensor([1, 1500, 1, 1152])]; + tensor var_5546_end_mask_0 = const()[name = tensor("op_5546_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5546_cast_fp16 = slice_by_index(begin = var_5546_begin_0, end = var_5546_end_0, end_mask = var_5546_end_mask_0, x = transpose_28)[name = tensor("op_5546_cast_fp16")]; + tensor var_5550_begin_0 = const()[name = tensor("op_5550_begin_0"), val = tensor([0, 0, 0, 1152])]; + tensor var_5550_end_0 = const()[name = tensor("op_5550_end_0"), val = tensor([1, 1500, 1, 1216])]; + tensor var_5550_end_mask_0 = const()[name = tensor("op_5550_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5550_cast_fp16 = slice_by_index(begin = var_5550_begin_0, end = var_5550_end_0, end_mask = var_5550_end_mask_0, x = transpose_28)[name = tensor("op_5550_cast_fp16")]; + tensor var_5554_begin_0 = const()[name = tensor("op_5554_begin_0"), val = tensor([0, 0, 0, 1216])]; + tensor var_5554_end_0 = const()[name = tensor("op_5554_end_0"), val = tensor([1, 1500, 1, 1280])]; + tensor var_5554_end_mask_0 = const()[name = tensor("op_5554_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_5554_cast_fp16 = slice_by_index(begin = var_5554_begin_0, end = var_5554_end_0, end_mask = var_5554_end_mask_0, x = transpose_28)[name = tensor("op_5554_cast_fp16")]; + tensor var_5556_begin_0 = const()[name = tensor("op_5556_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_5556_end_0 = const()[name = tensor("op_5556_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_5556_end_mask_0 = const()[name = tensor("op_5556_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5556_cast_fp16 = slice_by_index(begin = var_5556_begin_0, end = var_5556_end_0, end_mask = var_5556_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_5556_cast_fp16")]; + tensor var_5560_begin_0 = const()[name = tensor("op_5560_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_5560_end_0 = const()[name = tensor("op_5560_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_5560_end_mask_0 = const()[name = tensor("op_5560_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5560_cast_fp16 = slice_by_index(begin = var_5560_begin_0, end = var_5560_end_0, end_mask = var_5560_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_5560_cast_fp16")]; + tensor var_5564_begin_0 = const()[name = tensor("op_5564_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_5564_end_0 = const()[name = tensor("op_5564_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_5564_end_mask_0 = const()[name = tensor("op_5564_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5564_cast_fp16 = slice_by_index(begin = var_5564_begin_0, end = var_5564_end_0, end_mask = var_5564_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_5564_cast_fp16")]; + tensor var_5568_begin_0 = const()[name = tensor("op_5568_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_5568_end_0 = const()[name = tensor("op_5568_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_5568_end_mask_0 = const()[name = tensor("op_5568_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5568_cast_fp16 = slice_by_index(begin = var_5568_begin_0, end = var_5568_end_0, end_mask = var_5568_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_5568_cast_fp16")]; + tensor var_5572_begin_0 = const()[name = tensor("op_5572_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_5572_end_0 = const()[name = tensor("op_5572_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_5572_end_mask_0 = const()[name = tensor("op_5572_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5572_cast_fp16 = slice_by_index(begin = var_5572_begin_0, end = var_5572_end_0, end_mask = var_5572_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_5572_cast_fp16")]; + tensor var_5576_begin_0 = const()[name = tensor("op_5576_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_5576_end_0 = const()[name = tensor("op_5576_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_5576_end_mask_0 = const()[name = tensor("op_5576_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5576_cast_fp16 = slice_by_index(begin = var_5576_begin_0, end = var_5576_end_0, end_mask = var_5576_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_5576_cast_fp16")]; + tensor var_5580_begin_0 = const()[name = tensor("op_5580_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_5580_end_0 = const()[name = tensor("op_5580_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_5580_end_mask_0 = const()[name = tensor("op_5580_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5580_cast_fp16 = slice_by_index(begin = var_5580_begin_0, end = var_5580_end_0, end_mask = var_5580_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_5580_cast_fp16")]; + tensor var_5584_begin_0 = const()[name = tensor("op_5584_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_5584_end_0 = const()[name = tensor("op_5584_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_5584_end_mask_0 = const()[name = tensor("op_5584_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5584_cast_fp16 = slice_by_index(begin = var_5584_begin_0, end = var_5584_end_0, end_mask = var_5584_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_5584_cast_fp16")]; + tensor var_5588_begin_0 = const()[name = tensor("op_5588_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_5588_end_0 = const()[name = tensor("op_5588_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_5588_end_mask_0 = const()[name = tensor("op_5588_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5588_cast_fp16 = slice_by_index(begin = var_5588_begin_0, end = var_5588_end_0, end_mask = var_5588_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_5588_cast_fp16")]; + tensor var_5592_begin_0 = const()[name = tensor("op_5592_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_5592_end_0 = const()[name = tensor("op_5592_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_5592_end_mask_0 = const()[name = tensor("op_5592_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5592_cast_fp16 = slice_by_index(begin = var_5592_begin_0, end = var_5592_end_0, end_mask = var_5592_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_5592_cast_fp16")]; + tensor var_5596_begin_0 = const()[name = tensor("op_5596_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_5596_end_0 = const()[name = tensor("op_5596_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_5596_end_mask_0 = const()[name = tensor("op_5596_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5596_cast_fp16 = slice_by_index(begin = var_5596_begin_0, end = var_5596_end_0, end_mask = var_5596_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_5596_cast_fp16")]; + tensor var_5600_begin_0 = const()[name = tensor("op_5600_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_5600_end_0 = const()[name = tensor("op_5600_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_5600_end_mask_0 = const()[name = tensor("op_5600_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5600_cast_fp16 = slice_by_index(begin = var_5600_begin_0, end = var_5600_end_0, end_mask = var_5600_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_5600_cast_fp16")]; + tensor var_5604_begin_0 = const()[name = tensor("op_5604_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_5604_end_0 = const()[name = tensor("op_5604_end_0"), val = tensor([1, 832, 1, 1500])]; + tensor var_5604_end_mask_0 = const()[name = tensor("op_5604_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5604_cast_fp16 = slice_by_index(begin = var_5604_begin_0, end = var_5604_end_0, end_mask = var_5604_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_5604_cast_fp16")]; + tensor var_5608_begin_0 = const()[name = tensor("op_5608_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_5608_end_0 = const()[name = tensor("op_5608_end_0"), val = tensor([1, 896, 1, 1500])]; + tensor var_5608_end_mask_0 = const()[name = tensor("op_5608_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5608_cast_fp16 = slice_by_index(begin = var_5608_begin_0, end = var_5608_end_0, end_mask = var_5608_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_5608_cast_fp16")]; + tensor var_5612_begin_0 = const()[name = tensor("op_5612_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_5612_end_0 = const()[name = tensor("op_5612_end_0"), val = tensor([1, 960, 1, 1500])]; + tensor var_5612_end_mask_0 = const()[name = tensor("op_5612_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5612_cast_fp16 = slice_by_index(begin = var_5612_begin_0, end = var_5612_end_0, end_mask = var_5612_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_5612_cast_fp16")]; + tensor var_5616_begin_0 = const()[name = tensor("op_5616_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_5616_end_0 = const()[name = tensor("op_5616_end_0"), val = tensor([1, 1024, 1, 1500])]; + tensor var_5616_end_mask_0 = const()[name = tensor("op_5616_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5616_cast_fp16 = slice_by_index(begin = var_5616_begin_0, end = var_5616_end_0, end_mask = var_5616_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_5616_cast_fp16")]; + tensor var_5620_begin_0 = const()[name = tensor("op_5620_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_5620_end_0 = const()[name = tensor("op_5620_end_0"), val = tensor([1, 1088, 1, 1500])]; + tensor var_5620_end_mask_0 = const()[name = tensor("op_5620_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5620_cast_fp16 = slice_by_index(begin = var_5620_begin_0, end = var_5620_end_0, end_mask = var_5620_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_5620_cast_fp16")]; + tensor var_5624_begin_0 = const()[name = tensor("op_5624_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_5624_end_0 = const()[name = tensor("op_5624_end_0"), val = tensor([1, 1152, 1, 1500])]; + tensor var_5624_end_mask_0 = const()[name = tensor("op_5624_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5624_cast_fp16 = slice_by_index(begin = var_5624_begin_0, end = var_5624_end_0, end_mask = var_5624_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_5624_cast_fp16")]; + tensor var_5628_begin_0 = const()[name = tensor("op_5628_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_5628_end_0 = const()[name = tensor("op_5628_end_0"), val = tensor([1, 1216, 1, 1500])]; + tensor var_5628_end_mask_0 = const()[name = tensor("op_5628_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5628_cast_fp16 = slice_by_index(begin = var_5628_begin_0, end = var_5628_end_0, end_mask = var_5628_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_5628_cast_fp16")]; + tensor var_5632_begin_0 = const()[name = tensor("op_5632_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_5632_end_0 = const()[name = tensor("op_5632_end_0"), val = tensor([1, 1280, 1, 1500])]; + tensor var_5632_end_mask_0 = const()[name = tensor("op_5632_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_5632_cast_fp16 = slice_by_index(begin = var_5632_begin_0, end = var_5632_end_0, end_mask = var_5632_end_mask_0, x = value_7_cast_fp16)[name = tensor("op_5632_cast_fp16")]; + tensor var_5636_equation_0 = const()[name = tensor("op_5636_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5636_cast_fp16 = einsum(equation = var_5636_equation_0, values = (var_5478_cast_fp16, var_4920_cast_fp16))[name = tensor("op_5636_cast_fp16")]; + tensor var_5637_to_fp16 = const()[name = tensor("op_5637_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_481_cast_fp16 = mul(x = var_5636_cast_fp16, y = var_5637_to_fp16)[name = tensor("aw_chunk_481_cast_fp16")]; + tensor var_5640_equation_0 = const()[name = tensor("op_5640_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5640_cast_fp16 = einsum(equation = var_5640_equation_0, values = (var_5478_cast_fp16, var_4927_cast_fp16))[name = tensor("op_5640_cast_fp16")]; + tensor var_5641_to_fp16 = const()[name = tensor("op_5641_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_483_cast_fp16 = mul(x = var_5640_cast_fp16, y = var_5641_to_fp16)[name = tensor("aw_chunk_483_cast_fp16")]; + tensor var_5644_equation_0 = const()[name = tensor("op_5644_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5644_cast_fp16 = einsum(equation = var_5644_equation_0, values = (var_5478_cast_fp16, var_4934_cast_fp16))[name = tensor("op_5644_cast_fp16")]; + tensor var_5645_to_fp16 = const()[name = tensor("op_5645_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_485_cast_fp16 = mul(x = var_5644_cast_fp16, y = var_5645_to_fp16)[name = tensor("aw_chunk_485_cast_fp16")]; + tensor var_5648_equation_0 = const()[name = tensor("op_5648_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5648_cast_fp16 = einsum(equation = var_5648_equation_0, values = (var_5478_cast_fp16, var_4941_cast_fp16))[name = tensor("op_5648_cast_fp16")]; + tensor var_5649_to_fp16 = const()[name = tensor("op_5649_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_487_cast_fp16 = mul(x = var_5648_cast_fp16, y = var_5649_to_fp16)[name = tensor("aw_chunk_487_cast_fp16")]; + tensor var_5652_equation_0 = const()[name = tensor("op_5652_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5652_cast_fp16 = einsum(equation = var_5652_equation_0, values = (var_5482_cast_fp16, var_4948_cast_fp16))[name = tensor("op_5652_cast_fp16")]; + tensor var_5653_to_fp16 = const()[name = tensor("op_5653_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_489_cast_fp16 = mul(x = var_5652_cast_fp16, y = var_5653_to_fp16)[name = tensor("aw_chunk_489_cast_fp16")]; + tensor var_5656_equation_0 = const()[name = tensor("op_5656_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5656_cast_fp16 = einsum(equation = var_5656_equation_0, values = (var_5482_cast_fp16, var_4955_cast_fp16))[name = tensor("op_5656_cast_fp16")]; + tensor var_5657_to_fp16 = const()[name = tensor("op_5657_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_491_cast_fp16 = mul(x = var_5656_cast_fp16, y = var_5657_to_fp16)[name = tensor("aw_chunk_491_cast_fp16")]; + tensor var_5660_equation_0 = const()[name = tensor("op_5660_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5660_cast_fp16 = einsum(equation = var_5660_equation_0, values = (var_5482_cast_fp16, var_4962_cast_fp16))[name = tensor("op_5660_cast_fp16")]; + tensor var_5661_to_fp16 = const()[name = tensor("op_5661_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_493_cast_fp16 = mul(x = var_5660_cast_fp16, y = var_5661_to_fp16)[name = tensor("aw_chunk_493_cast_fp16")]; + tensor var_5664_equation_0 = const()[name = tensor("op_5664_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5664_cast_fp16 = einsum(equation = var_5664_equation_0, values = (var_5482_cast_fp16, var_4969_cast_fp16))[name = tensor("op_5664_cast_fp16")]; + tensor var_5665_to_fp16 = const()[name = tensor("op_5665_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_495_cast_fp16 = mul(x = var_5664_cast_fp16, y = var_5665_to_fp16)[name = tensor("aw_chunk_495_cast_fp16")]; + tensor var_5668_equation_0 = const()[name = tensor("op_5668_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5668_cast_fp16 = einsum(equation = var_5668_equation_0, values = (var_5486_cast_fp16, var_4976_cast_fp16))[name = tensor("op_5668_cast_fp16")]; + tensor var_5669_to_fp16 = const()[name = tensor("op_5669_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_497_cast_fp16 = mul(x = var_5668_cast_fp16, y = var_5669_to_fp16)[name = tensor("aw_chunk_497_cast_fp16")]; + tensor var_5672_equation_0 = const()[name = tensor("op_5672_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5672_cast_fp16 = einsum(equation = var_5672_equation_0, values = (var_5486_cast_fp16, var_4983_cast_fp16))[name = tensor("op_5672_cast_fp16")]; + tensor var_5673_to_fp16 = const()[name = tensor("op_5673_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_499_cast_fp16 = mul(x = var_5672_cast_fp16, y = var_5673_to_fp16)[name = tensor("aw_chunk_499_cast_fp16")]; + tensor var_5676_equation_0 = const()[name = tensor("op_5676_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5676_cast_fp16 = einsum(equation = var_5676_equation_0, values = (var_5486_cast_fp16, var_4990_cast_fp16))[name = tensor("op_5676_cast_fp16")]; + tensor var_5677_to_fp16 = const()[name = tensor("op_5677_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_501_cast_fp16 = mul(x = var_5676_cast_fp16, y = var_5677_to_fp16)[name = tensor("aw_chunk_501_cast_fp16")]; + tensor var_5680_equation_0 = const()[name = tensor("op_5680_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5680_cast_fp16 = einsum(equation = var_5680_equation_0, values = (var_5486_cast_fp16, var_4997_cast_fp16))[name = tensor("op_5680_cast_fp16")]; + tensor var_5681_to_fp16 = const()[name = tensor("op_5681_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_503_cast_fp16 = mul(x = var_5680_cast_fp16, y = var_5681_to_fp16)[name = tensor("aw_chunk_503_cast_fp16")]; + tensor var_5684_equation_0 = const()[name = tensor("op_5684_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5684_cast_fp16 = einsum(equation = var_5684_equation_0, values = (var_5490_cast_fp16, var_5004_cast_fp16))[name = tensor("op_5684_cast_fp16")]; + tensor var_5685_to_fp16 = const()[name = tensor("op_5685_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_505_cast_fp16 = mul(x = var_5684_cast_fp16, y = var_5685_to_fp16)[name = tensor("aw_chunk_505_cast_fp16")]; + tensor var_5688_equation_0 = const()[name = tensor("op_5688_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5688_cast_fp16 = einsum(equation = var_5688_equation_0, values = (var_5490_cast_fp16, var_5011_cast_fp16))[name = tensor("op_5688_cast_fp16")]; + tensor var_5689_to_fp16 = const()[name = tensor("op_5689_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_507_cast_fp16 = mul(x = var_5688_cast_fp16, y = var_5689_to_fp16)[name = tensor("aw_chunk_507_cast_fp16")]; + tensor var_5692_equation_0 = const()[name = tensor("op_5692_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5692_cast_fp16 = einsum(equation = var_5692_equation_0, values = (var_5490_cast_fp16, var_5018_cast_fp16))[name = tensor("op_5692_cast_fp16")]; + tensor var_5693_to_fp16 = const()[name = tensor("op_5693_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_509_cast_fp16 = mul(x = var_5692_cast_fp16, y = var_5693_to_fp16)[name = tensor("aw_chunk_509_cast_fp16")]; + tensor var_5696_equation_0 = const()[name = tensor("op_5696_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5696_cast_fp16 = einsum(equation = var_5696_equation_0, values = (var_5490_cast_fp16, var_5025_cast_fp16))[name = tensor("op_5696_cast_fp16")]; + tensor var_5697_to_fp16 = const()[name = tensor("op_5697_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_511_cast_fp16 = mul(x = var_5696_cast_fp16, y = var_5697_to_fp16)[name = tensor("aw_chunk_511_cast_fp16")]; + tensor var_5700_equation_0 = const()[name = tensor("op_5700_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5700_cast_fp16 = einsum(equation = var_5700_equation_0, values = (var_5494_cast_fp16, var_5032_cast_fp16))[name = tensor("op_5700_cast_fp16")]; + tensor var_5701_to_fp16 = const()[name = tensor("op_5701_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_513_cast_fp16 = mul(x = var_5700_cast_fp16, y = var_5701_to_fp16)[name = tensor("aw_chunk_513_cast_fp16")]; + tensor var_5704_equation_0 = const()[name = tensor("op_5704_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5704_cast_fp16 = einsum(equation = var_5704_equation_0, values = (var_5494_cast_fp16, var_5039_cast_fp16))[name = tensor("op_5704_cast_fp16")]; + tensor var_5705_to_fp16 = const()[name = tensor("op_5705_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_515_cast_fp16 = mul(x = var_5704_cast_fp16, y = var_5705_to_fp16)[name = tensor("aw_chunk_515_cast_fp16")]; + tensor var_5708_equation_0 = const()[name = tensor("op_5708_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5708_cast_fp16 = einsum(equation = var_5708_equation_0, values = (var_5494_cast_fp16, var_5046_cast_fp16))[name = tensor("op_5708_cast_fp16")]; + tensor var_5709_to_fp16 = const()[name = tensor("op_5709_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_517_cast_fp16 = mul(x = var_5708_cast_fp16, y = var_5709_to_fp16)[name = tensor("aw_chunk_517_cast_fp16")]; + tensor var_5712_equation_0 = const()[name = tensor("op_5712_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5712_cast_fp16 = einsum(equation = var_5712_equation_0, values = (var_5494_cast_fp16, var_5053_cast_fp16))[name = tensor("op_5712_cast_fp16")]; + tensor var_5713_to_fp16 = const()[name = tensor("op_5713_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_519_cast_fp16 = mul(x = var_5712_cast_fp16, y = var_5713_to_fp16)[name = tensor("aw_chunk_519_cast_fp16")]; + tensor var_5716_equation_0 = const()[name = tensor("op_5716_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5716_cast_fp16 = einsum(equation = var_5716_equation_0, values = (var_5498_cast_fp16, var_5060_cast_fp16))[name = tensor("op_5716_cast_fp16")]; + tensor var_5717_to_fp16 = const()[name = tensor("op_5717_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_521_cast_fp16 = mul(x = var_5716_cast_fp16, y = var_5717_to_fp16)[name = tensor("aw_chunk_521_cast_fp16")]; + tensor var_5720_equation_0 = const()[name = tensor("op_5720_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5720_cast_fp16 = einsum(equation = var_5720_equation_0, values = (var_5498_cast_fp16, var_5067_cast_fp16))[name = tensor("op_5720_cast_fp16")]; + tensor var_5721_to_fp16 = const()[name = tensor("op_5721_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_523_cast_fp16 = mul(x = var_5720_cast_fp16, y = var_5721_to_fp16)[name = tensor("aw_chunk_523_cast_fp16")]; + tensor var_5724_equation_0 = const()[name = tensor("op_5724_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5724_cast_fp16 = einsum(equation = var_5724_equation_0, values = (var_5498_cast_fp16, var_5074_cast_fp16))[name = tensor("op_5724_cast_fp16")]; + tensor var_5725_to_fp16 = const()[name = tensor("op_5725_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_525_cast_fp16 = mul(x = var_5724_cast_fp16, y = var_5725_to_fp16)[name = tensor("aw_chunk_525_cast_fp16")]; + tensor var_5728_equation_0 = const()[name = tensor("op_5728_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5728_cast_fp16 = einsum(equation = var_5728_equation_0, values = (var_5498_cast_fp16, var_5081_cast_fp16))[name = tensor("op_5728_cast_fp16")]; + tensor var_5729_to_fp16 = const()[name = tensor("op_5729_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_527_cast_fp16 = mul(x = var_5728_cast_fp16, y = var_5729_to_fp16)[name = tensor("aw_chunk_527_cast_fp16")]; + tensor var_5732_equation_0 = const()[name = tensor("op_5732_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5732_cast_fp16 = einsum(equation = var_5732_equation_0, values = (var_5502_cast_fp16, var_5088_cast_fp16))[name = tensor("op_5732_cast_fp16")]; + tensor var_5733_to_fp16 = const()[name = tensor("op_5733_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_529_cast_fp16 = mul(x = var_5732_cast_fp16, y = var_5733_to_fp16)[name = tensor("aw_chunk_529_cast_fp16")]; + tensor var_5736_equation_0 = const()[name = tensor("op_5736_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5736_cast_fp16 = einsum(equation = var_5736_equation_0, values = (var_5502_cast_fp16, var_5095_cast_fp16))[name = tensor("op_5736_cast_fp16")]; + tensor var_5737_to_fp16 = const()[name = tensor("op_5737_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_531_cast_fp16 = mul(x = var_5736_cast_fp16, y = var_5737_to_fp16)[name = tensor("aw_chunk_531_cast_fp16")]; + tensor var_5740_equation_0 = const()[name = tensor("op_5740_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5740_cast_fp16 = einsum(equation = var_5740_equation_0, values = (var_5502_cast_fp16, var_5102_cast_fp16))[name = tensor("op_5740_cast_fp16")]; + tensor var_5741_to_fp16 = const()[name = tensor("op_5741_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_533_cast_fp16 = mul(x = var_5740_cast_fp16, y = var_5741_to_fp16)[name = tensor("aw_chunk_533_cast_fp16")]; + tensor var_5744_equation_0 = const()[name = tensor("op_5744_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5744_cast_fp16 = einsum(equation = var_5744_equation_0, values = (var_5502_cast_fp16, var_5109_cast_fp16))[name = tensor("op_5744_cast_fp16")]; + tensor var_5745_to_fp16 = const()[name = tensor("op_5745_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_535_cast_fp16 = mul(x = var_5744_cast_fp16, y = var_5745_to_fp16)[name = tensor("aw_chunk_535_cast_fp16")]; + tensor var_5748_equation_0 = const()[name = tensor("op_5748_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5748_cast_fp16 = einsum(equation = var_5748_equation_0, values = (var_5506_cast_fp16, var_5116_cast_fp16))[name = tensor("op_5748_cast_fp16")]; + tensor var_5749_to_fp16 = const()[name = tensor("op_5749_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_537_cast_fp16 = mul(x = var_5748_cast_fp16, y = var_5749_to_fp16)[name = tensor("aw_chunk_537_cast_fp16")]; + tensor var_5752_equation_0 = const()[name = tensor("op_5752_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5752_cast_fp16 = einsum(equation = var_5752_equation_0, values = (var_5506_cast_fp16, var_5123_cast_fp16))[name = tensor("op_5752_cast_fp16")]; + tensor var_5753_to_fp16 = const()[name = tensor("op_5753_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_539_cast_fp16 = mul(x = var_5752_cast_fp16, y = var_5753_to_fp16)[name = tensor("aw_chunk_539_cast_fp16")]; + tensor var_5756_equation_0 = const()[name = tensor("op_5756_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5756_cast_fp16 = einsum(equation = var_5756_equation_0, values = (var_5506_cast_fp16, var_5130_cast_fp16))[name = tensor("op_5756_cast_fp16")]; + tensor var_5757_to_fp16 = const()[name = tensor("op_5757_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_541_cast_fp16 = mul(x = var_5756_cast_fp16, y = var_5757_to_fp16)[name = tensor("aw_chunk_541_cast_fp16")]; + tensor var_5760_equation_0 = const()[name = tensor("op_5760_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5760_cast_fp16 = einsum(equation = var_5760_equation_0, values = (var_5506_cast_fp16, var_5137_cast_fp16))[name = tensor("op_5760_cast_fp16")]; + tensor var_5761_to_fp16 = const()[name = tensor("op_5761_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_543_cast_fp16 = mul(x = var_5760_cast_fp16, y = var_5761_to_fp16)[name = tensor("aw_chunk_543_cast_fp16")]; + tensor var_5764_equation_0 = const()[name = tensor("op_5764_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5764_cast_fp16 = einsum(equation = var_5764_equation_0, values = (var_5510_cast_fp16, var_5144_cast_fp16))[name = tensor("op_5764_cast_fp16")]; + tensor var_5765_to_fp16 = const()[name = tensor("op_5765_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_545_cast_fp16 = mul(x = var_5764_cast_fp16, y = var_5765_to_fp16)[name = tensor("aw_chunk_545_cast_fp16")]; + tensor var_5768_equation_0 = const()[name = tensor("op_5768_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5768_cast_fp16 = einsum(equation = var_5768_equation_0, values = (var_5510_cast_fp16, var_5151_cast_fp16))[name = tensor("op_5768_cast_fp16")]; + tensor var_5769_to_fp16 = const()[name = tensor("op_5769_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_547_cast_fp16 = mul(x = var_5768_cast_fp16, y = var_5769_to_fp16)[name = tensor("aw_chunk_547_cast_fp16")]; + tensor var_5772_equation_0 = const()[name = tensor("op_5772_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5772_cast_fp16 = einsum(equation = var_5772_equation_0, values = (var_5510_cast_fp16, var_5158_cast_fp16))[name = tensor("op_5772_cast_fp16")]; + tensor var_5773_to_fp16 = const()[name = tensor("op_5773_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_549_cast_fp16 = mul(x = var_5772_cast_fp16, y = var_5773_to_fp16)[name = tensor("aw_chunk_549_cast_fp16")]; + tensor var_5776_equation_0 = const()[name = tensor("op_5776_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5776_cast_fp16 = einsum(equation = var_5776_equation_0, values = (var_5510_cast_fp16, var_5165_cast_fp16))[name = tensor("op_5776_cast_fp16")]; + tensor var_5777_to_fp16 = const()[name = tensor("op_5777_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_551_cast_fp16 = mul(x = var_5776_cast_fp16, y = var_5777_to_fp16)[name = tensor("aw_chunk_551_cast_fp16")]; + tensor var_5780_equation_0 = const()[name = tensor("op_5780_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5780_cast_fp16 = einsum(equation = var_5780_equation_0, values = (var_5514_cast_fp16, var_5172_cast_fp16))[name = tensor("op_5780_cast_fp16")]; + tensor var_5781_to_fp16 = const()[name = tensor("op_5781_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_553_cast_fp16 = mul(x = var_5780_cast_fp16, y = var_5781_to_fp16)[name = tensor("aw_chunk_553_cast_fp16")]; + tensor var_5784_equation_0 = const()[name = tensor("op_5784_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5784_cast_fp16 = einsum(equation = var_5784_equation_0, values = (var_5514_cast_fp16, var_5179_cast_fp16))[name = tensor("op_5784_cast_fp16")]; + tensor var_5785_to_fp16 = const()[name = tensor("op_5785_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_555_cast_fp16 = mul(x = var_5784_cast_fp16, y = var_5785_to_fp16)[name = tensor("aw_chunk_555_cast_fp16")]; + tensor var_5788_equation_0 = const()[name = tensor("op_5788_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5788_cast_fp16 = einsum(equation = var_5788_equation_0, values = (var_5514_cast_fp16, var_5186_cast_fp16))[name = tensor("op_5788_cast_fp16")]; + tensor var_5789_to_fp16 = const()[name = tensor("op_5789_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_557_cast_fp16 = mul(x = var_5788_cast_fp16, y = var_5789_to_fp16)[name = tensor("aw_chunk_557_cast_fp16")]; + tensor var_5792_equation_0 = const()[name = tensor("op_5792_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5792_cast_fp16 = einsum(equation = var_5792_equation_0, values = (var_5514_cast_fp16, var_5193_cast_fp16))[name = tensor("op_5792_cast_fp16")]; + tensor var_5793_to_fp16 = const()[name = tensor("op_5793_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_559_cast_fp16 = mul(x = var_5792_cast_fp16, y = var_5793_to_fp16)[name = tensor("aw_chunk_559_cast_fp16")]; + tensor var_5796_equation_0 = const()[name = tensor("op_5796_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5796_cast_fp16 = einsum(equation = var_5796_equation_0, values = (var_5518_cast_fp16, var_5200_cast_fp16))[name = tensor("op_5796_cast_fp16")]; + tensor var_5797_to_fp16 = const()[name = tensor("op_5797_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_561_cast_fp16 = mul(x = var_5796_cast_fp16, y = var_5797_to_fp16)[name = tensor("aw_chunk_561_cast_fp16")]; + tensor var_5800_equation_0 = const()[name = tensor("op_5800_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5800_cast_fp16 = einsum(equation = var_5800_equation_0, values = (var_5518_cast_fp16, var_5207_cast_fp16))[name = tensor("op_5800_cast_fp16")]; + tensor var_5801_to_fp16 = const()[name = tensor("op_5801_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_563_cast_fp16 = mul(x = var_5800_cast_fp16, y = var_5801_to_fp16)[name = tensor("aw_chunk_563_cast_fp16")]; + tensor var_5804_equation_0 = const()[name = tensor("op_5804_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5804_cast_fp16 = einsum(equation = var_5804_equation_0, values = (var_5518_cast_fp16, var_5214_cast_fp16))[name = tensor("op_5804_cast_fp16")]; + tensor var_5805_to_fp16 = const()[name = tensor("op_5805_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_565_cast_fp16 = mul(x = var_5804_cast_fp16, y = var_5805_to_fp16)[name = tensor("aw_chunk_565_cast_fp16")]; + tensor var_5808_equation_0 = const()[name = tensor("op_5808_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5808_cast_fp16 = einsum(equation = var_5808_equation_0, values = (var_5518_cast_fp16, var_5221_cast_fp16))[name = tensor("op_5808_cast_fp16")]; + tensor var_5809_to_fp16 = const()[name = tensor("op_5809_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_567_cast_fp16 = mul(x = var_5808_cast_fp16, y = var_5809_to_fp16)[name = tensor("aw_chunk_567_cast_fp16")]; + tensor var_5812_equation_0 = const()[name = tensor("op_5812_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5812_cast_fp16 = einsum(equation = var_5812_equation_0, values = (var_5522_cast_fp16, var_5228_cast_fp16))[name = tensor("op_5812_cast_fp16")]; + tensor var_5813_to_fp16 = const()[name = tensor("op_5813_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_569_cast_fp16 = mul(x = var_5812_cast_fp16, y = var_5813_to_fp16)[name = tensor("aw_chunk_569_cast_fp16")]; + tensor var_5816_equation_0 = const()[name = tensor("op_5816_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5816_cast_fp16 = einsum(equation = var_5816_equation_0, values = (var_5522_cast_fp16, var_5235_cast_fp16))[name = tensor("op_5816_cast_fp16")]; + tensor var_5817_to_fp16 = const()[name = tensor("op_5817_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_571_cast_fp16 = mul(x = var_5816_cast_fp16, y = var_5817_to_fp16)[name = tensor("aw_chunk_571_cast_fp16")]; + tensor var_5820_equation_0 = const()[name = tensor("op_5820_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5820_cast_fp16 = einsum(equation = var_5820_equation_0, values = (var_5522_cast_fp16, var_5242_cast_fp16))[name = tensor("op_5820_cast_fp16")]; + tensor var_5821_to_fp16 = const()[name = tensor("op_5821_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_573_cast_fp16 = mul(x = var_5820_cast_fp16, y = var_5821_to_fp16)[name = tensor("aw_chunk_573_cast_fp16")]; + tensor var_5824_equation_0 = const()[name = tensor("op_5824_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5824_cast_fp16 = einsum(equation = var_5824_equation_0, values = (var_5522_cast_fp16, var_5249_cast_fp16))[name = tensor("op_5824_cast_fp16")]; + tensor var_5825_to_fp16 = const()[name = tensor("op_5825_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_575_cast_fp16 = mul(x = var_5824_cast_fp16, y = var_5825_to_fp16)[name = tensor("aw_chunk_575_cast_fp16")]; + tensor var_5828_equation_0 = const()[name = tensor("op_5828_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5828_cast_fp16 = einsum(equation = var_5828_equation_0, values = (var_5526_cast_fp16, var_5256_cast_fp16))[name = tensor("op_5828_cast_fp16")]; + tensor var_5829_to_fp16 = const()[name = tensor("op_5829_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_577_cast_fp16 = mul(x = var_5828_cast_fp16, y = var_5829_to_fp16)[name = tensor("aw_chunk_577_cast_fp16")]; + tensor var_5832_equation_0 = const()[name = tensor("op_5832_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5832_cast_fp16 = einsum(equation = var_5832_equation_0, values = (var_5526_cast_fp16, var_5263_cast_fp16))[name = tensor("op_5832_cast_fp16")]; + tensor var_5833_to_fp16 = const()[name = tensor("op_5833_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_579_cast_fp16 = mul(x = var_5832_cast_fp16, y = var_5833_to_fp16)[name = tensor("aw_chunk_579_cast_fp16")]; + tensor var_5836_equation_0 = const()[name = tensor("op_5836_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5836_cast_fp16 = einsum(equation = var_5836_equation_0, values = (var_5526_cast_fp16, var_5270_cast_fp16))[name = tensor("op_5836_cast_fp16")]; + tensor var_5837_to_fp16 = const()[name = tensor("op_5837_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_581_cast_fp16 = mul(x = var_5836_cast_fp16, y = var_5837_to_fp16)[name = tensor("aw_chunk_581_cast_fp16")]; + tensor var_5840_equation_0 = const()[name = tensor("op_5840_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5840_cast_fp16 = einsum(equation = var_5840_equation_0, values = (var_5526_cast_fp16, var_5277_cast_fp16))[name = tensor("op_5840_cast_fp16")]; + tensor var_5841_to_fp16 = const()[name = tensor("op_5841_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_583_cast_fp16 = mul(x = var_5840_cast_fp16, y = var_5841_to_fp16)[name = tensor("aw_chunk_583_cast_fp16")]; + tensor var_5844_equation_0 = const()[name = tensor("op_5844_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5844_cast_fp16 = einsum(equation = var_5844_equation_0, values = (var_5530_cast_fp16, var_5284_cast_fp16))[name = tensor("op_5844_cast_fp16")]; + tensor var_5845_to_fp16 = const()[name = tensor("op_5845_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_585_cast_fp16 = mul(x = var_5844_cast_fp16, y = var_5845_to_fp16)[name = tensor("aw_chunk_585_cast_fp16")]; + tensor var_5848_equation_0 = const()[name = tensor("op_5848_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5848_cast_fp16 = einsum(equation = var_5848_equation_0, values = (var_5530_cast_fp16, var_5291_cast_fp16))[name = tensor("op_5848_cast_fp16")]; + tensor var_5849_to_fp16 = const()[name = tensor("op_5849_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_587_cast_fp16 = mul(x = var_5848_cast_fp16, y = var_5849_to_fp16)[name = tensor("aw_chunk_587_cast_fp16")]; + tensor var_5852_equation_0 = const()[name = tensor("op_5852_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5852_cast_fp16 = einsum(equation = var_5852_equation_0, values = (var_5530_cast_fp16, var_5298_cast_fp16))[name = tensor("op_5852_cast_fp16")]; + tensor var_5853_to_fp16 = const()[name = tensor("op_5853_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_589_cast_fp16 = mul(x = var_5852_cast_fp16, y = var_5853_to_fp16)[name = tensor("aw_chunk_589_cast_fp16")]; + tensor var_5856_equation_0 = const()[name = tensor("op_5856_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5856_cast_fp16 = einsum(equation = var_5856_equation_0, values = (var_5530_cast_fp16, var_5305_cast_fp16))[name = tensor("op_5856_cast_fp16")]; + tensor var_5857_to_fp16 = const()[name = tensor("op_5857_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_591_cast_fp16 = mul(x = var_5856_cast_fp16, y = var_5857_to_fp16)[name = tensor("aw_chunk_591_cast_fp16")]; + tensor var_5860_equation_0 = const()[name = tensor("op_5860_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5860_cast_fp16 = einsum(equation = var_5860_equation_0, values = (var_5534_cast_fp16, var_5312_cast_fp16))[name = tensor("op_5860_cast_fp16")]; + tensor var_5861_to_fp16 = const()[name = tensor("op_5861_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_593_cast_fp16 = mul(x = var_5860_cast_fp16, y = var_5861_to_fp16)[name = tensor("aw_chunk_593_cast_fp16")]; + tensor var_5864_equation_0 = const()[name = tensor("op_5864_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5864_cast_fp16 = einsum(equation = var_5864_equation_0, values = (var_5534_cast_fp16, var_5319_cast_fp16))[name = tensor("op_5864_cast_fp16")]; + tensor var_5865_to_fp16 = const()[name = tensor("op_5865_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_595_cast_fp16 = mul(x = var_5864_cast_fp16, y = var_5865_to_fp16)[name = tensor("aw_chunk_595_cast_fp16")]; + tensor var_5868_equation_0 = const()[name = tensor("op_5868_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5868_cast_fp16 = einsum(equation = var_5868_equation_0, values = (var_5534_cast_fp16, var_5326_cast_fp16))[name = tensor("op_5868_cast_fp16")]; + tensor var_5869_to_fp16 = const()[name = tensor("op_5869_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_597_cast_fp16 = mul(x = var_5868_cast_fp16, y = var_5869_to_fp16)[name = tensor("aw_chunk_597_cast_fp16")]; + tensor var_5872_equation_0 = const()[name = tensor("op_5872_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5872_cast_fp16 = einsum(equation = var_5872_equation_0, values = (var_5534_cast_fp16, var_5333_cast_fp16))[name = tensor("op_5872_cast_fp16")]; + tensor var_5873_to_fp16 = const()[name = tensor("op_5873_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_599_cast_fp16 = mul(x = var_5872_cast_fp16, y = var_5873_to_fp16)[name = tensor("aw_chunk_599_cast_fp16")]; + tensor var_5876_equation_0 = const()[name = tensor("op_5876_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5876_cast_fp16 = einsum(equation = var_5876_equation_0, values = (var_5538_cast_fp16, var_5340_cast_fp16))[name = tensor("op_5876_cast_fp16")]; + tensor var_5877_to_fp16 = const()[name = tensor("op_5877_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_601_cast_fp16 = mul(x = var_5876_cast_fp16, y = var_5877_to_fp16)[name = tensor("aw_chunk_601_cast_fp16")]; + tensor var_5880_equation_0 = const()[name = tensor("op_5880_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5880_cast_fp16 = einsum(equation = var_5880_equation_0, values = (var_5538_cast_fp16, var_5347_cast_fp16))[name = tensor("op_5880_cast_fp16")]; + tensor var_5881_to_fp16 = const()[name = tensor("op_5881_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_603_cast_fp16 = mul(x = var_5880_cast_fp16, y = var_5881_to_fp16)[name = tensor("aw_chunk_603_cast_fp16")]; + tensor var_5884_equation_0 = const()[name = tensor("op_5884_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5884_cast_fp16 = einsum(equation = var_5884_equation_0, values = (var_5538_cast_fp16, var_5354_cast_fp16))[name = tensor("op_5884_cast_fp16")]; + tensor var_5885_to_fp16 = const()[name = tensor("op_5885_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_605_cast_fp16 = mul(x = var_5884_cast_fp16, y = var_5885_to_fp16)[name = tensor("aw_chunk_605_cast_fp16")]; + tensor var_5888_equation_0 = const()[name = tensor("op_5888_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5888_cast_fp16 = einsum(equation = var_5888_equation_0, values = (var_5538_cast_fp16, var_5361_cast_fp16))[name = tensor("op_5888_cast_fp16")]; + tensor var_5889_to_fp16 = const()[name = tensor("op_5889_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_607_cast_fp16 = mul(x = var_5888_cast_fp16, y = var_5889_to_fp16)[name = tensor("aw_chunk_607_cast_fp16")]; + tensor var_5892_equation_0 = const()[name = tensor("op_5892_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5892_cast_fp16 = einsum(equation = var_5892_equation_0, values = (var_5542_cast_fp16, var_5368_cast_fp16))[name = tensor("op_5892_cast_fp16")]; + tensor var_5893_to_fp16 = const()[name = tensor("op_5893_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_609_cast_fp16 = mul(x = var_5892_cast_fp16, y = var_5893_to_fp16)[name = tensor("aw_chunk_609_cast_fp16")]; + tensor var_5896_equation_0 = const()[name = tensor("op_5896_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5896_cast_fp16 = einsum(equation = var_5896_equation_0, values = (var_5542_cast_fp16, var_5375_cast_fp16))[name = tensor("op_5896_cast_fp16")]; + tensor var_5897_to_fp16 = const()[name = tensor("op_5897_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_611_cast_fp16 = mul(x = var_5896_cast_fp16, y = var_5897_to_fp16)[name = tensor("aw_chunk_611_cast_fp16")]; + tensor var_5900_equation_0 = const()[name = tensor("op_5900_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5900_cast_fp16 = einsum(equation = var_5900_equation_0, values = (var_5542_cast_fp16, var_5382_cast_fp16))[name = tensor("op_5900_cast_fp16")]; + tensor var_5901_to_fp16 = const()[name = tensor("op_5901_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_613_cast_fp16 = mul(x = var_5900_cast_fp16, y = var_5901_to_fp16)[name = tensor("aw_chunk_613_cast_fp16")]; + tensor var_5904_equation_0 = const()[name = tensor("op_5904_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5904_cast_fp16 = einsum(equation = var_5904_equation_0, values = (var_5542_cast_fp16, var_5389_cast_fp16))[name = tensor("op_5904_cast_fp16")]; + tensor var_5905_to_fp16 = const()[name = tensor("op_5905_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_615_cast_fp16 = mul(x = var_5904_cast_fp16, y = var_5905_to_fp16)[name = tensor("aw_chunk_615_cast_fp16")]; + tensor var_5908_equation_0 = const()[name = tensor("op_5908_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5908_cast_fp16 = einsum(equation = var_5908_equation_0, values = (var_5546_cast_fp16, var_5396_cast_fp16))[name = tensor("op_5908_cast_fp16")]; + tensor var_5909_to_fp16 = const()[name = tensor("op_5909_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_617_cast_fp16 = mul(x = var_5908_cast_fp16, y = var_5909_to_fp16)[name = tensor("aw_chunk_617_cast_fp16")]; + tensor var_5912_equation_0 = const()[name = tensor("op_5912_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5912_cast_fp16 = einsum(equation = var_5912_equation_0, values = (var_5546_cast_fp16, var_5403_cast_fp16))[name = tensor("op_5912_cast_fp16")]; + tensor var_5913_to_fp16 = const()[name = tensor("op_5913_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_619_cast_fp16 = mul(x = var_5912_cast_fp16, y = var_5913_to_fp16)[name = tensor("aw_chunk_619_cast_fp16")]; + tensor var_5916_equation_0 = const()[name = tensor("op_5916_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5916_cast_fp16 = einsum(equation = var_5916_equation_0, values = (var_5546_cast_fp16, var_5410_cast_fp16))[name = tensor("op_5916_cast_fp16")]; + tensor var_5917_to_fp16 = const()[name = tensor("op_5917_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_621_cast_fp16 = mul(x = var_5916_cast_fp16, y = var_5917_to_fp16)[name = tensor("aw_chunk_621_cast_fp16")]; + tensor var_5920_equation_0 = const()[name = tensor("op_5920_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5920_cast_fp16 = einsum(equation = var_5920_equation_0, values = (var_5546_cast_fp16, var_5417_cast_fp16))[name = tensor("op_5920_cast_fp16")]; + tensor var_5921_to_fp16 = const()[name = tensor("op_5921_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_623_cast_fp16 = mul(x = var_5920_cast_fp16, y = var_5921_to_fp16)[name = tensor("aw_chunk_623_cast_fp16")]; + tensor var_5924_equation_0 = const()[name = tensor("op_5924_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5924_cast_fp16 = einsum(equation = var_5924_equation_0, values = (var_5550_cast_fp16, var_5424_cast_fp16))[name = tensor("op_5924_cast_fp16")]; + tensor var_5925_to_fp16 = const()[name = tensor("op_5925_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_625_cast_fp16 = mul(x = var_5924_cast_fp16, y = var_5925_to_fp16)[name = tensor("aw_chunk_625_cast_fp16")]; + tensor var_5928_equation_0 = const()[name = tensor("op_5928_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5928_cast_fp16 = einsum(equation = var_5928_equation_0, values = (var_5550_cast_fp16, var_5431_cast_fp16))[name = tensor("op_5928_cast_fp16")]; + tensor var_5929_to_fp16 = const()[name = tensor("op_5929_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_627_cast_fp16 = mul(x = var_5928_cast_fp16, y = var_5929_to_fp16)[name = tensor("aw_chunk_627_cast_fp16")]; + tensor var_5932_equation_0 = const()[name = tensor("op_5932_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5932_cast_fp16 = einsum(equation = var_5932_equation_0, values = (var_5550_cast_fp16, var_5438_cast_fp16))[name = tensor("op_5932_cast_fp16")]; + tensor var_5933_to_fp16 = const()[name = tensor("op_5933_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_629_cast_fp16 = mul(x = var_5932_cast_fp16, y = var_5933_to_fp16)[name = tensor("aw_chunk_629_cast_fp16")]; + tensor var_5936_equation_0 = const()[name = tensor("op_5936_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5936_cast_fp16 = einsum(equation = var_5936_equation_0, values = (var_5550_cast_fp16, var_5445_cast_fp16))[name = tensor("op_5936_cast_fp16")]; + tensor var_5937_to_fp16 = const()[name = tensor("op_5937_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_631_cast_fp16 = mul(x = var_5936_cast_fp16, y = var_5937_to_fp16)[name = tensor("aw_chunk_631_cast_fp16")]; + tensor var_5940_equation_0 = const()[name = tensor("op_5940_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5940_cast_fp16 = einsum(equation = var_5940_equation_0, values = (var_5554_cast_fp16, var_5452_cast_fp16))[name = tensor("op_5940_cast_fp16")]; + tensor var_5941_to_fp16 = const()[name = tensor("op_5941_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_633_cast_fp16 = mul(x = var_5940_cast_fp16, y = var_5941_to_fp16)[name = tensor("aw_chunk_633_cast_fp16")]; + tensor var_5944_equation_0 = const()[name = tensor("op_5944_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5944_cast_fp16 = einsum(equation = var_5944_equation_0, values = (var_5554_cast_fp16, var_5459_cast_fp16))[name = tensor("op_5944_cast_fp16")]; + tensor var_5945_to_fp16 = const()[name = tensor("op_5945_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_635_cast_fp16 = mul(x = var_5944_cast_fp16, y = var_5945_to_fp16)[name = tensor("aw_chunk_635_cast_fp16")]; + tensor var_5948_equation_0 = const()[name = tensor("op_5948_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5948_cast_fp16 = einsum(equation = var_5948_equation_0, values = (var_5554_cast_fp16, var_5466_cast_fp16))[name = tensor("op_5948_cast_fp16")]; + tensor var_5949_to_fp16 = const()[name = tensor("op_5949_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_637_cast_fp16 = mul(x = var_5948_cast_fp16, y = var_5949_to_fp16)[name = tensor("aw_chunk_637_cast_fp16")]; + tensor var_5952_equation_0 = const()[name = tensor("op_5952_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_5952_cast_fp16 = einsum(equation = var_5952_equation_0, values = (var_5554_cast_fp16, var_5473_cast_fp16))[name = tensor("op_5952_cast_fp16")]; + tensor var_5953_to_fp16 = const()[name = tensor("op_5953_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_639_cast_fp16 = mul(x = var_5952_cast_fp16, y = var_5953_to_fp16)[name = tensor("aw_chunk_639_cast_fp16")]; + tensor var_5955_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_481_cast_fp16)[name = tensor("op_5955_cast_fp16")]; + tensor var_5956_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_483_cast_fp16)[name = tensor("op_5956_cast_fp16")]; + tensor var_5957_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_485_cast_fp16)[name = tensor("op_5957_cast_fp16")]; + tensor var_5958_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_487_cast_fp16)[name = tensor("op_5958_cast_fp16")]; + tensor var_5959_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_489_cast_fp16)[name = tensor("op_5959_cast_fp16")]; + tensor var_5960_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_491_cast_fp16)[name = tensor("op_5960_cast_fp16")]; + tensor var_5961_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_493_cast_fp16)[name = tensor("op_5961_cast_fp16")]; + tensor var_5962_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_495_cast_fp16)[name = tensor("op_5962_cast_fp16")]; + tensor var_5963_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_497_cast_fp16)[name = tensor("op_5963_cast_fp16")]; + tensor var_5964_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_499_cast_fp16)[name = tensor("op_5964_cast_fp16")]; + tensor var_5965_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_501_cast_fp16)[name = tensor("op_5965_cast_fp16")]; + tensor var_5966_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_503_cast_fp16)[name = tensor("op_5966_cast_fp16")]; + tensor var_5967_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_505_cast_fp16)[name = tensor("op_5967_cast_fp16")]; + tensor var_5968_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_507_cast_fp16)[name = tensor("op_5968_cast_fp16")]; + tensor var_5969_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_509_cast_fp16)[name = tensor("op_5969_cast_fp16")]; + tensor var_5970_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_511_cast_fp16)[name = tensor("op_5970_cast_fp16")]; + tensor var_5971_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_513_cast_fp16)[name = tensor("op_5971_cast_fp16")]; + tensor var_5972_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_515_cast_fp16)[name = tensor("op_5972_cast_fp16")]; + tensor var_5973_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_517_cast_fp16)[name = tensor("op_5973_cast_fp16")]; + tensor var_5974_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_519_cast_fp16)[name = tensor("op_5974_cast_fp16")]; + tensor var_5975_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_521_cast_fp16)[name = tensor("op_5975_cast_fp16")]; + tensor var_5976_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_523_cast_fp16)[name = tensor("op_5976_cast_fp16")]; + tensor var_5977_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_525_cast_fp16)[name = tensor("op_5977_cast_fp16")]; + tensor var_5978_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_527_cast_fp16)[name = tensor("op_5978_cast_fp16")]; + tensor var_5979_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_529_cast_fp16)[name = tensor("op_5979_cast_fp16")]; + tensor var_5980_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_531_cast_fp16)[name = tensor("op_5980_cast_fp16")]; + tensor var_5981_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_533_cast_fp16)[name = tensor("op_5981_cast_fp16")]; + tensor var_5982_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_535_cast_fp16)[name = tensor("op_5982_cast_fp16")]; + tensor var_5983_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_537_cast_fp16)[name = tensor("op_5983_cast_fp16")]; + tensor var_5984_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_539_cast_fp16)[name = tensor("op_5984_cast_fp16")]; + tensor var_5985_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_541_cast_fp16)[name = tensor("op_5985_cast_fp16")]; + tensor var_5986_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_543_cast_fp16)[name = tensor("op_5986_cast_fp16")]; + tensor var_5987_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_545_cast_fp16)[name = tensor("op_5987_cast_fp16")]; + tensor var_5988_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_547_cast_fp16)[name = tensor("op_5988_cast_fp16")]; + tensor var_5989_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_549_cast_fp16)[name = tensor("op_5989_cast_fp16")]; + tensor var_5990_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_551_cast_fp16)[name = tensor("op_5990_cast_fp16")]; + tensor var_5991_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_553_cast_fp16)[name = tensor("op_5991_cast_fp16")]; + tensor var_5992_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_555_cast_fp16)[name = tensor("op_5992_cast_fp16")]; + tensor var_5993_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_557_cast_fp16)[name = tensor("op_5993_cast_fp16")]; + tensor var_5994_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_559_cast_fp16)[name = tensor("op_5994_cast_fp16")]; + tensor var_5995_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_561_cast_fp16)[name = tensor("op_5995_cast_fp16")]; + tensor var_5996_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_563_cast_fp16)[name = tensor("op_5996_cast_fp16")]; + tensor var_5997_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_565_cast_fp16)[name = tensor("op_5997_cast_fp16")]; + tensor var_5998_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_567_cast_fp16)[name = tensor("op_5998_cast_fp16")]; + tensor var_5999_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_569_cast_fp16)[name = tensor("op_5999_cast_fp16")]; + tensor var_6000_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_571_cast_fp16)[name = tensor("op_6000_cast_fp16")]; + tensor var_6001_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_573_cast_fp16)[name = tensor("op_6001_cast_fp16")]; + tensor var_6002_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_575_cast_fp16)[name = tensor("op_6002_cast_fp16")]; + tensor var_6003_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_577_cast_fp16)[name = tensor("op_6003_cast_fp16")]; + tensor var_6004_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_579_cast_fp16)[name = tensor("op_6004_cast_fp16")]; + tensor var_6005_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_581_cast_fp16)[name = tensor("op_6005_cast_fp16")]; + tensor var_6006_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_583_cast_fp16)[name = tensor("op_6006_cast_fp16")]; + tensor var_6007_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_585_cast_fp16)[name = tensor("op_6007_cast_fp16")]; + tensor var_6008_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_587_cast_fp16)[name = tensor("op_6008_cast_fp16")]; + tensor var_6009_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_589_cast_fp16)[name = tensor("op_6009_cast_fp16")]; + tensor var_6010_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_591_cast_fp16)[name = tensor("op_6010_cast_fp16")]; + tensor var_6011_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_593_cast_fp16)[name = tensor("op_6011_cast_fp16")]; + tensor var_6012_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_595_cast_fp16)[name = tensor("op_6012_cast_fp16")]; + tensor var_6013_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_597_cast_fp16)[name = tensor("op_6013_cast_fp16")]; + tensor var_6014_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_599_cast_fp16)[name = tensor("op_6014_cast_fp16")]; + tensor var_6015_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_601_cast_fp16)[name = tensor("op_6015_cast_fp16")]; + tensor var_6016_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_603_cast_fp16)[name = tensor("op_6016_cast_fp16")]; + tensor var_6017_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_605_cast_fp16)[name = tensor("op_6017_cast_fp16")]; + tensor var_6018_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_607_cast_fp16)[name = tensor("op_6018_cast_fp16")]; + tensor var_6019_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_609_cast_fp16)[name = tensor("op_6019_cast_fp16")]; + tensor var_6020_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_611_cast_fp16)[name = tensor("op_6020_cast_fp16")]; + tensor var_6021_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_613_cast_fp16)[name = tensor("op_6021_cast_fp16")]; + tensor var_6022_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_615_cast_fp16)[name = tensor("op_6022_cast_fp16")]; + tensor var_6023_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_617_cast_fp16)[name = tensor("op_6023_cast_fp16")]; + tensor var_6024_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_619_cast_fp16)[name = tensor("op_6024_cast_fp16")]; + tensor var_6025_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_621_cast_fp16)[name = tensor("op_6025_cast_fp16")]; + tensor var_6026_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_623_cast_fp16)[name = tensor("op_6026_cast_fp16")]; + tensor var_6027_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_625_cast_fp16)[name = tensor("op_6027_cast_fp16")]; + tensor var_6028_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_627_cast_fp16)[name = tensor("op_6028_cast_fp16")]; + tensor var_6029_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_629_cast_fp16)[name = tensor("op_6029_cast_fp16")]; + tensor var_6030_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_631_cast_fp16)[name = tensor("op_6030_cast_fp16")]; + tensor var_6031_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_633_cast_fp16)[name = tensor("op_6031_cast_fp16")]; + tensor var_6032_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_635_cast_fp16)[name = tensor("op_6032_cast_fp16")]; + tensor var_6033_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_637_cast_fp16)[name = tensor("op_6033_cast_fp16")]; + tensor var_6034_cast_fp16 = softmax(axis = var_4780, x = aw_chunk_639_cast_fp16)[name = tensor("op_6034_cast_fp16")]; + tensor var_6036_equation_0 = const()[name = tensor("op_6036_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6036_cast_fp16 = einsum(equation = var_6036_equation_0, values = (var_5556_cast_fp16, var_5955_cast_fp16))[name = tensor("op_6036_cast_fp16")]; + tensor var_6038_equation_0 = const()[name = tensor("op_6038_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6038_cast_fp16 = einsum(equation = var_6038_equation_0, values = (var_5556_cast_fp16, var_5956_cast_fp16))[name = tensor("op_6038_cast_fp16")]; + tensor var_6040_equation_0 = const()[name = tensor("op_6040_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6040_cast_fp16 = einsum(equation = var_6040_equation_0, values = (var_5556_cast_fp16, var_5957_cast_fp16))[name = tensor("op_6040_cast_fp16")]; + tensor var_6042_equation_0 = const()[name = tensor("op_6042_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6042_cast_fp16 = einsum(equation = var_6042_equation_0, values = (var_5556_cast_fp16, var_5958_cast_fp16))[name = tensor("op_6042_cast_fp16")]; + tensor var_6044_equation_0 = const()[name = tensor("op_6044_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6044_cast_fp16 = einsum(equation = var_6044_equation_0, values = (var_5560_cast_fp16, var_5959_cast_fp16))[name = tensor("op_6044_cast_fp16")]; + tensor var_6046_equation_0 = const()[name = tensor("op_6046_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6046_cast_fp16 = einsum(equation = var_6046_equation_0, values = (var_5560_cast_fp16, var_5960_cast_fp16))[name = tensor("op_6046_cast_fp16")]; + tensor var_6048_equation_0 = const()[name = tensor("op_6048_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6048_cast_fp16 = einsum(equation = var_6048_equation_0, values = (var_5560_cast_fp16, var_5961_cast_fp16))[name = tensor("op_6048_cast_fp16")]; + tensor var_6050_equation_0 = const()[name = tensor("op_6050_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6050_cast_fp16 = einsum(equation = var_6050_equation_0, values = (var_5560_cast_fp16, var_5962_cast_fp16))[name = tensor("op_6050_cast_fp16")]; + tensor var_6052_equation_0 = const()[name = tensor("op_6052_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6052_cast_fp16 = einsum(equation = var_6052_equation_0, values = (var_5564_cast_fp16, var_5963_cast_fp16))[name = tensor("op_6052_cast_fp16")]; + tensor var_6054_equation_0 = const()[name = tensor("op_6054_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6054_cast_fp16 = einsum(equation = var_6054_equation_0, values = (var_5564_cast_fp16, var_5964_cast_fp16))[name = tensor("op_6054_cast_fp16")]; + tensor var_6056_equation_0 = const()[name = tensor("op_6056_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6056_cast_fp16 = einsum(equation = var_6056_equation_0, values = (var_5564_cast_fp16, var_5965_cast_fp16))[name = tensor("op_6056_cast_fp16")]; + tensor var_6058_equation_0 = const()[name = tensor("op_6058_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6058_cast_fp16 = einsum(equation = var_6058_equation_0, values = (var_5564_cast_fp16, var_5966_cast_fp16))[name = tensor("op_6058_cast_fp16")]; + tensor var_6060_equation_0 = const()[name = tensor("op_6060_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6060_cast_fp16 = einsum(equation = var_6060_equation_0, values = (var_5568_cast_fp16, var_5967_cast_fp16))[name = tensor("op_6060_cast_fp16")]; + tensor var_6062_equation_0 = const()[name = tensor("op_6062_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6062_cast_fp16 = einsum(equation = var_6062_equation_0, values = (var_5568_cast_fp16, var_5968_cast_fp16))[name = tensor("op_6062_cast_fp16")]; + tensor var_6064_equation_0 = const()[name = tensor("op_6064_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6064_cast_fp16 = einsum(equation = var_6064_equation_0, values = (var_5568_cast_fp16, var_5969_cast_fp16))[name = tensor("op_6064_cast_fp16")]; + tensor var_6066_equation_0 = const()[name = tensor("op_6066_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6066_cast_fp16 = einsum(equation = var_6066_equation_0, values = (var_5568_cast_fp16, var_5970_cast_fp16))[name = tensor("op_6066_cast_fp16")]; + tensor var_6068_equation_0 = const()[name = tensor("op_6068_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6068_cast_fp16 = einsum(equation = var_6068_equation_0, values = (var_5572_cast_fp16, var_5971_cast_fp16))[name = tensor("op_6068_cast_fp16")]; + tensor var_6070_equation_0 = const()[name = tensor("op_6070_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6070_cast_fp16 = einsum(equation = var_6070_equation_0, values = (var_5572_cast_fp16, var_5972_cast_fp16))[name = tensor("op_6070_cast_fp16")]; + tensor var_6072_equation_0 = const()[name = tensor("op_6072_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6072_cast_fp16 = einsum(equation = var_6072_equation_0, values = (var_5572_cast_fp16, var_5973_cast_fp16))[name = tensor("op_6072_cast_fp16")]; + tensor var_6074_equation_0 = const()[name = tensor("op_6074_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6074_cast_fp16 = einsum(equation = var_6074_equation_0, values = (var_5572_cast_fp16, var_5974_cast_fp16))[name = tensor("op_6074_cast_fp16")]; + tensor var_6076_equation_0 = const()[name = tensor("op_6076_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6076_cast_fp16 = einsum(equation = var_6076_equation_0, values = (var_5576_cast_fp16, var_5975_cast_fp16))[name = tensor("op_6076_cast_fp16")]; + tensor var_6078_equation_0 = const()[name = tensor("op_6078_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6078_cast_fp16 = einsum(equation = var_6078_equation_0, values = (var_5576_cast_fp16, var_5976_cast_fp16))[name = tensor("op_6078_cast_fp16")]; + tensor var_6080_equation_0 = const()[name = tensor("op_6080_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6080_cast_fp16 = einsum(equation = var_6080_equation_0, values = (var_5576_cast_fp16, var_5977_cast_fp16))[name = tensor("op_6080_cast_fp16")]; + tensor var_6082_equation_0 = const()[name = tensor("op_6082_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6082_cast_fp16 = einsum(equation = var_6082_equation_0, values = (var_5576_cast_fp16, var_5978_cast_fp16))[name = tensor("op_6082_cast_fp16")]; + tensor var_6084_equation_0 = const()[name = tensor("op_6084_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6084_cast_fp16 = einsum(equation = var_6084_equation_0, values = (var_5580_cast_fp16, var_5979_cast_fp16))[name = tensor("op_6084_cast_fp16")]; + tensor var_6086_equation_0 = const()[name = tensor("op_6086_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6086_cast_fp16 = einsum(equation = var_6086_equation_0, values = (var_5580_cast_fp16, var_5980_cast_fp16))[name = tensor("op_6086_cast_fp16")]; + tensor var_6088_equation_0 = const()[name = tensor("op_6088_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6088_cast_fp16 = einsum(equation = var_6088_equation_0, values = (var_5580_cast_fp16, var_5981_cast_fp16))[name = tensor("op_6088_cast_fp16")]; + tensor var_6090_equation_0 = const()[name = tensor("op_6090_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6090_cast_fp16 = einsum(equation = var_6090_equation_0, values = (var_5580_cast_fp16, var_5982_cast_fp16))[name = tensor("op_6090_cast_fp16")]; + tensor var_6092_equation_0 = const()[name = tensor("op_6092_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6092_cast_fp16 = einsum(equation = var_6092_equation_0, values = (var_5584_cast_fp16, var_5983_cast_fp16))[name = tensor("op_6092_cast_fp16")]; + tensor var_6094_equation_0 = const()[name = tensor("op_6094_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6094_cast_fp16 = einsum(equation = var_6094_equation_0, values = (var_5584_cast_fp16, var_5984_cast_fp16))[name = tensor("op_6094_cast_fp16")]; + tensor var_6096_equation_0 = const()[name = tensor("op_6096_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6096_cast_fp16 = einsum(equation = var_6096_equation_0, values = (var_5584_cast_fp16, var_5985_cast_fp16))[name = tensor("op_6096_cast_fp16")]; + tensor var_6098_equation_0 = const()[name = tensor("op_6098_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6098_cast_fp16 = einsum(equation = var_6098_equation_0, values = (var_5584_cast_fp16, var_5986_cast_fp16))[name = tensor("op_6098_cast_fp16")]; + tensor var_6100_equation_0 = const()[name = tensor("op_6100_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6100_cast_fp16 = einsum(equation = var_6100_equation_0, values = (var_5588_cast_fp16, var_5987_cast_fp16))[name = tensor("op_6100_cast_fp16")]; + tensor var_6102_equation_0 = const()[name = tensor("op_6102_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6102_cast_fp16 = einsum(equation = var_6102_equation_0, values = (var_5588_cast_fp16, var_5988_cast_fp16))[name = tensor("op_6102_cast_fp16")]; + tensor var_6104_equation_0 = const()[name = tensor("op_6104_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6104_cast_fp16 = einsum(equation = var_6104_equation_0, values = (var_5588_cast_fp16, var_5989_cast_fp16))[name = tensor("op_6104_cast_fp16")]; + tensor var_6106_equation_0 = const()[name = tensor("op_6106_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6106_cast_fp16 = einsum(equation = var_6106_equation_0, values = (var_5588_cast_fp16, var_5990_cast_fp16))[name = tensor("op_6106_cast_fp16")]; + tensor var_6108_equation_0 = const()[name = tensor("op_6108_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6108_cast_fp16 = einsum(equation = var_6108_equation_0, values = (var_5592_cast_fp16, var_5991_cast_fp16))[name = tensor("op_6108_cast_fp16")]; + tensor var_6110_equation_0 = const()[name = tensor("op_6110_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6110_cast_fp16 = einsum(equation = var_6110_equation_0, values = (var_5592_cast_fp16, var_5992_cast_fp16))[name = tensor("op_6110_cast_fp16")]; + tensor var_6112_equation_0 = const()[name = tensor("op_6112_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6112_cast_fp16 = einsum(equation = var_6112_equation_0, values = (var_5592_cast_fp16, var_5993_cast_fp16))[name = tensor("op_6112_cast_fp16")]; + tensor var_6114_equation_0 = const()[name = tensor("op_6114_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6114_cast_fp16 = einsum(equation = var_6114_equation_0, values = (var_5592_cast_fp16, var_5994_cast_fp16))[name = tensor("op_6114_cast_fp16")]; + tensor var_6116_equation_0 = const()[name = tensor("op_6116_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6116_cast_fp16 = einsum(equation = var_6116_equation_0, values = (var_5596_cast_fp16, var_5995_cast_fp16))[name = tensor("op_6116_cast_fp16")]; + tensor var_6118_equation_0 = const()[name = tensor("op_6118_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6118_cast_fp16 = einsum(equation = var_6118_equation_0, values = (var_5596_cast_fp16, var_5996_cast_fp16))[name = tensor("op_6118_cast_fp16")]; + tensor var_6120_equation_0 = const()[name = tensor("op_6120_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6120_cast_fp16 = einsum(equation = var_6120_equation_0, values = (var_5596_cast_fp16, var_5997_cast_fp16))[name = tensor("op_6120_cast_fp16")]; + tensor var_6122_equation_0 = const()[name = tensor("op_6122_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6122_cast_fp16 = einsum(equation = var_6122_equation_0, values = (var_5596_cast_fp16, var_5998_cast_fp16))[name = tensor("op_6122_cast_fp16")]; + tensor var_6124_equation_0 = const()[name = tensor("op_6124_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6124_cast_fp16 = einsum(equation = var_6124_equation_0, values = (var_5600_cast_fp16, var_5999_cast_fp16))[name = tensor("op_6124_cast_fp16")]; + tensor var_6126_equation_0 = const()[name = tensor("op_6126_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6126_cast_fp16 = einsum(equation = var_6126_equation_0, values = (var_5600_cast_fp16, var_6000_cast_fp16))[name = tensor("op_6126_cast_fp16")]; + tensor var_6128_equation_0 = const()[name = tensor("op_6128_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6128_cast_fp16 = einsum(equation = var_6128_equation_0, values = (var_5600_cast_fp16, var_6001_cast_fp16))[name = tensor("op_6128_cast_fp16")]; + tensor var_6130_equation_0 = const()[name = tensor("op_6130_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6130_cast_fp16 = einsum(equation = var_6130_equation_0, values = (var_5600_cast_fp16, var_6002_cast_fp16))[name = tensor("op_6130_cast_fp16")]; + tensor var_6132_equation_0 = const()[name = tensor("op_6132_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6132_cast_fp16 = einsum(equation = var_6132_equation_0, values = (var_5604_cast_fp16, var_6003_cast_fp16))[name = tensor("op_6132_cast_fp16")]; + tensor var_6134_equation_0 = const()[name = tensor("op_6134_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6134_cast_fp16 = einsum(equation = var_6134_equation_0, values = (var_5604_cast_fp16, var_6004_cast_fp16))[name = tensor("op_6134_cast_fp16")]; + tensor var_6136_equation_0 = const()[name = tensor("op_6136_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6136_cast_fp16 = einsum(equation = var_6136_equation_0, values = (var_5604_cast_fp16, var_6005_cast_fp16))[name = tensor("op_6136_cast_fp16")]; + tensor var_6138_equation_0 = const()[name = tensor("op_6138_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6138_cast_fp16 = einsum(equation = var_6138_equation_0, values = (var_5604_cast_fp16, var_6006_cast_fp16))[name = tensor("op_6138_cast_fp16")]; + tensor var_6140_equation_0 = const()[name = tensor("op_6140_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6140_cast_fp16 = einsum(equation = var_6140_equation_0, values = (var_5608_cast_fp16, var_6007_cast_fp16))[name = tensor("op_6140_cast_fp16")]; + tensor var_6142_equation_0 = const()[name = tensor("op_6142_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6142_cast_fp16 = einsum(equation = var_6142_equation_0, values = (var_5608_cast_fp16, var_6008_cast_fp16))[name = tensor("op_6142_cast_fp16")]; + tensor var_6144_equation_0 = const()[name = tensor("op_6144_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6144_cast_fp16 = einsum(equation = var_6144_equation_0, values = (var_5608_cast_fp16, var_6009_cast_fp16))[name = tensor("op_6144_cast_fp16")]; + tensor var_6146_equation_0 = const()[name = tensor("op_6146_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6146_cast_fp16 = einsum(equation = var_6146_equation_0, values = (var_5608_cast_fp16, var_6010_cast_fp16))[name = tensor("op_6146_cast_fp16")]; + tensor var_6148_equation_0 = const()[name = tensor("op_6148_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6148_cast_fp16 = einsum(equation = var_6148_equation_0, values = (var_5612_cast_fp16, var_6011_cast_fp16))[name = tensor("op_6148_cast_fp16")]; + tensor var_6150_equation_0 = const()[name = tensor("op_6150_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6150_cast_fp16 = einsum(equation = var_6150_equation_0, values = (var_5612_cast_fp16, var_6012_cast_fp16))[name = tensor("op_6150_cast_fp16")]; + tensor var_6152_equation_0 = const()[name = tensor("op_6152_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6152_cast_fp16 = einsum(equation = var_6152_equation_0, values = (var_5612_cast_fp16, var_6013_cast_fp16))[name = tensor("op_6152_cast_fp16")]; + tensor var_6154_equation_0 = const()[name = tensor("op_6154_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6154_cast_fp16 = einsum(equation = var_6154_equation_0, values = (var_5612_cast_fp16, var_6014_cast_fp16))[name = tensor("op_6154_cast_fp16")]; + tensor var_6156_equation_0 = const()[name = tensor("op_6156_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6156_cast_fp16 = einsum(equation = var_6156_equation_0, values = (var_5616_cast_fp16, var_6015_cast_fp16))[name = tensor("op_6156_cast_fp16")]; + tensor var_6158_equation_0 = const()[name = tensor("op_6158_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6158_cast_fp16 = einsum(equation = var_6158_equation_0, values = (var_5616_cast_fp16, var_6016_cast_fp16))[name = tensor("op_6158_cast_fp16")]; + tensor var_6160_equation_0 = const()[name = tensor("op_6160_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6160_cast_fp16 = einsum(equation = var_6160_equation_0, values = (var_5616_cast_fp16, var_6017_cast_fp16))[name = tensor("op_6160_cast_fp16")]; + tensor var_6162_equation_0 = const()[name = tensor("op_6162_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6162_cast_fp16 = einsum(equation = var_6162_equation_0, values = (var_5616_cast_fp16, var_6018_cast_fp16))[name = tensor("op_6162_cast_fp16")]; + tensor var_6164_equation_0 = const()[name = tensor("op_6164_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6164_cast_fp16 = einsum(equation = var_6164_equation_0, values = (var_5620_cast_fp16, var_6019_cast_fp16))[name = tensor("op_6164_cast_fp16")]; + tensor var_6166_equation_0 = const()[name = tensor("op_6166_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6166_cast_fp16 = einsum(equation = var_6166_equation_0, values = (var_5620_cast_fp16, var_6020_cast_fp16))[name = tensor("op_6166_cast_fp16")]; + tensor var_6168_equation_0 = const()[name = tensor("op_6168_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6168_cast_fp16 = einsum(equation = var_6168_equation_0, values = (var_5620_cast_fp16, var_6021_cast_fp16))[name = tensor("op_6168_cast_fp16")]; + tensor var_6170_equation_0 = const()[name = tensor("op_6170_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6170_cast_fp16 = einsum(equation = var_6170_equation_0, values = (var_5620_cast_fp16, var_6022_cast_fp16))[name = tensor("op_6170_cast_fp16")]; + tensor var_6172_equation_0 = const()[name = tensor("op_6172_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6172_cast_fp16 = einsum(equation = var_6172_equation_0, values = (var_5624_cast_fp16, var_6023_cast_fp16))[name = tensor("op_6172_cast_fp16")]; + tensor var_6174_equation_0 = const()[name = tensor("op_6174_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6174_cast_fp16 = einsum(equation = var_6174_equation_0, values = (var_5624_cast_fp16, var_6024_cast_fp16))[name = tensor("op_6174_cast_fp16")]; + tensor var_6176_equation_0 = const()[name = tensor("op_6176_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6176_cast_fp16 = einsum(equation = var_6176_equation_0, values = (var_5624_cast_fp16, var_6025_cast_fp16))[name = tensor("op_6176_cast_fp16")]; + tensor var_6178_equation_0 = const()[name = tensor("op_6178_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6178_cast_fp16 = einsum(equation = var_6178_equation_0, values = (var_5624_cast_fp16, var_6026_cast_fp16))[name = tensor("op_6178_cast_fp16")]; + tensor var_6180_equation_0 = const()[name = tensor("op_6180_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6180_cast_fp16 = einsum(equation = var_6180_equation_0, values = (var_5628_cast_fp16, var_6027_cast_fp16))[name = tensor("op_6180_cast_fp16")]; + tensor var_6182_equation_0 = const()[name = tensor("op_6182_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6182_cast_fp16 = einsum(equation = var_6182_equation_0, values = (var_5628_cast_fp16, var_6028_cast_fp16))[name = tensor("op_6182_cast_fp16")]; + tensor var_6184_equation_0 = const()[name = tensor("op_6184_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6184_cast_fp16 = einsum(equation = var_6184_equation_0, values = (var_5628_cast_fp16, var_6029_cast_fp16))[name = tensor("op_6184_cast_fp16")]; + tensor var_6186_equation_0 = const()[name = tensor("op_6186_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6186_cast_fp16 = einsum(equation = var_6186_equation_0, values = (var_5628_cast_fp16, var_6030_cast_fp16))[name = tensor("op_6186_cast_fp16")]; + tensor var_6188_equation_0 = const()[name = tensor("op_6188_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6188_cast_fp16 = einsum(equation = var_6188_equation_0, values = (var_5632_cast_fp16, var_6031_cast_fp16))[name = tensor("op_6188_cast_fp16")]; + tensor var_6190_equation_0 = const()[name = tensor("op_6190_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6190_cast_fp16 = einsum(equation = var_6190_equation_0, values = (var_5632_cast_fp16, var_6032_cast_fp16))[name = tensor("op_6190_cast_fp16")]; + tensor var_6192_equation_0 = const()[name = tensor("op_6192_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6192_cast_fp16 = einsum(equation = var_6192_equation_0, values = (var_5632_cast_fp16, var_6033_cast_fp16))[name = tensor("op_6192_cast_fp16")]; + tensor var_6194_equation_0 = const()[name = tensor("op_6194_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_6194_cast_fp16 = einsum(equation = var_6194_equation_0, values = (var_5632_cast_fp16, var_6034_cast_fp16))[name = tensor("op_6194_cast_fp16")]; + tensor var_6196_interleave_0 = const()[name = tensor("op_6196_interleave_0"), val = tensor(false)]; + tensor var_6196_cast_fp16 = concat(axis = var_4755, interleave = var_6196_interleave_0, values = (var_6036_cast_fp16, var_6038_cast_fp16, var_6040_cast_fp16, var_6042_cast_fp16))[name = tensor("op_6196_cast_fp16")]; + tensor var_6198_interleave_0 = const()[name = tensor("op_6198_interleave_0"), val = tensor(false)]; + tensor var_6198_cast_fp16 = concat(axis = var_4755, interleave = var_6198_interleave_0, values = (var_6044_cast_fp16, var_6046_cast_fp16, var_6048_cast_fp16, var_6050_cast_fp16))[name = tensor("op_6198_cast_fp16")]; + tensor var_6200_interleave_0 = const()[name = tensor("op_6200_interleave_0"), val = tensor(false)]; + tensor var_6200_cast_fp16 = concat(axis = var_4755, interleave = var_6200_interleave_0, values = (var_6052_cast_fp16, var_6054_cast_fp16, var_6056_cast_fp16, var_6058_cast_fp16))[name = tensor("op_6200_cast_fp16")]; + tensor var_6202_interleave_0 = const()[name = tensor("op_6202_interleave_0"), val = tensor(false)]; + tensor var_6202_cast_fp16 = concat(axis = var_4755, interleave = var_6202_interleave_0, values = (var_6060_cast_fp16, var_6062_cast_fp16, var_6064_cast_fp16, var_6066_cast_fp16))[name = tensor("op_6202_cast_fp16")]; + tensor var_6204_interleave_0 = const()[name = tensor("op_6204_interleave_0"), val = tensor(false)]; + tensor var_6204_cast_fp16 = concat(axis = var_4755, interleave = var_6204_interleave_0, values = (var_6068_cast_fp16, var_6070_cast_fp16, var_6072_cast_fp16, var_6074_cast_fp16))[name = tensor("op_6204_cast_fp16")]; + tensor var_6206_interleave_0 = const()[name = tensor("op_6206_interleave_0"), val = tensor(false)]; + tensor var_6206_cast_fp16 = concat(axis = var_4755, interleave = var_6206_interleave_0, values = (var_6076_cast_fp16, var_6078_cast_fp16, var_6080_cast_fp16, var_6082_cast_fp16))[name = tensor("op_6206_cast_fp16")]; + tensor var_6208_interleave_0 = const()[name = tensor("op_6208_interleave_0"), val = tensor(false)]; + tensor var_6208_cast_fp16 = concat(axis = var_4755, interleave = var_6208_interleave_0, values = (var_6084_cast_fp16, var_6086_cast_fp16, var_6088_cast_fp16, var_6090_cast_fp16))[name = tensor("op_6208_cast_fp16")]; + tensor var_6210_interleave_0 = const()[name = tensor("op_6210_interleave_0"), val = tensor(false)]; + tensor var_6210_cast_fp16 = concat(axis = var_4755, interleave = var_6210_interleave_0, values = (var_6092_cast_fp16, var_6094_cast_fp16, var_6096_cast_fp16, var_6098_cast_fp16))[name = tensor("op_6210_cast_fp16")]; + tensor var_6212_interleave_0 = const()[name = tensor("op_6212_interleave_0"), val = tensor(false)]; + tensor var_6212_cast_fp16 = concat(axis = var_4755, interleave = var_6212_interleave_0, values = (var_6100_cast_fp16, var_6102_cast_fp16, var_6104_cast_fp16, var_6106_cast_fp16))[name = tensor("op_6212_cast_fp16")]; + tensor var_6214_interleave_0 = const()[name = tensor("op_6214_interleave_0"), val = tensor(false)]; + tensor var_6214_cast_fp16 = concat(axis = var_4755, interleave = var_6214_interleave_0, values = (var_6108_cast_fp16, var_6110_cast_fp16, var_6112_cast_fp16, var_6114_cast_fp16))[name = tensor("op_6214_cast_fp16")]; + tensor var_6216_interleave_0 = const()[name = tensor("op_6216_interleave_0"), val = tensor(false)]; + tensor var_6216_cast_fp16 = concat(axis = var_4755, interleave = var_6216_interleave_0, values = (var_6116_cast_fp16, var_6118_cast_fp16, var_6120_cast_fp16, var_6122_cast_fp16))[name = tensor("op_6216_cast_fp16")]; + tensor var_6218_interleave_0 = const()[name = tensor("op_6218_interleave_0"), val = tensor(false)]; + tensor var_6218_cast_fp16 = concat(axis = var_4755, interleave = var_6218_interleave_0, values = (var_6124_cast_fp16, var_6126_cast_fp16, var_6128_cast_fp16, var_6130_cast_fp16))[name = tensor("op_6218_cast_fp16")]; + tensor var_6220_interleave_0 = const()[name = tensor("op_6220_interleave_0"), val = tensor(false)]; + tensor var_6220_cast_fp16 = concat(axis = var_4755, interleave = var_6220_interleave_0, values = (var_6132_cast_fp16, var_6134_cast_fp16, var_6136_cast_fp16, var_6138_cast_fp16))[name = tensor("op_6220_cast_fp16")]; + tensor var_6222_interleave_0 = const()[name = tensor("op_6222_interleave_0"), val = tensor(false)]; + tensor var_6222_cast_fp16 = concat(axis = var_4755, interleave = var_6222_interleave_0, values = (var_6140_cast_fp16, var_6142_cast_fp16, var_6144_cast_fp16, var_6146_cast_fp16))[name = tensor("op_6222_cast_fp16")]; + tensor var_6224_interleave_0 = const()[name = tensor("op_6224_interleave_0"), val = tensor(false)]; + tensor var_6224_cast_fp16 = concat(axis = var_4755, interleave = var_6224_interleave_0, values = (var_6148_cast_fp16, var_6150_cast_fp16, var_6152_cast_fp16, var_6154_cast_fp16))[name = tensor("op_6224_cast_fp16")]; + tensor var_6226_interleave_0 = const()[name = tensor("op_6226_interleave_0"), val = tensor(false)]; + tensor var_6226_cast_fp16 = concat(axis = var_4755, interleave = var_6226_interleave_0, values = (var_6156_cast_fp16, var_6158_cast_fp16, var_6160_cast_fp16, var_6162_cast_fp16))[name = tensor("op_6226_cast_fp16")]; + tensor var_6228_interleave_0 = const()[name = tensor("op_6228_interleave_0"), val = tensor(false)]; + tensor var_6228_cast_fp16 = concat(axis = var_4755, interleave = var_6228_interleave_0, values = (var_6164_cast_fp16, var_6166_cast_fp16, var_6168_cast_fp16, var_6170_cast_fp16))[name = tensor("op_6228_cast_fp16")]; + tensor var_6230_interleave_0 = const()[name = tensor("op_6230_interleave_0"), val = tensor(false)]; + tensor var_6230_cast_fp16 = concat(axis = var_4755, interleave = var_6230_interleave_0, values = (var_6172_cast_fp16, var_6174_cast_fp16, var_6176_cast_fp16, var_6178_cast_fp16))[name = tensor("op_6230_cast_fp16")]; + tensor var_6232_interleave_0 = const()[name = tensor("op_6232_interleave_0"), val = tensor(false)]; + tensor var_6232_cast_fp16 = concat(axis = var_4755, interleave = var_6232_interleave_0, values = (var_6180_cast_fp16, var_6182_cast_fp16, var_6184_cast_fp16, var_6186_cast_fp16))[name = tensor("op_6232_cast_fp16")]; + tensor var_6234_interleave_0 = const()[name = tensor("op_6234_interleave_0"), val = tensor(false)]; + tensor var_6234_cast_fp16 = concat(axis = var_4755, interleave = var_6234_interleave_0, values = (var_6188_cast_fp16, var_6190_cast_fp16, var_6192_cast_fp16, var_6194_cast_fp16))[name = tensor("op_6234_cast_fp16")]; + tensor input_25_interleave_0 = const()[name = tensor("input_25_interleave_0"), val = tensor(false)]; + tensor input_25_cast_fp16 = concat(axis = var_4780, interleave = input_25_interleave_0, values = (var_6196_cast_fp16, var_6198_cast_fp16, var_6200_cast_fp16, var_6202_cast_fp16, var_6204_cast_fp16, var_6206_cast_fp16, var_6208_cast_fp16, var_6210_cast_fp16, var_6212_cast_fp16, var_6214_cast_fp16, var_6216_cast_fp16, var_6218_cast_fp16, var_6220_cast_fp16, var_6222_cast_fp16, var_6224_cast_fp16, var_6226_cast_fp16, var_6228_cast_fp16, var_6230_cast_fp16, var_6232_cast_fp16, var_6234_cast_fp16))[name = tensor("input_25_cast_fp16")]; + tensor var_6239 = const()[name = tensor("op_6239"), val = tensor([1, 1])]; + tensor var_6241 = const()[name = tensor("op_6241"), val = tensor([1, 1])]; + tensor obj_15_pad_type_0 = const()[name = tensor("obj_15_pad_type_0"), val = tensor("custom")]; + tensor obj_15_pad_0 = const()[name = tensor("obj_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_3_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_3_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(142196480)))]; + tensor layers_3_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_3_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(145473344)))]; + tensor obj_15_cast_fp16 = conv(bias = layers_3_self_attn_o_proj_bias_to_fp16, dilations = var_6241, groups = var_4780, pad = obj_15_pad_0, pad_type = obj_15_pad_type_0, strides = var_6239, weight = layers_3_self_attn_o_proj_weight_to_fp16, x = input_25_cast_fp16)[name = tensor("obj_15_cast_fp16")]; + tensor inputs_15_cast_fp16 = add(x = inputs_13_cast_fp16, y = obj_15_cast_fp16)[name = tensor("inputs_15_cast_fp16")]; + tensor var_6247 = const()[name = tensor("op_6247"), val = tensor([1])]; + tensor channels_mean_15_cast_fp16 = reduce_mean(axes = var_6247, keep_dims = var_4781, x = inputs_15_cast_fp16)[name = tensor("channels_mean_15_cast_fp16")]; + tensor zero_mean_15_cast_fp16 = sub(x = inputs_15_cast_fp16, y = channels_mean_15_cast_fp16)[name = tensor("zero_mean_15_cast_fp16")]; + tensor zero_mean_sq_15_cast_fp16 = mul(x = zero_mean_15_cast_fp16, y = zero_mean_15_cast_fp16)[name = tensor("zero_mean_sq_15_cast_fp16")]; + tensor var_6251 = const()[name = tensor("op_6251"), val = tensor([1])]; + tensor var_6252_cast_fp16 = reduce_mean(axes = var_6251, keep_dims = var_4781, x = zero_mean_sq_15_cast_fp16)[name = tensor("op_6252_cast_fp16")]; + tensor var_6253_to_fp16 = const()[name = tensor("op_6253_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_6254_cast_fp16 = add(x = var_6252_cast_fp16, y = var_6253_to_fp16)[name = tensor("op_6254_cast_fp16")]; + tensor denom_15_epsilon_0_to_fp16 = const()[name = tensor("denom_15_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_15_cast_fp16 = rsqrt(epsilon = denom_15_epsilon_0_to_fp16, x = var_6254_cast_fp16)[name = tensor("denom_15_cast_fp16")]; + tensor out_15_cast_fp16 = mul(x = zero_mean_15_cast_fp16, y = denom_15_cast_fp16)[name = tensor("out_15_cast_fp16")]; + tensor input_27_gamma_0_to_fp16 = const()[name = tensor("input_27_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(145475968)))]; + tensor input_27_beta_0_to_fp16 = const()[name = tensor("input_27_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(145478592)))]; + tensor input_27_epsilon_0_to_fp16 = const()[name = tensor("input_27_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_27_cast_fp16 = batch_norm(beta = input_27_beta_0_to_fp16, epsilon = input_27_epsilon_0_to_fp16, gamma = input_27_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_15_cast_fp16)[name = tensor("input_27_cast_fp16")]; + tensor var_6265 = const()[name = tensor("op_6265"), val = tensor([1, 1])]; + tensor var_6267 = const()[name = tensor("op_6267"), val = tensor([1, 1])]; + tensor input_29_pad_type_0 = const()[name = tensor("input_29_pad_type_0"), val = tensor("custom")]; + tensor input_29_pad_0 = const()[name = tensor("input_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_3_fc1_weight_to_fp16 = const()[name = tensor("layers_3_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(145481216)))]; + tensor layers_3_fc1_bias_to_fp16 = const()[name = tensor("layers_3_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(158588480)))]; + tensor input_29_cast_fp16 = conv(bias = layers_3_fc1_bias_to_fp16, dilations = var_6267, groups = var_4780, pad = input_29_pad_0, pad_type = input_29_pad_type_0, strides = var_6265, weight = layers_3_fc1_weight_to_fp16, x = input_27_cast_fp16)[name = tensor("input_29_cast_fp16")]; + tensor input_31_mode_0 = const()[name = tensor("input_31_mode_0"), val = tensor("EXACT")]; + tensor input_31_cast_fp16 = gelu(mode = input_31_mode_0, x = input_29_cast_fp16)[name = tensor("input_31_cast_fp16")]; + tensor var_6273 = const()[name = tensor("op_6273"), val = tensor([1, 1])]; + tensor var_6275 = const()[name = tensor("op_6275"), val = tensor([1, 1])]; + tensor hidden_states_11_pad_type_0 = const()[name = tensor("hidden_states_11_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_11_pad_0 = const()[name = tensor("hidden_states_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_3_fc2_weight_to_fp16 = const()[name = tensor("layers_3_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(158598784)))]; + tensor layers_3_fc2_bias_to_fp16 = const()[name = tensor("layers_3_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(171706048)))]; + tensor hidden_states_11_cast_fp16 = conv(bias = layers_3_fc2_bias_to_fp16, dilations = var_6275, groups = var_4780, pad = hidden_states_11_pad_0, pad_type = hidden_states_11_pad_type_0, strides = var_6273, weight = layers_3_fc2_weight_to_fp16, x = input_31_cast_fp16)[name = tensor("hidden_states_11_cast_fp16")]; + tensor inputs_17_cast_fp16 = add(x = inputs_15_cast_fp16, y = hidden_states_11_cast_fp16)[name = tensor("inputs_17_cast_fp16")]; + tensor var_6282 = const()[name = tensor("op_6282"), val = tensor(3)]; + tensor var_6307 = const()[name = tensor("op_6307"), val = tensor(1)]; + tensor var_6308 = const()[name = tensor("op_6308"), val = tensor(true)]; + tensor var_6318 = const()[name = tensor("op_6318"), val = tensor([1])]; + tensor channels_mean_17_cast_fp16 = reduce_mean(axes = var_6318, keep_dims = var_6308, x = inputs_17_cast_fp16)[name = tensor("channels_mean_17_cast_fp16")]; + tensor zero_mean_17_cast_fp16 = sub(x = inputs_17_cast_fp16, y = channels_mean_17_cast_fp16)[name = tensor("zero_mean_17_cast_fp16")]; + tensor zero_mean_sq_17_cast_fp16 = mul(x = zero_mean_17_cast_fp16, y = zero_mean_17_cast_fp16)[name = tensor("zero_mean_sq_17_cast_fp16")]; + tensor var_6322 = const()[name = tensor("op_6322"), val = tensor([1])]; + tensor var_6323_cast_fp16 = reduce_mean(axes = var_6322, keep_dims = var_6308, x = zero_mean_sq_17_cast_fp16)[name = tensor("op_6323_cast_fp16")]; + tensor var_6324_to_fp16 = const()[name = tensor("op_6324_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_6325_cast_fp16 = add(x = var_6323_cast_fp16, y = var_6324_to_fp16)[name = tensor("op_6325_cast_fp16")]; + tensor denom_17_epsilon_0_to_fp16 = const()[name = tensor("denom_17_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_17_cast_fp16 = rsqrt(epsilon = denom_17_epsilon_0_to_fp16, x = var_6325_cast_fp16)[name = tensor("denom_17_cast_fp16")]; + tensor out_17_cast_fp16 = mul(x = zero_mean_17_cast_fp16, y = denom_17_cast_fp16)[name = tensor("out_17_cast_fp16")]; + tensor obj_17_gamma_0_to_fp16 = const()[name = tensor("obj_17_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(171708672)))]; + tensor obj_17_beta_0_to_fp16 = const()[name = tensor("obj_17_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(171711296)))]; + tensor obj_17_epsilon_0_to_fp16 = const()[name = tensor("obj_17_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_17_cast_fp16 = batch_norm(beta = obj_17_beta_0_to_fp16, epsilon = obj_17_epsilon_0_to_fp16, gamma = obj_17_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_17_cast_fp16)[name = tensor("obj_17_cast_fp16")]; + tensor var_6340 = const()[name = tensor("op_6340"), val = tensor([1, 1])]; + tensor var_6342 = const()[name = tensor("op_6342"), val = tensor([1, 1])]; + tensor query_9_pad_type_0 = const()[name = tensor("query_9_pad_type_0"), val = tensor("custom")]; + tensor query_9_pad_0 = const()[name = tensor("query_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_4_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_4_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(171713920)))]; + tensor layers_4_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_4_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(174990784)))]; + tensor query_9_cast_fp16 = conv(bias = layers_4_self_attn_q_proj_bias_to_fp16, dilations = var_6342, groups = var_6307, pad = query_9_pad_0, pad_type = query_9_pad_type_0, strides = var_6340, weight = layers_4_self_attn_q_proj_weight_to_fp16, x = obj_17_cast_fp16)[name = tensor("query_9_cast_fp16")]; + tensor var_6346 = const()[name = tensor("op_6346"), val = tensor([1, 1])]; + tensor var_6348 = const()[name = tensor("op_6348"), val = tensor([1, 1])]; + tensor key_9_pad_type_0 = const()[name = tensor("key_9_pad_type_0"), val = tensor("custom")]; + tensor key_9_pad_0 = const()[name = tensor("key_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_4_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_4_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(174993408)))]; + tensor key_9_cast_fp16 = conv(dilations = var_6348, groups = var_6307, pad = key_9_pad_0, pad_type = key_9_pad_type_0, strides = var_6346, weight = layers_4_self_attn_k_proj_weight_to_fp16, x = obj_17_cast_fp16)[name = tensor("key_9_cast_fp16")]; + tensor var_6353 = const()[name = tensor("op_6353"), val = tensor([1, 1])]; + tensor var_6355 = const()[name = tensor("op_6355"), val = tensor([1, 1])]; + tensor value_9_pad_type_0 = const()[name = tensor("value_9_pad_type_0"), val = tensor("custom")]; + tensor value_9_pad_0 = const()[name = tensor("value_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_4_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_4_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(178270272)))]; + tensor layers_4_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_4_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(181547136)))]; + tensor value_9_cast_fp16 = conv(bias = layers_4_self_attn_v_proj_bias_to_fp16, dilations = var_6355, groups = var_6307, pad = value_9_pad_0, pad_type = value_9_pad_type_0, strides = var_6353, weight = layers_4_self_attn_v_proj_weight_to_fp16, x = obj_17_cast_fp16)[name = tensor("value_9_cast_fp16")]; + tensor var_6362_begin_0 = const()[name = tensor("op_6362_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6362_end_0 = const()[name = tensor("op_6362_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_6362_end_mask_0 = const()[name = tensor("op_6362_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6362_cast_fp16 = slice_by_index(begin = var_6362_begin_0, end = var_6362_end_0, end_mask = var_6362_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_6362_cast_fp16")]; + tensor var_6366_begin_0 = const()[name = tensor("op_6366_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_6366_end_0 = const()[name = tensor("op_6366_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_6366_end_mask_0 = const()[name = tensor("op_6366_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6366_cast_fp16 = slice_by_index(begin = var_6366_begin_0, end = var_6366_end_0, end_mask = var_6366_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_6366_cast_fp16")]; + tensor var_6370_begin_0 = const()[name = tensor("op_6370_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_6370_end_0 = const()[name = tensor("op_6370_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_6370_end_mask_0 = const()[name = tensor("op_6370_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6370_cast_fp16 = slice_by_index(begin = var_6370_begin_0, end = var_6370_end_0, end_mask = var_6370_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_6370_cast_fp16")]; + tensor var_6374_begin_0 = const()[name = tensor("op_6374_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_6374_end_0 = const()[name = tensor("op_6374_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_6374_end_mask_0 = const()[name = tensor("op_6374_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6374_cast_fp16 = slice_by_index(begin = var_6374_begin_0, end = var_6374_end_0, end_mask = var_6374_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_6374_cast_fp16")]; + tensor var_6378_begin_0 = const()[name = tensor("op_6378_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_6378_end_0 = const()[name = tensor("op_6378_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_6378_end_mask_0 = const()[name = tensor("op_6378_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6378_cast_fp16 = slice_by_index(begin = var_6378_begin_0, end = var_6378_end_0, end_mask = var_6378_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_6378_cast_fp16")]; + tensor var_6382_begin_0 = const()[name = tensor("op_6382_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_6382_end_0 = const()[name = tensor("op_6382_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_6382_end_mask_0 = const()[name = tensor("op_6382_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6382_cast_fp16 = slice_by_index(begin = var_6382_begin_0, end = var_6382_end_0, end_mask = var_6382_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_6382_cast_fp16")]; + tensor var_6386_begin_0 = const()[name = tensor("op_6386_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_6386_end_0 = const()[name = tensor("op_6386_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_6386_end_mask_0 = const()[name = tensor("op_6386_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6386_cast_fp16 = slice_by_index(begin = var_6386_begin_0, end = var_6386_end_0, end_mask = var_6386_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_6386_cast_fp16")]; + tensor var_6390_begin_0 = const()[name = tensor("op_6390_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_6390_end_0 = const()[name = tensor("op_6390_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_6390_end_mask_0 = const()[name = tensor("op_6390_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6390_cast_fp16 = slice_by_index(begin = var_6390_begin_0, end = var_6390_end_0, end_mask = var_6390_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_6390_cast_fp16")]; + tensor var_6394_begin_0 = const()[name = tensor("op_6394_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_6394_end_0 = const()[name = tensor("op_6394_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_6394_end_mask_0 = const()[name = tensor("op_6394_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6394_cast_fp16 = slice_by_index(begin = var_6394_begin_0, end = var_6394_end_0, end_mask = var_6394_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_6394_cast_fp16")]; + tensor var_6398_begin_0 = const()[name = tensor("op_6398_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_6398_end_0 = const()[name = tensor("op_6398_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_6398_end_mask_0 = const()[name = tensor("op_6398_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6398_cast_fp16 = slice_by_index(begin = var_6398_begin_0, end = var_6398_end_0, end_mask = var_6398_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_6398_cast_fp16")]; + tensor var_6402_begin_0 = const()[name = tensor("op_6402_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_6402_end_0 = const()[name = tensor("op_6402_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_6402_end_mask_0 = const()[name = tensor("op_6402_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6402_cast_fp16 = slice_by_index(begin = var_6402_begin_0, end = var_6402_end_0, end_mask = var_6402_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_6402_cast_fp16")]; + tensor var_6406_begin_0 = const()[name = tensor("op_6406_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_6406_end_0 = const()[name = tensor("op_6406_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_6406_end_mask_0 = const()[name = tensor("op_6406_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6406_cast_fp16 = slice_by_index(begin = var_6406_begin_0, end = var_6406_end_0, end_mask = var_6406_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_6406_cast_fp16")]; + tensor var_6410_begin_0 = const()[name = tensor("op_6410_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_6410_end_0 = const()[name = tensor("op_6410_end_0"), val = tensor([1, 832, 1, 1500])]; + tensor var_6410_end_mask_0 = const()[name = tensor("op_6410_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6410_cast_fp16 = slice_by_index(begin = var_6410_begin_0, end = var_6410_end_0, end_mask = var_6410_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_6410_cast_fp16")]; + tensor var_6414_begin_0 = const()[name = tensor("op_6414_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_6414_end_0 = const()[name = tensor("op_6414_end_0"), val = tensor([1, 896, 1, 1500])]; + tensor var_6414_end_mask_0 = const()[name = tensor("op_6414_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6414_cast_fp16 = slice_by_index(begin = var_6414_begin_0, end = var_6414_end_0, end_mask = var_6414_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_6414_cast_fp16")]; + tensor var_6418_begin_0 = const()[name = tensor("op_6418_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_6418_end_0 = const()[name = tensor("op_6418_end_0"), val = tensor([1, 960, 1, 1500])]; + tensor var_6418_end_mask_0 = const()[name = tensor("op_6418_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6418_cast_fp16 = slice_by_index(begin = var_6418_begin_0, end = var_6418_end_0, end_mask = var_6418_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_6418_cast_fp16")]; + tensor var_6422_begin_0 = const()[name = tensor("op_6422_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_6422_end_0 = const()[name = tensor("op_6422_end_0"), val = tensor([1, 1024, 1, 1500])]; + tensor var_6422_end_mask_0 = const()[name = tensor("op_6422_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6422_cast_fp16 = slice_by_index(begin = var_6422_begin_0, end = var_6422_end_0, end_mask = var_6422_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_6422_cast_fp16")]; + tensor var_6426_begin_0 = const()[name = tensor("op_6426_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_6426_end_0 = const()[name = tensor("op_6426_end_0"), val = tensor([1, 1088, 1, 1500])]; + tensor var_6426_end_mask_0 = const()[name = tensor("op_6426_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6426_cast_fp16 = slice_by_index(begin = var_6426_begin_0, end = var_6426_end_0, end_mask = var_6426_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_6426_cast_fp16")]; + tensor var_6430_begin_0 = const()[name = tensor("op_6430_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_6430_end_0 = const()[name = tensor("op_6430_end_0"), val = tensor([1, 1152, 1, 1500])]; + tensor var_6430_end_mask_0 = const()[name = tensor("op_6430_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6430_cast_fp16 = slice_by_index(begin = var_6430_begin_0, end = var_6430_end_0, end_mask = var_6430_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_6430_cast_fp16")]; + tensor var_6434_begin_0 = const()[name = tensor("op_6434_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_6434_end_0 = const()[name = tensor("op_6434_end_0"), val = tensor([1, 1216, 1, 1500])]; + tensor var_6434_end_mask_0 = const()[name = tensor("op_6434_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6434_cast_fp16 = slice_by_index(begin = var_6434_begin_0, end = var_6434_end_0, end_mask = var_6434_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_6434_cast_fp16")]; + tensor var_6438_begin_0 = const()[name = tensor("op_6438_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_6438_end_0 = const()[name = tensor("op_6438_end_0"), val = tensor([1, 1280, 1, 1500])]; + tensor var_6438_end_mask_0 = const()[name = tensor("op_6438_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_6438_cast_fp16 = slice_by_index(begin = var_6438_begin_0, end = var_6438_end_0, end_mask = var_6438_end_mask_0, x = query_9_cast_fp16)[name = tensor("op_6438_cast_fp16")]; + tensor var_6447_begin_0 = const()[name = tensor("op_6447_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6447_end_0 = const()[name = tensor("op_6447_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_6447_end_mask_0 = const()[name = tensor("op_6447_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6447_cast_fp16 = slice_by_index(begin = var_6447_begin_0, end = var_6447_end_0, end_mask = var_6447_end_mask_0, x = var_6362_cast_fp16)[name = tensor("op_6447_cast_fp16")]; + tensor var_6454_begin_0 = const()[name = tensor("op_6454_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_6454_end_0 = const()[name = tensor("op_6454_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_6454_end_mask_0 = const()[name = tensor("op_6454_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6454_cast_fp16 = slice_by_index(begin = var_6454_begin_0, end = var_6454_end_0, end_mask = var_6454_end_mask_0, x = var_6362_cast_fp16)[name = tensor("op_6454_cast_fp16")]; + tensor var_6461_begin_0 = const()[name = tensor("op_6461_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_6461_end_0 = const()[name = tensor("op_6461_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_6461_end_mask_0 = const()[name = tensor("op_6461_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6461_cast_fp16 = slice_by_index(begin = var_6461_begin_0, end = var_6461_end_0, end_mask = var_6461_end_mask_0, x = var_6362_cast_fp16)[name = tensor("op_6461_cast_fp16")]; + tensor var_6468_begin_0 = const()[name = tensor("op_6468_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_6468_end_0 = const()[name = tensor("op_6468_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_6468_end_mask_0 = const()[name = tensor("op_6468_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6468_cast_fp16 = slice_by_index(begin = var_6468_begin_0, end = var_6468_end_0, end_mask = var_6468_end_mask_0, x = var_6362_cast_fp16)[name = tensor("op_6468_cast_fp16")]; + tensor var_6475_begin_0 = const()[name = tensor("op_6475_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6475_end_0 = const()[name = tensor("op_6475_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_6475_end_mask_0 = const()[name = tensor("op_6475_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6475_cast_fp16 = slice_by_index(begin = var_6475_begin_0, end = var_6475_end_0, end_mask = var_6475_end_mask_0, x = var_6366_cast_fp16)[name = tensor("op_6475_cast_fp16")]; + tensor var_6482_begin_0 = const()[name = tensor("op_6482_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_6482_end_0 = const()[name = tensor("op_6482_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_6482_end_mask_0 = const()[name = tensor("op_6482_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6482_cast_fp16 = slice_by_index(begin = var_6482_begin_0, end = var_6482_end_0, end_mask = var_6482_end_mask_0, x = var_6366_cast_fp16)[name = tensor("op_6482_cast_fp16")]; + tensor var_6489_begin_0 = const()[name = tensor("op_6489_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_6489_end_0 = const()[name = tensor("op_6489_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_6489_end_mask_0 = const()[name = tensor("op_6489_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6489_cast_fp16 = slice_by_index(begin = var_6489_begin_0, end = var_6489_end_0, end_mask = var_6489_end_mask_0, x = var_6366_cast_fp16)[name = tensor("op_6489_cast_fp16")]; + tensor var_6496_begin_0 = const()[name = tensor("op_6496_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_6496_end_0 = const()[name = tensor("op_6496_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_6496_end_mask_0 = const()[name = tensor("op_6496_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6496_cast_fp16 = slice_by_index(begin = var_6496_begin_0, end = var_6496_end_0, end_mask = var_6496_end_mask_0, x = var_6366_cast_fp16)[name = tensor("op_6496_cast_fp16")]; + tensor var_6503_begin_0 = const()[name = tensor("op_6503_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6503_end_0 = const()[name = tensor("op_6503_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_6503_end_mask_0 = const()[name = tensor("op_6503_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6503_cast_fp16 = slice_by_index(begin = var_6503_begin_0, end = var_6503_end_0, end_mask = var_6503_end_mask_0, x = var_6370_cast_fp16)[name = tensor("op_6503_cast_fp16")]; + tensor var_6510_begin_0 = const()[name = tensor("op_6510_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_6510_end_0 = const()[name = tensor("op_6510_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_6510_end_mask_0 = const()[name = tensor("op_6510_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6510_cast_fp16 = slice_by_index(begin = var_6510_begin_0, end = var_6510_end_0, end_mask = var_6510_end_mask_0, x = var_6370_cast_fp16)[name = tensor("op_6510_cast_fp16")]; + tensor var_6517_begin_0 = const()[name = tensor("op_6517_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_6517_end_0 = const()[name = tensor("op_6517_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_6517_end_mask_0 = const()[name = tensor("op_6517_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6517_cast_fp16 = slice_by_index(begin = var_6517_begin_0, end = var_6517_end_0, end_mask = var_6517_end_mask_0, x = var_6370_cast_fp16)[name = tensor("op_6517_cast_fp16")]; + tensor var_6524_begin_0 = const()[name = tensor("op_6524_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_6524_end_0 = const()[name = tensor("op_6524_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_6524_end_mask_0 = const()[name = tensor("op_6524_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6524_cast_fp16 = slice_by_index(begin = var_6524_begin_0, end = var_6524_end_0, end_mask = var_6524_end_mask_0, x = var_6370_cast_fp16)[name = tensor("op_6524_cast_fp16")]; + tensor var_6531_begin_0 = const()[name = tensor("op_6531_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6531_end_0 = const()[name = tensor("op_6531_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_6531_end_mask_0 = const()[name = tensor("op_6531_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6531_cast_fp16 = slice_by_index(begin = var_6531_begin_0, end = var_6531_end_0, end_mask = var_6531_end_mask_0, x = var_6374_cast_fp16)[name = tensor("op_6531_cast_fp16")]; + tensor var_6538_begin_0 = const()[name = tensor("op_6538_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_6538_end_0 = const()[name = tensor("op_6538_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_6538_end_mask_0 = const()[name = tensor("op_6538_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6538_cast_fp16 = slice_by_index(begin = var_6538_begin_0, end = var_6538_end_0, end_mask = var_6538_end_mask_0, x = var_6374_cast_fp16)[name = tensor("op_6538_cast_fp16")]; + tensor var_6545_begin_0 = const()[name = tensor("op_6545_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_6545_end_0 = const()[name = tensor("op_6545_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_6545_end_mask_0 = const()[name = tensor("op_6545_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6545_cast_fp16 = slice_by_index(begin = var_6545_begin_0, end = var_6545_end_0, end_mask = var_6545_end_mask_0, x = var_6374_cast_fp16)[name = tensor("op_6545_cast_fp16")]; + tensor var_6552_begin_0 = const()[name = tensor("op_6552_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_6552_end_0 = const()[name = tensor("op_6552_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_6552_end_mask_0 = const()[name = tensor("op_6552_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6552_cast_fp16 = slice_by_index(begin = var_6552_begin_0, end = var_6552_end_0, end_mask = var_6552_end_mask_0, x = var_6374_cast_fp16)[name = tensor("op_6552_cast_fp16")]; + tensor var_6559_begin_0 = const()[name = tensor("op_6559_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6559_end_0 = const()[name = tensor("op_6559_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_6559_end_mask_0 = const()[name = tensor("op_6559_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6559_cast_fp16 = slice_by_index(begin = var_6559_begin_0, end = var_6559_end_0, end_mask = var_6559_end_mask_0, x = var_6378_cast_fp16)[name = tensor("op_6559_cast_fp16")]; + tensor var_6566_begin_0 = const()[name = tensor("op_6566_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_6566_end_0 = const()[name = tensor("op_6566_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_6566_end_mask_0 = const()[name = tensor("op_6566_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6566_cast_fp16 = slice_by_index(begin = var_6566_begin_0, end = var_6566_end_0, end_mask = var_6566_end_mask_0, x = var_6378_cast_fp16)[name = tensor("op_6566_cast_fp16")]; + tensor var_6573_begin_0 = const()[name = tensor("op_6573_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_6573_end_0 = const()[name = tensor("op_6573_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_6573_end_mask_0 = const()[name = tensor("op_6573_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6573_cast_fp16 = slice_by_index(begin = var_6573_begin_0, end = var_6573_end_0, end_mask = var_6573_end_mask_0, x = var_6378_cast_fp16)[name = tensor("op_6573_cast_fp16")]; + tensor var_6580_begin_0 = const()[name = tensor("op_6580_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_6580_end_0 = const()[name = tensor("op_6580_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_6580_end_mask_0 = const()[name = tensor("op_6580_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6580_cast_fp16 = slice_by_index(begin = var_6580_begin_0, end = var_6580_end_0, end_mask = var_6580_end_mask_0, x = var_6378_cast_fp16)[name = tensor("op_6580_cast_fp16")]; + tensor var_6587_begin_0 = const()[name = tensor("op_6587_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6587_end_0 = const()[name = tensor("op_6587_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_6587_end_mask_0 = const()[name = tensor("op_6587_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6587_cast_fp16 = slice_by_index(begin = var_6587_begin_0, end = var_6587_end_0, end_mask = var_6587_end_mask_0, x = var_6382_cast_fp16)[name = tensor("op_6587_cast_fp16")]; + tensor var_6594_begin_0 = const()[name = tensor("op_6594_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_6594_end_0 = const()[name = tensor("op_6594_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_6594_end_mask_0 = const()[name = tensor("op_6594_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6594_cast_fp16 = slice_by_index(begin = var_6594_begin_0, end = var_6594_end_0, end_mask = var_6594_end_mask_0, x = var_6382_cast_fp16)[name = tensor("op_6594_cast_fp16")]; + tensor var_6601_begin_0 = const()[name = tensor("op_6601_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_6601_end_0 = const()[name = tensor("op_6601_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_6601_end_mask_0 = const()[name = tensor("op_6601_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6601_cast_fp16 = slice_by_index(begin = var_6601_begin_0, end = var_6601_end_0, end_mask = var_6601_end_mask_0, x = var_6382_cast_fp16)[name = tensor("op_6601_cast_fp16")]; + tensor var_6608_begin_0 = const()[name = tensor("op_6608_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_6608_end_0 = const()[name = tensor("op_6608_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_6608_end_mask_0 = const()[name = tensor("op_6608_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6608_cast_fp16 = slice_by_index(begin = var_6608_begin_0, end = var_6608_end_0, end_mask = var_6608_end_mask_0, x = var_6382_cast_fp16)[name = tensor("op_6608_cast_fp16")]; + tensor var_6615_begin_0 = const()[name = tensor("op_6615_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6615_end_0 = const()[name = tensor("op_6615_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_6615_end_mask_0 = const()[name = tensor("op_6615_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6615_cast_fp16 = slice_by_index(begin = var_6615_begin_0, end = var_6615_end_0, end_mask = var_6615_end_mask_0, x = var_6386_cast_fp16)[name = tensor("op_6615_cast_fp16")]; + tensor var_6622_begin_0 = const()[name = tensor("op_6622_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_6622_end_0 = const()[name = tensor("op_6622_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_6622_end_mask_0 = const()[name = tensor("op_6622_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6622_cast_fp16 = slice_by_index(begin = var_6622_begin_0, end = var_6622_end_0, end_mask = var_6622_end_mask_0, x = var_6386_cast_fp16)[name = tensor("op_6622_cast_fp16")]; + tensor var_6629_begin_0 = const()[name = tensor("op_6629_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_6629_end_0 = const()[name = tensor("op_6629_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_6629_end_mask_0 = const()[name = tensor("op_6629_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6629_cast_fp16 = slice_by_index(begin = var_6629_begin_0, end = var_6629_end_0, end_mask = var_6629_end_mask_0, x = var_6386_cast_fp16)[name = tensor("op_6629_cast_fp16")]; + tensor var_6636_begin_0 = const()[name = tensor("op_6636_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_6636_end_0 = const()[name = tensor("op_6636_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_6636_end_mask_0 = const()[name = tensor("op_6636_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6636_cast_fp16 = slice_by_index(begin = var_6636_begin_0, end = var_6636_end_0, end_mask = var_6636_end_mask_0, x = var_6386_cast_fp16)[name = tensor("op_6636_cast_fp16")]; + tensor var_6643_begin_0 = const()[name = tensor("op_6643_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6643_end_0 = const()[name = tensor("op_6643_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_6643_end_mask_0 = const()[name = tensor("op_6643_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6643_cast_fp16 = slice_by_index(begin = var_6643_begin_0, end = var_6643_end_0, end_mask = var_6643_end_mask_0, x = var_6390_cast_fp16)[name = tensor("op_6643_cast_fp16")]; + tensor var_6650_begin_0 = const()[name = tensor("op_6650_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_6650_end_0 = const()[name = tensor("op_6650_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_6650_end_mask_0 = const()[name = tensor("op_6650_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6650_cast_fp16 = slice_by_index(begin = var_6650_begin_0, end = var_6650_end_0, end_mask = var_6650_end_mask_0, x = var_6390_cast_fp16)[name = tensor("op_6650_cast_fp16")]; + tensor var_6657_begin_0 = const()[name = tensor("op_6657_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_6657_end_0 = const()[name = tensor("op_6657_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_6657_end_mask_0 = const()[name = tensor("op_6657_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6657_cast_fp16 = slice_by_index(begin = var_6657_begin_0, end = var_6657_end_0, end_mask = var_6657_end_mask_0, x = var_6390_cast_fp16)[name = tensor("op_6657_cast_fp16")]; + tensor var_6664_begin_0 = const()[name = tensor("op_6664_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_6664_end_0 = const()[name = tensor("op_6664_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_6664_end_mask_0 = const()[name = tensor("op_6664_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6664_cast_fp16 = slice_by_index(begin = var_6664_begin_0, end = var_6664_end_0, end_mask = var_6664_end_mask_0, x = var_6390_cast_fp16)[name = tensor("op_6664_cast_fp16")]; + tensor var_6671_begin_0 = const()[name = tensor("op_6671_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6671_end_0 = const()[name = tensor("op_6671_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_6671_end_mask_0 = const()[name = tensor("op_6671_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6671_cast_fp16 = slice_by_index(begin = var_6671_begin_0, end = var_6671_end_0, end_mask = var_6671_end_mask_0, x = var_6394_cast_fp16)[name = tensor("op_6671_cast_fp16")]; + tensor var_6678_begin_0 = const()[name = tensor("op_6678_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_6678_end_0 = const()[name = tensor("op_6678_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_6678_end_mask_0 = const()[name = tensor("op_6678_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6678_cast_fp16 = slice_by_index(begin = var_6678_begin_0, end = var_6678_end_0, end_mask = var_6678_end_mask_0, x = var_6394_cast_fp16)[name = tensor("op_6678_cast_fp16")]; + tensor var_6685_begin_0 = const()[name = tensor("op_6685_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_6685_end_0 = const()[name = tensor("op_6685_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_6685_end_mask_0 = const()[name = tensor("op_6685_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6685_cast_fp16 = slice_by_index(begin = var_6685_begin_0, end = var_6685_end_0, end_mask = var_6685_end_mask_0, x = var_6394_cast_fp16)[name = tensor("op_6685_cast_fp16")]; + tensor var_6692_begin_0 = const()[name = tensor("op_6692_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_6692_end_0 = const()[name = tensor("op_6692_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_6692_end_mask_0 = const()[name = tensor("op_6692_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6692_cast_fp16 = slice_by_index(begin = var_6692_begin_0, end = var_6692_end_0, end_mask = var_6692_end_mask_0, x = var_6394_cast_fp16)[name = tensor("op_6692_cast_fp16")]; + tensor var_6699_begin_0 = const()[name = tensor("op_6699_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6699_end_0 = const()[name = tensor("op_6699_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_6699_end_mask_0 = const()[name = tensor("op_6699_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6699_cast_fp16 = slice_by_index(begin = var_6699_begin_0, end = var_6699_end_0, end_mask = var_6699_end_mask_0, x = var_6398_cast_fp16)[name = tensor("op_6699_cast_fp16")]; + tensor var_6706_begin_0 = const()[name = tensor("op_6706_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_6706_end_0 = const()[name = tensor("op_6706_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_6706_end_mask_0 = const()[name = tensor("op_6706_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6706_cast_fp16 = slice_by_index(begin = var_6706_begin_0, end = var_6706_end_0, end_mask = var_6706_end_mask_0, x = var_6398_cast_fp16)[name = tensor("op_6706_cast_fp16")]; + tensor var_6713_begin_0 = const()[name = tensor("op_6713_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_6713_end_0 = const()[name = tensor("op_6713_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_6713_end_mask_0 = const()[name = tensor("op_6713_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6713_cast_fp16 = slice_by_index(begin = var_6713_begin_0, end = var_6713_end_0, end_mask = var_6713_end_mask_0, x = var_6398_cast_fp16)[name = tensor("op_6713_cast_fp16")]; + tensor var_6720_begin_0 = const()[name = tensor("op_6720_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_6720_end_0 = const()[name = tensor("op_6720_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_6720_end_mask_0 = const()[name = tensor("op_6720_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6720_cast_fp16 = slice_by_index(begin = var_6720_begin_0, end = var_6720_end_0, end_mask = var_6720_end_mask_0, x = var_6398_cast_fp16)[name = tensor("op_6720_cast_fp16")]; + tensor var_6727_begin_0 = const()[name = tensor("op_6727_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6727_end_0 = const()[name = tensor("op_6727_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_6727_end_mask_0 = const()[name = tensor("op_6727_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6727_cast_fp16 = slice_by_index(begin = var_6727_begin_0, end = var_6727_end_0, end_mask = var_6727_end_mask_0, x = var_6402_cast_fp16)[name = tensor("op_6727_cast_fp16")]; + tensor var_6734_begin_0 = const()[name = tensor("op_6734_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_6734_end_0 = const()[name = tensor("op_6734_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_6734_end_mask_0 = const()[name = tensor("op_6734_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6734_cast_fp16 = slice_by_index(begin = var_6734_begin_0, end = var_6734_end_0, end_mask = var_6734_end_mask_0, x = var_6402_cast_fp16)[name = tensor("op_6734_cast_fp16")]; + tensor var_6741_begin_0 = const()[name = tensor("op_6741_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_6741_end_0 = const()[name = tensor("op_6741_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_6741_end_mask_0 = const()[name = tensor("op_6741_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6741_cast_fp16 = slice_by_index(begin = var_6741_begin_0, end = var_6741_end_0, end_mask = var_6741_end_mask_0, x = var_6402_cast_fp16)[name = tensor("op_6741_cast_fp16")]; + tensor var_6748_begin_0 = const()[name = tensor("op_6748_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_6748_end_0 = const()[name = tensor("op_6748_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_6748_end_mask_0 = const()[name = tensor("op_6748_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6748_cast_fp16 = slice_by_index(begin = var_6748_begin_0, end = var_6748_end_0, end_mask = var_6748_end_mask_0, x = var_6402_cast_fp16)[name = tensor("op_6748_cast_fp16")]; + tensor var_6755_begin_0 = const()[name = tensor("op_6755_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6755_end_0 = const()[name = tensor("op_6755_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_6755_end_mask_0 = const()[name = tensor("op_6755_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6755_cast_fp16 = slice_by_index(begin = var_6755_begin_0, end = var_6755_end_0, end_mask = var_6755_end_mask_0, x = var_6406_cast_fp16)[name = tensor("op_6755_cast_fp16")]; + tensor var_6762_begin_0 = const()[name = tensor("op_6762_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_6762_end_0 = const()[name = tensor("op_6762_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_6762_end_mask_0 = const()[name = tensor("op_6762_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6762_cast_fp16 = slice_by_index(begin = var_6762_begin_0, end = var_6762_end_0, end_mask = var_6762_end_mask_0, x = var_6406_cast_fp16)[name = tensor("op_6762_cast_fp16")]; + tensor var_6769_begin_0 = const()[name = tensor("op_6769_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_6769_end_0 = const()[name = tensor("op_6769_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_6769_end_mask_0 = const()[name = tensor("op_6769_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6769_cast_fp16 = slice_by_index(begin = var_6769_begin_0, end = var_6769_end_0, end_mask = var_6769_end_mask_0, x = var_6406_cast_fp16)[name = tensor("op_6769_cast_fp16")]; + tensor var_6776_begin_0 = const()[name = tensor("op_6776_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_6776_end_0 = const()[name = tensor("op_6776_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_6776_end_mask_0 = const()[name = tensor("op_6776_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6776_cast_fp16 = slice_by_index(begin = var_6776_begin_0, end = var_6776_end_0, end_mask = var_6776_end_mask_0, x = var_6406_cast_fp16)[name = tensor("op_6776_cast_fp16")]; + tensor var_6783_begin_0 = const()[name = tensor("op_6783_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6783_end_0 = const()[name = tensor("op_6783_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_6783_end_mask_0 = const()[name = tensor("op_6783_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6783_cast_fp16 = slice_by_index(begin = var_6783_begin_0, end = var_6783_end_0, end_mask = var_6783_end_mask_0, x = var_6410_cast_fp16)[name = tensor("op_6783_cast_fp16")]; + tensor var_6790_begin_0 = const()[name = tensor("op_6790_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_6790_end_0 = const()[name = tensor("op_6790_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_6790_end_mask_0 = const()[name = tensor("op_6790_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6790_cast_fp16 = slice_by_index(begin = var_6790_begin_0, end = var_6790_end_0, end_mask = var_6790_end_mask_0, x = var_6410_cast_fp16)[name = tensor("op_6790_cast_fp16")]; + tensor var_6797_begin_0 = const()[name = tensor("op_6797_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_6797_end_0 = const()[name = tensor("op_6797_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_6797_end_mask_0 = const()[name = tensor("op_6797_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6797_cast_fp16 = slice_by_index(begin = var_6797_begin_0, end = var_6797_end_0, end_mask = var_6797_end_mask_0, x = var_6410_cast_fp16)[name = tensor("op_6797_cast_fp16")]; + tensor var_6804_begin_0 = const()[name = tensor("op_6804_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_6804_end_0 = const()[name = tensor("op_6804_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_6804_end_mask_0 = const()[name = tensor("op_6804_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6804_cast_fp16 = slice_by_index(begin = var_6804_begin_0, end = var_6804_end_0, end_mask = var_6804_end_mask_0, x = var_6410_cast_fp16)[name = tensor("op_6804_cast_fp16")]; + tensor var_6811_begin_0 = const()[name = tensor("op_6811_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6811_end_0 = const()[name = tensor("op_6811_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_6811_end_mask_0 = const()[name = tensor("op_6811_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6811_cast_fp16 = slice_by_index(begin = var_6811_begin_0, end = var_6811_end_0, end_mask = var_6811_end_mask_0, x = var_6414_cast_fp16)[name = tensor("op_6811_cast_fp16")]; + tensor var_6818_begin_0 = const()[name = tensor("op_6818_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_6818_end_0 = const()[name = tensor("op_6818_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_6818_end_mask_0 = const()[name = tensor("op_6818_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6818_cast_fp16 = slice_by_index(begin = var_6818_begin_0, end = var_6818_end_0, end_mask = var_6818_end_mask_0, x = var_6414_cast_fp16)[name = tensor("op_6818_cast_fp16")]; + tensor var_6825_begin_0 = const()[name = tensor("op_6825_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_6825_end_0 = const()[name = tensor("op_6825_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_6825_end_mask_0 = const()[name = tensor("op_6825_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6825_cast_fp16 = slice_by_index(begin = var_6825_begin_0, end = var_6825_end_0, end_mask = var_6825_end_mask_0, x = var_6414_cast_fp16)[name = tensor("op_6825_cast_fp16")]; + tensor var_6832_begin_0 = const()[name = tensor("op_6832_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_6832_end_0 = const()[name = tensor("op_6832_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_6832_end_mask_0 = const()[name = tensor("op_6832_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6832_cast_fp16 = slice_by_index(begin = var_6832_begin_0, end = var_6832_end_0, end_mask = var_6832_end_mask_0, x = var_6414_cast_fp16)[name = tensor("op_6832_cast_fp16")]; + tensor var_6839_begin_0 = const()[name = tensor("op_6839_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6839_end_0 = const()[name = tensor("op_6839_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_6839_end_mask_0 = const()[name = tensor("op_6839_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6839_cast_fp16 = slice_by_index(begin = var_6839_begin_0, end = var_6839_end_0, end_mask = var_6839_end_mask_0, x = var_6418_cast_fp16)[name = tensor("op_6839_cast_fp16")]; + tensor var_6846_begin_0 = const()[name = tensor("op_6846_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_6846_end_0 = const()[name = tensor("op_6846_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_6846_end_mask_0 = const()[name = tensor("op_6846_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6846_cast_fp16 = slice_by_index(begin = var_6846_begin_0, end = var_6846_end_0, end_mask = var_6846_end_mask_0, x = var_6418_cast_fp16)[name = tensor("op_6846_cast_fp16")]; + tensor var_6853_begin_0 = const()[name = tensor("op_6853_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_6853_end_0 = const()[name = tensor("op_6853_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_6853_end_mask_0 = const()[name = tensor("op_6853_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6853_cast_fp16 = slice_by_index(begin = var_6853_begin_0, end = var_6853_end_0, end_mask = var_6853_end_mask_0, x = var_6418_cast_fp16)[name = tensor("op_6853_cast_fp16")]; + tensor var_6860_begin_0 = const()[name = tensor("op_6860_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_6860_end_0 = const()[name = tensor("op_6860_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_6860_end_mask_0 = const()[name = tensor("op_6860_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6860_cast_fp16 = slice_by_index(begin = var_6860_begin_0, end = var_6860_end_0, end_mask = var_6860_end_mask_0, x = var_6418_cast_fp16)[name = tensor("op_6860_cast_fp16")]; + tensor var_6867_begin_0 = const()[name = tensor("op_6867_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6867_end_0 = const()[name = tensor("op_6867_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_6867_end_mask_0 = const()[name = tensor("op_6867_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6867_cast_fp16 = slice_by_index(begin = var_6867_begin_0, end = var_6867_end_0, end_mask = var_6867_end_mask_0, x = var_6422_cast_fp16)[name = tensor("op_6867_cast_fp16")]; + tensor var_6874_begin_0 = const()[name = tensor("op_6874_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_6874_end_0 = const()[name = tensor("op_6874_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_6874_end_mask_0 = const()[name = tensor("op_6874_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6874_cast_fp16 = slice_by_index(begin = var_6874_begin_0, end = var_6874_end_0, end_mask = var_6874_end_mask_0, x = var_6422_cast_fp16)[name = tensor("op_6874_cast_fp16")]; + tensor var_6881_begin_0 = const()[name = tensor("op_6881_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_6881_end_0 = const()[name = tensor("op_6881_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_6881_end_mask_0 = const()[name = tensor("op_6881_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6881_cast_fp16 = slice_by_index(begin = var_6881_begin_0, end = var_6881_end_0, end_mask = var_6881_end_mask_0, x = var_6422_cast_fp16)[name = tensor("op_6881_cast_fp16")]; + tensor var_6888_begin_0 = const()[name = tensor("op_6888_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_6888_end_0 = const()[name = tensor("op_6888_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_6888_end_mask_0 = const()[name = tensor("op_6888_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6888_cast_fp16 = slice_by_index(begin = var_6888_begin_0, end = var_6888_end_0, end_mask = var_6888_end_mask_0, x = var_6422_cast_fp16)[name = tensor("op_6888_cast_fp16")]; + tensor var_6895_begin_0 = const()[name = tensor("op_6895_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6895_end_0 = const()[name = tensor("op_6895_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_6895_end_mask_0 = const()[name = tensor("op_6895_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6895_cast_fp16 = slice_by_index(begin = var_6895_begin_0, end = var_6895_end_0, end_mask = var_6895_end_mask_0, x = var_6426_cast_fp16)[name = tensor("op_6895_cast_fp16")]; + tensor var_6902_begin_0 = const()[name = tensor("op_6902_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_6902_end_0 = const()[name = tensor("op_6902_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_6902_end_mask_0 = const()[name = tensor("op_6902_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6902_cast_fp16 = slice_by_index(begin = var_6902_begin_0, end = var_6902_end_0, end_mask = var_6902_end_mask_0, x = var_6426_cast_fp16)[name = tensor("op_6902_cast_fp16")]; + tensor var_6909_begin_0 = const()[name = tensor("op_6909_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_6909_end_0 = const()[name = tensor("op_6909_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_6909_end_mask_0 = const()[name = tensor("op_6909_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6909_cast_fp16 = slice_by_index(begin = var_6909_begin_0, end = var_6909_end_0, end_mask = var_6909_end_mask_0, x = var_6426_cast_fp16)[name = tensor("op_6909_cast_fp16")]; + tensor var_6916_begin_0 = const()[name = tensor("op_6916_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_6916_end_0 = const()[name = tensor("op_6916_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_6916_end_mask_0 = const()[name = tensor("op_6916_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6916_cast_fp16 = slice_by_index(begin = var_6916_begin_0, end = var_6916_end_0, end_mask = var_6916_end_mask_0, x = var_6426_cast_fp16)[name = tensor("op_6916_cast_fp16")]; + tensor var_6923_begin_0 = const()[name = tensor("op_6923_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6923_end_0 = const()[name = tensor("op_6923_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_6923_end_mask_0 = const()[name = tensor("op_6923_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6923_cast_fp16 = slice_by_index(begin = var_6923_begin_0, end = var_6923_end_0, end_mask = var_6923_end_mask_0, x = var_6430_cast_fp16)[name = tensor("op_6923_cast_fp16")]; + tensor var_6930_begin_0 = const()[name = tensor("op_6930_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_6930_end_0 = const()[name = tensor("op_6930_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_6930_end_mask_0 = const()[name = tensor("op_6930_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6930_cast_fp16 = slice_by_index(begin = var_6930_begin_0, end = var_6930_end_0, end_mask = var_6930_end_mask_0, x = var_6430_cast_fp16)[name = tensor("op_6930_cast_fp16")]; + tensor var_6937_begin_0 = const()[name = tensor("op_6937_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_6937_end_0 = const()[name = tensor("op_6937_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_6937_end_mask_0 = const()[name = tensor("op_6937_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6937_cast_fp16 = slice_by_index(begin = var_6937_begin_0, end = var_6937_end_0, end_mask = var_6937_end_mask_0, x = var_6430_cast_fp16)[name = tensor("op_6937_cast_fp16")]; + tensor var_6944_begin_0 = const()[name = tensor("op_6944_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_6944_end_0 = const()[name = tensor("op_6944_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_6944_end_mask_0 = const()[name = tensor("op_6944_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6944_cast_fp16 = slice_by_index(begin = var_6944_begin_0, end = var_6944_end_0, end_mask = var_6944_end_mask_0, x = var_6430_cast_fp16)[name = tensor("op_6944_cast_fp16")]; + tensor var_6951_begin_0 = const()[name = tensor("op_6951_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6951_end_0 = const()[name = tensor("op_6951_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_6951_end_mask_0 = const()[name = tensor("op_6951_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6951_cast_fp16 = slice_by_index(begin = var_6951_begin_0, end = var_6951_end_0, end_mask = var_6951_end_mask_0, x = var_6434_cast_fp16)[name = tensor("op_6951_cast_fp16")]; + tensor var_6958_begin_0 = const()[name = tensor("op_6958_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_6958_end_0 = const()[name = tensor("op_6958_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_6958_end_mask_0 = const()[name = tensor("op_6958_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6958_cast_fp16 = slice_by_index(begin = var_6958_begin_0, end = var_6958_end_0, end_mask = var_6958_end_mask_0, x = var_6434_cast_fp16)[name = tensor("op_6958_cast_fp16")]; + tensor var_6965_begin_0 = const()[name = tensor("op_6965_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_6965_end_0 = const()[name = tensor("op_6965_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_6965_end_mask_0 = const()[name = tensor("op_6965_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6965_cast_fp16 = slice_by_index(begin = var_6965_begin_0, end = var_6965_end_0, end_mask = var_6965_end_mask_0, x = var_6434_cast_fp16)[name = tensor("op_6965_cast_fp16")]; + tensor var_6972_begin_0 = const()[name = tensor("op_6972_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_6972_end_0 = const()[name = tensor("op_6972_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_6972_end_mask_0 = const()[name = tensor("op_6972_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6972_cast_fp16 = slice_by_index(begin = var_6972_begin_0, end = var_6972_end_0, end_mask = var_6972_end_mask_0, x = var_6434_cast_fp16)[name = tensor("op_6972_cast_fp16")]; + tensor var_6979_begin_0 = const()[name = tensor("op_6979_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_6979_end_0 = const()[name = tensor("op_6979_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_6979_end_mask_0 = const()[name = tensor("op_6979_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6979_cast_fp16 = slice_by_index(begin = var_6979_begin_0, end = var_6979_end_0, end_mask = var_6979_end_mask_0, x = var_6438_cast_fp16)[name = tensor("op_6979_cast_fp16")]; + tensor var_6986_begin_0 = const()[name = tensor("op_6986_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_6986_end_0 = const()[name = tensor("op_6986_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_6986_end_mask_0 = const()[name = tensor("op_6986_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6986_cast_fp16 = slice_by_index(begin = var_6986_begin_0, end = var_6986_end_0, end_mask = var_6986_end_mask_0, x = var_6438_cast_fp16)[name = tensor("op_6986_cast_fp16")]; + tensor var_6993_begin_0 = const()[name = tensor("op_6993_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_6993_end_0 = const()[name = tensor("op_6993_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_6993_end_mask_0 = const()[name = tensor("op_6993_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_6993_cast_fp16 = slice_by_index(begin = var_6993_begin_0, end = var_6993_end_0, end_mask = var_6993_end_mask_0, x = var_6438_cast_fp16)[name = tensor("op_6993_cast_fp16")]; + tensor var_7000_begin_0 = const()[name = tensor("op_7000_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_7000_end_0 = const()[name = tensor("op_7000_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_7000_end_mask_0 = const()[name = tensor("op_7000_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7000_cast_fp16 = slice_by_index(begin = var_7000_begin_0, end = var_7000_end_0, end_mask = var_7000_end_mask_0, x = var_6438_cast_fp16)[name = tensor("op_7000_cast_fp16")]; + tensor k_9_perm_0 = const()[name = tensor("k_9_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_7005_begin_0 = const()[name = tensor("op_7005_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_7005_end_0 = const()[name = tensor("op_7005_end_0"), val = tensor([1, 1500, 1, 64])]; + tensor var_7005_end_mask_0 = const()[name = tensor("op_7005_end_mask_0"), val = tensor([true, true, true, false])]; + tensor transpose_27 = transpose(perm = k_9_perm_0, x = key_9_cast_fp16)[name = tensor("transpose_27")]; + tensor var_7005_cast_fp16 = slice_by_index(begin = var_7005_begin_0, end = var_7005_end_0, end_mask = var_7005_end_mask_0, x = transpose_27)[name = tensor("op_7005_cast_fp16")]; + tensor var_7009_begin_0 = const()[name = tensor("op_7009_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor var_7009_end_0 = const()[name = tensor("op_7009_end_0"), val = tensor([1, 1500, 1, 128])]; + tensor var_7009_end_mask_0 = const()[name = tensor("op_7009_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7009_cast_fp16 = slice_by_index(begin = var_7009_begin_0, end = var_7009_end_0, end_mask = var_7009_end_mask_0, x = transpose_27)[name = tensor("op_7009_cast_fp16")]; + tensor var_7013_begin_0 = const()[name = tensor("op_7013_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_7013_end_0 = const()[name = tensor("op_7013_end_0"), val = tensor([1, 1500, 1, 192])]; + tensor var_7013_end_mask_0 = const()[name = tensor("op_7013_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7013_cast_fp16 = slice_by_index(begin = var_7013_begin_0, end = var_7013_end_0, end_mask = var_7013_end_mask_0, x = transpose_27)[name = tensor("op_7013_cast_fp16")]; + tensor var_7017_begin_0 = const()[name = tensor("op_7017_begin_0"), val = tensor([0, 0, 0, 192])]; + tensor var_7017_end_0 = const()[name = tensor("op_7017_end_0"), val = tensor([1, 1500, 1, 256])]; + tensor var_7017_end_mask_0 = const()[name = tensor("op_7017_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7017_cast_fp16 = slice_by_index(begin = var_7017_begin_0, end = var_7017_end_0, end_mask = var_7017_end_mask_0, x = transpose_27)[name = tensor("op_7017_cast_fp16")]; + tensor var_7021_begin_0 = const()[name = tensor("op_7021_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_7021_end_0 = const()[name = tensor("op_7021_end_0"), val = tensor([1, 1500, 1, 320])]; + tensor var_7021_end_mask_0 = const()[name = tensor("op_7021_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7021_cast_fp16 = slice_by_index(begin = var_7021_begin_0, end = var_7021_end_0, end_mask = var_7021_end_mask_0, x = transpose_27)[name = tensor("op_7021_cast_fp16")]; + tensor var_7025_begin_0 = const()[name = tensor("op_7025_begin_0"), val = tensor([0, 0, 0, 320])]; + tensor var_7025_end_0 = const()[name = tensor("op_7025_end_0"), val = tensor([1, 1500, 1, 384])]; + tensor var_7025_end_mask_0 = const()[name = tensor("op_7025_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7025_cast_fp16 = slice_by_index(begin = var_7025_begin_0, end = var_7025_end_0, end_mask = var_7025_end_mask_0, x = transpose_27)[name = tensor("op_7025_cast_fp16")]; + tensor var_7029_begin_0 = const()[name = tensor("op_7029_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_7029_end_0 = const()[name = tensor("op_7029_end_0"), val = tensor([1, 1500, 1, 448])]; + tensor var_7029_end_mask_0 = const()[name = tensor("op_7029_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7029_cast_fp16 = slice_by_index(begin = var_7029_begin_0, end = var_7029_end_0, end_mask = var_7029_end_mask_0, x = transpose_27)[name = tensor("op_7029_cast_fp16")]; + tensor var_7033_begin_0 = const()[name = tensor("op_7033_begin_0"), val = tensor([0, 0, 0, 448])]; + tensor var_7033_end_0 = const()[name = tensor("op_7033_end_0"), val = tensor([1, 1500, 1, 512])]; + tensor var_7033_end_mask_0 = const()[name = tensor("op_7033_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7033_cast_fp16 = slice_by_index(begin = var_7033_begin_0, end = var_7033_end_0, end_mask = var_7033_end_mask_0, x = transpose_27)[name = tensor("op_7033_cast_fp16")]; + tensor var_7037_begin_0 = const()[name = tensor("op_7037_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_7037_end_0 = const()[name = tensor("op_7037_end_0"), val = tensor([1, 1500, 1, 576])]; + tensor var_7037_end_mask_0 = const()[name = tensor("op_7037_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7037_cast_fp16 = slice_by_index(begin = var_7037_begin_0, end = var_7037_end_0, end_mask = var_7037_end_mask_0, x = transpose_27)[name = tensor("op_7037_cast_fp16")]; + tensor var_7041_begin_0 = const()[name = tensor("op_7041_begin_0"), val = tensor([0, 0, 0, 576])]; + tensor var_7041_end_0 = const()[name = tensor("op_7041_end_0"), val = tensor([1, 1500, 1, 640])]; + tensor var_7041_end_mask_0 = const()[name = tensor("op_7041_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7041_cast_fp16 = slice_by_index(begin = var_7041_begin_0, end = var_7041_end_0, end_mask = var_7041_end_mask_0, x = transpose_27)[name = tensor("op_7041_cast_fp16")]; + tensor var_7045_begin_0 = const()[name = tensor("op_7045_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_7045_end_0 = const()[name = tensor("op_7045_end_0"), val = tensor([1, 1500, 1, 704])]; + tensor var_7045_end_mask_0 = const()[name = tensor("op_7045_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7045_cast_fp16 = slice_by_index(begin = var_7045_begin_0, end = var_7045_end_0, end_mask = var_7045_end_mask_0, x = transpose_27)[name = tensor("op_7045_cast_fp16")]; + tensor var_7049_begin_0 = const()[name = tensor("op_7049_begin_0"), val = tensor([0, 0, 0, 704])]; + tensor var_7049_end_0 = const()[name = tensor("op_7049_end_0"), val = tensor([1, 1500, 1, 768])]; + tensor var_7049_end_mask_0 = const()[name = tensor("op_7049_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7049_cast_fp16 = slice_by_index(begin = var_7049_begin_0, end = var_7049_end_0, end_mask = var_7049_end_mask_0, x = transpose_27)[name = tensor("op_7049_cast_fp16")]; + tensor var_7053_begin_0 = const()[name = tensor("op_7053_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_7053_end_0 = const()[name = tensor("op_7053_end_0"), val = tensor([1, 1500, 1, 832])]; + tensor var_7053_end_mask_0 = const()[name = tensor("op_7053_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7053_cast_fp16 = slice_by_index(begin = var_7053_begin_0, end = var_7053_end_0, end_mask = var_7053_end_mask_0, x = transpose_27)[name = tensor("op_7053_cast_fp16")]; + tensor var_7057_begin_0 = const()[name = tensor("op_7057_begin_0"), val = tensor([0, 0, 0, 832])]; + tensor var_7057_end_0 = const()[name = tensor("op_7057_end_0"), val = tensor([1, 1500, 1, 896])]; + tensor var_7057_end_mask_0 = const()[name = tensor("op_7057_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7057_cast_fp16 = slice_by_index(begin = var_7057_begin_0, end = var_7057_end_0, end_mask = var_7057_end_mask_0, x = transpose_27)[name = tensor("op_7057_cast_fp16")]; + tensor var_7061_begin_0 = const()[name = tensor("op_7061_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_7061_end_0 = const()[name = tensor("op_7061_end_0"), val = tensor([1, 1500, 1, 960])]; + tensor var_7061_end_mask_0 = const()[name = tensor("op_7061_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7061_cast_fp16 = slice_by_index(begin = var_7061_begin_0, end = var_7061_end_0, end_mask = var_7061_end_mask_0, x = transpose_27)[name = tensor("op_7061_cast_fp16")]; + tensor var_7065_begin_0 = const()[name = tensor("op_7065_begin_0"), val = tensor([0, 0, 0, 960])]; + tensor var_7065_end_0 = const()[name = tensor("op_7065_end_0"), val = tensor([1, 1500, 1, 1024])]; + tensor var_7065_end_mask_0 = const()[name = tensor("op_7065_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7065_cast_fp16 = slice_by_index(begin = var_7065_begin_0, end = var_7065_end_0, end_mask = var_7065_end_mask_0, x = transpose_27)[name = tensor("op_7065_cast_fp16")]; + tensor var_7069_begin_0 = const()[name = tensor("op_7069_begin_0"), val = tensor([0, 0, 0, 1024])]; + tensor var_7069_end_0 = const()[name = tensor("op_7069_end_0"), val = tensor([1, 1500, 1, 1088])]; + tensor var_7069_end_mask_0 = const()[name = tensor("op_7069_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7069_cast_fp16 = slice_by_index(begin = var_7069_begin_0, end = var_7069_end_0, end_mask = var_7069_end_mask_0, x = transpose_27)[name = tensor("op_7069_cast_fp16")]; + tensor var_7073_begin_0 = const()[name = tensor("op_7073_begin_0"), val = tensor([0, 0, 0, 1088])]; + tensor var_7073_end_0 = const()[name = tensor("op_7073_end_0"), val = tensor([1, 1500, 1, 1152])]; + tensor var_7073_end_mask_0 = const()[name = tensor("op_7073_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7073_cast_fp16 = slice_by_index(begin = var_7073_begin_0, end = var_7073_end_0, end_mask = var_7073_end_mask_0, x = transpose_27)[name = tensor("op_7073_cast_fp16")]; + tensor var_7077_begin_0 = const()[name = tensor("op_7077_begin_0"), val = tensor([0, 0, 0, 1152])]; + tensor var_7077_end_0 = const()[name = tensor("op_7077_end_0"), val = tensor([1, 1500, 1, 1216])]; + tensor var_7077_end_mask_0 = const()[name = tensor("op_7077_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7077_cast_fp16 = slice_by_index(begin = var_7077_begin_0, end = var_7077_end_0, end_mask = var_7077_end_mask_0, x = transpose_27)[name = tensor("op_7077_cast_fp16")]; + tensor var_7081_begin_0 = const()[name = tensor("op_7081_begin_0"), val = tensor([0, 0, 0, 1216])]; + tensor var_7081_end_0 = const()[name = tensor("op_7081_end_0"), val = tensor([1, 1500, 1, 1280])]; + tensor var_7081_end_mask_0 = const()[name = tensor("op_7081_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7081_cast_fp16 = slice_by_index(begin = var_7081_begin_0, end = var_7081_end_0, end_mask = var_7081_end_mask_0, x = transpose_27)[name = tensor("op_7081_cast_fp16")]; + tensor var_7083_begin_0 = const()[name = tensor("op_7083_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_7083_end_0 = const()[name = tensor("op_7083_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_7083_end_mask_0 = const()[name = tensor("op_7083_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7083_cast_fp16 = slice_by_index(begin = var_7083_begin_0, end = var_7083_end_0, end_mask = var_7083_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_7083_cast_fp16")]; + tensor var_7087_begin_0 = const()[name = tensor("op_7087_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_7087_end_0 = const()[name = tensor("op_7087_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_7087_end_mask_0 = const()[name = tensor("op_7087_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7087_cast_fp16 = slice_by_index(begin = var_7087_begin_0, end = var_7087_end_0, end_mask = var_7087_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_7087_cast_fp16")]; + tensor var_7091_begin_0 = const()[name = tensor("op_7091_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_7091_end_0 = const()[name = tensor("op_7091_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_7091_end_mask_0 = const()[name = tensor("op_7091_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7091_cast_fp16 = slice_by_index(begin = var_7091_begin_0, end = var_7091_end_0, end_mask = var_7091_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_7091_cast_fp16")]; + tensor var_7095_begin_0 = const()[name = tensor("op_7095_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_7095_end_0 = const()[name = tensor("op_7095_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_7095_end_mask_0 = const()[name = tensor("op_7095_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7095_cast_fp16 = slice_by_index(begin = var_7095_begin_0, end = var_7095_end_0, end_mask = var_7095_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_7095_cast_fp16")]; + tensor var_7099_begin_0 = const()[name = tensor("op_7099_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_7099_end_0 = const()[name = tensor("op_7099_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_7099_end_mask_0 = const()[name = tensor("op_7099_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7099_cast_fp16 = slice_by_index(begin = var_7099_begin_0, end = var_7099_end_0, end_mask = var_7099_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_7099_cast_fp16")]; + tensor var_7103_begin_0 = const()[name = tensor("op_7103_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_7103_end_0 = const()[name = tensor("op_7103_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_7103_end_mask_0 = const()[name = tensor("op_7103_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7103_cast_fp16 = slice_by_index(begin = var_7103_begin_0, end = var_7103_end_0, end_mask = var_7103_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_7103_cast_fp16")]; + tensor var_7107_begin_0 = const()[name = tensor("op_7107_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_7107_end_0 = const()[name = tensor("op_7107_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_7107_end_mask_0 = const()[name = tensor("op_7107_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7107_cast_fp16 = slice_by_index(begin = var_7107_begin_0, end = var_7107_end_0, end_mask = var_7107_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_7107_cast_fp16")]; + tensor var_7111_begin_0 = const()[name = tensor("op_7111_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_7111_end_0 = const()[name = tensor("op_7111_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_7111_end_mask_0 = const()[name = tensor("op_7111_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7111_cast_fp16 = slice_by_index(begin = var_7111_begin_0, end = var_7111_end_0, end_mask = var_7111_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_7111_cast_fp16")]; + tensor var_7115_begin_0 = const()[name = tensor("op_7115_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_7115_end_0 = const()[name = tensor("op_7115_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_7115_end_mask_0 = const()[name = tensor("op_7115_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7115_cast_fp16 = slice_by_index(begin = var_7115_begin_0, end = var_7115_end_0, end_mask = var_7115_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_7115_cast_fp16")]; + tensor var_7119_begin_0 = const()[name = tensor("op_7119_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_7119_end_0 = const()[name = tensor("op_7119_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_7119_end_mask_0 = const()[name = tensor("op_7119_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7119_cast_fp16 = slice_by_index(begin = var_7119_begin_0, end = var_7119_end_0, end_mask = var_7119_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_7119_cast_fp16")]; + tensor var_7123_begin_0 = const()[name = tensor("op_7123_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_7123_end_0 = const()[name = tensor("op_7123_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_7123_end_mask_0 = const()[name = tensor("op_7123_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7123_cast_fp16 = slice_by_index(begin = var_7123_begin_0, end = var_7123_end_0, end_mask = var_7123_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_7123_cast_fp16")]; + tensor var_7127_begin_0 = const()[name = tensor("op_7127_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_7127_end_0 = const()[name = tensor("op_7127_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_7127_end_mask_0 = const()[name = tensor("op_7127_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7127_cast_fp16 = slice_by_index(begin = var_7127_begin_0, end = var_7127_end_0, end_mask = var_7127_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_7127_cast_fp16")]; + tensor var_7131_begin_0 = const()[name = tensor("op_7131_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_7131_end_0 = const()[name = tensor("op_7131_end_0"), val = tensor([1, 832, 1, 1500])]; + tensor var_7131_end_mask_0 = const()[name = tensor("op_7131_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7131_cast_fp16 = slice_by_index(begin = var_7131_begin_0, end = var_7131_end_0, end_mask = var_7131_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_7131_cast_fp16")]; + tensor var_7135_begin_0 = const()[name = tensor("op_7135_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_7135_end_0 = const()[name = tensor("op_7135_end_0"), val = tensor([1, 896, 1, 1500])]; + tensor var_7135_end_mask_0 = const()[name = tensor("op_7135_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7135_cast_fp16 = slice_by_index(begin = var_7135_begin_0, end = var_7135_end_0, end_mask = var_7135_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_7135_cast_fp16")]; + tensor var_7139_begin_0 = const()[name = tensor("op_7139_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_7139_end_0 = const()[name = tensor("op_7139_end_0"), val = tensor([1, 960, 1, 1500])]; + tensor var_7139_end_mask_0 = const()[name = tensor("op_7139_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7139_cast_fp16 = slice_by_index(begin = var_7139_begin_0, end = var_7139_end_0, end_mask = var_7139_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_7139_cast_fp16")]; + tensor var_7143_begin_0 = const()[name = tensor("op_7143_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_7143_end_0 = const()[name = tensor("op_7143_end_0"), val = tensor([1, 1024, 1, 1500])]; + tensor var_7143_end_mask_0 = const()[name = tensor("op_7143_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7143_cast_fp16 = slice_by_index(begin = var_7143_begin_0, end = var_7143_end_0, end_mask = var_7143_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_7143_cast_fp16")]; + tensor var_7147_begin_0 = const()[name = tensor("op_7147_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_7147_end_0 = const()[name = tensor("op_7147_end_0"), val = tensor([1, 1088, 1, 1500])]; + tensor var_7147_end_mask_0 = const()[name = tensor("op_7147_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7147_cast_fp16 = slice_by_index(begin = var_7147_begin_0, end = var_7147_end_0, end_mask = var_7147_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_7147_cast_fp16")]; + tensor var_7151_begin_0 = const()[name = tensor("op_7151_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_7151_end_0 = const()[name = tensor("op_7151_end_0"), val = tensor([1, 1152, 1, 1500])]; + tensor var_7151_end_mask_0 = const()[name = tensor("op_7151_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7151_cast_fp16 = slice_by_index(begin = var_7151_begin_0, end = var_7151_end_0, end_mask = var_7151_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_7151_cast_fp16")]; + tensor var_7155_begin_0 = const()[name = tensor("op_7155_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_7155_end_0 = const()[name = tensor("op_7155_end_0"), val = tensor([1, 1216, 1, 1500])]; + tensor var_7155_end_mask_0 = const()[name = tensor("op_7155_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7155_cast_fp16 = slice_by_index(begin = var_7155_begin_0, end = var_7155_end_0, end_mask = var_7155_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_7155_cast_fp16")]; + tensor var_7159_begin_0 = const()[name = tensor("op_7159_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_7159_end_0 = const()[name = tensor("op_7159_end_0"), val = tensor([1, 1280, 1, 1500])]; + tensor var_7159_end_mask_0 = const()[name = tensor("op_7159_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7159_cast_fp16 = slice_by_index(begin = var_7159_begin_0, end = var_7159_end_0, end_mask = var_7159_end_mask_0, x = value_9_cast_fp16)[name = tensor("op_7159_cast_fp16")]; + tensor var_7163_equation_0 = const()[name = tensor("op_7163_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7163_cast_fp16 = einsum(equation = var_7163_equation_0, values = (var_7005_cast_fp16, var_6447_cast_fp16))[name = tensor("op_7163_cast_fp16")]; + tensor var_7164_to_fp16 = const()[name = tensor("op_7164_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_641_cast_fp16 = mul(x = var_7163_cast_fp16, y = var_7164_to_fp16)[name = tensor("aw_chunk_641_cast_fp16")]; + tensor var_7167_equation_0 = const()[name = tensor("op_7167_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7167_cast_fp16 = einsum(equation = var_7167_equation_0, values = (var_7005_cast_fp16, var_6454_cast_fp16))[name = tensor("op_7167_cast_fp16")]; + tensor var_7168_to_fp16 = const()[name = tensor("op_7168_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_643_cast_fp16 = mul(x = var_7167_cast_fp16, y = var_7168_to_fp16)[name = tensor("aw_chunk_643_cast_fp16")]; + tensor var_7171_equation_0 = const()[name = tensor("op_7171_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7171_cast_fp16 = einsum(equation = var_7171_equation_0, values = (var_7005_cast_fp16, var_6461_cast_fp16))[name = tensor("op_7171_cast_fp16")]; + tensor var_7172_to_fp16 = const()[name = tensor("op_7172_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_645_cast_fp16 = mul(x = var_7171_cast_fp16, y = var_7172_to_fp16)[name = tensor("aw_chunk_645_cast_fp16")]; + tensor var_7175_equation_0 = const()[name = tensor("op_7175_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7175_cast_fp16 = einsum(equation = var_7175_equation_0, values = (var_7005_cast_fp16, var_6468_cast_fp16))[name = tensor("op_7175_cast_fp16")]; + tensor var_7176_to_fp16 = const()[name = tensor("op_7176_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_647_cast_fp16 = mul(x = var_7175_cast_fp16, y = var_7176_to_fp16)[name = tensor("aw_chunk_647_cast_fp16")]; + tensor var_7179_equation_0 = const()[name = tensor("op_7179_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7179_cast_fp16 = einsum(equation = var_7179_equation_0, values = (var_7009_cast_fp16, var_6475_cast_fp16))[name = tensor("op_7179_cast_fp16")]; + tensor var_7180_to_fp16 = const()[name = tensor("op_7180_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_649_cast_fp16 = mul(x = var_7179_cast_fp16, y = var_7180_to_fp16)[name = tensor("aw_chunk_649_cast_fp16")]; + tensor var_7183_equation_0 = const()[name = tensor("op_7183_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7183_cast_fp16 = einsum(equation = var_7183_equation_0, values = (var_7009_cast_fp16, var_6482_cast_fp16))[name = tensor("op_7183_cast_fp16")]; + tensor var_7184_to_fp16 = const()[name = tensor("op_7184_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_651_cast_fp16 = mul(x = var_7183_cast_fp16, y = var_7184_to_fp16)[name = tensor("aw_chunk_651_cast_fp16")]; + tensor var_7187_equation_0 = const()[name = tensor("op_7187_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7187_cast_fp16 = einsum(equation = var_7187_equation_0, values = (var_7009_cast_fp16, var_6489_cast_fp16))[name = tensor("op_7187_cast_fp16")]; + tensor var_7188_to_fp16 = const()[name = tensor("op_7188_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_653_cast_fp16 = mul(x = var_7187_cast_fp16, y = var_7188_to_fp16)[name = tensor("aw_chunk_653_cast_fp16")]; + tensor var_7191_equation_0 = const()[name = tensor("op_7191_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7191_cast_fp16 = einsum(equation = var_7191_equation_0, values = (var_7009_cast_fp16, var_6496_cast_fp16))[name = tensor("op_7191_cast_fp16")]; + tensor var_7192_to_fp16 = const()[name = tensor("op_7192_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_655_cast_fp16 = mul(x = var_7191_cast_fp16, y = var_7192_to_fp16)[name = tensor("aw_chunk_655_cast_fp16")]; + tensor var_7195_equation_0 = const()[name = tensor("op_7195_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7195_cast_fp16 = einsum(equation = var_7195_equation_0, values = (var_7013_cast_fp16, var_6503_cast_fp16))[name = tensor("op_7195_cast_fp16")]; + tensor var_7196_to_fp16 = const()[name = tensor("op_7196_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_657_cast_fp16 = mul(x = var_7195_cast_fp16, y = var_7196_to_fp16)[name = tensor("aw_chunk_657_cast_fp16")]; + tensor var_7199_equation_0 = const()[name = tensor("op_7199_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7199_cast_fp16 = einsum(equation = var_7199_equation_0, values = (var_7013_cast_fp16, var_6510_cast_fp16))[name = tensor("op_7199_cast_fp16")]; + tensor var_7200_to_fp16 = const()[name = tensor("op_7200_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_659_cast_fp16 = mul(x = var_7199_cast_fp16, y = var_7200_to_fp16)[name = tensor("aw_chunk_659_cast_fp16")]; + tensor var_7203_equation_0 = const()[name = tensor("op_7203_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7203_cast_fp16 = einsum(equation = var_7203_equation_0, values = (var_7013_cast_fp16, var_6517_cast_fp16))[name = tensor("op_7203_cast_fp16")]; + tensor var_7204_to_fp16 = const()[name = tensor("op_7204_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_661_cast_fp16 = mul(x = var_7203_cast_fp16, y = var_7204_to_fp16)[name = tensor("aw_chunk_661_cast_fp16")]; + tensor var_7207_equation_0 = const()[name = tensor("op_7207_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7207_cast_fp16 = einsum(equation = var_7207_equation_0, values = (var_7013_cast_fp16, var_6524_cast_fp16))[name = tensor("op_7207_cast_fp16")]; + tensor var_7208_to_fp16 = const()[name = tensor("op_7208_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_663_cast_fp16 = mul(x = var_7207_cast_fp16, y = var_7208_to_fp16)[name = tensor("aw_chunk_663_cast_fp16")]; + tensor var_7211_equation_0 = const()[name = tensor("op_7211_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7211_cast_fp16 = einsum(equation = var_7211_equation_0, values = (var_7017_cast_fp16, var_6531_cast_fp16))[name = tensor("op_7211_cast_fp16")]; + tensor var_7212_to_fp16 = const()[name = tensor("op_7212_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_665_cast_fp16 = mul(x = var_7211_cast_fp16, y = var_7212_to_fp16)[name = tensor("aw_chunk_665_cast_fp16")]; + tensor var_7215_equation_0 = const()[name = tensor("op_7215_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7215_cast_fp16 = einsum(equation = var_7215_equation_0, values = (var_7017_cast_fp16, var_6538_cast_fp16))[name = tensor("op_7215_cast_fp16")]; + tensor var_7216_to_fp16 = const()[name = tensor("op_7216_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_667_cast_fp16 = mul(x = var_7215_cast_fp16, y = var_7216_to_fp16)[name = tensor("aw_chunk_667_cast_fp16")]; + tensor var_7219_equation_0 = const()[name = tensor("op_7219_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7219_cast_fp16 = einsum(equation = var_7219_equation_0, values = (var_7017_cast_fp16, var_6545_cast_fp16))[name = tensor("op_7219_cast_fp16")]; + tensor var_7220_to_fp16 = const()[name = tensor("op_7220_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_669_cast_fp16 = mul(x = var_7219_cast_fp16, y = var_7220_to_fp16)[name = tensor("aw_chunk_669_cast_fp16")]; + tensor var_7223_equation_0 = const()[name = tensor("op_7223_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7223_cast_fp16 = einsum(equation = var_7223_equation_0, values = (var_7017_cast_fp16, var_6552_cast_fp16))[name = tensor("op_7223_cast_fp16")]; + tensor var_7224_to_fp16 = const()[name = tensor("op_7224_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_671_cast_fp16 = mul(x = var_7223_cast_fp16, y = var_7224_to_fp16)[name = tensor("aw_chunk_671_cast_fp16")]; + tensor var_7227_equation_0 = const()[name = tensor("op_7227_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7227_cast_fp16 = einsum(equation = var_7227_equation_0, values = (var_7021_cast_fp16, var_6559_cast_fp16))[name = tensor("op_7227_cast_fp16")]; + tensor var_7228_to_fp16 = const()[name = tensor("op_7228_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_673_cast_fp16 = mul(x = var_7227_cast_fp16, y = var_7228_to_fp16)[name = tensor("aw_chunk_673_cast_fp16")]; + tensor var_7231_equation_0 = const()[name = tensor("op_7231_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7231_cast_fp16 = einsum(equation = var_7231_equation_0, values = (var_7021_cast_fp16, var_6566_cast_fp16))[name = tensor("op_7231_cast_fp16")]; + tensor var_7232_to_fp16 = const()[name = tensor("op_7232_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_675_cast_fp16 = mul(x = var_7231_cast_fp16, y = var_7232_to_fp16)[name = tensor("aw_chunk_675_cast_fp16")]; + tensor var_7235_equation_0 = const()[name = tensor("op_7235_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7235_cast_fp16 = einsum(equation = var_7235_equation_0, values = (var_7021_cast_fp16, var_6573_cast_fp16))[name = tensor("op_7235_cast_fp16")]; + tensor var_7236_to_fp16 = const()[name = tensor("op_7236_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_677_cast_fp16 = mul(x = var_7235_cast_fp16, y = var_7236_to_fp16)[name = tensor("aw_chunk_677_cast_fp16")]; + tensor var_7239_equation_0 = const()[name = tensor("op_7239_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7239_cast_fp16 = einsum(equation = var_7239_equation_0, values = (var_7021_cast_fp16, var_6580_cast_fp16))[name = tensor("op_7239_cast_fp16")]; + tensor var_7240_to_fp16 = const()[name = tensor("op_7240_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_679_cast_fp16 = mul(x = var_7239_cast_fp16, y = var_7240_to_fp16)[name = tensor("aw_chunk_679_cast_fp16")]; + tensor var_7243_equation_0 = const()[name = tensor("op_7243_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7243_cast_fp16 = einsum(equation = var_7243_equation_0, values = (var_7025_cast_fp16, var_6587_cast_fp16))[name = tensor("op_7243_cast_fp16")]; + tensor var_7244_to_fp16 = const()[name = tensor("op_7244_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_681_cast_fp16 = mul(x = var_7243_cast_fp16, y = var_7244_to_fp16)[name = tensor("aw_chunk_681_cast_fp16")]; + tensor var_7247_equation_0 = const()[name = tensor("op_7247_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7247_cast_fp16 = einsum(equation = var_7247_equation_0, values = (var_7025_cast_fp16, var_6594_cast_fp16))[name = tensor("op_7247_cast_fp16")]; + tensor var_7248_to_fp16 = const()[name = tensor("op_7248_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_683_cast_fp16 = mul(x = var_7247_cast_fp16, y = var_7248_to_fp16)[name = tensor("aw_chunk_683_cast_fp16")]; + tensor var_7251_equation_0 = const()[name = tensor("op_7251_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7251_cast_fp16 = einsum(equation = var_7251_equation_0, values = (var_7025_cast_fp16, var_6601_cast_fp16))[name = tensor("op_7251_cast_fp16")]; + tensor var_7252_to_fp16 = const()[name = tensor("op_7252_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_685_cast_fp16 = mul(x = var_7251_cast_fp16, y = var_7252_to_fp16)[name = tensor("aw_chunk_685_cast_fp16")]; + tensor var_7255_equation_0 = const()[name = tensor("op_7255_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7255_cast_fp16 = einsum(equation = var_7255_equation_0, values = (var_7025_cast_fp16, var_6608_cast_fp16))[name = tensor("op_7255_cast_fp16")]; + tensor var_7256_to_fp16 = const()[name = tensor("op_7256_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_687_cast_fp16 = mul(x = var_7255_cast_fp16, y = var_7256_to_fp16)[name = tensor("aw_chunk_687_cast_fp16")]; + tensor var_7259_equation_0 = const()[name = tensor("op_7259_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7259_cast_fp16 = einsum(equation = var_7259_equation_0, values = (var_7029_cast_fp16, var_6615_cast_fp16))[name = tensor("op_7259_cast_fp16")]; + tensor var_7260_to_fp16 = const()[name = tensor("op_7260_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_689_cast_fp16 = mul(x = var_7259_cast_fp16, y = var_7260_to_fp16)[name = tensor("aw_chunk_689_cast_fp16")]; + tensor var_7263_equation_0 = const()[name = tensor("op_7263_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7263_cast_fp16 = einsum(equation = var_7263_equation_0, values = (var_7029_cast_fp16, var_6622_cast_fp16))[name = tensor("op_7263_cast_fp16")]; + tensor var_7264_to_fp16 = const()[name = tensor("op_7264_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_691_cast_fp16 = mul(x = var_7263_cast_fp16, y = var_7264_to_fp16)[name = tensor("aw_chunk_691_cast_fp16")]; + tensor var_7267_equation_0 = const()[name = tensor("op_7267_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7267_cast_fp16 = einsum(equation = var_7267_equation_0, values = (var_7029_cast_fp16, var_6629_cast_fp16))[name = tensor("op_7267_cast_fp16")]; + tensor var_7268_to_fp16 = const()[name = tensor("op_7268_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_693_cast_fp16 = mul(x = var_7267_cast_fp16, y = var_7268_to_fp16)[name = tensor("aw_chunk_693_cast_fp16")]; + tensor var_7271_equation_0 = const()[name = tensor("op_7271_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7271_cast_fp16 = einsum(equation = var_7271_equation_0, values = (var_7029_cast_fp16, var_6636_cast_fp16))[name = tensor("op_7271_cast_fp16")]; + tensor var_7272_to_fp16 = const()[name = tensor("op_7272_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_695_cast_fp16 = mul(x = var_7271_cast_fp16, y = var_7272_to_fp16)[name = tensor("aw_chunk_695_cast_fp16")]; + tensor var_7275_equation_0 = const()[name = tensor("op_7275_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7275_cast_fp16 = einsum(equation = var_7275_equation_0, values = (var_7033_cast_fp16, var_6643_cast_fp16))[name = tensor("op_7275_cast_fp16")]; + tensor var_7276_to_fp16 = const()[name = tensor("op_7276_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_697_cast_fp16 = mul(x = var_7275_cast_fp16, y = var_7276_to_fp16)[name = tensor("aw_chunk_697_cast_fp16")]; + tensor var_7279_equation_0 = const()[name = tensor("op_7279_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7279_cast_fp16 = einsum(equation = var_7279_equation_0, values = (var_7033_cast_fp16, var_6650_cast_fp16))[name = tensor("op_7279_cast_fp16")]; + tensor var_7280_to_fp16 = const()[name = tensor("op_7280_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_699_cast_fp16 = mul(x = var_7279_cast_fp16, y = var_7280_to_fp16)[name = tensor("aw_chunk_699_cast_fp16")]; + tensor var_7283_equation_0 = const()[name = tensor("op_7283_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7283_cast_fp16 = einsum(equation = var_7283_equation_0, values = (var_7033_cast_fp16, var_6657_cast_fp16))[name = tensor("op_7283_cast_fp16")]; + tensor var_7284_to_fp16 = const()[name = tensor("op_7284_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_701_cast_fp16 = mul(x = var_7283_cast_fp16, y = var_7284_to_fp16)[name = tensor("aw_chunk_701_cast_fp16")]; + tensor var_7287_equation_0 = const()[name = tensor("op_7287_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7287_cast_fp16 = einsum(equation = var_7287_equation_0, values = (var_7033_cast_fp16, var_6664_cast_fp16))[name = tensor("op_7287_cast_fp16")]; + tensor var_7288_to_fp16 = const()[name = tensor("op_7288_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_703_cast_fp16 = mul(x = var_7287_cast_fp16, y = var_7288_to_fp16)[name = tensor("aw_chunk_703_cast_fp16")]; + tensor var_7291_equation_0 = const()[name = tensor("op_7291_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7291_cast_fp16 = einsum(equation = var_7291_equation_0, values = (var_7037_cast_fp16, var_6671_cast_fp16))[name = tensor("op_7291_cast_fp16")]; + tensor var_7292_to_fp16 = const()[name = tensor("op_7292_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_705_cast_fp16 = mul(x = var_7291_cast_fp16, y = var_7292_to_fp16)[name = tensor("aw_chunk_705_cast_fp16")]; + tensor var_7295_equation_0 = const()[name = tensor("op_7295_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7295_cast_fp16 = einsum(equation = var_7295_equation_0, values = (var_7037_cast_fp16, var_6678_cast_fp16))[name = tensor("op_7295_cast_fp16")]; + tensor var_7296_to_fp16 = const()[name = tensor("op_7296_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_707_cast_fp16 = mul(x = var_7295_cast_fp16, y = var_7296_to_fp16)[name = tensor("aw_chunk_707_cast_fp16")]; + tensor var_7299_equation_0 = const()[name = tensor("op_7299_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7299_cast_fp16 = einsum(equation = var_7299_equation_0, values = (var_7037_cast_fp16, var_6685_cast_fp16))[name = tensor("op_7299_cast_fp16")]; + tensor var_7300_to_fp16 = const()[name = tensor("op_7300_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_709_cast_fp16 = mul(x = var_7299_cast_fp16, y = var_7300_to_fp16)[name = tensor("aw_chunk_709_cast_fp16")]; + tensor var_7303_equation_0 = const()[name = tensor("op_7303_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7303_cast_fp16 = einsum(equation = var_7303_equation_0, values = (var_7037_cast_fp16, var_6692_cast_fp16))[name = tensor("op_7303_cast_fp16")]; + tensor var_7304_to_fp16 = const()[name = tensor("op_7304_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_711_cast_fp16 = mul(x = var_7303_cast_fp16, y = var_7304_to_fp16)[name = tensor("aw_chunk_711_cast_fp16")]; + tensor var_7307_equation_0 = const()[name = tensor("op_7307_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7307_cast_fp16 = einsum(equation = var_7307_equation_0, values = (var_7041_cast_fp16, var_6699_cast_fp16))[name = tensor("op_7307_cast_fp16")]; + tensor var_7308_to_fp16 = const()[name = tensor("op_7308_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_713_cast_fp16 = mul(x = var_7307_cast_fp16, y = var_7308_to_fp16)[name = tensor("aw_chunk_713_cast_fp16")]; + tensor var_7311_equation_0 = const()[name = tensor("op_7311_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7311_cast_fp16 = einsum(equation = var_7311_equation_0, values = (var_7041_cast_fp16, var_6706_cast_fp16))[name = tensor("op_7311_cast_fp16")]; + tensor var_7312_to_fp16 = const()[name = tensor("op_7312_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_715_cast_fp16 = mul(x = var_7311_cast_fp16, y = var_7312_to_fp16)[name = tensor("aw_chunk_715_cast_fp16")]; + tensor var_7315_equation_0 = const()[name = tensor("op_7315_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7315_cast_fp16 = einsum(equation = var_7315_equation_0, values = (var_7041_cast_fp16, var_6713_cast_fp16))[name = tensor("op_7315_cast_fp16")]; + tensor var_7316_to_fp16 = const()[name = tensor("op_7316_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_717_cast_fp16 = mul(x = var_7315_cast_fp16, y = var_7316_to_fp16)[name = tensor("aw_chunk_717_cast_fp16")]; + tensor var_7319_equation_0 = const()[name = tensor("op_7319_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7319_cast_fp16 = einsum(equation = var_7319_equation_0, values = (var_7041_cast_fp16, var_6720_cast_fp16))[name = tensor("op_7319_cast_fp16")]; + tensor var_7320_to_fp16 = const()[name = tensor("op_7320_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_719_cast_fp16 = mul(x = var_7319_cast_fp16, y = var_7320_to_fp16)[name = tensor("aw_chunk_719_cast_fp16")]; + tensor var_7323_equation_0 = const()[name = tensor("op_7323_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7323_cast_fp16 = einsum(equation = var_7323_equation_0, values = (var_7045_cast_fp16, var_6727_cast_fp16))[name = tensor("op_7323_cast_fp16")]; + tensor var_7324_to_fp16 = const()[name = tensor("op_7324_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_721_cast_fp16 = mul(x = var_7323_cast_fp16, y = var_7324_to_fp16)[name = tensor("aw_chunk_721_cast_fp16")]; + tensor var_7327_equation_0 = const()[name = tensor("op_7327_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7327_cast_fp16 = einsum(equation = var_7327_equation_0, values = (var_7045_cast_fp16, var_6734_cast_fp16))[name = tensor("op_7327_cast_fp16")]; + tensor var_7328_to_fp16 = const()[name = tensor("op_7328_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_723_cast_fp16 = mul(x = var_7327_cast_fp16, y = var_7328_to_fp16)[name = tensor("aw_chunk_723_cast_fp16")]; + tensor var_7331_equation_0 = const()[name = tensor("op_7331_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7331_cast_fp16 = einsum(equation = var_7331_equation_0, values = (var_7045_cast_fp16, var_6741_cast_fp16))[name = tensor("op_7331_cast_fp16")]; + tensor var_7332_to_fp16 = const()[name = tensor("op_7332_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_725_cast_fp16 = mul(x = var_7331_cast_fp16, y = var_7332_to_fp16)[name = tensor("aw_chunk_725_cast_fp16")]; + tensor var_7335_equation_0 = const()[name = tensor("op_7335_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7335_cast_fp16 = einsum(equation = var_7335_equation_0, values = (var_7045_cast_fp16, var_6748_cast_fp16))[name = tensor("op_7335_cast_fp16")]; + tensor var_7336_to_fp16 = const()[name = tensor("op_7336_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_727_cast_fp16 = mul(x = var_7335_cast_fp16, y = var_7336_to_fp16)[name = tensor("aw_chunk_727_cast_fp16")]; + tensor var_7339_equation_0 = const()[name = tensor("op_7339_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7339_cast_fp16 = einsum(equation = var_7339_equation_0, values = (var_7049_cast_fp16, var_6755_cast_fp16))[name = tensor("op_7339_cast_fp16")]; + tensor var_7340_to_fp16 = const()[name = tensor("op_7340_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_729_cast_fp16 = mul(x = var_7339_cast_fp16, y = var_7340_to_fp16)[name = tensor("aw_chunk_729_cast_fp16")]; + tensor var_7343_equation_0 = const()[name = tensor("op_7343_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7343_cast_fp16 = einsum(equation = var_7343_equation_0, values = (var_7049_cast_fp16, var_6762_cast_fp16))[name = tensor("op_7343_cast_fp16")]; + tensor var_7344_to_fp16 = const()[name = tensor("op_7344_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_731_cast_fp16 = mul(x = var_7343_cast_fp16, y = var_7344_to_fp16)[name = tensor("aw_chunk_731_cast_fp16")]; + tensor var_7347_equation_0 = const()[name = tensor("op_7347_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7347_cast_fp16 = einsum(equation = var_7347_equation_0, values = (var_7049_cast_fp16, var_6769_cast_fp16))[name = tensor("op_7347_cast_fp16")]; + tensor var_7348_to_fp16 = const()[name = tensor("op_7348_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_733_cast_fp16 = mul(x = var_7347_cast_fp16, y = var_7348_to_fp16)[name = tensor("aw_chunk_733_cast_fp16")]; + tensor var_7351_equation_0 = const()[name = tensor("op_7351_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7351_cast_fp16 = einsum(equation = var_7351_equation_0, values = (var_7049_cast_fp16, var_6776_cast_fp16))[name = tensor("op_7351_cast_fp16")]; + tensor var_7352_to_fp16 = const()[name = tensor("op_7352_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_735_cast_fp16 = mul(x = var_7351_cast_fp16, y = var_7352_to_fp16)[name = tensor("aw_chunk_735_cast_fp16")]; + tensor var_7355_equation_0 = const()[name = tensor("op_7355_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7355_cast_fp16 = einsum(equation = var_7355_equation_0, values = (var_7053_cast_fp16, var_6783_cast_fp16))[name = tensor("op_7355_cast_fp16")]; + tensor var_7356_to_fp16 = const()[name = tensor("op_7356_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_737_cast_fp16 = mul(x = var_7355_cast_fp16, y = var_7356_to_fp16)[name = tensor("aw_chunk_737_cast_fp16")]; + tensor var_7359_equation_0 = const()[name = tensor("op_7359_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7359_cast_fp16 = einsum(equation = var_7359_equation_0, values = (var_7053_cast_fp16, var_6790_cast_fp16))[name = tensor("op_7359_cast_fp16")]; + tensor var_7360_to_fp16 = const()[name = tensor("op_7360_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_739_cast_fp16 = mul(x = var_7359_cast_fp16, y = var_7360_to_fp16)[name = tensor("aw_chunk_739_cast_fp16")]; + tensor var_7363_equation_0 = const()[name = tensor("op_7363_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7363_cast_fp16 = einsum(equation = var_7363_equation_0, values = (var_7053_cast_fp16, var_6797_cast_fp16))[name = tensor("op_7363_cast_fp16")]; + tensor var_7364_to_fp16 = const()[name = tensor("op_7364_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_741_cast_fp16 = mul(x = var_7363_cast_fp16, y = var_7364_to_fp16)[name = tensor("aw_chunk_741_cast_fp16")]; + tensor var_7367_equation_0 = const()[name = tensor("op_7367_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7367_cast_fp16 = einsum(equation = var_7367_equation_0, values = (var_7053_cast_fp16, var_6804_cast_fp16))[name = tensor("op_7367_cast_fp16")]; + tensor var_7368_to_fp16 = const()[name = tensor("op_7368_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_743_cast_fp16 = mul(x = var_7367_cast_fp16, y = var_7368_to_fp16)[name = tensor("aw_chunk_743_cast_fp16")]; + tensor var_7371_equation_0 = const()[name = tensor("op_7371_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7371_cast_fp16 = einsum(equation = var_7371_equation_0, values = (var_7057_cast_fp16, var_6811_cast_fp16))[name = tensor("op_7371_cast_fp16")]; + tensor var_7372_to_fp16 = const()[name = tensor("op_7372_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_745_cast_fp16 = mul(x = var_7371_cast_fp16, y = var_7372_to_fp16)[name = tensor("aw_chunk_745_cast_fp16")]; + tensor var_7375_equation_0 = const()[name = tensor("op_7375_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7375_cast_fp16 = einsum(equation = var_7375_equation_0, values = (var_7057_cast_fp16, var_6818_cast_fp16))[name = tensor("op_7375_cast_fp16")]; + tensor var_7376_to_fp16 = const()[name = tensor("op_7376_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_747_cast_fp16 = mul(x = var_7375_cast_fp16, y = var_7376_to_fp16)[name = tensor("aw_chunk_747_cast_fp16")]; + tensor var_7379_equation_0 = const()[name = tensor("op_7379_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7379_cast_fp16 = einsum(equation = var_7379_equation_0, values = (var_7057_cast_fp16, var_6825_cast_fp16))[name = tensor("op_7379_cast_fp16")]; + tensor var_7380_to_fp16 = const()[name = tensor("op_7380_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_749_cast_fp16 = mul(x = var_7379_cast_fp16, y = var_7380_to_fp16)[name = tensor("aw_chunk_749_cast_fp16")]; + tensor var_7383_equation_0 = const()[name = tensor("op_7383_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7383_cast_fp16 = einsum(equation = var_7383_equation_0, values = (var_7057_cast_fp16, var_6832_cast_fp16))[name = tensor("op_7383_cast_fp16")]; + tensor var_7384_to_fp16 = const()[name = tensor("op_7384_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_751_cast_fp16 = mul(x = var_7383_cast_fp16, y = var_7384_to_fp16)[name = tensor("aw_chunk_751_cast_fp16")]; + tensor var_7387_equation_0 = const()[name = tensor("op_7387_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7387_cast_fp16 = einsum(equation = var_7387_equation_0, values = (var_7061_cast_fp16, var_6839_cast_fp16))[name = tensor("op_7387_cast_fp16")]; + tensor var_7388_to_fp16 = const()[name = tensor("op_7388_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_753_cast_fp16 = mul(x = var_7387_cast_fp16, y = var_7388_to_fp16)[name = tensor("aw_chunk_753_cast_fp16")]; + tensor var_7391_equation_0 = const()[name = tensor("op_7391_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7391_cast_fp16 = einsum(equation = var_7391_equation_0, values = (var_7061_cast_fp16, var_6846_cast_fp16))[name = tensor("op_7391_cast_fp16")]; + tensor var_7392_to_fp16 = const()[name = tensor("op_7392_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_755_cast_fp16 = mul(x = var_7391_cast_fp16, y = var_7392_to_fp16)[name = tensor("aw_chunk_755_cast_fp16")]; + tensor var_7395_equation_0 = const()[name = tensor("op_7395_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7395_cast_fp16 = einsum(equation = var_7395_equation_0, values = (var_7061_cast_fp16, var_6853_cast_fp16))[name = tensor("op_7395_cast_fp16")]; + tensor var_7396_to_fp16 = const()[name = tensor("op_7396_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_757_cast_fp16 = mul(x = var_7395_cast_fp16, y = var_7396_to_fp16)[name = tensor("aw_chunk_757_cast_fp16")]; + tensor var_7399_equation_0 = const()[name = tensor("op_7399_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7399_cast_fp16 = einsum(equation = var_7399_equation_0, values = (var_7061_cast_fp16, var_6860_cast_fp16))[name = tensor("op_7399_cast_fp16")]; + tensor var_7400_to_fp16 = const()[name = tensor("op_7400_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_759_cast_fp16 = mul(x = var_7399_cast_fp16, y = var_7400_to_fp16)[name = tensor("aw_chunk_759_cast_fp16")]; + tensor var_7403_equation_0 = const()[name = tensor("op_7403_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7403_cast_fp16 = einsum(equation = var_7403_equation_0, values = (var_7065_cast_fp16, var_6867_cast_fp16))[name = tensor("op_7403_cast_fp16")]; + tensor var_7404_to_fp16 = const()[name = tensor("op_7404_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_761_cast_fp16 = mul(x = var_7403_cast_fp16, y = var_7404_to_fp16)[name = tensor("aw_chunk_761_cast_fp16")]; + tensor var_7407_equation_0 = const()[name = tensor("op_7407_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7407_cast_fp16 = einsum(equation = var_7407_equation_0, values = (var_7065_cast_fp16, var_6874_cast_fp16))[name = tensor("op_7407_cast_fp16")]; + tensor var_7408_to_fp16 = const()[name = tensor("op_7408_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_763_cast_fp16 = mul(x = var_7407_cast_fp16, y = var_7408_to_fp16)[name = tensor("aw_chunk_763_cast_fp16")]; + tensor var_7411_equation_0 = const()[name = tensor("op_7411_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7411_cast_fp16 = einsum(equation = var_7411_equation_0, values = (var_7065_cast_fp16, var_6881_cast_fp16))[name = tensor("op_7411_cast_fp16")]; + tensor var_7412_to_fp16 = const()[name = tensor("op_7412_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_765_cast_fp16 = mul(x = var_7411_cast_fp16, y = var_7412_to_fp16)[name = tensor("aw_chunk_765_cast_fp16")]; + tensor var_7415_equation_0 = const()[name = tensor("op_7415_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7415_cast_fp16 = einsum(equation = var_7415_equation_0, values = (var_7065_cast_fp16, var_6888_cast_fp16))[name = tensor("op_7415_cast_fp16")]; + tensor var_7416_to_fp16 = const()[name = tensor("op_7416_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_767_cast_fp16 = mul(x = var_7415_cast_fp16, y = var_7416_to_fp16)[name = tensor("aw_chunk_767_cast_fp16")]; + tensor var_7419_equation_0 = const()[name = tensor("op_7419_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7419_cast_fp16 = einsum(equation = var_7419_equation_0, values = (var_7069_cast_fp16, var_6895_cast_fp16))[name = tensor("op_7419_cast_fp16")]; + tensor var_7420_to_fp16 = const()[name = tensor("op_7420_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_769_cast_fp16 = mul(x = var_7419_cast_fp16, y = var_7420_to_fp16)[name = tensor("aw_chunk_769_cast_fp16")]; + tensor var_7423_equation_0 = const()[name = tensor("op_7423_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7423_cast_fp16 = einsum(equation = var_7423_equation_0, values = (var_7069_cast_fp16, var_6902_cast_fp16))[name = tensor("op_7423_cast_fp16")]; + tensor var_7424_to_fp16 = const()[name = tensor("op_7424_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_771_cast_fp16 = mul(x = var_7423_cast_fp16, y = var_7424_to_fp16)[name = tensor("aw_chunk_771_cast_fp16")]; + tensor var_7427_equation_0 = const()[name = tensor("op_7427_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7427_cast_fp16 = einsum(equation = var_7427_equation_0, values = (var_7069_cast_fp16, var_6909_cast_fp16))[name = tensor("op_7427_cast_fp16")]; + tensor var_7428_to_fp16 = const()[name = tensor("op_7428_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_773_cast_fp16 = mul(x = var_7427_cast_fp16, y = var_7428_to_fp16)[name = tensor("aw_chunk_773_cast_fp16")]; + tensor var_7431_equation_0 = const()[name = tensor("op_7431_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7431_cast_fp16 = einsum(equation = var_7431_equation_0, values = (var_7069_cast_fp16, var_6916_cast_fp16))[name = tensor("op_7431_cast_fp16")]; + tensor var_7432_to_fp16 = const()[name = tensor("op_7432_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_775_cast_fp16 = mul(x = var_7431_cast_fp16, y = var_7432_to_fp16)[name = tensor("aw_chunk_775_cast_fp16")]; + tensor var_7435_equation_0 = const()[name = tensor("op_7435_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7435_cast_fp16 = einsum(equation = var_7435_equation_0, values = (var_7073_cast_fp16, var_6923_cast_fp16))[name = tensor("op_7435_cast_fp16")]; + tensor var_7436_to_fp16 = const()[name = tensor("op_7436_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_777_cast_fp16 = mul(x = var_7435_cast_fp16, y = var_7436_to_fp16)[name = tensor("aw_chunk_777_cast_fp16")]; + tensor var_7439_equation_0 = const()[name = tensor("op_7439_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7439_cast_fp16 = einsum(equation = var_7439_equation_0, values = (var_7073_cast_fp16, var_6930_cast_fp16))[name = tensor("op_7439_cast_fp16")]; + tensor var_7440_to_fp16 = const()[name = tensor("op_7440_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_779_cast_fp16 = mul(x = var_7439_cast_fp16, y = var_7440_to_fp16)[name = tensor("aw_chunk_779_cast_fp16")]; + tensor var_7443_equation_0 = const()[name = tensor("op_7443_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7443_cast_fp16 = einsum(equation = var_7443_equation_0, values = (var_7073_cast_fp16, var_6937_cast_fp16))[name = tensor("op_7443_cast_fp16")]; + tensor var_7444_to_fp16 = const()[name = tensor("op_7444_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_781_cast_fp16 = mul(x = var_7443_cast_fp16, y = var_7444_to_fp16)[name = tensor("aw_chunk_781_cast_fp16")]; + tensor var_7447_equation_0 = const()[name = tensor("op_7447_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7447_cast_fp16 = einsum(equation = var_7447_equation_0, values = (var_7073_cast_fp16, var_6944_cast_fp16))[name = tensor("op_7447_cast_fp16")]; + tensor var_7448_to_fp16 = const()[name = tensor("op_7448_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_783_cast_fp16 = mul(x = var_7447_cast_fp16, y = var_7448_to_fp16)[name = tensor("aw_chunk_783_cast_fp16")]; + tensor var_7451_equation_0 = const()[name = tensor("op_7451_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7451_cast_fp16 = einsum(equation = var_7451_equation_0, values = (var_7077_cast_fp16, var_6951_cast_fp16))[name = tensor("op_7451_cast_fp16")]; + tensor var_7452_to_fp16 = const()[name = tensor("op_7452_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_785_cast_fp16 = mul(x = var_7451_cast_fp16, y = var_7452_to_fp16)[name = tensor("aw_chunk_785_cast_fp16")]; + tensor var_7455_equation_0 = const()[name = tensor("op_7455_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7455_cast_fp16 = einsum(equation = var_7455_equation_0, values = (var_7077_cast_fp16, var_6958_cast_fp16))[name = tensor("op_7455_cast_fp16")]; + tensor var_7456_to_fp16 = const()[name = tensor("op_7456_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_787_cast_fp16 = mul(x = var_7455_cast_fp16, y = var_7456_to_fp16)[name = tensor("aw_chunk_787_cast_fp16")]; + tensor var_7459_equation_0 = const()[name = tensor("op_7459_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7459_cast_fp16 = einsum(equation = var_7459_equation_0, values = (var_7077_cast_fp16, var_6965_cast_fp16))[name = tensor("op_7459_cast_fp16")]; + tensor var_7460_to_fp16 = const()[name = tensor("op_7460_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_789_cast_fp16 = mul(x = var_7459_cast_fp16, y = var_7460_to_fp16)[name = tensor("aw_chunk_789_cast_fp16")]; + tensor var_7463_equation_0 = const()[name = tensor("op_7463_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7463_cast_fp16 = einsum(equation = var_7463_equation_0, values = (var_7077_cast_fp16, var_6972_cast_fp16))[name = tensor("op_7463_cast_fp16")]; + tensor var_7464_to_fp16 = const()[name = tensor("op_7464_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_791_cast_fp16 = mul(x = var_7463_cast_fp16, y = var_7464_to_fp16)[name = tensor("aw_chunk_791_cast_fp16")]; + tensor var_7467_equation_0 = const()[name = tensor("op_7467_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7467_cast_fp16 = einsum(equation = var_7467_equation_0, values = (var_7081_cast_fp16, var_6979_cast_fp16))[name = tensor("op_7467_cast_fp16")]; + tensor var_7468_to_fp16 = const()[name = tensor("op_7468_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_793_cast_fp16 = mul(x = var_7467_cast_fp16, y = var_7468_to_fp16)[name = tensor("aw_chunk_793_cast_fp16")]; + tensor var_7471_equation_0 = const()[name = tensor("op_7471_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7471_cast_fp16 = einsum(equation = var_7471_equation_0, values = (var_7081_cast_fp16, var_6986_cast_fp16))[name = tensor("op_7471_cast_fp16")]; + tensor var_7472_to_fp16 = const()[name = tensor("op_7472_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_795_cast_fp16 = mul(x = var_7471_cast_fp16, y = var_7472_to_fp16)[name = tensor("aw_chunk_795_cast_fp16")]; + tensor var_7475_equation_0 = const()[name = tensor("op_7475_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7475_cast_fp16 = einsum(equation = var_7475_equation_0, values = (var_7081_cast_fp16, var_6993_cast_fp16))[name = tensor("op_7475_cast_fp16")]; + tensor var_7476_to_fp16 = const()[name = tensor("op_7476_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_797_cast_fp16 = mul(x = var_7475_cast_fp16, y = var_7476_to_fp16)[name = tensor("aw_chunk_797_cast_fp16")]; + tensor var_7479_equation_0 = const()[name = tensor("op_7479_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_7479_cast_fp16 = einsum(equation = var_7479_equation_0, values = (var_7081_cast_fp16, var_7000_cast_fp16))[name = tensor("op_7479_cast_fp16")]; + tensor var_7480_to_fp16 = const()[name = tensor("op_7480_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_799_cast_fp16 = mul(x = var_7479_cast_fp16, y = var_7480_to_fp16)[name = tensor("aw_chunk_799_cast_fp16")]; + tensor var_7482_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_641_cast_fp16)[name = tensor("op_7482_cast_fp16")]; + tensor var_7483_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_643_cast_fp16)[name = tensor("op_7483_cast_fp16")]; + tensor var_7484_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_645_cast_fp16)[name = tensor("op_7484_cast_fp16")]; + tensor var_7485_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_647_cast_fp16)[name = tensor("op_7485_cast_fp16")]; + tensor var_7486_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_649_cast_fp16)[name = tensor("op_7486_cast_fp16")]; + tensor var_7487_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_651_cast_fp16)[name = tensor("op_7487_cast_fp16")]; + tensor var_7488_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_653_cast_fp16)[name = tensor("op_7488_cast_fp16")]; + tensor var_7489_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_655_cast_fp16)[name = tensor("op_7489_cast_fp16")]; + tensor var_7490_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_657_cast_fp16)[name = tensor("op_7490_cast_fp16")]; + tensor var_7491_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_659_cast_fp16)[name = tensor("op_7491_cast_fp16")]; + tensor var_7492_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_661_cast_fp16)[name = tensor("op_7492_cast_fp16")]; + tensor var_7493_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_663_cast_fp16)[name = tensor("op_7493_cast_fp16")]; + tensor var_7494_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_665_cast_fp16)[name = tensor("op_7494_cast_fp16")]; + tensor var_7495_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_667_cast_fp16)[name = tensor("op_7495_cast_fp16")]; + tensor var_7496_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_669_cast_fp16)[name = tensor("op_7496_cast_fp16")]; + tensor var_7497_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_671_cast_fp16)[name = tensor("op_7497_cast_fp16")]; + tensor var_7498_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_673_cast_fp16)[name = tensor("op_7498_cast_fp16")]; + tensor var_7499_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_675_cast_fp16)[name = tensor("op_7499_cast_fp16")]; + tensor var_7500_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_677_cast_fp16)[name = tensor("op_7500_cast_fp16")]; + tensor var_7501_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_679_cast_fp16)[name = tensor("op_7501_cast_fp16")]; + tensor var_7502_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_681_cast_fp16)[name = tensor("op_7502_cast_fp16")]; + tensor var_7503_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_683_cast_fp16)[name = tensor("op_7503_cast_fp16")]; + tensor var_7504_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_685_cast_fp16)[name = tensor("op_7504_cast_fp16")]; + tensor var_7505_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_687_cast_fp16)[name = tensor("op_7505_cast_fp16")]; + tensor var_7506_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_689_cast_fp16)[name = tensor("op_7506_cast_fp16")]; + tensor var_7507_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_691_cast_fp16)[name = tensor("op_7507_cast_fp16")]; + tensor var_7508_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_693_cast_fp16)[name = tensor("op_7508_cast_fp16")]; + tensor var_7509_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_695_cast_fp16)[name = tensor("op_7509_cast_fp16")]; + tensor var_7510_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_697_cast_fp16)[name = tensor("op_7510_cast_fp16")]; + tensor var_7511_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_699_cast_fp16)[name = tensor("op_7511_cast_fp16")]; + tensor var_7512_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_701_cast_fp16)[name = tensor("op_7512_cast_fp16")]; + tensor var_7513_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_703_cast_fp16)[name = tensor("op_7513_cast_fp16")]; + tensor var_7514_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_705_cast_fp16)[name = tensor("op_7514_cast_fp16")]; + tensor var_7515_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_707_cast_fp16)[name = tensor("op_7515_cast_fp16")]; + tensor var_7516_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_709_cast_fp16)[name = tensor("op_7516_cast_fp16")]; + tensor var_7517_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_711_cast_fp16)[name = tensor("op_7517_cast_fp16")]; + tensor var_7518_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_713_cast_fp16)[name = tensor("op_7518_cast_fp16")]; + tensor var_7519_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_715_cast_fp16)[name = tensor("op_7519_cast_fp16")]; + tensor var_7520_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_717_cast_fp16)[name = tensor("op_7520_cast_fp16")]; + tensor var_7521_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_719_cast_fp16)[name = tensor("op_7521_cast_fp16")]; + tensor var_7522_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_721_cast_fp16)[name = tensor("op_7522_cast_fp16")]; + tensor var_7523_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_723_cast_fp16)[name = tensor("op_7523_cast_fp16")]; + tensor var_7524_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_725_cast_fp16)[name = tensor("op_7524_cast_fp16")]; + tensor var_7525_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_727_cast_fp16)[name = tensor("op_7525_cast_fp16")]; + tensor var_7526_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_729_cast_fp16)[name = tensor("op_7526_cast_fp16")]; + tensor var_7527_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_731_cast_fp16)[name = tensor("op_7527_cast_fp16")]; + tensor var_7528_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_733_cast_fp16)[name = tensor("op_7528_cast_fp16")]; + tensor var_7529_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_735_cast_fp16)[name = tensor("op_7529_cast_fp16")]; + tensor var_7530_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_737_cast_fp16)[name = tensor("op_7530_cast_fp16")]; + tensor var_7531_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_739_cast_fp16)[name = tensor("op_7531_cast_fp16")]; + tensor var_7532_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_741_cast_fp16)[name = tensor("op_7532_cast_fp16")]; + tensor var_7533_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_743_cast_fp16)[name = tensor("op_7533_cast_fp16")]; + tensor var_7534_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_745_cast_fp16)[name = tensor("op_7534_cast_fp16")]; + tensor var_7535_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_747_cast_fp16)[name = tensor("op_7535_cast_fp16")]; + tensor var_7536_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_749_cast_fp16)[name = tensor("op_7536_cast_fp16")]; + tensor var_7537_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_751_cast_fp16)[name = tensor("op_7537_cast_fp16")]; + tensor var_7538_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_753_cast_fp16)[name = tensor("op_7538_cast_fp16")]; + tensor var_7539_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_755_cast_fp16)[name = tensor("op_7539_cast_fp16")]; + tensor var_7540_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_757_cast_fp16)[name = tensor("op_7540_cast_fp16")]; + tensor var_7541_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_759_cast_fp16)[name = tensor("op_7541_cast_fp16")]; + tensor var_7542_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_761_cast_fp16)[name = tensor("op_7542_cast_fp16")]; + tensor var_7543_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_763_cast_fp16)[name = tensor("op_7543_cast_fp16")]; + tensor var_7544_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_765_cast_fp16)[name = tensor("op_7544_cast_fp16")]; + tensor var_7545_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_767_cast_fp16)[name = tensor("op_7545_cast_fp16")]; + tensor var_7546_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_769_cast_fp16)[name = tensor("op_7546_cast_fp16")]; + tensor var_7547_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_771_cast_fp16)[name = tensor("op_7547_cast_fp16")]; + tensor var_7548_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_773_cast_fp16)[name = tensor("op_7548_cast_fp16")]; + tensor var_7549_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_775_cast_fp16)[name = tensor("op_7549_cast_fp16")]; + tensor var_7550_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_777_cast_fp16)[name = tensor("op_7550_cast_fp16")]; + tensor var_7551_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_779_cast_fp16)[name = tensor("op_7551_cast_fp16")]; + tensor var_7552_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_781_cast_fp16)[name = tensor("op_7552_cast_fp16")]; + tensor var_7553_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_783_cast_fp16)[name = tensor("op_7553_cast_fp16")]; + tensor var_7554_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_785_cast_fp16)[name = tensor("op_7554_cast_fp16")]; + tensor var_7555_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_787_cast_fp16)[name = tensor("op_7555_cast_fp16")]; + tensor var_7556_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_789_cast_fp16)[name = tensor("op_7556_cast_fp16")]; + tensor var_7557_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_791_cast_fp16)[name = tensor("op_7557_cast_fp16")]; + tensor var_7558_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_793_cast_fp16)[name = tensor("op_7558_cast_fp16")]; + tensor var_7559_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_795_cast_fp16)[name = tensor("op_7559_cast_fp16")]; + tensor var_7560_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_797_cast_fp16)[name = tensor("op_7560_cast_fp16")]; + tensor var_7561_cast_fp16 = softmax(axis = var_6307, x = aw_chunk_799_cast_fp16)[name = tensor("op_7561_cast_fp16")]; + tensor var_7563_equation_0 = const()[name = tensor("op_7563_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7563_cast_fp16 = einsum(equation = var_7563_equation_0, values = (var_7083_cast_fp16, var_7482_cast_fp16))[name = tensor("op_7563_cast_fp16")]; + tensor var_7565_equation_0 = const()[name = tensor("op_7565_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7565_cast_fp16 = einsum(equation = var_7565_equation_0, values = (var_7083_cast_fp16, var_7483_cast_fp16))[name = tensor("op_7565_cast_fp16")]; + tensor var_7567_equation_0 = const()[name = tensor("op_7567_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7567_cast_fp16 = einsum(equation = var_7567_equation_0, values = (var_7083_cast_fp16, var_7484_cast_fp16))[name = tensor("op_7567_cast_fp16")]; + tensor var_7569_equation_0 = const()[name = tensor("op_7569_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7569_cast_fp16 = einsum(equation = var_7569_equation_0, values = (var_7083_cast_fp16, var_7485_cast_fp16))[name = tensor("op_7569_cast_fp16")]; + tensor var_7571_equation_0 = const()[name = tensor("op_7571_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7571_cast_fp16 = einsum(equation = var_7571_equation_0, values = (var_7087_cast_fp16, var_7486_cast_fp16))[name = tensor("op_7571_cast_fp16")]; + tensor var_7573_equation_0 = const()[name = tensor("op_7573_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7573_cast_fp16 = einsum(equation = var_7573_equation_0, values = (var_7087_cast_fp16, var_7487_cast_fp16))[name = tensor("op_7573_cast_fp16")]; + tensor var_7575_equation_0 = const()[name = tensor("op_7575_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7575_cast_fp16 = einsum(equation = var_7575_equation_0, values = (var_7087_cast_fp16, var_7488_cast_fp16))[name = tensor("op_7575_cast_fp16")]; + tensor var_7577_equation_0 = const()[name = tensor("op_7577_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7577_cast_fp16 = einsum(equation = var_7577_equation_0, values = (var_7087_cast_fp16, var_7489_cast_fp16))[name = tensor("op_7577_cast_fp16")]; + tensor var_7579_equation_0 = const()[name = tensor("op_7579_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7579_cast_fp16 = einsum(equation = var_7579_equation_0, values = (var_7091_cast_fp16, var_7490_cast_fp16))[name = tensor("op_7579_cast_fp16")]; + tensor var_7581_equation_0 = const()[name = tensor("op_7581_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7581_cast_fp16 = einsum(equation = var_7581_equation_0, values = (var_7091_cast_fp16, var_7491_cast_fp16))[name = tensor("op_7581_cast_fp16")]; + tensor var_7583_equation_0 = const()[name = tensor("op_7583_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7583_cast_fp16 = einsum(equation = var_7583_equation_0, values = (var_7091_cast_fp16, var_7492_cast_fp16))[name = tensor("op_7583_cast_fp16")]; + tensor var_7585_equation_0 = const()[name = tensor("op_7585_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7585_cast_fp16 = einsum(equation = var_7585_equation_0, values = (var_7091_cast_fp16, var_7493_cast_fp16))[name = tensor("op_7585_cast_fp16")]; + tensor var_7587_equation_0 = const()[name = tensor("op_7587_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7587_cast_fp16 = einsum(equation = var_7587_equation_0, values = (var_7095_cast_fp16, var_7494_cast_fp16))[name = tensor("op_7587_cast_fp16")]; + tensor var_7589_equation_0 = const()[name = tensor("op_7589_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7589_cast_fp16 = einsum(equation = var_7589_equation_0, values = (var_7095_cast_fp16, var_7495_cast_fp16))[name = tensor("op_7589_cast_fp16")]; + tensor var_7591_equation_0 = const()[name = tensor("op_7591_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7591_cast_fp16 = einsum(equation = var_7591_equation_0, values = (var_7095_cast_fp16, var_7496_cast_fp16))[name = tensor("op_7591_cast_fp16")]; + tensor var_7593_equation_0 = const()[name = tensor("op_7593_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7593_cast_fp16 = einsum(equation = var_7593_equation_0, values = (var_7095_cast_fp16, var_7497_cast_fp16))[name = tensor("op_7593_cast_fp16")]; + tensor var_7595_equation_0 = const()[name = tensor("op_7595_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7595_cast_fp16 = einsum(equation = var_7595_equation_0, values = (var_7099_cast_fp16, var_7498_cast_fp16))[name = tensor("op_7595_cast_fp16")]; + tensor var_7597_equation_0 = const()[name = tensor("op_7597_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7597_cast_fp16 = einsum(equation = var_7597_equation_0, values = (var_7099_cast_fp16, var_7499_cast_fp16))[name = tensor("op_7597_cast_fp16")]; + tensor var_7599_equation_0 = const()[name = tensor("op_7599_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7599_cast_fp16 = einsum(equation = var_7599_equation_0, values = (var_7099_cast_fp16, var_7500_cast_fp16))[name = tensor("op_7599_cast_fp16")]; + tensor var_7601_equation_0 = const()[name = tensor("op_7601_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7601_cast_fp16 = einsum(equation = var_7601_equation_0, values = (var_7099_cast_fp16, var_7501_cast_fp16))[name = tensor("op_7601_cast_fp16")]; + tensor var_7603_equation_0 = const()[name = tensor("op_7603_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7603_cast_fp16 = einsum(equation = var_7603_equation_0, values = (var_7103_cast_fp16, var_7502_cast_fp16))[name = tensor("op_7603_cast_fp16")]; + tensor var_7605_equation_0 = const()[name = tensor("op_7605_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7605_cast_fp16 = einsum(equation = var_7605_equation_0, values = (var_7103_cast_fp16, var_7503_cast_fp16))[name = tensor("op_7605_cast_fp16")]; + tensor var_7607_equation_0 = const()[name = tensor("op_7607_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7607_cast_fp16 = einsum(equation = var_7607_equation_0, values = (var_7103_cast_fp16, var_7504_cast_fp16))[name = tensor("op_7607_cast_fp16")]; + tensor var_7609_equation_0 = const()[name = tensor("op_7609_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7609_cast_fp16 = einsum(equation = var_7609_equation_0, values = (var_7103_cast_fp16, var_7505_cast_fp16))[name = tensor("op_7609_cast_fp16")]; + tensor var_7611_equation_0 = const()[name = tensor("op_7611_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7611_cast_fp16 = einsum(equation = var_7611_equation_0, values = (var_7107_cast_fp16, var_7506_cast_fp16))[name = tensor("op_7611_cast_fp16")]; + tensor var_7613_equation_0 = const()[name = tensor("op_7613_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7613_cast_fp16 = einsum(equation = var_7613_equation_0, values = (var_7107_cast_fp16, var_7507_cast_fp16))[name = tensor("op_7613_cast_fp16")]; + tensor var_7615_equation_0 = const()[name = tensor("op_7615_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7615_cast_fp16 = einsum(equation = var_7615_equation_0, values = (var_7107_cast_fp16, var_7508_cast_fp16))[name = tensor("op_7615_cast_fp16")]; + tensor var_7617_equation_0 = const()[name = tensor("op_7617_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7617_cast_fp16 = einsum(equation = var_7617_equation_0, values = (var_7107_cast_fp16, var_7509_cast_fp16))[name = tensor("op_7617_cast_fp16")]; + tensor var_7619_equation_0 = const()[name = tensor("op_7619_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7619_cast_fp16 = einsum(equation = var_7619_equation_0, values = (var_7111_cast_fp16, var_7510_cast_fp16))[name = tensor("op_7619_cast_fp16")]; + tensor var_7621_equation_0 = const()[name = tensor("op_7621_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7621_cast_fp16 = einsum(equation = var_7621_equation_0, values = (var_7111_cast_fp16, var_7511_cast_fp16))[name = tensor("op_7621_cast_fp16")]; + tensor var_7623_equation_0 = const()[name = tensor("op_7623_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7623_cast_fp16 = einsum(equation = var_7623_equation_0, values = (var_7111_cast_fp16, var_7512_cast_fp16))[name = tensor("op_7623_cast_fp16")]; + tensor var_7625_equation_0 = const()[name = tensor("op_7625_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7625_cast_fp16 = einsum(equation = var_7625_equation_0, values = (var_7111_cast_fp16, var_7513_cast_fp16))[name = tensor("op_7625_cast_fp16")]; + tensor var_7627_equation_0 = const()[name = tensor("op_7627_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7627_cast_fp16 = einsum(equation = var_7627_equation_0, values = (var_7115_cast_fp16, var_7514_cast_fp16))[name = tensor("op_7627_cast_fp16")]; + tensor var_7629_equation_0 = const()[name = tensor("op_7629_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7629_cast_fp16 = einsum(equation = var_7629_equation_0, values = (var_7115_cast_fp16, var_7515_cast_fp16))[name = tensor("op_7629_cast_fp16")]; + tensor var_7631_equation_0 = const()[name = tensor("op_7631_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7631_cast_fp16 = einsum(equation = var_7631_equation_0, values = (var_7115_cast_fp16, var_7516_cast_fp16))[name = tensor("op_7631_cast_fp16")]; + tensor var_7633_equation_0 = const()[name = tensor("op_7633_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7633_cast_fp16 = einsum(equation = var_7633_equation_0, values = (var_7115_cast_fp16, var_7517_cast_fp16))[name = tensor("op_7633_cast_fp16")]; + tensor var_7635_equation_0 = const()[name = tensor("op_7635_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7635_cast_fp16 = einsum(equation = var_7635_equation_0, values = (var_7119_cast_fp16, var_7518_cast_fp16))[name = tensor("op_7635_cast_fp16")]; + tensor var_7637_equation_0 = const()[name = tensor("op_7637_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7637_cast_fp16 = einsum(equation = var_7637_equation_0, values = (var_7119_cast_fp16, var_7519_cast_fp16))[name = tensor("op_7637_cast_fp16")]; + tensor var_7639_equation_0 = const()[name = tensor("op_7639_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7639_cast_fp16 = einsum(equation = var_7639_equation_0, values = (var_7119_cast_fp16, var_7520_cast_fp16))[name = tensor("op_7639_cast_fp16")]; + tensor var_7641_equation_0 = const()[name = tensor("op_7641_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7641_cast_fp16 = einsum(equation = var_7641_equation_0, values = (var_7119_cast_fp16, var_7521_cast_fp16))[name = tensor("op_7641_cast_fp16")]; + tensor var_7643_equation_0 = const()[name = tensor("op_7643_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7643_cast_fp16 = einsum(equation = var_7643_equation_0, values = (var_7123_cast_fp16, var_7522_cast_fp16))[name = tensor("op_7643_cast_fp16")]; + tensor var_7645_equation_0 = const()[name = tensor("op_7645_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7645_cast_fp16 = einsum(equation = var_7645_equation_0, values = (var_7123_cast_fp16, var_7523_cast_fp16))[name = tensor("op_7645_cast_fp16")]; + tensor var_7647_equation_0 = const()[name = tensor("op_7647_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7647_cast_fp16 = einsum(equation = var_7647_equation_0, values = (var_7123_cast_fp16, var_7524_cast_fp16))[name = tensor("op_7647_cast_fp16")]; + tensor var_7649_equation_0 = const()[name = tensor("op_7649_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7649_cast_fp16 = einsum(equation = var_7649_equation_0, values = (var_7123_cast_fp16, var_7525_cast_fp16))[name = tensor("op_7649_cast_fp16")]; + tensor var_7651_equation_0 = const()[name = tensor("op_7651_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7651_cast_fp16 = einsum(equation = var_7651_equation_0, values = (var_7127_cast_fp16, var_7526_cast_fp16))[name = tensor("op_7651_cast_fp16")]; + tensor var_7653_equation_0 = const()[name = tensor("op_7653_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7653_cast_fp16 = einsum(equation = var_7653_equation_0, values = (var_7127_cast_fp16, var_7527_cast_fp16))[name = tensor("op_7653_cast_fp16")]; + tensor var_7655_equation_0 = const()[name = tensor("op_7655_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7655_cast_fp16 = einsum(equation = var_7655_equation_0, values = (var_7127_cast_fp16, var_7528_cast_fp16))[name = tensor("op_7655_cast_fp16")]; + tensor var_7657_equation_0 = const()[name = tensor("op_7657_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7657_cast_fp16 = einsum(equation = var_7657_equation_0, values = (var_7127_cast_fp16, var_7529_cast_fp16))[name = tensor("op_7657_cast_fp16")]; + tensor var_7659_equation_0 = const()[name = tensor("op_7659_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7659_cast_fp16 = einsum(equation = var_7659_equation_0, values = (var_7131_cast_fp16, var_7530_cast_fp16))[name = tensor("op_7659_cast_fp16")]; + tensor var_7661_equation_0 = const()[name = tensor("op_7661_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7661_cast_fp16 = einsum(equation = var_7661_equation_0, values = (var_7131_cast_fp16, var_7531_cast_fp16))[name = tensor("op_7661_cast_fp16")]; + tensor var_7663_equation_0 = const()[name = tensor("op_7663_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7663_cast_fp16 = einsum(equation = var_7663_equation_0, values = (var_7131_cast_fp16, var_7532_cast_fp16))[name = tensor("op_7663_cast_fp16")]; + tensor var_7665_equation_0 = const()[name = tensor("op_7665_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7665_cast_fp16 = einsum(equation = var_7665_equation_0, values = (var_7131_cast_fp16, var_7533_cast_fp16))[name = tensor("op_7665_cast_fp16")]; + tensor var_7667_equation_0 = const()[name = tensor("op_7667_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7667_cast_fp16 = einsum(equation = var_7667_equation_0, values = (var_7135_cast_fp16, var_7534_cast_fp16))[name = tensor("op_7667_cast_fp16")]; + tensor var_7669_equation_0 = const()[name = tensor("op_7669_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7669_cast_fp16 = einsum(equation = var_7669_equation_0, values = (var_7135_cast_fp16, var_7535_cast_fp16))[name = tensor("op_7669_cast_fp16")]; + tensor var_7671_equation_0 = const()[name = tensor("op_7671_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7671_cast_fp16 = einsum(equation = var_7671_equation_0, values = (var_7135_cast_fp16, var_7536_cast_fp16))[name = tensor("op_7671_cast_fp16")]; + tensor var_7673_equation_0 = const()[name = tensor("op_7673_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7673_cast_fp16 = einsum(equation = var_7673_equation_0, values = (var_7135_cast_fp16, var_7537_cast_fp16))[name = tensor("op_7673_cast_fp16")]; + tensor var_7675_equation_0 = const()[name = tensor("op_7675_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7675_cast_fp16 = einsum(equation = var_7675_equation_0, values = (var_7139_cast_fp16, var_7538_cast_fp16))[name = tensor("op_7675_cast_fp16")]; + tensor var_7677_equation_0 = const()[name = tensor("op_7677_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7677_cast_fp16 = einsum(equation = var_7677_equation_0, values = (var_7139_cast_fp16, var_7539_cast_fp16))[name = tensor("op_7677_cast_fp16")]; + tensor var_7679_equation_0 = const()[name = tensor("op_7679_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7679_cast_fp16 = einsum(equation = var_7679_equation_0, values = (var_7139_cast_fp16, var_7540_cast_fp16))[name = tensor("op_7679_cast_fp16")]; + tensor var_7681_equation_0 = const()[name = tensor("op_7681_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7681_cast_fp16 = einsum(equation = var_7681_equation_0, values = (var_7139_cast_fp16, var_7541_cast_fp16))[name = tensor("op_7681_cast_fp16")]; + tensor var_7683_equation_0 = const()[name = tensor("op_7683_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7683_cast_fp16 = einsum(equation = var_7683_equation_0, values = (var_7143_cast_fp16, var_7542_cast_fp16))[name = tensor("op_7683_cast_fp16")]; + tensor var_7685_equation_0 = const()[name = tensor("op_7685_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7685_cast_fp16 = einsum(equation = var_7685_equation_0, values = (var_7143_cast_fp16, var_7543_cast_fp16))[name = tensor("op_7685_cast_fp16")]; + tensor var_7687_equation_0 = const()[name = tensor("op_7687_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7687_cast_fp16 = einsum(equation = var_7687_equation_0, values = (var_7143_cast_fp16, var_7544_cast_fp16))[name = tensor("op_7687_cast_fp16")]; + tensor var_7689_equation_0 = const()[name = tensor("op_7689_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7689_cast_fp16 = einsum(equation = var_7689_equation_0, values = (var_7143_cast_fp16, var_7545_cast_fp16))[name = tensor("op_7689_cast_fp16")]; + tensor var_7691_equation_0 = const()[name = tensor("op_7691_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7691_cast_fp16 = einsum(equation = var_7691_equation_0, values = (var_7147_cast_fp16, var_7546_cast_fp16))[name = tensor("op_7691_cast_fp16")]; + tensor var_7693_equation_0 = const()[name = tensor("op_7693_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7693_cast_fp16 = einsum(equation = var_7693_equation_0, values = (var_7147_cast_fp16, var_7547_cast_fp16))[name = tensor("op_7693_cast_fp16")]; + tensor var_7695_equation_0 = const()[name = tensor("op_7695_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7695_cast_fp16 = einsum(equation = var_7695_equation_0, values = (var_7147_cast_fp16, var_7548_cast_fp16))[name = tensor("op_7695_cast_fp16")]; + tensor var_7697_equation_0 = const()[name = tensor("op_7697_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7697_cast_fp16 = einsum(equation = var_7697_equation_0, values = (var_7147_cast_fp16, var_7549_cast_fp16))[name = tensor("op_7697_cast_fp16")]; + tensor var_7699_equation_0 = const()[name = tensor("op_7699_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7699_cast_fp16 = einsum(equation = var_7699_equation_0, values = (var_7151_cast_fp16, var_7550_cast_fp16))[name = tensor("op_7699_cast_fp16")]; + tensor var_7701_equation_0 = const()[name = tensor("op_7701_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7701_cast_fp16 = einsum(equation = var_7701_equation_0, values = (var_7151_cast_fp16, var_7551_cast_fp16))[name = tensor("op_7701_cast_fp16")]; + tensor var_7703_equation_0 = const()[name = tensor("op_7703_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7703_cast_fp16 = einsum(equation = var_7703_equation_0, values = (var_7151_cast_fp16, var_7552_cast_fp16))[name = tensor("op_7703_cast_fp16")]; + tensor var_7705_equation_0 = const()[name = tensor("op_7705_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7705_cast_fp16 = einsum(equation = var_7705_equation_0, values = (var_7151_cast_fp16, var_7553_cast_fp16))[name = tensor("op_7705_cast_fp16")]; + tensor var_7707_equation_0 = const()[name = tensor("op_7707_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7707_cast_fp16 = einsum(equation = var_7707_equation_0, values = (var_7155_cast_fp16, var_7554_cast_fp16))[name = tensor("op_7707_cast_fp16")]; + tensor var_7709_equation_0 = const()[name = tensor("op_7709_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7709_cast_fp16 = einsum(equation = var_7709_equation_0, values = (var_7155_cast_fp16, var_7555_cast_fp16))[name = tensor("op_7709_cast_fp16")]; + tensor var_7711_equation_0 = const()[name = tensor("op_7711_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7711_cast_fp16 = einsum(equation = var_7711_equation_0, values = (var_7155_cast_fp16, var_7556_cast_fp16))[name = tensor("op_7711_cast_fp16")]; + tensor var_7713_equation_0 = const()[name = tensor("op_7713_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7713_cast_fp16 = einsum(equation = var_7713_equation_0, values = (var_7155_cast_fp16, var_7557_cast_fp16))[name = tensor("op_7713_cast_fp16")]; + tensor var_7715_equation_0 = const()[name = tensor("op_7715_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7715_cast_fp16 = einsum(equation = var_7715_equation_0, values = (var_7159_cast_fp16, var_7558_cast_fp16))[name = tensor("op_7715_cast_fp16")]; + tensor var_7717_equation_0 = const()[name = tensor("op_7717_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7717_cast_fp16 = einsum(equation = var_7717_equation_0, values = (var_7159_cast_fp16, var_7559_cast_fp16))[name = tensor("op_7717_cast_fp16")]; + tensor var_7719_equation_0 = const()[name = tensor("op_7719_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7719_cast_fp16 = einsum(equation = var_7719_equation_0, values = (var_7159_cast_fp16, var_7560_cast_fp16))[name = tensor("op_7719_cast_fp16")]; + tensor var_7721_equation_0 = const()[name = tensor("op_7721_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_7721_cast_fp16 = einsum(equation = var_7721_equation_0, values = (var_7159_cast_fp16, var_7561_cast_fp16))[name = tensor("op_7721_cast_fp16")]; + tensor var_7723_interleave_0 = const()[name = tensor("op_7723_interleave_0"), val = tensor(false)]; + tensor var_7723_cast_fp16 = concat(axis = var_6282, interleave = var_7723_interleave_0, values = (var_7563_cast_fp16, var_7565_cast_fp16, var_7567_cast_fp16, var_7569_cast_fp16))[name = tensor("op_7723_cast_fp16")]; + tensor var_7725_interleave_0 = const()[name = tensor("op_7725_interleave_0"), val = tensor(false)]; + tensor var_7725_cast_fp16 = concat(axis = var_6282, interleave = var_7725_interleave_0, values = (var_7571_cast_fp16, var_7573_cast_fp16, var_7575_cast_fp16, var_7577_cast_fp16))[name = tensor("op_7725_cast_fp16")]; + tensor var_7727_interleave_0 = const()[name = tensor("op_7727_interleave_0"), val = tensor(false)]; + tensor var_7727_cast_fp16 = concat(axis = var_6282, interleave = var_7727_interleave_0, values = (var_7579_cast_fp16, var_7581_cast_fp16, var_7583_cast_fp16, var_7585_cast_fp16))[name = tensor("op_7727_cast_fp16")]; + tensor var_7729_interleave_0 = const()[name = tensor("op_7729_interleave_0"), val = tensor(false)]; + tensor var_7729_cast_fp16 = concat(axis = var_6282, interleave = var_7729_interleave_0, values = (var_7587_cast_fp16, var_7589_cast_fp16, var_7591_cast_fp16, var_7593_cast_fp16))[name = tensor("op_7729_cast_fp16")]; + tensor var_7731_interleave_0 = const()[name = tensor("op_7731_interleave_0"), val = tensor(false)]; + tensor var_7731_cast_fp16 = concat(axis = var_6282, interleave = var_7731_interleave_0, values = (var_7595_cast_fp16, var_7597_cast_fp16, var_7599_cast_fp16, var_7601_cast_fp16))[name = tensor("op_7731_cast_fp16")]; + tensor var_7733_interleave_0 = const()[name = tensor("op_7733_interleave_0"), val = tensor(false)]; + tensor var_7733_cast_fp16 = concat(axis = var_6282, interleave = var_7733_interleave_0, values = (var_7603_cast_fp16, var_7605_cast_fp16, var_7607_cast_fp16, var_7609_cast_fp16))[name = tensor("op_7733_cast_fp16")]; + tensor var_7735_interleave_0 = const()[name = tensor("op_7735_interleave_0"), val = tensor(false)]; + tensor var_7735_cast_fp16 = concat(axis = var_6282, interleave = var_7735_interleave_0, values = (var_7611_cast_fp16, var_7613_cast_fp16, var_7615_cast_fp16, var_7617_cast_fp16))[name = tensor("op_7735_cast_fp16")]; + tensor var_7737_interleave_0 = const()[name = tensor("op_7737_interleave_0"), val = tensor(false)]; + tensor var_7737_cast_fp16 = concat(axis = var_6282, interleave = var_7737_interleave_0, values = (var_7619_cast_fp16, var_7621_cast_fp16, var_7623_cast_fp16, var_7625_cast_fp16))[name = tensor("op_7737_cast_fp16")]; + tensor var_7739_interleave_0 = const()[name = tensor("op_7739_interleave_0"), val = tensor(false)]; + tensor var_7739_cast_fp16 = concat(axis = var_6282, interleave = var_7739_interleave_0, values = (var_7627_cast_fp16, var_7629_cast_fp16, var_7631_cast_fp16, var_7633_cast_fp16))[name = tensor("op_7739_cast_fp16")]; + tensor var_7741_interleave_0 = const()[name = tensor("op_7741_interleave_0"), val = tensor(false)]; + tensor var_7741_cast_fp16 = concat(axis = var_6282, interleave = var_7741_interleave_0, values = (var_7635_cast_fp16, var_7637_cast_fp16, var_7639_cast_fp16, var_7641_cast_fp16))[name = tensor("op_7741_cast_fp16")]; + tensor var_7743_interleave_0 = const()[name = tensor("op_7743_interleave_0"), val = tensor(false)]; + tensor var_7743_cast_fp16 = concat(axis = var_6282, interleave = var_7743_interleave_0, values = (var_7643_cast_fp16, var_7645_cast_fp16, var_7647_cast_fp16, var_7649_cast_fp16))[name = tensor("op_7743_cast_fp16")]; + tensor var_7745_interleave_0 = const()[name = tensor("op_7745_interleave_0"), val = tensor(false)]; + tensor var_7745_cast_fp16 = concat(axis = var_6282, interleave = var_7745_interleave_0, values = (var_7651_cast_fp16, var_7653_cast_fp16, var_7655_cast_fp16, var_7657_cast_fp16))[name = tensor("op_7745_cast_fp16")]; + tensor var_7747_interleave_0 = const()[name = tensor("op_7747_interleave_0"), val = tensor(false)]; + tensor var_7747_cast_fp16 = concat(axis = var_6282, interleave = var_7747_interleave_0, values = (var_7659_cast_fp16, var_7661_cast_fp16, var_7663_cast_fp16, var_7665_cast_fp16))[name = tensor("op_7747_cast_fp16")]; + tensor var_7749_interleave_0 = const()[name = tensor("op_7749_interleave_0"), val = tensor(false)]; + tensor var_7749_cast_fp16 = concat(axis = var_6282, interleave = var_7749_interleave_0, values = (var_7667_cast_fp16, var_7669_cast_fp16, var_7671_cast_fp16, var_7673_cast_fp16))[name = tensor("op_7749_cast_fp16")]; + tensor var_7751_interleave_0 = const()[name = tensor("op_7751_interleave_0"), val = tensor(false)]; + tensor var_7751_cast_fp16 = concat(axis = var_6282, interleave = var_7751_interleave_0, values = (var_7675_cast_fp16, var_7677_cast_fp16, var_7679_cast_fp16, var_7681_cast_fp16))[name = tensor("op_7751_cast_fp16")]; + tensor var_7753_interleave_0 = const()[name = tensor("op_7753_interleave_0"), val = tensor(false)]; + tensor var_7753_cast_fp16 = concat(axis = var_6282, interleave = var_7753_interleave_0, values = (var_7683_cast_fp16, var_7685_cast_fp16, var_7687_cast_fp16, var_7689_cast_fp16))[name = tensor("op_7753_cast_fp16")]; + tensor var_7755_interleave_0 = const()[name = tensor("op_7755_interleave_0"), val = tensor(false)]; + tensor var_7755_cast_fp16 = concat(axis = var_6282, interleave = var_7755_interleave_0, values = (var_7691_cast_fp16, var_7693_cast_fp16, var_7695_cast_fp16, var_7697_cast_fp16))[name = tensor("op_7755_cast_fp16")]; + tensor var_7757_interleave_0 = const()[name = tensor("op_7757_interleave_0"), val = tensor(false)]; + tensor var_7757_cast_fp16 = concat(axis = var_6282, interleave = var_7757_interleave_0, values = (var_7699_cast_fp16, var_7701_cast_fp16, var_7703_cast_fp16, var_7705_cast_fp16))[name = tensor("op_7757_cast_fp16")]; + tensor var_7759_interleave_0 = const()[name = tensor("op_7759_interleave_0"), val = tensor(false)]; + tensor var_7759_cast_fp16 = concat(axis = var_6282, interleave = var_7759_interleave_0, values = (var_7707_cast_fp16, var_7709_cast_fp16, var_7711_cast_fp16, var_7713_cast_fp16))[name = tensor("op_7759_cast_fp16")]; + tensor var_7761_interleave_0 = const()[name = tensor("op_7761_interleave_0"), val = tensor(false)]; + tensor var_7761_cast_fp16 = concat(axis = var_6282, interleave = var_7761_interleave_0, values = (var_7715_cast_fp16, var_7717_cast_fp16, var_7719_cast_fp16, var_7721_cast_fp16))[name = tensor("op_7761_cast_fp16")]; + tensor input_33_interleave_0 = const()[name = tensor("input_33_interleave_0"), val = tensor(false)]; + tensor input_33_cast_fp16 = concat(axis = var_6307, interleave = input_33_interleave_0, values = (var_7723_cast_fp16, var_7725_cast_fp16, var_7727_cast_fp16, var_7729_cast_fp16, var_7731_cast_fp16, var_7733_cast_fp16, var_7735_cast_fp16, var_7737_cast_fp16, var_7739_cast_fp16, var_7741_cast_fp16, var_7743_cast_fp16, var_7745_cast_fp16, var_7747_cast_fp16, var_7749_cast_fp16, var_7751_cast_fp16, var_7753_cast_fp16, var_7755_cast_fp16, var_7757_cast_fp16, var_7759_cast_fp16, var_7761_cast_fp16))[name = tensor("input_33_cast_fp16")]; + tensor var_7766 = const()[name = tensor("op_7766"), val = tensor([1, 1])]; + tensor var_7768 = const()[name = tensor("op_7768"), val = tensor([1, 1])]; + tensor obj_19_pad_type_0 = const()[name = tensor("obj_19_pad_type_0"), val = tensor("custom")]; + tensor obj_19_pad_0 = const()[name = tensor("obj_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_4_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_4_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(181549760)))]; + tensor layers_4_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_4_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(184826624)))]; + tensor obj_19_cast_fp16 = conv(bias = layers_4_self_attn_o_proj_bias_to_fp16, dilations = var_7768, groups = var_6307, pad = obj_19_pad_0, pad_type = obj_19_pad_type_0, strides = var_7766, weight = layers_4_self_attn_o_proj_weight_to_fp16, x = input_33_cast_fp16)[name = tensor("obj_19_cast_fp16")]; + tensor inputs_19_cast_fp16 = add(x = inputs_17_cast_fp16, y = obj_19_cast_fp16)[name = tensor("inputs_19_cast_fp16")]; + tensor var_7774 = const()[name = tensor("op_7774"), val = tensor([1])]; + tensor channels_mean_19_cast_fp16 = reduce_mean(axes = var_7774, keep_dims = var_6308, x = inputs_19_cast_fp16)[name = tensor("channels_mean_19_cast_fp16")]; + tensor zero_mean_19_cast_fp16 = sub(x = inputs_19_cast_fp16, y = channels_mean_19_cast_fp16)[name = tensor("zero_mean_19_cast_fp16")]; + tensor zero_mean_sq_19_cast_fp16 = mul(x = zero_mean_19_cast_fp16, y = zero_mean_19_cast_fp16)[name = tensor("zero_mean_sq_19_cast_fp16")]; + tensor var_7778 = const()[name = tensor("op_7778"), val = tensor([1])]; + tensor var_7779_cast_fp16 = reduce_mean(axes = var_7778, keep_dims = var_6308, x = zero_mean_sq_19_cast_fp16)[name = tensor("op_7779_cast_fp16")]; + tensor var_7780_to_fp16 = const()[name = tensor("op_7780_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_7781_cast_fp16 = add(x = var_7779_cast_fp16, y = var_7780_to_fp16)[name = tensor("op_7781_cast_fp16")]; + tensor denom_19_epsilon_0_to_fp16 = const()[name = tensor("denom_19_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_19_cast_fp16 = rsqrt(epsilon = denom_19_epsilon_0_to_fp16, x = var_7781_cast_fp16)[name = tensor("denom_19_cast_fp16")]; + tensor out_19_cast_fp16 = mul(x = zero_mean_19_cast_fp16, y = denom_19_cast_fp16)[name = tensor("out_19_cast_fp16")]; + tensor input_35_gamma_0_to_fp16 = const()[name = tensor("input_35_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(184829248)))]; + tensor input_35_beta_0_to_fp16 = const()[name = tensor("input_35_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(184831872)))]; + tensor input_35_epsilon_0_to_fp16 = const()[name = tensor("input_35_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_35_cast_fp16 = batch_norm(beta = input_35_beta_0_to_fp16, epsilon = input_35_epsilon_0_to_fp16, gamma = input_35_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_19_cast_fp16)[name = tensor("input_35_cast_fp16")]; + tensor var_7792 = const()[name = tensor("op_7792"), val = tensor([1, 1])]; + tensor var_7794 = const()[name = tensor("op_7794"), val = tensor([1, 1])]; + tensor input_37_pad_type_0 = const()[name = tensor("input_37_pad_type_0"), val = tensor("custom")]; + tensor input_37_pad_0 = const()[name = tensor("input_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_4_fc1_weight_to_fp16 = const()[name = tensor("layers_4_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(184834496)))]; + tensor layers_4_fc1_bias_to_fp16 = const()[name = tensor("layers_4_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(197941760)))]; + tensor input_37_cast_fp16 = conv(bias = layers_4_fc1_bias_to_fp16, dilations = var_7794, groups = var_6307, pad = input_37_pad_0, pad_type = input_37_pad_type_0, strides = var_7792, weight = layers_4_fc1_weight_to_fp16, x = input_35_cast_fp16)[name = tensor("input_37_cast_fp16")]; + tensor input_39_mode_0 = const()[name = tensor("input_39_mode_0"), val = tensor("EXACT")]; + tensor input_39_cast_fp16 = gelu(mode = input_39_mode_0, x = input_37_cast_fp16)[name = tensor("input_39_cast_fp16")]; + tensor var_7800 = const()[name = tensor("op_7800"), val = tensor([1, 1])]; + tensor var_7802 = const()[name = tensor("op_7802"), val = tensor([1, 1])]; + tensor hidden_states_13_pad_type_0 = const()[name = tensor("hidden_states_13_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_13_pad_0 = const()[name = tensor("hidden_states_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_4_fc2_weight_to_fp16 = const()[name = tensor("layers_4_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(197952064)))]; + tensor layers_4_fc2_bias_to_fp16 = const()[name = tensor("layers_4_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(211059328)))]; + tensor hidden_states_13_cast_fp16 = conv(bias = layers_4_fc2_bias_to_fp16, dilations = var_7802, groups = var_6307, pad = hidden_states_13_pad_0, pad_type = hidden_states_13_pad_type_0, strides = var_7800, weight = layers_4_fc2_weight_to_fp16, x = input_39_cast_fp16)[name = tensor("hidden_states_13_cast_fp16")]; + tensor inputs_21_cast_fp16 = add(x = inputs_19_cast_fp16, y = hidden_states_13_cast_fp16)[name = tensor("inputs_21_cast_fp16")]; + tensor var_7809 = const()[name = tensor("op_7809"), val = tensor(3)]; + tensor var_7834 = const()[name = tensor("op_7834"), val = tensor(1)]; + tensor var_7835 = const()[name = tensor("op_7835"), val = tensor(true)]; + tensor var_7845 = const()[name = tensor("op_7845"), val = tensor([1])]; + tensor channels_mean_21_cast_fp16 = reduce_mean(axes = var_7845, keep_dims = var_7835, x = inputs_21_cast_fp16)[name = tensor("channels_mean_21_cast_fp16")]; + tensor zero_mean_21_cast_fp16 = sub(x = inputs_21_cast_fp16, y = channels_mean_21_cast_fp16)[name = tensor("zero_mean_21_cast_fp16")]; + tensor zero_mean_sq_21_cast_fp16 = mul(x = zero_mean_21_cast_fp16, y = zero_mean_21_cast_fp16)[name = tensor("zero_mean_sq_21_cast_fp16")]; + tensor var_7849 = const()[name = tensor("op_7849"), val = tensor([1])]; + tensor var_7850_cast_fp16 = reduce_mean(axes = var_7849, keep_dims = var_7835, x = zero_mean_sq_21_cast_fp16)[name = tensor("op_7850_cast_fp16")]; + tensor var_7851_to_fp16 = const()[name = tensor("op_7851_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_7852_cast_fp16 = add(x = var_7850_cast_fp16, y = var_7851_to_fp16)[name = tensor("op_7852_cast_fp16")]; + tensor denom_21_epsilon_0_to_fp16 = const()[name = tensor("denom_21_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_21_cast_fp16 = rsqrt(epsilon = denom_21_epsilon_0_to_fp16, x = var_7852_cast_fp16)[name = tensor("denom_21_cast_fp16")]; + tensor out_21_cast_fp16 = mul(x = zero_mean_21_cast_fp16, y = denom_21_cast_fp16)[name = tensor("out_21_cast_fp16")]; + tensor obj_21_gamma_0_to_fp16 = const()[name = tensor("obj_21_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(211061952)))]; + tensor obj_21_beta_0_to_fp16 = const()[name = tensor("obj_21_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(211064576)))]; + tensor obj_21_epsilon_0_to_fp16 = const()[name = tensor("obj_21_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_21_cast_fp16 = batch_norm(beta = obj_21_beta_0_to_fp16, epsilon = obj_21_epsilon_0_to_fp16, gamma = obj_21_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_21_cast_fp16)[name = tensor("obj_21_cast_fp16")]; + tensor var_7867 = const()[name = tensor("op_7867"), val = tensor([1, 1])]; + tensor var_7869 = const()[name = tensor("op_7869"), val = tensor([1, 1])]; + tensor query_11_pad_type_0 = const()[name = tensor("query_11_pad_type_0"), val = tensor("custom")]; + tensor query_11_pad_0 = const()[name = tensor("query_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_5_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_5_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(211067200)))]; + tensor layers_5_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_5_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(214344064)))]; + tensor query_11_cast_fp16 = conv(bias = layers_5_self_attn_q_proj_bias_to_fp16, dilations = var_7869, groups = var_7834, pad = query_11_pad_0, pad_type = query_11_pad_type_0, strides = var_7867, weight = layers_5_self_attn_q_proj_weight_to_fp16, x = obj_21_cast_fp16)[name = tensor("query_11_cast_fp16")]; + tensor var_7873 = const()[name = tensor("op_7873"), val = tensor([1, 1])]; + tensor var_7875 = const()[name = tensor("op_7875"), val = tensor([1, 1])]; + tensor key_11_pad_type_0 = const()[name = tensor("key_11_pad_type_0"), val = tensor("custom")]; + tensor key_11_pad_0 = const()[name = tensor("key_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_5_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_5_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(214346688)))]; + tensor key_11_cast_fp16 = conv(dilations = var_7875, groups = var_7834, pad = key_11_pad_0, pad_type = key_11_pad_type_0, strides = var_7873, weight = layers_5_self_attn_k_proj_weight_to_fp16, x = obj_21_cast_fp16)[name = tensor("key_11_cast_fp16")]; + tensor var_7880 = const()[name = tensor("op_7880"), val = tensor([1, 1])]; + tensor var_7882 = const()[name = tensor("op_7882"), val = tensor([1, 1])]; + tensor value_11_pad_type_0 = const()[name = tensor("value_11_pad_type_0"), val = tensor("custom")]; + tensor value_11_pad_0 = const()[name = tensor("value_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_5_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_5_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(217623552)))]; + tensor layers_5_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_5_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(220900416)))]; + tensor value_11_cast_fp16 = conv(bias = layers_5_self_attn_v_proj_bias_to_fp16, dilations = var_7882, groups = var_7834, pad = value_11_pad_0, pad_type = value_11_pad_type_0, strides = var_7880, weight = layers_5_self_attn_v_proj_weight_to_fp16, x = obj_21_cast_fp16)[name = tensor("value_11_cast_fp16")]; + tensor var_7889_begin_0 = const()[name = tensor("op_7889_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_7889_end_0 = const()[name = tensor("op_7889_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_7889_end_mask_0 = const()[name = tensor("op_7889_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7889_cast_fp16 = slice_by_index(begin = var_7889_begin_0, end = var_7889_end_0, end_mask = var_7889_end_mask_0, x = query_11_cast_fp16)[name = tensor("op_7889_cast_fp16")]; + tensor var_7893_begin_0 = const()[name = tensor("op_7893_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_7893_end_0 = const()[name = tensor("op_7893_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_7893_end_mask_0 = const()[name = tensor("op_7893_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7893_cast_fp16 = slice_by_index(begin = var_7893_begin_0, end = var_7893_end_0, end_mask = var_7893_end_mask_0, x = query_11_cast_fp16)[name = tensor("op_7893_cast_fp16")]; + tensor var_7897_begin_0 = const()[name = tensor("op_7897_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_7897_end_0 = const()[name = tensor("op_7897_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_7897_end_mask_0 = const()[name = tensor("op_7897_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7897_cast_fp16 = slice_by_index(begin = var_7897_begin_0, end = var_7897_end_0, end_mask = var_7897_end_mask_0, x = query_11_cast_fp16)[name = tensor("op_7897_cast_fp16")]; + tensor var_7901_begin_0 = const()[name = tensor("op_7901_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_7901_end_0 = const()[name = tensor("op_7901_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_7901_end_mask_0 = const()[name = tensor("op_7901_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7901_cast_fp16 = slice_by_index(begin = var_7901_begin_0, end = var_7901_end_0, end_mask = var_7901_end_mask_0, x = query_11_cast_fp16)[name = tensor("op_7901_cast_fp16")]; + tensor var_7905_begin_0 = const()[name = tensor("op_7905_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_7905_end_0 = const()[name = tensor("op_7905_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_7905_end_mask_0 = const()[name = tensor("op_7905_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7905_cast_fp16 = slice_by_index(begin = var_7905_begin_0, end = var_7905_end_0, end_mask = var_7905_end_mask_0, x = query_11_cast_fp16)[name = tensor("op_7905_cast_fp16")]; + tensor var_7909_begin_0 = const()[name = tensor("op_7909_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_7909_end_0 = const()[name = tensor("op_7909_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_7909_end_mask_0 = const()[name = tensor("op_7909_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7909_cast_fp16 = slice_by_index(begin = var_7909_begin_0, end = var_7909_end_0, end_mask = var_7909_end_mask_0, x = query_11_cast_fp16)[name = tensor("op_7909_cast_fp16")]; + tensor var_7913_begin_0 = const()[name = tensor("op_7913_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_7913_end_0 = const()[name = tensor("op_7913_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_7913_end_mask_0 = const()[name = tensor("op_7913_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7913_cast_fp16 = slice_by_index(begin = var_7913_begin_0, end = var_7913_end_0, end_mask = var_7913_end_mask_0, x = query_11_cast_fp16)[name = tensor("op_7913_cast_fp16")]; + tensor var_7917_begin_0 = const()[name = tensor("op_7917_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_7917_end_0 = const()[name = tensor("op_7917_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_7917_end_mask_0 = const()[name = tensor("op_7917_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7917_cast_fp16 = slice_by_index(begin = var_7917_begin_0, end = var_7917_end_0, end_mask = var_7917_end_mask_0, x = query_11_cast_fp16)[name = tensor("op_7917_cast_fp16")]; + tensor var_7921_begin_0 = const()[name = tensor("op_7921_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_7921_end_0 = const()[name = tensor("op_7921_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_7921_end_mask_0 = const()[name = tensor("op_7921_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7921_cast_fp16 = slice_by_index(begin = var_7921_begin_0, end = var_7921_end_0, end_mask = var_7921_end_mask_0, x = query_11_cast_fp16)[name = tensor("op_7921_cast_fp16")]; + tensor var_7925_begin_0 = const()[name = tensor("op_7925_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_7925_end_0 = const()[name = tensor("op_7925_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_7925_end_mask_0 = const()[name = tensor("op_7925_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7925_cast_fp16 = slice_by_index(begin = var_7925_begin_0, end = var_7925_end_0, end_mask = var_7925_end_mask_0, x = query_11_cast_fp16)[name = tensor("op_7925_cast_fp16")]; + tensor var_7929_begin_0 = const()[name = tensor("op_7929_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_7929_end_0 = const()[name = tensor("op_7929_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_7929_end_mask_0 = const()[name = tensor("op_7929_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7929_cast_fp16 = slice_by_index(begin = var_7929_begin_0, end = var_7929_end_0, end_mask = var_7929_end_mask_0, x = query_11_cast_fp16)[name = tensor("op_7929_cast_fp16")]; + tensor var_7933_begin_0 = const()[name = tensor("op_7933_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_7933_end_0 = const()[name = tensor("op_7933_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_7933_end_mask_0 = const()[name = tensor("op_7933_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7933_cast_fp16 = slice_by_index(begin = var_7933_begin_0, end = var_7933_end_0, end_mask = var_7933_end_mask_0, x = query_11_cast_fp16)[name = tensor("op_7933_cast_fp16")]; + tensor var_7937_begin_0 = const()[name = tensor("op_7937_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_7937_end_0 = const()[name = tensor("op_7937_end_0"), val = tensor([1, 832, 1, 1500])]; + tensor var_7937_end_mask_0 = const()[name = tensor("op_7937_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7937_cast_fp16 = slice_by_index(begin = var_7937_begin_0, end = var_7937_end_0, end_mask = var_7937_end_mask_0, x = query_11_cast_fp16)[name = tensor("op_7937_cast_fp16")]; + tensor var_7941_begin_0 = const()[name = tensor("op_7941_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_7941_end_0 = const()[name = tensor("op_7941_end_0"), val = tensor([1, 896, 1, 1500])]; + tensor var_7941_end_mask_0 = const()[name = tensor("op_7941_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7941_cast_fp16 = slice_by_index(begin = var_7941_begin_0, end = var_7941_end_0, end_mask = var_7941_end_mask_0, x = query_11_cast_fp16)[name = tensor("op_7941_cast_fp16")]; + tensor var_7945_begin_0 = const()[name = tensor("op_7945_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_7945_end_0 = const()[name = tensor("op_7945_end_0"), val = tensor([1, 960, 1, 1500])]; + tensor var_7945_end_mask_0 = const()[name = tensor("op_7945_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7945_cast_fp16 = slice_by_index(begin = var_7945_begin_0, end = var_7945_end_0, end_mask = var_7945_end_mask_0, x = query_11_cast_fp16)[name = tensor("op_7945_cast_fp16")]; + tensor var_7949_begin_0 = const()[name = tensor("op_7949_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_7949_end_0 = const()[name = tensor("op_7949_end_0"), val = tensor([1, 1024, 1, 1500])]; + tensor var_7949_end_mask_0 = const()[name = tensor("op_7949_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7949_cast_fp16 = slice_by_index(begin = var_7949_begin_0, end = var_7949_end_0, end_mask = var_7949_end_mask_0, x = query_11_cast_fp16)[name = tensor("op_7949_cast_fp16")]; + tensor var_7953_begin_0 = const()[name = tensor("op_7953_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_7953_end_0 = const()[name = tensor("op_7953_end_0"), val = tensor([1, 1088, 1, 1500])]; + tensor var_7953_end_mask_0 = const()[name = tensor("op_7953_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7953_cast_fp16 = slice_by_index(begin = var_7953_begin_0, end = var_7953_end_0, end_mask = var_7953_end_mask_0, x = query_11_cast_fp16)[name = tensor("op_7953_cast_fp16")]; + tensor var_7957_begin_0 = const()[name = tensor("op_7957_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_7957_end_0 = const()[name = tensor("op_7957_end_0"), val = tensor([1, 1152, 1, 1500])]; + tensor var_7957_end_mask_0 = const()[name = tensor("op_7957_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7957_cast_fp16 = slice_by_index(begin = var_7957_begin_0, end = var_7957_end_0, end_mask = var_7957_end_mask_0, x = query_11_cast_fp16)[name = tensor("op_7957_cast_fp16")]; + tensor var_7961_begin_0 = const()[name = tensor("op_7961_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_7961_end_0 = const()[name = tensor("op_7961_end_0"), val = tensor([1, 1216, 1, 1500])]; + tensor var_7961_end_mask_0 = const()[name = tensor("op_7961_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7961_cast_fp16 = slice_by_index(begin = var_7961_begin_0, end = var_7961_end_0, end_mask = var_7961_end_mask_0, x = query_11_cast_fp16)[name = tensor("op_7961_cast_fp16")]; + tensor var_7965_begin_0 = const()[name = tensor("op_7965_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_7965_end_0 = const()[name = tensor("op_7965_end_0"), val = tensor([1, 1280, 1, 1500])]; + tensor var_7965_end_mask_0 = const()[name = tensor("op_7965_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_7965_cast_fp16 = slice_by_index(begin = var_7965_begin_0, end = var_7965_end_0, end_mask = var_7965_end_mask_0, x = query_11_cast_fp16)[name = tensor("op_7965_cast_fp16")]; + tensor var_7974_begin_0 = const()[name = tensor("op_7974_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_7974_end_0 = const()[name = tensor("op_7974_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_7974_end_mask_0 = const()[name = tensor("op_7974_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7974_cast_fp16 = slice_by_index(begin = var_7974_begin_0, end = var_7974_end_0, end_mask = var_7974_end_mask_0, x = var_7889_cast_fp16)[name = tensor("op_7974_cast_fp16")]; + tensor var_7981_begin_0 = const()[name = tensor("op_7981_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_7981_end_0 = const()[name = tensor("op_7981_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_7981_end_mask_0 = const()[name = tensor("op_7981_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7981_cast_fp16 = slice_by_index(begin = var_7981_begin_0, end = var_7981_end_0, end_mask = var_7981_end_mask_0, x = var_7889_cast_fp16)[name = tensor("op_7981_cast_fp16")]; + tensor var_7988_begin_0 = const()[name = tensor("op_7988_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_7988_end_0 = const()[name = tensor("op_7988_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_7988_end_mask_0 = const()[name = tensor("op_7988_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7988_cast_fp16 = slice_by_index(begin = var_7988_begin_0, end = var_7988_end_0, end_mask = var_7988_end_mask_0, x = var_7889_cast_fp16)[name = tensor("op_7988_cast_fp16")]; + tensor var_7995_begin_0 = const()[name = tensor("op_7995_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_7995_end_0 = const()[name = tensor("op_7995_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_7995_end_mask_0 = const()[name = tensor("op_7995_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_7995_cast_fp16 = slice_by_index(begin = var_7995_begin_0, end = var_7995_end_0, end_mask = var_7995_end_mask_0, x = var_7889_cast_fp16)[name = tensor("op_7995_cast_fp16")]; + tensor var_8002_begin_0 = const()[name = tensor("op_8002_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8002_end_0 = const()[name = tensor("op_8002_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_8002_end_mask_0 = const()[name = tensor("op_8002_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8002_cast_fp16 = slice_by_index(begin = var_8002_begin_0, end = var_8002_end_0, end_mask = var_8002_end_mask_0, x = var_7893_cast_fp16)[name = tensor("op_8002_cast_fp16")]; + tensor var_8009_begin_0 = const()[name = tensor("op_8009_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_8009_end_0 = const()[name = tensor("op_8009_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_8009_end_mask_0 = const()[name = tensor("op_8009_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8009_cast_fp16 = slice_by_index(begin = var_8009_begin_0, end = var_8009_end_0, end_mask = var_8009_end_mask_0, x = var_7893_cast_fp16)[name = tensor("op_8009_cast_fp16")]; + tensor var_8016_begin_0 = const()[name = tensor("op_8016_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_8016_end_0 = const()[name = tensor("op_8016_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_8016_end_mask_0 = const()[name = tensor("op_8016_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8016_cast_fp16 = slice_by_index(begin = var_8016_begin_0, end = var_8016_end_0, end_mask = var_8016_end_mask_0, x = var_7893_cast_fp16)[name = tensor("op_8016_cast_fp16")]; + tensor var_8023_begin_0 = const()[name = tensor("op_8023_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_8023_end_0 = const()[name = tensor("op_8023_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_8023_end_mask_0 = const()[name = tensor("op_8023_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8023_cast_fp16 = slice_by_index(begin = var_8023_begin_0, end = var_8023_end_0, end_mask = var_8023_end_mask_0, x = var_7893_cast_fp16)[name = tensor("op_8023_cast_fp16")]; + tensor var_8030_begin_0 = const()[name = tensor("op_8030_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8030_end_0 = const()[name = tensor("op_8030_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_8030_end_mask_0 = const()[name = tensor("op_8030_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8030_cast_fp16 = slice_by_index(begin = var_8030_begin_0, end = var_8030_end_0, end_mask = var_8030_end_mask_0, x = var_7897_cast_fp16)[name = tensor("op_8030_cast_fp16")]; + tensor var_8037_begin_0 = const()[name = tensor("op_8037_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_8037_end_0 = const()[name = tensor("op_8037_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_8037_end_mask_0 = const()[name = tensor("op_8037_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8037_cast_fp16 = slice_by_index(begin = var_8037_begin_0, end = var_8037_end_0, end_mask = var_8037_end_mask_0, x = var_7897_cast_fp16)[name = tensor("op_8037_cast_fp16")]; + tensor var_8044_begin_0 = const()[name = tensor("op_8044_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_8044_end_0 = const()[name = tensor("op_8044_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_8044_end_mask_0 = const()[name = tensor("op_8044_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8044_cast_fp16 = slice_by_index(begin = var_8044_begin_0, end = var_8044_end_0, end_mask = var_8044_end_mask_0, x = var_7897_cast_fp16)[name = tensor("op_8044_cast_fp16")]; + tensor var_8051_begin_0 = const()[name = tensor("op_8051_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_8051_end_0 = const()[name = tensor("op_8051_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_8051_end_mask_0 = const()[name = tensor("op_8051_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8051_cast_fp16 = slice_by_index(begin = var_8051_begin_0, end = var_8051_end_0, end_mask = var_8051_end_mask_0, x = var_7897_cast_fp16)[name = tensor("op_8051_cast_fp16")]; + tensor var_8058_begin_0 = const()[name = tensor("op_8058_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8058_end_0 = const()[name = tensor("op_8058_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_8058_end_mask_0 = const()[name = tensor("op_8058_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8058_cast_fp16 = slice_by_index(begin = var_8058_begin_0, end = var_8058_end_0, end_mask = var_8058_end_mask_0, x = var_7901_cast_fp16)[name = tensor("op_8058_cast_fp16")]; + tensor var_8065_begin_0 = const()[name = tensor("op_8065_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_8065_end_0 = const()[name = tensor("op_8065_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_8065_end_mask_0 = const()[name = tensor("op_8065_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8065_cast_fp16 = slice_by_index(begin = var_8065_begin_0, end = var_8065_end_0, end_mask = var_8065_end_mask_0, x = var_7901_cast_fp16)[name = tensor("op_8065_cast_fp16")]; + tensor var_8072_begin_0 = const()[name = tensor("op_8072_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_8072_end_0 = const()[name = tensor("op_8072_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_8072_end_mask_0 = const()[name = tensor("op_8072_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8072_cast_fp16 = slice_by_index(begin = var_8072_begin_0, end = var_8072_end_0, end_mask = var_8072_end_mask_0, x = var_7901_cast_fp16)[name = tensor("op_8072_cast_fp16")]; + tensor var_8079_begin_0 = const()[name = tensor("op_8079_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_8079_end_0 = const()[name = tensor("op_8079_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_8079_end_mask_0 = const()[name = tensor("op_8079_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8079_cast_fp16 = slice_by_index(begin = var_8079_begin_0, end = var_8079_end_0, end_mask = var_8079_end_mask_0, x = var_7901_cast_fp16)[name = tensor("op_8079_cast_fp16")]; + tensor var_8086_begin_0 = const()[name = tensor("op_8086_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8086_end_0 = const()[name = tensor("op_8086_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_8086_end_mask_0 = const()[name = tensor("op_8086_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8086_cast_fp16 = slice_by_index(begin = var_8086_begin_0, end = var_8086_end_0, end_mask = var_8086_end_mask_0, x = var_7905_cast_fp16)[name = tensor("op_8086_cast_fp16")]; + tensor var_8093_begin_0 = const()[name = tensor("op_8093_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_8093_end_0 = const()[name = tensor("op_8093_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_8093_end_mask_0 = const()[name = tensor("op_8093_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8093_cast_fp16 = slice_by_index(begin = var_8093_begin_0, end = var_8093_end_0, end_mask = var_8093_end_mask_0, x = var_7905_cast_fp16)[name = tensor("op_8093_cast_fp16")]; + tensor var_8100_begin_0 = const()[name = tensor("op_8100_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_8100_end_0 = const()[name = tensor("op_8100_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_8100_end_mask_0 = const()[name = tensor("op_8100_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8100_cast_fp16 = slice_by_index(begin = var_8100_begin_0, end = var_8100_end_0, end_mask = var_8100_end_mask_0, x = var_7905_cast_fp16)[name = tensor("op_8100_cast_fp16")]; + tensor var_8107_begin_0 = const()[name = tensor("op_8107_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_8107_end_0 = const()[name = tensor("op_8107_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_8107_end_mask_0 = const()[name = tensor("op_8107_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8107_cast_fp16 = slice_by_index(begin = var_8107_begin_0, end = var_8107_end_0, end_mask = var_8107_end_mask_0, x = var_7905_cast_fp16)[name = tensor("op_8107_cast_fp16")]; + tensor var_8114_begin_0 = const()[name = tensor("op_8114_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8114_end_0 = const()[name = tensor("op_8114_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_8114_end_mask_0 = const()[name = tensor("op_8114_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8114_cast_fp16 = slice_by_index(begin = var_8114_begin_0, end = var_8114_end_0, end_mask = var_8114_end_mask_0, x = var_7909_cast_fp16)[name = tensor("op_8114_cast_fp16")]; + tensor var_8121_begin_0 = const()[name = tensor("op_8121_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_8121_end_0 = const()[name = tensor("op_8121_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_8121_end_mask_0 = const()[name = tensor("op_8121_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8121_cast_fp16 = slice_by_index(begin = var_8121_begin_0, end = var_8121_end_0, end_mask = var_8121_end_mask_0, x = var_7909_cast_fp16)[name = tensor("op_8121_cast_fp16")]; + tensor var_8128_begin_0 = const()[name = tensor("op_8128_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_8128_end_0 = const()[name = tensor("op_8128_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_8128_end_mask_0 = const()[name = tensor("op_8128_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8128_cast_fp16 = slice_by_index(begin = var_8128_begin_0, end = var_8128_end_0, end_mask = var_8128_end_mask_0, x = var_7909_cast_fp16)[name = tensor("op_8128_cast_fp16")]; + tensor var_8135_begin_0 = const()[name = tensor("op_8135_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_8135_end_0 = const()[name = tensor("op_8135_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_8135_end_mask_0 = const()[name = tensor("op_8135_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8135_cast_fp16 = slice_by_index(begin = var_8135_begin_0, end = var_8135_end_0, end_mask = var_8135_end_mask_0, x = var_7909_cast_fp16)[name = tensor("op_8135_cast_fp16")]; + tensor var_8142_begin_0 = const()[name = tensor("op_8142_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8142_end_0 = const()[name = tensor("op_8142_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_8142_end_mask_0 = const()[name = tensor("op_8142_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8142_cast_fp16 = slice_by_index(begin = var_8142_begin_0, end = var_8142_end_0, end_mask = var_8142_end_mask_0, x = var_7913_cast_fp16)[name = tensor("op_8142_cast_fp16")]; + tensor var_8149_begin_0 = const()[name = tensor("op_8149_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_8149_end_0 = const()[name = tensor("op_8149_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_8149_end_mask_0 = const()[name = tensor("op_8149_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8149_cast_fp16 = slice_by_index(begin = var_8149_begin_0, end = var_8149_end_0, end_mask = var_8149_end_mask_0, x = var_7913_cast_fp16)[name = tensor("op_8149_cast_fp16")]; + tensor var_8156_begin_0 = const()[name = tensor("op_8156_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_8156_end_0 = const()[name = tensor("op_8156_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_8156_end_mask_0 = const()[name = tensor("op_8156_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8156_cast_fp16 = slice_by_index(begin = var_8156_begin_0, end = var_8156_end_0, end_mask = var_8156_end_mask_0, x = var_7913_cast_fp16)[name = tensor("op_8156_cast_fp16")]; + tensor var_8163_begin_0 = const()[name = tensor("op_8163_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_8163_end_0 = const()[name = tensor("op_8163_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_8163_end_mask_0 = const()[name = tensor("op_8163_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8163_cast_fp16 = slice_by_index(begin = var_8163_begin_0, end = var_8163_end_0, end_mask = var_8163_end_mask_0, x = var_7913_cast_fp16)[name = tensor("op_8163_cast_fp16")]; + tensor var_8170_begin_0 = const()[name = tensor("op_8170_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8170_end_0 = const()[name = tensor("op_8170_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_8170_end_mask_0 = const()[name = tensor("op_8170_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8170_cast_fp16 = slice_by_index(begin = var_8170_begin_0, end = var_8170_end_0, end_mask = var_8170_end_mask_0, x = var_7917_cast_fp16)[name = tensor("op_8170_cast_fp16")]; + tensor var_8177_begin_0 = const()[name = tensor("op_8177_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_8177_end_0 = const()[name = tensor("op_8177_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_8177_end_mask_0 = const()[name = tensor("op_8177_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8177_cast_fp16 = slice_by_index(begin = var_8177_begin_0, end = var_8177_end_0, end_mask = var_8177_end_mask_0, x = var_7917_cast_fp16)[name = tensor("op_8177_cast_fp16")]; + tensor var_8184_begin_0 = const()[name = tensor("op_8184_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_8184_end_0 = const()[name = tensor("op_8184_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_8184_end_mask_0 = const()[name = tensor("op_8184_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8184_cast_fp16 = slice_by_index(begin = var_8184_begin_0, end = var_8184_end_0, end_mask = var_8184_end_mask_0, x = var_7917_cast_fp16)[name = tensor("op_8184_cast_fp16")]; + tensor var_8191_begin_0 = const()[name = tensor("op_8191_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_8191_end_0 = const()[name = tensor("op_8191_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_8191_end_mask_0 = const()[name = tensor("op_8191_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8191_cast_fp16 = slice_by_index(begin = var_8191_begin_0, end = var_8191_end_0, end_mask = var_8191_end_mask_0, x = var_7917_cast_fp16)[name = tensor("op_8191_cast_fp16")]; + tensor var_8198_begin_0 = const()[name = tensor("op_8198_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8198_end_0 = const()[name = tensor("op_8198_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_8198_end_mask_0 = const()[name = tensor("op_8198_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8198_cast_fp16 = slice_by_index(begin = var_8198_begin_0, end = var_8198_end_0, end_mask = var_8198_end_mask_0, x = var_7921_cast_fp16)[name = tensor("op_8198_cast_fp16")]; + tensor var_8205_begin_0 = const()[name = tensor("op_8205_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_8205_end_0 = const()[name = tensor("op_8205_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_8205_end_mask_0 = const()[name = tensor("op_8205_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8205_cast_fp16 = slice_by_index(begin = var_8205_begin_0, end = var_8205_end_0, end_mask = var_8205_end_mask_0, x = var_7921_cast_fp16)[name = tensor("op_8205_cast_fp16")]; + tensor var_8212_begin_0 = const()[name = tensor("op_8212_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_8212_end_0 = const()[name = tensor("op_8212_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_8212_end_mask_0 = const()[name = tensor("op_8212_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8212_cast_fp16 = slice_by_index(begin = var_8212_begin_0, end = var_8212_end_0, end_mask = var_8212_end_mask_0, x = var_7921_cast_fp16)[name = tensor("op_8212_cast_fp16")]; + tensor var_8219_begin_0 = const()[name = tensor("op_8219_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_8219_end_0 = const()[name = tensor("op_8219_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_8219_end_mask_0 = const()[name = tensor("op_8219_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8219_cast_fp16 = slice_by_index(begin = var_8219_begin_0, end = var_8219_end_0, end_mask = var_8219_end_mask_0, x = var_7921_cast_fp16)[name = tensor("op_8219_cast_fp16")]; + tensor var_8226_begin_0 = const()[name = tensor("op_8226_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8226_end_0 = const()[name = tensor("op_8226_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_8226_end_mask_0 = const()[name = tensor("op_8226_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8226_cast_fp16 = slice_by_index(begin = var_8226_begin_0, end = var_8226_end_0, end_mask = var_8226_end_mask_0, x = var_7925_cast_fp16)[name = tensor("op_8226_cast_fp16")]; + tensor var_8233_begin_0 = const()[name = tensor("op_8233_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_8233_end_0 = const()[name = tensor("op_8233_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_8233_end_mask_0 = const()[name = tensor("op_8233_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8233_cast_fp16 = slice_by_index(begin = var_8233_begin_0, end = var_8233_end_0, end_mask = var_8233_end_mask_0, x = var_7925_cast_fp16)[name = tensor("op_8233_cast_fp16")]; + tensor var_8240_begin_0 = const()[name = tensor("op_8240_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_8240_end_0 = const()[name = tensor("op_8240_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_8240_end_mask_0 = const()[name = tensor("op_8240_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8240_cast_fp16 = slice_by_index(begin = var_8240_begin_0, end = var_8240_end_0, end_mask = var_8240_end_mask_0, x = var_7925_cast_fp16)[name = tensor("op_8240_cast_fp16")]; + tensor var_8247_begin_0 = const()[name = tensor("op_8247_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_8247_end_0 = const()[name = tensor("op_8247_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_8247_end_mask_0 = const()[name = tensor("op_8247_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8247_cast_fp16 = slice_by_index(begin = var_8247_begin_0, end = var_8247_end_0, end_mask = var_8247_end_mask_0, x = var_7925_cast_fp16)[name = tensor("op_8247_cast_fp16")]; + tensor var_8254_begin_0 = const()[name = tensor("op_8254_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8254_end_0 = const()[name = tensor("op_8254_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_8254_end_mask_0 = const()[name = tensor("op_8254_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8254_cast_fp16 = slice_by_index(begin = var_8254_begin_0, end = var_8254_end_0, end_mask = var_8254_end_mask_0, x = var_7929_cast_fp16)[name = tensor("op_8254_cast_fp16")]; + tensor var_8261_begin_0 = const()[name = tensor("op_8261_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_8261_end_0 = const()[name = tensor("op_8261_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_8261_end_mask_0 = const()[name = tensor("op_8261_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8261_cast_fp16 = slice_by_index(begin = var_8261_begin_0, end = var_8261_end_0, end_mask = var_8261_end_mask_0, x = var_7929_cast_fp16)[name = tensor("op_8261_cast_fp16")]; + tensor var_8268_begin_0 = const()[name = tensor("op_8268_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_8268_end_0 = const()[name = tensor("op_8268_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_8268_end_mask_0 = const()[name = tensor("op_8268_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8268_cast_fp16 = slice_by_index(begin = var_8268_begin_0, end = var_8268_end_0, end_mask = var_8268_end_mask_0, x = var_7929_cast_fp16)[name = tensor("op_8268_cast_fp16")]; + tensor var_8275_begin_0 = const()[name = tensor("op_8275_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_8275_end_0 = const()[name = tensor("op_8275_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_8275_end_mask_0 = const()[name = tensor("op_8275_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8275_cast_fp16 = slice_by_index(begin = var_8275_begin_0, end = var_8275_end_0, end_mask = var_8275_end_mask_0, x = var_7929_cast_fp16)[name = tensor("op_8275_cast_fp16")]; + tensor var_8282_begin_0 = const()[name = tensor("op_8282_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8282_end_0 = const()[name = tensor("op_8282_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_8282_end_mask_0 = const()[name = tensor("op_8282_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8282_cast_fp16 = slice_by_index(begin = var_8282_begin_0, end = var_8282_end_0, end_mask = var_8282_end_mask_0, x = var_7933_cast_fp16)[name = tensor("op_8282_cast_fp16")]; + tensor var_8289_begin_0 = const()[name = tensor("op_8289_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_8289_end_0 = const()[name = tensor("op_8289_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_8289_end_mask_0 = const()[name = tensor("op_8289_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8289_cast_fp16 = slice_by_index(begin = var_8289_begin_0, end = var_8289_end_0, end_mask = var_8289_end_mask_0, x = var_7933_cast_fp16)[name = tensor("op_8289_cast_fp16")]; + tensor var_8296_begin_0 = const()[name = tensor("op_8296_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_8296_end_0 = const()[name = tensor("op_8296_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_8296_end_mask_0 = const()[name = tensor("op_8296_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8296_cast_fp16 = slice_by_index(begin = var_8296_begin_0, end = var_8296_end_0, end_mask = var_8296_end_mask_0, x = var_7933_cast_fp16)[name = tensor("op_8296_cast_fp16")]; + tensor var_8303_begin_0 = const()[name = tensor("op_8303_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_8303_end_0 = const()[name = tensor("op_8303_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_8303_end_mask_0 = const()[name = tensor("op_8303_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8303_cast_fp16 = slice_by_index(begin = var_8303_begin_0, end = var_8303_end_0, end_mask = var_8303_end_mask_0, x = var_7933_cast_fp16)[name = tensor("op_8303_cast_fp16")]; + tensor var_8310_begin_0 = const()[name = tensor("op_8310_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8310_end_0 = const()[name = tensor("op_8310_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_8310_end_mask_0 = const()[name = tensor("op_8310_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8310_cast_fp16 = slice_by_index(begin = var_8310_begin_0, end = var_8310_end_0, end_mask = var_8310_end_mask_0, x = var_7937_cast_fp16)[name = tensor("op_8310_cast_fp16")]; + tensor var_8317_begin_0 = const()[name = tensor("op_8317_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_8317_end_0 = const()[name = tensor("op_8317_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_8317_end_mask_0 = const()[name = tensor("op_8317_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8317_cast_fp16 = slice_by_index(begin = var_8317_begin_0, end = var_8317_end_0, end_mask = var_8317_end_mask_0, x = var_7937_cast_fp16)[name = tensor("op_8317_cast_fp16")]; + tensor var_8324_begin_0 = const()[name = tensor("op_8324_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_8324_end_0 = const()[name = tensor("op_8324_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_8324_end_mask_0 = const()[name = tensor("op_8324_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8324_cast_fp16 = slice_by_index(begin = var_8324_begin_0, end = var_8324_end_0, end_mask = var_8324_end_mask_0, x = var_7937_cast_fp16)[name = tensor("op_8324_cast_fp16")]; + tensor var_8331_begin_0 = const()[name = tensor("op_8331_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_8331_end_0 = const()[name = tensor("op_8331_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_8331_end_mask_0 = const()[name = tensor("op_8331_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8331_cast_fp16 = slice_by_index(begin = var_8331_begin_0, end = var_8331_end_0, end_mask = var_8331_end_mask_0, x = var_7937_cast_fp16)[name = tensor("op_8331_cast_fp16")]; + tensor var_8338_begin_0 = const()[name = tensor("op_8338_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8338_end_0 = const()[name = tensor("op_8338_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_8338_end_mask_0 = const()[name = tensor("op_8338_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8338_cast_fp16 = slice_by_index(begin = var_8338_begin_0, end = var_8338_end_0, end_mask = var_8338_end_mask_0, x = var_7941_cast_fp16)[name = tensor("op_8338_cast_fp16")]; + tensor var_8345_begin_0 = const()[name = tensor("op_8345_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_8345_end_0 = const()[name = tensor("op_8345_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_8345_end_mask_0 = const()[name = tensor("op_8345_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8345_cast_fp16 = slice_by_index(begin = var_8345_begin_0, end = var_8345_end_0, end_mask = var_8345_end_mask_0, x = var_7941_cast_fp16)[name = tensor("op_8345_cast_fp16")]; + tensor var_8352_begin_0 = const()[name = tensor("op_8352_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_8352_end_0 = const()[name = tensor("op_8352_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_8352_end_mask_0 = const()[name = tensor("op_8352_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8352_cast_fp16 = slice_by_index(begin = var_8352_begin_0, end = var_8352_end_0, end_mask = var_8352_end_mask_0, x = var_7941_cast_fp16)[name = tensor("op_8352_cast_fp16")]; + tensor var_8359_begin_0 = const()[name = tensor("op_8359_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_8359_end_0 = const()[name = tensor("op_8359_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_8359_end_mask_0 = const()[name = tensor("op_8359_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8359_cast_fp16 = slice_by_index(begin = var_8359_begin_0, end = var_8359_end_0, end_mask = var_8359_end_mask_0, x = var_7941_cast_fp16)[name = tensor("op_8359_cast_fp16")]; + tensor var_8366_begin_0 = const()[name = tensor("op_8366_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8366_end_0 = const()[name = tensor("op_8366_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_8366_end_mask_0 = const()[name = tensor("op_8366_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8366_cast_fp16 = slice_by_index(begin = var_8366_begin_0, end = var_8366_end_0, end_mask = var_8366_end_mask_0, x = var_7945_cast_fp16)[name = tensor("op_8366_cast_fp16")]; + tensor var_8373_begin_0 = const()[name = tensor("op_8373_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_8373_end_0 = const()[name = tensor("op_8373_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_8373_end_mask_0 = const()[name = tensor("op_8373_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8373_cast_fp16 = slice_by_index(begin = var_8373_begin_0, end = var_8373_end_0, end_mask = var_8373_end_mask_0, x = var_7945_cast_fp16)[name = tensor("op_8373_cast_fp16")]; + tensor var_8380_begin_0 = const()[name = tensor("op_8380_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_8380_end_0 = const()[name = tensor("op_8380_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_8380_end_mask_0 = const()[name = tensor("op_8380_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8380_cast_fp16 = slice_by_index(begin = var_8380_begin_0, end = var_8380_end_0, end_mask = var_8380_end_mask_0, x = var_7945_cast_fp16)[name = tensor("op_8380_cast_fp16")]; + tensor var_8387_begin_0 = const()[name = tensor("op_8387_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_8387_end_0 = const()[name = tensor("op_8387_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_8387_end_mask_0 = const()[name = tensor("op_8387_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8387_cast_fp16 = slice_by_index(begin = var_8387_begin_0, end = var_8387_end_0, end_mask = var_8387_end_mask_0, x = var_7945_cast_fp16)[name = tensor("op_8387_cast_fp16")]; + tensor var_8394_begin_0 = const()[name = tensor("op_8394_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8394_end_0 = const()[name = tensor("op_8394_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_8394_end_mask_0 = const()[name = tensor("op_8394_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8394_cast_fp16 = slice_by_index(begin = var_8394_begin_0, end = var_8394_end_0, end_mask = var_8394_end_mask_0, x = var_7949_cast_fp16)[name = tensor("op_8394_cast_fp16")]; + tensor var_8401_begin_0 = const()[name = tensor("op_8401_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_8401_end_0 = const()[name = tensor("op_8401_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_8401_end_mask_0 = const()[name = tensor("op_8401_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8401_cast_fp16 = slice_by_index(begin = var_8401_begin_0, end = var_8401_end_0, end_mask = var_8401_end_mask_0, x = var_7949_cast_fp16)[name = tensor("op_8401_cast_fp16")]; + tensor var_8408_begin_0 = const()[name = tensor("op_8408_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_8408_end_0 = const()[name = tensor("op_8408_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_8408_end_mask_0 = const()[name = tensor("op_8408_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8408_cast_fp16 = slice_by_index(begin = var_8408_begin_0, end = var_8408_end_0, end_mask = var_8408_end_mask_0, x = var_7949_cast_fp16)[name = tensor("op_8408_cast_fp16")]; + tensor var_8415_begin_0 = const()[name = tensor("op_8415_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_8415_end_0 = const()[name = tensor("op_8415_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_8415_end_mask_0 = const()[name = tensor("op_8415_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8415_cast_fp16 = slice_by_index(begin = var_8415_begin_0, end = var_8415_end_0, end_mask = var_8415_end_mask_0, x = var_7949_cast_fp16)[name = tensor("op_8415_cast_fp16")]; + tensor var_8422_begin_0 = const()[name = tensor("op_8422_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8422_end_0 = const()[name = tensor("op_8422_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_8422_end_mask_0 = const()[name = tensor("op_8422_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8422_cast_fp16 = slice_by_index(begin = var_8422_begin_0, end = var_8422_end_0, end_mask = var_8422_end_mask_0, x = var_7953_cast_fp16)[name = tensor("op_8422_cast_fp16")]; + tensor var_8429_begin_0 = const()[name = tensor("op_8429_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_8429_end_0 = const()[name = tensor("op_8429_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_8429_end_mask_0 = const()[name = tensor("op_8429_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8429_cast_fp16 = slice_by_index(begin = var_8429_begin_0, end = var_8429_end_0, end_mask = var_8429_end_mask_0, x = var_7953_cast_fp16)[name = tensor("op_8429_cast_fp16")]; + tensor var_8436_begin_0 = const()[name = tensor("op_8436_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_8436_end_0 = const()[name = tensor("op_8436_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_8436_end_mask_0 = const()[name = tensor("op_8436_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8436_cast_fp16 = slice_by_index(begin = var_8436_begin_0, end = var_8436_end_0, end_mask = var_8436_end_mask_0, x = var_7953_cast_fp16)[name = tensor("op_8436_cast_fp16")]; + tensor var_8443_begin_0 = const()[name = tensor("op_8443_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_8443_end_0 = const()[name = tensor("op_8443_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_8443_end_mask_0 = const()[name = tensor("op_8443_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8443_cast_fp16 = slice_by_index(begin = var_8443_begin_0, end = var_8443_end_0, end_mask = var_8443_end_mask_0, x = var_7953_cast_fp16)[name = tensor("op_8443_cast_fp16")]; + tensor var_8450_begin_0 = const()[name = tensor("op_8450_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8450_end_0 = const()[name = tensor("op_8450_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_8450_end_mask_0 = const()[name = tensor("op_8450_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8450_cast_fp16 = slice_by_index(begin = var_8450_begin_0, end = var_8450_end_0, end_mask = var_8450_end_mask_0, x = var_7957_cast_fp16)[name = tensor("op_8450_cast_fp16")]; + tensor var_8457_begin_0 = const()[name = tensor("op_8457_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_8457_end_0 = const()[name = tensor("op_8457_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_8457_end_mask_0 = const()[name = tensor("op_8457_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8457_cast_fp16 = slice_by_index(begin = var_8457_begin_0, end = var_8457_end_0, end_mask = var_8457_end_mask_0, x = var_7957_cast_fp16)[name = tensor("op_8457_cast_fp16")]; + tensor var_8464_begin_0 = const()[name = tensor("op_8464_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_8464_end_0 = const()[name = tensor("op_8464_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_8464_end_mask_0 = const()[name = tensor("op_8464_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8464_cast_fp16 = slice_by_index(begin = var_8464_begin_0, end = var_8464_end_0, end_mask = var_8464_end_mask_0, x = var_7957_cast_fp16)[name = tensor("op_8464_cast_fp16")]; + tensor var_8471_begin_0 = const()[name = tensor("op_8471_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_8471_end_0 = const()[name = tensor("op_8471_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_8471_end_mask_0 = const()[name = tensor("op_8471_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8471_cast_fp16 = slice_by_index(begin = var_8471_begin_0, end = var_8471_end_0, end_mask = var_8471_end_mask_0, x = var_7957_cast_fp16)[name = tensor("op_8471_cast_fp16")]; + tensor var_8478_begin_0 = const()[name = tensor("op_8478_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8478_end_0 = const()[name = tensor("op_8478_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_8478_end_mask_0 = const()[name = tensor("op_8478_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8478_cast_fp16 = slice_by_index(begin = var_8478_begin_0, end = var_8478_end_0, end_mask = var_8478_end_mask_0, x = var_7961_cast_fp16)[name = tensor("op_8478_cast_fp16")]; + tensor var_8485_begin_0 = const()[name = tensor("op_8485_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_8485_end_0 = const()[name = tensor("op_8485_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_8485_end_mask_0 = const()[name = tensor("op_8485_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8485_cast_fp16 = slice_by_index(begin = var_8485_begin_0, end = var_8485_end_0, end_mask = var_8485_end_mask_0, x = var_7961_cast_fp16)[name = tensor("op_8485_cast_fp16")]; + tensor var_8492_begin_0 = const()[name = tensor("op_8492_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_8492_end_0 = const()[name = tensor("op_8492_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_8492_end_mask_0 = const()[name = tensor("op_8492_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8492_cast_fp16 = slice_by_index(begin = var_8492_begin_0, end = var_8492_end_0, end_mask = var_8492_end_mask_0, x = var_7961_cast_fp16)[name = tensor("op_8492_cast_fp16")]; + tensor var_8499_begin_0 = const()[name = tensor("op_8499_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_8499_end_0 = const()[name = tensor("op_8499_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_8499_end_mask_0 = const()[name = tensor("op_8499_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8499_cast_fp16 = slice_by_index(begin = var_8499_begin_0, end = var_8499_end_0, end_mask = var_8499_end_mask_0, x = var_7961_cast_fp16)[name = tensor("op_8499_cast_fp16")]; + tensor var_8506_begin_0 = const()[name = tensor("op_8506_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8506_end_0 = const()[name = tensor("op_8506_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_8506_end_mask_0 = const()[name = tensor("op_8506_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8506_cast_fp16 = slice_by_index(begin = var_8506_begin_0, end = var_8506_end_0, end_mask = var_8506_end_mask_0, x = var_7965_cast_fp16)[name = tensor("op_8506_cast_fp16")]; + tensor var_8513_begin_0 = const()[name = tensor("op_8513_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_8513_end_0 = const()[name = tensor("op_8513_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_8513_end_mask_0 = const()[name = tensor("op_8513_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8513_cast_fp16 = slice_by_index(begin = var_8513_begin_0, end = var_8513_end_0, end_mask = var_8513_end_mask_0, x = var_7965_cast_fp16)[name = tensor("op_8513_cast_fp16")]; + tensor var_8520_begin_0 = const()[name = tensor("op_8520_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_8520_end_0 = const()[name = tensor("op_8520_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_8520_end_mask_0 = const()[name = tensor("op_8520_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8520_cast_fp16 = slice_by_index(begin = var_8520_begin_0, end = var_8520_end_0, end_mask = var_8520_end_mask_0, x = var_7965_cast_fp16)[name = tensor("op_8520_cast_fp16")]; + tensor var_8527_begin_0 = const()[name = tensor("op_8527_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_8527_end_0 = const()[name = tensor("op_8527_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_8527_end_mask_0 = const()[name = tensor("op_8527_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8527_cast_fp16 = slice_by_index(begin = var_8527_begin_0, end = var_8527_end_0, end_mask = var_8527_end_mask_0, x = var_7965_cast_fp16)[name = tensor("op_8527_cast_fp16")]; + tensor k_11_perm_0 = const()[name = tensor("k_11_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_8532_begin_0 = const()[name = tensor("op_8532_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8532_end_0 = const()[name = tensor("op_8532_end_0"), val = tensor([1, 1500, 1, 64])]; + tensor var_8532_end_mask_0 = const()[name = tensor("op_8532_end_mask_0"), val = tensor([true, true, true, false])]; + tensor transpose_26 = transpose(perm = k_11_perm_0, x = key_11_cast_fp16)[name = tensor("transpose_26")]; + tensor var_8532_cast_fp16 = slice_by_index(begin = var_8532_begin_0, end = var_8532_end_0, end_mask = var_8532_end_mask_0, x = transpose_26)[name = tensor("op_8532_cast_fp16")]; + tensor var_8536_begin_0 = const()[name = tensor("op_8536_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor var_8536_end_0 = const()[name = tensor("op_8536_end_0"), val = tensor([1, 1500, 1, 128])]; + tensor var_8536_end_mask_0 = const()[name = tensor("op_8536_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8536_cast_fp16 = slice_by_index(begin = var_8536_begin_0, end = var_8536_end_0, end_mask = var_8536_end_mask_0, x = transpose_26)[name = tensor("op_8536_cast_fp16")]; + tensor var_8540_begin_0 = const()[name = tensor("op_8540_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_8540_end_0 = const()[name = tensor("op_8540_end_0"), val = tensor([1, 1500, 1, 192])]; + tensor var_8540_end_mask_0 = const()[name = tensor("op_8540_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8540_cast_fp16 = slice_by_index(begin = var_8540_begin_0, end = var_8540_end_0, end_mask = var_8540_end_mask_0, x = transpose_26)[name = tensor("op_8540_cast_fp16")]; + tensor var_8544_begin_0 = const()[name = tensor("op_8544_begin_0"), val = tensor([0, 0, 0, 192])]; + tensor var_8544_end_0 = const()[name = tensor("op_8544_end_0"), val = tensor([1, 1500, 1, 256])]; + tensor var_8544_end_mask_0 = const()[name = tensor("op_8544_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8544_cast_fp16 = slice_by_index(begin = var_8544_begin_0, end = var_8544_end_0, end_mask = var_8544_end_mask_0, x = transpose_26)[name = tensor("op_8544_cast_fp16")]; + tensor var_8548_begin_0 = const()[name = tensor("op_8548_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_8548_end_0 = const()[name = tensor("op_8548_end_0"), val = tensor([1, 1500, 1, 320])]; + tensor var_8548_end_mask_0 = const()[name = tensor("op_8548_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8548_cast_fp16 = slice_by_index(begin = var_8548_begin_0, end = var_8548_end_0, end_mask = var_8548_end_mask_0, x = transpose_26)[name = tensor("op_8548_cast_fp16")]; + tensor var_8552_begin_0 = const()[name = tensor("op_8552_begin_0"), val = tensor([0, 0, 0, 320])]; + tensor var_8552_end_0 = const()[name = tensor("op_8552_end_0"), val = tensor([1, 1500, 1, 384])]; + tensor var_8552_end_mask_0 = const()[name = tensor("op_8552_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8552_cast_fp16 = slice_by_index(begin = var_8552_begin_0, end = var_8552_end_0, end_mask = var_8552_end_mask_0, x = transpose_26)[name = tensor("op_8552_cast_fp16")]; + tensor var_8556_begin_0 = const()[name = tensor("op_8556_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_8556_end_0 = const()[name = tensor("op_8556_end_0"), val = tensor([1, 1500, 1, 448])]; + tensor var_8556_end_mask_0 = const()[name = tensor("op_8556_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8556_cast_fp16 = slice_by_index(begin = var_8556_begin_0, end = var_8556_end_0, end_mask = var_8556_end_mask_0, x = transpose_26)[name = tensor("op_8556_cast_fp16")]; + tensor var_8560_begin_0 = const()[name = tensor("op_8560_begin_0"), val = tensor([0, 0, 0, 448])]; + tensor var_8560_end_0 = const()[name = tensor("op_8560_end_0"), val = tensor([1, 1500, 1, 512])]; + tensor var_8560_end_mask_0 = const()[name = tensor("op_8560_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8560_cast_fp16 = slice_by_index(begin = var_8560_begin_0, end = var_8560_end_0, end_mask = var_8560_end_mask_0, x = transpose_26)[name = tensor("op_8560_cast_fp16")]; + tensor var_8564_begin_0 = const()[name = tensor("op_8564_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_8564_end_0 = const()[name = tensor("op_8564_end_0"), val = tensor([1, 1500, 1, 576])]; + tensor var_8564_end_mask_0 = const()[name = tensor("op_8564_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8564_cast_fp16 = slice_by_index(begin = var_8564_begin_0, end = var_8564_end_0, end_mask = var_8564_end_mask_0, x = transpose_26)[name = tensor("op_8564_cast_fp16")]; + tensor var_8568_begin_0 = const()[name = tensor("op_8568_begin_0"), val = tensor([0, 0, 0, 576])]; + tensor var_8568_end_0 = const()[name = tensor("op_8568_end_0"), val = tensor([1, 1500, 1, 640])]; + tensor var_8568_end_mask_0 = const()[name = tensor("op_8568_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8568_cast_fp16 = slice_by_index(begin = var_8568_begin_0, end = var_8568_end_0, end_mask = var_8568_end_mask_0, x = transpose_26)[name = tensor("op_8568_cast_fp16")]; + tensor var_8572_begin_0 = const()[name = tensor("op_8572_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_8572_end_0 = const()[name = tensor("op_8572_end_0"), val = tensor([1, 1500, 1, 704])]; + tensor var_8572_end_mask_0 = const()[name = tensor("op_8572_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8572_cast_fp16 = slice_by_index(begin = var_8572_begin_0, end = var_8572_end_0, end_mask = var_8572_end_mask_0, x = transpose_26)[name = tensor("op_8572_cast_fp16")]; + tensor var_8576_begin_0 = const()[name = tensor("op_8576_begin_0"), val = tensor([0, 0, 0, 704])]; + tensor var_8576_end_0 = const()[name = tensor("op_8576_end_0"), val = tensor([1, 1500, 1, 768])]; + tensor var_8576_end_mask_0 = const()[name = tensor("op_8576_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8576_cast_fp16 = slice_by_index(begin = var_8576_begin_0, end = var_8576_end_0, end_mask = var_8576_end_mask_0, x = transpose_26)[name = tensor("op_8576_cast_fp16")]; + tensor var_8580_begin_0 = const()[name = tensor("op_8580_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_8580_end_0 = const()[name = tensor("op_8580_end_0"), val = tensor([1, 1500, 1, 832])]; + tensor var_8580_end_mask_0 = const()[name = tensor("op_8580_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8580_cast_fp16 = slice_by_index(begin = var_8580_begin_0, end = var_8580_end_0, end_mask = var_8580_end_mask_0, x = transpose_26)[name = tensor("op_8580_cast_fp16")]; + tensor var_8584_begin_0 = const()[name = tensor("op_8584_begin_0"), val = tensor([0, 0, 0, 832])]; + tensor var_8584_end_0 = const()[name = tensor("op_8584_end_0"), val = tensor([1, 1500, 1, 896])]; + tensor var_8584_end_mask_0 = const()[name = tensor("op_8584_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8584_cast_fp16 = slice_by_index(begin = var_8584_begin_0, end = var_8584_end_0, end_mask = var_8584_end_mask_0, x = transpose_26)[name = tensor("op_8584_cast_fp16")]; + tensor var_8588_begin_0 = const()[name = tensor("op_8588_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_8588_end_0 = const()[name = tensor("op_8588_end_0"), val = tensor([1, 1500, 1, 960])]; + tensor var_8588_end_mask_0 = const()[name = tensor("op_8588_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8588_cast_fp16 = slice_by_index(begin = var_8588_begin_0, end = var_8588_end_0, end_mask = var_8588_end_mask_0, x = transpose_26)[name = tensor("op_8588_cast_fp16")]; + tensor var_8592_begin_0 = const()[name = tensor("op_8592_begin_0"), val = tensor([0, 0, 0, 960])]; + tensor var_8592_end_0 = const()[name = tensor("op_8592_end_0"), val = tensor([1, 1500, 1, 1024])]; + tensor var_8592_end_mask_0 = const()[name = tensor("op_8592_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8592_cast_fp16 = slice_by_index(begin = var_8592_begin_0, end = var_8592_end_0, end_mask = var_8592_end_mask_0, x = transpose_26)[name = tensor("op_8592_cast_fp16")]; + tensor var_8596_begin_0 = const()[name = tensor("op_8596_begin_0"), val = tensor([0, 0, 0, 1024])]; + tensor var_8596_end_0 = const()[name = tensor("op_8596_end_0"), val = tensor([1, 1500, 1, 1088])]; + tensor var_8596_end_mask_0 = const()[name = tensor("op_8596_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8596_cast_fp16 = slice_by_index(begin = var_8596_begin_0, end = var_8596_end_0, end_mask = var_8596_end_mask_0, x = transpose_26)[name = tensor("op_8596_cast_fp16")]; + tensor var_8600_begin_0 = const()[name = tensor("op_8600_begin_0"), val = tensor([0, 0, 0, 1088])]; + tensor var_8600_end_0 = const()[name = tensor("op_8600_end_0"), val = tensor([1, 1500, 1, 1152])]; + tensor var_8600_end_mask_0 = const()[name = tensor("op_8600_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8600_cast_fp16 = slice_by_index(begin = var_8600_begin_0, end = var_8600_end_0, end_mask = var_8600_end_mask_0, x = transpose_26)[name = tensor("op_8600_cast_fp16")]; + tensor var_8604_begin_0 = const()[name = tensor("op_8604_begin_0"), val = tensor([0, 0, 0, 1152])]; + tensor var_8604_end_0 = const()[name = tensor("op_8604_end_0"), val = tensor([1, 1500, 1, 1216])]; + tensor var_8604_end_mask_0 = const()[name = tensor("op_8604_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8604_cast_fp16 = slice_by_index(begin = var_8604_begin_0, end = var_8604_end_0, end_mask = var_8604_end_mask_0, x = transpose_26)[name = tensor("op_8604_cast_fp16")]; + tensor var_8608_begin_0 = const()[name = tensor("op_8608_begin_0"), val = tensor([0, 0, 0, 1216])]; + tensor var_8608_end_0 = const()[name = tensor("op_8608_end_0"), val = tensor([1, 1500, 1, 1280])]; + tensor var_8608_end_mask_0 = const()[name = tensor("op_8608_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_8608_cast_fp16 = slice_by_index(begin = var_8608_begin_0, end = var_8608_end_0, end_mask = var_8608_end_mask_0, x = transpose_26)[name = tensor("op_8608_cast_fp16")]; + tensor var_8610_begin_0 = const()[name = tensor("op_8610_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_8610_end_0 = const()[name = tensor("op_8610_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_8610_end_mask_0 = const()[name = tensor("op_8610_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8610_cast_fp16 = slice_by_index(begin = var_8610_begin_0, end = var_8610_end_0, end_mask = var_8610_end_mask_0, x = value_11_cast_fp16)[name = tensor("op_8610_cast_fp16")]; + tensor var_8614_begin_0 = const()[name = tensor("op_8614_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_8614_end_0 = const()[name = tensor("op_8614_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_8614_end_mask_0 = const()[name = tensor("op_8614_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8614_cast_fp16 = slice_by_index(begin = var_8614_begin_0, end = var_8614_end_0, end_mask = var_8614_end_mask_0, x = value_11_cast_fp16)[name = tensor("op_8614_cast_fp16")]; + tensor var_8618_begin_0 = const()[name = tensor("op_8618_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_8618_end_0 = const()[name = tensor("op_8618_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_8618_end_mask_0 = const()[name = tensor("op_8618_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8618_cast_fp16 = slice_by_index(begin = var_8618_begin_0, end = var_8618_end_0, end_mask = var_8618_end_mask_0, x = value_11_cast_fp16)[name = tensor("op_8618_cast_fp16")]; + tensor var_8622_begin_0 = const()[name = tensor("op_8622_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_8622_end_0 = const()[name = tensor("op_8622_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_8622_end_mask_0 = const()[name = tensor("op_8622_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8622_cast_fp16 = slice_by_index(begin = var_8622_begin_0, end = var_8622_end_0, end_mask = var_8622_end_mask_0, x = value_11_cast_fp16)[name = tensor("op_8622_cast_fp16")]; + tensor var_8626_begin_0 = const()[name = tensor("op_8626_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_8626_end_0 = const()[name = tensor("op_8626_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_8626_end_mask_0 = const()[name = tensor("op_8626_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8626_cast_fp16 = slice_by_index(begin = var_8626_begin_0, end = var_8626_end_0, end_mask = var_8626_end_mask_0, x = value_11_cast_fp16)[name = tensor("op_8626_cast_fp16")]; + tensor var_8630_begin_0 = const()[name = tensor("op_8630_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_8630_end_0 = const()[name = tensor("op_8630_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_8630_end_mask_0 = const()[name = tensor("op_8630_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8630_cast_fp16 = slice_by_index(begin = var_8630_begin_0, end = var_8630_end_0, end_mask = var_8630_end_mask_0, x = value_11_cast_fp16)[name = tensor("op_8630_cast_fp16")]; + tensor var_8634_begin_0 = const()[name = tensor("op_8634_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_8634_end_0 = const()[name = tensor("op_8634_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_8634_end_mask_0 = const()[name = tensor("op_8634_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8634_cast_fp16 = slice_by_index(begin = var_8634_begin_0, end = var_8634_end_0, end_mask = var_8634_end_mask_0, x = value_11_cast_fp16)[name = tensor("op_8634_cast_fp16")]; + tensor var_8638_begin_0 = const()[name = tensor("op_8638_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_8638_end_0 = const()[name = tensor("op_8638_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_8638_end_mask_0 = const()[name = tensor("op_8638_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8638_cast_fp16 = slice_by_index(begin = var_8638_begin_0, end = var_8638_end_0, end_mask = var_8638_end_mask_0, x = value_11_cast_fp16)[name = tensor("op_8638_cast_fp16")]; + tensor var_8642_begin_0 = const()[name = tensor("op_8642_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_8642_end_0 = const()[name = tensor("op_8642_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_8642_end_mask_0 = const()[name = tensor("op_8642_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8642_cast_fp16 = slice_by_index(begin = var_8642_begin_0, end = var_8642_end_0, end_mask = var_8642_end_mask_0, x = value_11_cast_fp16)[name = tensor("op_8642_cast_fp16")]; + tensor var_8646_begin_0 = const()[name = tensor("op_8646_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_8646_end_0 = const()[name = tensor("op_8646_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_8646_end_mask_0 = const()[name = tensor("op_8646_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8646_cast_fp16 = slice_by_index(begin = var_8646_begin_0, end = var_8646_end_0, end_mask = var_8646_end_mask_0, x = value_11_cast_fp16)[name = tensor("op_8646_cast_fp16")]; + tensor var_8650_begin_0 = const()[name = tensor("op_8650_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_8650_end_0 = const()[name = tensor("op_8650_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_8650_end_mask_0 = const()[name = tensor("op_8650_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8650_cast_fp16 = slice_by_index(begin = var_8650_begin_0, end = var_8650_end_0, end_mask = var_8650_end_mask_0, x = value_11_cast_fp16)[name = tensor("op_8650_cast_fp16")]; + tensor var_8654_begin_0 = const()[name = tensor("op_8654_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_8654_end_0 = const()[name = tensor("op_8654_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_8654_end_mask_0 = const()[name = tensor("op_8654_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8654_cast_fp16 = slice_by_index(begin = var_8654_begin_0, end = var_8654_end_0, end_mask = var_8654_end_mask_0, x = value_11_cast_fp16)[name = tensor("op_8654_cast_fp16")]; + tensor var_8658_begin_0 = const()[name = tensor("op_8658_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_8658_end_0 = const()[name = tensor("op_8658_end_0"), val = tensor([1, 832, 1, 1500])]; + tensor var_8658_end_mask_0 = const()[name = tensor("op_8658_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8658_cast_fp16 = slice_by_index(begin = var_8658_begin_0, end = var_8658_end_0, end_mask = var_8658_end_mask_0, x = value_11_cast_fp16)[name = tensor("op_8658_cast_fp16")]; + tensor var_8662_begin_0 = const()[name = tensor("op_8662_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_8662_end_0 = const()[name = tensor("op_8662_end_0"), val = tensor([1, 896, 1, 1500])]; + tensor var_8662_end_mask_0 = const()[name = tensor("op_8662_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8662_cast_fp16 = slice_by_index(begin = var_8662_begin_0, end = var_8662_end_0, end_mask = var_8662_end_mask_0, x = value_11_cast_fp16)[name = tensor("op_8662_cast_fp16")]; + tensor var_8666_begin_0 = const()[name = tensor("op_8666_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_8666_end_0 = const()[name = tensor("op_8666_end_0"), val = tensor([1, 960, 1, 1500])]; + tensor var_8666_end_mask_0 = const()[name = tensor("op_8666_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8666_cast_fp16 = slice_by_index(begin = var_8666_begin_0, end = var_8666_end_0, end_mask = var_8666_end_mask_0, x = value_11_cast_fp16)[name = tensor("op_8666_cast_fp16")]; + tensor var_8670_begin_0 = const()[name = tensor("op_8670_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_8670_end_0 = const()[name = tensor("op_8670_end_0"), val = tensor([1, 1024, 1, 1500])]; + tensor var_8670_end_mask_0 = const()[name = tensor("op_8670_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8670_cast_fp16 = slice_by_index(begin = var_8670_begin_0, end = var_8670_end_0, end_mask = var_8670_end_mask_0, x = value_11_cast_fp16)[name = tensor("op_8670_cast_fp16")]; + tensor var_8674_begin_0 = const()[name = tensor("op_8674_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_8674_end_0 = const()[name = tensor("op_8674_end_0"), val = tensor([1, 1088, 1, 1500])]; + tensor var_8674_end_mask_0 = const()[name = tensor("op_8674_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8674_cast_fp16 = slice_by_index(begin = var_8674_begin_0, end = var_8674_end_0, end_mask = var_8674_end_mask_0, x = value_11_cast_fp16)[name = tensor("op_8674_cast_fp16")]; + tensor var_8678_begin_0 = const()[name = tensor("op_8678_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_8678_end_0 = const()[name = tensor("op_8678_end_0"), val = tensor([1, 1152, 1, 1500])]; + tensor var_8678_end_mask_0 = const()[name = tensor("op_8678_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8678_cast_fp16 = slice_by_index(begin = var_8678_begin_0, end = var_8678_end_0, end_mask = var_8678_end_mask_0, x = value_11_cast_fp16)[name = tensor("op_8678_cast_fp16")]; + tensor var_8682_begin_0 = const()[name = tensor("op_8682_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_8682_end_0 = const()[name = tensor("op_8682_end_0"), val = tensor([1, 1216, 1, 1500])]; + tensor var_8682_end_mask_0 = const()[name = tensor("op_8682_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8682_cast_fp16 = slice_by_index(begin = var_8682_begin_0, end = var_8682_end_0, end_mask = var_8682_end_mask_0, x = value_11_cast_fp16)[name = tensor("op_8682_cast_fp16")]; + tensor var_8686_begin_0 = const()[name = tensor("op_8686_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_8686_end_0 = const()[name = tensor("op_8686_end_0"), val = tensor([1, 1280, 1, 1500])]; + tensor var_8686_end_mask_0 = const()[name = tensor("op_8686_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_8686_cast_fp16 = slice_by_index(begin = var_8686_begin_0, end = var_8686_end_0, end_mask = var_8686_end_mask_0, x = value_11_cast_fp16)[name = tensor("op_8686_cast_fp16")]; + tensor var_8690_equation_0 = const()[name = tensor("op_8690_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8690_cast_fp16 = einsum(equation = var_8690_equation_0, values = (var_8532_cast_fp16, var_7974_cast_fp16))[name = tensor("op_8690_cast_fp16")]; + tensor var_8691_to_fp16 = const()[name = tensor("op_8691_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_801_cast_fp16 = mul(x = var_8690_cast_fp16, y = var_8691_to_fp16)[name = tensor("aw_chunk_801_cast_fp16")]; + tensor var_8694_equation_0 = const()[name = tensor("op_8694_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8694_cast_fp16 = einsum(equation = var_8694_equation_0, values = (var_8532_cast_fp16, var_7981_cast_fp16))[name = tensor("op_8694_cast_fp16")]; + tensor var_8695_to_fp16 = const()[name = tensor("op_8695_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_803_cast_fp16 = mul(x = var_8694_cast_fp16, y = var_8695_to_fp16)[name = tensor("aw_chunk_803_cast_fp16")]; + tensor var_8698_equation_0 = const()[name = tensor("op_8698_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8698_cast_fp16 = einsum(equation = var_8698_equation_0, values = (var_8532_cast_fp16, var_7988_cast_fp16))[name = tensor("op_8698_cast_fp16")]; + tensor var_8699_to_fp16 = const()[name = tensor("op_8699_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_805_cast_fp16 = mul(x = var_8698_cast_fp16, y = var_8699_to_fp16)[name = tensor("aw_chunk_805_cast_fp16")]; + tensor var_8702_equation_0 = const()[name = tensor("op_8702_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8702_cast_fp16 = einsum(equation = var_8702_equation_0, values = (var_8532_cast_fp16, var_7995_cast_fp16))[name = tensor("op_8702_cast_fp16")]; + tensor var_8703_to_fp16 = const()[name = tensor("op_8703_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_807_cast_fp16 = mul(x = var_8702_cast_fp16, y = var_8703_to_fp16)[name = tensor("aw_chunk_807_cast_fp16")]; + tensor var_8706_equation_0 = const()[name = tensor("op_8706_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8706_cast_fp16 = einsum(equation = var_8706_equation_0, values = (var_8536_cast_fp16, var_8002_cast_fp16))[name = tensor("op_8706_cast_fp16")]; + tensor var_8707_to_fp16 = const()[name = tensor("op_8707_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_809_cast_fp16 = mul(x = var_8706_cast_fp16, y = var_8707_to_fp16)[name = tensor("aw_chunk_809_cast_fp16")]; + tensor var_8710_equation_0 = const()[name = tensor("op_8710_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8710_cast_fp16 = einsum(equation = var_8710_equation_0, values = (var_8536_cast_fp16, var_8009_cast_fp16))[name = tensor("op_8710_cast_fp16")]; + tensor var_8711_to_fp16 = const()[name = tensor("op_8711_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_811_cast_fp16 = mul(x = var_8710_cast_fp16, y = var_8711_to_fp16)[name = tensor("aw_chunk_811_cast_fp16")]; + tensor var_8714_equation_0 = const()[name = tensor("op_8714_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8714_cast_fp16 = einsum(equation = var_8714_equation_0, values = (var_8536_cast_fp16, var_8016_cast_fp16))[name = tensor("op_8714_cast_fp16")]; + tensor var_8715_to_fp16 = const()[name = tensor("op_8715_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_813_cast_fp16 = mul(x = var_8714_cast_fp16, y = var_8715_to_fp16)[name = tensor("aw_chunk_813_cast_fp16")]; + tensor var_8718_equation_0 = const()[name = tensor("op_8718_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8718_cast_fp16 = einsum(equation = var_8718_equation_0, values = (var_8536_cast_fp16, var_8023_cast_fp16))[name = tensor("op_8718_cast_fp16")]; + tensor var_8719_to_fp16 = const()[name = tensor("op_8719_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_815_cast_fp16 = mul(x = var_8718_cast_fp16, y = var_8719_to_fp16)[name = tensor("aw_chunk_815_cast_fp16")]; + tensor var_8722_equation_0 = const()[name = tensor("op_8722_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8722_cast_fp16 = einsum(equation = var_8722_equation_0, values = (var_8540_cast_fp16, var_8030_cast_fp16))[name = tensor("op_8722_cast_fp16")]; + tensor var_8723_to_fp16 = const()[name = tensor("op_8723_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_817_cast_fp16 = mul(x = var_8722_cast_fp16, y = var_8723_to_fp16)[name = tensor("aw_chunk_817_cast_fp16")]; + tensor var_8726_equation_0 = const()[name = tensor("op_8726_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8726_cast_fp16 = einsum(equation = var_8726_equation_0, values = (var_8540_cast_fp16, var_8037_cast_fp16))[name = tensor("op_8726_cast_fp16")]; + tensor var_8727_to_fp16 = const()[name = tensor("op_8727_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_819_cast_fp16 = mul(x = var_8726_cast_fp16, y = var_8727_to_fp16)[name = tensor("aw_chunk_819_cast_fp16")]; + tensor var_8730_equation_0 = const()[name = tensor("op_8730_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8730_cast_fp16 = einsum(equation = var_8730_equation_0, values = (var_8540_cast_fp16, var_8044_cast_fp16))[name = tensor("op_8730_cast_fp16")]; + tensor var_8731_to_fp16 = const()[name = tensor("op_8731_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_821_cast_fp16 = mul(x = var_8730_cast_fp16, y = var_8731_to_fp16)[name = tensor("aw_chunk_821_cast_fp16")]; + tensor var_8734_equation_0 = const()[name = tensor("op_8734_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8734_cast_fp16 = einsum(equation = var_8734_equation_0, values = (var_8540_cast_fp16, var_8051_cast_fp16))[name = tensor("op_8734_cast_fp16")]; + tensor var_8735_to_fp16 = const()[name = tensor("op_8735_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_823_cast_fp16 = mul(x = var_8734_cast_fp16, y = var_8735_to_fp16)[name = tensor("aw_chunk_823_cast_fp16")]; + tensor var_8738_equation_0 = const()[name = tensor("op_8738_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8738_cast_fp16 = einsum(equation = var_8738_equation_0, values = (var_8544_cast_fp16, var_8058_cast_fp16))[name = tensor("op_8738_cast_fp16")]; + tensor var_8739_to_fp16 = const()[name = tensor("op_8739_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_825_cast_fp16 = mul(x = var_8738_cast_fp16, y = var_8739_to_fp16)[name = tensor("aw_chunk_825_cast_fp16")]; + tensor var_8742_equation_0 = const()[name = tensor("op_8742_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8742_cast_fp16 = einsum(equation = var_8742_equation_0, values = (var_8544_cast_fp16, var_8065_cast_fp16))[name = tensor("op_8742_cast_fp16")]; + tensor var_8743_to_fp16 = const()[name = tensor("op_8743_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_827_cast_fp16 = mul(x = var_8742_cast_fp16, y = var_8743_to_fp16)[name = tensor("aw_chunk_827_cast_fp16")]; + tensor var_8746_equation_0 = const()[name = tensor("op_8746_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8746_cast_fp16 = einsum(equation = var_8746_equation_0, values = (var_8544_cast_fp16, var_8072_cast_fp16))[name = tensor("op_8746_cast_fp16")]; + tensor var_8747_to_fp16 = const()[name = tensor("op_8747_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_829_cast_fp16 = mul(x = var_8746_cast_fp16, y = var_8747_to_fp16)[name = tensor("aw_chunk_829_cast_fp16")]; + tensor var_8750_equation_0 = const()[name = tensor("op_8750_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8750_cast_fp16 = einsum(equation = var_8750_equation_0, values = (var_8544_cast_fp16, var_8079_cast_fp16))[name = tensor("op_8750_cast_fp16")]; + tensor var_8751_to_fp16 = const()[name = tensor("op_8751_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_831_cast_fp16 = mul(x = var_8750_cast_fp16, y = var_8751_to_fp16)[name = tensor("aw_chunk_831_cast_fp16")]; + tensor var_8754_equation_0 = const()[name = tensor("op_8754_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8754_cast_fp16 = einsum(equation = var_8754_equation_0, values = (var_8548_cast_fp16, var_8086_cast_fp16))[name = tensor("op_8754_cast_fp16")]; + tensor var_8755_to_fp16 = const()[name = tensor("op_8755_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_833_cast_fp16 = mul(x = var_8754_cast_fp16, y = var_8755_to_fp16)[name = tensor("aw_chunk_833_cast_fp16")]; + tensor var_8758_equation_0 = const()[name = tensor("op_8758_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8758_cast_fp16 = einsum(equation = var_8758_equation_0, values = (var_8548_cast_fp16, var_8093_cast_fp16))[name = tensor("op_8758_cast_fp16")]; + tensor var_8759_to_fp16 = const()[name = tensor("op_8759_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_835_cast_fp16 = mul(x = var_8758_cast_fp16, y = var_8759_to_fp16)[name = tensor("aw_chunk_835_cast_fp16")]; + tensor var_8762_equation_0 = const()[name = tensor("op_8762_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8762_cast_fp16 = einsum(equation = var_8762_equation_0, values = (var_8548_cast_fp16, var_8100_cast_fp16))[name = tensor("op_8762_cast_fp16")]; + tensor var_8763_to_fp16 = const()[name = tensor("op_8763_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_837_cast_fp16 = mul(x = var_8762_cast_fp16, y = var_8763_to_fp16)[name = tensor("aw_chunk_837_cast_fp16")]; + tensor var_8766_equation_0 = const()[name = tensor("op_8766_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8766_cast_fp16 = einsum(equation = var_8766_equation_0, values = (var_8548_cast_fp16, var_8107_cast_fp16))[name = tensor("op_8766_cast_fp16")]; + tensor var_8767_to_fp16 = const()[name = tensor("op_8767_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_839_cast_fp16 = mul(x = var_8766_cast_fp16, y = var_8767_to_fp16)[name = tensor("aw_chunk_839_cast_fp16")]; + tensor var_8770_equation_0 = const()[name = tensor("op_8770_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8770_cast_fp16 = einsum(equation = var_8770_equation_0, values = (var_8552_cast_fp16, var_8114_cast_fp16))[name = tensor("op_8770_cast_fp16")]; + tensor var_8771_to_fp16 = const()[name = tensor("op_8771_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_841_cast_fp16 = mul(x = var_8770_cast_fp16, y = var_8771_to_fp16)[name = tensor("aw_chunk_841_cast_fp16")]; + tensor var_8774_equation_0 = const()[name = tensor("op_8774_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8774_cast_fp16 = einsum(equation = var_8774_equation_0, values = (var_8552_cast_fp16, var_8121_cast_fp16))[name = tensor("op_8774_cast_fp16")]; + tensor var_8775_to_fp16 = const()[name = tensor("op_8775_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_843_cast_fp16 = mul(x = var_8774_cast_fp16, y = var_8775_to_fp16)[name = tensor("aw_chunk_843_cast_fp16")]; + tensor var_8778_equation_0 = const()[name = tensor("op_8778_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8778_cast_fp16 = einsum(equation = var_8778_equation_0, values = (var_8552_cast_fp16, var_8128_cast_fp16))[name = tensor("op_8778_cast_fp16")]; + tensor var_8779_to_fp16 = const()[name = tensor("op_8779_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_845_cast_fp16 = mul(x = var_8778_cast_fp16, y = var_8779_to_fp16)[name = tensor("aw_chunk_845_cast_fp16")]; + tensor var_8782_equation_0 = const()[name = tensor("op_8782_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8782_cast_fp16 = einsum(equation = var_8782_equation_0, values = (var_8552_cast_fp16, var_8135_cast_fp16))[name = tensor("op_8782_cast_fp16")]; + tensor var_8783_to_fp16 = const()[name = tensor("op_8783_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_847_cast_fp16 = mul(x = var_8782_cast_fp16, y = var_8783_to_fp16)[name = tensor("aw_chunk_847_cast_fp16")]; + tensor var_8786_equation_0 = const()[name = tensor("op_8786_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8786_cast_fp16 = einsum(equation = var_8786_equation_0, values = (var_8556_cast_fp16, var_8142_cast_fp16))[name = tensor("op_8786_cast_fp16")]; + tensor var_8787_to_fp16 = const()[name = tensor("op_8787_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_849_cast_fp16 = mul(x = var_8786_cast_fp16, y = var_8787_to_fp16)[name = tensor("aw_chunk_849_cast_fp16")]; + tensor var_8790_equation_0 = const()[name = tensor("op_8790_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8790_cast_fp16 = einsum(equation = var_8790_equation_0, values = (var_8556_cast_fp16, var_8149_cast_fp16))[name = tensor("op_8790_cast_fp16")]; + tensor var_8791_to_fp16 = const()[name = tensor("op_8791_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_851_cast_fp16 = mul(x = var_8790_cast_fp16, y = var_8791_to_fp16)[name = tensor("aw_chunk_851_cast_fp16")]; + tensor var_8794_equation_0 = const()[name = tensor("op_8794_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8794_cast_fp16 = einsum(equation = var_8794_equation_0, values = (var_8556_cast_fp16, var_8156_cast_fp16))[name = tensor("op_8794_cast_fp16")]; + tensor var_8795_to_fp16 = const()[name = tensor("op_8795_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_853_cast_fp16 = mul(x = var_8794_cast_fp16, y = var_8795_to_fp16)[name = tensor("aw_chunk_853_cast_fp16")]; + tensor var_8798_equation_0 = const()[name = tensor("op_8798_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8798_cast_fp16 = einsum(equation = var_8798_equation_0, values = (var_8556_cast_fp16, var_8163_cast_fp16))[name = tensor("op_8798_cast_fp16")]; + tensor var_8799_to_fp16 = const()[name = tensor("op_8799_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_855_cast_fp16 = mul(x = var_8798_cast_fp16, y = var_8799_to_fp16)[name = tensor("aw_chunk_855_cast_fp16")]; + tensor var_8802_equation_0 = const()[name = tensor("op_8802_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8802_cast_fp16 = einsum(equation = var_8802_equation_0, values = (var_8560_cast_fp16, var_8170_cast_fp16))[name = tensor("op_8802_cast_fp16")]; + tensor var_8803_to_fp16 = const()[name = tensor("op_8803_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_857_cast_fp16 = mul(x = var_8802_cast_fp16, y = var_8803_to_fp16)[name = tensor("aw_chunk_857_cast_fp16")]; + tensor var_8806_equation_0 = const()[name = tensor("op_8806_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8806_cast_fp16 = einsum(equation = var_8806_equation_0, values = (var_8560_cast_fp16, var_8177_cast_fp16))[name = tensor("op_8806_cast_fp16")]; + tensor var_8807_to_fp16 = const()[name = tensor("op_8807_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_859_cast_fp16 = mul(x = var_8806_cast_fp16, y = var_8807_to_fp16)[name = tensor("aw_chunk_859_cast_fp16")]; + tensor var_8810_equation_0 = const()[name = tensor("op_8810_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8810_cast_fp16 = einsum(equation = var_8810_equation_0, values = (var_8560_cast_fp16, var_8184_cast_fp16))[name = tensor("op_8810_cast_fp16")]; + tensor var_8811_to_fp16 = const()[name = tensor("op_8811_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_861_cast_fp16 = mul(x = var_8810_cast_fp16, y = var_8811_to_fp16)[name = tensor("aw_chunk_861_cast_fp16")]; + tensor var_8814_equation_0 = const()[name = tensor("op_8814_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8814_cast_fp16 = einsum(equation = var_8814_equation_0, values = (var_8560_cast_fp16, var_8191_cast_fp16))[name = tensor("op_8814_cast_fp16")]; + tensor var_8815_to_fp16 = const()[name = tensor("op_8815_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_863_cast_fp16 = mul(x = var_8814_cast_fp16, y = var_8815_to_fp16)[name = tensor("aw_chunk_863_cast_fp16")]; + tensor var_8818_equation_0 = const()[name = tensor("op_8818_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8818_cast_fp16 = einsum(equation = var_8818_equation_0, values = (var_8564_cast_fp16, var_8198_cast_fp16))[name = tensor("op_8818_cast_fp16")]; + tensor var_8819_to_fp16 = const()[name = tensor("op_8819_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_865_cast_fp16 = mul(x = var_8818_cast_fp16, y = var_8819_to_fp16)[name = tensor("aw_chunk_865_cast_fp16")]; + tensor var_8822_equation_0 = const()[name = tensor("op_8822_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8822_cast_fp16 = einsum(equation = var_8822_equation_0, values = (var_8564_cast_fp16, var_8205_cast_fp16))[name = tensor("op_8822_cast_fp16")]; + tensor var_8823_to_fp16 = const()[name = tensor("op_8823_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_867_cast_fp16 = mul(x = var_8822_cast_fp16, y = var_8823_to_fp16)[name = tensor("aw_chunk_867_cast_fp16")]; + tensor var_8826_equation_0 = const()[name = tensor("op_8826_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8826_cast_fp16 = einsum(equation = var_8826_equation_0, values = (var_8564_cast_fp16, var_8212_cast_fp16))[name = tensor("op_8826_cast_fp16")]; + tensor var_8827_to_fp16 = const()[name = tensor("op_8827_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_869_cast_fp16 = mul(x = var_8826_cast_fp16, y = var_8827_to_fp16)[name = tensor("aw_chunk_869_cast_fp16")]; + tensor var_8830_equation_0 = const()[name = tensor("op_8830_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8830_cast_fp16 = einsum(equation = var_8830_equation_0, values = (var_8564_cast_fp16, var_8219_cast_fp16))[name = tensor("op_8830_cast_fp16")]; + tensor var_8831_to_fp16 = const()[name = tensor("op_8831_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_871_cast_fp16 = mul(x = var_8830_cast_fp16, y = var_8831_to_fp16)[name = tensor("aw_chunk_871_cast_fp16")]; + tensor var_8834_equation_0 = const()[name = tensor("op_8834_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8834_cast_fp16 = einsum(equation = var_8834_equation_0, values = (var_8568_cast_fp16, var_8226_cast_fp16))[name = tensor("op_8834_cast_fp16")]; + tensor var_8835_to_fp16 = const()[name = tensor("op_8835_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_873_cast_fp16 = mul(x = var_8834_cast_fp16, y = var_8835_to_fp16)[name = tensor("aw_chunk_873_cast_fp16")]; + tensor var_8838_equation_0 = const()[name = tensor("op_8838_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8838_cast_fp16 = einsum(equation = var_8838_equation_0, values = (var_8568_cast_fp16, var_8233_cast_fp16))[name = tensor("op_8838_cast_fp16")]; + tensor var_8839_to_fp16 = const()[name = tensor("op_8839_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_875_cast_fp16 = mul(x = var_8838_cast_fp16, y = var_8839_to_fp16)[name = tensor("aw_chunk_875_cast_fp16")]; + tensor var_8842_equation_0 = const()[name = tensor("op_8842_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8842_cast_fp16 = einsum(equation = var_8842_equation_0, values = (var_8568_cast_fp16, var_8240_cast_fp16))[name = tensor("op_8842_cast_fp16")]; + tensor var_8843_to_fp16 = const()[name = tensor("op_8843_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_877_cast_fp16 = mul(x = var_8842_cast_fp16, y = var_8843_to_fp16)[name = tensor("aw_chunk_877_cast_fp16")]; + tensor var_8846_equation_0 = const()[name = tensor("op_8846_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8846_cast_fp16 = einsum(equation = var_8846_equation_0, values = (var_8568_cast_fp16, var_8247_cast_fp16))[name = tensor("op_8846_cast_fp16")]; + tensor var_8847_to_fp16 = const()[name = tensor("op_8847_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_879_cast_fp16 = mul(x = var_8846_cast_fp16, y = var_8847_to_fp16)[name = tensor("aw_chunk_879_cast_fp16")]; + tensor var_8850_equation_0 = const()[name = tensor("op_8850_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8850_cast_fp16 = einsum(equation = var_8850_equation_0, values = (var_8572_cast_fp16, var_8254_cast_fp16))[name = tensor("op_8850_cast_fp16")]; + tensor var_8851_to_fp16 = const()[name = tensor("op_8851_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_881_cast_fp16 = mul(x = var_8850_cast_fp16, y = var_8851_to_fp16)[name = tensor("aw_chunk_881_cast_fp16")]; + tensor var_8854_equation_0 = const()[name = tensor("op_8854_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8854_cast_fp16 = einsum(equation = var_8854_equation_0, values = (var_8572_cast_fp16, var_8261_cast_fp16))[name = tensor("op_8854_cast_fp16")]; + tensor var_8855_to_fp16 = const()[name = tensor("op_8855_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_883_cast_fp16 = mul(x = var_8854_cast_fp16, y = var_8855_to_fp16)[name = tensor("aw_chunk_883_cast_fp16")]; + tensor var_8858_equation_0 = const()[name = tensor("op_8858_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8858_cast_fp16 = einsum(equation = var_8858_equation_0, values = (var_8572_cast_fp16, var_8268_cast_fp16))[name = tensor("op_8858_cast_fp16")]; + tensor var_8859_to_fp16 = const()[name = tensor("op_8859_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_885_cast_fp16 = mul(x = var_8858_cast_fp16, y = var_8859_to_fp16)[name = tensor("aw_chunk_885_cast_fp16")]; + tensor var_8862_equation_0 = const()[name = tensor("op_8862_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8862_cast_fp16 = einsum(equation = var_8862_equation_0, values = (var_8572_cast_fp16, var_8275_cast_fp16))[name = tensor("op_8862_cast_fp16")]; + tensor var_8863_to_fp16 = const()[name = tensor("op_8863_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_887_cast_fp16 = mul(x = var_8862_cast_fp16, y = var_8863_to_fp16)[name = tensor("aw_chunk_887_cast_fp16")]; + tensor var_8866_equation_0 = const()[name = tensor("op_8866_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8866_cast_fp16 = einsum(equation = var_8866_equation_0, values = (var_8576_cast_fp16, var_8282_cast_fp16))[name = tensor("op_8866_cast_fp16")]; + tensor var_8867_to_fp16 = const()[name = tensor("op_8867_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_889_cast_fp16 = mul(x = var_8866_cast_fp16, y = var_8867_to_fp16)[name = tensor("aw_chunk_889_cast_fp16")]; + tensor var_8870_equation_0 = const()[name = tensor("op_8870_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8870_cast_fp16 = einsum(equation = var_8870_equation_0, values = (var_8576_cast_fp16, var_8289_cast_fp16))[name = tensor("op_8870_cast_fp16")]; + tensor var_8871_to_fp16 = const()[name = tensor("op_8871_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_891_cast_fp16 = mul(x = var_8870_cast_fp16, y = var_8871_to_fp16)[name = tensor("aw_chunk_891_cast_fp16")]; + tensor var_8874_equation_0 = const()[name = tensor("op_8874_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8874_cast_fp16 = einsum(equation = var_8874_equation_0, values = (var_8576_cast_fp16, var_8296_cast_fp16))[name = tensor("op_8874_cast_fp16")]; + tensor var_8875_to_fp16 = const()[name = tensor("op_8875_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_893_cast_fp16 = mul(x = var_8874_cast_fp16, y = var_8875_to_fp16)[name = tensor("aw_chunk_893_cast_fp16")]; + tensor var_8878_equation_0 = const()[name = tensor("op_8878_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8878_cast_fp16 = einsum(equation = var_8878_equation_0, values = (var_8576_cast_fp16, var_8303_cast_fp16))[name = tensor("op_8878_cast_fp16")]; + tensor var_8879_to_fp16 = const()[name = tensor("op_8879_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_895_cast_fp16 = mul(x = var_8878_cast_fp16, y = var_8879_to_fp16)[name = tensor("aw_chunk_895_cast_fp16")]; + tensor var_8882_equation_0 = const()[name = tensor("op_8882_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8882_cast_fp16 = einsum(equation = var_8882_equation_0, values = (var_8580_cast_fp16, var_8310_cast_fp16))[name = tensor("op_8882_cast_fp16")]; + tensor var_8883_to_fp16 = const()[name = tensor("op_8883_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_897_cast_fp16 = mul(x = var_8882_cast_fp16, y = var_8883_to_fp16)[name = tensor("aw_chunk_897_cast_fp16")]; + tensor var_8886_equation_0 = const()[name = tensor("op_8886_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8886_cast_fp16 = einsum(equation = var_8886_equation_0, values = (var_8580_cast_fp16, var_8317_cast_fp16))[name = tensor("op_8886_cast_fp16")]; + tensor var_8887_to_fp16 = const()[name = tensor("op_8887_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_899_cast_fp16 = mul(x = var_8886_cast_fp16, y = var_8887_to_fp16)[name = tensor("aw_chunk_899_cast_fp16")]; + tensor var_8890_equation_0 = const()[name = tensor("op_8890_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8890_cast_fp16 = einsum(equation = var_8890_equation_0, values = (var_8580_cast_fp16, var_8324_cast_fp16))[name = tensor("op_8890_cast_fp16")]; + tensor var_8891_to_fp16 = const()[name = tensor("op_8891_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_901_cast_fp16 = mul(x = var_8890_cast_fp16, y = var_8891_to_fp16)[name = tensor("aw_chunk_901_cast_fp16")]; + tensor var_8894_equation_0 = const()[name = tensor("op_8894_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8894_cast_fp16 = einsum(equation = var_8894_equation_0, values = (var_8580_cast_fp16, var_8331_cast_fp16))[name = tensor("op_8894_cast_fp16")]; + tensor var_8895_to_fp16 = const()[name = tensor("op_8895_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_903_cast_fp16 = mul(x = var_8894_cast_fp16, y = var_8895_to_fp16)[name = tensor("aw_chunk_903_cast_fp16")]; + tensor var_8898_equation_0 = const()[name = tensor("op_8898_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8898_cast_fp16 = einsum(equation = var_8898_equation_0, values = (var_8584_cast_fp16, var_8338_cast_fp16))[name = tensor("op_8898_cast_fp16")]; + tensor var_8899_to_fp16 = const()[name = tensor("op_8899_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_905_cast_fp16 = mul(x = var_8898_cast_fp16, y = var_8899_to_fp16)[name = tensor("aw_chunk_905_cast_fp16")]; + tensor var_8902_equation_0 = const()[name = tensor("op_8902_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8902_cast_fp16 = einsum(equation = var_8902_equation_0, values = (var_8584_cast_fp16, var_8345_cast_fp16))[name = tensor("op_8902_cast_fp16")]; + tensor var_8903_to_fp16 = const()[name = tensor("op_8903_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_907_cast_fp16 = mul(x = var_8902_cast_fp16, y = var_8903_to_fp16)[name = tensor("aw_chunk_907_cast_fp16")]; + tensor var_8906_equation_0 = const()[name = tensor("op_8906_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8906_cast_fp16 = einsum(equation = var_8906_equation_0, values = (var_8584_cast_fp16, var_8352_cast_fp16))[name = tensor("op_8906_cast_fp16")]; + tensor var_8907_to_fp16 = const()[name = tensor("op_8907_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_909_cast_fp16 = mul(x = var_8906_cast_fp16, y = var_8907_to_fp16)[name = tensor("aw_chunk_909_cast_fp16")]; + tensor var_8910_equation_0 = const()[name = tensor("op_8910_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8910_cast_fp16 = einsum(equation = var_8910_equation_0, values = (var_8584_cast_fp16, var_8359_cast_fp16))[name = tensor("op_8910_cast_fp16")]; + tensor var_8911_to_fp16 = const()[name = tensor("op_8911_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_911_cast_fp16 = mul(x = var_8910_cast_fp16, y = var_8911_to_fp16)[name = tensor("aw_chunk_911_cast_fp16")]; + tensor var_8914_equation_0 = const()[name = tensor("op_8914_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8914_cast_fp16 = einsum(equation = var_8914_equation_0, values = (var_8588_cast_fp16, var_8366_cast_fp16))[name = tensor("op_8914_cast_fp16")]; + tensor var_8915_to_fp16 = const()[name = tensor("op_8915_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_913_cast_fp16 = mul(x = var_8914_cast_fp16, y = var_8915_to_fp16)[name = tensor("aw_chunk_913_cast_fp16")]; + tensor var_8918_equation_0 = const()[name = tensor("op_8918_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8918_cast_fp16 = einsum(equation = var_8918_equation_0, values = (var_8588_cast_fp16, var_8373_cast_fp16))[name = tensor("op_8918_cast_fp16")]; + tensor var_8919_to_fp16 = const()[name = tensor("op_8919_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_915_cast_fp16 = mul(x = var_8918_cast_fp16, y = var_8919_to_fp16)[name = tensor("aw_chunk_915_cast_fp16")]; + tensor var_8922_equation_0 = const()[name = tensor("op_8922_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8922_cast_fp16 = einsum(equation = var_8922_equation_0, values = (var_8588_cast_fp16, var_8380_cast_fp16))[name = tensor("op_8922_cast_fp16")]; + tensor var_8923_to_fp16 = const()[name = tensor("op_8923_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_917_cast_fp16 = mul(x = var_8922_cast_fp16, y = var_8923_to_fp16)[name = tensor("aw_chunk_917_cast_fp16")]; + tensor var_8926_equation_0 = const()[name = tensor("op_8926_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8926_cast_fp16 = einsum(equation = var_8926_equation_0, values = (var_8588_cast_fp16, var_8387_cast_fp16))[name = tensor("op_8926_cast_fp16")]; + tensor var_8927_to_fp16 = const()[name = tensor("op_8927_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_919_cast_fp16 = mul(x = var_8926_cast_fp16, y = var_8927_to_fp16)[name = tensor("aw_chunk_919_cast_fp16")]; + tensor var_8930_equation_0 = const()[name = tensor("op_8930_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8930_cast_fp16 = einsum(equation = var_8930_equation_0, values = (var_8592_cast_fp16, var_8394_cast_fp16))[name = tensor("op_8930_cast_fp16")]; + tensor var_8931_to_fp16 = const()[name = tensor("op_8931_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_921_cast_fp16 = mul(x = var_8930_cast_fp16, y = var_8931_to_fp16)[name = tensor("aw_chunk_921_cast_fp16")]; + tensor var_8934_equation_0 = const()[name = tensor("op_8934_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8934_cast_fp16 = einsum(equation = var_8934_equation_0, values = (var_8592_cast_fp16, var_8401_cast_fp16))[name = tensor("op_8934_cast_fp16")]; + tensor var_8935_to_fp16 = const()[name = tensor("op_8935_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_923_cast_fp16 = mul(x = var_8934_cast_fp16, y = var_8935_to_fp16)[name = tensor("aw_chunk_923_cast_fp16")]; + tensor var_8938_equation_0 = const()[name = tensor("op_8938_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8938_cast_fp16 = einsum(equation = var_8938_equation_0, values = (var_8592_cast_fp16, var_8408_cast_fp16))[name = tensor("op_8938_cast_fp16")]; + tensor var_8939_to_fp16 = const()[name = tensor("op_8939_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_925_cast_fp16 = mul(x = var_8938_cast_fp16, y = var_8939_to_fp16)[name = tensor("aw_chunk_925_cast_fp16")]; + tensor var_8942_equation_0 = const()[name = tensor("op_8942_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8942_cast_fp16 = einsum(equation = var_8942_equation_0, values = (var_8592_cast_fp16, var_8415_cast_fp16))[name = tensor("op_8942_cast_fp16")]; + tensor var_8943_to_fp16 = const()[name = tensor("op_8943_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_927_cast_fp16 = mul(x = var_8942_cast_fp16, y = var_8943_to_fp16)[name = tensor("aw_chunk_927_cast_fp16")]; + tensor var_8946_equation_0 = const()[name = tensor("op_8946_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8946_cast_fp16 = einsum(equation = var_8946_equation_0, values = (var_8596_cast_fp16, var_8422_cast_fp16))[name = tensor("op_8946_cast_fp16")]; + tensor var_8947_to_fp16 = const()[name = tensor("op_8947_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_929_cast_fp16 = mul(x = var_8946_cast_fp16, y = var_8947_to_fp16)[name = tensor("aw_chunk_929_cast_fp16")]; + tensor var_8950_equation_0 = const()[name = tensor("op_8950_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8950_cast_fp16 = einsum(equation = var_8950_equation_0, values = (var_8596_cast_fp16, var_8429_cast_fp16))[name = tensor("op_8950_cast_fp16")]; + tensor var_8951_to_fp16 = const()[name = tensor("op_8951_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_931_cast_fp16 = mul(x = var_8950_cast_fp16, y = var_8951_to_fp16)[name = tensor("aw_chunk_931_cast_fp16")]; + tensor var_8954_equation_0 = const()[name = tensor("op_8954_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8954_cast_fp16 = einsum(equation = var_8954_equation_0, values = (var_8596_cast_fp16, var_8436_cast_fp16))[name = tensor("op_8954_cast_fp16")]; + tensor var_8955_to_fp16 = const()[name = tensor("op_8955_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_933_cast_fp16 = mul(x = var_8954_cast_fp16, y = var_8955_to_fp16)[name = tensor("aw_chunk_933_cast_fp16")]; + tensor var_8958_equation_0 = const()[name = tensor("op_8958_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8958_cast_fp16 = einsum(equation = var_8958_equation_0, values = (var_8596_cast_fp16, var_8443_cast_fp16))[name = tensor("op_8958_cast_fp16")]; + tensor var_8959_to_fp16 = const()[name = tensor("op_8959_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_935_cast_fp16 = mul(x = var_8958_cast_fp16, y = var_8959_to_fp16)[name = tensor("aw_chunk_935_cast_fp16")]; + tensor var_8962_equation_0 = const()[name = tensor("op_8962_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8962_cast_fp16 = einsum(equation = var_8962_equation_0, values = (var_8600_cast_fp16, var_8450_cast_fp16))[name = tensor("op_8962_cast_fp16")]; + tensor var_8963_to_fp16 = const()[name = tensor("op_8963_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_937_cast_fp16 = mul(x = var_8962_cast_fp16, y = var_8963_to_fp16)[name = tensor("aw_chunk_937_cast_fp16")]; + tensor var_8966_equation_0 = const()[name = tensor("op_8966_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8966_cast_fp16 = einsum(equation = var_8966_equation_0, values = (var_8600_cast_fp16, var_8457_cast_fp16))[name = tensor("op_8966_cast_fp16")]; + tensor var_8967_to_fp16 = const()[name = tensor("op_8967_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_939_cast_fp16 = mul(x = var_8966_cast_fp16, y = var_8967_to_fp16)[name = tensor("aw_chunk_939_cast_fp16")]; + tensor var_8970_equation_0 = const()[name = tensor("op_8970_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8970_cast_fp16 = einsum(equation = var_8970_equation_0, values = (var_8600_cast_fp16, var_8464_cast_fp16))[name = tensor("op_8970_cast_fp16")]; + tensor var_8971_to_fp16 = const()[name = tensor("op_8971_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_941_cast_fp16 = mul(x = var_8970_cast_fp16, y = var_8971_to_fp16)[name = tensor("aw_chunk_941_cast_fp16")]; + tensor var_8974_equation_0 = const()[name = tensor("op_8974_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8974_cast_fp16 = einsum(equation = var_8974_equation_0, values = (var_8600_cast_fp16, var_8471_cast_fp16))[name = tensor("op_8974_cast_fp16")]; + tensor var_8975_to_fp16 = const()[name = tensor("op_8975_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_943_cast_fp16 = mul(x = var_8974_cast_fp16, y = var_8975_to_fp16)[name = tensor("aw_chunk_943_cast_fp16")]; + tensor var_8978_equation_0 = const()[name = tensor("op_8978_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8978_cast_fp16 = einsum(equation = var_8978_equation_0, values = (var_8604_cast_fp16, var_8478_cast_fp16))[name = tensor("op_8978_cast_fp16")]; + tensor var_8979_to_fp16 = const()[name = tensor("op_8979_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_945_cast_fp16 = mul(x = var_8978_cast_fp16, y = var_8979_to_fp16)[name = tensor("aw_chunk_945_cast_fp16")]; + tensor var_8982_equation_0 = const()[name = tensor("op_8982_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8982_cast_fp16 = einsum(equation = var_8982_equation_0, values = (var_8604_cast_fp16, var_8485_cast_fp16))[name = tensor("op_8982_cast_fp16")]; + tensor var_8983_to_fp16 = const()[name = tensor("op_8983_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_947_cast_fp16 = mul(x = var_8982_cast_fp16, y = var_8983_to_fp16)[name = tensor("aw_chunk_947_cast_fp16")]; + tensor var_8986_equation_0 = const()[name = tensor("op_8986_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8986_cast_fp16 = einsum(equation = var_8986_equation_0, values = (var_8604_cast_fp16, var_8492_cast_fp16))[name = tensor("op_8986_cast_fp16")]; + tensor var_8987_to_fp16 = const()[name = tensor("op_8987_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_949_cast_fp16 = mul(x = var_8986_cast_fp16, y = var_8987_to_fp16)[name = tensor("aw_chunk_949_cast_fp16")]; + tensor var_8990_equation_0 = const()[name = tensor("op_8990_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8990_cast_fp16 = einsum(equation = var_8990_equation_0, values = (var_8604_cast_fp16, var_8499_cast_fp16))[name = tensor("op_8990_cast_fp16")]; + tensor var_8991_to_fp16 = const()[name = tensor("op_8991_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_951_cast_fp16 = mul(x = var_8990_cast_fp16, y = var_8991_to_fp16)[name = tensor("aw_chunk_951_cast_fp16")]; + tensor var_8994_equation_0 = const()[name = tensor("op_8994_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8994_cast_fp16 = einsum(equation = var_8994_equation_0, values = (var_8608_cast_fp16, var_8506_cast_fp16))[name = tensor("op_8994_cast_fp16")]; + tensor var_8995_to_fp16 = const()[name = tensor("op_8995_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_953_cast_fp16 = mul(x = var_8994_cast_fp16, y = var_8995_to_fp16)[name = tensor("aw_chunk_953_cast_fp16")]; + tensor var_8998_equation_0 = const()[name = tensor("op_8998_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_8998_cast_fp16 = einsum(equation = var_8998_equation_0, values = (var_8608_cast_fp16, var_8513_cast_fp16))[name = tensor("op_8998_cast_fp16")]; + tensor var_8999_to_fp16 = const()[name = tensor("op_8999_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_955_cast_fp16 = mul(x = var_8998_cast_fp16, y = var_8999_to_fp16)[name = tensor("aw_chunk_955_cast_fp16")]; + tensor var_9002_equation_0 = const()[name = tensor("op_9002_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_9002_cast_fp16 = einsum(equation = var_9002_equation_0, values = (var_8608_cast_fp16, var_8520_cast_fp16))[name = tensor("op_9002_cast_fp16")]; + tensor var_9003_to_fp16 = const()[name = tensor("op_9003_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_957_cast_fp16 = mul(x = var_9002_cast_fp16, y = var_9003_to_fp16)[name = tensor("aw_chunk_957_cast_fp16")]; + tensor var_9006_equation_0 = const()[name = tensor("op_9006_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_9006_cast_fp16 = einsum(equation = var_9006_equation_0, values = (var_8608_cast_fp16, var_8527_cast_fp16))[name = tensor("op_9006_cast_fp16")]; + tensor var_9007_to_fp16 = const()[name = tensor("op_9007_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_959_cast_fp16 = mul(x = var_9006_cast_fp16, y = var_9007_to_fp16)[name = tensor("aw_chunk_959_cast_fp16")]; + tensor var_9009_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_801_cast_fp16)[name = tensor("op_9009_cast_fp16")]; + tensor var_9010_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_803_cast_fp16)[name = tensor("op_9010_cast_fp16")]; + tensor var_9011_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_805_cast_fp16)[name = tensor("op_9011_cast_fp16")]; + tensor var_9012_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_807_cast_fp16)[name = tensor("op_9012_cast_fp16")]; + tensor var_9013_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_809_cast_fp16)[name = tensor("op_9013_cast_fp16")]; + tensor var_9014_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_811_cast_fp16)[name = tensor("op_9014_cast_fp16")]; + tensor var_9015_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_813_cast_fp16)[name = tensor("op_9015_cast_fp16")]; + tensor var_9016_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_815_cast_fp16)[name = tensor("op_9016_cast_fp16")]; + tensor var_9017_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_817_cast_fp16)[name = tensor("op_9017_cast_fp16")]; + tensor var_9018_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_819_cast_fp16)[name = tensor("op_9018_cast_fp16")]; + tensor var_9019_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_821_cast_fp16)[name = tensor("op_9019_cast_fp16")]; + tensor var_9020_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_823_cast_fp16)[name = tensor("op_9020_cast_fp16")]; + tensor var_9021_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_825_cast_fp16)[name = tensor("op_9021_cast_fp16")]; + tensor var_9022_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_827_cast_fp16)[name = tensor("op_9022_cast_fp16")]; + tensor var_9023_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_829_cast_fp16)[name = tensor("op_9023_cast_fp16")]; + tensor var_9024_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_831_cast_fp16)[name = tensor("op_9024_cast_fp16")]; + tensor var_9025_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_833_cast_fp16)[name = tensor("op_9025_cast_fp16")]; + tensor var_9026_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_835_cast_fp16)[name = tensor("op_9026_cast_fp16")]; + tensor var_9027_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_837_cast_fp16)[name = tensor("op_9027_cast_fp16")]; + tensor var_9028_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_839_cast_fp16)[name = tensor("op_9028_cast_fp16")]; + tensor var_9029_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_841_cast_fp16)[name = tensor("op_9029_cast_fp16")]; + tensor var_9030_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_843_cast_fp16)[name = tensor("op_9030_cast_fp16")]; + tensor var_9031_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_845_cast_fp16)[name = tensor("op_9031_cast_fp16")]; + tensor var_9032_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_847_cast_fp16)[name = tensor("op_9032_cast_fp16")]; + tensor var_9033_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_849_cast_fp16)[name = tensor("op_9033_cast_fp16")]; + tensor var_9034_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_851_cast_fp16)[name = tensor("op_9034_cast_fp16")]; + tensor var_9035_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_853_cast_fp16)[name = tensor("op_9035_cast_fp16")]; + tensor var_9036_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_855_cast_fp16)[name = tensor("op_9036_cast_fp16")]; + tensor var_9037_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_857_cast_fp16)[name = tensor("op_9037_cast_fp16")]; + tensor var_9038_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_859_cast_fp16)[name = tensor("op_9038_cast_fp16")]; + tensor var_9039_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_861_cast_fp16)[name = tensor("op_9039_cast_fp16")]; + tensor var_9040_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_863_cast_fp16)[name = tensor("op_9040_cast_fp16")]; + tensor var_9041_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_865_cast_fp16)[name = tensor("op_9041_cast_fp16")]; + tensor var_9042_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_867_cast_fp16)[name = tensor("op_9042_cast_fp16")]; + tensor var_9043_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_869_cast_fp16)[name = tensor("op_9043_cast_fp16")]; + tensor var_9044_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_871_cast_fp16)[name = tensor("op_9044_cast_fp16")]; + tensor var_9045_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_873_cast_fp16)[name = tensor("op_9045_cast_fp16")]; + tensor var_9046_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_875_cast_fp16)[name = tensor("op_9046_cast_fp16")]; + tensor var_9047_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_877_cast_fp16)[name = tensor("op_9047_cast_fp16")]; + tensor var_9048_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_879_cast_fp16)[name = tensor("op_9048_cast_fp16")]; + tensor var_9049_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_881_cast_fp16)[name = tensor("op_9049_cast_fp16")]; + tensor var_9050_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_883_cast_fp16)[name = tensor("op_9050_cast_fp16")]; + tensor var_9051_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_885_cast_fp16)[name = tensor("op_9051_cast_fp16")]; + tensor var_9052_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_887_cast_fp16)[name = tensor("op_9052_cast_fp16")]; + tensor var_9053_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_889_cast_fp16)[name = tensor("op_9053_cast_fp16")]; + tensor var_9054_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_891_cast_fp16)[name = tensor("op_9054_cast_fp16")]; + tensor var_9055_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_893_cast_fp16)[name = tensor("op_9055_cast_fp16")]; + tensor var_9056_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_895_cast_fp16)[name = tensor("op_9056_cast_fp16")]; + tensor var_9057_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_897_cast_fp16)[name = tensor("op_9057_cast_fp16")]; + tensor var_9058_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_899_cast_fp16)[name = tensor("op_9058_cast_fp16")]; + tensor var_9059_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_901_cast_fp16)[name = tensor("op_9059_cast_fp16")]; + tensor var_9060_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_903_cast_fp16)[name = tensor("op_9060_cast_fp16")]; + tensor var_9061_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_905_cast_fp16)[name = tensor("op_9061_cast_fp16")]; + tensor var_9062_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_907_cast_fp16)[name = tensor("op_9062_cast_fp16")]; + tensor var_9063_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_909_cast_fp16)[name = tensor("op_9063_cast_fp16")]; + tensor var_9064_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_911_cast_fp16)[name = tensor("op_9064_cast_fp16")]; + tensor var_9065_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_913_cast_fp16)[name = tensor("op_9065_cast_fp16")]; + tensor var_9066_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_915_cast_fp16)[name = tensor("op_9066_cast_fp16")]; + tensor var_9067_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_917_cast_fp16)[name = tensor("op_9067_cast_fp16")]; + tensor var_9068_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_919_cast_fp16)[name = tensor("op_9068_cast_fp16")]; + tensor var_9069_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_921_cast_fp16)[name = tensor("op_9069_cast_fp16")]; + tensor var_9070_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_923_cast_fp16)[name = tensor("op_9070_cast_fp16")]; + tensor var_9071_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_925_cast_fp16)[name = tensor("op_9071_cast_fp16")]; + tensor var_9072_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_927_cast_fp16)[name = tensor("op_9072_cast_fp16")]; + tensor var_9073_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_929_cast_fp16)[name = tensor("op_9073_cast_fp16")]; + tensor var_9074_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_931_cast_fp16)[name = tensor("op_9074_cast_fp16")]; + tensor var_9075_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_933_cast_fp16)[name = tensor("op_9075_cast_fp16")]; + tensor var_9076_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_935_cast_fp16)[name = tensor("op_9076_cast_fp16")]; + tensor var_9077_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_937_cast_fp16)[name = tensor("op_9077_cast_fp16")]; + tensor var_9078_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_939_cast_fp16)[name = tensor("op_9078_cast_fp16")]; + tensor var_9079_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_941_cast_fp16)[name = tensor("op_9079_cast_fp16")]; + tensor var_9080_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_943_cast_fp16)[name = tensor("op_9080_cast_fp16")]; + tensor var_9081_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_945_cast_fp16)[name = tensor("op_9081_cast_fp16")]; + tensor var_9082_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_947_cast_fp16)[name = tensor("op_9082_cast_fp16")]; + tensor var_9083_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_949_cast_fp16)[name = tensor("op_9083_cast_fp16")]; + tensor var_9084_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_951_cast_fp16)[name = tensor("op_9084_cast_fp16")]; + tensor var_9085_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_953_cast_fp16)[name = tensor("op_9085_cast_fp16")]; + tensor var_9086_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_955_cast_fp16)[name = tensor("op_9086_cast_fp16")]; + tensor var_9087_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_957_cast_fp16)[name = tensor("op_9087_cast_fp16")]; + tensor var_9088_cast_fp16 = softmax(axis = var_7834, x = aw_chunk_959_cast_fp16)[name = tensor("op_9088_cast_fp16")]; + tensor var_9090_equation_0 = const()[name = tensor("op_9090_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9090_cast_fp16 = einsum(equation = var_9090_equation_0, values = (var_8610_cast_fp16, var_9009_cast_fp16))[name = tensor("op_9090_cast_fp16")]; + tensor var_9092_equation_0 = const()[name = tensor("op_9092_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9092_cast_fp16 = einsum(equation = var_9092_equation_0, values = (var_8610_cast_fp16, var_9010_cast_fp16))[name = tensor("op_9092_cast_fp16")]; + tensor var_9094_equation_0 = const()[name = tensor("op_9094_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9094_cast_fp16 = einsum(equation = var_9094_equation_0, values = (var_8610_cast_fp16, var_9011_cast_fp16))[name = tensor("op_9094_cast_fp16")]; + tensor var_9096_equation_0 = const()[name = tensor("op_9096_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9096_cast_fp16 = einsum(equation = var_9096_equation_0, values = (var_8610_cast_fp16, var_9012_cast_fp16))[name = tensor("op_9096_cast_fp16")]; + tensor var_9098_equation_0 = const()[name = tensor("op_9098_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9098_cast_fp16 = einsum(equation = var_9098_equation_0, values = (var_8614_cast_fp16, var_9013_cast_fp16))[name = tensor("op_9098_cast_fp16")]; + tensor var_9100_equation_0 = const()[name = tensor("op_9100_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9100_cast_fp16 = einsum(equation = var_9100_equation_0, values = (var_8614_cast_fp16, var_9014_cast_fp16))[name = tensor("op_9100_cast_fp16")]; + tensor var_9102_equation_0 = const()[name = tensor("op_9102_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9102_cast_fp16 = einsum(equation = var_9102_equation_0, values = (var_8614_cast_fp16, var_9015_cast_fp16))[name = tensor("op_9102_cast_fp16")]; + tensor var_9104_equation_0 = const()[name = tensor("op_9104_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9104_cast_fp16 = einsum(equation = var_9104_equation_0, values = (var_8614_cast_fp16, var_9016_cast_fp16))[name = tensor("op_9104_cast_fp16")]; + tensor var_9106_equation_0 = const()[name = tensor("op_9106_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9106_cast_fp16 = einsum(equation = var_9106_equation_0, values = (var_8618_cast_fp16, var_9017_cast_fp16))[name = tensor("op_9106_cast_fp16")]; + tensor var_9108_equation_0 = const()[name = tensor("op_9108_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9108_cast_fp16 = einsum(equation = var_9108_equation_0, values = (var_8618_cast_fp16, var_9018_cast_fp16))[name = tensor("op_9108_cast_fp16")]; + tensor var_9110_equation_0 = const()[name = tensor("op_9110_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9110_cast_fp16 = einsum(equation = var_9110_equation_0, values = (var_8618_cast_fp16, var_9019_cast_fp16))[name = tensor("op_9110_cast_fp16")]; + tensor var_9112_equation_0 = const()[name = tensor("op_9112_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9112_cast_fp16 = einsum(equation = var_9112_equation_0, values = (var_8618_cast_fp16, var_9020_cast_fp16))[name = tensor("op_9112_cast_fp16")]; + tensor var_9114_equation_0 = const()[name = tensor("op_9114_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9114_cast_fp16 = einsum(equation = var_9114_equation_0, values = (var_8622_cast_fp16, var_9021_cast_fp16))[name = tensor("op_9114_cast_fp16")]; + tensor var_9116_equation_0 = const()[name = tensor("op_9116_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9116_cast_fp16 = einsum(equation = var_9116_equation_0, values = (var_8622_cast_fp16, var_9022_cast_fp16))[name = tensor("op_9116_cast_fp16")]; + tensor var_9118_equation_0 = const()[name = tensor("op_9118_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9118_cast_fp16 = einsum(equation = var_9118_equation_0, values = (var_8622_cast_fp16, var_9023_cast_fp16))[name = tensor("op_9118_cast_fp16")]; + tensor var_9120_equation_0 = const()[name = tensor("op_9120_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9120_cast_fp16 = einsum(equation = var_9120_equation_0, values = (var_8622_cast_fp16, var_9024_cast_fp16))[name = tensor("op_9120_cast_fp16")]; + tensor var_9122_equation_0 = const()[name = tensor("op_9122_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9122_cast_fp16 = einsum(equation = var_9122_equation_0, values = (var_8626_cast_fp16, var_9025_cast_fp16))[name = tensor("op_9122_cast_fp16")]; + tensor var_9124_equation_0 = const()[name = tensor("op_9124_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9124_cast_fp16 = einsum(equation = var_9124_equation_0, values = (var_8626_cast_fp16, var_9026_cast_fp16))[name = tensor("op_9124_cast_fp16")]; + tensor var_9126_equation_0 = const()[name = tensor("op_9126_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9126_cast_fp16 = einsum(equation = var_9126_equation_0, values = (var_8626_cast_fp16, var_9027_cast_fp16))[name = tensor("op_9126_cast_fp16")]; + tensor var_9128_equation_0 = const()[name = tensor("op_9128_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9128_cast_fp16 = einsum(equation = var_9128_equation_0, values = (var_8626_cast_fp16, var_9028_cast_fp16))[name = tensor("op_9128_cast_fp16")]; + tensor var_9130_equation_0 = const()[name = tensor("op_9130_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9130_cast_fp16 = einsum(equation = var_9130_equation_0, values = (var_8630_cast_fp16, var_9029_cast_fp16))[name = tensor("op_9130_cast_fp16")]; + tensor var_9132_equation_0 = const()[name = tensor("op_9132_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9132_cast_fp16 = einsum(equation = var_9132_equation_0, values = (var_8630_cast_fp16, var_9030_cast_fp16))[name = tensor("op_9132_cast_fp16")]; + tensor var_9134_equation_0 = const()[name = tensor("op_9134_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9134_cast_fp16 = einsum(equation = var_9134_equation_0, values = (var_8630_cast_fp16, var_9031_cast_fp16))[name = tensor("op_9134_cast_fp16")]; + tensor var_9136_equation_0 = const()[name = tensor("op_9136_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9136_cast_fp16 = einsum(equation = var_9136_equation_0, values = (var_8630_cast_fp16, var_9032_cast_fp16))[name = tensor("op_9136_cast_fp16")]; + tensor var_9138_equation_0 = const()[name = tensor("op_9138_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9138_cast_fp16 = einsum(equation = var_9138_equation_0, values = (var_8634_cast_fp16, var_9033_cast_fp16))[name = tensor("op_9138_cast_fp16")]; + tensor var_9140_equation_0 = const()[name = tensor("op_9140_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9140_cast_fp16 = einsum(equation = var_9140_equation_0, values = (var_8634_cast_fp16, var_9034_cast_fp16))[name = tensor("op_9140_cast_fp16")]; + tensor var_9142_equation_0 = const()[name = tensor("op_9142_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9142_cast_fp16 = einsum(equation = var_9142_equation_0, values = (var_8634_cast_fp16, var_9035_cast_fp16))[name = tensor("op_9142_cast_fp16")]; + tensor var_9144_equation_0 = const()[name = tensor("op_9144_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9144_cast_fp16 = einsum(equation = var_9144_equation_0, values = (var_8634_cast_fp16, var_9036_cast_fp16))[name = tensor("op_9144_cast_fp16")]; + tensor var_9146_equation_0 = const()[name = tensor("op_9146_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9146_cast_fp16 = einsum(equation = var_9146_equation_0, values = (var_8638_cast_fp16, var_9037_cast_fp16))[name = tensor("op_9146_cast_fp16")]; + tensor var_9148_equation_0 = const()[name = tensor("op_9148_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9148_cast_fp16 = einsum(equation = var_9148_equation_0, values = (var_8638_cast_fp16, var_9038_cast_fp16))[name = tensor("op_9148_cast_fp16")]; + tensor var_9150_equation_0 = const()[name = tensor("op_9150_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9150_cast_fp16 = einsum(equation = var_9150_equation_0, values = (var_8638_cast_fp16, var_9039_cast_fp16))[name = tensor("op_9150_cast_fp16")]; + tensor var_9152_equation_0 = const()[name = tensor("op_9152_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9152_cast_fp16 = einsum(equation = var_9152_equation_0, values = (var_8638_cast_fp16, var_9040_cast_fp16))[name = tensor("op_9152_cast_fp16")]; + tensor var_9154_equation_0 = const()[name = tensor("op_9154_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9154_cast_fp16 = einsum(equation = var_9154_equation_0, values = (var_8642_cast_fp16, var_9041_cast_fp16))[name = tensor("op_9154_cast_fp16")]; + tensor var_9156_equation_0 = const()[name = tensor("op_9156_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9156_cast_fp16 = einsum(equation = var_9156_equation_0, values = (var_8642_cast_fp16, var_9042_cast_fp16))[name = tensor("op_9156_cast_fp16")]; + tensor var_9158_equation_0 = const()[name = tensor("op_9158_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9158_cast_fp16 = einsum(equation = var_9158_equation_0, values = (var_8642_cast_fp16, var_9043_cast_fp16))[name = tensor("op_9158_cast_fp16")]; + tensor var_9160_equation_0 = const()[name = tensor("op_9160_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9160_cast_fp16 = einsum(equation = var_9160_equation_0, values = (var_8642_cast_fp16, var_9044_cast_fp16))[name = tensor("op_9160_cast_fp16")]; + tensor var_9162_equation_0 = const()[name = tensor("op_9162_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9162_cast_fp16 = einsum(equation = var_9162_equation_0, values = (var_8646_cast_fp16, var_9045_cast_fp16))[name = tensor("op_9162_cast_fp16")]; + tensor var_9164_equation_0 = const()[name = tensor("op_9164_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9164_cast_fp16 = einsum(equation = var_9164_equation_0, values = (var_8646_cast_fp16, var_9046_cast_fp16))[name = tensor("op_9164_cast_fp16")]; + tensor var_9166_equation_0 = const()[name = tensor("op_9166_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9166_cast_fp16 = einsum(equation = var_9166_equation_0, values = (var_8646_cast_fp16, var_9047_cast_fp16))[name = tensor("op_9166_cast_fp16")]; + tensor var_9168_equation_0 = const()[name = tensor("op_9168_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9168_cast_fp16 = einsum(equation = var_9168_equation_0, values = (var_8646_cast_fp16, var_9048_cast_fp16))[name = tensor("op_9168_cast_fp16")]; + tensor var_9170_equation_0 = const()[name = tensor("op_9170_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9170_cast_fp16 = einsum(equation = var_9170_equation_0, values = (var_8650_cast_fp16, var_9049_cast_fp16))[name = tensor("op_9170_cast_fp16")]; + tensor var_9172_equation_0 = const()[name = tensor("op_9172_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9172_cast_fp16 = einsum(equation = var_9172_equation_0, values = (var_8650_cast_fp16, var_9050_cast_fp16))[name = tensor("op_9172_cast_fp16")]; + tensor var_9174_equation_0 = const()[name = tensor("op_9174_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9174_cast_fp16 = einsum(equation = var_9174_equation_0, values = (var_8650_cast_fp16, var_9051_cast_fp16))[name = tensor("op_9174_cast_fp16")]; + tensor var_9176_equation_0 = const()[name = tensor("op_9176_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9176_cast_fp16 = einsum(equation = var_9176_equation_0, values = (var_8650_cast_fp16, var_9052_cast_fp16))[name = tensor("op_9176_cast_fp16")]; + tensor var_9178_equation_0 = const()[name = tensor("op_9178_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9178_cast_fp16 = einsum(equation = var_9178_equation_0, values = (var_8654_cast_fp16, var_9053_cast_fp16))[name = tensor("op_9178_cast_fp16")]; + tensor var_9180_equation_0 = const()[name = tensor("op_9180_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9180_cast_fp16 = einsum(equation = var_9180_equation_0, values = (var_8654_cast_fp16, var_9054_cast_fp16))[name = tensor("op_9180_cast_fp16")]; + tensor var_9182_equation_0 = const()[name = tensor("op_9182_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9182_cast_fp16 = einsum(equation = var_9182_equation_0, values = (var_8654_cast_fp16, var_9055_cast_fp16))[name = tensor("op_9182_cast_fp16")]; + tensor var_9184_equation_0 = const()[name = tensor("op_9184_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9184_cast_fp16 = einsum(equation = var_9184_equation_0, values = (var_8654_cast_fp16, var_9056_cast_fp16))[name = tensor("op_9184_cast_fp16")]; + tensor var_9186_equation_0 = const()[name = tensor("op_9186_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9186_cast_fp16 = einsum(equation = var_9186_equation_0, values = (var_8658_cast_fp16, var_9057_cast_fp16))[name = tensor("op_9186_cast_fp16")]; + tensor var_9188_equation_0 = const()[name = tensor("op_9188_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9188_cast_fp16 = einsum(equation = var_9188_equation_0, values = (var_8658_cast_fp16, var_9058_cast_fp16))[name = tensor("op_9188_cast_fp16")]; + tensor var_9190_equation_0 = const()[name = tensor("op_9190_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9190_cast_fp16 = einsum(equation = var_9190_equation_0, values = (var_8658_cast_fp16, var_9059_cast_fp16))[name = tensor("op_9190_cast_fp16")]; + tensor var_9192_equation_0 = const()[name = tensor("op_9192_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9192_cast_fp16 = einsum(equation = var_9192_equation_0, values = (var_8658_cast_fp16, var_9060_cast_fp16))[name = tensor("op_9192_cast_fp16")]; + tensor var_9194_equation_0 = const()[name = tensor("op_9194_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9194_cast_fp16 = einsum(equation = var_9194_equation_0, values = (var_8662_cast_fp16, var_9061_cast_fp16))[name = tensor("op_9194_cast_fp16")]; + tensor var_9196_equation_0 = const()[name = tensor("op_9196_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9196_cast_fp16 = einsum(equation = var_9196_equation_0, values = (var_8662_cast_fp16, var_9062_cast_fp16))[name = tensor("op_9196_cast_fp16")]; + tensor var_9198_equation_0 = const()[name = tensor("op_9198_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9198_cast_fp16 = einsum(equation = var_9198_equation_0, values = (var_8662_cast_fp16, var_9063_cast_fp16))[name = tensor("op_9198_cast_fp16")]; + tensor var_9200_equation_0 = const()[name = tensor("op_9200_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9200_cast_fp16 = einsum(equation = var_9200_equation_0, values = (var_8662_cast_fp16, var_9064_cast_fp16))[name = tensor("op_9200_cast_fp16")]; + tensor var_9202_equation_0 = const()[name = tensor("op_9202_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9202_cast_fp16 = einsum(equation = var_9202_equation_0, values = (var_8666_cast_fp16, var_9065_cast_fp16))[name = tensor("op_9202_cast_fp16")]; + tensor var_9204_equation_0 = const()[name = tensor("op_9204_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9204_cast_fp16 = einsum(equation = var_9204_equation_0, values = (var_8666_cast_fp16, var_9066_cast_fp16))[name = tensor("op_9204_cast_fp16")]; + tensor var_9206_equation_0 = const()[name = tensor("op_9206_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9206_cast_fp16 = einsum(equation = var_9206_equation_0, values = (var_8666_cast_fp16, var_9067_cast_fp16))[name = tensor("op_9206_cast_fp16")]; + tensor var_9208_equation_0 = const()[name = tensor("op_9208_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9208_cast_fp16 = einsum(equation = var_9208_equation_0, values = (var_8666_cast_fp16, var_9068_cast_fp16))[name = tensor("op_9208_cast_fp16")]; + tensor var_9210_equation_0 = const()[name = tensor("op_9210_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9210_cast_fp16 = einsum(equation = var_9210_equation_0, values = (var_8670_cast_fp16, var_9069_cast_fp16))[name = tensor("op_9210_cast_fp16")]; + tensor var_9212_equation_0 = const()[name = tensor("op_9212_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9212_cast_fp16 = einsum(equation = var_9212_equation_0, values = (var_8670_cast_fp16, var_9070_cast_fp16))[name = tensor("op_9212_cast_fp16")]; + tensor var_9214_equation_0 = const()[name = tensor("op_9214_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9214_cast_fp16 = einsum(equation = var_9214_equation_0, values = (var_8670_cast_fp16, var_9071_cast_fp16))[name = tensor("op_9214_cast_fp16")]; + tensor var_9216_equation_0 = const()[name = tensor("op_9216_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9216_cast_fp16 = einsum(equation = var_9216_equation_0, values = (var_8670_cast_fp16, var_9072_cast_fp16))[name = tensor("op_9216_cast_fp16")]; + tensor var_9218_equation_0 = const()[name = tensor("op_9218_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9218_cast_fp16 = einsum(equation = var_9218_equation_0, values = (var_8674_cast_fp16, var_9073_cast_fp16))[name = tensor("op_9218_cast_fp16")]; + tensor var_9220_equation_0 = const()[name = tensor("op_9220_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9220_cast_fp16 = einsum(equation = var_9220_equation_0, values = (var_8674_cast_fp16, var_9074_cast_fp16))[name = tensor("op_9220_cast_fp16")]; + tensor var_9222_equation_0 = const()[name = tensor("op_9222_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9222_cast_fp16 = einsum(equation = var_9222_equation_0, values = (var_8674_cast_fp16, var_9075_cast_fp16))[name = tensor("op_9222_cast_fp16")]; + tensor var_9224_equation_0 = const()[name = tensor("op_9224_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9224_cast_fp16 = einsum(equation = var_9224_equation_0, values = (var_8674_cast_fp16, var_9076_cast_fp16))[name = tensor("op_9224_cast_fp16")]; + tensor var_9226_equation_0 = const()[name = tensor("op_9226_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9226_cast_fp16 = einsum(equation = var_9226_equation_0, values = (var_8678_cast_fp16, var_9077_cast_fp16))[name = tensor("op_9226_cast_fp16")]; + tensor var_9228_equation_0 = const()[name = tensor("op_9228_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9228_cast_fp16 = einsum(equation = var_9228_equation_0, values = (var_8678_cast_fp16, var_9078_cast_fp16))[name = tensor("op_9228_cast_fp16")]; + tensor var_9230_equation_0 = const()[name = tensor("op_9230_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9230_cast_fp16 = einsum(equation = var_9230_equation_0, values = (var_8678_cast_fp16, var_9079_cast_fp16))[name = tensor("op_9230_cast_fp16")]; + tensor var_9232_equation_0 = const()[name = tensor("op_9232_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9232_cast_fp16 = einsum(equation = var_9232_equation_0, values = (var_8678_cast_fp16, var_9080_cast_fp16))[name = tensor("op_9232_cast_fp16")]; + tensor var_9234_equation_0 = const()[name = tensor("op_9234_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9234_cast_fp16 = einsum(equation = var_9234_equation_0, values = (var_8682_cast_fp16, var_9081_cast_fp16))[name = tensor("op_9234_cast_fp16")]; + tensor var_9236_equation_0 = const()[name = tensor("op_9236_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9236_cast_fp16 = einsum(equation = var_9236_equation_0, values = (var_8682_cast_fp16, var_9082_cast_fp16))[name = tensor("op_9236_cast_fp16")]; + tensor var_9238_equation_0 = const()[name = tensor("op_9238_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9238_cast_fp16 = einsum(equation = var_9238_equation_0, values = (var_8682_cast_fp16, var_9083_cast_fp16))[name = tensor("op_9238_cast_fp16")]; + tensor var_9240_equation_0 = const()[name = tensor("op_9240_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9240_cast_fp16 = einsum(equation = var_9240_equation_0, values = (var_8682_cast_fp16, var_9084_cast_fp16))[name = tensor("op_9240_cast_fp16")]; + tensor var_9242_equation_0 = const()[name = tensor("op_9242_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9242_cast_fp16 = einsum(equation = var_9242_equation_0, values = (var_8686_cast_fp16, var_9085_cast_fp16))[name = tensor("op_9242_cast_fp16")]; + tensor var_9244_equation_0 = const()[name = tensor("op_9244_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9244_cast_fp16 = einsum(equation = var_9244_equation_0, values = (var_8686_cast_fp16, var_9086_cast_fp16))[name = tensor("op_9244_cast_fp16")]; + tensor var_9246_equation_0 = const()[name = tensor("op_9246_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9246_cast_fp16 = einsum(equation = var_9246_equation_0, values = (var_8686_cast_fp16, var_9087_cast_fp16))[name = tensor("op_9246_cast_fp16")]; + tensor var_9248_equation_0 = const()[name = tensor("op_9248_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_9248_cast_fp16 = einsum(equation = var_9248_equation_0, values = (var_8686_cast_fp16, var_9088_cast_fp16))[name = tensor("op_9248_cast_fp16")]; + tensor var_9250_interleave_0 = const()[name = tensor("op_9250_interleave_0"), val = tensor(false)]; + tensor var_9250_cast_fp16 = concat(axis = var_7809, interleave = var_9250_interleave_0, values = (var_9090_cast_fp16, var_9092_cast_fp16, var_9094_cast_fp16, var_9096_cast_fp16))[name = tensor("op_9250_cast_fp16")]; + tensor var_9252_interleave_0 = const()[name = tensor("op_9252_interleave_0"), val = tensor(false)]; + tensor var_9252_cast_fp16 = concat(axis = var_7809, interleave = var_9252_interleave_0, values = (var_9098_cast_fp16, var_9100_cast_fp16, var_9102_cast_fp16, var_9104_cast_fp16))[name = tensor("op_9252_cast_fp16")]; + tensor var_9254_interleave_0 = const()[name = tensor("op_9254_interleave_0"), val = tensor(false)]; + tensor var_9254_cast_fp16 = concat(axis = var_7809, interleave = var_9254_interleave_0, values = (var_9106_cast_fp16, var_9108_cast_fp16, var_9110_cast_fp16, var_9112_cast_fp16))[name = tensor("op_9254_cast_fp16")]; + tensor var_9256_interleave_0 = const()[name = tensor("op_9256_interleave_0"), val = tensor(false)]; + tensor var_9256_cast_fp16 = concat(axis = var_7809, interleave = var_9256_interleave_0, values = (var_9114_cast_fp16, var_9116_cast_fp16, var_9118_cast_fp16, var_9120_cast_fp16))[name = tensor("op_9256_cast_fp16")]; + tensor var_9258_interleave_0 = const()[name = tensor("op_9258_interleave_0"), val = tensor(false)]; + tensor var_9258_cast_fp16 = concat(axis = var_7809, interleave = var_9258_interleave_0, values = (var_9122_cast_fp16, var_9124_cast_fp16, var_9126_cast_fp16, var_9128_cast_fp16))[name = tensor("op_9258_cast_fp16")]; + tensor var_9260_interleave_0 = const()[name = tensor("op_9260_interleave_0"), val = tensor(false)]; + tensor var_9260_cast_fp16 = concat(axis = var_7809, interleave = var_9260_interleave_0, values = (var_9130_cast_fp16, var_9132_cast_fp16, var_9134_cast_fp16, var_9136_cast_fp16))[name = tensor("op_9260_cast_fp16")]; + tensor var_9262_interleave_0 = const()[name = tensor("op_9262_interleave_0"), val = tensor(false)]; + tensor var_9262_cast_fp16 = concat(axis = var_7809, interleave = var_9262_interleave_0, values = (var_9138_cast_fp16, var_9140_cast_fp16, var_9142_cast_fp16, var_9144_cast_fp16))[name = tensor("op_9262_cast_fp16")]; + tensor var_9264_interleave_0 = const()[name = tensor("op_9264_interleave_0"), val = tensor(false)]; + tensor var_9264_cast_fp16 = concat(axis = var_7809, interleave = var_9264_interleave_0, values = (var_9146_cast_fp16, var_9148_cast_fp16, var_9150_cast_fp16, var_9152_cast_fp16))[name = tensor("op_9264_cast_fp16")]; + tensor var_9266_interleave_0 = const()[name = tensor("op_9266_interleave_0"), val = tensor(false)]; + tensor var_9266_cast_fp16 = concat(axis = var_7809, interleave = var_9266_interleave_0, values = (var_9154_cast_fp16, var_9156_cast_fp16, var_9158_cast_fp16, var_9160_cast_fp16))[name = tensor("op_9266_cast_fp16")]; + tensor var_9268_interleave_0 = const()[name = tensor("op_9268_interleave_0"), val = tensor(false)]; + tensor var_9268_cast_fp16 = concat(axis = var_7809, interleave = var_9268_interleave_0, values = (var_9162_cast_fp16, var_9164_cast_fp16, var_9166_cast_fp16, var_9168_cast_fp16))[name = tensor("op_9268_cast_fp16")]; + tensor var_9270_interleave_0 = const()[name = tensor("op_9270_interleave_0"), val = tensor(false)]; + tensor var_9270_cast_fp16 = concat(axis = var_7809, interleave = var_9270_interleave_0, values = (var_9170_cast_fp16, var_9172_cast_fp16, var_9174_cast_fp16, var_9176_cast_fp16))[name = tensor("op_9270_cast_fp16")]; + tensor var_9272_interleave_0 = const()[name = tensor("op_9272_interleave_0"), val = tensor(false)]; + tensor var_9272_cast_fp16 = concat(axis = var_7809, interleave = var_9272_interleave_0, values = (var_9178_cast_fp16, var_9180_cast_fp16, var_9182_cast_fp16, var_9184_cast_fp16))[name = tensor("op_9272_cast_fp16")]; + tensor var_9274_interleave_0 = const()[name = tensor("op_9274_interleave_0"), val = tensor(false)]; + tensor var_9274_cast_fp16 = concat(axis = var_7809, interleave = var_9274_interleave_0, values = (var_9186_cast_fp16, var_9188_cast_fp16, var_9190_cast_fp16, var_9192_cast_fp16))[name = tensor("op_9274_cast_fp16")]; + tensor var_9276_interleave_0 = const()[name = tensor("op_9276_interleave_0"), val = tensor(false)]; + tensor var_9276_cast_fp16 = concat(axis = var_7809, interleave = var_9276_interleave_0, values = (var_9194_cast_fp16, var_9196_cast_fp16, var_9198_cast_fp16, var_9200_cast_fp16))[name = tensor("op_9276_cast_fp16")]; + tensor var_9278_interleave_0 = const()[name = tensor("op_9278_interleave_0"), val = tensor(false)]; + tensor var_9278_cast_fp16 = concat(axis = var_7809, interleave = var_9278_interleave_0, values = (var_9202_cast_fp16, var_9204_cast_fp16, var_9206_cast_fp16, var_9208_cast_fp16))[name = tensor("op_9278_cast_fp16")]; + tensor var_9280_interleave_0 = const()[name = tensor("op_9280_interleave_0"), val = tensor(false)]; + tensor var_9280_cast_fp16 = concat(axis = var_7809, interleave = var_9280_interleave_0, values = (var_9210_cast_fp16, var_9212_cast_fp16, var_9214_cast_fp16, var_9216_cast_fp16))[name = tensor("op_9280_cast_fp16")]; + tensor var_9282_interleave_0 = const()[name = tensor("op_9282_interleave_0"), val = tensor(false)]; + tensor var_9282_cast_fp16 = concat(axis = var_7809, interleave = var_9282_interleave_0, values = (var_9218_cast_fp16, var_9220_cast_fp16, var_9222_cast_fp16, var_9224_cast_fp16))[name = tensor("op_9282_cast_fp16")]; + tensor var_9284_interleave_0 = const()[name = tensor("op_9284_interleave_0"), val = tensor(false)]; + tensor var_9284_cast_fp16 = concat(axis = var_7809, interleave = var_9284_interleave_0, values = (var_9226_cast_fp16, var_9228_cast_fp16, var_9230_cast_fp16, var_9232_cast_fp16))[name = tensor("op_9284_cast_fp16")]; + tensor var_9286_interleave_0 = const()[name = tensor("op_9286_interleave_0"), val = tensor(false)]; + tensor var_9286_cast_fp16 = concat(axis = var_7809, interleave = var_9286_interleave_0, values = (var_9234_cast_fp16, var_9236_cast_fp16, var_9238_cast_fp16, var_9240_cast_fp16))[name = tensor("op_9286_cast_fp16")]; + tensor var_9288_interleave_0 = const()[name = tensor("op_9288_interleave_0"), val = tensor(false)]; + tensor var_9288_cast_fp16 = concat(axis = var_7809, interleave = var_9288_interleave_0, values = (var_9242_cast_fp16, var_9244_cast_fp16, var_9246_cast_fp16, var_9248_cast_fp16))[name = tensor("op_9288_cast_fp16")]; + tensor input_41_interleave_0 = const()[name = tensor("input_41_interleave_0"), val = tensor(false)]; + tensor input_41_cast_fp16 = concat(axis = var_7834, interleave = input_41_interleave_0, values = (var_9250_cast_fp16, var_9252_cast_fp16, var_9254_cast_fp16, var_9256_cast_fp16, var_9258_cast_fp16, var_9260_cast_fp16, var_9262_cast_fp16, var_9264_cast_fp16, var_9266_cast_fp16, var_9268_cast_fp16, var_9270_cast_fp16, var_9272_cast_fp16, var_9274_cast_fp16, var_9276_cast_fp16, var_9278_cast_fp16, var_9280_cast_fp16, var_9282_cast_fp16, var_9284_cast_fp16, var_9286_cast_fp16, var_9288_cast_fp16))[name = tensor("input_41_cast_fp16")]; + tensor var_9293 = const()[name = tensor("op_9293"), val = tensor([1, 1])]; + tensor var_9295 = const()[name = tensor("op_9295"), val = tensor([1, 1])]; + tensor obj_23_pad_type_0 = const()[name = tensor("obj_23_pad_type_0"), val = tensor("custom")]; + tensor obj_23_pad_0 = const()[name = tensor("obj_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_5_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_5_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(220903040)))]; + tensor layers_5_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_5_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(224179904)))]; + tensor obj_23_cast_fp16 = conv(bias = layers_5_self_attn_o_proj_bias_to_fp16, dilations = var_9295, groups = var_7834, pad = obj_23_pad_0, pad_type = obj_23_pad_type_0, strides = var_9293, weight = layers_5_self_attn_o_proj_weight_to_fp16, x = input_41_cast_fp16)[name = tensor("obj_23_cast_fp16")]; + tensor inputs_23_cast_fp16 = add(x = inputs_21_cast_fp16, y = obj_23_cast_fp16)[name = tensor("inputs_23_cast_fp16")]; + tensor var_9301 = const()[name = tensor("op_9301"), val = tensor([1])]; + tensor channels_mean_23_cast_fp16 = reduce_mean(axes = var_9301, keep_dims = var_7835, x = inputs_23_cast_fp16)[name = tensor("channels_mean_23_cast_fp16")]; + tensor zero_mean_23_cast_fp16 = sub(x = inputs_23_cast_fp16, y = channels_mean_23_cast_fp16)[name = tensor("zero_mean_23_cast_fp16")]; + tensor zero_mean_sq_23_cast_fp16 = mul(x = zero_mean_23_cast_fp16, y = zero_mean_23_cast_fp16)[name = tensor("zero_mean_sq_23_cast_fp16")]; + tensor var_9305 = const()[name = tensor("op_9305"), val = tensor([1])]; + tensor var_9306_cast_fp16 = reduce_mean(axes = var_9305, keep_dims = var_7835, x = zero_mean_sq_23_cast_fp16)[name = tensor("op_9306_cast_fp16")]; + tensor var_9307_to_fp16 = const()[name = tensor("op_9307_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_9308_cast_fp16 = add(x = var_9306_cast_fp16, y = var_9307_to_fp16)[name = tensor("op_9308_cast_fp16")]; + tensor denom_23_epsilon_0_to_fp16 = const()[name = tensor("denom_23_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_23_cast_fp16 = rsqrt(epsilon = denom_23_epsilon_0_to_fp16, x = var_9308_cast_fp16)[name = tensor("denom_23_cast_fp16")]; + tensor out_23_cast_fp16 = mul(x = zero_mean_23_cast_fp16, y = denom_23_cast_fp16)[name = tensor("out_23_cast_fp16")]; + tensor input_43_gamma_0_to_fp16 = const()[name = tensor("input_43_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(224182528)))]; + tensor input_43_beta_0_to_fp16 = const()[name = tensor("input_43_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(224185152)))]; + tensor input_43_epsilon_0_to_fp16 = const()[name = tensor("input_43_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_43_cast_fp16 = batch_norm(beta = input_43_beta_0_to_fp16, epsilon = input_43_epsilon_0_to_fp16, gamma = input_43_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_23_cast_fp16)[name = tensor("input_43_cast_fp16")]; + tensor var_9319 = const()[name = tensor("op_9319"), val = tensor([1, 1])]; + tensor var_9321 = const()[name = tensor("op_9321"), val = tensor([1, 1])]; + tensor input_45_pad_type_0 = const()[name = tensor("input_45_pad_type_0"), val = tensor("custom")]; + tensor input_45_pad_0 = const()[name = tensor("input_45_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_5_fc1_weight_to_fp16 = const()[name = tensor("layers_5_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(224187776)))]; + tensor layers_5_fc1_bias_to_fp16 = const()[name = tensor("layers_5_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(237295040)))]; + tensor input_45_cast_fp16 = conv(bias = layers_5_fc1_bias_to_fp16, dilations = var_9321, groups = var_7834, pad = input_45_pad_0, pad_type = input_45_pad_type_0, strides = var_9319, weight = layers_5_fc1_weight_to_fp16, x = input_43_cast_fp16)[name = tensor("input_45_cast_fp16")]; + tensor input_47_mode_0 = const()[name = tensor("input_47_mode_0"), val = tensor("EXACT")]; + tensor input_47_cast_fp16 = gelu(mode = input_47_mode_0, x = input_45_cast_fp16)[name = tensor("input_47_cast_fp16")]; + tensor var_9327 = const()[name = tensor("op_9327"), val = tensor([1, 1])]; + tensor var_9329 = const()[name = tensor("op_9329"), val = tensor([1, 1])]; + tensor hidden_states_15_pad_type_0 = const()[name = tensor("hidden_states_15_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_15_pad_0 = const()[name = tensor("hidden_states_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_5_fc2_weight_to_fp16 = const()[name = tensor("layers_5_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(237305344)))]; + tensor layers_5_fc2_bias_to_fp16 = const()[name = tensor("layers_5_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(250412608)))]; + tensor hidden_states_15_cast_fp16 = conv(bias = layers_5_fc2_bias_to_fp16, dilations = var_9329, groups = var_7834, pad = hidden_states_15_pad_0, pad_type = hidden_states_15_pad_type_0, strides = var_9327, weight = layers_5_fc2_weight_to_fp16, x = input_47_cast_fp16)[name = tensor("hidden_states_15_cast_fp16")]; + tensor inputs_25_cast_fp16 = add(x = inputs_23_cast_fp16, y = hidden_states_15_cast_fp16)[name = tensor("inputs_25_cast_fp16")]; + tensor var_9336 = const()[name = tensor("op_9336"), val = tensor(3)]; + tensor var_9361 = const()[name = tensor("op_9361"), val = tensor(1)]; + tensor var_9362 = const()[name = tensor("op_9362"), val = tensor(true)]; + tensor var_9372 = const()[name = tensor("op_9372"), val = tensor([1])]; + tensor channels_mean_25_cast_fp16 = reduce_mean(axes = var_9372, keep_dims = var_9362, x = inputs_25_cast_fp16)[name = tensor("channels_mean_25_cast_fp16")]; + tensor zero_mean_25_cast_fp16 = sub(x = inputs_25_cast_fp16, y = channels_mean_25_cast_fp16)[name = tensor("zero_mean_25_cast_fp16")]; + tensor zero_mean_sq_25_cast_fp16 = mul(x = zero_mean_25_cast_fp16, y = zero_mean_25_cast_fp16)[name = tensor("zero_mean_sq_25_cast_fp16")]; + tensor var_9376 = const()[name = tensor("op_9376"), val = tensor([1])]; + tensor var_9377_cast_fp16 = reduce_mean(axes = var_9376, keep_dims = var_9362, x = zero_mean_sq_25_cast_fp16)[name = tensor("op_9377_cast_fp16")]; + tensor var_9378_to_fp16 = const()[name = tensor("op_9378_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_9379_cast_fp16 = add(x = var_9377_cast_fp16, y = var_9378_to_fp16)[name = tensor("op_9379_cast_fp16")]; + tensor denom_25_epsilon_0_to_fp16 = const()[name = tensor("denom_25_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_25_cast_fp16 = rsqrt(epsilon = denom_25_epsilon_0_to_fp16, x = var_9379_cast_fp16)[name = tensor("denom_25_cast_fp16")]; + tensor out_25_cast_fp16 = mul(x = zero_mean_25_cast_fp16, y = denom_25_cast_fp16)[name = tensor("out_25_cast_fp16")]; + tensor obj_25_gamma_0_to_fp16 = const()[name = tensor("obj_25_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(250415232)))]; + tensor obj_25_beta_0_to_fp16 = const()[name = tensor("obj_25_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(250417856)))]; + tensor obj_25_epsilon_0_to_fp16 = const()[name = tensor("obj_25_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_25_cast_fp16 = batch_norm(beta = obj_25_beta_0_to_fp16, epsilon = obj_25_epsilon_0_to_fp16, gamma = obj_25_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_25_cast_fp16)[name = tensor("obj_25_cast_fp16")]; + tensor var_9394 = const()[name = tensor("op_9394"), val = tensor([1, 1])]; + tensor var_9396 = const()[name = tensor("op_9396"), val = tensor([1, 1])]; + tensor query_13_pad_type_0 = const()[name = tensor("query_13_pad_type_0"), val = tensor("custom")]; + tensor query_13_pad_0 = const()[name = tensor("query_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_6_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_6_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(250420480)))]; + tensor layers_6_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_6_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(253697344)))]; + tensor query_13_cast_fp16 = conv(bias = layers_6_self_attn_q_proj_bias_to_fp16, dilations = var_9396, groups = var_9361, pad = query_13_pad_0, pad_type = query_13_pad_type_0, strides = var_9394, weight = layers_6_self_attn_q_proj_weight_to_fp16, x = obj_25_cast_fp16)[name = tensor("query_13_cast_fp16")]; + tensor var_9400 = const()[name = tensor("op_9400"), val = tensor([1, 1])]; + tensor var_9402 = const()[name = tensor("op_9402"), val = tensor([1, 1])]; + tensor key_13_pad_type_0 = const()[name = tensor("key_13_pad_type_0"), val = tensor("custom")]; + tensor key_13_pad_0 = const()[name = tensor("key_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_6_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_6_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(253699968)))]; + tensor key_13_cast_fp16 = conv(dilations = var_9402, groups = var_9361, pad = key_13_pad_0, pad_type = key_13_pad_type_0, strides = var_9400, weight = layers_6_self_attn_k_proj_weight_to_fp16, x = obj_25_cast_fp16)[name = tensor("key_13_cast_fp16")]; + tensor var_9407 = const()[name = tensor("op_9407"), val = tensor([1, 1])]; + tensor var_9409 = const()[name = tensor("op_9409"), val = tensor([1, 1])]; + tensor value_13_pad_type_0 = const()[name = tensor("value_13_pad_type_0"), val = tensor("custom")]; + tensor value_13_pad_0 = const()[name = tensor("value_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_6_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_6_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(256976832)))]; + tensor layers_6_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_6_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(260253696)))]; + tensor value_13_cast_fp16 = conv(bias = layers_6_self_attn_v_proj_bias_to_fp16, dilations = var_9409, groups = var_9361, pad = value_13_pad_0, pad_type = value_13_pad_type_0, strides = var_9407, weight = layers_6_self_attn_v_proj_weight_to_fp16, x = obj_25_cast_fp16)[name = tensor("value_13_cast_fp16")]; + tensor var_9416_begin_0 = const()[name = tensor("op_9416_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9416_end_0 = const()[name = tensor("op_9416_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_9416_end_mask_0 = const()[name = tensor("op_9416_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9416_cast_fp16 = slice_by_index(begin = var_9416_begin_0, end = var_9416_end_0, end_mask = var_9416_end_mask_0, x = query_13_cast_fp16)[name = tensor("op_9416_cast_fp16")]; + tensor var_9420_begin_0 = const()[name = tensor("op_9420_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_9420_end_0 = const()[name = tensor("op_9420_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_9420_end_mask_0 = const()[name = tensor("op_9420_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9420_cast_fp16 = slice_by_index(begin = var_9420_begin_0, end = var_9420_end_0, end_mask = var_9420_end_mask_0, x = query_13_cast_fp16)[name = tensor("op_9420_cast_fp16")]; + tensor var_9424_begin_0 = const()[name = tensor("op_9424_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_9424_end_0 = const()[name = tensor("op_9424_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_9424_end_mask_0 = const()[name = tensor("op_9424_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9424_cast_fp16 = slice_by_index(begin = var_9424_begin_0, end = var_9424_end_0, end_mask = var_9424_end_mask_0, x = query_13_cast_fp16)[name = tensor("op_9424_cast_fp16")]; + tensor var_9428_begin_0 = const()[name = tensor("op_9428_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_9428_end_0 = const()[name = tensor("op_9428_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_9428_end_mask_0 = const()[name = tensor("op_9428_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9428_cast_fp16 = slice_by_index(begin = var_9428_begin_0, end = var_9428_end_0, end_mask = var_9428_end_mask_0, x = query_13_cast_fp16)[name = tensor("op_9428_cast_fp16")]; + tensor var_9432_begin_0 = const()[name = tensor("op_9432_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_9432_end_0 = const()[name = tensor("op_9432_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_9432_end_mask_0 = const()[name = tensor("op_9432_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9432_cast_fp16 = slice_by_index(begin = var_9432_begin_0, end = var_9432_end_0, end_mask = var_9432_end_mask_0, x = query_13_cast_fp16)[name = tensor("op_9432_cast_fp16")]; + tensor var_9436_begin_0 = const()[name = tensor("op_9436_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_9436_end_0 = const()[name = tensor("op_9436_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_9436_end_mask_0 = const()[name = tensor("op_9436_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9436_cast_fp16 = slice_by_index(begin = var_9436_begin_0, end = var_9436_end_0, end_mask = var_9436_end_mask_0, x = query_13_cast_fp16)[name = tensor("op_9436_cast_fp16")]; + tensor var_9440_begin_0 = const()[name = tensor("op_9440_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_9440_end_0 = const()[name = tensor("op_9440_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_9440_end_mask_0 = const()[name = tensor("op_9440_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9440_cast_fp16 = slice_by_index(begin = var_9440_begin_0, end = var_9440_end_0, end_mask = var_9440_end_mask_0, x = query_13_cast_fp16)[name = tensor("op_9440_cast_fp16")]; + tensor var_9444_begin_0 = const()[name = tensor("op_9444_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_9444_end_0 = const()[name = tensor("op_9444_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_9444_end_mask_0 = const()[name = tensor("op_9444_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9444_cast_fp16 = slice_by_index(begin = var_9444_begin_0, end = var_9444_end_0, end_mask = var_9444_end_mask_0, x = query_13_cast_fp16)[name = tensor("op_9444_cast_fp16")]; + tensor var_9448_begin_0 = const()[name = tensor("op_9448_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_9448_end_0 = const()[name = tensor("op_9448_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_9448_end_mask_0 = const()[name = tensor("op_9448_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9448_cast_fp16 = slice_by_index(begin = var_9448_begin_0, end = var_9448_end_0, end_mask = var_9448_end_mask_0, x = query_13_cast_fp16)[name = tensor("op_9448_cast_fp16")]; + tensor var_9452_begin_0 = const()[name = tensor("op_9452_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_9452_end_0 = const()[name = tensor("op_9452_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_9452_end_mask_0 = const()[name = tensor("op_9452_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9452_cast_fp16 = slice_by_index(begin = var_9452_begin_0, end = var_9452_end_0, end_mask = var_9452_end_mask_0, x = query_13_cast_fp16)[name = tensor("op_9452_cast_fp16")]; + tensor var_9456_begin_0 = const()[name = tensor("op_9456_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_9456_end_0 = const()[name = tensor("op_9456_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_9456_end_mask_0 = const()[name = tensor("op_9456_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9456_cast_fp16 = slice_by_index(begin = var_9456_begin_0, end = var_9456_end_0, end_mask = var_9456_end_mask_0, x = query_13_cast_fp16)[name = tensor("op_9456_cast_fp16")]; + tensor var_9460_begin_0 = const()[name = tensor("op_9460_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_9460_end_0 = const()[name = tensor("op_9460_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_9460_end_mask_0 = const()[name = tensor("op_9460_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9460_cast_fp16 = slice_by_index(begin = var_9460_begin_0, end = var_9460_end_0, end_mask = var_9460_end_mask_0, x = query_13_cast_fp16)[name = tensor("op_9460_cast_fp16")]; + tensor var_9464_begin_0 = const()[name = tensor("op_9464_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_9464_end_0 = const()[name = tensor("op_9464_end_0"), val = tensor([1, 832, 1, 1500])]; + tensor var_9464_end_mask_0 = const()[name = tensor("op_9464_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9464_cast_fp16 = slice_by_index(begin = var_9464_begin_0, end = var_9464_end_0, end_mask = var_9464_end_mask_0, x = query_13_cast_fp16)[name = tensor("op_9464_cast_fp16")]; + tensor var_9468_begin_0 = const()[name = tensor("op_9468_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_9468_end_0 = const()[name = tensor("op_9468_end_0"), val = tensor([1, 896, 1, 1500])]; + tensor var_9468_end_mask_0 = const()[name = tensor("op_9468_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9468_cast_fp16 = slice_by_index(begin = var_9468_begin_0, end = var_9468_end_0, end_mask = var_9468_end_mask_0, x = query_13_cast_fp16)[name = tensor("op_9468_cast_fp16")]; + tensor var_9472_begin_0 = const()[name = tensor("op_9472_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_9472_end_0 = const()[name = tensor("op_9472_end_0"), val = tensor([1, 960, 1, 1500])]; + tensor var_9472_end_mask_0 = const()[name = tensor("op_9472_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9472_cast_fp16 = slice_by_index(begin = var_9472_begin_0, end = var_9472_end_0, end_mask = var_9472_end_mask_0, x = query_13_cast_fp16)[name = tensor("op_9472_cast_fp16")]; + tensor var_9476_begin_0 = const()[name = tensor("op_9476_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_9476_end_0 = const()[name = tensor("op_9476_end_0"), val = tensor([1, 1024, 1, 1500])]; + tensor var_9476_end_mask_0 = const()[name = tensor("op_9476_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9476_cast_fp16 = slice_by_index(begin = var_9476_begin_0, end = var_9476_end_0, end_mask = var_9476_end_mask_0, x = query_13_cast_fp16)[name = tensor("op_9476_cast_fp16")]; + tensor var_9480_begin_0 = const()[name = tensor("op_9480_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_9480_end_0 = const()[name = tensor("op_9480_end_0"), val = tensor([1, 1088, 1, 1500])]; + tensor var_9480_end_mask_0 = const()[name = tensor("op_9480_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9480_cast_fp16 = slice_by_index(begin = var_9480_begin_0, end = var_9480_end_0, end_mask = var_9480_end_mask_0, x = query_13_cast_fp16)[name = tensor("op_9480_cast_fp16")]; + tensor var_9484_begin_0 = const()[name = tensor("op_9484_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_9484_end_0 = const()[name = tensor("op_9484_end_0"), val = tensor([1, 1152, 1, 1500])]; + tensor var_9484_end_mask_0 = const()[name = tensor("op_9484_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9484_cast_fp16 = slice_by_index(begin = var_9484_begin_0, end = var_9484_end_0, end_mask = var_9484_end_mask_0, x = query_13_cast_fp16)[name = tensor("op_9484_cast_fp16")]; + tensor var_9488_begin_0 = const()[name = tensor("op_9488_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_9488_end_0 = const()[name = tensor("op_9488_end_0"), val = tensor([1, 1216, 1, 1500])]; + tensor var_9488_end_mask_0 = const()[name = tensor("op_9488_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9488_cast_fp16 = slice_by_index(begin = var_9488_begin_0, end = var_9488_end_0, end_mask = var_9488_end_mask_0, x = query_13_cast_fp16)[name = tensor("op_9488_cast_fp16")]; + tensor var_9492_begin_0 = const()[name = tensor("op_9492_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_9492_end_0 = const()[name = tensor("op_9492_end_0"), val = tensor([1, 1280, 1, 1500])]; + tensor var_9492_end_mask_0 = const()[name = tensor("op_9492_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_9492_cast_fp16 = slice_by_index(begin = var_9492_begin_0, end = var_9492_end_0, end_mask = var_9492_end_mask_0, x = query_13_cast_fp16)[name = tensor("op_9492_cast_fp16")]; + tensor var_9501_begin_0 = const()[name = tensor("op_9501_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9501_end_0 = const()[name = tensor("op_9501_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_9501_end_mask_0 = const()[name = tensor("op_9501_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9501_cast_fp16 = slice_by_index(begin = var_9501_begin_0, end = var_9501_end_0, end_mask = var_9501_end_mask_0, x = var_9416_cast_fp16)[name = tensor("op_9501_cast_fp16")]; + tensor var_9508_begin_0 = const()[name = tensor("op_9508_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_9508_end_0 = const()[name = tensor("op_9508_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_9508_end_mask_0 = const()[name = tensor("op_9508_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9508_cast_fp16 = slice_by_index(begin = var_9508_begin_0, end = var_9508_end_0, end_mask = var_9508_end_mask_0, x = var_9416_cast_fp16)[name = tensor("op_9508_cast_fp16")]; + tensor var_9515_begin_0 = const()[name = tensor("op_9515_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_9515_end_0 = const()[name = tensor("op_9515_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_9515_end_mask_0 = const()[name = tensor("op_9515_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9515_cast_fp16 = slice_by_index(begin = var_9515_begin_0, end = var_9515_end_0, end_mask = var_9515_end_mask_0, x = var_9416_cast_fp16)[name = tensor("op_9515_cast_fp16")]; + tensor var_9522_begin_0 = const()[name = tensor("op_9522_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_9522_end_0 = const()[name = tensor("op_9522_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_9522_end_mask_0 = const()[name = tensor("op_9522_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9522_cast_fp16 = slice_by_index(begin = var_9522_begin_0, end = var_9522_end_0, end_mask = var_9522_end_mask_0, x = var_9416_cast_fp16)[name = tensor("op_9522_cast_fp16")]; + tensor var_9529_begin_0 = const()[name = tensor("op_9529_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9529_end_0 = const()[name = tensor("op_9529_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_9529_end_mask_0 = const()[name = tensor("op_9529_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9529_cast_fp16 = slice_by_index(begin = var_9529_begin_0, end = var_9529_end_0, end_mask = var_9529_end_mask_0, x = var_9420_cast_fp16)[name = tensor("op_9529_cast_fp16")]; + tensor var_9536_begin_0 = const()[name = tensor("op_9536_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_9536_end_0 = const()[name = tensor("op_9536_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_9536_end_mask_0 = const()[name = tensor("op_9536_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9536_cast_fp16 = slice_by_index(begin = var_9536_begin_0, end = var_9536_end_0, end_mask = var_9536_end_mask_0, x = var_9420_cast_fp16)[name = tensor("op_9536_cast_fp16")]; + tensor var_9543_begin_0 = const()[name = tensor("op_9543_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_9543_end_0 = const()[name = tensor("op_9543_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_9543_end_mask_0 = const()[name = tensor("op_9543_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9543_cast_fp16 = slice_by_index(begin = var_9543_begin_0, end = var_9543_end_0, end_mask = var_9543_end_mask_0, x = var_9420_cast_fp16)[name = tensor("op_9543_cast_fp16")]; + tensor var_9550_begin_0 = const()[name = tensor("op_9550_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_9550_end_0 = const()[name = tensor("op_9550_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_9550_end_mask_0 = const()[name = tensor("op_9550_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9550_cast_fp16 = slice_by_index(begin = var_9550_begin_0, end = var_9550_end_0, end_mask = var_9550_end_mask_0, x = var_9420_cast_fp16)[name = tensor("op_9550_cast_fp16")]; + tensor var_9557_begin_0 = const()[name = tensor("op_9557_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9557_end_0 = const()[name = tensor("op_9557_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_9557_end_mask_0 = const()[name = tensor("op_9557_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9557_cast_fp16 = slice_by_index(begin = var_9557_begin_0, end = var_9557_end_0, end_mask = var_9557_end_mask_0, x = var_9424_cast_fp16)[name = tensor("op_9557_cast_fp16")]; + tensor var_9564_begin_0 = const()[name = tensor("op_9564_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_9564_end_0 = const()[name = tensor("op_9564_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_9564_end_mask_0 = const()[name = tensor("op_9564_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9564_cast_fp16 = slice_by_index(begin = var_9564_begin_0, end = var_9564_end_0, end_mask = var_9564_end_mask_0, x = var_9424_cast_fp16)[name = tensor("op_9564_cast_fp16")]; + tensor var_9571_begin_0 = const()[name = tensor("op_9571_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_9571_end_0 = const()[name = tensor("op_9571_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_9571_end_mask_0 = const()[name = tensor("op_9571_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9571_cast_fp16 = slice_by_index(begin = var_9571_begin_0, end = var_9571_end_0, end_mask = var_9571_end_mask_0, x = var_9424_cast_fp16)[name = tensor("op_9571_cast_fp16")]; + tensor var_9578_begin_0 = const()[name = tensor("op_9578_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_9578_end_0 = const()[name = tensor("op_9578_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_9578_end_mask_0 = const()[name = tensor("op_9578_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9578_cast_fp16 = slice_by_index(begin = var_9578_begin_0, end = var_9578_end_0, end_mask = var_9578_end_mask_0, x = var_9424_cast_fp16)[name = tensor("op_9578_cast_fp16")]; + tensor var_9585_begin_0 = const()[name = tensor("op_9585_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9585_end_0 = const()[name = tensor("op_9585_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_9585_end_mask_0 = const()[name = tensor("op_9585_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9585_cast_fp16 = slice_by_index(begin = var_9585_begin_0, end = var_9585_end_0, end_mask = var_9585_end_mask_0, x = var_9428_cast_fp16)[name = tensor("op_9585_cast_fp16")]; + tensor var_9592_begin_0 = const()[name = tensor("op_9592_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_9592_end_0 = const()[name = tensor("op_9592_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_9592_end_mask_0 = const()[name = tensor("op_9592_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9592_cast_fp16 = slice_by_index(begin = var_9592_begin_0, end = var_9592_end_0, end_mask = var_9592_end_mask_0, x = var_9428_cast_fp16)[name = tensor("op_9592_cast_fp16")]; + tensor var_9599_begin_0 = const()[name = tensor("op_9599_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_9599_end_0 = const()[name = tensor("op_9599_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_9599_end_mask_0 = const()[name = tensor("op_9599_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9599_cast_fp16 = slice_by_index(begin = var_9599_begin_0, end = var_9599_end_0, end_mask = var_9599_end_mask_0, x = var_9428_cast_fp16)[name = tensor("op_9599_cast_fp16")]; + tensor var_9606_begin_0 = const()[name = tensor("op_9606_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_9606_end_0 = const()[name = tensor("op_9606_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_9606_end_mask_0 = const()[name = tensor("op_9606_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9606_cast_fp16 = slice_by_index(begin = var_9606_begin_0, end = var_9606_end_0, end_mask = var_9606_end_mask_0, x = var_9428_cast_fp16)[name = tensor("op_9606_cast_fp16")]; + tensor var_9613_begin_0 = const()[name = tensor("op_9613_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9613_end_0 = const()[name = tensor("op_9613_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_9613_end_mask_0 = const()[name = tensor("op_9613_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9613_cast_fp16 = slice_by_index(begin = var_9613_begin_0, end = var_9613_end_0, end_mask = var_9613_end_mask_0, x = var_9432_cast_fp16)[name = tensor("op_9613_cast_fp16")]; + tensor var_9620_begin_0 = const()[name = tensor("op_9620_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_9620_end_0 = const()[name = tensor("op_9620_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_9620_end_mask_0 = const()[name = tensor("op_9620_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9620_cast_fp16 = slice_by_index(begin = var_9620_begin_0, end = var_9620_end_0, end_mask = var_9620_end_mask_0, x = var_9432_cast_fp16)[name = tensor("op_9620_cast_fp16")]; + tensor var_9627_begin_0 = const()[name = tensor("op_9627_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_9627_end_0 = const()[name = tensor("op_9627_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_9627_end_mask_0 = const()[name = tensor("op_9627_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9627_cast_fp16 = slice_by_index(begin = var_9627_begin_0, end = var_9627_end_0, end_mask = var_9627_end_mask_0, x = var_9432_cast_fp16)[name = tensor("op_9627_cast_fp16")]; + tensor var_9634_begin_0 = const()[name = tensor("op_9634_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_9634_end_0 = const()[name = tensor("op_9634_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_9634_end_mask_0 = const()[name = tensor("op_9634_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9634_cast_fp16 = slice_by_index(begin = var_9634_begin_0, end = var_9634_end_0, end_mask = var_9634_end_mask_0, x = var_9432_cast_fp16)[name = tensor("op_9634_cast_fp16")]; + tensor var_9641_begin_0 = const()[name = tensor("op_9641_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9641_end_0 = const()[name = tensor("op_9641_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_9641_end_mask_0 = const()[name = tensor("op_9641_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9641_cast_fp16 = slice_by_index(begin = var_9641_begin_0, end = var_9641_end_0, end_mask = var_9641_end_mask_0, x = var_9436_cast_fp16)[name = tensor("op_9641_cast_fp16")]; + tensor var_9648_begin_0 = const()[name = tensor("op_9648_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_9648_end_0 = const()[name = tensor("op_9648_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_9648_end_mask_0 = const()[name = tensor("op_9648_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9648_cast_fp16 = slice_by_index(begin = var_9648_begin_0, end = var_9648_end_0, end_mask = var_9648_end_mask_0, x = var_9436_cast_fp16)[name = tensor("op_9648_cast_fp16")]; + tensor var_9655_begin_0 = const()[name = tensor("op_9655_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_9655_end_0 = const()[name = tensor("op_9655_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_9655_end_mask_0 = const()[name = tensor("op_9655_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9655_cast_fp16 = slice_by_index(begin = var_9655_begin_0, end = var_9655_end_0, end_mask = var_9655_end_mask_0, x = var_9436_cast_fp16)[name = tensor("op_9655_cast_fp16")]; + tensor var_9662_begin_0 = const()[name = tensor("op_9662_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_9662_end_0 = const()[name = tensor("op_9662_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_9662_end_mask_0 = const()[name = tensor("op_9662_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9662_cast_fp16 = slice_by_index(begin = var_9662_begin_0, end = var_9662_end_0, end_mask = var_9662_end_mask_0, x = var_9436_cast_fp16)[name = tensor("op_9662_cast_fp16")]; + tensor var_9669_begin_0 = const()[name = tensor("op_9669_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9669_end_0 = const()[name = tensor("op_9669_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_9669_end_mask_0 = const()[name = tensor("op_9669_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9669_cast_fp16 = slice_by_index(begin = var_9669_begin_0, end = var_9669_end_0, end_mask = var_9669_end_mask_0, x = var_9440_cast_fp16)[name = tensor("op_9669_cast_fp16")]; + tensor var_9676_begin_0 = const()[name = tensor("op_9676_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_9676_end_0 = const()[name = tensor("op_9676_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_9676_end_mask_0 = const()[name = tensor("op_9676_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9676_cast_fp16 = slice_by_index(begin = var_9676_begin_0, end = var_9676_end_0, end_mask = var_9676_end_mask_0, x = var_9440_cast_fp16)[name = tensor("op_9676_cast_fp16")]; + tensor var_9683_begin_0 = const()[name = tensor("op_9683_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_9683_end_0 = const()[name = tensor("op_9683_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_9683_end_mask_0 = const()[name = tensor("op_9683_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9683_cast_fp16 = slice_by_index(begin = var_9683_begin_0, end = var_9683_end_0, end_mask = var_9683_end_mask_0, x = var_9440_cast_fp16)[name = tensor("op_9683_cast_fp16")]; + tensor var_9690_begin_0 = const()[name = tensor("op_9690_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_9690_end_0 = const()[name = tensor("op_9690_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_9690_end_mask_0 = const()[name = tensor("op_9690_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9690_cast_fp16 = slice_by_index(begin = var_9690_begin_0, end = var_9690_end_0, end_mask = var_9690_end_mask_0, x = var_9440_cast_fp16)[name = tensor("op_9690_cast_fp16")]; + tensor var_9697_begin_0 = const()[name = tensor("op_9697_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9697_end_0 = const()[name = tensor("op_9697_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_9697_end_mask_0 = const()[name = tensor("op_9697_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9697_cast_fp16 = slice_by_index(begin = var_9697_begin_0, end = var_9697_end_0, end_mask = var_9697_end_mask_0, x = var_9444_cast_fp16)[name = tensor("op_9697_cast_fp16")]; + tensor var_9704_begin_0 = const()[name = tensor("op_9704_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_9704_end_0 = const()[name = tensor("op_9704_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_9704_end_mask_0 = const()[name = tensor("op_9704_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9704_cast_fp16 = slice_by_index(begin = var_9704_begin_0, end = var_9704_end_0, end_mask = var_9704_end_mask_0, x = var_9444_cast_fp16)[name = tensor("op_9704_cast_fp16")]; + tensor var_9711_begin_0 = const()[name = tensor("op_9711_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_9711_end_0 = const()[name = tensor("op_9711_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_9711_end_mask_0 = const()[name = tensor("op_9711_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9711_cast_fp16 = slice_by_index(begin = var_9711_begin_0, end = var_9711_end_0, end_mask = var_9711_end_mask_0, x = var_9444_cast_fp16)[name = tensor("op_9711_cast_fp16")]; + tensor var_9718_begin_0 = const()[name = tensor("op_9718_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_9718_end_0 = const()[name = tensor("op_9718_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_9718_end_mask_0 = const()[name = tensor("op_9718_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9718_cast_fp16 = slice_by_index(begin = var_9718_begin_0, end = var_9718_end_0, end_mask = var_9718_end_mask_0, x = var_9444_cast_fp16)[name = tensor("op_9718_cast_fp16")]; + tensor var_9725_begin_0 = const()[name = tensor("op_9725_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9725_end_0 = const()[name = tensor("op_9725_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_9725_end_mask_0 = const()[name = tensor("op_9725_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9725_cast_fp16 = slice_by_index(begin = var_9725_begin_0, end = var_9725_end_0, end_mask = var_9725_end_mask_0, x = var_9448_cast_fp16)[name = tensor("op_9725_cast_fp16")]; + tensor var_9732_begin_0 = const()[name = tensor("op_9732_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_9732_end_0 = const()[name = tensor("op_9732_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_9732_end_mask_0 = const()[name = tensor("op_9732_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9732_cast_fp16 = slice_by_index(begin = var_9732_begin_0, end = var_9732_end_0, end_mask = var_9732_end_mask_0, x = var_9448_cast_fp16)[name = tensor("op_9732_cast_fp16")]; + tensor var_9739_begin_0 = const()[name = tensor("op_9739_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_9739_end_0 = const()[name = tensor("op_9739_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_9739_end_mask_0 = const()[name = tensor("op_9739_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9739_cast_fp16 = slice_by_index(begin = var_9739_begin_0, end = var_9739_end_0, end_mask = var_9739_end_mask_0, x = var_9448_cast_fp16)[name = tensor("op_9739_cast_fp16")]; + tensor var_9746_begin_0 = const()[name = tensor("op_9746_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_9746_end_0 = const()[name = tensor("op_9746_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_9746_end_mask_0 = const()[name = tensor("op_9746_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9746_cast_fp16 = slice_by_index(begin = var_9746_begin_0, end = var_9746_end_0, end_mask = var_9746_end_mask_0, x = var_9448_cast_fp16)[name = tensor("op_9746_cast_fp16")]; + tensor var_9753_begin_0 = const()[name = tensor("op_9753_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9753_end_0 = const()[name = tensor("op_9753_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_9753_end_mask_0 = const()[name = tensor("op_9753_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9753_cast_fp16 = slice_by_index(begin = var_9753_begin_0, end = var_9753_end_0, end_mask = var_9753_end_mask_0, x = var_9452_cast_fp16)[name = tensor("op_9753_cast_fp16")]; + tensor var_9760_begin_0 = const()[name = tensor("op_9760_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_9760_end_0 = const()[name = tensor("op_9760_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_9760_end_mask_0 = const()[name = tensor("op_9760_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9760_cast_fp16 = slice_by_index(begin = var_9760_begin_0, end = var_9760_end_0, end_mask = var_9760_end_mask_0, x = var_9452_cast_fp16)[name = tensor("op_9760_cast_fp16")]; + tensor var_9767_begin_0 = const()[name = tensor("op_9767_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_9767_end_0 = const()[name = tensor("op_9767_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_9767_end_mask_0 = const()[name = tensor("op_9767_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9767_cast_fp16 = slice_by_index(begin = var_9767_begin_0, end = var_9767_end_0, end_mask = var_9767_end_mask_0, x = var_9452_cast_fp16)[name = tensor("op_9767_cast_fp16")]; + tensor var_9774_begin_0 = const()[name = tensor("op_9774_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_9774_end_0 = const()[name = tensor("op_9774_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_9774_end_mask_0 = const()[name = tensor("op_9774_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9774_cast_fp16 = slice_by_index(begin = var_9774_begin_0, end = var_9774_end_0, end_mask = var_9774_end_mask_0, x = var_9452_cast_fp16)[name = tensor("op_9774_cast_fp16")]; + tensor var_9781_begin_0 = const()[name = tensor("op_9781_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9781_end_0 = const()[name = tensor("op_9781_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_9781_end_mask_0 = const()[name = tensor("op_9781_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9781_cast_fp16 = slice_by_index(begin = var_9781_begin_0, end = var_9781_end_0, end_mask = var_9781_end_mask_0, x = var_9456_cast_fp16)[name = tensor("op_9781_cast_fp16")]; + tensor var_9788_begin_0 = const()[name = tensor("op_9788_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_9788_end_0 = const()[name = tensor("op_9788_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_9788_end_mask_0 = const()[name = tensor("op_9788_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9788_cast_fp16 = slice_by_index(begin = var_9788_begin_0, end = var_9788_end_0, end_mask = var_9788_end_mask_0, x = var_9456_cast_fp16)[name = tensor("op_9788_cast_fp16")]; + tensor var_9795_begin_0 = const()[name = tensor("op_9795_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_9795_end_0 = const()[name = tensor("op_9795_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_9795_end_mask_0 = const()[name = tensor("op_9795_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9795_cast_fp16 = slice_by_index(begin = var_9795_begin_0, end = var_9795_end_0, end_mask = var_9795_end_mask_0, x = var_9456_cast_fp16)[name = tensor("op_9795_cast_fp16")]; + tensor var_9802_begin_0 = const()[name = tensor("op_9802_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_9802_end_0 = const()[name = tensor("op_9802_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_9802_end_mask_0 = const()[name = tensor("op_9802_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9802_cast_fp16 = slice_by_index(begin = var_9802_begin_0, end = var_9802_end_0, end_mask = var_9802_end_mask_0, x = var_9456_cast_fp16)[name = tensor("op_9802_cast_fp16")]; + tensor var_9809_begin_0 = const()[name = tensor("op_9809_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9809_end_0 = const()[name = tensor("op_9809_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_9809_end_mask_0 = const()[name = tensor("op_9809_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9809_cast_fp16 = slice_by_index(begin = var_9809_begin_0, end = var_9809_end_0, end_mask = var_9809_end_mask_0, x = var_9460_cast_fp16)[name = tensor("op_9809_cast_fp16")]; + tensor var_9816_begin_0 = const()[name = tensor("op_9816_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_9816_end_0 = const()[name = tensor("op_9816_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_9816_end_mask_0 = const()[name = tensor("op_9816_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9816_cast_fp16 = slice_by_index(begin = var_9816_begin_0, end = var_9816_end_0, end_mask = var_9816_end_mask_0, x = var_9460_cast_fp16)[name = tensor("op_9816_cast_fp16")]; + tensor var_9823_begin_0 = const()[name = tensor("op_9823_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_9823_end_0 = const()[name = tensor("op_9823_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_9823_end_mask_0 = const()[name = tensor("op_9823_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9823_cast_fp16 = slice_by_index(begin = var_9823_begin_0, end = var_9823_end_0, end_mask = var_9823_end_mask_0, x = var_9460_cast_fp16)[name = tensor("op_9823_cast_fp16")]; + tensor var_9830_begin_0 = const()[name = tensor("op_9830_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_9830_end_0 = const()[name = tensor("op_9830_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_9830_end_mask_0 = const()[name = tensor("op_9830_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9830_cast_fp16 = slice_by_index(begin = var_9830_begin_0, end = var_9830_end_0, end_mask = var_9830_end_mask_0, x = var_9460_cast_fp16)[name = tensor("op_9830_cast_fp16")]; + tensor var_9837_begin_0 = const()[name = tensor("op_9837_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9837_end_0 = const()[name = tensor("op_9837_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_9837_end_mask_0 = const()[name = tensor("op_9837_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9837_cast_fp16 = slice_by_index(begin = var_9837_begin_0, end = var_9837_end_0, end_mask = var_9837_end_mask_0, x = var_9464_cast_fp16)[name = tensor("op_9837_cast_fp16")]; + tensor var_9844_begin_0 = const()[name = tensor("op_9844_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_9844_end_0 = const()[name = tensor("op_9844_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_9844_end_mask_0 = const()[name = tensor("op_9844_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9844_cast_fp16 = slice_by_index(begin = var_9844_begin_0, end = var_9844_end_0, end_mask = var_9844_end_mask_0, x = var_9464_cast_fp16)[name = tensor("op_9844_cast_fp16")]; + tensor var_9851_begin_0 = const()[name = tensor("op_9851_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_9851_end_0 = const()[name = tensor("op_9851_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_9851_end_mask_0 = const()[name = tensor("op_9851_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9851_cast_fp16 = slice_by_index(begin = var_9851_begin_0, end = var_9851_end_0, end_mask = var_9851_end_mask_0, x = var_9464_cast_fp16)[name = tensor("op_9851_cast_fp16")]; + tensor var_9858_begin_0 = const()[name = tensor("op_9858_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_9858_end_0 = const()[name = tensor("op_9858_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_9858_end_mask_0 = const()[name = tensor("op_9858_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9858_cast_fp16 = slice_by_index(begin = var_9858_begin_0, end = var_9858_end_0, end_mask = var_9858_end_mask_0, x = var_9464_cast_fp16)[name = tensor("op_9858_cast_fp16")]; + tensor var_9865_begin_0 = const()[name = tensor("op_9865_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9865_end_0 = const()[name = tensor("op_9865_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_9865_end_mask_0 = const()[name = tensor("op_9865_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9865_cast_fp16 = slice_by_index(begin = var_9865_begin_0, end = var_9865_end_0, end_mask = var_9865_end_mask_0, x = var_9468_cast_fp16)[name = tensor("op_9865_cast_fp16")]; + tensor var_9872_begin_0 = const()[name = tensor("op_9872_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_9872_end_0 = const()[name = tensor("op_9872_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_9872_end_mask_0 = const()[name = tensor("op_9872_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9872_cast_fp16 = slice_by_index(begin = var_9872_begin_0, end = var_9872_end_0, end_mask = var_9872_end_mask_0, x = var_9468_cast_fp16)[name = tensor("op_9872_cast_fp16")]; + tensor var_9879_begin_0 = const()[name = tensor("op_9879_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_9879_end_0 = const()[name = tensor("op_9879_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_9879_end_mask_0 = const()[name = tensor("op_9879_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9879_cast_fp16 = slice_by_index(begin = var_9879_begin_0, end = var_9879_end_0, end_mask = var_9879_end_mask_0, x = var_9468_cast_fp16)[name = tensor("op_9879_cast_fp16")]; + tensor var_9886_begin_0 = const()[name = tensor("op_9886_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_9886_end_0 = const()[name = tensor("op_9886_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_9886_end_mask_0 = const()[name = tensor("op_9886_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9886_cast_fp16 = slice_by_index(begin = var_9886_begin_0, end = var_9886_end_0, end_mask = var_9886_end_mask_0, x = var_9468_cast_fp16)[name = tensor("op_9886_cast_fp16")]; + tensor var_9893_begin_0 = const()[name = tensor("op_9893_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9893_end_0 = const()[name = tensor("op_9893_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_9893_end_mask_0 = const()[name = tensor("op_9893_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9893_cast_fp16 = slice_by_index(begin = var_9893_begin_0, end = var_9893_end_0, end_mask = var_9893_end_mask_0, x = var_9472_cast_fp16)[name = tensor("op_9893_cast_fp16")]; + tensor var_9900_begin_0 = const()[name = tensor("op_9900_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_9900_end_0 = const()[name = tensor("op_9900_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_9900_end_mask_0 = const()[name = tensor("op_9900_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9900_cast_fp16 = slice_by_index(begin = var_9900_begin_0, end = var_9900_end_0, end_mask = var_9900_end_mask_0, x = var_9472_cast_fp16)[name = tensor("op_9900_cast_fp16")]; + tensor var_9907_begin_0 = const()[name = tensor("op_9907_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_9907_end_0 = const()[name = tensor("op_9907_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_9907_end_mask_0 = const()[name = tensor("op_9907_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9907_cast_fp16 = slice_by_index(begin = var_9907_begin_0, end = var_9907_end_0, end_mask = var_9907_end_mask_0, x = var_9472_cast_fp16)[name = tensor("op_9907_cast_fp16")]; + tensor var_9914_begin_0 = const()[name = tensor("op_9914_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_9914_end_0 = const()[name = tensor("op_9914_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_9914_end_mask_0 = const()[name = tensor("op_9914_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9914_cast_fp16 = slice_by_index(begin = var_9914_begin_0, end = var_9914_end_0, end_mask = var_9914_end_mask_0, x = var_9472_cast_fp16)[name = tensor("op_9914_cast_fp16")]; + tensor var_9921_begin_0 = const()[name = tensor("op_9921_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9921_end_0 = const()[name = tensor("op_9921_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_9921_end_mask_0 = const()[name = tensor("op_9921_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9921_cast_fp16 = slice_by_index(begin = var_9921_begin_0, end = var_9921_end_0, end_mask = var_9921_end_mask_0, x = var_9476_cast_fp16)[name = tensor("op_9921_cast_fp16")]; + tensor var_9928_begin_0 = const()[name = tensor("op_9928_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_9928_end_0 = const()[name = tensor("op_9928_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_9928_end_mask_0 = const()[name = tensor("op_9928_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9928_cast_fp16 = slice_by_index(begin = var_9928_begin_0, end = var_9928_end_0, end_mask = var_9928_end_mask_0, x = var_9476_cast_fp16)[name = tensor("op_9928_cast_fp16")]; + tensor var_9935_begin_0 = const()[name = tensor("op_9935_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_9935_end_0 = const()[name = tensor("op_9935_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_9935_end_mask_0 = const()[name = tensor("op_9935_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9935_cast_fp16 = slice_by_index(begin = var_9935_begin_0, end = var_9935_end_0, end_mask = var_9935_end_mask_0, x = var_9476_cast_fp16)[name = tensor("op_9935_cast_fp16")]; + tensor var_9942_begin_0 = const()[name = tensor("op_9942_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_9942_end_0 = const()[name = tensor("op_9942_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_9942_end_mask_0 = const()[name = tensor("op_9942_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9942_cast_fp16 = slice_by_index(begin = var_9942_begin_0, end = var_9942_end_0, end_mask = var_9942_end_mask_0, x = var_9476_cast_fp16)[name = tensor("op_9942_cast_fp16")]; + tensor var_9949_begin_0 = const()[name = tensor("op_9949_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9949_end_0 = const()[name = tensor("op_9949_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_9949_end_mask_0 = const()[name = tensor("op_9949_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9949_cast_fp16 = slice_by_index(begin = var_9949_begin_0, end = var_9949_end_0, end_mask = var_9949_end_mask_0, x = var_9480_cast_fp16)[name = tensor("op_9949_cast_fp16")]; + tensor var_9956_begin_0 = const()[name = tensor("op_9956_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_9956_end_0 = const()[name = tensor("op_9956_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_9956_end_mask_0 = const()[name = tensor("op_9956_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9956_cast_fp16 = slice_by_index(begin = var_9956_begin_0, end = var_9956_end_0, end_mask = var_9956_end_mask_0, x = var_9480_cast_fp16)[name = tensor("op_9956_cast_fp16")]; + tensor var_9963_begin_0 = const()[name = tensor("op_9963_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_9963_end_0 = const()[name = tensor("op_9963_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_9963_end_mask_0 = const()[name = tensor("op_9963_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9963_cast_fp16 = slice_by_index(begin = var_9963_begin_0, end = var_9963_end_0, end_mask = var_9963_end_mask_0, x = var_9480_cast_fp16)[name = tensor("op_9963_cast_fp16")]; + tensor var_9970_begin_0 = const()[name = tensor("op_9970_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_9970_end_0 = const()[name = tensor("op_9970_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_9970_end_mask_0 = const()[name = tensor("op_9970_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9970_cast_fp16 = slice_by_index(begin = var_9970_begin_0, end = var_9970_end_0, end_mask = var_9970_end_mask_0, x = var_9480_cast_fp16)[name = tensor("op_9970_cast_fp16")]; + tensor var_9977_begin_0 = const()[name = tensor("op_9977_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_9977_end_0 = const()[name = tensor("op_9977_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_9977_end_mask_0 = const()[name = tensor("op_9977_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9977_cast_fp16 = slice_by_index(begin = var_9977_begin_0, end = var_9977_end_0, end_mask = var_9977_end_mask_0, x = var_9484_cast_fp16)[name = tensor("op_9977_cast_fp16")]; + tensor var_9984_begin_0 = const()[name = tensor("op_9984_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_9984_end_0 = const()[name = tensor("op_9984_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_9984_end_mask_0 = const()[name = tensor("op_9984_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9984_cast_fp16 = slice_by_index(begin = var_9984_begin_0, end = var_9984_end_0, end_mask = var_9984_end_mask_0, x = var_9484_cast_fp16)[name = tensor("op_9984_cast_fp16")]; + tensor var_9991_begin_0 = const()[name = tensor("op_9991_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_9991_end_0 = const()[name = tensor("op_9991_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_9991_end_mask_0 = const()[name = tensor("op_9991_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9991_cast_fp16 = slice_by_index(begin = var_9991_begin_0, end = var_9991_end_0, end_mask = var_9991_end_mask_0, x = var_9484_cast_fp16)[name = tensor("op_9991_cast_fp16")]; + tensor var_9998_begin_0 = const()[name = tensor("op_9998_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_9998_end_0 = const()[name = tensor("op_9998_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_9998_end_mask_0 = const()[name = tensor("op_9998_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_9998_cast_fp16 = slice_by_index(begin = var_9998_begin_0, end = var_9998_end_0, end_mask = var_9998_end_mask_0, x = var_9484_cast_fp16)[name = tensor("op_9998_cast_fp16")]; + tensor var_10005_begin_0 = const()[name = tensor("op_10005_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_10005_end_0 = const()[name = tensor("op_10005_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_10005_end_mask_0 = const()[name = tensor("op_10005_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10005_cast_fp16 = slice_by_index(begin = var_10005_begin_0, end = var_10005_end_0, end_mask = var_10005_end_mask_0, x = var_9488_cast_fp16)[name = tensor("op_10005_cast_fp16")]; + tensor var_10012_begin_0 = const()[name = tensor("op_10012_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_10012_end_0 = const()[name = tensor("op_10012_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_10012_end_mask_0 = const()[name = tensor("op_10012_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10012_cast_fp16 = slice_by_index(begin = var_10012_begin_0, end = var_10012_end_0, end_mask = var_10012_end_mask_0, x = var_9488_cast_fp16)[name = tensor("op_10012_cast_fp16")]; + tensor var_10019_begin_0 = const()[name = tensor("op_10019_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_10019_end_0 = const()[name = tensor("op_10019_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_10019_end_mask_0 = const()[name = tensor("op_10019_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10019_cast_fp16 = slice_by_index(begin = var_10019_begin_0, end = var_10019_end_0, end_mask = var_10019_end_mask_0, x = var_9488_cast_fp16)[name = tensor("op_10019_cast_fp16")]; + tensor var_10026_begin_0 = const()[name = tensor("op_10026_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_10026_end_0 = const()[name = tensor("op_10026_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_10026_end_mask_0 = const()[name = tensor("op_10026_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10026_cast_fp16 = slice_by_index(begin = var_10026_begin_0, end = var_10026_end_0, end_mask = var_10026_end_mask_0, x = var_9488_cast_fp16)[name = tensor("op_10026_cast_fp16")]; + tensor var_10033_begin_0 = const()[name = tensor("op_10033_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_10033_end_0 = const()[name = tensor("op_10033_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_10033_end_mask_0 = const()[name = tensor("op_10033_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10033_cast_fp16 = slice_by_index(begin = var_10033_begin_0, end = var_10033_end_0, end_mask = var_10033_end_mask_0, x = var_9492_cast_fp16)[name = tensor("op_10033_cast_fp16")]; + tensor var_10040_begin_0 = const()[name = tensor("op_10040_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_10040_end_0 = const()[name = tensor("op_10040_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_10040_end_mask_0 = const()[name = tensor("op_10040_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10040_cast_fp16 = slice_by_index(begin = var_10040_begin_0, end = var_10040_end_0, end_mask = var_10040_end_mask_0, x = var_9492_cast_fp16)[name = tensor("op_10040_cast_fp16")]; + tensor var_10047_begin_0 = const()[name = tensor("op_10047_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_10047_end_0 = const()[name = tensor("op_10047_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_10047_end_mask_0 = const()[name = tensor("op_10047_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10047_cast_fp16 = slice_by_index(begin = var_10047_begin_0, end = var_10047_end_0, end_mask = var_10047_end_mask_0, x = var_9492_cast_fp16)[name = tensor("op_10047_cast_fp16")]; + tensor var_10054_begin_0 = const()[name = tensor("op_10054_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_10054_end_0 = const()[name = tensor("op_10054_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_10054_end_mask_0 = const()[name = tensor("op_10054_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10054_cast_fp16 = slice_by_index(begin = var_10054_begin_0, end = var_10054_end_0, end_mask = var_10054_end_mask_0, x = var_9492_cast_fp16)[name = tensor("op_10054_cast_fp16")]; + tensor k_13_perm_0 = const()[name = tensor("k_13_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_10059_begin_0 = const()[name = tensor("op_10059_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_10059_end_0 = const()[name = tensor("op_10059_end_0"), val = tensor([1, 1500, 1, 64])]; + tensor var_10059_end_mask_0 = const()[name = tensor("op_10059_end_mask_0"), val = tensor([true, true, true, false])]; + tensor transpose_25 = transpose(perm = k_13_perm_0, x = key_13_cast_fp16)[name = tensor("transpose_25")]; + tensor var_10059_cast_fp16 = slice_by_index(begin = var_10059_begin_0, end = var_10059_end_0, end_mask = var_10059_end_mask_0, x = transpose_25)[name = tensor("op_10059_cast_fp16")]; + tensor var_10063_begin_0 = const()[name = tensor("op_10063_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor var_10063_end_0 = const()[name = tensor("op_10063_end_0"), val = tensor([1, 1500, 1, 128])]; + tensor var_10063_end_mask_0 = const()[name = tensor("op_10063_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10063_cast_fp16 = slice_by_index(begin = var_10063_begin_0, end = var_10063_end_0, end_mask = var_10063_end_mask_0, x = transpose_25)[name = tensor("op_10063_cast_fp16")]; + tensor var_10067_begin_0 = const()[name = tensor("op_10067_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_10067_end_0 = const()[name = tensor("op_10067_end_0"), val = tensor([1, 1500, 1, 192])]; + tensor var_10067_end_mask_0 = const()[name = tensor("op_10067_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10067_cast_fp16 = slice_by_index(begin = var_10067_begin_0, end = var_10067_end_0, end_mask = var_10067_end_mask_0, x = transpose_25)[name = tensor("op_10067_cast_fp16")]; + tensor var_10071_begin_0 = const()[name = tensor("op_10071_begin_0"), val = tensor([0, 0, 0, 192])]; + tensor var_10071_end_0 = const()[name = tensor("op_10071_end_0"), val = tensor([1, 1500, 1, 256])]; + tensor var_10071_end_mask_0 = const()[name = tensor("op_10071_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10071_cast_fp16 = slice_by_index(begin = var_10071_begin_0, end = var_10071_end_0, end_mask = var_10071_end_mask_0, x = transpose_25)[name = tensor("op_10071_cast_fp16")]; + tensor var_10075_begin_0 = const()[name = tensor("op_10075_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_10075_end_0 = const()[name = tensor("op_10075_end_0"), val = tensor([1, 1500, 1, 320])]; + tensor var_10075_end_mask_0 = const()[name = tensor("op_10075_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10075_cast_fp16 = slice_by_index(begin = var_10075_begin_0, end = var_10075_end_0, end_mask = var_10075_end_mask_0, x = transpose_25)[name = tensor("op_10075_cast_fp16")]; + tensor var_10079_begin_0 = const()[name = tensor("op_10079_begin_0"), val = tensor([0, 0, 0, 320])]; + tensor var_10079_end_0 = const()[name = tensor("op_10079_end_0"), val = tensor([1, 1500, 1, 384])]; + tensor var_10079_end_mask_0 = const()[name = tensor("op_10079_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10079_cast_fp16 = slice_by_index(begin = var_10079_begin_0, end = var_10079_end_0, end_mask = var_10079_end_mask_0, x = transpose_25)[name = tensor("op_10079_cast_fp16")]; + tensor var_10083_begin_0 = const()[name = tensor("op_10083_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_10083_end_0 = const()[name = tensor("op_10083_end_0"), val = tensor([1, 1500, 1, 448])]; + tensor var_10083_end_mask_0 = const()[name = tensor("op_10083_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10083_cast_fp16 = slice_by_index(begin = var_10083_begin_0, end = var_10083_end_0, end_mask = var_10083_end_mask_0, x = transpose_25)[name = tensor("op_10083_cast_fp16")]; + tensor var_10087_begin_0 = const()[name = tensor("op_10087_begin_0"), val = tensor([0, 0, 0, 448])]; + tensor var_10087_end_0 = const()[name = tensor("op_10087_end_0"), val = tensor([1, 1500, 1, 512])]; + tensor var_10087_end_mask_0 = const()[name = tensor("op_10087_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10087_cast_fp16 = slice_by_index(begin = var_10087_begin_0, end = var_10087_end_0, end_mask = var_10087_end_mask_0, x = transpose_25)[name = tensor("op_10087_cast_fp16")]; + tensor var_10091_begin_0 = const()[name = tensor("op_10091_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_10091_end_0 = const()[name = tensor("op_10091_end_0"), val = tensor([1, 1500, 1, 576])]; + tensor var_10091_end_mask_0 = const()[name = tensor("op_10091_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10091_cast_fp16 = slice_by_index(begin = var_10091_begin_0, end = var_10091_end_0, end_mask = var_10091_end_mask_0, x = transpose_25)[name = tensor("op_10091_cast_fp16")]; + tensor var_10095_begin_0 = const()[name = tensor("op_10095_begin_0"), val = tensor([0, 0, 0, 576])]; + tensor var_10095_end_0 = const()[name = tensor("op_10095_end_0"), val = tensor([1, 1500, 1, 640])]; + tensor var_10095_end_mask_0 = const()[name = tensor("op_10095_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10095_cast_fp16 = slice_by_index(begin = var_10095_begin_0, end = var_10095_end_0, end_mask = var_10095_end_mask_0, x = transpose_25)[name = tensor("op_10095_cast_fp16")]; + tensor var_10099_begin_0 = const()[name = tensor("op_10099_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_10099_end_0 = const()[name = tensor("op_10099_end_0"), val = tensor([1, 1500, 1, 704])]; + tensor var_10099_end_mask_0 = const()[name = tensor("op_10099_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10099_cast_fp16 = slice_by_index(begin = var_10099_begin_0, end = var_10099_end_0, end_mask = var_10099_end_mask_0, x = transpose_25)[name = tensor("op_10099_cast_fp16")]; + tensor var_10103_begin_0 = const()[name = tensor("op_10103_begin_0"), val = tensor([0, 0, 0, 704])]; + tensor var_10103_end_0 = const()[name = tensor("op_10103_end_0"), val = tensor([1, 1500, 1, 768])]; + tensor var_10103_end_mask_0 = const()[name = tensor("op_10103_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10103_cast_fp16 = slice_by_index(begin = var_10103_begin_0, end = var_10103_end_0, end_mask = var_10103_end_mask_0, x = transpose_25)[name = tensor("op_10103_cast_fp16")]; + tensor var_10107_begin_0 = const()[name = tensor("op_10107_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_10107_end_0 = const()[name = tensor("op_10107_end_0"), val = tensor([1, 1500, 1, 832])]; + tensor var_10107_end_mask_0 = const()[name = tensor("op_10107_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10107_cast_fp16 = slice_by_index(begin = var_10107_begin_0, end = var_10107_end_0, end_mask = var_10107_end_mask_0, x = transpose_25)[name = tensor("op_10107_cast_fp16")]; + tensor var_10111_begin_0 = const()[name = tensor("op_10111_begin_0"), val = tensor([0, 0, 0, 832])]; + tensor var_10111_end_0 = const()[name = tensor("op_10111_end_0"), val = tensor([1, 1500, 1, 896])]; + tensor var_10111_end_mask_0 = const()[name = tensor("op_10111_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10111_cast_fp16 = slice_by_index(begin = var_10111_begin_0, end = var_10111_end_0, end_mask = var_10111_end_mask_0, x = transpose_25)[name = tensor("op_10111_cast_fp16")]; + tensor var_10115_begin_0 = const()[name = tensor("op_10115_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_10115_end_0 = const()[name = tensor("op_10115_end_0"), val = tensor([1, 1500, 1, 960])]; + tensor var_10115_end_mask_0 = const()[name = tensor("op_10115_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10115_cast_fp16 = slice_by_index(begin = var_10115_begin_0, end = var_10115_end_0, end_mask = var_10115_end_mask_0, x = transpose_25)[name = tensor("op_10115_cast_fp16")]; + tensor var_10119_begin_0 = const()[name = tensor("op_10119_begin_0"), val = tensor([0, 0, 0, 960])]; + tensor var_10119_end_0 = const()[name = tensor("op_10119_end_0"), val = tensor([1, 1500, 1, 1024])]; + tensor var_10119_end_mask_0 = const()[name = tensor("op_10119_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10119_cast_fp16 = slice_by_index(begin = var_10119_begin_0, end = var_10119_end_0, end_mask = var_10119_end_mask_0, x = transpose_25)[name = tensor("op_10119_cast_fp16")]; + tensor var_10123_begin_0 = const()[name = tensor("op_10123_begin_0"), val = tensor([0, 0, 0, 1024])]; + tensor var_10123_end_0 = const()[name = tensor("op_10123_end_0"), val = tensor([1, 1500, 1, 1088])]; + tensor var_10123_end_mask_0 = const()[name = tensor("op_10123_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10123_cast_fp16 = slice_by_index(begin = var_10123_begin_0, end = var_10123_end_0, end_mask = var_10123_end_mask_0, x = transpose_25)[name = tensor("op_10123_cast_fp16")]; + tensor var_10127_begin_0 = const()[name = tensor("op_10127_begin_0"), val = tensor([0, 0, 0, 1088])]; + tensor var_10127_end_0 = const()[name = tensor("op_10127_end_0"), val = tensor([1, 1500, 1, 1152])]; + tensor var_10127_end_mask_0 = const()[name = tensor("op_10127_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10127_cast_fp16 = slice_by_index(begin = var_10127_begin_0, end = var_10127_end_0, end_mask = var_10127_end_mask_0, x = transpose_25)[name = tensor("op_10127_cast_fp16")]; + tensor var_10131_begin_0 = const()[name = tensor("op_10131_begin_0"), val = tensor([0, 0, 0, 1152])]; + tensor var_10131_end_0 = const()[name = tensor("op_10131_end_0"), val = tensor([1, 1500, 1, 1216])]; + tensor var_10131_end_mask_0 = const()[name = tensor("op_10131_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10131_cast_fp16 = slice_by_index(begin = var_10131_begin_0, end = var_10131_end_0, end_mask = var_10131_end_mask_0, x = transpose_25)[name = tensor("op_10131_cast_fp16")]; + tensor var_10135_begin_0 = const()[name = tensor("op_10135_begin_0"), val = tensor([0, 0, 0, 1216])]; + tensor var_10135_end_0 = const()[name = tensor("op_10135_end_0"), val = tensor([1, 1500, 1, 1280])]; + tensor var_10135_end_mask_0 = const()[name = tensor("op_10135_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_10135_cast_fp16 = slice_by_index(begin = var_10135_begin_0, end = var_10135_end_0, end_mask = var_10135_end_mask_0, x = transpose_25)[name = tensor("op_10135_cast_fp16")]; + tensor var_10137_begin_0 = const()[name = tensor("op_10137_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_10137_end_0 = const()[name = tensor("op_10137_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_10137_end_mask_0 = const()[name = tensor("op_10137_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10137_cast_fp16 = slice_by_index(begin = var_10137_begin_0, end = var_10137_end_0, end_mask = var_10137_end_mask_0, x = value_13_cast_fp16)[name = tensor("op_10137_cast_fp16")]; + tensor var_10141_begin_0 = const()[name = tensor("op_10141_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_10141_end_0 = const()[name = tensor("op_10141_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_10141_end_mask_0 = const()[name = tensor("op_10141_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10141_cast_fp16 = slice_by_index(begin = var_10141_begin_0, end = var_10141_end_0, end_mask = var_10141_end_mask_0, x = value_13_cast_fp16)[name = tensor("op_10141_cast_fp16")]; + tensor var_10145_begin_0 = const()[name = tensor("op_10145_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_10145_end_0 = const()[name = tensor("op_10145_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_10145_end_mask_0 = const()[name = tensor("op_10145_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10145_cast_fp16 = slice_by_index(begin = var_10145_begin_0, end = var_10145_end_0, end_mask = var_10145_end_mask_0, x = value_13_cast_fp16)[name = tensor("op_10145_cast_fp16")]; + tensor var_10149_begin_0 = const()[name = tensor("op_10149_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_10149_end_0 = const()[name = tensor("op_10149_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_10149_end_mask_0 = const()[name = tensor("op_10149_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10149_cast_fp16 = slice_by_index(begin = var_10149_begin_0, end = var_10149_end_0, end_mask = var_10149_end_mask_0, x = value_13_cast_fp16)[name = tensor("op_10149_cast_fp16")]; + tensor var_10153_begin_0 = const()[name = tensor("op_10153_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_10153_end_0 = const()[name = tensor("op_10153_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_10153_end_mask_0 = const()[name = tensor("op_10153_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10153_cast_fp16 = slice_by_index(begin = var_10153_begin_0, end = var_10153_end_0, end_mask = var_10153_end_mask_0, x = value_13_cast_fp16)[name = tensor("op_10153_cast_fp16")]; + tensor var_10157_begin_0 = const()[name = tensor("op_10157_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_10157_end_0 = const()[name = tensor("op_10157_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_10157_end_mask_0 = const()[name = tensor("op_10157_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10157_cast_fp16 = slice_by_index(begin = var_10157_begin_0, end = var_10157_end_0, end_mask = var_10157_end_mask_0, x = value_13_cast_fp16)[name = tensor("op_10157_cast_fp16")]; + tensor var_10161_begin_0 = const()[name = tensor("op_10161_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_10161_end_0 = const()[name = tensor("op_10161_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_10161_end_mask_0 = const()[name = tensor("op_10161_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10161_cast_fp16 = slice_by_index(begin = var_10161_begin_0, end = var_10161_end_0, end_mask = var_10161_end_mask_0, x = value_13_cast_fp16)[name = tensor("op_10161_cast_fp16")]; + tensor var_10165_begin_0 = const()[name = tensor("op_10165_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_10165_end_0 = const()[name = tensor("op_10165_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_10165_end_mask_0 = const()[name = tensor("op_10165_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10165_cast_fp16 = slice_by_index(begin = var_10165_begin_0, end = var_10165_end_0, end_mask = var_10165_end_mask_0, x = value_13_cast_fp16)[name = tensor("op_10165_cast_fp16")]; + tensor var_10169_begin_0 = const()[name = tensor("op_10169_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_10169_end_0 = const()[name = tensor("op_10169_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_10169_end_mask_0 = const()[name = tensor("op_10169_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10169_cast_fp16 = slice_by_index(begin = var_10169_begin_0, end = var_10169_end_0, end_mask = var_10169_end_mask_0, x = value_13_cast_fp16)[name = tensor("op_10169_cast_fp16")]; + tensor var_10173_begin_0 = const()[name = tensor("op_10173_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_10173_end_0 = const()[name = tensor("op_10173_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_10173_end_mask_0 = const()[name = tensor("op_10173_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10173_cast_fp16 = slice_by_index(begin = var_10173_begin_0, end = var_10173_end_0, end_mask = var_10173_end_mask_0, x = value_13_cast_fp16)[name = tensor("op_10173_cast_fp16")]; + tensor var_10177_begin_0 = const()[name = tensor("op_10177_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_10177_end_0 = const()[name = tensor("op_10177_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_10177_end_mask_0 = const()[name = tensor("op_10177_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10177_cast_fp16 = slice_by_index(begin = var_10177_begin_0, end = var_10177_end_0, end_mask = var_10177_end_mask_0, x = value_13_cast_fp16)[name = tensor("op_10177_cast_fp16")]; + tensor var_10181_begin_0 = const()[name = tensor("op_10181_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_10181_end_0 = const()[name = tensor("op_10181_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_10181_end_mask_0 = const()[name = tensor("op_10181_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10181_cast_fp16 = slice_by_index(begin = var_10181_begin_0, end = var_10181_end_0, end_mask = var_10181_end_mask_0, x = value_13_cast_fp16)[name = tensor("op_10181_cast_fp16")]; + tensor var_10185_begin_0 = const()[name = tensor("op_10185_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_10185_end_0 = const()[name = tensor("op_10185_end_0"), val = tensor([1, 832, 1, 1500])]; + tensor var_10185_end_mask_0 = const()[name = tensor("op_10185_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10185_cast_fp16 = slice_by_index(begin = var_10185_begin_0, end = var_10185_end_0, end_mask = var_10185_end_mask_0, x = value_13_cast_fp16)[name = tensor("op_10185_cast_fp16")]; + tensor var_10189_begin_0 = const()[name = tensor("op_10189_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_10189_end_0 = const()[name = tensor("op_10189_end_0"), val = tensor([1, 896, 1, 1500])]; + tensor var_10189_end_mask_0 = const()[name = tensor("op_10189_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10189_cast_fp16 = slice_by_index(begin = var_10189_begin_0, end = var_10189_end_0, end_mask = var_10189_end_mask_0, x = value_13_cast_fp16)[name = tensor("op_10189_cast_fp16")]; + tensor var_10193_begin_0 = const()[name = tensor("op_10193_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_10193_end_0 = const()[name = tensor("op_10193_end_0"), val = tensor([1, 960, 1, 1500])]; + tensor var_10193_end_mask_0 = const()[name = tensor("op_10193_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10193_cast_fp16 = slice_by_index(begin = var_10193_begin_0, end = var_10193_end_0, end_mask = var_10193_end_mask_0, x = value_13_cast_fp16)[name = tensor("op_10193_cast_fp16")]; + tensor var_10197_begin_0 = const()[name = tensor("op_10197_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_10197_end_0 = const()[name = tensor("op_10197_end_0"), val = tensor([1, 1024, 1, 1500])]; + tensor var_10197_end_mask_0 = const()[name = tensor("op_10197_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10197_cast_fp16 = slice_by_index(begin = var_10197_begin_0, end = var_10197_end_0, end_mask = var_10197_end_mask_0, x = value_13_cast_fp16)[name = tensor("op_10197_cast_fp16")]; + tensor var_10201_begin_0 = const()[name = tensor("op_10201_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_10201_end_0 = const()[name = tensor("op_10201_end_0"), val = tensor([1, 1088, 1, 1500])]; + tensor var_10201_end_mask_0 = const()[name = tensor("op_10201_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10201_cast_fp16 = slice_by_index(begin = var_10201_begin_0, end = var_10201_end_0, end_mask = var_10201_end_mask_0, x = value_13_cast_fp16)[name = tensor("op_10201_cast_fp16")]; + tensor var_10205_begin_0 = const()[name = tensor("op_10205_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_10205_end_0 = const()[name = tensor("op_10205_end_0"), val = tensor([1, 1152, 1, 1500])]; + tensor var_10205_end_mask_0 = const()[name = tensor("op_10205_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10205_cast_fp16 = slice_by_index(begin = var_10205_begin_0, end = var_10205_end_0, end_mask = var_10205_end_mask_0, x = value_13_cast_fp16)[name = tensor("op_10205_cast_fp16")]; + tensor var_10209_begin_0 = const()[name = tensor("op_10209_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_10209_end_0 = const()[name = tensor("op_10209_end_0"), val = tensor([1, 1216, 1, 1500])]; + tensor var_10209_end_mask_0 = const()[name = tensor("op_10209_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10209_cast_fp16 = slice_by_index(begin = var_10209_begin_0, end = var_10209_end_0, end_mask = var_10209_end_mask_0, x = value_13_cast_fp16)[name = tensor("op_10209_cast_fp16")]; + tensor var_10213_begin_0 = const()[name = tensor("op_10213_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_10213_end_0 = const()[name = tensor("op_10213_end_0"), val = tensor([1, 1280, 1, 1500])]; + tensor var_10213_end_mask_0 = const()[name = tensor("op_10213_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10213_cast_fp16 = slice_by_index(begin = var_10213_begin_0, end = var_10213_end_0, end_mask = var_10213_end_mask_0, x = value_13_cast_fp16)[name = tensor("op_10213_cast_fp16")]; + tensor var_10217_equation_0 = const()[name = tensor("op_10217_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10217_cast_fp16 = einsum(equation = var_10217_equation_0, values = (var_10059_cast_fp16, var_9501_cast_fp16))[name = tensor("op_10217_cast_fp16")]; + tensor var_10218_to_fp16 = const()[name = tensor("op_10218_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_961_cast_fp16 = mul(x = var_10217_cast_fp16, y = var_10218_to_fp16)[name = tensor("aw_chunk_961_cast_fp16")]; + tensor var_10221_equation_0 = const()[name = tensor("op_10221_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10221_cast_fp16 = einsum(equation = var_10221_equation_0, values = (var_10059_cast_fp16, var_9508_cast_fp16))[name = tensor("op_10221_cast_fp16")]; + tensor var_10222_to_fp16 = const()[name = tensor("op_10222_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_963_cast_fp16 = mul(x = var_10221_cast_fp16, y = var_10222_to_fp16)[name = tensor("aw_chunk_963_cast_fp16")]; + tensor var_10225_equation_0 = const()[name = tensor("op_10225_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10225_cast_fp16 = einsum(equation = var_10225_equation_0, values = (var_10059_cast_fp16, var_9515_cast_fp16))[name = tensor("op_10225_cast_fp16")]; + tensor var_10226_to_fp16 = const()[name = tensor("op_10226_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_965_cast_fp16 = mul(x = var_10225_cast_fp16, y = var_10226_to_fp16)[name = tensor("aw_chunk_965_cast_fp16")]; + tensor var_10229_equation_0 = const()[name = tensor("op_10229_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10229_cast_fp16 = einsum(equation = var_10229_equation_0, values = (var_10059_cast_fp16, var_9522_cast_fp16))[name = tensor("op_10229_cast_fp16")]; + tensor var_10230_to_fp16 = const()[name = tensor("op_10230_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_967_cast_fp16 = mul(x = var_10229_cast_fp16, y = var_10230_to_fp16)[name = tensor("aw_chunk_967_cast_fp16")]; + tensor var_10233_equation_0 = const()[name = tensor("op_10233_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10233_cast_fp16 = einsum(equation = var_10233_equation_0, values = (var_10063_cast_fp16, var_9529_cast_fp16))[name = tensor("op_10233_cast_fp16")]; + tensor var_10234_to_fp16 = const()[name = tensor("op_10234_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_969_cast_fp16 = mul(x = var_10233_cast_fp16, y = var_10234_to_fp16)[name = tensor("aw_chunk_969_cast_fp16")]; + tensor var_10237_equation_0 = const()[name = tensor("op_10237_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10237_cast_fp16 = einsum(equation = var_10237_equation_0, values = (var_10063_cast_fp16, var_9536_cast_fp16))[name = tensor("op_10237_cast_fp16")]; + tensor var_10238_to_fp16 = const()[name = tensor("op_10238_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_971_cast_fp16 = mul(x = var_10237_cast_fp16, y = var_10238_to_fp16)[name = tensor("aw_chunk_971_cast_fp16")]; + tensor var_10241_equation_0 = const()[name = tensor("op_10241_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10241_cast_fp16 = einsum(equation = var_10241_equation_0, values = (var_10063_cast_fp16, var_9543_cast_fp16))[name = tensor("op_10241_cast_fp16")]; + tensor var_10242_to_fp16 = const()[name = tensor("op_10242_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_973_cast_fp16 = mul(x = var_10241_cast_fp16, y = var_10242_to_fp16)[name = tensor("aw_chunk_973_cast_fp16")]; + tensor var_10245_equation_0 = const()[name = tensor("op_10245_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10245_cast_fp16 = einsum(equation = var_10245_equation_0, values = (var_10063_cast_fp16, var_9550_cast_fp16))[name = tensor("op_10245_cast_fp16")]; + tensor var_10246_to_fp16 = const()[name = tensor("op_10246_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_975_cast_fp16 = mul(x = var_10245_cast_fp16, y = var_10246_to_fp16)[name = tensor("aw_chunk_975_cast_fp16")]; + tensor var_10249_equation_0 = const()[name = tensor("op_10249_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10249_cast_fp16 = einsum(equation = var_10249_equation_0, values = (var_10067_cast_fp16, var_9557_cast_fp16))[name = tensor("op_10249_cast_fp16")]; + tensor var_10250_to_fp16 = const()[name = tensor("op_10250_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_977_cast_fp16 = mul(x = var_10249_cast_fp16, y = var_10250_to_fp16)[name = tensor("aw_chunk_977_cast_fp16")]; + tensor var_10253_equation_0 = const()[name = tensor("op_10253_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10253_cast_fp16 = einsum(equation = var_10253_equation_0, values = (var_10067_cast_fp16, var_9564_cast_fp16))[name = tensor("op_10253_cast_fp16")]; + tensor var_10254_to_fp16 = const()[name = tensor("op_10254_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_979_cast_fp16 = mul(x = var_10253_cast_fp16, y = var_10254_to_fp16)[name = tensor("aw_chunk_979_cast_fp16")]; + tensor var_10257_equation_0 = const()[name = tensor("op_10257_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10257_cast_fp16 = einsum(equation = var_10257_equation_0, values = (var_10067_cast_fp16, var_9571_cast_fp16))[name = tensor("op_10257_cast_fp16")]; + tensor var_10258_to_fp16 = const()[name = tensor("op_10258_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_981_cast_fp16 = mul(x = var_10257_cast_fp16, y = var_10258_to_fp16)[name = tensor("aw_chunk_981_cast_fp16")]; + tensor var_10261_equation_0 = const()[name = tensor("op_10261_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10261_cast_fp16 = einsum(equation = var_10261_equation_0, values = (var_10067_cast_fp16, var_9578_cast_fp16))[name = tensor("op_10261_cast_fp16")]; + tensor var_10262_to_fp16 = const()[name = tensor("op_10262_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_983_cast_fp16 = mul(x = var_10261_cast_fp16, y = var_10262_to_fp16)[name = tensor("aw_chunk_983_cast_fp16")]; + tensor var_10265_equation_0 = const()[name = tensor("op_10265_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10265_cast_fp16 = einsum(equation = var_10265_equation_0, values = (var_10071_cast_fp16, var_9585_cast_fp16))[name = tensor("op_10265_cast_fp16")]; + tensor var_10266_to_fp16 = const()[name = tensor("op_10266_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_985_cast_fp16 = mul(x = var_10265_cast_fp16, y = var_10266_to_fp16)[name = tensor("aw_chunk_985_cast_fp16")]; + tensor var_10269_equation_0 = const()[name = tensor("op_10269_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10269_cast_fp16 = einsum(equation = var_10269_equation_0, values = (var_10071_cast_fp16, var_9592_cast_fp16))[name = tensor("op_10269_cast_fp16")]; + tensor var_10270_to_fp16 = const()[name = tensor("op_10270_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_987_cast_fp16 = mul(x = var_10269_cast_fp16, y = var_10270_to_fp16)[name = tensor("aw_chunk_987_cast_fp16")]; + tensor var_10273_equation_0 = const()[name = tensor("op_10273_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10273_cast_fp16 = einsum(equation = var_10273_equation_0, values = (var_10071_cast_fp16, var_9599_cast_fp16))[name = tensor("op_10273_cast_fp16")]; + tensor var_10274_to_fp16 = const()[name = tensor("op_10274_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_989_cast_fp16 = mul(x = var_10273_cast_fp16, y = var_10274_to_fp16)[name = tensor("aw_chunk_989_cast_fp16")]; + tensor var_10277_equation_0 = const()[name = tensor("op_10277_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10277_cast_fp16 = einsum(equation = var_10277_equation_0, values = (var_10071_cast_fp16, var_9606_cast_fp16))[name = tensor("op_10277_cast_fp16")]; + tensor var_10278_to_fp16 = const()[name = tensor("op_10278_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_991_cast_fp16 = mul(x = var_10277_cast_fp16, y = var_10278_to_fp16)[name = tensor("aw_chunk_991_cast_fp16")]; + tensor var_10281_equation_0 = const()[name = tensor("op_10281_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10281_cast_fp16 = einsum(equation = var_10281_equation_0, values = (var_10075_cast_fp16, var_9613_cast_fp16))[name = tensor("op_10281_cast_fp16")]; + tensor var_10282_to_fp16 = const()[name = tensor("op_10282_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_993_cast_fp16 = mul(x = var_10281_cast_fp16, y = var_10282_to_fp16)[name = tensor("aw_chunk_993_cast_fp16")]; + tensor var_10285_equation_0 = const()[name = tensor("op_10285_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10285_cast_fp16 = einsum(equation = var_10285_equation_0, values = (var_10075_cast_fp16, var_9620_cast_fp16))[name = tensor("op_10285_cast_fp16")]; + tensor var_10286_to_fp16 = const()[name = tensor("op_10286_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_995_cast_fp16 = mul(x = var_10285_cast_fp16, y = var_10286_to_fp16)[name = tensor("aw_chunk_995_cast_fp16")]; + tensor var_10289_equation_0 = const()[name = tensor("op_10289_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10289_cast_fp16 = einsum(equation = var_10289_equation_0, values = (var_10075_cast_fp16, var_9627_cast_fp16))[name = tensor("op_10289_cast_fp16")]; + tensor var_10290_to_fp16 = const()[name = tensor("op_10290_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_997_cast_fp16 = mul(x = var_10289_cast_fp16, y = var_10290_to_fp16)[name = tensor("aw_chunk_997_cast_fp16")]; + tensor var_10293_equation_0 = const()[name = tensor("op_10293_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10293_cast_fp16 = einsum(equation = var_10293_equation_0, values = (var_10075_cast_fp16, var_9634_cast_fp16))[name = tensor("op_10293_cast_fp16")]; + tensor var_10294_to_fp16 = const()[name = tensor("op_10294_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_999_cast_fp16 = mul(x = var_10293_cast_fp16, y = var_10294_to_fp16)[name = tensor("aw_chunk_999_cast_fp16")]; + tensor var_10297_equation_0 = const()[name = tensor("op_10297_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10297_cast_fp16 = einsum(equation = var_10297_equation_0, values = (var_10079_cast_fp16, var_9641_cast_fp16))[name = tensor("op_10297_cast_fp16")]; + tensor var_10298_to_fp16 = const()[name = tensor("op_10298_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1001_cast_fp16 = mul(x = var_10297_cast_fp16, y = var_10298_to_fp16)[name = tensor("aw_chunk_1001_cast_fp16")]; + tensor var_10301_equation_0 = const()[name = tensor("op_10301_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10301_cast_fp16 = einsum(equation = var_10301_equation_0, values = (var_10079_cast_fp16, var_9648_cast_fp16))[name = tensor("op_10301_cast_fp16")]; + tensor var_10302_to_fp16 = const()[name = tensor("op_10302_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1003_cast_fp16 = mul(x = var_10301_cast_fp16, y = var_10302_to_fp16)[name = tensor("aw_chunk_1003_cast_fp16")]; + tensor var_10305_equation_0 = const()[name = tensor("op_10305_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10305_cast_fp16 = einsum(equation = var_10305_equation_0, values = (var_10079_cast_fp16, var_9655_cast_fp16))[name = tensor("op_10305_cast_fp16")]; + tensor var_10306_to_fp16 = const()[name = tensor("op_10306_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1005_cast_fp16 = mul(x = var_10305_cast_fp16, y = var_10306_to_fp16)[name = tensor("aw_chunk_1005_cast_fp16")]; + tensor var_10309_equation_0 = const()[name = tensor("op_10309_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10309_cast_fp16 = einsum(equation = var_10309_equation_0, values = (var_10079_cast_fp16, var_9662_cast_fp16))[name = tensor("op_10309_cast_fp16")]; + tensor var_10310_to_fp16 = const()[name = tensor("op_10310_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1007_cast_fp16 = mul(x = var_10309_cast_fp16, y = var_10310_to_fp16)[name = tensor("aw_chunk_1007_cast_fp16")]; + tensor var_10313_equation_0 = const()[name = tensor("op_10313_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10313_cast_fp16 = einsum(equation = var_10313_equation_0, values = (var_10083_cast_fp16, var_9669_cast_fp16))[name = tensor("op_10313_cast_fp16")]; + tensor var_10314_to_fp16 = const()[name = tensor("op_10314_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1009_cast_fp16 = mul(x = var_10313_cast_fp16, y = var_10314_to_fp16)[name = tensor("aw_chunk_1009_cast_fp16")]; + tensor var_10317_equation_0 = const()[name = tensor("op_10317_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10317_cast_fp16 = einsum(equation = var_10317_equation_0, values = (var_10083_cast_fp16, var_9676_cast_fp16))[name = tensor("op_10317_cast_fp16")]; + tensor var_10318_to_fp16 = const()[name = tensor("op_10318_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1011_cast_fp16 = mul(x = var_10317_cast_fp16, y = var_10318_to_fp16)[name = tensor("aw_chunk_1011_cast_fp16")]; + tensor var_10321_equation_0 = const()[name = tensor("op_10321_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10321_cast_fp16 = einsum(equation = var_10321_equation_0, values = (var_10083_cast_fp16, var_9683_cast_fp16))[name = tensor("op_10321_cast_fp16")]; + tensor var_10322_to_fp16 = const()[name = tensor("op_10322_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1013_cast_fp16 = mul(x = var_10321_cast_fp16, y = var_10322_to_fp16)[name = tensor("aw_chunk_1013_cast_fp16")]; + tensor var_10325_equation_0 = const()[name = tensor("op_10325_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10325_cast_fp16 = einsum(equation = var_10325_equation_0, values = (var_10083_cast_fp16, var_9690_cast_fp16))[name = tensor("op_10325_cast_fp16")]; + tensor var_10326_to_fp16 = const()[name = tensor("op_10326_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1015_cast_fp16 = mul(x = var_10325_cast_fp16, y = var_10326_to_fp16)[name = tensor("aw_chunk_1015_cast_fp16")]; + tensor var_10329_equation_0 = const()[name = tensor("op_10329_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10329_cast_fp16 = einsum(equation = var_10329_equation_0, values = (var_10087_cast_fp16, var_9697_cast_fp16))[name = tensor("op_10329_cast_fp16")]; + tensor var_10330_to_fp16 = const()[name = tensor("op_10330_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1017_cast_fp16 = mul(x = var_10329_cast_fp16, y = var_10330_to_fp16)[name = tensor("aw_chunk_1017_cast_fp16")]; + tensor var_10333_equation_0 = const()[name = tensor("op_10333_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10333_cast_fp16 = einsum(equation = var_10333_equation_0, values = (var_10087_cast_fp16, var_9704_cast_fp16))[name = tensor("op_10333_cast_fp16")]; + tensor var_10334_to_fp16 = const()[name = tensor("op_10334_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1019_cast_fp16 = mul(x = var_10333_cast_fp16, y = var_10334_to_fp16)[name = tensor("aw_chunk_1019_cast_fp16")]; + tensor var_10337_equation_0 = const()[name = tensor("op_10337_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10337_cast_fp16 = einsum(equation = var_10337_equation_0, values = (var_10087_cast_fp16, var_9711_cast_fp16))[name = tensor("op_10337_cast_fp16")]; + tensor var_10338_to_fp16 = const()[name = tensor("op_10338_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1021_cast_fp16 = mul(x = var_10337_cast_fp16, y = var_10338_to_fp16)[name = tensor("aw_chunk_1021_cast_fp16")]; + tensor var_10341_equation_0 = const()[name = tensor("op_10341_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10341_cast_fp16 = einsum(equation = var_10341_equation_0, values = (var_10087_cast_fp16, var_9718_cast_fp16))[name = tensor("op_10341_cast_fp16")]; + tensor var_10342_to_fp16 = const()[name = tensor("op_10342_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1023_cast_fp16 = mul(x = var_10341_cast_fp16, y = var_10342_to_fp16)[name = tensor("aw_chunk_1023_cast_fp16")]; + tensor var_10345_equation_0 = const()[name = tensor("op_10345_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10345_cast_fp16 = einsum(equation = var_10345_equation_0, values = (var_10091_cast_fp16, var_9725_cast_fp16))[name = tensor("op_10345_cast_fp16")]; + tensor var_10346_to_fp16 = const()[name = tensor("op_10346_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1025_cast_fp16 = mul(x = var_10345_cast_fp16, y = var_10346_to_fp16)[name = tensor("aw_chunk_1025_cast_fp16")]; + tensor var_10349_equation_0 = const()[name = tensor("op_10349_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10349_cast_fp16 = einsum(equation = var_10349_equation_0, values = (var_10091_cast_fp16, var_9732_cast_fp16))[name = tensor("op_10349_cast_fp16")]; + tensor var_10350_to_fp16 = const()[name = tensor("op_10350_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1027_cast_fp16 = mul(x = var_10349_cast_fp16, y = var_10350_to_fp16)[name = tensor("aw_chunk_1027_cast_fp16")]; + tensor var_10353_equation_0 = const()[name = tensor("op_10353_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10353_cast_fp16 = einsum(equation = var_10353_equation_0, values = (var_10091_cast_fp16, var_9739_cast_fp16))[name = tensor("op_10353_cast_fp16")]; + tensor var_10354_to_fp16 = const()[name = tensor("op_10354_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1029_cast_fp16 = mul(x = var_10353_cast_fp16, y = var_10354_to_fp16)[name = tensor("aw_chunk_1029_cast_fp16")]; + tensor var_10357_equation_0 = const()[name = tensor("op_10357_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10357_cast_fp16 = einsum(equation = var_10357_equation_0, values = (var_10091_cast_fp16, var_9746_cast_fp16))[name = tensor("op_10357_cast_fp16")]; + tensor var_10358_to_fp16 = const()[name = tensor("op_10358_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1031_cast_fp16 = mul(x = var_10357_cast_fp16, y = var_10358_to_fp16)[name = tensor("aw_chunk_1031_cast_fp16")]; + tensor var_10361_equation_0 = const()[name = tensor("op_10361_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10361_cast_fp16 = einsum(equation = var_10361_equation_0, values = (var_10095_cast_fp16, var_9753_cast_fp16))[name = tensor("op_10361_cast_fp16")]; + tensor var_10362_to_fp16 = const()[name = tensor("op_10362_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1033_cast_fp16 = mul(x = var_10361_cast_fp16, y = var_10362_to_fp16)[name = tensor("aw_chunk_1033_cast_fp16")]; + tensor var_10365_equation_0 = const()[name = tensor("op_10365_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10365_cast_fp16 = einsum(equation = var_10365_equation_0, values = (var_10095_cast_fp16, var_9760_cast_fp16))[name = tensor("op_10365_cast_fp16")]; + tensor var_10366_to_fp16 = const()[name = tensor("op_10366_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1035_cast_fp16 = mul(x = var_10365_cast_fp16, y = var_10366_to_fp16)[name = tensor("aw_chunk_1035_cast_fp16")]; + tensor var_10369_equation_0 = const()[name = tensor("op_10369_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10369_cast_fp16 = einsum(equation = var_10369_equation_0, values = (var_10095_cast_fp16, var_9767_cast_fp16))[name = tensor("op_10369_cast_fp16")]; + tensor var_10370_to_fp16 = const()[name = tensor("op_10370_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1037_cast_fp16 = mul(x = var_10369_cast_fp16, y = var_10370_to_fp16)[name = tensor("aw_chunk_1037_cast_fp16")]; + tensor var_10373_equation_0 = const()[name = tensor("op_10373_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10373_cast_fp16 = einsum(equation = var_10373_equation_0, values = (var_10095_cast_fp16, var_9774_cast_fp16))[name = tensor("op_10373_cast_fp16")]; + tensor var_10374_to_fp16 = const()[name = tensor("op_10374_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1039_cast_fp16 = mul(x = var_10373_cast_fp16, y = var_10374_to_fp16)[name = tensor("aw_chunk_1039_cast_fp16")]; + tensor var_10377_equation_0 = const()[name = tensor("op_10377_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10377_cast_fp16 = einsum(equation = var_10377_equation_0, values = (var_10099_cast_fp16, var_9781_cast_fp16))[name = tensor("op_10377_cast_fp16")]; + tensor var_10378_to_fp16 = const()[name = tensor("op_10378_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1041_cast_fp16 = mul(x = var_10377_cast_fp16, y = var_10378_to_fp16)[name = tensor("aw_chunk_1041_cast_fp16")]; + tensor var_10381_equation_0 = const()[name = tensor("op_10381_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10381_cast_fp16 = einsum(equation = var_10381_equation_0, values = (var_10099_cast_fp16, var_9788_cast_fp16))[name = tensor("op_10381_cast_fp16")]; + tensor var_10382_to_fp16 = const()[name = tensor("op_10382_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1043_cast_fp16 = mul(x = var_10381_cast_fp16, y = var_10382_to_fp16)[name = tensor("aw_chunk_1043_cast_fp16")]; + tensor var_10385_equation_0 = const()[name = tensor("op_10385_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10385_cast_fp16 = einsum(equation = var_10385_equation_0, values = (var_10099_cast_fp16, var_9795_cast_fp16))[name = tensor("op_10385_cast_fp16")]; + tensor var_10386_to_fp16 = const()[name = tensor("op_10386_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1045_cast_fp16 = mul(x = var_10385_cast_fp16, y = var_10386_to_fp16)[name = tensor("aw_chunk_1045_cast_fp16")]; + tensor var_10389_equation_0 = const()[name = tensor("op_10389_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10389_cast_fp16 = einsum(equation = var_10389_equation_0, values = (var_10099_cast_fp16, var_9802_cast_fp16))[name = tensor("op_10389_cast_fp16")]; + tensor var_10390_to_fp16 = const()[name = tensor("op_10390_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1047_cast_fp16 = mul(x = var_10389_cast_fp16, y = var_10390_to_fp16)[name = tensor("aw_chunk_1047_cast_fp16")]; + tensor var_10393_equation_0 = const()[name = tensor("op_10393_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10393_cast_fp16 = einsum(equation = var_10393_equation_0, values = (var_10103_cast_fp16, var_9809_cast_fp16))[name = tensor("op_10393_cast_fp16")]; + tensor var_10394_to_fp16 = const()[name = tensor("op_10394_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1049_cast_fp16 = mul(x = var_10393_cast_fp16, y = var_10394_to_fp16)[name = tensor("aw_chunk_1049_cast_fp16")]; + tensor var_10397_equation_0 = const()[name = tensor("op_10397_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10397_cast_fp16 = einsum(equation = var_10397_equation_0, values = (var_10103_cast_fp16, var_9816_cast_fp16))[name = tensor("op_10397_cast_fp16")]; + tensor var_10398_to_fp16 = const()[name = tensor("op_10398_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1051_cast_fp16 = mul(x = var_10397_cast_fp16, y = var_10398_to_fp16)[name = tensor("aw_chunk_1051_cast_fp16")]; + tensor var_10401_equation_0 = const()[name = tensor("op_10401_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10401_cast_fp16 = einsum(equation = var_10401_equation_0, values = (var_10103_cast_fp16, var_9823_cast_fp16))[name = tensor("op_10401_cast_fp16")]; + tensor var_10402_to_fp16 = const()[name = tensor("op_10402_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1053_cast_fp16 = mul(x = var_10401_cast_fp16, y = var_10402_to_fp16)[name = tensor("aw_chunk_1053_cast_fp16")]; + tensor var_10405_equation_0 = const()[name = tensor("op_10405_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10405_cast_fp16 = einsum(equation = var_10405_equation_0, values = (var_10103_cast_fp16, var_9830_cast_fp16))[name = tensor("op_10405_cast_fp16")]; + tensor var_10406_to_fp16 = const()[name = tensor("op_10406_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1055_cast_fp16 = mul(x = var_10405_cast_fp16, y = var_10406_to_fp16)[name = tensor("aw_chunk_1055_cast_fp16")]; + tensor var_10409_equation_0 = const()[name = tensor("op_10409_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10409_cast_fp16 = einsum(equation = var_10409_equation_0, values = (var_10107_cast_fp16, var_9837_cast_fp16))[name = tensor("op_10409_cast_fp16")]; + tensor var_10410_to_fp16 = const()[name = tensor("op_10410_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1057_cast_fp16 = mul(x = var_10409_cast_fp16, y = var_10410_to_fp16)[name = tensor("aw_chunk_1057_cast_fp16")]; + tensor var_10413_equation_0 = const()[name = tensor("op_10413_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10413_cast_fp16 = einsum(equation = var_10413_equation_0, values = (var_10107_cast_fp16, var_9844_cast_fp16))[name = tensor("op_10413_cast_fp16")]; + tensor var_10414_to_fp16 = const()[name = tensor("op_10414_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1059_cast_fp16 = mul(x = var_10413_cast_fp16, y = var_10414_to_fp16)[name = tensor("aw_chunk_1059_cast_fp16")]; + tensor var_10417_equation_0 = const()[name = tensor("op_10417_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10417_cast_fp16 = einsum(equation = var_10417_equation_0, values = (var_10107_cast_fp16, var_9851_cast_fp16))[name = tensor("op_10417_cast_fp16")]; + tensor var_10418_to_fp16 = const()[name = tensor("op_10418_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1061_cast_fp16 = mul(x = var_10417_cast_fp16, y = var_10418_to_fp16)[name = tensor("aw_chunk_1061_cast_fp16")]; + tensor var_10421_equation_0 = const()[name = tensor("op_10421_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10421_cast_fp16 = einsum(equation = var_10421_equation_0, values = (var_10107_cast_fp16, var_9858_cast_fp16))[name = tensor("op_10421_cast_fp16")]; + tensor var_10422_to_fp16 = const()[name = tensor("op_10422_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1063_cast_fp16 = mul(x = var_10421_cast_fp16, y = var_10422_to_fp16)[name = tensor("aw_chunk_1063_cast_fp16")]; + tensor var_10425_equation_0 = const()[name = tensor("op_10425_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10425_cast_fp16 = einsum(equation = var_10425_equation_0, values = (var_10111_cast_fp16, var_9865_cast_fp16))[name = tensor("op_10425_cast_fp16")]; + tensor var_10426_to_fp16 = const()[name = tensor("op_10426_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1065_cast_fp16 = mul(x = var_10425_cast_fp16, y = var_10426_to_fp16)[name = tensor("aw_chunk_1065_cast_fp16")]; + tensor var_10429_equation_0 = const()[name = tensor("op_10429_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10429_cast_fp16 = einsum(equation = var_10429_equation_0, values = (var_10111_cast_fp16, var_9872_cast_fp16))[name = tensor("op_10429_cast_fp16")]; + tensor var_10430_to_fp16 = const()[name = tensor("op_10430_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1067_cast_fp16 = mul(x = var_10429_cast_fp16, y = var_10430_to_fp16)[name = tensor("aw_chunk_1067_cast_fp16")]; + tensor var_10433_equation_0 = const()[name = tensor("op_10433_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10433_cast_fp16 = einsum(equation = var_10433_equation_0, values = (var_10111_cast_fp16, var_9879_cast_fp16))[name = tensor("op_10433_cast_fp16")]; + tensor var_10434_to_fp16 = const()[name = tensor("op_10434_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1069_cast_fp16 = mul(x = var_10433_cast_fp16, y = var_10434_to_fp16)[name = tensor("aw_chunk_1069_cast_fp16")]; + tensor var_10437_equation_0 = const()[name = tensor("op_10437_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10437_cast_fp16 = einsum(equation = var_10437_equation_0, values = (var_10111_cast_fp16, var_9886_cast_fp16))[name = tensor("op_10437_cast_fp16")]; + tensor var_10438_to_fp16 = const()[name = tensor("op_10438_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1071_cast_fp16 = mul(x = var_10437_cast_fp16, y = var_10438_to_fp16)[name = tensor("aw_chunk_1071_cast_fp16")]; + tensor var_10441_equation_0 = const()[name = tensor("op_10441_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10441_cast_fp16 = einsum(equation = var_10441_equation_0, values = (var_10115_cast_fp16, var_9893_cast_fp16))[name = tensor("op_10441_cast_fp16")]; + tensor var_10442_to_fp16 = const()[name = tensor("op_10442_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1073_cast_fp16 = mul(x = var_10441_cast_fp16, y = var_10442_to_fp16)[name = tensor("aw_chunk_1073_cast_fp16")]; + tensor var_10445_equation_0 = const()[name = tensor("op_10445_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10445_cast_fp16 = einsum(equation = var_10445_equation_0, values = (var_10115_cast_fp16, var_9900_cast_fp16))[name = tensor("op_10445_cast_fp16")]; + tensor var_10446_to_fp16 = const()[name = tensor("op_10446_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1075_cast_fp16 = mul(x = var_10445_cast_fp16, y = var_10446_to_fp16)[name = tensor("aw_chunk_1075_cast_fp16")]; + tensor var_10449_equation_0 = const()[name = tensor("op_10449_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10449_cast_fp16 = einsum(equation = var_10449_equation_0, values = (var_10115_cast_fp16, var_9907_cast_fp16))[name = tensor("op_10449_cast_fp16")]; + tensor var_10450_to_fp16 = const()[name = tensor("op_10450_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1077_cast_fp16 = mul(x = var_10449_cast_fp16, y = var_10450_to_fp16)[name = tensor("aw_chunk_1077_cast_fp16")]; + tensor var_10453_equation_0 = const()[name = tensor("op_10453_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10453_cast_fp16 = einsum(equation = var_10453_equation_0, values = (var_10115_cast_fp16, var_9914_cast_fp16))[name = tensor("op_10453_cast_fp16")]; + tensor var_10454_to_fp16 = const()[name = tensor("op_10454_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1079_cast_fp16 = mul(x = var_10453_cast_fp16, y = var_10454_to_fp16)[name = tensor("aw_chunk_1079_cast_fp16")]; + tensor var_10457_equation_0 = const()[name = tensor("op_10457_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10457_cast_fp16 = einsum(equation = var_10457_equation_0, values = (var_10119_cast_fp16, var_9921_cast_fp16))[name = tensor("op_10457_cast_fp16")]; + tensor var_10458_to_fp16 = const()[name = tensor("op_10458_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1081_cast_fp16 = mul(x = var_10457_cast_fp16, y = var_10458_to_fp16)[name = tensor("aw_chunk_1081_cast_fp16")]; + tensor var_10461_equation_0 = const()[name = tensor("op_10461_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10461_cast_fp16 = einsum(equation = var_10461_equation_0, values = (var_10119_cast_fp16, var_9928_cast_fp16))[name = tensor("op_10461_cast_fp16")]; + tensor var_10462_to_fp16 = const()[name = tensor("op_10462_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1083_cast_fp16 = mul(x = var_10461_cast_fp16, y = var_10462_to_fp16)[name = tensor("aw_chunk_1083_cast_fp16")]; + tensor var_10465_equation_0 = const()[name = tensor("op_10465_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10465_cast_fp16 = einsum(equation = var_10465_equation_0, values = (var_10119_cast_fp16, var_9935_cast_fp16))[name = tensor("op_10465_cast_fp16")]; + tensor var_10466_to_fp16 = const()[name = tensor("op_10466_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1085_cast_fp16 = mul(x = var_10465_cast_fp16, y = var_10466_to_fp16)[name = tensor("aw_chunk_1085_cast_fp16")]; + tensor var_10469_equation_0 = const()[name = tensor("op_10469_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10469_cast_fp16 = einsum(equation = var_10469_equation_0, values = (var_10119_cast_fp16, var_9942_cast_fp16))[name = tensor("op_10469_cast_fp16")]; + tensor var_10470_to_fp16 = const()[name = tensor("op_10470_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1087_cast_fp16 = mul(x = var_10469_cast_fp16, y = var_10470_to_fp16)[name = tensor("aw_chunk_1087_cast_fp16")]; + tensor var_10473_equation_0 = const()[name = tensor("op_10473_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10473_cast_fp16 = einsum(equation = var_10473_equation_0, values = (var_10123_cast_fp16, var_9949_cast_fp16))[name = tensor("op_10473_cast_fp16")]; + tensor var_10474_to_fp16 = const()[name = tensor("op_10474_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1089_cast_fp16 = mul(x = var_10473_cast_fp16, y = var_10474_to_fp16)[name = tensor("aw_chunk_1089_cast_fp16")]; + tensor var_10477_equation_0 = const()[name = tensor("op_10477_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10477_cast_fp16 = einsum(equation = var_10477_equation_0, values = (var_10123_cast_fp16, var_9956_cast_fp16))[name = tensor("op_10477_cast_fp16")]; + tensor var_10478_to_fp16 = const()[name = tensor("op_10478_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1091_cast_fp16 = mul(x = var_10477_cast_fp16, y = var_10478_to_fp16)[name = tensor("aw_chunk_1091_cast_fp16")]; + tensor var_10481_equation_0 = const()[name = tensor("op_10481_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10481_cast_fp16 = einsum(equation = var_10481_equation_0, values = (var_10123_cast_fp16, var_9963_cast_fp16))[name = tensor("op_10481_cast_fp16")]; + tensor var_10482_to_fp16 = const()[name = tensor("op_10482_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1093_cast_fp16 = mul(x = var_10481_cast_fp16, y = var_10482_to_fp16)[name = tensor("aw_chunk_1093_cast_fp16")]; + tensor var_10485_equation_0 = const()[name = tensor("op_10485_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10485_cast_fp16 = einsum(equation = var_10485_equation_0, values = (var_10123_cast_fp16, var_9970_cast_fp16))[name = tensor("op_10485_cast_fp16")]; + tensor var_10486_to_fp16 = const()[name = tensor("op_10486_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1095_cast_fp16 = mul(x = var_10485_cast_fp16, y = var_10486_to_fp16)[name = tensor("aw_chunk_1095_cast_fp16")]; + tensor var_10489_equation_0 = const()[name = tensor("op_10489_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10489_cast_fp16 = einsum(equation = var_10489_equation_0, values = (var_10127_cast_fp16, var_9977_cast_fp16))[name = tensor("op_10489_cast_fp16")]; + tensor var_10490_to_fp16 = const()[name = tensor("op_10490_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1097_cast_fp16 = mul(x = var_10489_cast_fp16, y = var_10490_to_fp16)[name = tensor("aw_chunk_1097_cast_fp16")]; + tensor var_10493_equation_0 = const()[name = tensor("op_10493_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10493_cast_fp16 = einsum(equation = var_10493_equation_0, values = (var_10127_cast_fp16, var_9984_cast_fp16))[name = tensor("op_10493_cast_fp16")]; + tensor var_10494_to_fp16 = const()[name = tensor("op_10494_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1099_cast_fp16 = mul(x = var_10493_cast_fp16, y = var_10494_to_fp16)[name = tensor("aw_chunk_1099_cast_fp16")]; + tensor var_10497_equation_0 = const()[name = tensor("op_10497_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10497_cast_fp16 = einsum(equation = var_10497_equation_0, values = (var_10127_cast_fp16, var_9991_cast_fp16))[name = tensor("op_10497_cast_fp16")]; + tensor var_10498_to_fp16 = const()[name = tensor("op_10498_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1101_cast_fp16 = mul(x = var_10497_cast_fp16, y = var_10498_to_fp16)[name = tensor("aw_chunk_1101_cast_fp16")]; + tensor var_10501_equation_0 = const()[name = tensor("op_10501_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10501_cast_fp16 = einsum(equation = var_10501_equation_0, values = (var_10127_cast_fp16, var_9998_cast_fp16))[name = tensor("op_10501_cast_fp16")]; + tensor var_10502_to_fp16 = const()[name = tensor("op_10502_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1103_cast_fp16 = mul(x = var_10501_cast_fp16, y = var_10502_to_fp16)[name = tensor("aw_chunk_1103_cast_fp16")]; + tensor var_10505_equation_0 = const()[name = tensor("op_10505_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10505_cast_fp16 = einsum(equation = var_10505_equation_0, values = (var_10131_cast_fp16, var_10005_cast_fp16))[name = tensor("op_10505_cast_fp16")]; + tensor var_10506_to_fp16 = const()[name = tensor("op_10506_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1105_cast_fp16 = mul(x = var_10505_cast_fp16, y = var_10506_to_fp16)[name = tensor("aw_chunk_1105_cast_fp16")]; + tensor var_10509_equation_0 = const()[name = tensor("op_10509_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10509_cast_fp16 = einsum(equation = var_10509_equation_0, values = (var_10131_cast_fp16, var_10012_cast_fp16))[name = tensor("op_10509_cast_fp16")]; + tensor var_10510_to_fp16 = const()[name = tensor("op_10510_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1107_cast_fp16 = mul(x = var_10509_cast_fp16, y = var_10510_to_fp16)[name = tensor("aw_chunk_1107_cast_fp16")]; + tensor var_10513_equation_0 = const()[name = tensor("op_10513_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10513_cast_fp16 = einsum(equation = var_10513_equation_0, values = (var_10131_cast_fp16, var_10019_cast_fp16))[name = tensor("op_10513_cast_fp16")]; + tensor var_10514_to_fp16 = const()[name = tensor("op_10514_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1109_cast_fp16 = mul(x = var_10513_cast_fp16, y = var_10514_to_fp16)[name = tensor("aw_chunk_1109_cast_fp16")]; + tensor var_10517_equation_0 = const()[name = tensor("op_10517_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10517_cast_fp16 = einsum(equation = var_10517_equation_0, values = (var_10131_cast_fp16, var_10026_cast_fp16))[name = tensor("op_10517_cast_fp16")]; + tensor var_10518_to_fp16 = const()[name = tensor("op_10518_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1111_cast_fp16 = mul(x = var_10517_cast_fp16, y = var_10518_to_fp16)[name = tensor("aw_chunk_1111_cast_fp16")]; + tensor var_10521_equation_0 = const()[name = tensor("op_10521_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10521_cast_fp16 = einsum(equation = var_10521_equation_0, values = (var_10135_cast_fp16, var_10033_cast_fp16))[name = tensor("op_10521_cast_fp16")]; + tensor var_10522_to_fp16 = const()[name = tensor("op_10522_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1113_cast_fp16 = mul(x = var_10521_cast_fp16, y = var_10522_to_fp16)[name = tensor("aw_chunk_1113_cast_fp16")]; + tensor var_10525_equation_0 = const()[name = tensor("op_10525_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10525_cast_fp16 = einsum(equation = var_10525_equation_0, values = (var_10135_cast_fp16, var_10040_cast_fp16))[name = tensor("op_10525_cast_fp16")]; + tensor var_10526_to_fp16 = const()[name = tensor("op_10526_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1115_cast_fp16 = mul(x = var_10525_cast_fp16, y = var_10526_to_fp16)[name = tensor("aw_chunk_1115_cast_fp16")]; + tensor var_10529_equation_0 = const()[name = tensor("op_10529_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10529_cast_fp16 = einsum(equation = var_10529_equation_0, values = (var_10135_cast_fp16, var_10047_cast_fp16))[name = tensor("op_10529_cast_fp16")]; + tensor var_10530_to_fp16 = const()[name = tensor("op_10530_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1117_cast_fp16 = mul(x = var_10529_cast_fp16, y = var_10530_to_fp16)[name = tensor("aw_chunk_1117_cast_fp16")]; + tensor var_10533_equation_0 = const()[name = tensor("op_10533_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_10533_cast_fp16 = einsum(equation = var_10533_equation_0, values = (var_10135_cast_fp16, var_10054_cast_fp16))[name = tensor("op_10533_cast_fp16")]; + tensor var_10534_to_fp16 = const()[name = tensor("op_10534_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1119_cast_fp16 = mul(x = var_10533_cast_fp16, y = var_10534_to_fp16)[name = tensor("aw_chunk_1119_cast_fp16")]; + tensor var_10536_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_961_cast_fp16)[name = tensor("op_10536_cast_fp16")]; + tensor var_10537_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_963_cast_fp16)[name = tensor("op_10537_cast_fp16")]; + tensor var_10538_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_965_cast_fp16)[name = tensor("op_10538_cast_fp16")]; + tensor var_10539_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_967_cast_fp16)[name = tensor("op_10539_cast_fp16")]; + tensor var_10540_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_969_cast_fp16)[name = tensor("op_10540_cast_fp16")]; + tensor var_10541_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_971_cast_fp16)[name = tensor("op_10541_cast_fp16")]; + tensor var_10542_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_973_cast_fp16)[name = tensor("op_10542_cast_fp16")]; + tensor var_10543_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_975_cast_fp16)[name = tensor("op_10543_cast_fp16")]; + tensor var_10544_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_977_cast_fp16)[name = tensor("op_10544_cast_fp16")]; + tensor var_10545_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_979_cast_fp16)[name = tensor("op_10545_cast_fp16")]; + tensor var_10546_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_981_cast_fp16)[name = tensor("op_10546_cast_fp16")]; + tensor var_10547_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_983_cast_fp16)[name = tensor("op_10547_cast_fp16")]; + tensor var_10548_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_985_cast_fp16)[name = tensor("op_10548_cast_fp16")]; + tensor var_10549_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_987_cast_fp16)[name = tensor("op_10549_cast_fp16")]; + tensor var_10550_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_989_cast_fp16)[name = tensor("op_10550_cast_fp16")]; + tensor var_10551_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_991_cast_fp16)[name = tensor("op_10551_cast_fp16")]; + tensor var_10552_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_993_cast_fp16)[name = tensor("op_10552_cast_fp16")]; + tensor var_10553_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_995_cast_fp16)[name = tensor("op_10553_cast_fp16")]; + tensor var_10554_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_997_cast_fp16)[name = tensor("op_10554_cast_fp16")]; + tensor var_10555_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_999_cast_fp16)[name = tensor("op_10555_cast_fp16")]; + tensor var_10556_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_1001_cast_fp16)[name = tensor("op_10556_cast_fp16")]; + tensor var_10557_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_1003_cast_fp16)[name = tensor("op_10557_cast_fp16")]; + tensor var_10558_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_1005_cast_fp16)[name = tensor("op_10558_cast_fp16")]; + tensor var_10559_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_1007_cast_fp16)[name = tensor("op_10559_cast_fp16")]; + tensor var_10560_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_1009_cast_fp16)[name = tensor("op_10560_cast_fp16")]; + tensor var_10561_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_1011_cast_fp16)[name = tensor("op_10561_cast_fp16")]; + tensor var_10562_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_1013_cast_fp16)[name = tensor("op_10562_cast_fp16")]; + tensor var_10563_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_1015_cast_fp16)[name = tensor("op_10563_cast_fp16")]; + tensor var_10564_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_1017_cast_fp16)[name = tensor("op_10564_cast_fp16")]; + tensor var_10565_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_1019_cast_fp16)[name = tensor("op_10565_cast_fp16")]; + tensor var_10566_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_1021_cast_fp16)[name = tensor("op_10566_cast_fp16")]; + tensor var_10567_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_1023_cast_fp16)[name = tensor("op_10567_cast_fp16")]; + tensor var_10568_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_1025_cast_fp16)[name = tensor("op_10568_cast_fp16")]; + tensor var_10569_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_1027_cast_fp16)[name = tensor("op_10569_cast_fp16")]; + tensor var_10570_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_1029_cast_fp16)[name = tensor("op_10570_cast_fp16")]; + tensor var_10571_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_1031_cast_fp16)[name = tensor("op_10571_cast_fp16")]; + tensor var_10572_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_1033_cast_fp16)[name = tensor("op_10572_cast_fp16")]; + tensor var_10573_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_1035_cast_fp16)[name = tensor("op_10573_cast_fp16")]; + tensor var_10574_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_1037_cast_fp16)[name = tensor("op_10574_cast_fp16")]; + tensor var_10575_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_1039_cast_fp16)[name = tensor("op_10575_cast_fp16")]; + tensor var_10576_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_1041_cast_fp16)[name = tensor("op_10576_cast_fp16")]; + tensor var_10577_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_1043_cast_fp16)[name = tensor("op_10577_cast_fp16")]; + tensor var_10578_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_1045_cast_fp16)[name = tensor("op_10578_cast_fp16")]; + tensor var_10579_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_1047_cast_fp16)[name = tensor("op_10579_cast_fp16")]; + tensor var_10580_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_1049_cast_fp16)[name = tensor("op_10580_cast_fp16")]; + tensor var_10581_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_1051_cast_fp16)[name = tensor("op_10581_cast_fp16")]; + tensor var_10582_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_1053_cast_fp16)[name = tensor("op_10582_cast_fp16")]; + tensor var_10583_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_1055_cast_fp16)[name = tensor("op_10583_cast_fp16")]; + tensor var_10584_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_1057_cast_fp16)[name = tensor("op_10584_cast_fp16")]; + tensor var_10585_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_1059_cast_fp16)[name = tensor("op_10585_cast_fp16")]; + tensor var_10586_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_1061_cast_fp16)[name = tensor("op_10586_cast_fp16")]; + tensor var_10587_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_1063_cast_fp16)[name = tensor("op_10587_cast_fp16")]; + tensor var_10588_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_1065_cast_fp16)[name = tensor("op_10588_cast_fp16")]; + tensor var_10589_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_1067_cast_fp16)[name = tensor("op_10589_cast_fp16")]; + tensor var_10590_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_1069_cast_fp16)[name = tensor("op_10590_cast_fp16")]; + tensor var_10591_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_1071_cast_fp16)[name = tensor("op_10591_cast_fp16")]; + tensor var_10592_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_1073_cast_fp16)[name = tensor("op_10592_cast_fp16")]; + tensor var_10593_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_1075_cast_fp16)[name = tensor("op_10593_cast_fp16")]; + tensor var_10594_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_1077_cast_fp16)[name = tensor("op_10594_cast_fp16")]; + tensor var_10595_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_1079_cast_fp16)[name = tensor("op_10595_cast_fp16")]; + tensor var_10596_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_1081_cast_fp16)[name = tensor("op_10596_cast_fp16")]; + tensor var_10597_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_1083_cast_fp16)[name = tensor("op_10597_cast_fp16")]; + tensor var_10598_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_1085_cast_fp16)[name = tensor("op_10598_cast_fp16")]; + tensor var_10599_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_1087_cast_fp16)[name = tensor("op_10599_cast_fp16")]; + tensor var_10600_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_1089_cast_fp16)[name = tensor("op_10600_cast_fp16")]; + tensor var_10601_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_1091_cast_fp16)[name = tensor("op_10601_cast_fp16")]; + tensor var_10602_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_1093_cast_fp16)[name = tensor("op_10602_cast_fp16")]; + tensor var_10603_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_1095_cast_fp16)[name = tensor("op_10603_cast_fp16")]; + tensor var_10604_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_1097_cast_fp16)[name = tensor("op_10604_cast_fp16")]; + tensor var_10605_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_1099_cast_fp16)[name = tensor("op_10605_cast_fp16")]; + tensor var_10606_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_1101_cast_fp16)[name = tensor("op_10606_cast_fp16")]; + tensor var_10607_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_1103_cast_fp16)[name = tensor("op_10607_cast_fp16")]; + tensor var_10608_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_1105_cast_fp16)[name = tensor("op_10608_cast_fp16")]; + tensor var_10609_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_1107_cast_fp16)[name = tensor("op_10609_cast_fp16")]; + tensor var_10610_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_1109_cast_fp16)[name = tensor("op_10610_cast_fp16")]; + tensor var_10611_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_1111_cast_fp16)[name = tensor("op_10611_cast_fp16")]; + tensor var_10612_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_1113_cast_fp16)[name = tensor("op_10612_cast_fp16")]; + tensor var_10613_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_1115_cast_fp16)[name = tensor("op_10613_cast_fp16")]; + tensor var_10614_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_1117_cast_fp16)[name = tensor("op_10614_cast_fp16")]; + tensor var_10615_cast_fp16 = softmax(axis = var_9361, x = aw_chunk_1119_cast_fp16)[name = tensor("op_10615_cast_fp16")]; + tensor var_10617_equation_0 = const()[name = tensor("op_10617_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10617_cast_fp16 = einsum(equation = var_10617_equation_0, values = (var_10137_cast_fp16, var_10536_cast_fp16))[name = tensor("op_10617_cast_fp16")]; + tensor var_10619_equation_0 = const()[name = tensor("op_10619_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10619_cast_fp16 = einsum(equation = var_10619_equation_0, values = (var_10137_cast_fp16, var_10537_cast_fp16))[name = tensor("op_10619_cast_fp16")]; + tensor var_10621_equation_0 = const()[name = tensor("op_10621_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10621_cast_fp16 = einsum(equation = var_10621_equation_0, values = (var_10137_cast_fp16, var_10538_cast_fp16))[name = tensor("op_10621_cast_fp16")]; + tensor var_10623_equation_0 = const()[name = tensor("op_10623_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10623_cast_fp16 = einsum(equation = var_10623_equation_0, values = (var_10137_cast_fp16, var_10539_cast_fp16))[name = tensor("op_10623_cast_fp16")]; + tensor var_10625_equation_0 = const()[name = tensor("op_10625_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10625_cast_fp16 = einsum(equation = var_10625_equation_0, values = (var_10141_cast_fp16, var_10540_cast_fp16))[name = tensor("op_10625_cast_fp16")]; + tensor var_10627_equation_0 = const()[name = tensor("op_10627_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10627_cast_fp16 = einsum(equation = var_10627_equation_0, values = (var_10141_cast_fp16, var_10541_cast_fp16))[name = tensor("op_10627_cast_fp16")]; + tensor var_10629_equation_0 = const()[name = tensor("op_10629_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10629_cast_fp16 = einsum(equation = var_10629_equation_0, values = (var_10141_cast_fp16, var_10542_cast_fp16))[name = tensor("op_10629_cast_fp16")]; + tensor var_10631_equation_0 = const()[name = tensor("op_10631_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10631_cast_fp16 = einsum(equation = var_10631_equation_0, values = (var_10141_cast_fp16, var_10543_cast_fp16))[name = tensor("op_10631_cast_fp16")]; + tensor var_10633_equation_0 = const()[name = tensor("op_10633_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10633_cast_fp16 = einsum(equation = var_10633_equation_0, values = (var_10145_cast_fp16, var_10544_cast_fp16))[name = tensor("op_10633_cast_fp16")]; + tensor var_10635_equation_0 = const()[name = tensor("op_10635_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10635_cast_fp16 = einsum(equation = var_10635_equation_0, values = (var_10145_cast_fp16, var_10545_cast_fp16))[name = tensor("op_10635_cast_fp16")]; + tensor var_10637_equation_0 = const()[name = tensor("op_10637_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10637_cast_fp16 = einsum(equation = var_10637_equation_0, values = (var_10145_cast_fp16, var_10546_cast_fp16))[name = tensor("op_10637_cast_fp16")]; + tensor var_10639_equation_0 = const()[name = tensor("op_10639_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10639_cast_fp16 = einsum(equation = var_10639_equation_0, values = (var_10145_cast_fp16, var_10547_cast_fp16))[name = tensor("op_10639_cast_fp16")]; + tensor var_10641_equation_0 = const()[name = tensor("op_10641_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10641_cast_fp16 = einsum(equation = var_10641_equation_0, values = (var_10149_cast_fp16, var_10548_cast_fp16))[name = tensor("op_10641_cast_fp16")]; + tensor var_10643_equation_0 = const()[name = tensor("op_10643_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10643_cast_fp16 = einsum(equation = var_10643_equation_0, values = (var_10149_cast_fp16, var_10549_cast_fp16))[name = tensor("op_10643_cast_fp16")]; + tensor var_10645_equation_0 = const()[name = tensor("op_10645_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10645_cast_fp16 = einsum(equation = var_10645_equation_0, values = (var_10149_cast_fp16, var_10550_cast_fp16))[name = tensor("op_10645_cast_fp16")]; + tensor var_10647_equation_0 = const()[name = tensor("op_10647_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10647_cast_fp16 = einsum(equation = var_10647_equation_0, values = (var_10149_cast_fp16, var_10551_cast_fp16))[name = tensor("op_10647_cast_fp16")]; + tensor var_10649_equation_0 = const()[name = tensor("op_10649_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10649_cast_fp16 = einsum(equation = var_10649_equation_0, values = (var_10153_cast_fp16, var_10552_cast_fp16))[name = tensor("op_10649_cast_fp16")]; + tensor var_10651_equation_0 = const()[name = tensor("op_10651_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10651_cast_fp16 = einsum(equation = var_10651_equation_0, values = (var_10153_cast_fp16, var_10553_cast_fp16))[name = tensor("op_10651_cast_fp16")]; + tensor var_10653_equation_0 = const()[name = tensor("op_10653_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10653_cast_fp16 = einsum(equation = var_10653_equation_0, values = (var_10153_cast_fp16, var_10554_cast_fp16))[name = tensor("op_10653_cast_fp16")]; + tensor var_10655_equation_0 = const()[name = tensor("op_10655_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10655_cast_fp16 = einsum(equation = var_10655_equation_0, values = (var_10153_cast_fp16, var_10555_cast_fp16))[name = tensor("op_10655_cast_fp16")]; + tensor var_10657_equation_0 = const()[name = tensor("op_10657_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10657_cast_fp16 = einsum(equation = var_10657_equation_0, values = (var_10157_cast_fp16, var_10556_cast_fp16))[name = tensor("op_10657_cast_fp16")]; + tensor var_10659_equation_0 = const()[name = tensor("op_10659_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10659_cast_fp16 = einsum(equation = var_10659_equation_0, values = (var_10157_cast_fp16, var_10557_cast_fp16))[name = tensor("op_10659_cast_fp16")]; + tensor var_10661_equation_0 = const()[name = tensor("op_10661_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10661_cast_fp16 = einsum(equation = var_10661_equation_0, values = (var_10157_cast_fp16, var_10558_cast_fp16))[name = tensor("op_10661_cast_fp16")]; + tensor var_10663_equation_0 = const()[name = tensor("op_10663_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10663_cast_fp16 = einsum(equation = var_10663_equation_0, values = (var_10157_cast_fp16, var_10559_cast_fp16))[name = tensor("op_10663_cast_fp16")]; + tensor var_10665_equation_0 = const()[name = tensor("op_10665_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10665_cast_fp16 = einsum(equation = var_10665_equation_0, values = (var_10161_cast_fp16, var_10560_cast_fp16))[name = tensor("op_10665_cast_fp16")]; + tensor var_10667_equation_0 = const()[name = tensor("op_10667_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10667_cast_fp16 = einsum(equation = var_10667_equation_0, values = (var_10161_cast_fp16, var_10561_cast_fp16))[name = tensor("op_10667_cast_fp16")]; + tensor var_10669_equation_0 = const()[name = tensor("op_10669_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10669_cast_fp16 = einsum(equation = var_10669_equation_0, values = (var_10161_cast_fp16, var_10562_cast_fp16))[name = tensor("op_10669_cast_fp16")]; + tensor var_10671_equation_0 = const()[name = tensor("op_10671_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10671_cast_fp16 = einsum(equation = var_10671_equation_0, values = (var_10161_cast_fp16, var_10563_cast_fp16))[name = tensor("op_10671_cast_fp16")]; + tensor var_10673_equation_0 = const()[name = tensor("op_10673_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10673_cast_fp16 = einsum(equation = var_10673_equation_0, values = (var_10165_cast_fp16, var_10564_cast_fp16))[name = tensor("op_10673_cast_fp16")]; + tensor var_10675_equation_0 = const()[name = tensor("op_10675_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10675_cast_fp16 = einsum(equation = var_10675_equation_0, values = (var_10165_cast_fp16, var_10565_cast_fp16))[name = tensor("op_10675_cast_fp16")]; + tensor var_10677_equation_0 = const()[name = tensor("op_10677_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10677_cast_fp16 = einsum(equation = var_10677_equation_0, values = (var_10165_cast_fp16, var_10566_cast_fp16))[name = tensor("op_10677_cast_fp16")]; + tensor var_10679_equation_0 = const()[name = tensor("op_10679_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10679_cast_fp16 = einsum(equation = var_10679_equation_0, values = (var_10165_cast_fp16, var_10567_cast_fp16))[name = tensor("op_10679_cast_fp16")]; + tensor var_10681_equation_0 = const()[name = tensor("op_10681_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10681_cast_fp16 = einsum(equation = var_10681_equation_0, values = (var_10169_cast_fp16, var_10568_cast_fp16))[name = tensor("op_10681_cast_fp16")]; + tensor var_10683_equation_0 = const()[name = tensor("op_10683_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10683_cast_fp16 = einsum(equation = var_10683_equation_0, values = (var_10169_cast_fp16, var_10569_cast_fp16))[name = tensor("op_10683_cast_fp16")]; + tensor var_10685_equation_0 = const()[name = tensor("op_10685_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10685_cast_fp16 = einsum(equation = var_10685_equation_0, values = (var_10169_cast_fp16, var_10570_cast_fp16))[name = tensor("op_10685_cast_fp16")]; + tensor var_10687_equation_0 = const()[name = tensor("op_10687_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10687_cast_fp16 = einsum(equation = var_10687_equation_0, values = (var_10169_cast_fp16, var_10571_cast_fp16))[name = tensor("op_10687_cast_fp16")]; + tensor var_10689_equation_0 = const()[name = tensor("op_10689_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10689_cast_fp16 = einsum(equation = var_10689_equation_0, values = (var_10173_cast_fp16, var_10572_cast_fp16))[name = tensor("op_10689_cast_fp16")]; + tensor var_10691_equation_0 = const()[name = tensor("op_10691_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10691_cast_fp16 = einsum(equation = var_10691_equation_0, values = (var_10173_cast_fp16, var_10573_cast_fp16))[name = tensor("op_10691_cast_fp16")]; + tensor var_10693_equation_0 = const()[name = tensor("op_10693_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10693_cast_fp16 = einsum(equation = var_10693_equation_0, values = (var_10173_cast_fp16, var_10574_cast_fp16))[name = tensor("op_10693_cast_fp16")]; + tensor var_10695_equation_0 = const()[name = tensor("op_10695_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10695_cast_fp16 = einsum(equation = var_10695_equation_0, values = (var_10173_cast_fp16, var_10575_cast_fp16))[name = tensor("op_10695_cast_fp16")]; + tensor var_10697_equation_0 = const()[name = tensor("op_10697_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10697_cast_fp16 = einsum(equation = var_10697_equation_0, values = (var_10177_cast_fp16, var_10576_cast_fp16))[name = tensor("op_10697_cast_fp16")]; + tensor var_10699_equation_0 = const()[name = tensor("op_10699_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10699_cast_fp16 = einsum(equation = var_10699_equation_0, values = (var_10177_cast_fp16, var_10577_cast_fp16))[name = tensor("op_10699_cast_fp16")]; + tensor var_10701_equation_0 = const()[name = tensor("op_10701_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10701_cast_fp16 = einsum(equation = var_10701_equation_0, values = (var_10177_cast_fp16, var_10578_cast_fp16))[name = tensor("op_10701_cast_fp16")]; + tensor var_10703_equation_0 = const()[name = tensor("op_10703_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10703_cast_fp16 = einsum(equation = var_10703_equation_0, values = (var_10177_cast_fp16, var_10579_cast_fp16))[name = tensor("op_10703_cast_fp16")]; + tensor var_10705_equation_0 = const()[name = tensor("op_10705_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10705_cast_fp16 = einsum(equation = var_10705_equation_0, values = (var_10181_cast_fp16, var_10580_cast_fp16))[name = tensor("op_10705_cast_fp16")]; + tensor var_10707_equation_0 = const()[name = tensor("op_10707_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10707_cast_fp16 = einsum(equation = var_10707_equation_0, values = (var_10181_cast_fp16, var_10581_cast_fp16))[name = tensor("op_10707_cast_fp16")]; + tensor var_10709_equation_0 = const()[name = tensor("op_10709_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10709_cast_fp16 = einsum(equation = var_10709_equation_0, values = (var_10181_cast_fp16, var_10582_cast_fp16))[name = tensor("op_10709_cast_fp16")]; + tensor var_10711_equation_0 = const()[name = tensor("op_10711_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10711_cast_fp16 = einsum(equation = var_10711_equation_0, values = (var_10181_cast_fp16, var_10583_cast_fp16))[name = tensor("op_10711_cast_fp16")]; + tensor var_10713_equation_0 = const()[name = tensor("op_10713_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10713_cast_fp16 = einsum(equation = var_10713_equation_0, values = (var_10185_cast_fp16, var_10584_cast_fp16))[name = tensor("op_10713_cast_fp16")]; + tensor var_10715_equation_0 = const()[name = tensor("op_10715_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10715_cast_fp16 = einsum(equation = var_10715_equation_0, values = (var_10185_cast_fp16, var_10585_cast_fp16))[name = tensor("op_10715_cast_fp16")]; + tensor var_10717_equation_0 = const()[name = tensor("op_10717_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10717_cast_fp16 = einsum(equation = var_10717_equation_0, values = (var_10185_cast_fp16, var_10586_cast_fp16))[name = tensor("op_10717_cast_fp16")]; + tensor var_10719_equation_0 = const()[name = tensor("op_10719_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10719_cast_fp16 = einsum(equation = var_10719_equation_0, values = (var_10185_cast_fp16, var_10587_cast_fp16))[name = tensor("op_10719_cast_fp16")]; + tensor var_10721_equation_0 = const()[name = tensor("op_10721_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10721_cast_fp16 = einsum(equation = var_10721_equation_0, values = (var_10189_cast_fp16, var_10588_cast_fp16))[name = tensor("op_10721_cast_fp16")]; + tensor var_10723_equation_0 = const()[name = tensor("op_10723_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10723_cast_fp16 = einsum(equation = var_10723_equation_0, values = (var_10189_cast_fp16, var_10589_cast_fp16))[name = tensor("op_10723_cast_fp16")]; + tensor var_10725_equation_0 = const()[name = tensor("op_10725_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10725_cast_fp16 = einsum(equation = var_10725_equation_0, values = (var_10189_cast_fp16, var_10590_cast_fp16))[name = tensor("op_10725_cast_fp16")]; + tensor var_10727_equation_0 = const()[name = tensor("op_10727_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10727_cast_fp16 = einsum(equation = var_10727_equation_0, values = (var_10189_cast_fp16, var_10591_cast_fp16))[name = tensor("op_10727_cast_fp16")]; + tensor var_10729_equation_0 = const()[name = tensor("op_10729_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10729_cast_fp16 = einsum(equation = var_10729_equation_0, values = (var_10193_cast_fp16, var_10592_cast_fp16))[name = tensor("op_10729_cast_fp16")]; + tensor var_10731_equation_0 = const()[name = tensor("op_10731_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10731_cast_fp16 = einsum(equation = var_10731_equation_0, values = (var_10193_cast_fp16, var_10593_cast_fp16))[name = tensor("op_10731_cast_fp16")]; + tensor var_10733_equation_0 = const()[name = tensor("op_10733_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10733_cast_fp16 = einsum(equation = var_10733_equation_0, values = (var_10193_cast_fp16, var_10594_cast_fp16))[name = tensor("op_10733_cast_fp16")]; + tensor var_10735_equation_0 = const()[name = tensor("op_10735_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10735_cast_fp16 = einsum(equation = var_10735_equation_0, values = (var_10193_cast_fp16, var_10595_cast_fp16))[name = tensor("op_10735_cast_fp16")]; + tensor var_10737_equation_0 = const()[name = tensor("op_10737_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10737_cast_fp16 = einsum(equation = var_10737_equation_0, values = (var_10197_cast_fp16, var_10596_cast_fp16))[name = tensor("op_10737_cast_fp16")]; + tensor var_10739_equation_0 = const()[name = tensor("op_10739_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10739_cast_fp16 = einsum(equation = var_10739_equation_0, values = (var_10197_cast_fp16, var_10597_cast_fp16))[name = tensor("op_10739_cast_fp16")]; + tensor var_10741_equation_0 = const()[name = tensor("op_10741_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10741_cast_fp16 = einsum(equation = var_10741_equation_0, values = (var_10197_cast_fp16, var_10598_cast_fp16))[name = tensor("op_10741_cast_fp16")]; + tensor var_10743_equation_0 = const()[name = tensor("op_10743_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10743_cast_fp16 = einsum(equation = var_10743_equation_0, values = (var_10197_cast_fp16, var_10599_cast_fp16))[name = tensor("op_10743_cast_fp16")]; + tensor var_10745_equation_0 = const()[name = tensor("op_10745_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10745_cast_fp16 = einsum(equation = var_10745_equation_0, values = (var_10201_cast_fp16, var_10600_cast_fp16))[name = tensor("op_10745_cast_fp16")]; + tensor var_10747_equation_0 = const()[name = tensor("op_10747_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10747_cast_fp16 = einsum(equation = var_10747_equation_0, values = (var_10201_cast_fp16, var_10601_cast_fp16))[name = tensor("op_10747_cast_fp16")]; + tensor var_10749_equation_0 = const()[name = tensor("op_10749_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10749_cast_fp16 = einsum(equation = var_10749_equation_0, values = (var_10201_cast_fp16, var_10602_cast_fp16))[name = tensor("op_10749_cast_fp16")]; + tensor var_10751_equation_0 = const()[name = tensor("op_10751_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10751_cast_fp16 = einsum(equation = var_10751_equation_0, values = (var_10201_cast_fp16, var_10603_cast_fp16))[name = tensor("op_10751_cast_fp16")]; + tensor var_10753_equation_0 = const()[name = tensor("op_10753_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10753_cast_fp16 = einsum(equation = var_10753_equation_0, values = (var_10205_cast_fp16, var_10604_cast_fp16))[name = tensor("op_10753_cast_fp16")]; + tensor var_10755_equation_0 = const()[name = tensor("op_10755_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10755_cast_fp16 = einsum(equation = var_10755_equation_0, values = (var_10205_cast_fp16, var_10605_cast_fp16))[name = tensor("op_10755_cast_fp16")]; + tensor var_10757_equation_0 = const()[name = tensor("op_10757_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10757_cast_fp16 = einsum(equation = var_10757_equation_0, values = (var_10205_cast_fp16, var_10606_cast_fp16))[name = tensor("op_10757_cast_fp16")]; + tensor var_10759_equation_0 = const()[name = tensor("op_10759_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10759_cast_fp16 = einsum(equation = var_10759_equation_0, values = (var_10205_cast_fp16, var_10607_cast_fp16))[name = tensor("op_10759_cast_fp16")]; + tensor var_10761_equation_0 = const()[name = tensor("op_10761_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10761_cast_fp16 = einsum(equation = var_10761_equation_0, values = (var_10209_cast_fp16, var_10608_cast_fp16))[name = tensor("op_10761_cast_fp16")]; + tensor var_10763_equation_0 = const()[name = tensor("op_10763_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10763_cast_fp16 = einsum(equation = var_10763_equation_0, values = (var_10209_cast_fp16, var_10609_cast_fp16))[name = tensor("op_10763_cast_fp16")]; + tensor var_10765_equation_0 = const()[name = tensor("op_10765_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10765_cast_fp16 = einsum(equation = var_10765_equation_0, values = (var_10209_cast_fp16, var_10610_cast_fp16))[name = tensor("op_10765_cast_fp16")]; + tensor var_10767_equation_0 = const()[name = tensor("op_10767_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10767_cast_fp16 = einsum(equation = var_10767_equation_0, values = (var_10209_cast_fp16, var_10611_cast_fp16))[name = tensor("op_10767_cast_fp16")]; + tensor var_10769_equation_0 = const()[name = tensor("op_10769_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10769_cast_fp16 = einsum(equation = var_10769_equation_0, values = (var_10213_cast_fp16, var_10612_cast_fp16))[name = tensor("op_10769_cast_fp16")]; + tensor var_10771_equation_0 = const()[name = tensor("op_10771_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10771_cast_fp16 = einsum(equation = var_10771_equation_0, values = (var_10213_cast_fp16, var_10613_cast_fp16))[name = tensor("op_10771_cast_fp16")]; + tensor var_10773_equation_0 = const()[name = tensor("op_10773_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10773_cast_fp16 = einsum(equation = var_10773_equation_0, values = (var_10213_cast_fp16, var_10614_cast_fp16))[name = tensor("op_10773_cast_fp16")]; + tensor var_10775_equation_0 = const()[name = tensor("op_10775_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_10775_cast_fp16 = einsum(equation = var_10775_equation_0, values = (var_10213_cast_fp16, var_10615_cast_fp16))[name = tensor("op_10775_cast_fp16")]; + tensor var_10777_interleave_0 = const()[name = tensor("op_10777_interleave_0"), val = tensor(false)]; + tensor var_10777_cast_fp16 = concat(axis = var_9336, interleave = var_10777_interleave_0, values = (var_10617_cast_fp16, var_10619_cast_fp16, var_10621_cast_fp16, var_10623_cast_fp16))[name = tensor("op_10777_cast_fp16")]; + tensor var_10779_interleave_0 = const()[name = tensor("op_10779_interleave_0"), val = tensor(false)]; + tensor var_10779_cast_fp16 = concat(axis = var_9336, interleave = var_10779_interleave_0, values = (var_10625_cast_fp16, var_10627_cast_fp16, var_10629_cast_fp16, var_10631_cast_fp16))[name = tensor("op_10779_cast_fp16")]; + tensor var_10781_interleave_0 = const()[name = tensor("op_10781_interleave_0"), val = tensor(false)]; + tensor var_10781_cast_fp16 = concat(axis = var_9336, interleave = var_10781_interleave_0, values = (var_10633_cast_fp16, var_10635_cast_fp16, var_10637_cast_fp16, var_10639_cast_fp16))[name = tensor("op_10781_cast_fp16")]; + tensor var_10783_interleave_0 = const()[name = tensor("op_10783_interleave_0"), val = tensor(false)]; + tensor var_10783_cast_fp16 = concat(axis = var_9336, interleave = var_10783_interleave_0, values = (var_10641_cast_fp16, var_10643_cast_fp16, var_10645_cast_fp16, var_10647_cast_fp16))[name = tensor("op_10783_cast_fp16")]; + tensor var_10785_interleave_0 = const()[name = tensor("op_10785_interleave_0"), val = tensor(false)]; + tensor var_10785_cast_fp16 = concat(axis = var_9336, interleave = var_10785_interleave_0, values = (var_10649_cast_fp16, var_10651_cast_fp16, var_10653_cast_fp16, var_10655_cast_fp16))[name = tensor("op_10785_cast_fp16")]; + tensor var_10787_interleave_0 = const()[name = tensor("op_10787_interleave_0"), val = tensor(false)]; + tensor var_10787_cast_fp16 = concat(axis = var_9336, interleave = var_10787_interleave_0, values = (var_10657_cast_fp16, var_10659_cast_fp16, var_10661_cast_fp16, var_10663_cast_fp16))[name = tensor("op_10787_cast_fp16")]; + tensor var_10789_interleave_0 = const()[name = tensor("op_10789_interleave_0"), val = tensor(false)]; + tensor var_10789_cast_fp16 = concat(axis = var_9336, interleave = var_10789_interleave_0, values = (var_10665_cast_fp16, var_10667_cast_fp16, var_10669_cast_fp16, var_10671_cast_fp16))[name = tensor("op_10789_cast_fp16")]; + tensor var_10791_interleave_0 = const()[name = tensor("op_10791_interleave_0"), val = tensor(false)]; + tensor var_10791_cast_fp16 = concat(axis = var_9336, interleave = var_10791_interleave_0, values = (var_10673_cast_fp16, var_10675_cast_fp16, var_10677_cast_fp16, var_10679_cast_fp16))[name = tensor("op_10791_cast_fp16")]; + tensor var_10793_interleave_0 = const()[name = tensor("op_10793_interleave_0"), val = tensor(false)]; + tensor var_10793_cast_fp16 = concat(axis = var_9336, interleave = var_10793_interleave_0, values = (var_10681_cast_fp16, var_10683_cast_fp16, var_10685_cast_fp16, var_10687_cast_fp16))[name = tensor("op_10793_cast_fp16")]; + tensor var_10795_interleave_0 = const()[name = tensor("op_10795_interleave_0"), val = tensor(false)]; + tensor var_10795_cast_fp16 = concat(axis = var_9336, interleave = var_10795_interleave_0, values = (var_10689_cast_fp16, var_10691_cast_fp16, var_10693_cast_fp16, var_10695_cast_fp16))[name = tensor("op_10795_cast_fp16")]; + tensor var_10797_interleave_0 = const()[name = tensor("op_10797_interleave_0"), val = tensor(false)]; + tensor var_10797_cast_fp16 = concat(axis = var_9336, interleave = var_10797_interleave_0, values = (var_10697_cast_fp16, var_10699_cast_fp16, var_10701_cast_fp16, var_10703_cast_fp16))[name = tensor("op_10797_cast_fp16")]; + tensor var_10799_interleave_0 = const()[name = tensor("op_10799_interleave_0"), val = tensor(false)]; + tensor var_10799_cast_fp16 = concat(axis = var_9336, interleave = var_10799_interleave_0, values = (var_10705_cast_fp16, var_10707_cast_fp16, var_10709_cast_fp16, var_10711_cast_fp16))[name = tensor("op_10799_cast_fp16")]; + tensor var_10801_interleave_0 = const()[name = tensor("op_10801_interleave_0"), val = tensor(false)]; + tensor var_10801_cast_fp16 = concat(axis = var_9336, interleave = var_10801_interleave_0, values = (var_10713_cast_fp16, var_10715_cast_fp16, var_10717_cast_fp16, var_10719_cast_fp16))[name = tensor("op_10801_cast_fp16")]; + tensor var_10803_interleave_0 = const()[name = tensor("op_10803_interleave_0"), val = tensor(false)]; + tensor var_10803_cast_fp16 = concat(axis = var_9336, interleave = var_10803_interleave_0, values = (var_10721_cast_fp16, var_10723_cast_fp16, var_10725_cast_fp16, var_10727_cast_fp16))[name = tensor("op_10803_cast_fp16")]; + tensor var_10805_interleave_0 = const()[name = tensor("op_10805_interleave_0"), val = tensor(false)]; + tensor var_10805_cast_fp16 = concat(axis = var_9336, interleave = var_10805_interleave_0, values = (var_10729_cast_fp16, var_10731_cast_fp16, var_10733_cast_fp16, var_10735_cast_fp16))[name = tensor("op_10805_cast_fp16")]; + tensor var_10807_interleave_0 = const()[name = tensor("op_10807_interleave_0"), val = tensor(false)]; + tensor var_10807_cast_fp16 = concat(axis = var_9336, interleave = var_10807_interleave_0, values = (var_10737_cast_fp16, var_10739_cast_fp16, var_10741_cast_fp16, var_10743_cast_fp16))[name = tensor("op_10807_cast_fp16")]; + tensor var_10809_interleave_0 = const()[name = tensor("op_10809_interleave_0"), val = tensor(false)]; + tensor var_10809_cast_fp16 = concat(axis = var_9336, interleave = var_10809_interleave_0, values = (var_10745_cast_fp16, var_10747_cast_fp16, var_10749_cast_fp16, var_10751_cast_fp16))[name = tensor("op_10809_cast_fp16")]; + tensor var_10811_interleave_0 = const()[name = tensor("op_10811_interleave_0"), val = tensor(false)]; + tensor var_10811_cast_fp16 = concat(axis = var_9336, interleave = var_10811_interleave_0, values = (var_10753_cast_fp16, var_10755_cast_fp16, var_10757_cast_fp16, var_10759_cast_fp16))[name = tensor("op_10811_cast_fp16")]; + tensor var_10813_interleave_0 = const()[name = tensor("op_10813_interleave_0"), val = tensor(false)]; + tensor var_10813_cast_fp16 = concat(axis = var_9336, interleave = var_10813_interleave_0, values = (var_10761_cast_fp16, var_10763_cast_fp16, var_10765_cast_fp16, var_10767_cast_fp16))[name = tensor("op_10813_cast_fp16")]; + tensor var_10815_interleave_0 = const()[name = tensor("op_10815_interleave_0"), val = tensor(false)]; + tensor var_10815_cast_fp16 = concat(axis = var_9336, interleave = var_10815_interleave_0, values = (var_10769_cast_fp16, var_10771_cast_fp16, var_10773_cast_fp16, var_10775_cast_fp16))[name = tensor("op_10815_cast_fp16")]; + tensor input_49_interleave_0 = const()[name = tensor("input_49_interleave_0"), val = tensor(false)]; + tensor input_49_cast_fp16 = concat(axis = var_9361, interleave = input_49_interleave_0, values = (var_10777_cast_fp16, var_10779_cast_fp16, var_10781_cast_fp16, var_10783_cast_fp16, var_10785_cast_fp16, var_10787_cast_fp16, var_10789_cast_fp16, var_10791_cast_fp16, var_10793_cast_fp16, var_10795_cast_fp16, var_10797_cast_fp16, var_10799_cast_fp16, var_10801_cast_fp16, var_10803_cast_fp16, var_10805_cast_fp16, var_10807_cast_fp16, var_10809_cast_fp16, var_10811_cast_fp16, var_10813_cast_fp16, var_10815_cast_fp16))[name = tensor("input_49_cast_fp16")]; + tensor var_10820 = const()[name = tensor("op_10820"), val = tensor([1, 1])]; + tensor var_10822 = const()[name = tensor("op_10822"), val = tensor([1, 1])]; + tensor obj_27_pad_type_0 = const()[name = tensor("obj_27_pad_type_0"), val = tensor("custom")]; + tensor obj_27_pad_0 = const()[name = tensor("obj_27_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_6_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_6_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(260256320)))]; + tensor layers_6_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_6_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(263533184)))]; + tensor obj_27_cast_fp16 = conv(bias = layers_6_self_attn_o_proj_bias_to_fp16, dilations = var_10822, groups = var_9361, pad = obj_27_pad_0, pad_type = obj_27_pad_type_0, strides = var_10820, weight = layers_6_self_attn_o_proj_weight_to_fp16, x = input_49_cast_fp16)[name = tensor("obj_27_cast_fp16")]; + tensor inputs_27_cast_fp16 = add(x = inputs_25_cast_fp16, y = obj_27_cast_fp16)[name = tensor("inputs_27_cast_fp16")]; + tensor var_10828 = const()[name = tensor("op_10828"), val = tensor([1])]; + tensor channels_mean_27_cast_fp16 = reduce_mean(axes = var_10828, keep_dims = var_9362, x = inputs_27_cast_fp16)[name = tensor("channels_mean_27_cast_fp16")]; + tensor zero_mean_27_cast_fp16 = sub(x = inputs_27_cast_fp16, y = channels_mean_27_cast_fp16)[name = tensor("zero_mean_27_cast_fp16")]; + tensor zero_mean_sq_27_cast_fp16 = mul(x = zero_mean_27_cast_fp16, y = zero_mean_27_cast_fp16)[name = tensor("zero_mean_sq_27_cast_fp16")]; + tensor var_10832 = const()[name = tensor("op_10832"), val = tensor([1])]; + tensor var_10833_cast_fp16 = reduce_mean(axes = var_10832, keep_dims = var_9362, x = zero_mean_sq_27_cast_fp16)[name = tensor("op_10833_cast_fp16")]; + tensor var_10834_to_fp16 = const()[name = tensor("op_10834_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_10835_cast_fp16 = add(x = var_10833_cast_fp16, y = var_10834_to_fp16)[name = tensor("op_10835_cast_fp16")]; + tensor denom_27_epsilon_0_to_fp16 = const()[name = tensor("denom_27_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_27_cast_fp16 = rsqrt(epsilon = denom_27_epsilon_0_to_fp16, x = var_10835_cast_fp16)[name = tensor("denom_27_cast_fp16")]; + tensor out_27_cast_fp16 = mul(x = zero_mean_27_cast_fp16, y = denom_27_cast_fp16)[name = tensor("out_27_cast_fp16")]; + tensor input_51_gamma_0_to_fp16 = const()[name = tensor("input_51_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(263535808)))]; + tensor input_51_beta_0_to_fp16 = const()[name = tensor("input_51_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(263538432)))]; + tensor input_51_epsilon_0_to_fp16 = const()[name = tensor("input_51_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_51_cast_fp16 = batch_norm(beta = input_51_beta_0_to_fp16, epsilon = input_51_epsilon_0_to_fp16, gamma = input_51_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_27_cast_fp16)[name = tensor("input_51_cast_fp16")]; + tensor var_10846 = const()[name = tensor("op_10846"), val = tensor([1, 1])]; + tensor var_10848 = const()[name = tensor("op_10848"), val = tensor([1, 1])]; + tensor input_53_pad_type_0 = const()[name = tensor("input_53_pad_type_0"), val = tensor("custom")]; + tensor input_53_pad_0 = const()[name = tensor("input_53_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_6_fc1_weight_to_fp16 = const()[name = tensor("layers_6_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(263541056)))]; + tensor layers_6_fc1_bias_to_fp16 = const()[name = tensor("layers_6_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(276648320)))]; + tensor input_53_cast_fp16 = conv(bias = layers_6_fc1_bias_to_fp16, dilations = var_10848, groups = var_9361, pad = input_53_pad_0, pad_type = input_53_pad_type_0, strides = var_10846, weight = layers_6_fc1_weight_to_fp16, x = input_51_cast_fp16)[name = tensor("input_53_cast_fp16")]; + tensor input_55_mode_0 = const()[name = tensor("input_55_mode_0"), val = tensor("EXACT")]; + tensor input_55_cast_fp16 = gelu(mode = input_55_mode_0, x = input_53_cast_fp16)[name = tensor("input_55_cast_fp16")]; + tensor var_10854 = const()[name = tensor("op_10854"), val = tensor([1, 1])]; + tensor var_10856 = const()[name = tensor("op_10856"), val = tensor([1, 1])]; + tensor hidden_states_17_pad_type_0 = const()[name = tensor("hidden_states_17_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_17_pad_0 = const()[name = tensor("hidden_states_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_6_fc2_weight_to_fp16 = const()[name = tensor("layers_6_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(276658624)))]; + tensor layers_6_fc2_bias_to_fp16 = const()[name = tensor("layers_6_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(289765888)))]; + tensor hidden_states_17_cast_fp16 = conv(bias = layers_6_fc2_bias_to_fp16, dilations = var_10856, groups = var_9361, pad = hidden_states_17_pad_0, pad_type = hidden_states_17_pad_type_0, strides = var_10854, weight = layers_6_fc2_weight_to_fp16, x = input_55_cast_fp16)[name = tensor("hidden_states_17_cast_fp16")]; + tensor inputs_29_cast_fp16 = add(x = inputs_27_cast_fp16, y = hidden_states_17_cast_fp16)[name = tensor("inputs_29_cast_fp16")]; + tensor var_10863 = const()[name = tensor("op_10863"), val = tensor(3)]; + tensor var_10888 = const()[name = tensor("op_10888"), val = tensor(1)]; + tensor var_10889 = const()[name = tensor("op_10889"), val = tensor(true)]; + tensor var_10899 = const()[name = tensor("op_10899"), val = tensor([1])]; + tensor channels_mean_29_cast_fp16 = reduce_mean(axes = var_10899, keep_dims = var_10889, x = inputs_29_cast_fp16)[name = tensor("channels_mean_29_cast_fp16")]; + tensor zero_mean_29_cast_fp16 = sub(x = inputs_29_cast_fp16, y = channels_mean_29_cast_fp16)[name = tensor("zero_mean_29_cast_fp16")]; + tensor zero_mean_sq_29_cast_fp16 = mul(x = zero_mean_29_cast_fp16, y = zero_mean_29_cast_fp16)[name = tensor("zero_mean_sq_29_cast_fp16")]; + tensor var_10903 = const()[name = tensor("op_10903"), val = tensor([1])]; + tensor var_10904_cast_fp16 = reduce_mean(axes = var_10903, keep_dims = var_10889, x = zero_mean_sq_29_cast_fp16)[name = tensor("op_10904_cast_fp16")]; + tensor var_10905_to_fp16 = const()[name = tensor("op_10905_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_10906_cast_fp16 = add(x = var_10904_cast_fp16, y = var_10905_to_fp16)[name = tensor("op_10906_cast_fp16")]; + tensor denom_29_epsilon_0_to_fp16 = const()[name = tensor("denom_29_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_29_cast_fp16 = rsqrt(epsilon = denom_29_epsilon_0_to_fp16, x = var_10906_cast_fp16)[name = tensor("denom_29_cast_fp16")]; + tensor out_29_cast_fp16 = mul(x = zero_mean_29_cast_fp16, y = denom_29_cast_fp16)[name = tensor("out_29_cast_fp16")]; + tensor obj_29_gamma_0_to_fp16 = const()[name = tensor("obj_29_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(289768512)))]; + tensor obj_29_beta_0_to_fp16 = const()[name = tensor("obj_29_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(289771136)))]; + tensor obj_29_epsilon_0_to_fp16 = const()[name = tensor("obj_29_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_29_cast_fp16 = batch_norm(beta = obj_29_beta_0_to_fp16, epsilon = obj_29_epsilon_0_to_fp16, gamma = obj_29_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_29_cast_fp16)[name = tensor("obj_29_cast_fp16")]; + tensor var_10921 = const()[name = tensor("op_10921"), val = tensor([1, 1])]; + tensor var_10923 = const()[name = tensor("op_10923"), val = tensor([1, 1])]; + tensor query_15_pad_type_0 = const()[name = tensor("query_15_pad_type_0"), val = tensor("custom")]; + tensor query_15_pad_0 = const()[name = tensor("query_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_7_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_7_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(289773760)))]; + tensor layers_7_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_7_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(293050624)))]; + tensor query_15_cast_fp16 = conv(bias = layers_7_self_attn_q_proj_bias_to_fp16, dilations = var_10923, groups = var_10888, pad = query_15_pad_0, pad_type = query_15_pad_type_0, strides = var_10921, weight = layers_7_self_attn_q_proj_weight_to_fp16, x = obj_29_cast_fp16)[name = tensor("query_15_cast_fp16")]; + tensor var_10927 = const()[name = tensor("op_10927"), val = tensor([1, 1])]; + tensor var_10929 = const()[name = tensor("op_10929"), val = tensor([1, 1])]; + tensor key_15_pad_type_0 = const()[name = tensor("key_15_pad_type_0"), val = tensor("custom")]; + tensor key_15_pad_0 = const()[name = tensor("key_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_7_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_7_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(293053248)))]; + tensor key_15_cast_fp16 = conv(dilations = var_10929, groups = var_10888, pad = key_15_pad_0, pad_type = key_15_pad_type_0, strides = var_10927, weight = layers_7_self_attn_k_proj_weight_to_fp16, x = obj_29_cast_fp16)[name = tensor("key_15_cast_fp16")]; + tensor var_10934 = const()[name = tensor("op_10934"), val = tensor([1, 1])]; + tensor var_10936 = const()[name = tensor("op_10936"), val = tensor([1, 1])]; + tensor value_15_pad_type_0 = const()[name = tensor("value_15_pad_type_0"), val = tensor("custom")]; + tensor value_15_pad_0 = const()[name = tensor("value_15_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_7_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_7_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(296330112)))]; + tensor layers_7_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_7_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(299606976)))]; + tensor value_15_cast_fp16 = conv(bias = layers_7_self_attn_v_proj_bias_to_fp16, dilations = var_10936, groups = var_10888, pad = value_15_pad_0, pad_type = value_15_pad_type_0, strides = var_10934, weight = layers_7_self_attn_v_proj_weight_to_fp16, x = obj_29_cast_fp16)[name = tensor("value_15_cast_fp16")]; + tensor var_10943_begin_0 = const()[name = tensor("op_10943_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_10943_end_0 = const()[name = tensor("op_10943_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_10943_end_mask_0 = const()[name = tensor("op_10943_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10943_cast_fp16 = slice_by_index(begin = var_10943_begin_0, end = var_10943_end_0, end_mask = var_10943_end_mask_0, x = query_15_cast_fp16)[name = tensor("op_10943_cast_fp16")]; + tensor var_10947_begin_0 = const()[name = tensor("op_10947_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_10947_end_0 = const()[name = tensor("op_10947_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_10947_end_mask_0 = const()[name = tensor("op_10947_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10947_cast_fp16 = slice_by_index(begin = var_10947_begin_0, end = var_10947_end_0, end_mask = var_10947_end_mask_0, x = query_15_cast_fp16)[name = tensor("op_10947_cast_fp16")]; + tensor var_10951_begin_0 = const()[name = tensor("op_10951_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_10951_end_0 = const()[name = tensor("op_10951_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_10951_end_mask_0 = const()[name = tensor("op_10951_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10951_cast_fp16 = slice_by_index(begin = var_10951_begin_0, end = var_10951_end_0, end_mask = var_10951_end_mask_0, x = query_15_cast_fp16)[name = tensor("op_10951_cast_fp16")]; + tensor var_10955_begin_0 = const()[name = tensor("op_10955_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_10955_end_0 = const()[name = tensor("op_10955_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_10955_end_mask_0 = const()[name = tensor("op_10955_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10955_cast_fp16 = slice_by_index(begin = var_10955_begin_0, end = var_10955_end_0, end_mask = var_10955_end_mask_0, x = query_15_cast_fp16)[name = tensor("op_10955_cast_fp16")]; + tensor var_10959_begin_0 = const()[name = tensor("op_10959_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_10959_end_0 = const()[name = tensor("op_10959_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_10959_end_mask_0 = const()[name = tensor("op_10959_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10959_cast_fp16 = slice_by_index(begin = var_10959_begin_0, end = var_10959_end_0, end_mask = var_10959_end_mask_0, x = query_15_cast_fp16)[name = tensor("op_10959_cast_fp16")]; + tensor var_10963_begin_0 = const()[name = tensor("op_10963_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_10963_end_0 = const()[name = tensor("op_10963_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_10963_end_mask_0 = const()[name = tensor("op_10963_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10963_cast_fp16 = slice_by_index(begin = var_10963_begin_0, end = var_10963_end_0, end_mask = var_10963_end_mask_0, x = query_15_cast_fp16)[name = tensor("op_10963_cast_fp16")]; + tensor var_10967_begin_0 = const()[name = tensor("op_10967_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_10967_end_0 = const()[name = tensor("op_10967_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_10967_end_mask_0 = const()[name = tensor("op_10967_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10967_cast_fp16 = slice_by_index(begin = var_10967_begin_0, end = var_10967_end_0, end_mask = var_10967_end_mask_0, x = query_15_cast_fp16)[name = tensor("op_10967_cast_fp16")]; + tensor var_10971_begin_0 = const()[name = tensor("op_10971_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_10971_end_0 = const()[name = tensor("op_10971_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_10971_end_mask_0 = const()[name = tensor("op_10971_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10971_cast_fp16 = slice_by_index(begin = var_10971_begin_0, end = var_10971_end_0, end_mask = var_10971_end_mask_0, x = query_15_cast_fp16)[name = tensor("op_10971_cast_fp16")]; + tensor var_10975_begin_0 = const()[name = tensor("op_10975_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_10975_end_0 = const()[name = tensor("op_10975_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_10975_end_mask_0 = const()[name = tensor("op_10975_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10975_cast_fp16 = slice_by_index(begin = var_10975_begin_0, end = var_10975_end_0, end_mask = var_10975_end_mask_0, x = query_15_cast_fp16)[name = tensor("op_10975_cast_fp16")]; + tensor var_10979_begin_0 = const()[name = tensor("op_10979_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_10979_end_0 = const()[name = tensor("op_10979_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_10979_end_mask_0 = const()[name = tensor("op_10979_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10979_cast_fp16 = slice_by_index(begin = var_10979_begin_0, end = var_10979_end_0, end_mask = var_10979_end_mask_0, x = query_15_cast_fp16)[name = tensor("op_10979_cast_fp16")]; + tensor var_10983_begin_0 = const()[name = tensor("op_10983_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_10983_end_0 = const()[name = tensor("op_10983_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_10983_end_mask_0 = const()[name = tensor("op_10983_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10983_cast_fp16 = slice_by_index(begin = var_10983_begin_0, end = var_10983_end_0, end_mask = var_10983_end_mask_0, x = query_15_cast_fp16)[name = tensor("op_10983_cast_fp16")]; + tensor var_10987_begin_0 = const()[name = tensor("op_10987_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_10987_end_0 = const()[name = tensor("op_10987_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_10987_end_mask_0 = const()[name = tensor("op_10987_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10987_cast_fp16 = slice_by_index(begin = var_10987_begin_0, end = var_10987_end_0, end_mask = var_10987_end_mask_0, x = query_15_cast_fp16)[name = tensor("op_10987_cast_fp16")]; + tensor var_10991_begin_0 = const()[name = tensor("op_10991_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_10991_end_0 = const()[name = tensor("op_10991_end_0"), val = tensor([1, 832, 1, 1500])]; + tensor var_10991_end_mask_0 = const()[name = tensor("op_10991_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10991_cast_fp16 = slice_by_index(begin = var_10991_begin_0, end = var_10991_end_0, end_mask = var_10991_end_mask_0, x = query_15_cast_fp16)[name = tensor("op_10991_cast_fp16")]; + tensor var_10995_begin_0 = const()[name = tensor("op_10995_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_10995_end_0 = const()[name = tensor("op_10995_end_0"), val = tensor([1, 896, 1, 1500])]; + tensor var_10995_end_mask_0 = const()[name = tensor("op_10995_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10995_cast_fp16 = slice_by_index(begin = var_10995_begin_0, end = var_10995_end_0, end_mask = var_10995_end_mask_0, x = query_15_cast_fp16)[name = tensor("op_10995_cast_fp16")]; + tensor var_10999_begin_0 = const()[name = tensor("op_10999_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_10999_end_0 = const()[name = tensor("op_10999_end_0"), val = tensor([1, 960, 1, 1500])]; + tensor var_10999_end_mask_0 = const()[name = tensor("op_10999_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_10999_cast_fp16 = slice_by_index(begin = var_10999_begin_0, end = var_10999_end_0, end_mask = var_10999_end_mask_0, x = query_15_cast_fp16)[name = tensor("op_10999_cast_fp16")]; + tensor var_11003_begin_0 = const()[name = tensor("op_11003_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_11003_end_0 = const()[name = tensor("op_11003_end_0"), val = tensor([1, 1024, 1, 1500])]; + tensor var_11003_end_mask_0 = const()[name = tensor("op_11003_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_11003_cast_fp16 = slice_by_index(begin = var_11003_begin_0, end = var_11003_end_0, end_mask = var_11003_end_mask_0, x = query_15_cast_fp16)[name = tensor("op_11003_cast_fp16")]; + tensor var_11007_begin_0 = const()[name = tensor("op_11007_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_11007_end_0 = const()[name = tensor("op_11007_end_0"), val = tensor([1, 1088, 1, 1500])]; + tensor var_11007_end_mask_0 = const()[name = tensor("op_11007_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_11007_cast_fp16 = slice_by_index(begin = var_11007_begin_0, end = var_11007_end_0, end_mask = var_11007_end_mask_0, x = query_15_cast_fp16)[name = tensor("op_11007_cast_fp16")]; + tensor var_11011_begin_0 = const()[name = tensor("op_11011_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_11011_end_0 = const()[name = tensor("op_11011_end_0"), val = tensor([1, 1152, 1, 1500])]; + tensor var_11011_end_mask_0 = const()[name = tensor("op_11011_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_11011_cast_fp16 = slice_by_index(begin = var_11011_begin_0, end = var_11011_end_0, end_mask = var_11011_end_mask_0, x = query_15_cast_fp16)[name = tensor("op_11011_cast_fp16")]; + tensor var_11015_begin_0 = const()[name = tensor("op_11015_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_11015_end_0 = const()[name = tensor("op_11015_end_0"), val = tensor([1, 1216, 1, 1500])]; + tensor var_11015_end_mask_0 = const()[name = tensor("op_11015_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_11015_cast_fp16 = slice_by_index(begin = var_11015_begin_0, end = var_11015_end_0, end_mask = var_11015_end_mask_0, x = query_15_cast_fp16)[name = tensor("op_11015_cast_fp16")]; + tensor var_11019_begin_0 = const()[name = tensor("op_11019_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_11019_end_0 = const()[name = tensor("op_11019_end_0"), val = tensor([1, 1280, 1, 1500])]; + tensor var_11019_end_mask_0 = const()[name = tensor("op_11019_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_11019_cast_fp16 = slice_by_index(begin = var_11019_begin_0, end = var_11019_end_0, end_mask = var_11019_end_mask_0, x = query_15_cast_fp16)[name = tensor("op_11019_cast_fp16")]; + tensor var_11028_begin_0 = const()[name = tensor("op_11028_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_11028_end_0 = const()[name = tensor("op_11028_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_11028_end_mask_0 = const()[name = tensor("op_11028_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11028_cast_fp16 = slice_by_index(begin = var_11028_begin_0, end = var_11028_end_0, end_mask = var_11028_end_mask_0, x = var_10943_cast_fp16)[name = tensor("op_11028_cast_fp16")]; + tensor var_11035_begin_0 = const()[name = tensor("op_11035_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_11035_end_0 = const()[name = tensor("op_11035_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_11035_end_mask_0 = const()[name = tensor("op_11035_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11035_cast_fp16 = slice_by_index(begin = var_11035_begin_0, end = var_11035_end_0, end_mask = var_11035_end_mask_0, x = var_10943_cast_fp16)[name = tensor("op_11035_cast_fp16")]; + tensor var_11042_begin_0 = const()[name = tensor("op_11042_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_11042_end_0 = const()[name = tensor("op_11042_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_11042_end_mask_0 = const()[name = tensor("op_11042_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11042_cast_fp16 = slice_by_index(begin = var_11042_begin_0, end = var_11042_end_0, end_mask = var_11042_end_mask_0, x = var_10943_cast_fp16)[name = tensor("op_11042_cast_fp16")]; + tensor var_11049_begin_0 = const()[name = tensor("op_11049_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_11049_end_0 = const()[name = tensor("op_11049_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_11049_end_mask_0 = const()[name = tensor("op_11049_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11049_cast_fp16 = slice_by_index(begin = var_11049_begin_0, end = var_11049_end_0, end_mask = var_11049_end_mask_0, x = var_10943_cast_fp16)[name = tensor("op_11049_cast_fp16")]; + tensor var_11056_begin_0 = const()[name = tensor("op_11056_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_11056_end_0 = const()[name = tensor("op_11056_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_11056_end_mask_0 = const()[name = tensor("op_11056_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11056_cast_fp16 = slice_by_index(begin = var_11056_begin_0, end = var_11056_end_0, end_mask = var_11056_end_mask_0, x = var_10947_cast_fp16)[name = tensor("op_11056_cast_fp16")]; + tensor var_11063_begin_0 = const()[name = tensor("op_11063_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_11063_end_0 = const()[name = tensor("op_11063_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_11063_end_mask_0 = const()[name = tensor("op_11063_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11063_cast_fp16 = slice_by_index(begin = var_11063_begin_0, end = var_11063_end_0, end_mask = var_11063_end_mask_0, x = var_10947_cast_fp16)[name = tensor("op_11063_cast_fp16")]; + tensor var_11070_begin_0 = const()[name = tensor("op_11070_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_11070_end_0 = const()[name = tensor("op_11070_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_11070_end_mask_0 = const()[name = tensor("op_11070_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11070_cast_fp16 = slice_by_index(begin = var_11070_begin_0, end = var_11070_end_0, end_mask = var_11070_end_mask_0, x = var_10947_cast_fp16)[name = tensor("op_11070_cast_fp16")]; + tensor var_11077_begin_0 = const()[name = tensor("op_11077_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_11077_end_0 = const()[name = tensor("op_11077_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_11077_end_mask_0 = const()[name = tensor("op_11077_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11077_cast_fp16 = slice_by_index(begin = var_11077_begin_0, end = var_11077_end_0, end_mask = var_11077_end_mask_0, x = var_10947_cast_fp16)[name = tensor("op_11077_cast_fp16")]; + tensor var_11084_begin_0 = const()[name = tensor("op_11084_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_11084_end_0 = const()[name = tensor("op_11084_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_11084_end_mask_0 = const()[name = tensor("op_11084_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11084_cast_fp16 = slice_by_index(begin = var_11084_begin_0, end = var_11084_end_0, end_mask = var_11084_end_mask_0, x = var_10951_cast_fp16)[name = tensor("op_11084_cast_fp16")]; + tensor var_11091_begin_0 = const()[name = tensor("op_11091_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_11091_end_0 = const()[name = tensor("op_11091_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_11091_end_mask_0 = const()[name = tensor("op_11091_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11091_cast_fp16 = slice_by_index(begin = var_11091_begin_0, end = var_11091_end_0, end_mask = var_11091_end_mask_0, x = var_10951_cast_fp16)[name = tensor("op_11091_cast_fp16")]; + tensor var_11098_begin_0 = const()[name = tensor("op_11098_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_11098_end_0 = const()[name = tensor("op_11098_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_11098_end_mask_0 = const()[name = tensor("op_11098_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11098_cast_fp16 = slice_by_index(begin = var_11098_begin_0, end = var_11098_end_0, end_mask = var_11098_end_mask_0, x = var_10951_cast_fp16)[name = tensor("op_11098_cast_fp16")]; + tensor var_11105_begin_0 = const()[name = tensor("op_11105_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_11105_end_0 = const()[name = tensor("op_11105_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_11105_end_mask_0 = const()[name = tensor("op_11105_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11105_cast_fp16 = slice_by_index(begin = var_11105_begin_0, end = var_11105_end_0, end_mask = var_11105_end_mask_0, x = var_10951_cast_fp16)[name = tensor("op_11105_cast_fp16")]; + tensor var_11112_begin_0 = const()[name = tensor("op_11112_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_11112_end_0 = const()[name = tensor("op_11112_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_11112_end_mask_0 = const()[name = tensor("op_11112_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11112_cast_fp16 = slice_by_index(begin = var_11112_begin_0, end = var_11112_end_0, end_mask = var_11112_end_mask_0, x = var_10955_cast_fp16)[name = tensor("op_11112_cast_fp16")]; + tensor var_11119_begin_0 = const()[name = tensor("op_11119_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_11119_end_0 = const()[name = tensor("op_11119_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_11119_end_mask_0 = const()[name = tensor("op_11119_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11119_cast_fp16 = slice_by_index(begin = var_11119_begin_0, end = var_11119_end_0, end_mask = var_11119_end_mask_0, x = var_10955_cast_fp16)[name = tensor("op_11119_cast_fp16")]; + tensor var_11126_begin_0 = const()[name = tensor("op_11126_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_11126_end_0 = const()[name = tensor("op_11126_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_11126_end_mask_0 = const()[name = tensor("op_11126_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11126_cast_fp16 = slice_by_index(begin = var_11126_begin_0, end = var_11126_end_0, end_mask = var_11126_end_mask_0, x = var_10955_cast_fp16)[name = tensor("op_11126_cast_fp16")]; + tensor var_11133_begin_0 = const()[name = tensor("op_11133_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_11133_end_0 = const()[name = tensor("op_11133_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_11133_end_mask_0 = const()[name = tensor("op_11133_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11133_cast_fp16 = slice_by_index(begin = var_11133_begin_0, end = var_11133_end_0, end_mask = var_11133_end_mask_0, x = var_10955_cast_fp16)[name = tensor("op_11133_cast_fp16")]; + tensor var_11140_begin_0 = const()[name = tensor("op_11140_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_11140_end_0 = const()[name = tensor("op_11140_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_11140_end_mask_0 = const()[name = tensor("op_11140_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11140_cast_fp16 = slice_by_index(begin = var_11140_begin_0, end = var_11140_end_0, end_mask = var_11140_end_mask_0, x = var_10959_cast_fp16)[name = tensor("op_11140_cast_fp16")]; + tensor var_11147_begin_0 = const()[name = tensor("op_11147_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_11147_end_0 = const()[name = tensor("op_11147_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_11147_end_mask_0 = const()[name = tensor("op_11147_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11147_cast_fp16 = slice_by_index(begin = var_11147_begin_0, end = var_11147_end_0, end_mask = var_11147_end_mask_0, x = var_10959_cast_fp16)[name = tensor("op_11147_cast_fp16")]; + tensor var_11154_begin_0 = const()[name = tensor("op_11154_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_11154_end_0 = const()[name = tensor("op_11154_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_11154_end_mask_0 = const()[name = tensor("op_11154_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11154_cast_fp16 = slice_by_index(begin = var_11154_begin_0, end = var_11154_end_0, end_mask = var_11154_end_mask_0, x = var_10959_cast_fp16)[name = tensor("op_11154_cast_fp16")]; + tensor var_11161_begin_0 = const()[name = tensor("op_11161_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_11161_end_0 = const()[name = tensor("op_11161_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_11161_end_mask_0 = const()[name = tensor("op_11161_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11161_cast_fp16 = slice_by_index(begin = var_11161_begin_0, end = var_11161_end_0, end_mask = var_11161_end_mask_0, x = var_10959_cast_fp16)[name = tensor("op_11161_cast_fp16")]; + tensor var_11168_begin_0 = const()[name = tensor("op_11168_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_11168_end_0 = const()[name = tensor("op_11168_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_11168_end_mask_0 = const()[name = tensor("op_11168_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11168_cast_fp16 = slice_by_index(begin = var_11168_begin_0, end = var_11168_end_0, end_mask = var_11168_end_mask_0, x = var_10963_cast_fp16)[name = tensor("op_11168_cast_fp16")]; + tensor var_11175_begin_0 = const()[name = tensor("op_11175_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_11175_end_0 = const()[name = tensor("op_11175_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_11175_end_mask_0 = const()[name = tensor("op_11175_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11175_cast_fp16 = slice_by_index(begin = var_11175_begin_0, end = var_11175_end_0, end_mask = var_11175_end_mask_0, x = var_10963_cast_fp16)[name = tensor("op_11175_cast_fp16")]; + tensor var_11182_begin_0 = const()[name = tensor("op_11182_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_11182_end_0 = const()[name = tensor("op_11182_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_11182_end_mask_0 = const()[name = tensor("op_11182_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11182_cast_fp16 = slice_by_index(begin = var_11182_begin_0, end = var_11182_end_0, end_mask = var_11182_end_mask_0, x = var_10963_cast_fp16)[name = tensor("op_11182_cast_fp16")]; + tensor var_11189_begin_0 = const()[name = tensor("op_11189_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_11189_end_0 = const()[name = tensor("op_11189_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_11189_end_mask_0 = const()[name = tensor("op_11189_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11189_cast_fp16 = slice_by_index(begin = var_11189_begin_0, end = var_11189_end_0, end_mask = var_11189_end_mask_0, x = var_10963_cast_fp16)[name = tensor("op_11189_cast_fp16")]; + tensor var_11196_begin_0 = const()[name = tensor("op_11196_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_11196_end_0 = const()[name = tensor("op_11196_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_11196_end_mask_0 = const()[name = tensor("op_11196_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11196_cast_fp16 = slice_by_index(begin = var_11196_begin_0, end = var_11196_end_0, end_mask = var_11196_end_mask_0, x = var_10967_cast_fp16)[name = tensor("op_11196_cast_fp16")]; + tensor var_11203_begin_0 = const()[name = tensor("op_11203_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_11203_end_0 = const()[name = tensor("op_11203_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_11203_end_mask_0 = const()[name = tensor("op_11203_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11203_cast_fp16 = slice_by_index(begin = var_11203_begin_0, end = var_11203_end_0, end_mask = var_11203_end_mask_0, x = var_10967_cast_fp16)[name = tensor("op_11203_cast_fp16")]; + tensor var_11210_begin_0 = const()[name = tensor("op_11210_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_11210_end_0 = const()[name = tensor("op_11210_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_11210_end_mask_0 = const()[name = tensor("op_11210_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11210_cast_fp16 = slice_by_index(begin = var_11210_begin_0, end = var_11210_end_0, end_mask = var_11210_end_mask_0, x = var_10967_cast_fp16)[name = tensor("op_11210_cast_fp16")]; + tensor var_11217_begin_0 = const()[name = tensor("op_11217_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_11217_end_0 = const()[name = tensor("op_11217_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_11217_end_mask_0 = const()[name = tensor("op_11217_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11217_cast_fp16 = slice_by_index(begin = var_11217_begin_0, end = var_11217_end_0, end_mask = var_11217_end_mask_0, x = var_10967_cast_fp16)[name = tensor("op_11217_cast_fp16")]; + tensor var_11224_begin_0 = const()[name = tensor("op_11224_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_11224_end_0 = const()[name = tensor("op_11224_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_11224_end_mask_0 = const()[name = tensor("op_11224_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11224_cast_fp16 = slice_by_index(begin = var_11224_begin_0, end = var_11224_end_0, end_mask = var_11224_end_mask_0, x = var_10971_cast_fp16)[name = tensor("op_11224_cast_fp16")]; + tensor var_11231_begin_0 = const()[name = tensor("op_11231_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_11231_end_0 = const()[name = tensor("op_11231_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_11231_end_mask_0 = const()[name = tensor("op_11231_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11231_cast_fp16 = slice_by_index(begin = var_11231_begin_0, end = var_11231_end_0, end_mask = var_11231_end_mask_0, x = var_10971_cast_fp16)[name = tensor("op_11231_cast_fp16")]; + tensor var_11238_begin_0 = const()[name = tensor("op_11238_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_11238_end_0 = const()[name = tensor("op_11238_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_11238_end_mask_0 = const()[name = tensor("op_11238_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11238_cast_fp16 = slice_by_index(begin = var_11238_begin_0, end = var_11238_end_0, end_mask = var_11238_end_mask_0, x = var_10971_cast_fp16)[name = tensor("op_11238_cast_fp16")]; + tensor var_11245_begin_0 = const()[name = tensor("op_11245_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_11245_end_0 = const()[name = tensor("op_11245_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_11245_end_mask_0 = const()[name = tensor("op_11245_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11245_cast_fp16 = slice_by_index(begin = var_11245_begin_0, end = var_11245_end_0, end_mask = var_11245_end_mask_0, x = var_10971_cast_fp16)[name = tensor("op_11245_cast_fp16")]; + tensor var_11252_begin_0 = const()[name = tensor("op_11252_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_11252_end_0 = const()[name = tensor("op_11252_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_11252_end_mask_0 = const()[name = tensor("op_11252_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11252_cast_fp16 = slice_by_index(begin = var_11252_begin_0, end = var_11252_end_0, end_mask = var_11252_end_mask_0, x = var_10975_cast_fp16)[name = tensor("op_11252_cast_fp16")]; + tensor var_11259_begin_0 = const()[name = tensor("op_11259_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_11259_end_0 = const()[name = tensor("op_11259_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_11259_end_mask_0 = const()[name = tensor("op_11259_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11259_cast_fp16 = slice_by_index(begin = var_11259_begin_0, end = var_11259_end_0, end_mask = var_11259_end_mask_0, x = var_10975_cast_fp16)[name = tensor("op_11259_cast_fp16")]; + tensor var_11266_begin_0 = const()[name = tensor("op_11266_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_11266_end_0 = const()[name = tensor("op_11266_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_11266_end_mask_0 = const()[name = tensor("op_11266_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11266_cast_fp16 = slice_by_index(begin = var_11266_begin_0, end = var_11266_end_0, end_mask = var_11266_end_mask_0, x = var_10975_cast_fp16)[name = tensor("op_11266_cast_fp16")]; + tensor var_11273_begin_0 = const()[name = tensor("op_11273_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_11273_end_0 = const()[name = tensor("op_11273_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_11273_end_mask_0 = const()[name = tensor("op_11273_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11273_cast_fp16 = slice_by_index(begin = var_11273_begin_0, end = var_11273_end_0, end_mask = var_11273_end_mask_0, x = var_10975_cast_fp16)[name = tensor("op_11273_cast_fp16")]; + tensor var_11280_begin_0 = const()[name = tensor("op_11280_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_11280_end_0 = const()[name = tensor("op_11280_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_11280_end_mask_0 = const()[name = tensor("op_11280_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11280_cast_fp16 = slice_by_index(begin = var_11280_begin_0, end = var_11280_end_0, end_mask = var_11280_end_mask_0, x = var_10979_cast_fp16)[name = tensor("op_11280_cast_fp16")]; + tensor var_11287_begin_0 = const()[name = tensor("op_11287_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_11287_end_0 = const()[name = tensor("op_11287_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_11287_end_mask_0 = const()[name = tensor("op_11287_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11287_cast_fp16 = slice_by_index(begin = var_11287_begin_0, end = var_11287_end_0, end_mask = var_11287_end_mask_0, x = var_10979_cast_fp16)[name = tensor("op_11287_cast_fp16")]; + tensor var_11294_begin_0 = const()[name = tensor("op_11294_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_11294_end_0 = const()[name = tensor("op_11294_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_11294_end_mask_0 = const()[name = tensor("op_11294_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11294_cast_fp16 = slice_by_index(begin = var_11294_begin_0, end = var_11294_end_0, end_mask = var_11294_end_mask_0, x = var_10979_cast_fp16)[name = tensor("op_11294_cast_fp16")]; + tensor var_11301_begin_0 = const()[name = tensor("op_11301_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_11301_end_0 = const()[name = tensor("op_11301_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_11301_end_mask_0 = const()[name = tensor("op_11301_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11301_cast_fp16 = slice_by_index(begin = var_11301_begin_0, end = var_11301_end_0, end_mask = var_11301_end_mask_0, x = var_10979_cast_fp16)[name = tensor("op_11301_cast_fp16")]; + tensor var_11308_begin_0 = const()[name = tensor("op_11308_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_11308_end_0 = const()[name = tensor("op_11308_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_11308_end_mask_0 = const()[name = tensor("op_11308_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11308_cast_fp16 = slice_by_index(begin = var_11308_begin_0, end = var_11308_end_0, end_mask = var_11308_end_mask_0, x = var_10983_cast_fp16)[name = tensor("op_11308_cast_fp16")]; + tensor var_11315_begin_0 = const()[name = tensor("op_11315_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_11315_end_0 = const()[name = tensor("op_11315_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_11315_end_mask_0 = const()[name = tensor("op_11315_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11315_cast_fp16 = slice_by_index(begin = var_11315_begin_0, end = var_11315_end_0, end_mask = var_11315_end_mask_0, x = var_10983_cast_fp16)[name = tensor("op_11315_cast_fp16")]; + tensor var_11322_begin_0 = const()[name = tensor("op_11322_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_11322_end_0 = const()[name = tensor("op_11322_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_11322_end_mask_0 = const()[name = tensor("op_11322_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11322_cast_fp16 = slice_by_index(begin = var_11322_begin_0, end = var_11322_end_0, end_mask = var_11322_end_mask_0, x = var_10983_cast_fp16)[name = tensor("op_11322_cast_fp16")]; + tensor var_11329_begin_0 = const()[name = tensor("op_11329_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_11329_end_0 = const()[name = tensor("op_11329_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_11329_end_mask_0 = const()[name = tensor("op_11329_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11329_cast_fp16 = slice_by_index(begin = var_11329_begin_0, end = var_11329_end_0, end_mask = var_11329_end_mask_0, x = var_10983_cast_fp16)[name = tensor("op_11329_cast_fp16")]; + tensor var_11336_begin_0 = const()[name = tensor("op_11336_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_11336_end_0 = const()[name = tensor("op_11336_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_11336_end_mask_0 = const()[name = tensor("op_11336_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11336_cast_fp16 = slice_by_index(begin = var_11336_begin_0, end = var_11336_end_0, end_mask = var_11336_end_mask_0, x = var_10987_cast_fp16)[name = tensor("op_11336_cast_fp16")]; + tensor var_11343_begin_0 = const()[name = tensor("op_11343_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_11343_end_0 = const()[name = tensor("op_11343_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_11343_end_mask_0 = const()[name = tensor("op_11343_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11343_cast_fp16 = slice_by_index(begin = var_11343_begin_0, end = var_11343_end_0, end_mask = var_11343_end_mask_0, x = var_10987_cast_fp16)[name = tensor("op_11343_cast_fp16")]; + tensor var_11350_begin_0 = const()[name = tensor("op_11350_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_11350_end_0 = const()[name = tensor("op_11350_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_11350_end_mask_0 = const()[name = tensor("op_11350_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11350_cast_fp16 = slice_by_index(begin = var_11350_begin_0, end = var_11350_end_0, end_mask = var_11350_end_mask_0, x = var_10987_cast_fp16)[name = tensor("op_11350_cast_fp16")]; + tensor var_11357_begin_0 = const()[name = tensor("op_11357_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_11357_end_0 = const()[name = tensor("op_11357_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_11357_end_mask_0 = const()[name = tensor("op_11357_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11357_cast_fp16 = slice_by_index(begin = var_11357_begin_0, end = var_11357_end_0, end_mask = var_11357_end_mask_0, x = var_10987_cast_fp16)[name = tensor("op_11357_cast_fp16")]; + tensor var_11364_begin_0 = const()[name = tensor("op_11364_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_11364_end_0 = const()[name = tensor("op_11364_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_11364_end_mask_0 = const()[name = tensor("op_11364_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11364_cast_fp16 = slice_by_index(begin = var_11364_begin_0, end = var_11364_end_0, end_mask = var_11364_end_mask_0, x = var_10991_cast_fp16)[name = tensor("op_11364_cast_fp16")]; + tensor var_11371_begin_0 = const()[name = tensor("op_11371_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_11371_end_0 = const()[name = tensor("op_11371_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_11371_end_mask_0 = const()[name = tensor("op_11371_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11371_cast_fp16 = slice_by_index(begin = var_11371_begin_0, end = var_11371_end_0, end_mask = var_11371_end_mask_0, x = var_10991_cast_fp16)[name = tensor("op_11371_cast_fp16")]; + tensor var_11378_begin_0 = const()[name = tensor("op_11378_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_11378_end_0 = const()[name = tensor("op_11378_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_11378_end_mask_0 = const()[name = tensor("op_11378_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11378_cast_fp16 = slice_by_index(begin = var_11378_begin_0, end = var_11378_end_0, end_mask = var_11378_end_mask_0, x = var_10991_cast_fp16)[name = tensor("op_11378_cast_fp16")]; + tensor var_11385_begin_0 = const()[name = tensor("op_11385_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_11385_end_0 = const()[name = tensor("op_11385_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_11385_end_mask_0 = const()[name = tensor("op_11385_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11385_cast_fp16 = slice_by_index(begin = var_11385_begin_0, end = var_11385_end_0, end_mask = var_11385_end_mask_0, x = var_10991_cast_fp16)[name = tensor("op_11385_cast_fp16")]; + tensor var_11392_begin_0 = const()[name = tensor("op_11392_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_11392_end_0 = const()[name = tensor("op_11392_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_11392_end_mask_0 = const()[name = tensor("op_11392_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11392_cast_fp16 = slice_by_index(begin = var_11392_begin_0, end = var_11392_end_0, end_mask = var_11392_end_mask_0, x = var_10995_cast_fp16)[name = tensor("op_11392_cast_fp16")]; + tensor var_11399_begin_0 = const()[name = tensor("op_11399_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_11399_end_0 = const()[name = tensor("op_11399_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_11399_end_mask_0 = const()[name = tensor("op_11399_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11399_cast_fp16 = slice_by_index(begin = var_11399_begin_0, end = var_11399_end_0, end_mask = var_11399_end_mask_0, x = var_10995_cast_fp16)[name = tensor("op_11399_cast_fp16")]; + tensor var_11406_begin_0 = const()[name = tensor("op_11406_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_11406_end_0 = const()[name = tensor("op_11406_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_11406_end_mask_0 = const()[name = tensor("op_11406_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11406_cast_fp16 = slice_by_index(begin = var_11406_begin_0, end = var_11406_end_0, end_mask = var_11406_end_mask_0, x = var_10995_cast_fp16)[name = tensor("op_11406_cast_fp16")]; + tensor var_11413_begin_0 = const()[name = tensor("op_11413_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_11413_end_0 = const()[name = tensor("op_11413_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_11413_end_mask_0 = const()[name = tensor("op_11413_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11413_cast_fp16 = slice_by_index(begin = var_11413_begin_0, end = var_11413_end_0, end_mask = var_11413_end_mask_0, x = var_10995_cast_fp16)[name = tensor("op_11413_cast_fp16")]; + tensor var_11420_begin_0 = const()[name = tensor("op_11420_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_11420_end_0 = const()[name = tensor("op_11420_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_11420_end_mask_0 = const()[name = tensor("op_11420_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11420_cast_fp16 = slice_by_index(begin = var_11420_begin_0, end = var_11420_end_0, end_mask = var_11420_end_mask_0, x = var_10999_cast_fp16)[name = tensor("op_11420_cast_fp16")]; + tensor var_11427_begin_0 = const()[name = tensor("op_11427_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_11427_end_0 = const()[name = tensor("op_11427_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_11427_end_mask_0 = const()[name = tensor("op_11427_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11427_cast_fp16 = slice_by_index(begin = var_11427_begin_0, end = var_11427_end_0, end_mask = var_11427_end_mask_0, x = var_10999_cast_fp16)[name = tensor("op_11427_cast_fp16")]; + tensor var_11434_begin_0 = const()[name = tensor("op_11434_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_11434_end_0 = const()[name = tensor("op_11434_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_11434_end_mask_0 = const()[name = tensor("op_11434_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11434_cast_fp16 = slice_by_index(begin = var_11434_begin_0, end = var_11434_end_0, end_mask = var_11434_end_mask_0, x = var_10999_cast_fp16)[name = tensor("op_11434_cast_fp16")]; + tensor var_11441_begin_0 = const()[name = tensor("op_11441_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_11441_end_0 = const()[name = tensor("op_11441_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_11441_end_mask_0 = const()[name = tensor("op_11441_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11441_cast_fp16 = slice_by_index(begin = var_11441_begin_0, end = var_11441_end_0, end_mask = var_11441_end_mask_0, x = var_10999_cast_fp16)[name = tensor("op_11441_cast_fp16")]; + tensor var_11448_begin_0 = const()[name = tensor("op_11448_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_11448_end_0 = const()[name = tensor("op_11448_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_11448_end_mask_0 = const()[name = tensor("op_11448_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11448_cast_fp16 = slice_by_index(begin = var_11448_begin_0, end = var_11448_end_0, end_mask = var_11448_end_mask_0, x = var_11003_cast_fp16)[name = tensor("op_11448_cast_fp16")]; + tensor var_11455_begin_0 = const()[name = tensor("op_11455_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_11455_end_0 = const()[name = tensor("op_11455_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_11455_end_mask_0 = const()[name = tensor("op_11455_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11455_cast_fp16 = slice_by_index(begin = var_11455_begin_0, end = var_11455_end_0, end_mask = var_11455_end_mask_0, x = var_11003_cast_fp16)[name = tensor("op_11455_cast_fp16")]; + tensor var_11462_begin_0 = const()[name = tensor("op_11462_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_11462_end_0 = const()[name = tensor("op_11462_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_11462_end_mask_0 = const()[name = tensor("op_11462_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11462_cast_fp16 = slice_by_index(begin = var_11462_begin_0, end = var_11462_end_0, end_mask = var_11462_end_mask_0, x = var_11003_cast_fp16)[name = tensor("op_11462_cast_fp16")]; + tensor var_11469_begin_0 = const()[name = tensor("op_11469_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_11469_end_0 = const()[name = tensor("op_11469_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_11469_end_mask_0 = const()[name = tensor("op_11469_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11469_cast_fp16 = slice_by_index(begin = var_11469_begin_0, end = var_11469_end_0, end_mask = var_11469_end_mask_0, x = var_11003_cast_fp16)[name = tensor("op_11469_cast_fp16")]; + tensor var_11476_begin_0 = const()[name = tensor("op_11476_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_11476_end_0 = const()[name = tensor("op_11476_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_11476_end_mask_0 = const()[name = tensor("op_11476_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11476_cast_fp16 = slice_by_index(begin = var_11476_begin_0, end = var_11476_end_0, end_mask = var_11476_end_mask_0, x = var_11007_cast_fp16)[name = tensor("op_11476_cast_fp16")]; + tensor var_11483_begin_0 = const()[name = tensor("op_11483_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_11483_end_0 = const()[name = tensor("op_11483_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_11483_end_mask_0 = const()[name = tensor("op_11483_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11483_cast_fp16 = slice_by_index(begin = var_11483_begin_0, end = var_11483_end_0, end_mask = var_11483_end_mask_0, x = var_11007_cast_fp16)[name = tensor("op_11483_cast_fp16")]; + tensor var_11490_begin_0 = const()[name = tensor("op_11490_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_11490_end_0 = const()[name = tensor("op_11490_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_11490_end_mask_0 = const()[name = tensor("op_11490_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11490_cast_fp16 = slice_by_index(begin = var_11490_begin_0, end = var_11490_end_0, end_mask = var_11490_end_mask_0, x = var_11007_cast_fp16)[name = tensor("op_11490_cast_fp16")]; + tensor var_11497_begin_0 = const()[name = tensor("op_11497_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_11497_end_0 = const()[name = tensor("op_11497_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_11497_end_mask_0 = const()[name = tensor("op_11497_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11497_cast_fp16 = slice_by_index(begin = var_11497_begin_0, end = var_11497_end_0, end_mask = var_11497_end_mask_0, x = var_11007_cast_fp16)[name = tensor("op_11497_cast_fp16")]; + tensor var_11504_begin_0 = const()[name = tensor("op_11504_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_11504_end_0 = const()[name = tensor("op_11504_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_11504_end_mask_0 = const()[name = tensor("op_11504_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11504_cast_fp16 = slice_by_index(begin = var_11504_begin_0, end = var_11504_end_0, end_mask = var_11504_end_mask_0, x = var_11011_cast_fp16)[name = tensor("op_11504_cast_fp16")]; + tensor var_11511_begin_0 = const()[name = tensor("op_11511_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_11511_end_0 = const()[name = tensor("op_11511_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_11511_end_mask_0 = const()[name = tensor("op_11511_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11511_cast_fp16 = slice_by_index(begin = var_11511_begin_0, end = var_11511_end_0, end_mask = var_11511_end_mask_0, x = var_11011_cast_fp16)[name = tensor("op_11511_cast_fp16")]; + tensor var_11518_begin_0 = const()[name = tensor("op_11518_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_11518_end_0 = const()[name = tensor("op_11518_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_11518_end_mask_0 = const()[name = tensor("op_11518_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11518_cast_fp16 = slice_by_index(begin = var_11518_begin_0, end = var_11518_end_0, end_mask = var_11518_end_mask_0, x = var_11011_cast_fp16)[name = tensor("op_11518_cast_fp16")]; + tensor var_11525_begin_0 = const()[name = tensor("op_11525_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_11525_end_0 = const()[name = tensor("op_11525_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_11525_end_mask_0 = const()[name = tensor("op_11525_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11525_cast_fp16 = slice_by_index(begin = var_11525_begin_0, end = var_11525_end_0, end_mask = var_11525_end_mask_0, x = var_11011_cast_fp16)[name = tensor("op_11525_cast_fp16")]; + tensor var_11532_begin_0 = const()[name = tensor("op_11532_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_11532_end_0 = const()[name = tensor("op_11532_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_11532_end_mask_0 = const()[name = tensor("op_11532_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11532_cast_fp16 = slice_by_index(begin = var_11532_begin_0, end = var_11532_end_0, end_mask = var_11532_end_mask_0, x = var_11015_cast_fp16)[name = tensor("op_11532_cast_fp16")]; + tensor var_11539_begin_0 = const()[name = tensor("op_11539_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_11539_end_0 = const()[name = tensor("op_11539_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_11539_end_mask_0 = const()[name = tensor("op_11539_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11539_cast_fp16 = slice_by_index(begin = var_11539_begin_0, end = var_11539_end_0, end_mask = var_11539_end_mask_0, x = var_11015_cast_fp16)[name = tensor("op_11539_cast_fp16")]; + tensor var_11546_begin_0 = const()[name = tensor("op_11546_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_11546_end_0 = const()[name = tensor("op_11546_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_11546_end_mask_0 = const()[name = tensor("op_11546_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11546_cast_fp16 = slice_by_index(begin = var_11546_begin_0, end = var_11546_end_0, end_mask = var_11546_end_mask_0, x = var_11015_cast_fp16)[name = tensor("op_11546_cast_fp16")]; + tensor var_11553_begin_0 = const()[name = tensor("op_11553_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_11553_end_0 = const()[name = tensor("op_11553_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_11553_end_mask_0 = const()[name = tensor("op_11553_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11553_cast_fp16 = slice_by_index(begin = var_11553_begin_0, end = var_11553_end_0, end_mask = var_11553_end_mask_0, x = var_11015_cast_fp16)[name = tensor("op_11553_cast_fp16")]; + tensor var_11560_begin_0 = const()[name = tensor("op_11560_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_11560_end_0 = const()[name = tensor("op_11560_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_11560_end_mask_0 = const()[name = tensor("op_11560_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11560_cast_fp16 = slice_by_index(begin = var_11560_begin_0, end = var_11560_end_0, end_mask = var_11560_end_mask_0, x = var_11019_cast_fp16)[name = tensor("op_11560_cast_fp16")]; + tensor var_11567_begin_0 = const()[name = tensor("op_11567_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_11567_end_0 = const()[name = tensor("op_11567_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_11567_end_mask_0 = const()[name = tensor("op_11567_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11567_cast_fp16 = slice_by_index(begin = var_11567_begin_0, end = var_11567_end_0, end_mask = var_11567_end_mask_0, x = var_11019_cast_fp16)[name = tensor("op_11567_cast_fp16")]; + tensor var_11574_begin_0 = const()[name = tensor("op_11574_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_11574_end_0 = const()[name = tensor("op_11574_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_11574_end_mask_0 = const()[name = tensor("op_11574_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11574_cast_fp16 = slice_by_index(begin = var_11574_begin_0, end = var_11574_end_0, end_mask = var_11574_end_mask_0, x = var_11019_cast_fp16)[name = tensor("op_11574_cast_fp16")]; + tensor var_11581_begin_0 = const()[name = tensor("op_11581_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_11581_end_0 = const()[name = tensor("op_11581_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_11581_end_mask_0 = const()[name = tensor("op_11581_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11581_cast_fp16 = slice_by_index(begin = var_11581_begin_0, end = var_11581_end_0, end_mask = var_11581_end_mask_0, x = var_11019_cast_fp16)[name = tensor("op_11581_cast_fp16")]; + tensor k_15_perm_0 = const()[name = tensor("k_15_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_11586_begin_0 = const()[name = tensor("op_11586_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_11586_end_0 = const()[name = tensor("op_11586_end_0"), val = tensor([1, 1500, 1, 64])]; + tensor var_11586_end_mask_0 = const()[name = tensor("op_11586_end_mask_0"), val = tensor([true, true, true, false])]; + tensor transpose_24 = transpose(perm = k_15_perm_0, x = key_15_cast_fp16)[name = tensor("transpose_24")]; + tensor var_11586_cast_fp16 = slice_by_index(begin = var_11586_begin_0, end = var_11586_end_0, end_mask = var_11586_end_mask_0, x = transpose_24)[name = tensor("op_11586_cast_fp16")]; + tensor var_11590_begin_0 = const()[name = tensor("op_11590_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor var_11590_end_0 = const()[name = tensor("op_11590_end_0"), val = tensor([1, 1500, 1, 128])]; + tensor var_11590_end_mask_0 = const()[name = tensor("op_11590_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11590_cast_fp16 = slice_by_index(begin = var_11590_begin_0, end = var_11590_end_0, end_mask = var_11590_end_mask_0, x = transpose_24)[name = tensor("op_11590_cast_fp16")]; + tensor var_11594_begin_0 = const()[name = tensor("op_11594_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_11594_end_0 = const()[name = tensor("op_11594_end_0"), val = tensor([1, 1500, 1, 192])]; + tensor var_11594_end_mask_0 = const()[name = tensor("op_11594_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11594_cast_fp16 = slice_by_index(begin = var_11594_begin_0, end = var_11594_end_0, end_mask = var_11594_end_mask_0, x = transpose_24)[name = tensor("op_11594_cast_fp16")]; + tensor var_11598_begin_0 = const()[name = tensor("op_11598_begin_0"), val = tensor([0, 0, 0, 192])]; + tensor var_11598_end_0 = const()[name = tensor("op_11598_end_0"), val = tensor([1, 1500, 1, 256])]; + tensor var_11598_end_mask_0 = const()[name = tensor("op_11598_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11598_cast_fp16 = slice_by_index(begin = var_11598_begin_0, end = var_11598_end_0, end_mask = var_11598_end_mask_0, x = transpose_24)[name = tensor("op_11598_cast_fp16")]; + tensor var_11602_begin_0 = const()[name = tensor("op_11602_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_11602_end_0 = const()[name = tensor("op_11602_end_0"), val = tensor([1, 1500, 1, 320])]; + tensor var_11602_end_mask_0 = const()[name = tensor("op_11602_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11602_cast_fp16 = slice_by_index(begin = var_11602_begin_0, end = var_11602_end_0, end_mask = var_11602_end_mask_0, x = transpose_24)[name = tensor("op_11602_cast_fp16")]; + tensor var_11606_begin_0 = const()[name = tensor("op_11606_begin_0"), val = tensor([0, 0, 0, 320])]; + tensor var_11606_end_0 = const()[name = tensor("op_11606_end_0"), val = tensor([1, 1500, 1, 384])]; + tensor var_11606_end_mask_0 = const()[name = tensor("op_11606_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11606_cast_fp16 = slice_by_index(begin = var_11606_begin_0, end = var_11606_end_0, end_mask = var_11606_end_mask_0, x = transpose_24)[name = tensor("op_11606_cast_fp16")]; + tensor var_11610_begin_0 = const()[name = tensor("op_11610_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_11610_end_0 = const()[name = tensor("op_11610_end_0"), val = tensor([1, 1500, 1, 448])]; + tensor var_11610_end_mask_0 = const()[name = tensor("op_11610_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11610_cast_fp16 = slice_by_index(begin = var_11610_begin_0, end = var_11610_end_0, end_mask = var_11610_end_mask_0, x = transpose_24)[name = tensor("op_11610_cast_fp16")]; + tensor var_11614_begin_0 = const()[name = tensor("op_11614_begin_0"), val = tensor([0, 0, 0, 448])]; + tensor var_11614_end_0 = const()[name = tensor("op_11614_end_0"), val = tensor([1, 1500, 1, 512])]; + tensor var_11614_end_mask_0 = const()[name = tensor("op_11614_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11614_cast_fp16 = slice_by_index(begin = var_11614_begin_0, end = var_11614_end_0, end_mask = var_11614_end_mask_0, x = transpose_24)[name = tensor("op_11614_cast_fp16")]; + tensor var_11618_begin_0 = const()[name = tensor("op_11618_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_11618_end_0 = const()[name = tensor("op_11618_end_0"), val = tensor([1, 1500, 1, 576])]; + tensor var_11618_end_mask_0 = const()[name = tensor("op_11618_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11618_cast_fp16 = slice_by_index(begin = var_11618_begin_0, end = var_11618_end_0, end_mask = var_11618_end_mask_0, x = transpose_24)[name = tensor("op_11618_cast_fp16")]; + tensor var_11622_begin_0 = const()[name = tensor("op_11622_begin_0"), val = tensor([0, 0, 0, 576])]; + tensor var_11622_end_0 = const()[name = tensor("op_11622_end_0"), val = tensor([1, 1500, 1, 640])]; + tensor var_11622_end_mask_0 = const()[name = tensor("op_11622_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11622_cast_fp16 = slice_by_index(begin = var_11622_begin_0, end = var_11622_end_0, end_mask = var_11622_end_mask_0, x = transpose_24)[name = tensor("op_11622_cast_fp16")]; + tensor var_11626_begin_0 = const()[name = tensor("op_11626_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_11626_end_0 = const()[name = tensor("op_11626_end_0"), val = tensor([1, 1500, 1, 704])]; + tensor var_11626_end_mask_0 = const()[name = tensor("op_11626_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11626_cast_fp16 = slice_by_index(begin = var_11626_begin_0, end = var_11626_end_0, end_mask = var_11626_end_mask_0, x = transpose_24)[name = tensor("op_11626_cast_fp16")]; + tensor var_11630_begin_0 = const()[name = tensor("op_11630_begin_0"), val = tensor([0, 0, 0, 704])]; + tensor var_11630_end_0 = const()[name = tensor("op_11630_end_0"), val = tensor([1, 1500, 1, 768])]; + tensor var_11630_end_mask_0 = const()[name = tensor("op_11630_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11630_cast_fp16 = slice_by_index(begin = var_11630_begin_0, end = var_11630_end_0, end_mask = var_11630_end_mask_0, x = transpose_24)[name = tensor("op_11630_cast_fp16")]; + tensor var_11634_begin_0 = const()[name = tensor("op_11634_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_11634_end_0 = const()[name = tensor("op_11634_end_0"), val = tensor([1, 1500, 1, 832])]; + tensor var_11634_end_mask_0 = const()[name = tensor("op_11634_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11634_cast_fp16 = slice_by_index(begin = var_11634_begin_0, end = var_11634_end_0, end_mask = var_11634_end_mask_0, x = transpose_24)[name = tensor("op_11634_cast_fp16")]; + tensor var_11638_begin_0 = const()[name = tensor("op_11638_begin_0"), val = tensor([0, 0, 0, 832])]; + tensor var_11638_end_0 = const()[name = tensor("op_11638_end_0"), val = tensor([1, 1500, 1, 896])]; + tensor var_11638_end_mask_0 = const()[name = tensor("op_11638_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11638_cast_fp16 = slice_by_index(begin = var_11638_begin_0, end = var_11638_end_0, end_mask = var_11638_end_mask_0, x = transpose_24)[name = tensor("op_11638_cast_fp16")]; + tensor var_11642_begin_0 = const()[name = tensor("op_11642_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_11642_end_0 = const()[name = tensor("op_11642_end_0"), val = tensor([1, 1500, 1, 960])]; + tensor var_11642_end_mask_0 = const()[name = tensor("op_11642_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11642_cast_fp16 = slice_by_index(begin = var_11642_begin_0, end = var_11642_end_0, end_mask = var_11642_end_mask_0, x = transpose_24)[name = tensor("op_11642_cast_fp16")]; + tensor var_11646_begin_0 = const()[name = tensor("op_11646_begin_0"), val = tensor([0, 0, 0, 960])]; + tensor var_11646_end_0 = const()[name = tensor("op_11646_end_0"), val = tensor([1, 1500, 1, 1024])]; + tensor var_11646_end_mask_0 = const()[name = tensor("op_11646_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11646_cast_fp16 = slice_by_index(begin = var_11646_begin_0, end = var_11646_end_0, end_mask = var_11646_end_mask_0, x = transpose_24)[name = tensor("op_11646_cast_fp16")]; + tensor var_11650_begin_0 = const()[name = tensor("op_11650_begin_0"), val = tensor([0, 0, 0, 1024])]; + tensor var_11650_end_0 = const()[name = tensor("op_11650_end_0"), val = tensor([1, 1500, 1, 1088])]; + tensor var_11650_end_mask_0 = const()[name = tensor("op_11650_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11650_cast_fp16 = slice_by_index(begin = var_11650_begin_0, end = var_11650_end_0, end_mask = var_11650_end_mask_0, x = transpose_24)[name = tensor("op_11650_cast_fp16")]; + tensor var_11654_begin_0 = const()[name = tensor("op_11654_begin_0"), val = tensor([0, 0, 0, 1088])]; + tensor var_11654_end_0 = const()[name = tensor("op_11654_end_0"), val = tensor([1, 1500, 1, 1152])]; + tensor var_11654_end_mask_0 = const()[name = tensor("op_11654_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11654_cast_fp16 = slice_by_index(begin = var_11654_begin_0, end = var_11654_end_0, end_mask = var_11654_end_mask_0, x = transpose_24)[name = tensor("op_11654_cast_fp16")]; + tensor var_11658_begin_0 = const()[name = tensor("op_11658_begin_0"), val = tensor([0, 0, 0, 1152])]; + tensor var_11658_end_0 = const()[name = tensor("op_11658_end_0"), val = tensor([1, 1500, 1, 1216])]; + tensor var_11658_end_mask_0 = const()[name = tensor("op_11658_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11658_cast_fp16 = slice_by_index(begin = var_11658_begin_0, end = var_11658_end_0, end_mask = var_11658_end_mask_0, x = transpose_24)[name = tensor("op_11658_cast_fp16")]; + tensor var_11662_begin_0 = const()[name = tensor("op_11662_begin_0"), val = tensor([0, 0, 0, 1216])]; + tensor var_11662_end_0 = const()[name = tensor("op_11662_end_0"), val = tensor([1, 1500, 1, 1280])]; + tensor var_11662_end_mask_0 = const()[name = tensor("op_11662_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_11662_cast_fp16 = slice_by_index(begin = var_11662_begin_0, end = var_11662_end_0, end_mask = var_11662_end_mask_0, x = transpose_24)[name = tensor("op_11662_cast_fp16")]; + tensor var_11664_begin_0 = const()[name = tensor("op_11664_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_11664_end_0 = const()[name = tensor("op_11664_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_11664_end_mask_0 = const()[name = tensor("op_11664_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_11664_cast_fp16 = slice_by_index(begin = var_11664_begin_0, end = var_11664_end_0, end_mask = var_11664_end_mask_0, x = value_15_cast_fp16)[name = tensor("op_11664_cast_fp16")]; + tensor var_11668_begin_0 = const()[name = tensor("op_11668_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_11668_end_0 = const()[name = tensor("op_11668_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_11668_end_mask_0 = const()[name = tensor("op_11668_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_11668_cast_fp16 = slice_by_index(begin = var_11668_begin_0, end = var_11668_end_0, end_mask = var_11668_end_mask_0, x = value_15_cast_fp16)[name = tensor("op_11668_cast_fp16")]; + tensor var_11672_begin_0 = const()[name = tensor("op_11672_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_11672_end_0 = const()[name = tensor("op_11672_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_11672_end_mask_0 = const()[name = tensor("op_11672_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_11672_cast_fp16 = slice_by_index(begin = var_11672_begin_0, end = var_11672_end_0, end_mask = var_11672_end_mask_0, x = value_15_cast_fp16)[name = tensor("op_11672_cast_fp16")]; + tensor var_11676_begin_0 = const()[name = tensor("op_11676_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_11676_end_0 = const()[name = tensor("op_11676_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_11676_end_mask_0 = const()[name = tensor("op_11676_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_11676_cast_fp16 = slice_by_index(begin = var_11676_begin_0, end = var_11676_end_0, end_mask = var_11676_end_mask_0, x = value_15_cast_fp16)[name = tensor("op_11676_cast_fp16")]; + tensor var_11680_begin_0 = const()[name = tensor("op_11680_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_11680_end_0 = const()[name = tensor("op_11680_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_11680_end_mask_0 = const()[name = tensor("op_11680_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_11680_cast_fp16 = slice_by_index(begin = var_11680_begin_0, end = var_11680_end_0, end_mask = var_11680_end_mask_0, x = value_15_cast_fp16)[name = tensor("op_11680_cast_fp16")]; + tensor var_11684_begin_0 = const()[name = tensor("op_11684_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_11684_end_0 = const()[name = tensor("op_11684_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_11684_end_mask_0 = const()[name = tensor("op_11684_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_11684_cast_fp16 = slice_by_index(begin = var_11684_begin_0, end = var_11684_end_0, end_mask = var_11684_end_mask_0, x = value_15_cast_fp16)[name = tensor("op_11684_cast_fp16")]; + tensor var_11688_begin_0 = const()[name = tensor("op_11688_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_11688_end_0 = const()[name = tensor("op_11688_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_11688_end_mask_0 = const()[name = tensor("op_11688_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_11688_cast_fp16 = slice_by_index(begin = var_11688_begin_0, end = var_11688_end_0, end_mask = var_11688_end_mask_0, x = value_15_cast_fp16)[name = tensor("op_11688_cast_fp16")]; + tensor var_11692_begin_0 = const()[name = tensor("op_11692_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_11692_end_0 = const()[name = tensor("op_11692_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_11692_end_mask_0 = const()[name = tensor("op_11692_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_11692_cast_fp16 = slice_by_index(begin = var_11692_begin_0, end = var_11692_end_0, end_mask = var_11692_end_mask_0, x = value_15_cast_fp16)[name = tensor("op_11692_cast_fp16")]; + tensor var_11696_begin_0 = const()[name = tensor("op_11696_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_11696_end_0 = const()[name = tensor("op_11696_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_11696_end_mask_0 = const()[name = tensor("op_11696_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_11696_cast_fp16 = slice_by_index(begin = var_11696_begin_0, end = var_11696_end_0, end_mask = var_11696_end_mask_0, x = value_15_cast_fp16)[name = tensor("op_11696_cast_fp16")]; + tensor var_11700_begin_0 = const()[name = tensor("op_11700_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_11700_end_0 = const()[name = tensor("op_11700_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_11700_end_mask_0 = const()[name = tensor("op_11700_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_11700_cast_fp16 = slice_by_index(begin = var_11700_begin_0, end = var_11700_end_0, end_mask = var_11700_end_mask_0, x = value_15_cast_fp16)[name = tensor("op_11700_cast_fp16")]; + tensor var_11704_begin_0 = const()[name = tensor("op_11704_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_11704_end_0 = const()[name = tensor("op_11704_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_11704_end_mask_0 = const()[name = tensor("op_11704_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_11704_cast_fp16 = slice_by_index(begin = var_11704_begin_0, end = var_11704_end_0, end_mask = var_11704_end_mask_0, x = value_15_cast_fp16)[name = tensor("op_11704_cast_fp16")]; + tensor var_11708_begin_0 = const()[name = tensor("op_11708_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_11708_end_0 = const()[name = tensor("op_11708_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_11708_end_mask_0 = const()[name = tensor("op_11708_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_11708_cast_fp16 = slice_by_index(begin = var_11708_begin_0, end = var_11708_end_0, end_mask = var_11708_end_mask_0, x = value_15_cast_fp16)[name = tensor("op_11708_cast_fp16")]; + tensor var_11712_begin_0 = const()[name = tensor("op_11712_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_11712_end_0 = const()[name = tensor("op_11712_end_0"), val = tensor([1, 832, 1, 1500])]; + tensor var_11712_end_mask_0 = const()[name = tensor("op_11712_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_11712_cast_fp16 = slice_by_index(begin = var_11712_begin_0, end = var_11712_end_0, end_mask = var_11712_end_mask_0, x = value_15_cast_fp16)[name = tensor("op_11712_cast_fp16")]; + tensor var_11716_begin_0 = const()[name = tensor("op_11716_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_11716_end_0 = const()[name = tensor("op_11716_end_0"), val = tensor([1, 896, 1, 1500])]; + tensor var_11716_end_mask_0 = const()[name = tensor("op_11716_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_11716_cast_fp16 = slice_by_index(begin = var_11716_begin_0, end = var_11716_end_0, end_mask = var_11716_end_mask_0, x = value_15_cast_fp16)[name = tensor("op_11716_cast_fp16")]; + tensor var_11720_begin_0 = const()[name = tensor("op_11720_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_11720_end_0 = const()[name = tensor("op_11720_end_0"), val = tensor([1, 960, 1, 1500])]; + tensor var_11720_end_mask_0 = const()[name = tensor("op_11720_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_11720_cast_fp16 = slice_by_index(begin = var_11720_begin_0, end = var_11720_end_0, end_mask = var_11720_end_mask_0, x = value_15_cast_fp16)[name = tensor("op_11720_cast_fp16")]; + tensor var_11724_begin_0 = const()[name = tensor("op_11724_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_11724_end_0 = const()[name = tensor("op_11724_end_0"), val = tensor([1, 1024, 1, 1500])]; + tensor var_11724_end_mask_0 = const()[name = tensor("op_11724_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_11724_cast_fp16 = slice_by_index(begin = var_11724_begin_0, end = var_11724_end_0, end_mask = var_11724_end_mask_0, x = value_15_cast_fp16)[name = tensor("op_11724_cast_fp16")]; + tensor var_11728_begin_0 = const()[name = tensor("op_11728_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_11728_end_0 = const()[name = tensor("op_11728_end_0"), val = tensor([1, 1088, 1, 1500])]; + tensor var_11728_end_mask_0 = const()[name = tensor("op_11728_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_11728_cast_fp16 = slice_by_index(begin = var_11728_begin_0, end = var_11728_end_0, end_mask = var_11728_end_mask_0, x = value_15_cast_fp16)[name = tensor("op_11728_cast_fp16")]; + tensor var_11732_begin_0 = const()[name = tensor("op_11732_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_11732_end_0 = const()[name = tensor("op_11732_end_0"), val = tensor([1, 1152, 1, 1500])]; + tensor var_11732_end_mask_0 = const()[name = tensor("op_11732_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_11732_cast_fp16 = slice_by_index(begin = var_11732_begin_0, end = var_11732_end_0, end_mask = var_11732_end_mask_0, x = value_15_cast_fp16)[name = tensor("op_11732_cast_fp16")]; + tensor var_11736_begin_0 = const()[name = tensor("op_11736_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_11736_end_0 = const()[name = tensor("op_11736_end_0"), val = tensor([1, 1216, 1, 1500])]; + tensor var_11736_end_mask_0 = const()[name = tensor("op_11736_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_11736_cast_fp16 = slice_by_index(begin = var_11736_begin_0, end = var_11736_end_0, end_mask = var_11736_end_mask_0, x = value_15_cast_fp16)[name = tensor("op_11736_cast_fp16")]; + tensor var_11740_begin_0 = const()[name = tensor("op_11740_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_11740_end_0 = const()[name = tensor("op_11740_end_0"), val = tensor([1, 1280, 1, 1500])]; + tensor var_11740_end_mask_0 = const()[name = tensor("op_11740_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_11740_cast_fp16 = slice_by_index(begin = var_11740_begin_0, end = var_11740_end_0, end_mask = var_11740_end_mask_0, x = value_15_cast_fp16)[name = tensor("op_11740_cast_fp16")]; + tensor var_11744_equation_0 = const()[name = tensor("op_11744_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11744_cast_fp16 = einsum(equation = var_11744_equation_0, values = (var_11586_cast_fp16, var_11028_cast_fp16))[name = tensor("op_11744_cast_fp16")]; + tensor var_11745_to_fp16 = const()[name = tensor("op_11745_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1121_cast_fp16 = mul(x = var_11744_cast_fp16, y = var_11745_to_fp16)[name = tensor("aw_chunk_1121_cast_fp16")]; + tensor var_11748_equation_0 = const()[name = tensor("op_11748_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11748_cast_fp16 = einsum(equation = var_11748_equation_0, values = (var_11586_cast_fp16, var_11035_cast_fp16))[name = tensor("op_11748_cast_fp16")]; + tensor var_11749_to_fp16 = const()[name = tensor("op_11749_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1123_cast_fp16 = mul(x = var_11748_cast_fp16, y = var_11749_to_fp16)[name = tensor("aw_chunk_1123_cast_fp16")]; + tensor var_11752_equation_0 = const()[name = tensor("op_11752_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11752_cast_fp16 = einsum(equation = var_11752_equation_0, values = (var_11586_cast_fp16, var_11042_cast_fp16))[name = tensor("op_11752_cast_fp16")]; + tensor var_11753_to_fp16 = const()[name = tensor("op_11753_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1125_cast_fp16 = mul(x = var_11752_cast_fp16, y = var_11753_to_fp16)[name = tensor("aw_chunk_1125_cast_fp16")]; + tensor var_11756_equation_0 = const()[name = tensor("op_11756_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11756_cast_fp16 = einsum(equation = var_11756_equation_0, values = (var_11586_cast_fp16, var_11049_cast_fp16))[name = tensor("op_11756_cast_fp16")]; + tensor var_11757_to_fp16 = const()[name = tensor("op_11757_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1127_cast_fp16 = mul(x = var_11756_cast_fp16, y = var_11757_to_fp16)[name = tensor("aw_chunk_1127_cast_fp16")]; + tensor var_11760_equation_0 = const()[name = tensor("op_11760_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11760_cast_fp16 = einsum(equation = var_11760_equation_0, values = (var_11590_cast_fp16, var_11056_cast_fp16))[name = tensor("op_11760_cast_fp16")]; + tensor var_11761_to_fp16 = const()[name = tensor("op_11761_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1129_cast_fp16 = mul(x = var_11760_cast_fp16, y = var_11761_to_fp16)[name = tensor("aw_chunk_1129_cast_fp16")]; + tensor var_11764_equation_0 = const()[name = tensor("op_11764_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11764_cast_fp16 = einsum(equation = var_11764_equation_0, values = (var_11590_cast_fp16, var_11063_cast_fp16))[name = tensor("op_11764_cast_fp16")]; + tensor var_11765_to_fp16 = const()[name = tensor("op_11765_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1131_cast_fp16 = mul(x = var_11764_cast_fp16, y = var_11765_to_fp16)[name = tensor("aw_chunk_1131_cast_fp16")]; + tensor var_11768_equation_0 = const()[name = tensor("op_11768_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11768_cast_fp16 = einsum(equation = var_11768_equation_0, values = (var_11590_cast_fp16, var_11070_cast_fp16))[name = tensor("op_11768_cast_fp16")]; + tensor var_11769_to_fp16 = const()[name = tensor("op_11769_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1133_cast_fp16 = mul(x = var_11768_cast_fp16, y = var_11769_to_fp16)[name = tensor("aw_chunk_1133_cast_fp16")]; + tensor var_11772_equation_0 = const()[name = tensor("op_11772_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11772_cast_fp16 = einsum(equation = var_11772_equation_0, values = (var_11590_cast_fp16, var_11077_cast_fp16))[name = tensor("op_11772_cast_fp16")]; + tensor var_11773_to_fp16 = const()[name = tensor("op_11773_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1135_cast_fp16 = mul(x = var_11772_cast_fp16, y = var_11773_to_fp16)[name = tensor("aw_chunk_1135_cast_fp16")]; + tensor var_11776_equation_0 = const()[name = tensor("op_11776_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11776_cast_fp16 = einsum(equation = var_11776_equation_0, values = (var_11594_cast_fp16, var_11084_cast_fp16))[name = tensor("op_11776_cast_fp16")]; + tensor var_11777_to_fp16 = const()[name = tensor("op_11777_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1137_cast_fp16 = mul(x = var_11776_cast_fp16, y = var_11777_to_fp16)[name = tensor("aw_chunk_1137_cast_fp16")]; + tensor var_11780_equation_0 = const()[name = tensor("op_11780_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11780_cast_fp16 = einsum(equation = var_11780_equation_0, values = (var_11594_cast_fp16, var_11091_cast_fp16))[name = tensor("op_11780_cast_fp16")]; + tensor var_11781_to_fp16 = const()[name = tensor("op_11781_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1139_cast_fp16 = mul(x = var_11780_cast_fp16, y = var_11781_to_fp16)[name = tensor("aw_chunk_1139_cast_fp16")]; + tensor var_11784_equation_0 = const()[name = tensor("op_11784_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11784_cast_fp16 = einsum(equation = var_11784_equation_0, values = (var_11594_cast_fp16, var_11098_cast_fp16))[name = tensor("op_11784_cast_fp16")]; + tensor var_11785_to_fp16 = const()[name = tensor("op_11785_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1141_cast_fp16 = mul(x = var_11784_cast_fp16, y = var_11785_to_fp16)[name = tensor("aw_chunk_1141_cast_fp16")]; + tensor var_11788_equation_0 = const()[name = tensor("op_11788_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11788_cast_fp16 = einsum(equation = var_11788_equation_0, values = (var_11594_cast_fp16, var_11105_cast_fp16))[name = tensor("op_11788_cast_fp16")]; + tensor var_11789_to_fp16 = const()[name = tensor("op_11789_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1143_cast_fp16 = mul(x = var_11788_cast_fp16, y = var_11789_to_fp16)[name = tensor("aw_chunk_1143_cast_fp16")]; + tensor var_11792_equation_0 = const()[name = tensor("op_11792_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11792_cast_fp16 = einsum(equation = var_11792_equation_0, values = (var_11598_cast_fp16, var_11112_cast_fp16))[name = tensor("op_11792_cast_fp16")]; + tensor var_11793_to_fp16 = const()[name = tensor("op_11793_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1145_cast_fp16 = mul(x = var_11792_cast_fp16, y = var_11793_to_fp16)[name = tensor("aw_chunk_1145_cast_fp16")]; + tensor var_11796_equation_0 = const()[name = tensor("op_11796_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11796_cast_fp16 = einsum(equation = var_11796_equation_0, values = (var_11598_cast_fp16, var_11119_cast_fp16))[name = tensor("op_11796_cast_fp16")]; + tensor var_11797_to_fp16 = const()[name = tensor("op_11797_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1147_cast_fp16 = mul(x = var_11796_cast_fp16, y = var_11797_to_fp16)[name = tensor("aw_chunk_1147_cast_fp16")]; + tensor var_11800_equation_0 = const()[name = tensor("op_11800_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11800_cast_fp16 = einsum(equation = var_11800_equation_0, values = (var_11598_cast_fp16, var_11126_cast_fp16))[name = tensor("op_11800_cast_fp16")]; + tensor var_11801_to_fp16 = const()[name = tensor("op_11801_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1149_cast_fp16 = mul(x = var_11800_cast_fp16, y = var_11801_to_fp16)[name = tensor("aw_chunk_1149_cast_fp16")]; + tensor var_11804_equation_0 = const()[name = tensor("op_11804_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11804_cast_fp16 = einsum(equation = var_11804_equation_0, values = (var_11598_cast_fp16, var_11133_cast_fp16))[name = tensor("op_11804_cast_fp16")]; + tensor var_11805_to_fp16 = const()[name = tensor("op_11805_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1151_cast_fp16 = mul(x = var_11804_cast_fp16, y = var_11805_to_fp16)[name = tensor("aw_chunk_1151_cast_fp16")]; + tensor var_11808_equation_0 = const()[name = tensor("op_11808_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11808_cast_fp16 = einsum(equation = var_11808_equation_0, values = (var_11602_cast_fp16, var_11140_cast_fp16))[name = tensor("op_11808_cast_fp16")]; + tensor var_11809_to_fp16 = const()[name = tensor("op_11809_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1153_cast_fp16 = mul(x = var_11808_cast_fp16, y = var_11809_to_fp16)[name = tensor("aw_chunk_1153_cast_fp16")]; + tensor var_11812_equation_0 = const()[name = tensor("op_11812_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11812_cast_fp16 = einsum(equation = var_11812_equation_0, values = (var_11602_cast_fp16, var_11147_cast_fp16))[name = tensor("op_11812_cast_fp16")]; + tensor var_11813_to_fp16 = const()[name = tensor("op_11813_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1155_cast_fp16 = mul(x = var_11812_cast_fp16, y = var_11813_to_fp16)[name = tensor("aw_chunk_1155_cast_fp16")]; + tensor var_11816_equation_0 = const()[name = tensor("op_11816_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11816_cast_fp16 = einsum(equation = var_11816_equation_0, values = (var_11602_cast_fp16, var_11154_cast_fp16))[name = tensor("op_11816_cast_fp16")]; + tensor var_11817_to_fp16 = const()[name = tensor("op_11817_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1157_cast_fp16 = mul(x = var_11816_cast_fp16, y = var_11817_to_fp16)[name = tensor("aw_chunk_1157_cast_fp16")]; + tensor var_11820_equation_0 = const()[name = tensor("op_11820_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11820_cast_fp16 = einsum(equation = var_11820_equation_0, values = (var_11602_cast_fp16, var_11161_cast_fp16))[name = tensor("op_11820_cast_fp16")]; + tensor var_11821_to_fp16 = const()[name = tensor("op_11821_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1159_cast_fp16 = mul(x = var_11820_cast_fp16, y = var_11821_to_fp16)[name = tensor("aw_chunk_1159_cast_fp16")]; + tensor var_11824_equation_0 = const()[name = tensor("op_11824_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11824_cast_fp16 = einsum(equation = var_11824_equation_0, values = (var_11606_cast_fp16, var_11168_cast_fp16))[name = tensor("op_11824_cast_fp16")]; + tensor var_11825_to_fp16 = const()[name = tensor("op_11825_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1161_cast_fp16 = mul(x = var_11824_cast_fp16, y = var_11825_to_fp16)[name = tensor("aw_chunk_1161_cast_fp16")]; + tensor var_11828_equation_0 = const()[name = tensor("op_11828_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11828_cast_fp16 = einsum(equation = var_11828_equation_0, values = (var_11606_cast_fp16, var_11175_cast_fp16))[name = tensor("op_11828_cast_fp16")]; + tensor var_11829_to_fp16 = const()[name = tensor("op_11829_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1163_cast_fp16 = mul(x = var_11828_cast_fp16, y = var_11829_to_fp16)[name = tensor("aw_chunk_1163_cast_fp16")]; + tensor var_11832_equation_0 = const()[name = tensor("op_11832_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11832_cast_fp16 = einsum(equation = var_11832_equation_0, values = (var_11606_cast_fp16, var_11182_cast_fp16))[name = tensor("op_11832_cast_fp16")]; + tensor var_11833_to_fp16 = const()[name = tensor("op_11833_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1165_cast_fp16 = mul(x = var_11832_cast_fp16, y = var_11833_to_fp16)[name = tensor("aw_chunk_1165_cast_fp16")]; + tensor var_11836_equation_0 = const()[name = tensor("op_11836_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11836_cast_fp16 = einsum(equation = var_11836_equation_0, values = (var_11606_cast_fp16, var_11189_cast_fp16))[name = tensor("op_11836_cast_fp16")]; + tensor var_11837_to_fp16 = const()[name = tensor("op_11837_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1167_cast_fp16 = mul(x = var_11836_cast_fp16, y = var_11837_to_fp16)[name = tensor("aw_chunk_1167_cast_fp16")]; + tensor var_11840_equation_0 = const()[name = tensor("op_11840_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11840_cast_fp16 = einsum(equation = var_11840_equation_0, values = (var_11610_cast_fp16, var_11196_cast_fp16))[name = tensor("op_11840_cast_fp16")]; + tensor var_11841_to_fp16 = const()[name = tensor("op_11841_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1169_cast_fp16 = mul(x = var_11840_cast_fp16, y = var_11841_to_fp16)[name = tensor("aw_chunk_1169_cast_fp16")]; + tensor var_11844_equation_0 = const()[name = tensor("op_11844_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11844_cast_fp16 = einsum(equation = var_11844_equation_0, values = (var_11610_cast_fp16, var_11203_cast_fp16))[name = tensor("op_11844_cast_fp16")]; + tensor var_11845_to_fp16 = const()[name = tensor("op_11845_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1171_cast_fp16 = mul(x = var_11844_cast_fp16, y = var_11845_to_fp16)[name = tensor("aw_chunk_1171_cast_fp16")]; + tensor var_11848_equation_0 = const()[name = tensor("op_11848_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11848_cast_fp16 = einsum(equation = var_11848_equation_0, values = (var_11610_cast_fp16, var_11210_cast_fp16))[name = tensor("op_11848_cast_fp16")]; + tensor var_11849_to_fp16 = const()[name = tensor("op_11849_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1173_cast_fp16 = mul(x = var_11848_cast_fp16, y = var_11849_to_fp16)[name = tensor("aw_chunk_1173_cast_fp16")]; + tensor var_11852_equation_0 = const()[name = tensor("op_11852_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11852_cast_fp16 = einsum(equation = var_11852_equation_0, values = (var_11610_cast_fp16, var_11217_cast_fp16))[name = tensor("op_11852_cast_fp16")]; + tensor var_11853_to_fp16 = const()[name = tensor("op_11853_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1175_cast_fp16 = mul(x = var_11852_cast_fp16, y = var_11853_to_fp16)[name = tensor("aw_chunk_1175_cast_fp16")]; + tensor var_11856_equation_0 = const()[name = tensor("op_11856_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11856_cast_fp16 = einsum(equation = var_11856_equation_0, values = (var_11614_cast_fp16, var_11224_cast_fp16))[name = tensor("op_11856_cast_fp16")]; + tensor var_11857_to_fp16 = const()[name = tensor("op_11857_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1177_cast_fp16 = mul(x = var_11856_cast_fp16, y = var_11857_to_fp16)[name = tensor("aw_chunk_1177_cast_fp16")]; + tensor var_11860_equation_0 = const()[name = tensor("op_11860_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11860_cast_fp16 = einsum(equation = var_11860_equation_0, values = (var_11614_cast_fp16, var_11231_cast_fp16))[name = tensor("op_11860_cast_fp16")]; + tensor var_11861_to_fp16 = const()[name = tensor("op_11861_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1179_cast_fp16 = mul(x = var_11860_cast_fp16, y = var_11861_to_fp16)[name = tensor("aw_chunk_1179_cast_fp16")]; + tensor var_11864_equation_0 = const()[name = tensor("op_11864_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11864_cast_fp16 = einsum(equation = var_11864_equation_0, values = (var_11614_cast_fp16, var_11238_cast_fp16))[name = tensor("op_11864_cast_fp16")]; + tensor var_11865_to_fp16 = const()[name = tensor("op_11865_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1181_cast_fp16 = mul(x = var_11864_cast_fp16, y = var_11865_to_fp16)[name = tensor("aw_chunk_1181_cast_fp16")]; + tensor var_11868_equation_0 = const()[name = tensor("op_11868_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11868_cast_fp16 = einsum(equation = var_11868_equation_0, values = (var_11614_cast_fp16, var_11245_cast_fp16))[name = tensor("op_11868_cast_fp16")]; + tensor var_11869_to_fp16 = const()[name = tensor("op_11869_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1183_cast_fp16 = mul(x = var_11868_cast_fp16, y = var_11869_to_fp16)[name = tensor("aw_chunk_1183_cast_fp16")]; + tensor var_11872_equation_0 = const()[name = tensor("op_11872_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11872_cast_fp16 = einsum(equation = var_11872_equation_0, values = (var_11618_cast_fp16, var_11252_cast_fp16))[name = tensor("op_11872_cast_fp16")]; + tensor var_11873_to_fp16 = const()[name = tensor("op_11873_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1185_cast_fp16 = mul(x = var_11872_cast_fp16, y = var_11873_to_fp16)[name = tensor("aw_chunk_1185_cast_fp16")]; + tensor var_11876_equation_0 = const()[name = tensor("op_11876_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11876_cast_fp16 = einsum(equation = var_11876_equation_0, values = (var_11618_cast_fp16, var_11259_cast_fp16))[name = tensor("op_11876_cast_fp16")]; + tensor var_11877_to_fp16 = const()[name = tensor("op_11877_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1187_cast_fp16 = mul(x = var_11876_cast_fp16, y = var_11877_to_fp16)[name = tensor("aw_chunk_1187_cast_fp16")]; + tensor var_11880_equation_0 = const()[name = tensor("op_11880_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11880_cast_fp16 = einsum(equation = var_11880_equation_0, values = (var_11618_cast_fp16, var_11266_cast_fp16))[name = tensor("op_11880_cast_fp16")]; + tensor var_11881_to_fp16 = const()[name = tensor("op_11881_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1189_cast_fp16 = mul(x = var_11880_cast_fp16, y = var_11881_to_fp16)[name = tensor("aw_chunk_1189_cast_fp16")]; + tensor var_11884_equation_0 = const()[name = tensor("op_11884_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11884_cast_fp16 = einsum(equation = var_11884_equation_0, values = (var_11618_cast_fp16, var_11273_cast_fp16))[name = tensor("op_11884_cast_fp16")]; + tensor var_11885_to_fp16 = const()[name = tensor("op_11885_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1191_cast_fp16 = mul(x = var_11884_cast_fp16, y = var_11885_to_fp16)[name = tensor("aw_chunk_1191_cast_fp16")]; + tensor var_11888_equation_0 = const()[name = tensor("op_11888_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11888_cast_fp16 = einsum(equation = var_11888_equation_0, values = (var_11622_cast_fp16, var_11280_cast_fp16))[name = tensor("op_11888_cast_fp16")]; + tensor var_11889_to_fp16 = const()[name = tensor("op_11889_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1193_cast_fp16 = mul(x = var_11888_cast_fp16, y = var_11889_to_fp16)[name = tensor("aw_chunk_1193_cast_fp16")]; + tensor var_11892_equation_0 = const()[name = tensor("op_11892_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11892_cast_fp16 = einsum(equation = var_11892_equation_0, values = (var_11622_cast_fp16, var_11287_cast_fp16))[name = tensor("op_11892_cast_fp16")]; + tensor var_11893_to_fp16 = const()[name = tensor("op_11893_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1195_cast_fp16 = mul(x = var_11892_cast_fp16, y = var_11893_to_fp16)[name = tensor("aw_chunk_1195_cast_fp16")]; + tensor var_11896_equation_0 = const()[name = tensor("op_11896_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11896_cast_fp16 = einsum(equation = var_11896_equation_0, values = (var_11622_cast_fp16, var_11294_cast_fp16))[name = tensor("op_11896_cast_fp16")]; + tensor var_11897_to_fp16 = const()[name = tensor("op_11897_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1197_cast_fp16 = mul(x = var_11896_cast_fp16, y = var_11897_to_fp16)[name = tensor("aw_chunk_1197_cast_fp16")]; + tensor var_11900_equation_0 = const()[name = tensor("op_11900_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11900_cast_fp16 = einsum(equation = var_11900_equation_0, values = (var_11622_cast_fp16, var_11301_cast_fp16))[name = tensor("op_11900_cast_fp16")]; + tensor var_11901_to_fp16 = const()[name = tensor("op_11901_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1199_cast_fp16 = mul(x = var_11900_cast_fp16, y = var_11901_to_fp16)[name = tensor("aw_chunk_1199_cast_fp16")]; + tensor var_11904_equation_0 = const()[name = tensor("op_11904_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11904_cast_fp16 = einsum(equation = var_11904_equation_0, values = (var_11626_cast_fp16, var_11308_cast_fp16))[name = tensor("op_11904_cast_fp16")]; + tensor var_11905_to_fp16 = const()[name = tensor("op_11905_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1201_cast_fp16 = mul(x = var_11904_cast_fp16, y = var_11905_to_fp16)[name = tensor("aw_chunk_1201_cast_fp16")]; + tensor var_11908_equation_0 = const()[name = tensor("op_11908_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11908_cast_fp16 = einsum(equation = var_11908_equation_0, values = (var_11626_cast_fp16, var_11315_cast_fp16))[name = tensor("op_11908_cast_fp16")]; + tensor var_11909_to_fp16 = const()[name = tensor("op_11909_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1203_cast_fp16 = mul(x = var_11908_cast_fp16, y = var_11909_to_fp16)[name = tensor("aw_chunk_1203_cast_fp16")]; + tensor var_11912_equation_0 = const()[name = tensor("op_11912_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11912_cast_fp16 = einsum(equation = var_11912_equation_0, values = (var_11626_cast_fp16, var_11322_cast_fp16))[name = tensor("op_11912_cast_fp16")]; + tensor var_11913_to_fp16 = const()[name = tensor("op_11913_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1205_cast_fp16 = mul(x = var_11912_cast_fp16, y = var_11913_to_fp16)[name = tensor("aw_chunk_1205_cast_fp16")]; + tensor var_11916_equation_0 = const()[name = tensor("op_11916_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11916_cast_fp16 = einsum(equation = var_11916_equation_0, values = (var_11626_cast_fp16, var_11329_cast_fp16))[name = tensor("op_11916_cast_fp16")]; + tensor var_11917_to_fp16 = const()[name = tensor("op_11917_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1207_cast_fp16 = mul(x = var_11916_cast_fp16, y = var_11917_to_fp16)[name = tensor("aw_chunk_1207_cast_fp16")]; + tensor var_11920_equation_0 = const()[name = tensor("op_11920_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11920_cast_fp16 = einsum(equation = var_11920_equation_0, values = (var_11630_cast_fp16, var_11336_cast_fp16))[name = tensor("op_11920_cast_fp16")]; + tensor var_11921_to_fp16 = const()[name = tensor("op_11921_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1209_cast_fp16 = mul(x = var_11920_cast_fp16, y = var_11921_to_fp16)[name = tensor("aw_chunk_1209_cast_fp16")]; + tensor var_11924_equation_0 = const()[name = tensor("op_11924_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11924_cast_fp16 = einsum(equation = var_11924_equation_0, values = (var_11630_cast_fp16, var_11343_cast_fp16))[name = tensor("op_11924_cast_fp16")]; + tensor var_11925_to_fp16 = const()[name = tensor("op_11925_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1211_cast_fp16 = mul(x = var_11924_cast_fp16, y = var_11925_to_fp16)[name = tensor("aw_chunk_1211_cast_fp16")]; + tensor var_11928_equation_0 = const()[name = tensor("op_11928_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11928_cast_fp16 = einsum(equation = var_11928_equation_0, values = (var_11630_cast_fp16, var_11350_cast_fp16))[name = tensor("op_11928_cast_fp16")]; + tensor var_11929_to_fp16 = const()[name = tensor("op_11929_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1213_cast_fp16 = mul(x = var_11928_cast_fp16, y = var_11929_to_fp16)[name = tensor("aw_chunk_1213_cast_fp16")]; + tensor var_11932_equation_0 = const()[name = tensor("op_11932_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11932_cast_fp16 = einsum(equation = var_11932_equation_0, values = (var_11630_cast_fp16, var_11357_cast_fp16))[name = tensor("op_11932_cast_fp16")]; + tensor var_11933_to_fp16 = const()[name = tensor("op_11933_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1215_cast_fp16 = mul(x = var_11932_cast_fp16, y = var_11933_to_fp16)[name = tensor("aw_chunk_1215_cast_fp16")]; + tensor var_11936_equation_0 = const()[name = tensor("op_11936_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11936_cast_fp16 = einsum(equation = var_11936_equation_0, values = (var_11634_cast_fp16, var_11364_cast_fp16))[name = tensor("op_11936_cast_fp16")]; + tensor var_11937_to_fp16 = const()[name = tensor("op_11937_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1217_cast_fp16 = mul(x = var_11936_cast_fp16, y = var_11937_to_fp16)[name = tensor("aw_chunk_1217_cast_fp16")]; + tensor var_11940_equation_0 = const()[name = tensor("op_11940_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11940_cast_fp16 = einsum(equation = var_11940_equation_0, values = (var_11634_cast_fp16, var_11371_cast_fp16))[name = tensor("op_11940_cast_fp16")]; + tensor var_11941_to_fp16 = const()[name = tensor("op_11941_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1219_cast_fp16 = mul(x = var_11940_cast_fp16, y = var_11941_to_fp16)[name = tensor("aw_chunk_1219_cast_fp16")]; + tensor var_11944_equation_0 = const()[name = tensor("op_11944_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11944_cast_fp16 = einsum(equation = var_11944_equation_0, values = (var_11634_cast_fp16, var_11378_cast_fp16))[name = tensor("op_11944_cast_fp16")]; + tensor var_11945_to_fp16 = const()[name = tensor("op_11945_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1221_cast_fp16 = mul(x = var_11944_cast_fp16, y = var_11945_to_fp16)[name = tensor("aw_chunk_1221_cast_fp16")]; + tensor var_11948_equation_0 = const()[name = tensor("op_11948_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11948_cast_fp16 = einsum(equation = var_11948_equation_0, values = (var_11634_cast_fp16, var_11385_cast_fp16))[name = tensor("op_11948_cast_fp16")]; + tensor var_11949_to_fp16 = const()[name = tensor("op_11949_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1223_cast_fp16 = mul(x = var_11948_cast_fp16, y = var_11949_to_fp16)[name = tensor("aw_chunk_1223_cast_fp16")]; + tensor var_11952_equation_0 = const()[name = tensor("op_11952_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11952_cast_fp16 = einsum(equation = var_11952_equation_0, values = (var_11638_cast_fp16, var_11392_cast_fp16))[name = tensor("op_11952_cast_fp16")]; + tensor var_11953_to_fp16 = const()[name = tensor("op_11953_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1225_cast_fp16 = mul(x = var_11952_cast_fp16, y = var_11953_to_fp16)[name = tensor("aw_chunk_1225_cast_fp16")]; + tensor var_11956_equation_0 = const()[name = tensor("op_11956_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11956_cast_fp16 = einsum(equation = var_11956_equation_0, values = (var_11638_cast_fp16, var_11399_cast_fp16))[name = tensor("op_11956_cast_fp16")]; + tensor var_11957_to_fp16 = const()[name = tensor("op_11957_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1227_cast_fp16 = mul(x = var_11956_cast_fp16, y = var_11957_to_fp16)[name = tensor("aw_chunk_1227_cast_fp16")]; + tensor var_11960_equation_0 = const()[name = tensor("op_11960_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11960_cast_fp16 = einsum(equation = var_11960_equation_0, values = (var_11638_cast_fp16, var_11406_cast_fp16))[name = tensor("op_11960_cast_fp16")]; + tensor var_11961_to_fp16 = const()[name = tensor("op_11961_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1229_cast_fp16 = mul(x = var_11960_cast_fp16, y = var_11961_to_fp16)[name = tensor("aw_chunk_1229_cast_fp16")]; + tensor var_11964_equation_0 = const()[name = tensor("op_11964_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11964_cast_fp16 = einsum(equation = var_11964_equation_0, values = (var_11638_cast_fp16, var_11413_cast_fp16))[name = tensor("op_11964_cast_fp16")]; + tensor var_11965_to_fp16 = const()[name = tensor("op_11965_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1231_cast_fp16 = mul(x = var_11964_cast_fp16, y = var_11965_to_fp16)[name = tensor("aw_chunk_1231_cast_fp16")]; + tensor var_11968_equation_0 = const()[name = tensor("op_11968_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11968_cast_fp16 = einsum(equation = var_11968_equation_0, values = (var_11642_cast_fp16, var_11420_cast_fp16))[name = tensor("op_11968_cast_fp16")]; + tensor var_11969_to_fp16 = const()[name = tensor("op_11969_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1233_cast_fp16 = mul(x = var_11968_cast_fp16, y = var_11969_to_fp16)[name = tensor("aw_chunk_1233_cast_fp16")]; + tensor var_11972_equation_0 = const()[name = tensor("op_11972_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11972_cast_fp16 = einsum(equation = var_11972_equation_0, values = (var_11642_cast_fp16, var_11427_cast_fp16))[name = tensor("op_11972_cast_fp16")]; + tensor var_11973_to_fp16 = const()[name = tensor("op_11973_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1235_cast_fp16 = mul(x = var_11972_cast_fp16, y = var_11973_to_fp16)[name = tensor("aw_chunk_1235_cast_fp16")]; + tensor var_11976_equation_0 = const()[name = tensor("op_11976_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11976_cast_fp16 = einsum(equation = var_11976_equation_0, values = (var_11642_cast_fp16, var_11434_cast_fp16))[name = tensor("op_11976_cast_fp16")]; + tensor var_11977_to_fp16 = const()[name = tensor("op_11977_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1237_cast_fp16 = mul(x = var_11976_cast_fp16, y = var_11977_to_fp16)[name = tensor("aw_chunk_1237_cast_fp16")]; + tensor var_11980_equation_0 = const()[name = tensor("op_11980_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11980_cast_fp16 = einsum(equation = var_11980_equation_0, values = (var_11642_cast_fp16, var_11441_cast_fp16))[name = tensor("op_11980_cast_fp16")]; + tensor var_11981_to_fp16 = const()[name = tensor("op_11981_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1239_cast_fp16 = mul(x = var_11980_cast_fp16, y = var_11981_to_fp16)[name = tensor("aw_chunk_1239_cast_fp16")]; + tensor var_11984_equation_0 = const()[name = tensor("op_11984_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11984_cast_fp16 = einsum(equation = var_11984_equation_0, values = (var_11646_cast_fp16, var_11448_cast_fp16))[name = tensor("op_11984_cast_fp16")]; + tensor var_11985_to_fp16 = const()[name = tensor("op_11985_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1241_cast_fp16 = mul(x = var_11984_cast_fp16, y = var_11985_to_fp16)[name = tensor("aw_chunk_1241_cast_fp16")]; + tensor var_11988_equation_0 = const()[name = tensor("op_11988_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11988_cast_fp16 = einsum(equation = var_11988_equation_0, values = (var_11646_cast_fp16, var_11455_cast_fp16))[name = tensor("op_11988_cast_fp16")]; + tensor var_11989_to_fp16 = const()[name = tensor("op_11989_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1243_cast_fp16 = mul(x = var_11988_cast_fp16, y = var_11989_to_fp16)[name = tensor("aw_chunk_1243_cast_fp16")]; + tensor var_11992_equation_0 = const()[name = tensor("op_11992_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11992_cast_fp16 = einsum(equation = var_11992_equation_0, values = (var_11646_cast_fp16, var_11462_cast_fp16))[name = tensor("op_11992_cast_fp16")]; + tensor var_11993_to_fp16 = const()[name = tensor("op_11993_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1245_cast_fp16 = mul(x = var_11992_cast_fp16, y = var_11993_to_fp16)[name = tensor("aw_chunk_1245_cast_fp16")]; + tensor var_11996_equation_0 = const()[name = tensor("op_11996_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_11996_cast_fp16 = einsum(equation = var_11996_equation_0, values = (var_11646_cast_fp16, var_11469_cast_fp16))[name = tensor("op_11996_cast_fp16")]; + tensor var_11997_to_fp16 = const()[name = tensor("op_11997_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1247_cast_fp16 = mul(x = var_11996_cast_fp16, y = var_11997_to_fp16)[name = tensor("aw_chunk_1247_cast_fp16")]; + tensor var_12000_equation_0 = const()[name = tensor("op_12000_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_12000_cast_fp16 = einsum(equation = var_12000_equation_0, values = (var_11650_cast_fp16, var_11476_cast_fp16))[name = tensor("op_12000_cast_fp16")]; + tensor var_12001_to_fp16 = const()[name = tensor("op_12001_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1249_cast_fp16 = mul(x = var_12000_cast_fp16, y = var_12001_to_fp16)[name = tensor("aw_chunk_1249_cast_fp16")]; + tensor var_12004_equation_0 = const()[name = tensor("op_12004_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_12004_cast_fp16 = einsum(equation = var_12004_equation_0, values = (var_11650_cast_fp16, var_11483_cast_fp16))[name = tensor("op_12004_cast_fp16")]; + tensor var_12005_to_fp16 = const()[name = tensor("op_12005_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1251_cast_fp16 = mul(x = var_12004_cast_fp16, y = var_12005_to_fp16)[name = tensor("aw_chunk_1251_cast_fp16")]; + tensor var_12008_equation_0 = const()[name = tensor("op_12008_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_12008_cast_fp16 = einsum(equation = var_12008_equation_0, values = (var_11650_cast_fp16, var_11490_cast_fp16))[name = tensor("op_12008_cast_fp16")]; + tensor var_12009_to_fp16 = const()[name = tensor("op_12009_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1253_cast_fp16 = mul(x = var_12008_cast_fp16, y = var_12009_to_fp16)[name = tensor("aw_chunk_1253_cast_fp16")]; + tensor var_12012_equation_0 = const()[name = tensor("op_12012_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_12012_cast_fp16 = einsum(equation = var_12012_equation_0, values = (var_11650_cast_fp16, var_11497_cast_fp16))[name = tensor("op_12012_cast_fp16")]; + tensor var_12013_to_fp16 = const()[name = tensor("op_12013_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1255_cast_fp16 = mul(x = var_12012_cast_fp16, y = var_12013_to_fp16)[name = tensor("aw_chunk_1255_cast_fp16")]; + tensor var_12016_equation_0 = const()[name = tensor("op_12016_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_12016_cast_fp16 = einsum(equation = var_12016_equation_0, values = (var_11654_cast_fp16, var_11504_cast_fp16))[name = tensor("op_12016_cast_fp16")]; + tensor var_12017_to_fp16 = const()[name = tensor("op_12017_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1257_cast_fp16 = mul(x = var_12016_cast_fp16, y = var_12017_to_fp16)[name = tensor("aw_chunk_1257_cast_fp16")]; + tensor var_12020_equation_0 = const()[name = tensor("op_12020_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_12020_cast_fp16 = einsum(equation = var_12020_equation_0, values = (var_11654_cast_fp16, var_11511_cast_fp16))[name = tensor("op_12020_cast_fp16")]; + tensor var_12021_to_fp16 = const()[name = tensor("op_12021_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1259_cast_fp16 = mul(x = var_12020_cast_fp16, y = var_12021_to_fp16)[name = tensor("aw_chunk_1259_cast_fp16")]; + tensor var_12024_equation_0 = const()[name = tensor("op_12024_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_12024_cast_fp16 = einsum(equation = var_12024_equation_0, values = (var_11654_cast_fp16, var_11518_cast_fp16))[name = tensor("op_12024_cast_fp16")]; + tensor var_12025_to_fp16 = const()[name = tensor("op_12025_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1261_cast_fp16 = mul(x = var_12024_cast_fp16, y = var_12025_to_fp16)[name = tensor("aw_chunk_1261_cast_fp16")]; + tensor var_12028_equation_0 = const()[name = tensor("op_12028_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_12028_cast_fp16 = einsum(equation = var_12028_equation_0, values = (var_11654_cast_fp16, var_11525_cast_fp16))[name = tensor("op_12028_cast_fp16")]; + tensor var_12029_to_fp16 = const()[name = tensor("op_12029_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1263_cast_fp16 = mul(x = var_12028_cast_fp16, y = var_12029_to_fp16)[name = tensor("aw_chunk_1263_cast_fp16")]; + tensor var_12032_equation_0 = const()[name = tensor("op_12032_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_12032_cast_fp16 = einsum(equation = var_12032_equation_0, values = (var_11658_cast_fp16, var_11532_cast_fp16))[name = tensor("op_12032_cast_fp16")]; + tensor var_12033_to_fp16 = const()[name = tensor("op_12033_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1265_cast_fp16 = mul(x = var_12032_cast_fp16, y = var_12033_to_fp16)[name = tensor("aw_chunk_1265_cast_fp16")]; + tensor var_12036_equation_0 = const()[name = tensor("op_12036_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_12036_cast_fp16 = einsum(equation = var_12036_equation_0, values = (var_11658_cast_fp16, var_11539_cast_fp16))[name = tensor("op_12036_cast_fp16")]; + tensor var_12037_to_fp16 = const()[name = tensor("op_12037_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1267_cast_fp16 = mul(x = var_12036_cast_fp16, y = var_12037_to_fp16)[name = tensor("aw_chunk_1267_cast_fp16")]; + tensor var_12040_equation_0 = const()[name = tensor("op_12040_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_12040_cast_fp16 = einsum(equation = var_12040_equation_0, values = (var_11658_cast_fp16, var_11546_cast_fp16))[name = tensor("op_12040_cast_fp16")]; + tensor var_12041_to_fp16 = const()[name = tensor("op_12041_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1269_cast_fp16 = mul(x = var_12040_cast_fp16, y = var_12041_to_fp16)[name = tensor("aw_chunk_1269_cast_fp16")]; + tensor var_12044_equation_0 = const()[name = tensor("op_12044_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_12044_cast_fp16 = einsum(equation = var_12044_equation_0, values = (var_11658_cast_fp16, var_11553_cast_fp16))[name = tensor("op_12044_cast_fp16")]; + tensor var_12045_to_fp16 = const()[name = tensor("op_12045_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1271_cast_fp16 = mul(x = var_12044_cast_fp16, y = var_12045_to_fp16)[name = tensor("aw_chunk_1271_cast_fp16")]; + tensor var_12048_equation_0 = const()[name = tensor("op_12048_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_12048_cast_fp16 = einsum(equation = var_12048_equation_0, values = (var_11662_cast_fp16, var_11560_cast_fp16))[name = tensor("op_12048_cast_fp16")]; + tensor var_12049_to_fp16 = const()[name = tensor("op_12049_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1273_cast_fp16 = mul(x = var_12048_cast_fp16, y = var_12049_to_fp16)[name = tensor("aw_chunk_1273_cast_fp16")]; + tensor var_12052_equation_0 = const()[name = tensor("op_12052_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_12052_cast_fp16 = einsum(equation = var_12052_equation_0, values = (var_11662_cast_fp16, var_11567_cast_fp16))[name = tensor("op_12052_cast_fp16")]; + tensor var_12053_to_fp16 = const()[name = tensor("op_12053_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1275_cast_fp16 = mul(x = var_12052_cast_fp16, y = var_12053_to_fp16)[name = tensor("aw_chunk_1275_cast_fp16")]; + tensor var_12056_equation_0 = const()[name = tensor("op_12056_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_12056_cast_fp16 = einsum(equation = var_12056_equation_0, values = (var_11662_cast_fp16, var_11574_cast_fp16))[name = tensor("op_12056_cast_fp16")]; + tensor var_12057_to_fp16 = const()[name = tensor("op_12057_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1277_cast_fp16 = mul(x = var_12056_cast_fp16, y = var_12057_to_fp16)[name = tensor("aw_chunk_1277_cast_fp16")]; + tensor var_12060_equation_0 = const()[name = tensor("op_12060_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_12060_cast_fp16 = einsum(equation = var_12060_equation_0, values = (var_11662_cast_fp16, var_11581_cast_fp16))[name = tensor("op_12060_cast_fp16")]; + tensor var_12061_to_fp16 = const()[name = tensor("op_12061_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1279_cast_fp16 = mul(x = var_12060_cast_fp16, y = var_12061_to_fp16)[name = tensor("aw_chunk_1279_cast_fp16")]; + tensor var_12063_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1121_cast_fp16)[name = tensor("op_12063_cast_fp16")]; + tensor var_12064_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1123_cast_fp16)[name = tensor("op_12064_cast_fp16")]; + tensor var_12065_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1125_cast_fp16)[name = tensor("op_12065_cast_fp16")]; + tensor var_12066_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1127_cast_fp16)[name = tensor("op_12066_cast_fp16")]; + tensor var_12067_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1129_cast_fp16)[name = tensor("op_12067_cast_fp16")]; + tensor var_12068_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1131_cast_fp16)[name = tensor("op_12068_cast_fp16")]; + tensor var_12069_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1133_cast_fp16)[name = tensor("op_12069_cast_fp16")]; + tensor var_12070_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1135_cast_fp16)[name = tensor("op_12070_cast_fp16")]; + tensor var_12071_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1137_cast_fp16)[name = tensor("op_12071_cast_fp16")]; + tensor var_12072_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1139_cast_fp16)[name = tensor("op_12072_cast_fp16")]; + tensor var_12073_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1141_cast_fp16)[name = tensor("op_12073_cast_fp16")]; + tensor var_12074_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1143_cast_fp16)[name = tensor("op_12074_cast_fp16")]; + tensor var_12075_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1145_cast_fp16)[name = tensor("op_12075_cast_fp16")]; + tensor var_12076_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1147_cast_fp16)[name = tensor("op_12076_cast_fp16")]; + tensor var_12077_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1149_cast_fp16)[name = tensor("op_12077_cast_fp16")]; + tensor var_12078_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1151_cast_fp16)[name = tensor("op_12078_cast_fp16")]; + tensor var_12079_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1153_cast_fp16)[name = tensor("op_12079_cast_fp16")]; + tensor var_12080_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1155_cast_fp16)[name = tensor("op_12080_cast_fp16")]; + tensor var_12081_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1157_cast_fp16)[name = tensor("op_12081_cast_fp16")]; + tensor var_12082_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1159_cast_fp16)[name = tensor("op_12082_cast_fp16")]; + tensor var_12083_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1161_cast_fp16)[name = tensor("op_12083_cast_fp16")]; + tensor var_12084_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1163_cast_fp16)[name = tensor("op_12084_cast_fp16")]; + tensor var_12085_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1165_cast_fp16)[name = tensor("op_12085_cast_fp16")]; + tensor var_12086_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1167_cast_fp16)[name = tensor("op_12086_cast_fp16")]; + tensor var_12087_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1169_cast_fp16)[name = tensor("op_12087_cast_fp16")]; + tensor var_12088_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1171_cast_fp16)[name = tensor("op_12088_cast_fp16")]; + tensor var_12089_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1173_cast_fp16)[name = tensor("op_12089_cast_fp16")]; + tensor var_12090_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1175_cast_fp16)[name = tensor("op_12090_cast_fp16")]; + tensor var_12091_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1177_cast_fp16)[name = tensor("op_12091_cast_fp16")]; + tensor var_12092_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1179_cast_fp16)[name = tensor("op_12092_cast_fp16")]; + tensor var_12093_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1181_cast_fp16)[name = tensor("op_12093_cast_fp16")]; + tensor var_12094_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1183_cast_fp16)[name = tensor("op_12094_cast_fp16")]; + tensor var_12095_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1185_cast_fp16)[name = tensor("op_12095_cast_fp16")]; + tensor var_12096_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1187_cast_fp16)[name = tensor("op_12096_cast_fp16")]; + tensor var_12097_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1189_cast_fp16)[name = tensor("op_12097_cast_fp16")]; + tensor var_12098_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1191_cast_fp16)[name = tensor("op_12098_cast_fp16")]; + tensor var_12099_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1193_cast_fp16)[name = tensor("op_12099_cast_fp16")]; + tensor var_12100_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1195_cast_fp16)[name = tensor("op_12100_cast_fp16")]; + tensor var_12101_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1197_cast_fp16)[name = tensor("op_12101_cast_fp16")]; + tensor var_12102_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1199_cast_fp16)[name = tensor("op_12102_cast_fp16")]; + tensor var_12103_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1201_cast_fp16)[name = tensor("op_12103_cast_fp16")]; + tensor var_12104_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1203_cast_fp16)[name = tensor("op_12104_cast_fp16")]; + tensor var_12105_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1205_cast_fp16)[name = tensor("op_12105_cast_fp16")]; + tensor var_12106_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1207_cast_fp16)[name = tensor("op_12106_cast_fp16")]; + tensor var_12107_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1209_cast_fp16)[name = tensor("op_12107_cast_fp16")]; + tensor var_12108_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1211_cast_fp16)[name = tensor("op_12108_cast_fp16")]; + tensor var_12109_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1213_cast_fp16)[name = tensor("op_12109_cast_fp16")]; + tensor var_12110_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1215_cast_fp16)[name = tensor("op_12110_cast_fp16")]; + tensor var_12111_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1217_cast_fp16)[name = tensor("op_12111_cast_fp16")]; + tensor var_12112_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1219_cast_fp16)[name = tensor("op_12112_cast_fp16")]; + tensor var_12113_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1221_cast_fp16)[name = tensor("op_12113_cast_fp16")]; + tensor var_12114_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1223_cast_fp16)[name = tensor("op_12114_cast_fp16")]; + tensor var_12115_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1225_cast_fp16)[name = tensor("op_12115_cast_fp16")]; + tensor var_12116_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1227_cast_fp16)[name = tensor("op_12116_cast_fp16")]; + tensor var_12117_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1229_cast_fp16)[name = tensor("op_12117_cast_fp16")]; + tensor var_12118_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1231_cast_fp16)[name = tensor("op_12118_cast_fp16")]; + tensor var_12119_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1233_cast_fp16)[name = tensor("op_12119_cast_fp16")]; + tensor var_12120_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1235_cast_fp16)[name = tensor("op_12120_cast_fp16")]; + tensor var_12121_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1237_cast_fp16)[name = tensor("op_12121_cast_fp16")]; + tensor var_12122_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1239_cast_fp16)[name = tensor("op_12122_cast_fp16")]; + tensor var_12123_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1241_cast_fp16)[name = tensor("op_12123_cast_fp16")]; + tensor var_12124_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1243_cast_fp16)[name = tensor("op_12124_cast_fp16")]; + tensor var_12125_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1245_cast_fp16)[name = tensor("op_12125_cast_fp16")]; + tensor var_12126_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1247_cast_fp16)[name = tensor("op_12126_cast_fp16")]; + tensor var_12127_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1249_cast_fp16)[name = tensor("op_12127_cast_fp16")]; + tensor var_12128_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1251_cast_fp16)[name = tensor("op_12128_cast_fp16")]; + tensor var_12129_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1253_cast_fp16)[name = tensor("op_12129_cast_fp16")]; + tensor var_12130_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1255_cast_fp16)[name = tensor("op_12130_cast_fp16")]; + tensor var_12131_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1257_cast_fp16)[name = tensor("op_12131_cast_fp16")]; + tensor var_12132_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1259_cast_fp16)[name = tensor("op_12132_cast_fp16")]; + tensor var_12133_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1261_cast_fp16)[name = tensor("op_12133_cast_fp16")]; + tensor var_12134_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1263_cast_fp16)[name = tensor("op_12134_cast_fp16")]; + tensor var_12135_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1265_cast_fp16)[name = tensor("op_12135_cast_fp16")]; + tensor var_12136_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1267_cast_fp16)[name = tensor("op_12136_cast_fp16")]; + tensor var_12137_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1269_cast_fp16)[name = tensor("op_12137_cast_fp16")]; + tensor var_12138_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1271_cast_fp16)[name = tensor("op_12138_cast_fp16")]; + tensor var_12139_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1273_cast_fp16)[name = tensor("op_12139_cast_fp16")]; + tensor var_12140_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1275_cast_fp16)[name = tensor("op_12140_cast_fp16")]; + tensor var_12141_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1277_cast_fp16)[name = tensor("op_12141_cast_fp16")]; + tensor var_12142_cast_fp16 = softmax(axis = var_10888, x = aw_chunk_1279_cast_fp16)[name = tensor("op_12142_cast_fp16")]; + tensor var_12144_equation_0 = const()[name = tensor("op_12144_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12144_cast_fp16 = einsum(equation = var_12144_equation_0, values = (var_11664_cast_fp16, var_12063_cast_fp16))[name = tensor("op_12144_cast_fp16")]; + tensor var_12146_equation_0 = const()[name = tensor("op_12146_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12146_cast_fp16 = einsum(equation = var_12146_equation_0, values = (var_11664_cast_fp16, var_12064_cast_fp16))[name = tensor("op_12146_cast_fp16")]; + tensor var_12148_equation_0 = const()[name = tensor("op_12148_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12148_cast_fp16 = einsum(equation = var_12148_equation_0, values = (var_11664_cast_fp16, var_12065_cast_fp16))[name = tensor("op_12148_cast_fp16")]; + tensor var_12150_equation_0 = const()[name = tensor("op_12150_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12150_cast_fp16 = einsum(equation = var_12150_equation_0, values = (var_11664_cast_fp16, var_12066_cast_fp16))[name = tensor("op_12150_cast_fp16")]; + tensor var_12152_equation_0 = const()[name = tensor("op_12152_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12152_cast_fp16 = einsum(equation = var_12152_equation_0, values = (var_11668_cast_fp16, var_12067_cast_fp16))[name = tensor("op_12152_cast_fp16")]; + tensor var_12154_equation_0 = const()[name = tensor("op_12154_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12154_cast_fp16 = einsum(equation = var_12154_equation_0, values = (var_11668_cast_fp16, var_12068_cast_fp16))[name = tensor("op_12154_cast_fp16")]; + tensor var_12156_equation_0 = const()[name = tensor("op_12156_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12156_cast_fp16 = einsum(equation = var_12156_equation_0, values = (var_11668_cast_fp16, var_12069_cast_fp16))[name = tensor("op_12156_cast_fp16")]; + tensor var_12158_equation_0 = const()[name = tensor("op_12158_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12158_cast_fp16 = einsum(equation = var_12158_equation_0, values = (var_11668_cast_fp16, var_12070_cast_fp16))[name = tensor("op_12158_cast_fp16")]; + tensor var_12160_equation_0 = const()[name = tensor("op_12160_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12160_cast_fp16 = einsum(equation = var_12160_equation_0, values = (var_11672_cast_fp16, var_12071_cast_fp16))[name = tensor("op_12160_cast_fp16")]; + tensor var_12162_equation_0 = const()[name = tensor("op_12162_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12162_cast_fp16 = einsum(equation = var_12162_equation_0, values = (var_11672_cast_fp16, var_12072_cast_fp16))[name = tensor("op_12162_cast_fp16")]; + tensor var_12164_equation_0 = const()[name = tensor("op_12164_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12164_cast_fp16 = einsum(equation = var_12164_equation_0, values = (var_11672_cast_fp16, var_12073_cast_fp16))[name = tensor("op_12164_cast_fp16")]; + tensor var_12166_equation_0 = const()[name = tensor("op_12166_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12166_cast_fp16 = einsum(equation = var_12166_equation_0, values = (var_11672_cast_fp16, var_12074_cast_fp16))[name = tensor("op_12166_cast_fp16")]; + tensor var_12168_equation_0 = const()[name = tensor("op_12168_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12168_cast_fp16 = einsum(equation = var_12168_equation_0, values = (var_11676_cast_fp16, var_12075_cast_fp16))[name = tensor("op_12168_cast_fp16")]; + tensor var_12170_equation_0 = const()[name = tensor("op_12170_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12170_cast_fp16 = einsum(equation = var_12170_equation_0, values = (var_11676_cast_fp16, var_12076_cast_fp16))[name = tensor("op_12170_cast_fp16")]; + tensor var_12172_equation_0 = const()[name = tensor("op_12172_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12172_cast_fp16 = einsum(equation = var_12172_equation_0, values = (var_11676_cast_fp16, var_12077_cast_fp16))[name = tensor("op_12172_cast_fp16")]; + tensor var_12174_equation_0 = const()[name = tensor("op_12174_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12174_cast_fp16 = einsum(equation = var_12174_equation_0, values = (var_11676_cast_fp16, var_12078_cast_fp16))[name = tensor("op_12174_cast_fp16")]; + tensor var_12176_equation_0 = const()[name = tensor("op_12176_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12176_cast_fp16 = einsum(equation = var_12176_equation_0, values = (var_11680_cast_fp16, var_12079_cast_fp16))[name = tensor("op_12176_cast_fp16")]; + tensor var_12178_equation_0 = const()[name = tensor("op_12178_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12178_cast_fp16 = einsum(equation = var_12178_equation_0, values = (var_11680_cast_fp16, var_12080_cast_fp16))[name = tensor("op_12178_cast_fp16")]; + tensor var_12180_equation_0 = const()[name = tensor("op_12180_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12180_cast_fp16 = einsum(equation = var_12180_equation_0, values = (var_11680_cast_fp16, var_12081_cast_fp16))[name = tensor("op_12180_cast_fp16")]; + tensor var_12182_equation_0 = const()[name = tensor("op_12182_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12182_cast_fp16 = einsum(equation = var_12182_equation_0, values = (var_11680_cast_fp16, var_12082_cast_fp16))[name = tensor("op_12182_cast_fp16")]; + tensor var_12184_equation_0 = const()[name = tensor("op_12184_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12184_cast_fp16 = einsum(equation = var_12184_equation_0, values = (var_11684_cast_fp16, var_12083_cast_fp16))[name = tensor("op_12184_cast_fp16")]; + tensor var_12186_equation_0 = const()[name = tensor("op_12186_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12186_cast_fp16 = einsum(equation = var_12186_equation_0, values = (var_11684_cast_fp16, var_12084_cast_fp16))[name = tensor("op_12186_cast_fp16")]; + tensor var_12188_equation_0 = const()[name = tensor("op_12188_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12188_cast_fp16 = einsum(equation = var_12188_equation_0, values = (var_11684_cast_fp16, var_12085_cast_fp16))[name = tensor("op_12188_cast_fp16")]; + tensor var_12190_equation_0 = const()[name = tensor("op_12190_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12190_cast_fp16 = einsum(equation = var_12190_equation_0, values = (var_11684_cast_fp16, var_12086_cast_fp16))[name = tensor("op_12190_cast_fp16")]; + tensor var_12192_equation_0 = const()[name = tensor("op_12192_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12192_cast_fp16 = einsum(equation = var_12192_equation_0, values = (var_11688_cast_fp16, var_12087_cast_fp16))[name = tensor("op_12192_cast_fp16")]; + tensor var_12194_equation_0 = const()[name = tensor("op_12194_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12194_cast_fp16 = einsum(equation = var_12194_equation_0, values = (var_11688_cast_fp16, var_12088_cast_fp16))[name = tensor("op_12194_cast_fp16")]; + tensor var_12196_equation_0 = const()[name = tensor("op_12196_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12196_cast_fp16 = einsum(equation = var_12196_equation_0, values = (var_11688_cast_fp16, var_12089_cast_fp16))[name = tensor("op_12196_cast_fp16")]; + tensor var_12198_equation_0 = const()[name = tensor("op_12198_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12198_cast_fp16 = einsum(equation = var_12198_equation_0, values = (var_11688_cast_fp16, var_12090_cast_fp16))[name = tensor("op_12198_cast_fp16")]; + tensor var_12200_equation_0 = const()[name = tensor("op_12200_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12200_cast_fp16 = einsum(equation = var_12200_equation_0, values = (var_11692_cast_fp16, var_12091_cast_fp16))[name = tensor("op_12200_cast_fp16")]; + tensor var_12202_equation_0 = const()[name = tensor("op_12202_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12202_cast_fp16 = einsum(equation = var_12202_equation_0, values = (var_11692_cast_fp16, var_12092_cast_fp16))[name = tensor("op_12202_cast_fp16")]; + tensor var_12204_equation_0 = const()[name = tensor("op_12204_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12204_cast_fp16 = einsum(equation = var_12204_equation_0, values = (var_11692_cast_fp16, var_12093_cast_fp16))[name = tensor("op_12204_cast_fp16")]; + tensor var_12206_equation_0 = const()[name = tensor("op_12206_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12206_cast_fp16 = einsum(equation = var_12206_equation_0, values = (var_11692_cast_fp16, var_12094_cast_fp16))[name = tensor("op_12206_cast_fp16")]; + tensor var_12208_equation_0 = const()[name = tensor("op_12208_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12208_cast_fp16 = einsum(equation = var_12208_equation_0, values = (var_11696_cast_fp16, var_12095_cast_fp16))[name = tensor("op_12208_cast_fp16")]; + tensor var_12210_equation_0 = const()[name = tensor("op_12210_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12210_cast_fp16 = einsum(equation = var_12210_equation_0, values = (var_11696_cast_fp16, var_12096_cast_fp16))[name = tensor("op_12210_cast_fp16")]; + tensor var_12212_equation_0 = const()[name = tensor("op_12212_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12212_cast_fp16 = einsum(equation = var_12212_equation_0, values = (var_11696_cast_fp16, var_12097_cast_fp16))[name = tensor("op_12212_cast_fp16")]; + tensor var_12214_equation_0 = const()[name = tensor("op_12214_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12214_cast_fp16 = einsum(equation = var_12214_equation_0, values = (var_11696_cast_fp16, var_12098_cast_fp16))[name = tensor("op_12214_cast_fp16")]; + tensor var_12216_equation_0 = const()[name = tensor("op_12216_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12216_cast_fp16 = einsum(equation = var_12216_equation_0, values = (var_11700_cast_fp16, var_12099_cast_fp16))[name = tensor("op_12216_cast_fp16")]; + tensor var_12218_equation_0 = const()[name = tensor("op_12218_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12218_cast_fp16 = einsum(equation = var_12218_equation_0, values = (var_11700_cast_fp16, var_12100_cast_fp16))[name = tensor("op_12218_cast_fp16")]; + tensor var_12220_equation_0 = const()[name = tensor("op_12220_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12220_cast_fp16 = einsum(equation = var_12220_equation_0, values = (var_11700_cast_fp16, var_12101_cast_fp16))[name = tensor("op_12220_cast_fp16")]; + tensor var_12222_equation_0 = const()[name = tensor("op_12222_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12222_cast_fp16 = einsum(equation = var_12222_equation_0, values = (var_11700_cast_fp16, var_12102_cast_fp16))[name = tensor("op_12222_cast_fp16")]; + tensor var_12224_equation_0 = const()[name = tensor("op_12224_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12224_cast_fp16 = einsum(equation = var_12224_equation_0, values = (var_11704_cast_fp16, var_12103_cast_fp16))[name = tensor("op_12224_cast_fp16")]; + tensor var_12226_equation_0 = const()[name = tensor("op_12226_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12226_cast_fp16 = einsum(equation = var_12226_equation_0, values = (var_11704_cast_fp16, var_12104_cast_fp16))[name = tensor("op_12226_cast_fp16")]; + tensor var_12228_equation_0 = const()[name = tensor("op_12228_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12228_cast_fp16 = einsum(equation = var_12228_equation_0, values = (var_11704_cast_fp16, var_12105_cast_fp16))[name = tensor("op_12228_cast_fp16")]; + tensor var_12230_equation_0 = const()[name = tensor("op_12230_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12230_cast_fp16 = einsum(equation = var_12230_equation_0, values = (var_11704_cast_fp16, var_12106_cast_fp16))[name = tensor("op_12230_cast_fp16")]; + tensor var_12232_equation_0 = const()[name = tensor("op_12232_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12232_cast_fp16 = einsum(equation = var_12232_equation_0, values = (var_11708_cast_fp16, var_12107_cast_fp16))[name = tensor("op_12232_cast_fp16")]; + tensor var_12234_equation_0 = const()[name = tensor("op_12234_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12234_cast_fp16 = einsum(equation = var_12234_equation_0, values = (var_11708_cast_fp16, var_12108_cast_fp16))[name = tensor("op_12234_cast_fp16")]; + tensor var_12236_equation_0 = const()[name = tensor("op_12236_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12236_cast_fp16 = einsum(equation = var_12236_equation_0, values = (var_11708_cast_fp16, var_12109_cast_fp16))[name = tensor("op_12236_cast_fp16")]; + tensor var_12238_equation_0 = const()[name = tensor("op_12238_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12238_cast_fp16 = einsum(equation = var_12238_equation_0, values = (var_11708_cast_fp16, var_12110_cast_fp16))[name = tensor("op_12238_cast_fp16")]; + tensor var_12240_equation_0 = const()[name = tensor("op_12240_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12240_cast_fp16 = einsum(equation = var_12240_equation_0, values = (var_11712_cast_fp16, var_12111_cast_fp16))[name = tensor("op_12240_cast_fp16")]; + tensor var_12242_equation_0 = const()[name = tensor("op_12242_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12242_cast_fp16 = einsum(equation = var_12242_equation_0, values = (var_11712_cast_fp16, var_12112_cast_fp16))[name = tensor("op_12242_cast_fp16")]; + tensor var_12244_equation_0 = const()[name = tensor("op_12244_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12244_cast_fp16 = einsum(equation = var_12244_equation_0, values = (var_11712_cast_fp16, var_12113_cast_fp16))[name = tensor("op_12244_cast_fp16")]; + tensor var_12246_equation_0 = const()[name = tensor("op_12246_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12246_cast_fp16 = einsum(equation = var_12246_equation_0, values = (var_11712_cast_fp16, var_12114_cast_fp16))[name = tensor("op_12246_cast_fp16")]; + tensor var_12248_equation_0 = const()[name = tensor("op_12248_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12248_cast_fp16 = einsum(equation = var_12248_equation_0, values = (var_11716_cast_fp16, var_12115_cast_fp16))[name = tensor("op_12248_cast_fp16")]; + tensor var_12250_equation_0 = const()[name = tensor("op_12250_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12250_cast_fp16 = einsum(equation = var_12250_equation_0, values = (var_11716_cast_fp16, var_12116_cast_fp16))[name = tensor("op_12250_cast_fp16")]; + tensor var_12252_equation_0 = const()[name = tensor("op_12252_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12252_cast_fp16 = einsum(equation = var_12252_equation_0, values = (var_11716_cast_fp16, var_12117_cast_fp16))[name = tensor("op_12252_cast_fp16")]; + tensor var_12254_equation_0 = const()[name = tensor("op_12254_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12254_cast_fp16 = einsum(equation = var_12254_equation_0, values = (var_11716_cast_fp16, var_12118_cast_fp16))[name = tensor("op_12254_cast_fp16")]; + tensor var_12256_equation_0 = const()[name = tensor("op_12256_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12256_cast_fp16 = einsum(equation = var_12256_equation_0, values = (var_11720_cast_fp16, var_12119_cast_fp16))[name = tensor("op_12256_cast_fp16")]; + tensor var_12258_equation_0 = const()[name = tensor("op_12258_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12258_cast_fp16 = einsum(equation = var_12258_equation_0, values = (var_11720_cast_fp16, var_12120_cast_fp16))[name = tensor("op_12258_cast_fp16")]; + tensor var_12260_equation_0 = const()[name = tensor("op_12260_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12260_cast_fp16 = einsum(equation = var_12260_equation_0, values = (var_11720_cast_fp16, var_12121_cast_fp16))[name = tensor("op_12260_cast_fp16")]; + tensor var_12262_equation_0 = const()[name = tensor("op_12262_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12262_cast_fp16 = einsum(equation = var_12262_equation_0, values = (var_11720_cast_fp16, var_12122_cast_fp16))[name = tensor("op_12262_cast_fp16")]; + tensor var_12264_equation_0 = const()[name = tensor("op_12264_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12264_cast_fp16 = einsum(equation = var_12264_equation_0, values = (var_11724_cast_fp16, var_12123_cast_fp16))[name = tensor("op_12264_cast_fp16")]; + tensor var_12266_equation_0 = const()[name = tensor("op_12266_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12266_cast_fp16 = einsum(equation = var_12266_equation_0, values = (var_11724_cast_fp16, var_12124_cast_fp16))[name = tensor("op_12266_cast_fp16")]; + tensor var_12268_equation_0 = const()[name = tensor("op_12268_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12268_cast_fp16 = einsum(equation = var_12268_equation_0, values = (var_11724_cast_fp16, var_12125_cast_fp16))[name = tensor("op_12268_cast_fp16")]; + tensor var_12270_equation_0 = const()[name = tensor("op_12270_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12270_cast_fp16 = einsum(equation = var_12270_equation_0, values = (var_11724_cast_fp16, var_12126_cast_fp16))[name = tensor("op_12270_cast_fp16")]; + tensor var_12272_equation_0 = const()[name = tensor("op_12272_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12272_cast_fp16 = einsum(equation = var_12272_equation_0, values = (var_11728_cast_fp16, var_12127_cast_fp16))[name = tensor("op_12272_cast_fp16")]; + tensor var_12274_equation_0 = const()[name = tensor("op_12274_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12274_cast_fp16 = einsum(equation = var_12274_equation_0, values = (var_11728_cast_fp16, var_12128_cast_fp16))[name = tensor("op_12274_cast_fp16")]; + tensor var_12276_equation_0 = const()[name = tensor("op_12276_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12276_cast_fp16 = einsum(equation = var_12276_equation_0, values = (var_11728_cast_fp16, var_12129_cast_fp16))[name = tensor("op_12276_cast_fp16")]; + tensor var_12278_equation_0 = const()[name = tensor("op_12278_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12278_cast_fp16 = einsum(equation = var_12278_equation_0, values = (var_11728_cast_fp16, var_12130_cast_fp16))[name = tensor("op_12278_cast_fp16")]; + tensor var_12280_equation_0 = const()[name = tensor("op_12280_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12280_cast_fp16 = einsum(equation = var_12280_equation_0, values = (var_11732_cast_fp16, var_12131_cast_fp16))[name = tensor("op_12280_cast_fp16")]; + tensor var_12282_equation_0 = const()[name = tensor("op_12282_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12282_cast_fp16 = einsum(equation = var_12282_equation_0, values = (var_11732_cast_fp16, var_12132_cast_fp16))[name = tensor("op_12282_cast_fp16")]; + tensor var_12284_equation_0 = const()[name = tensor("op_12284_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12284_cast_fp16 = einsum(equation = var_12284_equation_0, values = (var_11732_cast_fp16, var_12133_cast_fp16))[name = tensor("op_12284_cast_fp16")]; + tensor var_12286_equation_0 = const()[name = tensor("op_12286_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12286_cast_fp16 = einsum(equation = var_12286_equation_0, values = (var_11732_cast_fp16, var_12134_cast_fp16))[name = tensor("op_12286_cast_fp16")]; + tensor var_12288_equation_0 = const()[name = tensor("op_12288_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12288_cast_fp16 = einsum(equation = var_12288_equation_0, values = (var_11736_cast_fp16, var_12135_cast_fp16))[name = tensor("op_12288_cast_fp16")]; + tensor var_12290_equation_0 = const()[name = tensor("op_12290_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12290_cast_fp16 = einsum(equation = var_12290_equation_0, values = (var_11736_cast_fp16, var_12136_cast_fp16))[name = tensor("op_12290_cast_fp16")]; + tensor var_12292_equation_0 = const()[name = tensor("op_12292_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12292_cast_fp16 = einsum(equation = var_12292_equation_0, values = (var_11736_cast_fp16, var_12137_cast_fp16))[name = tensor("op_12292_cast_fp16")]; + tensor var_12294_equation_0 = const()[name = tensor("op_12294_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12294_cast_fp16 = einsum(equation = var_12294_equation_0, values = (var_11736_cast_fp16, var_12138_cast_fp16))[name = tensor("op_12294_cast_fp16")]; + tensor var_12296_equation_0 = const()[name = tensor("op_12296_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12296_cast_fp16 = einsum(equation = var_12296_equation_0, values = (var_11740_cast_fp16, var_12139_cast_fp16))[name = tensor("op_12296_cast_fp16")]; + tensor var_12298_equation_0 = const()[name = tensor("op_12298_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12298_cast_fp16 = einsum(equation = var_12298_equation_0, values = (var_11740_cast_fp16, var_12140_cast_fp16))[name = tensor("op_12298_cast_fp16")]; + tensor var_12300_equation_0 = const()[name = tensor("op_12300_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12300_cast_fp16 = einsum(equation = var_12300_equation_0, values = (var_11740_cast_fp16, var_12141_cast_fp16))[name = tensor("op_12300_cast_fp16")]; + tensor var_12302_equation_0 = const()[name = tensor("op_12302_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_12302_cast_fp16 = einsum(equation = var_12302_equation_0, values = (var_11740_cast_fp16, var_12142_cast_fp16))[name = tensor("op_12302_cast_fp16")]; + tensor var_12304_interleave_0 = const()[name = tensor("op_12304_interleave_0"), val = tensor(false)]; + tensor var_12304_cast_fp16 = concat(axis = var_10863, interleave = var_12304_interleave_0, values = (var_12144_cast_fp16, var_12146_cast_fp16, var_12148_cast_fp16, var_12150_cast_fp16))[name = tensor("op_12304_cast_fp16")]; + tensor var_12306_interleave_0 = const()[name = tensor("op_12306_interleave_0"), val = tensor(false)]; + tensor var_12306_cast_fp16 = concat(axis = var_10863, interleave = var_12306_interleave_0, values = (var_12152_cast_fp16, var_12154_cast_fp16, var_12156_cast_fp16, var_12158_cast_fp16))[name = tensor("op_12306_cast_fp16")]; + tensor var_12308_interleave_0 = const()[name = tensor("op_12308_interleave_0"), val = tensor(false)]; + tensor var_12308_cast_fp16 = concat(axis = var_10863, interleave = var_12308_interleave_0, values = (var_12160_cast_fp16, var_12162_cast_fp16, var_12164_cast_fp16, var_12166_cast_fp16))[name = tensor("op_12308_cast_fp16")]; + tensor var_12310_interleave_0 = const()[name = tensor("op_12310_interleave_0"), val = tensor(false)]; + tensor var_12310_cast_fp16 = concat(axis = var_10863, interleave = var_12310_interleave_0, values = (var_12168_cast_fp16, var_12170_cast_fp16, var_12172_cast_fp16, var_12174_cast_fp16))[name = tensor("op_12310_cast_fp16")]; + tensor var_12312_interleave_0 = const()[name = tensor("op_12312_interleave_0"), val = tensor(false)]; + tensor var_12312_cast_fp16 = concat(axis = var_10863, interleave = var_12312_interleave_0, values = (var_12176_cast_fp16, var_12178_cast_fp16, var_12180_cast_fp16, var_12182_cast_fp16))[name = tensor("op_12312_cast_fp16")]; + tensor var_12314_interleave_0 = const()[name = tensor("op_12314_interleave_0"), val = tensor(false)]; + tensor var_12314_cast_fp16 = concat(axis = var_10863, interleave = var_12314_interleave_0, values = (var_12184_cast_fp16, var_12186_cast_fp16, var_12188_cast_fp16, var_12190_cast_fp16))[name = tensor("op_12314_cast_fp16")]; + tensor var_12316_interleave_0 = const()[name = tensor("op_12316_interleave_0"), val = tensor(false)]; + tensor var_12316_cast_fp16 = concat(axis = var_10863, interleave = var_12316_interleave_0, values = (var_12192_cast_fp16, var_12194_cast_fp16, var_12196_cast_fp16, var_12198_cast_fp16))[name = tensor("op_12316_cast_fp16")]; + tensor var_12318_interleave_0 = const()[name = tensor("op_12318_interleave_0"), val = tensor(false)]; + tensor var_12318_cast_fp16 = concat(axis = var_10863, interleave = var_12318_interleave_0, values = (var_12200_cast_fp16, var_12202_cast_fp16, var_12204_cast_fp16, var_12206_cast_fp16))[name = tensor("op_12318_cast_fp16")]; + tensor var_12320_interleave_0 = const()[name = tensor("op_12320_interleave_0"), val = tensor(false)]; + tensor var_12320_cast_fp16 = concat(axis = var_10863, interleave = var_12320_interleave_0, values = (var_12208_cast_fp16, var_12210_cast_fp16, var_12212_cast_fp16, var_12214_cast_fp16))[name = tensor("op_12320_cast_fp16")]; + tensor var_12322_interleave_0 = const()[name = tensor("op_12322_interleave_0"), val = tensor(false)]; + tensor var_12322_cast_fp16 = concat(axis = var_10863, interleave = var_12322_interleave_0, values = (var_12216_cast_fp16, var_12218_cast_fp16, var_12220_cast_fp16, var_12222_cast_fp16))[name = tensor("op_12322_cast_fp16")]; + tensor var_12324_interleave_0 = const()[name = tensor("op_12324_interleave_0"), val = tensor(false)]; + tensor var_12324_cast_fp16 = concat(axis = var_10863, interleave = var_12324_interleave_0, values = (var_12224_cast_fp16, var_12226_cast_fp16, var_12228_cast_fp16, var_12230_cast_fp16))[name = tensor("op_12324_cast_fp16")]; + tensor var_12326_interleave_0 = const()[name = tensor("op_12326_interleave_0"), val = tensor(false)]; + tensor var_12326_cast_fp16 = concat(axis = var_10863, interleave = var_12326_interleave_0, values = (var_12232_cast_fp16, var_12234_cast_fp16, var_12236_cast_fp16, var_12238_cast_fp16))[name = tensor("op_12326_cast_fp16")]; + tensor var_12328_interleave_0 = const()[name = tensor("op_12328_interleave_0"), val = tensor(false)]; + tensor var_12328_cast_fp16 = concat(axis = var_10863, interleave = var_12328_interleave_0, values = (var_12240_cast_fp16, var_12242_cast_fp16, var_12244_cast_fp16, var_12246_cast_fp16))[name = tensor("op_12328_cast_fp16")]; + tensor var_12330_interleave_0 = const()[name = tensor("op_12330_interleave_0"), val = tensor(false)]; + tensor var_12330_cast_fp16 = concat(axis = var_10863, interleave = var_12330_interleave_0, values = (var_12248_cast_fp16, var_12250_cast_fp16, var_12252_cast_fp16, var_12254_cast_fp16))[name = tensor("op_12330_cast_fp16")]; + tensor var_12332_interleave_0 = const()[name = tensor("op_12332_interleave_0"), val = tensor(false)]; + tensor var_12332_cast_fp16 = concat(axis = var_10863, interleave = var_12332_interleave_0, values = (var_12256_cast_fp16, var_12258_cast_fp16, var_12260_cast_fp16, var_12262_cast_fp16))[name = tensor("op_12332_cast_fp16")]; + tensor var_12334_interleave_0 = const()[name = tensor("op_12334_interleave_0"), val = tensor(false)]; + tensor var_12334_cast_fp16 = concat(axis = var_10863, interleave = var_12334_interleave_0, values = (var_12264_cast_fp16, var_12266_cast_fp16, var_12268_cast_fp16, var_12270_cast_fp16))[name = tensor("op_12334_cast_fp16")]; + tensor var_12336_interleave_0 = const()[name = tensor("op_12336_interleave_0"), val = tensor(false)]; + tensor var_12336_cast_fp16 = concat(axis = var_10863, interleave = var_12336_interleave_0, values = (var_12272_cast_fp16, var_12274_cast_fp16, var_12276_cast_fp16, var_12278_cast_fp16))[name = tensor("op_12336_cast_fp16")]; + tensor var_12338_interleave_0 = const()[name = tensor("op_12338_interleave_0"), val = tensor(false)]; + tensor var_12338_cast_fp16 = concat(axis = var_10863, interleave = var_12338_interleave_0, values = (var_12280_cast_fp16, var_12282_cast_fp16, var_12284_cast_fp16, var_12286_cast_fp16))[name = tensor("op_12338_cast_fp16")]; + tensor var_12340_interleave_0 = const()[name = tensor("op_12340_interleave_0"), val = tensor(false)]; + tensor var_12340_cast_fp16 = concat(axis = var_10863, interleave = var_12340_interleave_0, values = (var_12288_cast_fp16, var_12290_cast_fp16, var_12292_cast_fp16, var_12294_cast_fp16))[name = tensor("op_12340_cast_fp16")]; + tensor var_12342_interleave_0 = const()[name = tensor("op_12342_interleave_0"), val = tensor(false)]; + tensor var_12342_cast_fp16 = concat(axis = var_10863, interleave = var_12342_interleave_0, values = (var_12296_cast_fp16, var_12298_cast_fp16, var_12300_cast_fp16, var_12302_cast_fp16))[name = tensor("op_12342_cast_fp16")]; + tensor input_57_interleave_0 = const()[name = tensor("input_57_interleave_0"), val = tensor(false)]; + tensor input_57_cast_fp16 = concat(axis = var_10888, interleave = input_57_interleave_0, values = (var_12304_cast_fp16, var_12306_cast_fp16, var_12308_cast_fp16, var_12310_cast_fp16, var_12312_cast_fp16, var_12314_cast_fp16, var_12316_cast_fp16, var_12318_cast_fp16, var_12320_cast_fp16, var_12322_cast_fp16, var_12324_cast_fp16, var_12326_cast_fp16, var_12328_cast_fp16, var_12330_cast_fp16, var_12332_cast_fp16, var_12334_cast_fp16, var_12336_cast_fp16, var_12338_cast_fp16, var_12340_cast_fp16, var_12342_cast_fp16))[name = tensor("input_57_cast_fp16")]; + tensor var_12347 = const()[name = tensor("op_12347"), val = tensor([1, 1])]; + tensor var_12349 = const()[name = tensor("op_12349"), val = tensor([1, 1])]; + tensor obj_31_pad_type_0 = const()[name = tensor("obj_31_pad_type_0"), val = tensor("custom")]; + tensor obj_31_pad_0 = const()[name = tensor("obj_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_7_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_7_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(299609600)))]; + tensor layers_7_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_7_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(302886464)))]; + tensor obj_31_cast_fp16 = conv(bias = layers_7_self_attn_o_proj_bias_to_fp16, dilations = var_12349, groups = var_10888, pad = obj_31_pad_0, pad_type = obj_31_pad_type_0, strides = var_12347, weight = layers_7_self_attn_o_proj_weight_to_fp16, x = input_57_cast_fp16)[name = tensor("obj_31_cast_fp16")]; + tensor inputs_31_cast_fp16 = add(x = inputs_29_cast_fp16, y = obj_31_cast_fp16)[name = tensor("inputs_31_cast_fp16")]; + tensor var_12355 = const()[name = tensor("op_12355"), val = tensor([1])]; + tensor channels_mean_31_cast_fp16 = reduce_mean(axes = var_12355, keep_dims = var_10889, x = inputs_31_cast_fp16)[name = tensor("channels_mean_31_cast_fp16")]; + tensor zero_mean_31_cast_fp16 = sub(x = inputs_31_cast_fp16, y = channels_mean_31_cast_fp16)[name = tensor("zero_mean_31_cast_fp16")]; + tensor zero_mean_sq_31_cast_fp16 = mul(x = zero_mean_31_cast_fp16, y = zero_mean_31_cast_fp16)[name = tensor("zero_mean_sq_31_cast_fp16")]; + tensor var_12359 = const()[name = tensor("op_12359"), val = tensor([1])]; + tensor var_12360_cast_fp16 = reduce_mean(axes = var_12359, keep_dims = var_10889, x = zero_mean_sq_31_cast_fp16)[name = tensor("op_12360_cast_fp16")]; + tensor var_12361_to_fp16 = const()[name = tensor("op_12361_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_12362_cast_fp16 = add(x = var_12360_cast_fp16, y = var_12361_to_fp16)[name = tensor("op_12362_cast_fp16")]; + tensor denom_31_epsilon_0_to_fp16 = const()[name = tensor("denom_31_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_31_cast_fp16 = rsqrt(epsilon = denom_31_epsilon_0_to_fp16, x = var_12362_cast_fp16)[name = tensor("denom_31_cast_fp16")]; + tensor out_31_cast_fp16 = mul(x = zero_mean_31_cast_fp16, y = denom_31_cast_fp16)[name = tensor("out_31_cast_fp16")]; + tensor input_59_gamma_0_to_fp16 = const()[name = tensor("input_59_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(302889088)))]; + tensor input_59_beta_0_to_fp16 = const()[name = tensor("input_59_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(302891712)))]; + tensor input_59_epsilon_0_to_fp16 = const()[name = tensor("input_59_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_59_cast_fp16 = batch_norm(beta = input_59_beta_0_to_fp16, epsilon = input_59_epsilon_0_to_fp16, gamma = input_59_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_31_cast_fp16)[name = tensor("input_59_cast_fp16")]; + tensor var_12373 = const()[name = tensor("op_12373"), val = tensor([1, 1])]; + tensor var_12375 = const()[name = tensor("op_12375"), val = tensor([1, 1])]; + tensor input_61_pad_type_0 = const()[name = tensor("input_61_pad_type_0"), val = tensor("custom")]; + tensor input_61_pad_0 = const()[name = tensor("input_61_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_7_fc1_weight_to_fp16 = const()[name = tensor("layers_7_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(302894336)))]; + tensor layers_7_fc1_bias_to_fp16 = const()[name = tensor("layers_7_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(316001600)))]; + tensor input_61_cast_fp16 = conv(bias = layers_7_fc1_bias_to_fp16, dilations = var_12375, groups = var_10888, pad = input_61_pad_0, pad_type = input_61_pad_type_0, strides = var_12373, weight = layers_7_fc1_weight_to_fp16, x = input_59_cast_fp16)[name = tensor("input_61_cast_fp16")]; + tensor input_63_mode_0 = const()[name = tensor("input_63_mode_0"), val = tensor("EXACT")]; + tensor input_63_cast_fp16 = gelu(mode = input_63_mode_0, x = input_61_cast_fp16)[name = tensor("input_63_cast_fp16")]; + tensor var_12381 = const()[name = tensor("op_12381"), val = tensor([1, 1])]; + tensor var_12383 = const()[name = tensor("op_12383"), val = tensor([1, 1])]; + tensor hidden_states_19_pad_type_0 = const()[name = tensor("hidden_states_19_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_19_pad_0 = const()[name = tensor("hidden_states_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_7_fc2_weight_to_fp16 = const()[name = tensor("layers_7_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(316011904)))]; + tensor layers_7_fc2_bias_to_fp16 = const()[name = tensor("layers_7_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(329119168)))]; + tensor hidden_states_19_cast_fp16 = conv(bias = layers_7_fc2_bias_to_fp16, dilations = var_12383, groups = var_10888, pad = hidden_states_19_pad_0, pad_type = hidden_states_19_pad_type_0, strides = var_12381, weight = layers_7_fc2_weight_to_fp16, x = input_63_cast_fp16)[name = tensor("hidden_states_19_cast_fp16")]; + tensor inputs_33_cast_fp16 = add(x = inputs_31_cast_fp16, y = hidden_states_19_cast_fp16)[name = tensor("inputs_33_cast_fp16")]; + tensor var_12390 = const()[name = tensor("op_12390"), val = tensor(3)]; + tensor var_12415 = const()[name = tensor("op_12415"), val = tensor(1)]; + tensor var_12416 = const()[name = tensor("op_12416"), val = tensor(true)]; + tensor var_12426 = const()[name = tensor("op_12426"), val = tensor([1])]; + tensor channels_mean_33_cast_fp16 = reduce_mean(axes = var_12426, keep_dims = var_12416, x = inputs_33_cast_fp16)[name = tensor("channels_mean_33_cast_fp16")]; + tensor zero_mean_33_cast_fp16 = sub(x = inputs_33_cast_fp16, y = channels_mean_33_cast_fp16)[name = tensor("zero_mean_33_cast_fp16")]; + tensor zero_mean_sq_33_cast_fp16 = mul(x = zero_mean_33_cast_fp16, y = zero_mean_33_cast_fp16)[name = tensor("zero_mean_sq_33_cast_fp16")]; + tensor var_12430 = const()[name = tensor("op_12430"), val = tensor([1])]; + tensor var_12431_cast_fp16 = reduce_mean(axes = var_12430, keep_dims = var_12416, x = zero_mean_sq_33_cast_fp16)[name = tensor("op_12431_cast_fp16")]; + tensor var_12432_to_fp16 = const()[name = tensor("op_12432_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_12433_cast_fp16 = add(x = var_12431_cast_fp16, y = var_12432_to_fp16)[name = tensor("op_12433_cast_fp16")]; + tensor denom_33_epsilon_0_to_fp16 = const()[name = tensor("denom_33_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_33_cast_fp16 = rsqrt(epsilon = denom_33_epsilon_0_to_fp16, x = var_12433_cast_fp16)[name = tensor("denom_33_cast_fp16")]; + tensor out_33_cast_fp16 = mul(x = zero_mean_33_cast_fp16, y = denom_33_cast_fp16)[name = tensor("out_33_cast_fp16")]; + tensor obj_33_gamma_0_to_fp16 = const()[name = tensor("obj_33_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(329121792)))]; + tensor obj_33_beta_0_to_fp16 = const()[name = tensor("obj_33_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(329124416)))]; + tensor obj_33_epsilon_0_to_fp16 = const()[name = tensor("obj_33_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_33_cast_fp16 = batch_norm(beta = obj_33_beta_0_to_fp16, epsilon = obj_33_epsilon_0_to_fp16, gamma = obj_33_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_33_cast_fp16)[name = tensor("obj_33_cast_fp16")]; + tensor var_12448 = const()[name = tensor("op_12448"), val = tensor([1, 1])]; + tensor var_12450 = const()[name = tensor("op_12450"), val = tensor([1, 1])]; + tensor query_17_pad_type_0 = const()[name = tensor("query_17_pad_type_0"), val = tensor("custom")]; + tensor query_17_pad_0 = const()[name = tensor("query_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_8_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_8_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(329127040)))]; + tensor layers_8_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_8_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(332403904)))]; + tensor query_17_cast_fp16 = conv(bias = layers_8_self_attn_q_proj_bias_to_fp16, dilations = var_12450, groups = var_12415, pad = query_17_pad_0, pad_type = query_17_pad_type_0, strides = var_12448, weight = layers_8_self_attn_q_proj_weight_to_fp16, x = obj_33_cast_fp16)[name = tensor("query_17_cast_fp16")]; + tensor var_12454 = const()[name = tensor("op_12454"), val = tensor([1, 1])]; + tensor var_12456 = const()[name = tensor("op_12456"), val = tensor([1, 1])]; + tensor key_17_pad_type_0 = const()[name = tensor("key_17_pad_type_0"), val = tensor("custom")]; + tensor key_17_pad_0 = const()[name = tensor("key_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_8_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_8_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(332406528)))]; + tensor key_17_cast_fp16 = conv(dilations = var_12456, groups = var_12415, pad = key_17_pad_0, pad_type = key_17_pad_type_0, strides = var_12454, weight = layers_8_self_attn_k_proj_weight_to_fp16, x = obj_33_cast_fp16)[name = tensor("key_17_cast_fp16")]; + tensor var_12461 = const()[name = tensor("op_12461"), val = tensor([1, 1])]; + tensor var_12463 = const()[name = tensor("op_12463"), val = tensor([1, 1])]; + tensor value_17_pad_type_0 = const()[name = tensor("value_17_pad_type_0"), val = tensor("custom")]; + tensor value_17_pad_0 = const()[name = tensor("value_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_8_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_8_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(335683392)))]; + tensor layers_8_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_8_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(338960256)))]; + tensor value_17_cast_fp16 = conv(bias = layers_8_self_attn_v_proj_bias_to_fp16, dilations = var_12463, groups = var_12415, pad = value_17_pad_0, pad_type = value_17_pad_type_0, strides = var_12461, weight = layers_8_self_attn_v_proj_weight_to_fp16, x = obj_33_cast_fp16)[name = tensor("value_17_cast_fp16")]; + tensor var_12470_begin_0 = const()[name = tensor("op_12470_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_12470_end_0 = const()[name = tensor("op_12470_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_12470_end_mask_0 = const()[name = tensor("op_12470_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_12470_cast_fp16 = slice_by_index(begin = var_12470_begin_0, end = var_12470_end_0, end_mask = var_12470_end_mask_0, x = query_17_cast_fp16)[name = tensor("op_12470_cast_fp16")]; + tensor var_12474_begin_0 = const()[name = tensor("op_12474_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_12474_end_0 = const()[name = tensor("op_12474_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_12474_end_mask_0 = const()[name = tensor("op_12474_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_12474_cast_fp16 = slice_by_index(begin = var_12474_begin_0, end = var_12474_end_0, end_mask = var_12474_end_mask_0, x = query_17_cast_fp16)[name = tensor("op_12474_cast_fp16")]; + tensor var_12478_begin_0 = const()[name = tensor("op_12478_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_12478_end_0 = const()[name = tensor("op_12478_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_12478_end_mask_0 = const()[name = tensor("op_12478_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_12478_cast_fp16 = slice_by_index(begin = var_12478_begin_0, end = var_12478_end_0, end_mask = var_12478_end_mask_0, x = query_17_cast_fp16)[name = tensor("op_12478_cast_fp16")]; + tensor var_12482_begin_0 = const()[name = tensor("op_12482_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_12482_end_0 = const()[name = tensor("op_12482_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_12482_end_mask_0 = const()[name = tensor("op_12482_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_12482_cast_fp16 = slice_by_index(begin = var_12482_begin_0, end = var_12482_end_0, end_mask = var_12482_end_mask_0, x = query_17_cast_fp16)[name = tensor("op_12482_cast_fp16")]; + tensor var_12486_begin_0 = const()[name = tensor("op_12486_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_12486_end_0 = const()[name = tensor("op_12486_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_12486_end_mask_0 = const()[name = tensor("op_12486_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_12486_cast_fp16 = slice_by_index(begin = var_12486_begin_0, end = var_12486_end_0, end_mask = var_12486_end_mask_0, x = query_17_cast_fp16)[name = tensor("op_12486_cast_fp16")]; + tensor var_12490_begin_0 = const()[name = tensor("op_12490_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_12490_end_0 = const()[name = tensor("op_12490_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_12490_end_mask_0 = const()[name = tensor("op_12490_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_12490_cast_fp16 = slice_by_index(begin = var_12490_begin_0, end = var_12490_end_0, end_mask = var_12490_end_mask_0, x = query_17_cast_fp16)[name = tensor("op_12490_cast_fp16")]; + tensor var_12494_begin_0 = const()[name = tensor("op_12494_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_12494_end_0 = const()[name = tensor("op_12494_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_12494_end_mask_0 = const()[name = tensor("op_12494_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_12494_cast_fp16 = slice_by_index(begin = var_12494_begin_0, end = var_12494_end_0, end_mask = var_12494_end_mask_0, x = query_17_cast_fp16)[name = tensor("op_12494_cast_fp16")]; + tensor var_12498_begin_0 = const()[name = tensor("op_12498_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_12498_end_0 = const()[name = tensor("op_12498_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_12498_end_mask_0 = const()[name = tensor("op_12498_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_12498_cast_fp16 = slice_by_index(begin = var_12498_begin_0, end = var_12498_end_0, end_mask = var_12498_end_mask_0, x = query_17_cast_fp16)[name = tensor("op_12498_cast_fp16")]; + tensor var_12502_begin_0 = const()[name = tensor("op_12502_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_12502_end_0 = const()[name = tensor("op_12502_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_12502_end_mask_0 = const()[name = tensor("op_12502_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_12502_cast_fp16 = slice_by_index(begin = var_12502_begin_0, end = var_12502_end_0, end_mask = var_12502_end_mask_0, x = query_17_cast_fp16)[name = tensor("op_12502_cast_fp16")]; + tensor var_12506_begin_0 = const()[name = tensor("op_12506_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_12506_end_0 = const()[name = tensor("op_12506_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_12506_end_mask_0 = const()[name = tensor("op_12506_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_12506_cast_fp16 = slice_by_index(begin = var_12506_begin_0, end = var_12506_end_0, end_mask = var_12506_end_mask_0, x = query_17_cast_fp16)[name = tensor("op_12506_cast_fp16")]; + tensor var_12510_begin_0 = const()[name = tensor("op_12510_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_12510_end_0 = const()[name = tensor("op_12510_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_12510_end_mask_0 = const()[name = tensor("op_12510_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_12510_cast_fp16 = slice_by_index(begin = var_12510_begin_0, end = var_12510_end_0, end_mask = var_12510_end_mask_0, x = query_17_cast_fp16)[name = tensor("op_12510_cast_fp16")]; + tensor var_12514_begin_0 = const()[name = tensor("op_12514_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_12514_end_0 = const()[name = tensor("op_12514_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_12514_end_mask_0 = const()[name = tensor("op_12514_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_12514_cast_fp16 = slice_by_index(begin = var_12514_begin_0, end = var_12514_end_0, end_mask = var_12514_end_mask_0, x = query_17_cast_fp16)[name = tensor("op_12514_cast_fp16")]; + tensor var_12518_begin_0 = const()[name = tensor("op_12518_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_12518_end_0 = const()[name = tensor("op_12518_end_0"), val = tensor([1, 832, 1, 1500])]; + tensor var_12518_end_mask_0 = const()[name = tensor("op_12518_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_12518_cast_fp16 = slice_by_index(begin = var_12518_begin_0, end = var_12518_end_0, end_mask = var_12518_end_mask_0, x = query_17_cast_fp16)[name = tensor("op_12518_cast_fp16")]; + tensor var_12522_begin_0 = const()[name = tensor("op_12522_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_12522_end_0 = const()[name = tensor("op_12522_end_0"), val = tensor([1, 896, 1, 1500])]; + tensor var_12522_end_mask_0 = const()[name = tensor("op_12522_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_12522_cast_fp16 = slice_by_index(begin = var_12522_begin_0, end = var_12522_end_0, end_mask = var_12522_end_mask_0, x = query_17_cast_fp16)[name = tensor("op_12522_cast_fp16")]; + tensor var_12526_begin_0 = const()[name = tensor("op_12526_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_12526_end_0 = const()[name = tensor("op_12526_end_0"), val = tensor([1, 960, 1, 1500])]; + tensor var_12526_end_mask_0 = const()[name = tensor("op_12526_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_12526_cast_fp16 = slice_by_index(begin = var_12526_begin_0, end = var_12526_end_0, end_mask = var_12526_end_mask_0, x = query_17_cast_fp16)[name = tensor("op_12526_cast_fp16")]; + tensor var_12530_begin_0 = const()[name = tensor("op_12530_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_12530_end_0 = const()[name = tensor("op_12530_end_0"), val = tensor([1, 1024, 1, 1500])]; + tensor var_12530_end_mask_0 = const()[name = tensor("op_12530_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_12530_cast_fp16 = slice_by_index(begin = var_12530_begin_0, end = var_12530_end_0, end_mask = var_12530_end_mask_0, x = query_17_cast_fp16)[name = tensor("op_12530_cast_fp16")]; + tensor var_12534_begin_0 = const()[name = tensor("op_12534_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_12534_end_0 = const()[name = tensor("op_12534_end_0"), val = tensor([1, 1088, 1, 1500])]; + tensor var_12534_end_mask_0 = const()[name = tensor("op_12534_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_12534_cast_fp16 = slice_by_index(begin = var_12534_begin_0, end = var_12534_end_0, end_mask = var_12534_end_mask_0, x = query_17_cast_fp16)[name = tensor("op_12534_cast_fp16")]; + tensor var_12538_begin_0 = const()[name = tensor("op_12538_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_12538_end_0 = const()[name = tensor("op_12538_end_0"), val = tensor([1, 1152, 1, 1500])]; + tensor var_12538_end_mask_0 = const()[name = tensor("op_12538_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_12538_cast_fp16 = slice_by_index(begin = var_12538_begin_0, end = var_12538_end_0, end_mask = var_12538_end_mask_0, x = query_17_cast_fp16)[name = tensor("op_12538_cast_fp16")]; + tensor var_12542_begin_0 = const()[name = tensor("op_12542_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_12542_end_0 = const()[name = tensor("op_12542_end_0"), val = tensor([1, 1216, 1, 1500])]; + tensor var_12542_end_mask_0 = const()[name = tensor("op_12542_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_12542_cast_fp16 = slice_by_index(begin = var_12542_begin_0, end = var_12542_end_0, end_mask = var_12542_end_mask_0, x = query_17_cast_fp16)[name = tensor("op_12542_cast_fp16")]; + tensor var_12546_begin_0 = const()[name = tensor("op_12546_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_12546_end_0 = const()[name = tensor("op_12546_end_0"), val = tensor([1, 1280, 1, 1500])]; + tensor var_12546_end_mask_0 = const()[name = tensor("op_12546_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_12546_cast_fp16 = slice_by_index(begin = var_12546_begin_0, end = var_12546_end_0, end_mask = var_12546_end_mask_0, x = query_17_cast_fp16)[name = tensor("op_12546_cast_fp16")]; + tensor var_12555_begin_0 = const()[name = tensor("op_12555_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_12555_end_0 = const()[name = tensor("op_12555_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_12555_end_mask_0 = const()[name = tensor("op_12555_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_12555_cast_fp16 = slice_by_index(begin = var_12555_begin_0, end = var_12555_end_0, end_mask = var_12555_end_mask_0, x = var_12470_cast_fp16)[name = tensor("op_12555_cast_fp16")]; + tensor var_12562_begin_0 = const()[name = tensor("op_12562_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_12562_end_0 = const()[name = tensor("op_12562_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_12562_end_mask_0 = const()[name = tensor("op_12562_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_12562_cast_fp16 = slice_by_index(begin = var_12562_begin_0, end = var_12562_end_0, end_mask = var_12562_end_mask_0, x = var_12470_cast_fp16)[name = tensor("op_12562_cast_fp16")]; + tensor var_12569_begin_0 = const()[name = tensor("op_12569_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_12569_end_0 = const()[name = tensor("op_12569_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_12569_end_mask_0 = const()[name = tensor("op_12569_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_12569_cast_fp16 = slice_by_index(begin = var_12569_begin_0, end = var_12569_end_0, end_mask = var_12569_end_mask_0, x = var_12470_cast_fp16)[name = tensor("op_12569_cast_fp16")]; + tensor var_12576_begin_0 = const()[name = tensor("op_12576_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_12576_end_0 = const()[name = tensor("op_12576_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_12576_end_mask_0 = const()[name = tensor("op_12576_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_12576_cast_fp16 = slice_by_index(begin = var_12576_begin_0, end = var_12576_end_0, end_mask = var_12576_end_mask_0, x = var_12470_cast_fp16)[name = tensor("op_12576_cast_fp16")]; + tensor var_12583_begin_0 = const()[name = tensor("op_12583_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_12583_end_0 = const()[name = tensor("op_12583_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_12583_end_mask_0 = const()[name = tensor("op_12583_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_12583_cast_fp16 = slice_by_index(begin = var_12583_begin_0, end = var_12583_end_0, end_mask = var_12583_end_mask_0, x = var_12474_cast_fp16)[name = tensor("op_12583_cast_fp16")]; + tensor var_12590_begin_0 = const()[name = tensor("op_12590_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_12590_end_0 = const()[name = tensor("op_12590_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_12590_end_mask_0 = const()[name = tensor("op_12590_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_12590_cast_fp16 = slice_by_index(begin = var_12590_begin_0, end = var_12590_end_0, end_mask = var_12590_end_mask_0, x = var_12474_cast_fp16)[name = tensor("op_12590_cast_fp16")]; + tensor var_12597_begin_0 = const()[name = tensor("op_12597_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_12597_end_0 = const()[name = tensor("op_12597_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_12597_end_mask_0 = const()[name = tensor("op_12597_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_12597_cast_fp16 = slice_by_index(begin = var_12597_begin_0, end = var_12597_end_0, end_mask = var_12597_end_mask_0, x = var_12474_cast_fp16)[name = tensor("op_12597_cast_fp16")]; + tensor var_12604_begin_0 = const()[name = tensor("op_12604_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_12604_end_0 = const()[name = tensor("op_12604_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_12604_end_mask_0 = const()[name = tensor("op_12604_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_12604_cast_fp16 = slice_by_index(begin = var_12604_begin_0, end = var_12604_end_0, end_mask = var_12604_end_mask_0, x = var_12474_cast_fp16)[name = tensor("op_12604_cast_fp16")]; + tensor var_12611_begin_0 = const()[name = tensor("op_12611_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_12611_end_0 = const()[name = tensor("op_12611_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_12611_end_mask_0 = const()[name = tensor("op_12611_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_12611_cast_fp16 = slice_by_index(begin = var_12611_begin_0, end = var_12611_end_0, end_mask = var_12611_end_mask_0, x = var_12478_cast_fp16)[name = tensor("op_12611_cast_fp16")]; + tensor var_12618_begin_0 = const()[name = tensor("op_12618_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_12618_end_0 = const()[name = tensor("op_12618_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_12618_end_mask_0 = const()[name = tensor("op_12618_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_12618_cast_fp16 = slice_by_index(begin = var_12618_begin_0, end = var_12618_end_0, end_mask = var_12618_end_mask_0, x = var_12478_cast_fp16)[name = tensor("op_12618_cast_fp16")]; + tensor var_12625_begin_0 = const()[name = tensor("op_12625_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_12625_end_0 = const()[name = tensor("op_12625_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_12625_end_mask_0 = const()[name = tensor("op_12625_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_12625_cast_fp16 = slice_by_index(begin = var_12625_begin_0, end = var_12625_end_0, end_mask = var_12625_end_mask_0, x = var_12478_cast_fp16)[name = tensor("op_12625_cast_fp16")]; + tensor var_12632_begin_0 = const()[name = tensor("op_12632_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_12632_end_0 = const()[name = tensor("op_12632_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_12632_end_mask_0 = const()[name = tensor("op_12632_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_12632_cast_fp16 = slice_by_index(begin = var_12632_begin_0, end = var_12632_end_0, end_mask = var_12632_end_mask_0, x = var_12478_cast_fp16)[name = tensor("op_12632_cast_fp16")]; + tensor var_12639_begin_0 = const()[name = tensor("op_12639_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_12639_end_0 = const()[name = tensor("op_12639_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_12639_end_mask_0 = const()[name = tensor("op_12639_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_12639_cast_fp16 = slice_by_index(begin = var_12639_begin_0, end = var_12639_end_0, end_mask = var_12639_end_mask_0, x = var_12482_cast_fp16)[name = tensor("op_12639_cast_fp16")]; + tensor var_12646_begin_0 = const()[name = tensor("op_12646_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_12646_end_0 = const()[name = tensor("op_12646_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_12646_end_mask_0 = const()[name = tensor("op_12646_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_12646_cast_fp16 = slice_by_index(begin = var_12646_begin_0, end = var_12646_end_0, end_mask = var_12646_end_mask_0, x = var_12482_cast_fp16)[name = tensor("op_12646_cast_fp16")]; + tensor var_12653_begin_0 = const()[name = tensor("op_12653_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_12653_end_0 = const()[name = tensor("op_12653_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_12653_end_mask_0 = const()[name = tensor("op_12653_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_12653_cast_fp16 = slice_by_index(begin = var_12653_begin_0, end = var_12653_end_0, end_mask = var_12653_end_mask_0, x = var_12482_cast_fp16)[name = tensor("op_12653_cast_fp16")]; + tensor var_12660_begin_0 = const()[name = tensor("op_12660_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_12660_end_0 = const()[name = tensor("op_12660_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_12660_end_mask_0 = const()[name = tensor("op_12660_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_12660_cast_fp16 = slice_by_index(begin = var_12660_begin_0, end = var_12660_end_0, end_mask = var_12660_end_mask_0, x = var_12482_cast_fp16)[name = tensor("op_12660_cast_fp16")]; + tensor var_12667_begin_0 = const()[name = tensor("op_12667_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_12667_end_0 = const()[name = tensor("op_12667_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_12667_end_mask_0 = const()[name = tensor("op_12667_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_12667_cast_fp16 = slice_by_index(begin = var_12667_begin_0, end = var_12667_end_0, end_mask = var_12667_end_mask_0, x = var_12486_cast_fp16)[name = tensor("op_12667_cast_fp16")]; + tensor var_12674_begin_0 = const()[name = tensor("op_12674_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_12674_end_0 = const()[name = tensor("op_12674_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_12674_end_mask_0 = const()[name = tensor("op_12674_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_12674_cast_fp16 = slice_by_index(begin = var_12674_begin_0, end = var_12674_end_0, end_mask = var_12674_end_mask_0, x = var_12486_cast_fp16)[name = tensor("op_12674_cast_fp16")]; + tensor var_12681_begin_0 = const()[name = tensor("op_12681_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_12681_end_0 = const()[name = tensor("op_12681_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_12681_end_mask_0 = const()[name = tensor("op_12681_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_12681_cast_fp16 = slice_by_index(begin = var_12681_begin_0, end = var_12681_end_0, end_mask = var_12681_end_mask_0, x = var_12486_cast_fp16)[name = tensor("op_12681_cast_fp16")]; + tensor var_12688_begin_0 = const()[name = tensor("op_12688_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_12688_end_0 = const()[name = tensor("op_12688_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_12688_end_mask_0 = const()[name = tensor("op_12688_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_12688_cast_fp16 = slice_by_index(begin = var_12688_begin_0, end = var_12688_end_0, end_mask = var_12688_end_mask_0, x = var_12486_cast_fp16)[name = tensor("op_12688_cast_fp16")]; + tensor var_12695_begin_0 = const()[name = tensor("op_12695_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_12695_end_0 = const()[name = tensor("op_12695_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_12695_end_mask_0 = const()[name = tensor("op_12695_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_12695_cast_fp16 = slice_by_index(begin = var_12695_begin_0, end = var_12695_end_0, end_mask = var_12695_end_mask_0, x = var_12490_cast_fp16)[name = tensor("op_12695_cast_fp16")]; + tensor var_12702_begin_0 = const()[name = tensor("op_12702_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_12702_end_0 = const()[name = tensor("op_12702_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_12702_end_mask_0 = const()[name = tensor("op_12702_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_12702_cast_fp16 = slice_by_index(begin = var_12702_begin_0, end = var_12702_end_0, end_mask = var_12702_end_mask_0, x = var_12490_cast_fp16)[name = tensor("op_12702_cast_fp16")]; + tensor var_12709_begin_0 = const()[name = tensor("op_12709_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_12709_end_0 = const()[name = tensor("op_12709_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_12709_end_mask_0 = const()[name = tensor("op_12709_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_12709_cast_fp16 = slice_by_index(begin = var_12709_begin_0, end = var_12709_end_0, end_mask = var_12709_end_mask_0, x = var_12490_cast_fp16)[name = tensor("op_12709_cast_fp16")]; + tensor var_12716_begin_0 = const()[name = tensor("op_12716_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_12716_end_0 = const()[name = tensor("op_12716_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_12716_end_mask_0 = const()[name = tensor("op_12716_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_12716_cast_fp16 = slice_by_index(begin = var_12716_begin_0, end = var_12716_end_0, end_mask = var_12716_end_mask_0, x = var_12490_cast_fp16)[name = tensor("op_12716_cast_fp16")]; + tensor var_12723_begin_0 = const()[name = tensor("op_12723_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_12723_end_0 = const()[name = tensor("op_12723_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_12723_end_mask_0 = const()[name = tensor("op_12723_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_12723_cast_fp16 = slice_by_index(begin = var_12723_begin_0, end = var_12723_end_0, end_mask = var_12723_end_mask_0, x = var_12494_cast_fp16)[name = tensor("op_12723_cast_fp16")]; + tensor var_12730_begin_0 = const()[name = tensor("op_12730_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_12730_end_0 = const()[name = tensor("op_12730_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_12730_end_mask_0 = const()[name = tensor("op_12730_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_12730_cast_fp16 = slice_by_index(begin = var_12730_begin_0, end = var_12730_end_0, end_mask = var_12730_end_mask_0, x = var_12494_cast_fp16)[name = tensor("op_12730_cast_fp16")]; + tensor var_12737_begin_0 = const()[name = tensor("op_12737_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_12737_end_0 = const()[name = tensor("op_12737_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_12737_end_mask_0 = const()[name = tensor("op_12737_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_12737_cast_fp16 = slice_by_index(begin = var_12737_begin_0, end = var_12737_end_0, end_mask = var_12737_end_mask_0, x = var_12494_cast_fp16)[name = tensor("op_12737_cast_fp16")]; + tensor var_12744_begin_0 = const()[name = tensor("op_12744_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_12744_end_0 = const()[name = tensor("op_12744_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_12744_end_mask_0 = const()[name = tensor("op_12744_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_12744_cast_fp16 = slice_by_index(begin = var_12744_begin_0, end = var_12744_end_0, end_mask = var_12744_end_mask_0, x = var_12494_cast_fp16)[name = tensor("op_12744_cast_fp16")]; + tensor var_12751_begin_0 = const()[name = tensor("op_12751_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_12751_end_0 = const()[name = tensor("op_12751_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_12751_end_mask_0 = const()[name = tensor("op_12751_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_12751_cast_fp16 = slice_by_index(begin = var_12751_begin_0, end = var_12751_end_0, end_mask = var_12751_end_mask_0, x = var_12498_cast_fp16)[name = tensor("op_12751_cast_fp16")]; + tensor var_12758_begin_0 = const()[name = tensor("op_12758_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_12758_end_0 = const()[name = tensor("op_12758_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_12758_end_mask_0 = const()[name = tensor("op_12758_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_12758_cast_fp16 = slice_by_index(begin = var_12758_begin_0, end = var_12758_end_0, end_mask = var_12758_end_mask_0, x = var_12498_cast_fp16)[name = tensor("op_12758_cast_fp16")]; + tensor var_12765_begin_0 = const()[name = tensor("op_12765_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_12765_end_0 = const()[name = tensor("op_12765_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_12765_end_mask_0 = const()[name = tensor("op_12765_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_12765_cast_fp16 = slice_by_index(begin = var_12765_begin_0, end = var_12765_end_0, end_mask = var_12765_end_mask_0, x = var_12498_cast_fp16)[name = tensor("op_12765_cast_fp16")]; + tensor var_12772_begin_0 = const()[name = tensor("op_12772_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_12772_end_0 = const()[name = tensor("op_12772_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_12772_end_mask_0 = const()[name = tensor("op_12772_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_12772_cast_fp16 = slice_by_index(begin = var_12772_begin_0, end = var_12772_end_0, end_mask = var_12772_end_mask_0, x = var_12498_cast_fp16)[name = tensor("op_12772_cast_fp16")]; + tensor var_12779_begin_0 = const()[name = tensor("op_12779_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_12779_end_0 = const()[name = tensor("op_12779_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_12779_end_mask_0 = const()[name = tensor("op_12779_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_12779_cast_fp16 = slice_by_index(begin = var_12779_begin_0, end = var_12779_end_0, end_mask = var_12779_end_mask_0, x = var_12502_cast_fp16)[name = tensor("op_12779_cast_fp16")]; + tensor var_12786_begin_0 = const()[name = tensor("op_12786_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_12786_end_0 = const()[name = tensor("op_12786_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_12786_end_mask_0 = const()[name = tensor("op_12786_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_12786_cast_fp16 = slice_by_index(begin = var_12786_begin_0, end = var_12786_end_0, end_mask = var_12786_end_mask_0, x = var_12502_cast_fp16)[name = tensor("op_12786_cast_fp16")]; + tensor var_12793_begin_0 = const()[name = tensor("op_12793_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_12793_end_0 = const()[name = tensor("op_12793_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_12793_end_mask_0 = const()[name = tensor("op_12793_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_12793_cast_fp16 = slice_by_index(begin = var_12793_begin_0, end = var_12793_end_0, end_mask = var_12793_end_mask_0, x = var_12502_cast_fp16)[name = tensor("op_12793_cast_fp16")]; + tensor var_12800_begin_0 = const()[name = tensor("op_12800_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_12800_end_0 = const()[name = tensor("op_12800_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_12800_end_mask_0 = const()[name = tensor("op_12800_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_12800_cast_fp16 = slice_by_index(begin = var_12800_begin_0, end = var_12800_end_0, end_mask = var_12800_end_mask_0, x = var_12502_cast_fp16)[name = tensor("op_12800_cast_fp16")]; + tensor var_12807_begin_0 = const()[name = tensor("op_12807_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_12807_end_0 = const()[name = tensor("op_12807_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_12807_end_mask_0 = const()[name = tensor("op_12807_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_12807_cast_fp16 = slice_by_index(begin = var_12807_begin_0, end = var_12807_end_0, end_mask = var_12807_end_mask_0, x = var_12506_cast_fp16)[name = tensor("op_12807_cast_fp16")]; + tensor var_12814_begin_0 = const()[name = tensor("op_12814_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_12814_end_0 = const()[name = tensor("op_12814_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_12814_end_mask_0 = const()[name = tensor("op_12814_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_12814_cast_fp16 = slice_by_index(begin = var_12814_begin_0, end = var_12814_end_0, end_mask = var_12814_end_mask_0, x = var_12506_cast_fp16)[name = tensor("op_12814_cast_fp16")]; + tensor var_12821_begin_0 = const()[name = tensor("op_12821_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_12821_end_0 = const()[name = tensor("op_12821_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_12821_end_mask_0 = const()[name = tensor("op_12821_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_12821_cast_fp16 = slice_by_index(begin = var_12821_begin_0, end = var_12821_end_0, end_mask = var_12821_end_mask_0, x = var_12506_cast_fp16)[name = tensor("op_12821_cast_fp16")]; + tensor var_12828_begin_0 = const()[name = tensor("op_12828_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_12828_end_0 = const()[name = tensor("op_12828_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_12828_end_mask_0 = const()[name = tensor("op_12828_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_12828_cast_fp16 = slice_by_index(begin = var_12828_begin_0, end = var_12828_end_0, end_mask = var_12828_end_mask_0, x = var_12506_cast_fp16)[name = tensor("op_12828_cast_fp16")]; + tensor var_12835_begin_0 = const()[name = tensor("op_12835_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_12835_end_0 = const()[name = tensor("op_12835_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_12835_end_mask_0 = const()[name = tensor("op_12835_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_12835_cast_fp16 = slice_by_index(begin = var_12835_begin_0, end = var_12835_end_0, end_mask = var_12835_end_mask_0, x = var_12510_cast_fp16)[name = tensor("op_12835_cast_fp16")]; + tensor var_12842_begin_0 = const()[name = tensor("op_12842_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_12842_end_0 = const()[name = tensor("op_12842_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_12842_end_mask_0 = const()[name = tensor("op_12842_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_12842_cast_fp16 = slice_by_index(begin = var_12842_begin_0, end = var_12842_end_0, end_mask = var_12842_end_mask_0, x = var_12510_cast_fp16)[name = tensor("op_12842_cast_fp16")]; + tensor var_12849_begin_0 = const()[name = tensor("op_12849_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_12849_end_0 = const()[name = tensor("op_12849_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_12849_end_mask_0 = const()[name = tensor("op_12849_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_12849_cast_fp16 = slice_by_index(begin = var_12849_begin_0, end = var_12849_end_0, end_mask = var_12849_end_mask_0, x = var_12510_cast_fp16)[name = tensor("op_12849_cast_fp16")]; + tensor var_12856_begin_0 = const()[name = tensor("op_12856_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_12856_end_0 = const()[name = tensor("op_12856_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_12856_end_mask_0 = const()[name = tensor("op_12856_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_12856_cast_fp16 = slice_by_index(begin = var_12856_begin_0, end = var_12856_end_0, end_mask = var_12856_end_mask_0, x = var_12510_cast_fp16)[name = tensor("op_12856_cast_fp16")]; + tensor var_12863_begin_0 = const()[name = tensor("op_12863_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_12863_end_0 = const()[name = tensor("op_12863_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_12863_end_mask_0 = const()[name = tensor("op_12863_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_12863_cast_fp16 = slice_by_index(begin = var_12863_begin_0, end = var_12863_end_0, end_mask = var_12863_end_mask_0, x = var_12514_cast_fp16)[name = tensor("op_12863_cast_fp16")]; + tensor var_12870_begin_0 = const()[name = tensor("op_12870_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_12870_end_0 = const()[name = tensor("op_12870_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_12870_end_mask_0 = const()[name = tensor("op_12870_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_12870_cast_fp16 = slice_by_index(begin = var_12870_begin_0, end = var_12870_end_0, end_mask = var_12870_end_mask_0, x = var_12514_cast_fp16)[name = tensor("op_12870_cast_fp16")]; + tensor var_12877_begin_0 = const()[name = tensor("op_12877_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_12877_end_0 = const()[name = tensor("op_12877_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_12877_end_mask_0 = const()[name = tensor("op_12877_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_12877_cast_fp16 = slice_by_index(begin = var_12877_begin_0, end = var_12877_end_0, end_mask = var_12877_end_mask_0, x = var_12514_cast_fp16)[name = tensor("op_12877_cast_fp16")]; + tensor var_12884_begin_0 = const()[name = tensor("op_12884_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_12884_end_0 = const()[name = tensor("op_12884_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_12884_end_mask_0 = const()[name = tensor("op_12884_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_12884_cast_fp16 = slice_by_index(begin = var_12884_begin_0, end = var_12884_end_0, end_mask = var_12884_end_mask_0, x = var_12514_cast_fp16)[name = tensor("op_12884_cast_fp16")]; + tensor var_12891_begin_0 = const()[name = tensor("op_12891_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_12891_end_0 = const()[name = tensor("op_12891_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_12891_end_mask_0 = const()[name = tensor("op_12891_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_12891_cast_fp16 = slice_by_index(begin = var_12891_begin_0, end = var_12891_end_0, end_mask = var_12891_end_mask_0, x = var_12518_cast_fp16)[name = tensor("op_12891_cast_fp16")]; + tensor var_12898_begin_0 = const()[name = tensor("op_12898_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_12898_end_0 = const()[name = tensor("op_12898_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_12898_end_mask_0 = const()[name = tensor("op_12898_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_12898_cast_fp16 = slice_by_index(begin = var_12898_begin_0, end = var_12898_end_0, end_mask = var_12898_end_mask_0, x = var_12518_cast_fp16)[name = tensor("op_12898_cast_fp16")]; + tensor var_12905_begin_0 = const()[name = tensor("op_12905_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_12905_end_0 = const()[name = tensor("op_12905_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_12905_end_mask_0 = const()[name = tensor("op_12905_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_12905_cast_fp16 = slice_by_index(begin = var_12905_begin_0, end = var_12905_end_0, end_mask = var_12905_end_mask_0, x = var_12518_cast_fp16)[name = tensor("op_12905_cast_fp16")]; + tensor var_12912_begin_0 = const()[name = tensor("op_12912_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_12912_end_0 = const()[name = tensor("op_12912_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_12912_end_mask_0 = const()[name = tensor("op_12912_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_12912_cast_fp16 = slice_by_index(begin = var_12912_begin_0, end = var_12912_end_0, end_mask = var_12912_end_mask_0, x = var_12518_cast_fp16)[name = tensor("op_12912_cast_fp16")]; + tensor var_12919_begin_0 = const()[name = tensor("op_12919_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_12919_end_0 = const()[name = tensor("op_12919_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_12919_end_mask_0 = const()[name = tensor("op_12919_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_12919_cast_fp16 = slice_by_index(begin = var_12919_begin_0, end = var_12919_end_0, end_mask = var_12919_end_mask_0, x = var_12522_cast_fp16)[name = tensor("op_12919_cast_fp16")]; + tensor var_12926_begin_0 = const()[name = tensor("op_12926_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_12926_end_0 = const()[name = tensor("op_12926_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_12926_end_mask_0 = const()[name = tensor("op_12926_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_12926_cast_fp16 = slice_by_index(begin = var_12926_begin_0, end = var_12926_end_0, end_mask = var_12926_end_mask_0, x = var_12522_cast_fp16)[name = tensor("op_12926_cast_fp16")]; + tensor var_12933_begin_0 = const()[name = tensor("op_12933_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_12933_end_0 = const()[name = tensor("op_12933_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_12933_end_mask_0 = const()[name = tensor("op_12933_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_12933_cast_fp16 = slice_by_index(begin = var_12933_begin_0, end = var_12933_end_0, end_mask = var_12933_end_mask_0, x = var_12522_cast_fp16)[name = tensor("op_12933_cast_fp16")]; + tensor var_12940_begin_0 = const()[name = tensor("op_12940_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_12940_end_0 = const()[name = tensor("op_12940_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_12940_end_mask_0 = const()[name = tensor("op_12940_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_12940_cast_fp16 = slice_by_index(begin = var_12940_begin_0, end = var_12940_end_0, end_mask = var_12940_end_mask_0, x = var_12522_cast_fp16)[name = tensor("op_12940_cast_fp16")]; + tensor var_12947_begin_0 = const()[name = tensor("op_12947_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_12947_end_0 = const()[name = tensor("op_12947_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_12947_end_mask_0 = const()[name = tensor("op_12947_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_12947_cast_fp16 = slice_by_index(begin = var_12947_begin_0, end = var_12947_end_0, end_mask = var_12947_end_mask_0, x = var_12526_cast_fp16)[name = tensor("op_12947_cast_fp16")]; + tensor var_12954_begin_0 = const()[name = tensor("op_12954_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_12954_end_0 = const()[name = tensor("op_12954_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_12954_end_mask_0 = const()[name = tensor("op_12954_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_12954_cast_fp16 = slice_by_index(begin = var_12954_begin_0, end = var_12954_end_0, end_mask = var_12954_end_mask_0, x = var_12526_cast_fp16)[name = tensor("op_12954_cast_fp16")]; + tensor var_12961_begin_0 = const()[name = tensor("op_12961_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_12961_end_0 = const()[name = tensor("op_12961_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_12961_end_mask_0 = const()[name = tensor("op_12961_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_12961_cast_fp16 = slice_by_index(begin = var_12961_begin_0, end = var_12961_end_0, end_mask = var_12961_end_mask_0, x = var_12526_cast_fp16)[name = tensor("op_12961_cast_fp16")]; + tensor var_12968_begin_0 = const()[name = tensor("op_12968_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_12968_end_0 = const()[name = tensor("op_12968_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_12968_end_mask_0 = const()[name = tensor("op_12968_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_12968_cast_fp16 = slice_by_index(begin = var_12968_begin_0, end = var_12968_end_0, end_mask = var_12968_end_mask_0, x = var_12526_cast_fp16)[name = tensor("op_12968_cast_fp16")]; + tensor var_12975_begin_0 = const()[name = tensor("op_12975_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_12975_end_0 = const()[name = tensor("op_12975_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_12975_end_mask_0 = const()[name = tensor("op_12975_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_12975_cast_fp16 = slice_by_index(begin = var_12975_begin_0, end = var_12975_end_0, end_mask = var_12975_end_mask_0, x = var_12530_cast_fp16)[name = tensor("op_12975_cast_fp16")]; + tensor var_12982_begin_0 = const()[name = tensor("op_12982_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_12982_end_0 = const()[name = tensor("op_12982_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_12982_end_mask_0 = const()[name = tensor("op_12982_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_12982_cast_fp16 = slice_by_index(begin = var_12982_begin_0, end = var_12982_end_0, end_mask = var_12982_end_mask_0, x = var_12530_cast_fp16)[name = tensor("op_12982_cast_fp16")]; + tensor var_12989_begin_0 = const()[name = tensor("op_12989_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_12989_end_0 = const()[name = tensor("op_12989_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_12989_end_mask_0 = const()[name = tensor("op_12989_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_12989_cast_fp16 = slice_by_index(begin = var_12989_begin_0, end = var_12989_end_0, end_mask = var_12989_end_mask_0, x = var_12530_cast_fp16)[name = tensor("op_12989_cast_fp16")]; + tensor var_12996_begin_0 = const()[name = tensor("op_12996_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_12996_end_0 = const()[name = tensor("op_12996_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_12996_end_mask_0 = const()[name = tensor("op_12996_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_12996_cast_fp16 = slice_by_index(begin = var_12996_begin_0, end = var_12996_end_0, end_mask = var_12996_end_mask_0, x = var_12530_cast_fp16)[name = tensor("op_12996_cast_fp16")]; + tensor var_13003_begin_0 = const()[name = tensor("op_13003_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_13003_end_0 = const()[name = tensor("op_13003_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_13003_end_mask_0 = const()[name = tensor("op_13003_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_13003_cast_fp16 = slice_by_index(begin = var_13003_begin_0, end = var_13003_end_0, end_mask = var_13003_end_mask_0, x = var_12534_cast_fp16)[name = tensor("op_13003_cast_fp16")]; + tensor var_13010_begin_0 = const()[name = tensor("op_13010_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_13010_end_0 = const()[name = tensor("op_13010_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_13010_end_mask_0 = const()[name = tensor("op_13010_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_13010_cast_fp16 = slice_by_index(begin = var_13010_begin_0, end = var_13010_end_0, end_mask = var_13010_end_mask_0, x = var_12534_cast_fp16)[name = tensor("op_13010_cast_fp16")]; + tensor var_13017_begin_0 = const()[name = tensor("op_13017_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_13017_end_0 = const()[name = tensor("op_13017_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_13017_end_mask_0 = const()[name = tensor("op_13017_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_13017_cast_fp16 = slice_by_index(begin = var_13017_begin_0, end = var_13017_end_0, end_mask = var_13017_end_mask_0, x = var_12534_cast_fp16)[name = tensor("op_13017_cast_fp16")]; + tensor var_13024_begin_0 = const()[name = tensor("op_13024_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_13024_end_0 = const()[name = tensor("op_13024_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_13024_end_mask_0 = const()[name = tensor("op_13024_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_13024_cast_fp16 = slice_by_index(begin = var_13024_begin_0, end = var_13024_end_0, end_mask = var_13024_end_mask_0, x = var_12534_cast_fp16)[name = tensor("op_13024_cast_fp16")]; + tensor var_13031_begin_0 = const()[name = tensor("op_13031_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_13031_end_0 = const()[name = tensor("op_13031_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_13031_end_mask_0 = const()[name = tensor("op_13031_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_13031_cast_fp16 = slice_by_index(begin = var_13031_begin_0, end = var_13031_end_0, end_mask = var_13031_end_mask_0, x = var_12538_cast_fp16)[name = tensor("op_13031_cast_fp16")]; + tensor var_13038_begin_0 = const()[name = tensor("op_13038_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_13038_end_0 = const()[name = tensor("op_13038_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_13038_end_mask_0 = const()[name = tensor("op_13038_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_13038_cast_fp16 = slice_by_index(begin = var_13038_begin_0, end = var_13038_end_0, end_mask = var_13038_end_mask_0, x = var_12538_cast_fp16)[name = tensor("op_13038_cast_fp16")]; + tensor var_13045_begin_0 = const()[name = tensor("op_13045_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_13045_end_0 = const()[name = tensor("op_13045_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_13045_end_mask_0 = const()[name = tensor("op_13045_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_13045_cast_fp16 = slice_by_index(begin = var_13045_begin_0, end = var_13045_end_0, end_mask = var_13045_end_mask_0, x = var_12538_cast_fp16)[name = tensor("op_13045_cast_fp16")]; + tensor var_13052_begin_0 = const()[name = tensor("op_13052_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_13052_end_0 = const()[name = tensor("op_13052_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_13052_end_mask_0 = const()[name = tensor("op_13052_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_13052_cast_fp16 = slice_by_index(begin = var_13052_begin_0, end = var_13052_end_0, end_mask = var_13052_end_mask_0, x = var_12538_cast_fp16)[name = tensor("op_13052_cast_fp16")]; + tensor var_13059_begin_0 = const()[name = tensor("op_13059_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_13059_end_0 = const()[name = tensor("op_13059_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_13059_end_mask_0 = const()[name = tensor("op_13059_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_13059_cast_fp16 = slice_by_index(begin = var_13059_begin_0, end = var_13059_end_0, end_mask = var_13059_end_mask_0, x = var_12542_cast_fp16)[name = tensor("op_13059_cast_fp16")]; + tensor var_13066_begin_0 = const()[name = tensor("op_13066_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_13066_end_0 = const()[name = tensor("op_13066_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_13066_end_mask_0 = const()[name = tensor("op_13066_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_13066_cast_fp16 = slice_by_index(begin = var_13066_begin_0, end = var_13066_end_0, end_mask = var_13066_end_mask_0, x = var_12542_cast_fp16)[name = tensor("op_13066_cast_fp16")]; + tensor var_13073_begin_0 = const()[name = tensor("op_13073_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_13073_end_0 = const()[name = tensor("op_13073_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_13073_end_mask_0 = const()[name = tensor("op_13073_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_13073_cast_fp16 = slice_by_index(begin = var_13073_begin_0, end = var_13073_end_0, end_mask = var_13073_end_mask_0, x = var_12542_cast_fp16)[name = tensor("op_13073_cast_fp16")]; + tensor var_13080_begin_0 = const()[name = tensor("op_13080_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_13080_end_0 = const()[name = tensor("op_13080_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_13080_end_mask_0 = const()[name = tensor("op_13080_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_13080_cast_fp16 = slice_by_index(begin = var_13080_begin_0, end = var_13080_end_0, end_mask = var_13080_end_mask_0, x = var_12542_cast_fp16)[name = tensor("op_13080_cast_fp16")]; + tensor var_13087_begin_0 = const()[name = tensor("op_13087_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_13087_end_0 = const()[name = tensor("op_13087_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_13087_end_mask_0 = const()[name = tensor("op_13087_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_13087_cast_fp16 = slice_by_index(begin = var_13087_begin_0, end = var_13087_end_0, end_mask = var_13087_end_mask_0, x = var_12546_cast_fp16)[name = tensor("op_13087_cast_fp16")]; + tensor var_13094_begin_0 = const()[name = tensor("op_13094_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_13094_end_0 = const()[name = tensor("op_13094_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_13094_end_mask_0 = const()[name = tensor("op_13094_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_13094_cast_fp16 = slice_by_index(begin = var_13094_begin_0, end = var_13094_end_0, end_mask = var_13094_end_mask_0, x = var_12546_cast_fp16)[name = tensor("op_13094_cast_fp16")]; + tensor var_13101_begin_0 = const()[name = tensor("op_13101_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_13101_end_0 = const()[name = tensor("op_13101_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_13101_end_mask_0 = const()[name = tensor("op_13101_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_13101_cast_fp16 = slice_by_index(begin = var_13101_begin_0, end = var_13101_end_0, end_mask = var_13101_end_mask_0, x = var_12546_cast_fp16)[name = tensor("op_13101_cast_fp16")]; + tensor var_13108_begin_0 = const()[name = tensor("op_13108_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_13108_end_0 = const()[name = tensor("op_13108_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_13108_end_mask_0 = const()[name = tensor("op_13108_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_13108_cast_fp16 = slice_by_index(begin = var_13108_begin_0, end = var_13108_end_0, end_mask = var_13108_end_mask_0, x = var_12546_cast_fp16)[name = tensor("op_13108_cast_fp16")]; + tensor k_17_perm_0 = const()[name = tensor("k_17_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_13113_begin_0 = const()[name = tensor("op_13113_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_13113_end_0 = const()[name = tensor("op_13113_end_0"), val = tensor([1, 1500, 1, 64])]; + tensor var_13113_end_mask_0 = const()[name = tensor("op_13113_end_mask_0"), val = tensor([true, true, true, false])]; + tensor transpose_23 = transpose(perm = k_17_perm_0, x = key_17_cast_fp16)[name = tensor("transpose_23")]; + tensor var_13113_cast_fp16 = slice_by_index(begin = var_13113_begin_0, end = var_13113_end_0, end_mask = var_13113_end_mask_0, x = transpose_23)[name = tensor("op_13113_cast_fp16")]; + tensor var_13117_begin_0 = const()[name = tensor("op_13117_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor var_13117_end_0 = const()[name = tensor("op_13117_end_0"), val = tensor([1, 1500, 1, 128])]; + tensor var_13117_end_mask_0 = const()[name = tensor("op_13117_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_13117_cast_fp16 = slice_by_index(begin = var_13117_begin_0, end = var_13117_end_0, end_mask = var_13117_end_mask_0, x = transpose_23)[name = tensor("op_13117_cast_fp16")]; + tensor var_13121_begin_0 = const()[name = tensor("op_13121_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_13121_end_0 = const()[name = tensor("op_13121_end_0"), val = tensor([1, 1500, 1, 192])]; + tensor var_13121_end_mask_0 = const()[name = tensor("op_13121_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_13121_cast_fp16 = slice_by_index(begin = var_13121_begin_0, end = var_13121_end_0, end_mask = var_13121_end_mask_0, x = transpose_23)[name = tensor("op_13121_cast_fp16")]; + tensor var_13125_begin_0 = const()[name = tensor("op_13125_begin_0"), val = tensor([0, 0, 0, 192])]; + tensor var_13125_end_0 = const()[name = tensor("op_13125_end_0"), val = tensor([1, 1500, 1, 256])]; + tensor var_13125_end_mask_0 = const()[name = tensor("op_13125_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_13125_cast_fp16 = slice_by_index(begin = var_13125_begin_0, end = var_13125_end_0, end_mask = var_13125_end_mask_0, x = transpose_23)[name = tensor("op_13125_cast_fp16")]; + tensor var_13129_begin_0 = const()[name = tensor("op_13129_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_13129_end_0 = const()[name = tensor("op_13129_end_0"), val = tensor([1, 1500, 1, 320])]; + tensor var_13129_end_mask_0 = const()[name = tensor("op_13129_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_13129_cast_fp16 = slice_by_index(begin = var_13129_begin_0, end = var_13129_end_0, end_mask = var_13129_end_mask_0, x = transpose_23)[name = tensor("op_13129_cast_fp16")]; + tensor var_13133_begin_0 = const()[name = tensor("op_13133_begin_0"), val = tensor([0, 0, 0, 320])]; + tensor var_13133_end_0 = const()[name = tensor("op_13133_end_0"), val = tensor([1, 1500, 1, 384])]; + tensor var_13133_end_mask_0 = const()[name = tensor("op_13133_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_13133_cast_fp16 = slice_by_index(begin = var_13133_begin_0, end = var_13133_end_0, end_mask = var_13133_end_mask_0, x = transpose_23)[name = tensor("op_13133_cast_fp16")]; + tensor var_13137_begin_0 = const()[name = tensor("op_13137_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_13137_end_0 = const()[name = tensor("op_13137_end_0"), val = tensor([1, 1500, 1, 448])]; + tensor var_13137_end_mask_0 = const()[name = tensor("op_13137_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_13137_cast_fp16 = slice_by_index(begin = var_13137_begin_0, end = var_13137_end_0, end_mask = var_13137_end_mask_0, x = transpose_23)[name = tensor("op_13137_cast_fp16")]; + tensor var_13141_begin_0 = const()[name = tensor("op_13141_begin_0"), val = tensor([0, 0, 0, 448])]; + tensor var_13141_end_0 = const()[name = tensor("op_13141_end_0"), val = tensor([1, 1500, 1, 512])]; + tensor var_13141_end_mask_0 = const()[name = tensor("op_13141_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_13141_cast_fp16 = slice_by_index(begin = var_13141_begin_0, end = var_13141_end_0, end_mask = var_13141_end_mask_0, x = transpose_23)[name = tensor("op_13141_cast_fp16")]; + tensor var_13145_begin_0 = const()[name = tensor("op_13145_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_13145_end_0 = const()[name = tensor("op_13145_end_0"), val = tensor([1, 1500, 1, 576])]; + tensor var_13145_end_mask_0 = const()[name = tensor("op_13145_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_13145_cast_fp16 = slice_by_index(begin = var_13145_begin_0, end = var_13145_end_0, end_mask = var_13145_end_mask_0, x = transpose_23)[name = tensor("op_13145_cast_fp16")]; + tensor var_13149_begin_0 = const()[name = tensor("op_13149_begin_0"), val = tensor([0, 0, 0, 576])]; + tensor var_13149_end_0 = const()[name = tensor("op_13149_end_0"), val = tensor([1, 1500, 1, 640])]; + tensor var_13149_end_mask_0 = const()[name = tensor("op_13149_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_13149_cast_fp16 = slice_by_index(begin = var_13149_begin_0, end = var_13149_end_0, end_mask = var_13149_end_mask_0, x = transpose_23)[name = tensor("op_13149_cast_fp16")]; + tensor var_13153_begin_0 = const()[name = tensor("op_13153_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_13153_end_0 = const()[name = tensor("op_13153_end_0"), val = tensor([1, 1500, 1, 704])]; + tensor var_13153_end_mask_0 = const()[name = tensor("op_13153_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_13153_cast_fp16 = slice_by_index(begin = var_13153_begin_0, end = var_13153_end_0, end_mask = var_13153_end_mask_0, x = transpose_23)[name = tensor("op_13153_cast_fp16")]; + tensor var_13157_begin_0 = const()[name = tensor("op_13157_begin_0"), val = tensor([0, 0, 0, 704])]; + tensor var_13157_end_0 = const()[name = tensor("op_13157_end_0"), val = tensor([1, 1500, 1, 768])]; + tensor var_13157_end_mask_0 = const()[name = tensor("op_13157_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_13157_cast_fp16 = slice_by_index(begin = var_13157_begin_0, end = var_13157_end_0, end_mask = var_13157_end_mask_0, x = transpose_23)[name = tensor("op_13157_cast_fp16")]; + tensor var_13161_begin_0 = const()[name = tensor("op_13161_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_13161_end_0 = const()[name = tensor("op_13161_end_0"), val = tensor([1, 1500, 1, 832])]; + tensor var_13161_end_mask_0 = const()[name = tensor("op_13161_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_13161_cast_fp16 = slice_by_index(begin = var_13161_begin_0, end = var_13161_end_0, end_mask = var_13161_end_mask_0, x = transpose_23)[name = tensor("op_13161_cast_fp16")]; + tensor var_13165_begin_0 = const()[name = tensor("op_13165_begin_0"), val = tensor([0, 0, 0, 832])]; + tensor var_13165_end_0 = const()[name = tensor("op_13165_end_0"), val = tensor([1, 1500, 1, 896])]; + tensor var_13165_end_mask_0 = const()[name = tensor("op_13165_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_13165_cast_fp16 = slice_by_index(begin = var_13165_begin_0, end = var_13165_end_0, end_mask = var_13165_end_mask_0, x = transpose_23)[name = tensor("op_13165_cast_fp16")]; + tensor var_13169_begin_0 = const()[name = tensor("op_13169_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_13169_end_0 = const()[name = tensor("op_13169_end_0"), val = tensor([1, 1500, 1, 960])]; + tensor var_13169_end_mask_0 = const()[name = tensor("op_13169_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_13169_cast_fp16 = slice_by_index(begin = var_13169_begin_0, end = var_13169_end_0, end_mask = var_13169_end_mask_0, x = transpose_23)[name = tensor("op_13169_cast_fp16")]; + tensor var_13173_begin_0 = const()[name = tensor("op_13173_begin_0"), val = tensor([0, 0, 0, 960])]; + tensor var_13173_end_0 = const()[name = tensor("op_13173_end_0"), val = tensor([1, 1500, 1, 1024])]; + tensor var_13173_end_mask_0 = const()[name = tensor("op_13173_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_13173_cast_fp16 = slice_by_index(begin = var_13173_begin_0, end = var_13173_end_0, end_mask = var_13173_end_mask_0, x = transpose_23)[name = tensor("op_13173_cast_fp16")]; + tensor var_13177_begin_0 = const()[name = tensor("op_13177_begin_0"), val = tensor([0, 0, 0, 1024])]; + tensor var_13177_end_0 = const()[name = tensor("op_13177_end_0"), val = tensor([1, 1500, 1, 1088])]; + tensor var_13177_end_mask_0 = const()[name = tensor("op_13177_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_13177_cast_fp16 = slice_by_index(begin = var_13177_begin_0, end = var_13177_end_0, end_mask = var_13177_end_mask_0, x = transpose_23)[name = tensor("op_13177_cast_fp16")]; + tensor var_13181_begin_0 = const()[name = tensor("op_13181_begin_0"), val = tensor([0, 0, 0, 1088])]; + tensor var_13181_end_0 = const()[name = tensor("op_13181_end_0"), val = tensor([1, 1500, 1, 1152])]; + tensor var_13181_end_mask_0 = const()[name = tensor("op_13181_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_13181_cast_fp16 = slice_by_index(begin = var_13181_begin_0, end = var_13181_end_0, end_mask = var_13181_end_mask_0, x = transpose_23)[name = tensor("op_13181_cast_fp16")]; + tensor var_13185_begin_0 = const()[name = tensor("op_13185_begin_0"), val = tensor([0, 0, 0, 1152])]; + tensor var_13185_end_0 = const()[name = tensor("op_13185_end_0"), val = tensor([1, 1500, 1, 1216])]; + tensor var_13185_end_mask_0 = const()[name = tensor("op_13185_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_13185_cast_fp16 = slice_by_index(begin = var_13185_begin_0, end = var_13185_end_0, end_mask = var_13185_end_mask_0, x = transpose_23)[name = tensor("op_13185_cast_fp16")]; + tensor var_13189_begin_0 = const()[name = tensor("op_13189_begin_0"), val = tensor([0, 0, 0, 1216])]; + tensor var_13189_end_0 = const()[name = tensor("op_13189_end_0"), val = tensor([1, 1500, 1, 1280])]; + tensor var_13189_end_mask_0 = const()[name = tensor("op_13189_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_13189_cast_fp16 = slice_by_index(begin = var_13189_begin_0, end = var_13189_end_0, end_mask = var_13189_end_mask_0, x = transpose_23)[name = tensor("op_13189_cast_fp16")]; + tensor var_13191_begin_0 = const()[name = tensor("op_13191_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_13191_end_0 = const()[name = tensor("op_13191_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_13191_end_mask_0 = const()[name = tensor("op_13191_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_13191_cast_fp16 = slice_by_index(begin = var_13191_begin_0, end = var_13191_end_0, end_mask = var_13191_end_mask_0, x = value_17_cast_fp16)[name = tensor("op_13191_cast_fp16")]; + tensor var_13195_begin_0 = const()[name = tensor("op_13195_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_13195_end_0 = const()[name = tensor("op_13195_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_13195_end_mask_0 = const()[name = tensor("op_13195_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_13195_cast_fp16 = slice_by_index(begin = var_13195_begin_0, end = var_13195_end_0, end_mask = var_13195_end_mask_0, x = value_17_cast_fp16)[name = tensor("op_13195_cast_fp16")]; + tensor var_13199_begin_0 = const()[name = tensor("op_13199_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_13199_end_0 = const()[name = tensor("op_13199_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_13199_end_mask_0 = const()[name = tensor("op_13199_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_13199_cast_fp16 = slice_by_index(begin = var_13199_begin_0, end = var_13199_end_0, end_mask = var_13199_end_mask_0, x = value_17_cast_fp16)[name = tensor("op_13199_cast_fp16")]; + tensor var_13203_begin_0 = const()[name = tensor("op_13203_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_13203_end_0 = const()[name = tensor("op_13203_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_13203_end_mask_0 = const()[name = tensor("op_13203_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_13203_cast_fp16 = slice_by_index(begin = var_13203_begin_0, end = var_13203_end_0, end_mask = var_13203_end_mask_0, x = value_17_cast_fp16)[name = tensor("op_13203_cast_fp16")]; + tensor var_13207_begin_0 = const()[name = tensor("op_13207_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_13207_end_0 = const()[name = tensor("op_13207_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_13207_end_mask_0 = const()[name = tensor("op_13207_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_13207_cast_fp16 = slice_by_index(begin = var_13207_begin_0, end = var_13207_end_0, end_mask = var_13207_end_mask_0, x = value_17_cast_fp16)[name = tensor("op_13207_cast_fp16")]; + tensor var_13211_begin_0 = const()[name = tensor("op_13211_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_13211_end_0 = const()[name = tensor("op_13211_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_13211_end_mask_0 = const()[name = tensor("op_13211_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_13211_cast_fp16 = slice_by_index(begin = var_13211_begin_0, end = var_13211_end_0, end_mask = var_13211_end_mask_0, x = value_17_cast_fp16)[name = tensor("op_13211_cast_fp16")]; + tensor var_13215_begin_0 = const()[name = tensor("op_13215_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_13215_end_0 = const()[name = tensor("op_13215_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_13215_end_mask_0 = const()[name = tensor("op_13215_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_13215_cast_fp16 = slice_by_index(begin = var_13215_begin_0, end = var_13215_end_0, end_mask = var_13215_end_mask_0, x = value_17_cast_fp16)[name = tensor("op_13215_cast_fp16")]; + tensor var_13219_begin_0 = const()[name = tensor("op_13219_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_13219_end_0 = const()[name = tensor("op_13219_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_13219_end_mask_0 = const()[name = tensor("op_13219_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_13219_cast_fp16 = slice_by_index(begin = var_13219_begin_0, end = var_13219_end_0, end_mask = var_13219_end_mask_0, x = value_17_cast_fp16)[name = tensor("op_13219_cast_fp16")]; + tensor var_13223_begin_0 = const()[name = tensor("op_13223_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_13223_end_0 = const()[name = tensor("op_13223_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_13223_end_mask_0 = const()[name = tensor("op_13223_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_13223_cast_fp16 = slice_by_index(begin = var_13223_begin_0, end = var_13223_end_0, end_mask = var_13223_end_mask_0, x = value_17_cast_fp16)[name = tensor("op_13223_cast_fp16")]; + tensor var_13227_begin_0 = const()[name = tensor("op_13227_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_13227_end_0 = const()[name = tensor("op_13227_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_13227_end_mask_0 = const()[name = tensor("op_13227_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_13227_cast_fp16 = slice_by_index(begin = var_13227_begin_0, end = var_13227_end_0, end_mask = var_13227_end_mask_0, x = value_17_cast_fp16)[name = tensor("op_13227_cast_fp16")]; + tensor var_13231_begin_0 = const()[name = tensor("op_13231_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_13231_end_0 = const()[name = tensor("op_13231_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_13231_end_mask_0 = const()[name = tensor("op_13231_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_13231_cast_fp16 = slice_by_index(begin = var_13231_begin_0, end = var_13231_end_0, end_mask = var_13231_end_mask_0, x = value_17_cast_fp16)[name = tensor("op_13231_cast_fp16")]; + tensor var_13235_begin_0 = const()[name = tensor("op_13235_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_13235_end_0 = const()[name = tensor("op_13235_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_13235_end_mask_0 = const()[name = tensor("op_13235_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_13235_cast_fp16 = slice_by_index(begin = var_13235_begin_0, end = var_13235_end_0, end_mask = var_13235_end_mask_0, x = value_17_cast_fp16)[name = tensor("op_13235_cast_fp16")]; + tensor var_13239_begin_0 = const()[name = tensor("op_13239_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_13239_end_0 = const()[name = tensor("op_13239_end_0"), val = tensor([1, 832, 1, 1500])]; + tensor var_13239_end_mask_0 = const()[name = tensor("op_13239_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_13239_cast_fp16 = slice_by_index(begin = var_13239_begin_0, end = var_13239_end_0, end_mask = var_13239_end_mask_0, x = value_17_cast_fp16)[name = tensor("op_13239_cast_fp16")]; + tensor var_13243_begin_0 = const()[name = tensor("op_13243_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_13243_end_0 = const()[name = tensor("op_13243_end_0"), val = tensor([1, 896, 1, 1500])]; + tensor var_13243_end_mask_0 = const()[name = tensor("op_13243_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_13243_cast_fp16 = slice_by_index(begin = var_13243_begin_0, end = var_13243_end_0, end_mask = var_13243_end_mask_0, x = value_17_cast_fp16)[name = tensor("op_13243_cast_fp16")]; + tensor var_13247_begin_0 = const()[name = tensor("op_13247_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_13247_end_0 = const()[name = tensor("op_13247_end_0"), val = tensor([1, 960, 1, 1500])]; + tensor var_13247_end_mask_0 = const()[name = tensor("op_13247_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_13247_cast_fp16 = slice_by_index(begin = var_13247_begin_0, end = var_13247_end_0, end_mask = var_13247_end_mask_0, x = value_17_cast_fp16)[name = tensor("op_13247_cast_fp16")]; + tensor var_13251_begin_0 = const()[name = tensor("op_13251_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_13251_end_0 = const()[name = tensor("op_13251_end_0"), val = tensor([1, 1024, 1, 1500])]; + tensor var_13251_end_mask_0 = const()[name = tensor("op_13251_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_13251_cast_fp16 = slice_by_index(begin = var_13251_begin_0, end = var_13251_end_0, end_mask = var_13251_end_mask_0, x = value_17_cast_fp16)[name = tensor("op_13251_cast_fp16")]; + tensor var_13255_begin_0 = const()[name = tensor("op_13255_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_13255_end_0 = const()[name = tensor("op_13255_end_0"), val = tensor([1, 1088, 1, 1500])]; + tensor var_13255_end_mask_0 = const()[name = tensor("op_13255_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_13255_cast_fp16 = slice_by_index(begin = var_13255_begin_0, end = var_13255_end_0, end_mask = var_13255_end_mask_0, x = value_17_cast_fp16)[name = tensor("op_13255_cast_fp16")]; + tensor var_13259_begin_0 = const()[name = tensor("op_13259_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_13259_end_0 = const()[name = tensor("op_13259_end_0"), val = tensor([1, 1152, 1, 1500])]; + tensor var_13259_end_mask_0 = const()[name = tensor("op_13259_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_13259_cast_fp16 = slice_by_index(begin = var_13259_begin_0, end = var_13259_end_0, end_mask = var_13259_end_mask_0, x = value_17_cast_fp16)[name = tensor("op_13259_cast_fp16")]; + tensor var_13263_begin_0 = const()[name = tensor("op_13263_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_13263_end_0 = const()[name = tensor("op_13263_end_0"), val = tensor([1, 1216, 1, 1500])]; + tensor var_13263_end_mask_0 = const()[name = tensor("op_13263_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_13263_cast_fp16 = slice_by_index(begin = var_13263_begin_0, end = var_13263_end_0, end_mask = var_13263_end_mask_0, x = value_17_cast_fp16)[name = tensor("op_13263_cast_fp16")]; + tensor var_13267_begin_0 = const()[name = tensor("op_13267_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_13267_end_0 = const()[name = tensor("op_13267_end_0"), val = tensor([1, 1280, 1, 1500])]; + tensor var_13267_end_mask_0 = const()[name = tensor("op_13267_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_13267_cast_fp16 = slice_by_index(begin = var_13267_begin_0, end = var_13267_end_0, end_mask = var_13267_end_mask_0, x = value_17_cast_fp16)[name = tensor("op_13267_cast_fp16")]; + tensor var_13271_equation_0 = const()[name = tensor("op_13271_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13271_cast_fp16 = einsum(equation = var_13271_equation_0, values = (var_13113_cast_fp16, var_12555_cast_fp16))[name = tensor("op_13271_cast_fp16")]; + tensor var_13272_to_fp16 = const()[name = tensor("op_13272_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1281_cast_fp16 = mul(x = var_13271_cast_fp16, y = var_13272_to_fp16)[name = tensor("aw_chunk_1281_cast_fp16")]; + tensor var_13275_equation_0 = const()[name = tensor("op_13275_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13275_cast_fp16 = einsum(equation = var_13275_equation_0, values = (var_13113_cast_fp16, var_12562_cast_fp16))[name = tensor("op_13275_cast_fp16")]; + tensor var_13276_to_fp16 = const()[name = tensor("op_13276_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1283_cast_fp16 = mul(x = var_13275_cast_fp16, y = var_13276_to_fp16)[name = tensor("aw_chunk_1283_cast_fp16")]; + tensor var_13279_equation_0 = const()[name = tensor("op_13279_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13279_cast_fp16 = einsum(equation = var_13279_equation_0, values = (var_13113_cast_fp16, var_12569_cast_fp16))[name = tensor("op_13279_cast_fp16")]; + tensor var_13280_to_fp16 = const()[name = tensor("op_13280_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1285_cast_fp16 = mul(x = var_13279_cast_fp16, y = var_13280_to_fp16)[name = tensor("aw_chunk_1285_cast_fp16")]; + tensor var_13283_equation_0 = const()[name = tensor("op_13283_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13283_cast_fp16 = einsum(equation = var_13283_equation_0, values = (var_13113_cast_fp16, var_12576_cast_fp16))[name = tensor("op_13283_cast_fp16")]; + tensor var_13284_to_fp16 = const()[name = tensor("op_13284_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1287_cast_fp16 = mul(x = var_13283_cast_fp16, y = var_13284_to_fp16)[name = tensor("aw_chunk_1287_cast_fp16")]; + tensor var_13287_equation_0 = const()[name = tensor("op_13287_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13287_cast_fp16 = einsum(equation = var_13287_equation_0, values = (var_13117_cast_fp16, var_12583_cast_fp16))[name = tensor("op_13287_cast_fp16")]; + tensor var_13288_to_fp16 = const()[name = tensor("op_13288_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1289_cast_fp16 = mul(x = var_13287_cast_fp16, y = var_13288_to_fp16)[name = tensor("aw_chunk_1289_cast_fp16")]; + tensor var_13291_equation_0 = const()[name = tensor("op_13291_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13291_cast_fp16 = einsum(equation = var_13291_equation_0, values = (var_13117_cast_fp16, var_12590_cast_fp16))[name = tensor("op_13291_cast_fp16")]; + tensor var_13292_to_fp16 = const()[name = tensor("op_13292_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1291_cast_fp16 = mul(x = var_13291_cast_fp16, y = var_13292_to_fp16)[name = tensor("aw_chunk_1291_cast_fp16")]; + tensor var_13295_equation_0 = const()[name = tensor("op_13295_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13295_cast_fp16 = einsum(equation = var_13295_equation_0, values = (var_13117_cast_fp16, var_12597_cast_fp16))[name = tensor("op_13295_cast_fp16")]; + tensor var_13296_to_fp16 = const()[name = tensor("op_13296_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1293_cast_fp16 = mul(x = var_13295_cast_fp16, y = var_13296_to_fp16)[name = tensor("aw_chunk_1293_cast_fp16")]; + tensor var_13299_equation_0 = const()[name = tensor("op_13299_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13299_cast_fp16 = einsum(equation = var_13299_equation_0, values = (var_13117_cast_fp16, var_12604_cast_fp16))[name = tensor("op_13299_cast_fp16")]; + tensor var_13300_to_fp16 = const()[name = tensor("op_13300_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1295_cast_fp16 = mul(x = var_13299_cast_fp16, y = var_13300_to_fp16)[name = tensor("aw_chunk_1295_cast_fp16")]; + tensor var_13303_equation_0 = const()[name = tensor("op_13303_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13303_cast_fp16 = einsum(equation = var_13303_equation_0, values = (var_13121_cast_fp16, var_12611_cast_fp16))[name = tensor("op_13303_cast_fp16")]; + tensor var_13304_to_fp16 = const()[name = tensor("op_13304_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1297_cast_fp16 = mul(x = var_13303_cast_fp16, y = var_13304_to_fp16)[name = tensor("aw_chunk_1297_cast_fp16")]; + tensor var_13307_equation_0 = const()[name = tensor("op_13307_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13307_cast_fp16 = einsum(equation = var_13307_equation_0, values = (var_13121_cast_fp16, var_12618_cast_fp16))[name = tensor("op_13307_cast_fp16")]; + tensor var_13308_to_fp16 = const()[name = tensor("op_13308_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1299_cast_fp16 = mul(x = var_13307_cast_fp16, y = var_13308_to_fp16)[name = tensor("aw_chunk_1299_cast_fp16")]; + tensor var_13311_equation_0 = const()[name = tensor("op_13311_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13311_cast_fp16 = einsum(equation = var_13311_equation_0, values = (var_13121_cast_fp16, var_12625_cast_fp16))[name = tensor("op_13311_cast_fp16")]; + tensor var_13312_to_fp16 = const()[name = tensor("op_13312_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1301_cast_fp16 = mul(x = var_13311_cast_fp16, y = var_13312_to_fp16)[name = tensor("aw_chunk_1301_cast_fp16")]; + tensor var_13315_equation_0 = const()[name = tensor("op_13315_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13315_cast_fp16 = einsum(equation = var_13315_equation_0, values = (var_13121_cast_fp16, var_12632_cast_fp16))[name = tensor("op_13315_cast_fp16")]; + tensor var_13316_to_fp16 = const()[name = tensor("op_13316_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1303_cast_fp16 = mul(x = var_13315_cast_fp16, y = var_13316_to_fp16)[name = tensor("aw_chunk_1303_cast_fp16")]; + tensor var_13319_equation_0 = const()[name = tensor("op_13319_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13319_cast_fp16 = einsum(equation = var_13319_equation_0, values = (var_13125_cast_fp16, var_12639_cast_fp16))[name = tensor("op_13319_cast_fp16")]; + tensor var_13320_to_fp16 = const()[name = tensor("op_13320_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1305_cast_fp16 = mul(x = var_13319_cast_fp16, y = var_13320_to_fp16)[name = tensor("aw_chunk_1305_cast_fp16")]; + tensor var_13323_equation_0 = const()[name = tensor("op_13323_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13323_cast_fp16 = einsum(equation = var_13323_equation_0, values = (var_13125_cast_fp16, var_12646_cast_fp16))[name = tensor("op_13323_cast_fp16")]; + tensor var_13324_to_fp16 = const()[name = tensor("op_13324_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1307_cast_fp16 = mul(x = var_13323_cast_fp16, y = var_13324_to_fp16)[name = tensor("aw_chunk_1307_cast_fp16")]; + tensor var_13327_equation_0 = const()[name = tensor("op_13327_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13327_cast_fp16 = einsum(equation = var_13327_equation_0, values = (var_13125_cast_fp16, var_12653_cast_fp16))[name = tensor("op_13327_cast_fp16")]; + tensor var_13328_to_fp16 = const()[name = tensor("op_13328_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1309_cast_fp16 = mul(x = var_13327_cast_fp16, y = var_13328_to_fp16)[name = tensor("aw_chunk_1309_cast_fp16")]; + tensor var_13331_equation_0 = const()[name = tensor("op_13331_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13331_cast_fp16 = einsum(equation = var_13331_equation_0, values = (var_13125_cast_fp16, var_12660_cast_fp16))[name = tensor("op_13331_cast_fp16")]; + tensor var_13332_to_fp16 = const()[name = tensor("op_13332_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1311_cast_fp16 = mul(x = var_13331_cast_fp16, y = var_13332_to_fp16)[name = tensor("aw_chunk_1311_cast_fp16")]; + tensor var_13335_equation_0 = const()[name = tensor("op_13335_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13335_cast_fp16 = einsum(equation = var_13335_equation_0, values = (var_13129_cast_fp16, var_12667_cast_fp16))[name = tensor("op_13335_cast_fp16")]; + tensor var_13336_to_fp16 = const()[name = tensor("op_13336_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1313_cast_fp16 = mul(x = var_13335_cast_fp16, y = var_13336_to_fp16)[name = tensor("aw_chunk_1313_cast_fp16")]; + tensor var_13339_equation_0 = const()[name = tensor("op_13339_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13339_cast_fp16 = einsum(equation = var_13339_equation_0, values = (var_13129_cast_fp16, var_12674_cast_fp16))[name = tensor("op_13339_cast_fp16")]; + tensor var_13340_to_fp16 = const()[name = tensor("op_13340_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1315_cast_fp16 = mul(x = var_13339_cast_fp16, y = var_13340_to_fp16)[name = tensor("aw_chunk_1315_cast_fp16")]; + tensor var_13343_equation_0 = const()[name = tensor("op_13343_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13343_cast_fp16 = einsum(equation = var_13343_equation_0, values = (var_13129_cast_fp16, var_12681_cast_fp16))[name = tensor("op_13343_cast_fp16")]; + tensor var_13344_to_fp16 = const()[name = tensor("op_13344_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1317_cast_fp16 = mul(x = var_13343_cast_fp16, y = var_13344_to_fp16)[name = tensor("aw_chunk_1317_cast_fp16")]; + tensor var_13347_equation_0 = const()[name = tensor("op_13347_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13347_cast_fp16 = einsum(equation = var_13347_equation_0, values = (var_13129_cast_fp16, var_12688_cast_fp16))[name = tensor("op_13347_cast_fp16")]; + tensor var_13348_to_fp16 = const()[name = tensor("op_13348_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1319_cast_fp16 = mul(x = var_13347_cast_fp16, y = var_13348_to_fp16)[name = tensor("aw_chunk_1319_cast_fp16")]; + tensor var_13351_equation_0 = const()[name = tensor("op_13351_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13351_cast_fp16 = einsum(equation = var_13351_equation_0, values = (var_13133_cast_fp16, var_12695_cast_fp16))[name = tensor("op_13351_cast_fp16")]; + tensor var_13352_to_fp16 = const()[name = tensor("op_13352_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1321_cast_fp16 = mul(x = var_13351_cast_fp16, y = var_13352_to_fp16)[name = tensor("aw_chunk_1321_cast_fp16")]; + tensor var_13355_equation_0 = const()[name = tensor("op_13355_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13355_cast_fp16 = einsum(equation = var_13355_equation_0, values = (var_13133_cast_fp16, var_12702_cast_fp16))[name = tensor("op_13355_cast_fp16")]; + tensor var_13356_to_fp16 = const()[name = tensor("op_13356_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1323_cast_fp16 = mul(x = var_13355_cast_fp16, y = var_13356_to_fp16)[name = tensor("aw_chunk_1323_cast_fp16")]; + tensor var_13359_equation_0 = const()[name = tensor("op_13359_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13359_cast_fp16 = einsum(equation = var_13359_equation_0, values = (var_13133_cast_fp16, var_12709_cast_fp16))[name = tensor("op_13359_cast_fp16")]; + tensor var_13360_to_fp16 = const()[name = tensor("op_13360_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1325_cast_fp16 = mul(x = var_13359_cast_fp16, y = var_13360_to_fp16)[name = tensor("aw_chunk_1325_cast_fp16")]; + tensor var_13363_equation_0 = const()[name = tensor("op_13363_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13363_cast_fp16 = einsum(equation = var_13363_equation_0, values = (var_13133_cast_fp16, var_12716_cast_fp16))[name = tensor("op_13363_cast_fp16")]; + tensor var_13364_to_fp16 = const()[name = tensor("op_13364_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1327_cast_fp16 = mul(x = var_13363_cast_fp16, y = var_13364_to_fp16)[name = tensor("aw_chunk_1327_cast_fp16")]; + tensor var_13367_equation_0 = const()[name = tensor("op_13367_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13367_cast_fp16 = einsum(equation = var_13367_equation_0, values = (var_13137_cast_fp16, var_12723_cast_fp16))[name = tensor("op_13367_cast_fp16")]; + tensor var_13368_to_fp16 = const()[name = tensor("op_13368_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1329_cast_fp16 = mul(x = var_13367_cast_fp16, y = var_13368_to_fp16)[name = tensor("aw_chunk_1329_cast_fp16")]; + tensor var_13371_equation_0 = const()[name = tensor("op_13371_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13371_cast_fp16 = einsum(equation = var_13371_equation_0, values = (var_13137_cast_fp16, var_12730_cast_fp16))[name = tensor("op_13371_cast_fp16")]; + tensor var_13372_to_fp16 = const()[name = tensor("op_13372_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1331_cast_fp16 = mul(x = var_13371_cast_fp16, y = var_13372_to_fp16)[name = tensor("aw_chunk_1331_cast_fp16")]; + tensor var_13375_equation_0 = const()[name = tensor("op_13375_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13375_cast_fp16 = einsum(equation = var_13375_equation_0, values = (var_13137_cast_fp16, var_12737_cast_fp16))[name = tensor("op_13375_cast_fp16")]; + tensor var_13376_to_fp16 = const()[name = tensor("op_13376_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1333_cast_fp16 = mul(x = var_13375_cast_fp16, y = var_13376_to_fp16)[name = tensor("aw_chunk_1333_cast_fp16")]; + tensor var_13379_equation_0 = const()[name = tensor("op_13379_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13379_cast_fp16 = einsum(equation = var_13379_equation_0, values = (var_13137_cast_fp16, var_12744_cast_fp16))[name = tensor("op_13379_cast_fp16")]; + tensor var_13380_to_fp16 = const()[name = tensor("op_13380_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1335_cast_fp16 = mul(x = var_13379_cast_fp16, y = var_13380_to_fp16)[name = tensor("aw_chunk_1335_cast_fp16")]; + tensor var_13383_equation_0 = const()[name = tensor("op_13383_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13383_cast_fp16 = einsum(equation = var_13383_equation_0, values = (var_13141_cast_fp16, var_12751_cast_fp16))[name = tensor("op_13383_cast_fp16")]; + tensor var_13384_to_fp16 = const()[name = tensor("op_13384_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1337_cast_fp16 = mul(x = var_13383_cast_fp16, y = var_13384_to_fp16)[name = tensor("aw_chunk_1337_cast_fp16")]; + tensor var_13387_equation_0 = const()[name = tensor("op_13387_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13387_cast_fp16 = einsum(equation = var_13387_equation_0, values = (var_13141_cast_fp16, var_12758_cast_fp16))[name = tensor("op_13387_cast_fp16")]; + tensor var_13388_to_fp16 = const()[name = tensor("op_13388_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1339_cast_fp16 = mul(x = var_13387_cast_fp16, y = var_13388_to_fp16)[name = tensor("aw_chunk_1339_cast_fp16")]; + tensor var_13391_equation_0 = const()[name = tensor("op_13391_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13391_cast_fp16 = einsum(equation = var_13391_equation_0, values = (var_13141_cast_fp16, var_12765_cast_fp16))[name = tensor("op_13391_cast_fp16")]; + tensor var_13392_to_fp16 = const()[name = tensor("op_13392_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1341_cast_fp16 = mul(x = var_13391_cast_fp16, y = var_13392_to_fp16)[name = tensor("aw_chunk_1341_cast_fp16")]; + tensor var_13395_equation_0 = const()[name = tensor("op_13395_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13395_cast_fp16 = einsum(equation = var_13395_equation_0, values = (var_13141_cast_fp16, var_12772_cast_fp16))[name = tensor("op_13395_cast_fp16")]; + tensor var_13396_to_fp16 = const()[name = tensor("op_13396_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1343_cast_fp16 = mul(x = var_13395_cast_fp16, y = var_13396_to_fp16)[name = tensor("aw_chunk_1343_cast_fp16")]; + tensor var_13399_equation_0 = const()[name = tensor("op_13399_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13399_cast_fp16 = einsum(equation = var_13399_equation_0, values = (var_13145_cast_fp16, var_12779_cast_fp16))[name = tensor("op_13399_cast_fp16")]; + tensor var_13400_to_fp16 = const()[name = tensor("op_13400_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1345_cast_fp16 = mul(x = var_13399_cast_fp16, y = var_13400_to_fp16)[name = tensor("aw_chunk_1345_cast_fp16")]; + tensor var_13403_equation_0 = const()[name = tensor("op_13403_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13403_cast_fp16 = einsum(equation = var_13403_equation_0, values = (var_13145_cast_fp16, var_12786_cast_fp16))[name = tensor("op_13403_cast_fp16")]; + tensor var_13404_to_fp16 = const()[name = tensor("op_13404_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1347_cast_fp16 = mul(x = var_13403_cast_fp16, y = var_13404_to_fp16)[name = tensor("aw_chunk_1347_cast_fp16")]; + tensor var_13407_equation_0 = const()[name = tensor("op_13407_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13407_cast_fp16 = einsum(equation = var_13407_equation_0, values = (var_13145_cast_fp16, var_12793_cast_fp16))[name = tensor("op_13407_cast_fp16")]; + tensor var_13408_to_fp16 = const()[name = tensor("op_13408_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1349_cast_fp16 = mul(x = var_13407_cast_fp16, y = var_13408_to_fp16)[name = tensor("aw_chunk_1349_cast_fp16")]; + tensor var_13411_equation_0 = const()[name = tensor("op_13411_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13411_cast_fp16 = einsum(equation = var_13411_equation_0, values = (var_13145_cast_fp16, var_12800_cast_fp16))[name = tensor("op_13411_cast_fp16")]; + tensor var_13412_to_fp16 = const()[name = tensor("op_13412_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1351_cast_fp16 = mul(x = var_13411_cast_fp16, y = var_13412_to_fp16)[name = tensor("aw_chunk_1351_cast_fp16")]; + tensor var_13415_equation_0 = const()[name = tensor("op_13415_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13415_cast_fp16 = einsum(equation = var_13415_equation_0, values = (var_13149_cast_fp16, var_12807_cast_fp16))[name = tensor("op_13415_cast_fp16")]; + tensor var_13416_to_fp16 = const()[name = tensor("op_13416_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1353_cast_fp16 = mul(x = var_13415_cast_fp16, y = var_13416_to_fp16)[name = tensor("aw_chunk_1353_cast_fp16")]; + tensor var_13419_equation_0 = const()[name = tensor("op_13419_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13419_cast_fp16 = einsum(equation = var_13419_equation_0, values = (var_13149_cast_fp16, var_12814_cast_fp16))[name = tensor("op_13419_cast_fp16")]; + tensor var_13420_to_fp16 = const()[name = tensor("op_13420_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1355_cast_fp16 = mul(x = var_13419_cast_fp16, y = var_13420_to_fp16)[name = tensor("aw_chunk_1355_cast_fp16")]; + tensor var_13423_equation_0 = const()[name = tensor("op_13423_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13423_cast_fp16 = einsum(equation = var_13423_equation_0, values = (var_13149_cast_fp16, var_12821_cast_fp16))[name = tensor("op_13423_cast_fp16")]; + tensor var_13424_to_fp16 = const()[name = tensor("op_13424_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1357_cast_fp16 = mul(x = var_13423_cast_fp16, y = var_13424_to_fp16)[name = tensor("aw_chunk_1357_cast_fp16")]; + tensor var_13427_equation_0 = const()[name = tensor("op_13427_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13427_cast_fp16 = einsum(equation = var_13427_equation_0, values = (var_13149_cast_fp16, var_12828_cast_fp16))[name = tensor("op_13427_cast_fp16")]; + tensor var_13428_to_fp16 = const()[name = tensor("op_13428_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1359_cast_fp16 = mul(x = var_13427_cast_fp16, y = var_13428_to_fp16)[name = tensor("aw_chunk_1359_cast_fp16")]; + tensor var_13431_equation_0 = const()[name = tensor("op_13431_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13431_cast_fp16 = einsum(equation = var_13431_equation_0, values = (var_13153_cast_fp16, var_12835_cast_fp16))[name = tensor("op_13431_cast_fp16")]; + tensor var_13432_to_fp16 = const()[name = tensor("op_13432_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1361_cast_fp16 = mul(x = var_13431_cast_fp16, y = var_13432_to_fp16)[name = tensor("aw_chunk_1361_cast_fp16")]; + tensor var_13435_equation_0 = const()[name = tensor("op_13435_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13435_cast_fp16 = einsum(equation = var_13435_equation_0, values = (var_13153_cast_fp16, var_12842_cast_fp16))[name = tensor("op_13435_cast_fp16")]; + tensor var_13436_to_fp16 = const()[name = tensor("op_13436_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1363_cast_fp16 = mul(x = var_13435_cast_fp16, y = var_13436_to_fp16)[name = tensor("aw_chunk_1363_cast_fp16")]; + tensor var_13439_equation_0 = const()[name = tensor("op_13439_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13439_cast_fp16 = einsum(equation = var_13439_equation_0, values = (var_13153_cast_fp16, var_12849_cast_fp16))[name = tensor("op_13439_cast_fp16")]; + tensor var_13440_to_fp16 = const()[name = tensor("op_13440_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1365_cast_fp16 = mul(x = var_13439_cast_fp16, y = var_13440_to_fp16)[name = tensor("aw_chunk_1365_cast_fp16")]; + tensor var_13443_equation_0 = const()[name = tensor("op_13443_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13443_cast_fp16 = einsum(equation = var_13443_equation_0, values = (var_13153_cast_fp16, var_12856_cast_fp16))[name = tensor("op_13443_cast_fp16")]; + tensor var_13444_to_fp16 = const()[name = tensor("op_13444_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1367_cast_fp16 = mul(x = var_13443_cast_fp16, y = var_13444_to_fp16)[name = tensor("aw_chunk_1367_cast_fp16")]; + tensor var_13447_equation_0 = const()[name = tensor("op_13447_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13447_cast_fp16 = einsum(equation = var_13447_equation_0, values = (var_13157_cast_fp16, var_12863_cast_fp16))[name = tensor("op_13447_cast_fp16")]; + tensor var_13448_to_fp16 = const()[name = tensor("op_13448_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1369_cast_fp16 = mul(x = var_13447_cast_fp16, y = var_13448_to_fp16)[name = tensor("aw_chunk_1369_cast_fp16")]; + tensor var_13451_equation_0 = const()[name = tensor("op_13451_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13451_cast_fp16 = einsum(equation = var_13451_equation_0, values = (var_13157_cast_fp16, var_12870_cast_fp16))[name = tensor("op_13451_cast_fp16")]; + tensor var_13452_to_fp16 = const()[name = tensor("op_13452_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1371_cast_fp16 = mul(x = var_13451_cast_fp16, y = var_13452_to_fp16)[name = tensor("aw_chunk_1371_cast_fp16")]; + tensor var_13455_equation_0 = const()[name = tensor("op_13455_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13455_cast_fp16 = einsum(equation = var_13455_equation_0, values = (var_13157_cast_fp16, var_12877_cast_fp16))[name = tensor("op_13455_cast_fp16")]; + tensor var_13456_to_fp16 = const()[name = tensor("op_13456_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1373_cast_fp16 = mul(x = var_13455_cast_fp16, y = var_13456_to_fp16)[name = tensor("aw_chunk_1373_cast_fp16")]; + tensor var_13459_equation_0 = const()[name = tensor("op_13459_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13459_cast_fp16 = einsum(equation = var_13459_equation_0, values = (var_13157_cast_fp16, var_12884_cast_fp16))[name = tensor("op_13459_cast_fp16")]; + tensor var_13460_to_fp16 = const()[name = tensor("op_13460_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1375_cast_fp16 = mul(x = var_13459_cast_fp16, y = var_13460_to_fp16)[name = tensor("aw_chunk_1375_cast_fp16")]; + tensor var_13463_equation_0 = const()[name = tensor("op_13463_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13463_cast_fp16 = einsum(equation = var_13463_equation_0, values = (var_13161_cast_fp16, var_12891_cast_fp16))[name = tensor("op_13463_cast_fp16")]; + tensor var_13464_to_fp16 = const()[name = tensor("op_13464_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1377_cast_fp16 = mul(x = var_13463_cast_fp16, y = var_13464_to_fp16)[name = tensor("aw_chunk_1377_cast_fp16")]; + tensor var_13467_equation_0 = const()[name = tensor("op_13467_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13467_cast_fp16 = einsum(equation = var_13467_equation_0, values = (var_13161_cast_fp16, var_12898_cast_fp16))[name = tensor("op_13467_cast_fp16")]; + tensor var_13468_to_fp16 = const()[name = tensor("op_13468_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1379_cast_fp16 = mul(x = var_13467_cast_fp16, y = var_13468_to_fp16)[name = tensor("aw_chunk_1379_cast_fp16")]; + tensor var_13471_equation_0 = const()[name = tensor("op_13471_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13471_cast_fp16 = einsum(equation = var_13471_equation_0, values = (var_13161_cast_fp16, var_12905_cast_fp16))[name = tensor("op_13471_cast_fp16")]; + tensor var_13472_to_fp16 = const()[name = tensor("op_13472_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1381_cast_fp16 = mul(x = var_13471_cast_fp16, y = var_13472_to_fp16)[name = tensor("aw_chunk_1381_cast_fp16")]; + tensor var_13475_equation_0 = const()[name = tensor("op_13475_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13475_cast_fp16 = einsum(equation = var_13475_equation_0, values = (var_13161_cast_fp16, var_12912_cast_fp16))[name = tensor("op_13475_cast_fp16")]; + tensor var_13476_to_fp16 = const()[name = tensor("op_13476_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1383_cast_fp16 = mul(x = var_13475_cast_fp16, y = var_13476_to_fp16)[name = tensor("aw_chunk_1383_cast_fp16")]; + tensor var_13479_equation_0 = const()[name = tensor("op_13479_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13479_cast_fp16 = einsum(equation = var_13479_equation_0, values = (var_13165_cast_fp16, var_12919_cast_fp16))[name = tensor("op_13479_cast_fp16")]; + tensor var_13480_to_fp16 = const()[name = tensor("op_13480_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1385_cast_fp16 = mul(x = var_13479_cast_fp16, y = var_13480_to_fp16)[name = tensor("aw_chunk_1385_cast_fp16")]; + tensor var_13483_equation_0 = const()[name = tensor("op_13483_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13483_cast_fp16 = einsum(equation = var_13483_equation_0, values = (var_13165_cast_fp16, var_12926_cast_fp16))[name = tensor("op_13483_cast_fp16")]; + tensor var_13484_to_fp16 = const()[name = tensor("op_13484_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1387_cast_fp16 = mul(x = var_13483_cast_fp16, y = var_13484_to_fp16)[name = tensor("aw_chunk_1387_cast_fp16")]; + tensor var_13487_equation_0 = const()[name = tensor("op_13487_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13487_cast_fp16 = einsum(equation = var_13487_equation_0, values = (var_13165_cast_fp16, var_12933_cast_fp16))[name = tensor("op_13487_cast_fp16")]; + tensor var_13488_to_fp16 = const()[name = tensor("op_13488_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1389_cast_fp16 = mul(x = var_13487_cast_fp16, y = var_13488_to_fp16)[name = tensor("aw_chunk_1389_cast_fp16")]; + tensor var_13491_equation_0 = const()[name = tensor("op_13491_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13491_cast_fp16 = einsum(equation = var_13491_equation_0, values = (var_13165_cast_fp16, var_12940_cast_fp16))[name = tensor("op_13491_cast_fp16")]; + tensor var_13492_to_fp16 = const()[name = tensor("op_13492_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1391_cast_fp16 = mul(x = var_13491_cast_fp16, y = var_13492_to_fp16)[name = tensor("aw_chunk_1391_cast_fp16")]; + tensor var_13495_equation_0 = const()[name = tensor("op_13495_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13495_cast_fp16 = einsum(equation = var_13495_equation_0, values = (var_13169_cast_fp16, var_12947_cast_fp16))[name = tensor("op_13495_cast_fp16")]; + tensor var_13496_to_fp16 = const()[name = tensor("op_13496_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1393_cast_fp16 = mul(x = var_13495_cast_fp16, y = var_13496_to_fp16)[name = tensor("aw_chunk_1393_cast_fp16")]; + tensor var_13499_equation_0 = const()[name = tensor("op_13499_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13499_cast_fp16 = einsum(equation = var_13499_equation_0, values = (var_13169_cast_fp16, var_12954_cast_fp16))[name = tensor("op_13499_cast_fp16")]; + tensor var_13500_to_fp16 = const()[name = tensor("op_13500_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1395_cast_fp16 = mul(x = var_13499_cast_fp16, y = var_13500_to_fp16)[name = tensor("aw_chunk_1395_cast_fp16")]; + tensor var_13503_equation_0 = const()[name = tensor("op_13503_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13503_cast_fp16 = einsum(equation = var_13503_equation_0, values = (var_13169_cast_fp16, var_12961_cast_fp16))[name = tensor("op_13503_cast_fp16")]; + tensor var_13504_to_fp16 = const()[name = tensor("op_13504_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1397_cast_fp16 = mul(x = var_13503_cast_fp16, y = var_13504_to_fp16)[name = tensor("aw_chunk_1397_cast_fp16")]; + tensor var_13507_equation_0 = const()[name = tensor("op_13507_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13507_cast_fp16 = einsum(equation = var_13507_equation_0, values = (var_13169_cast_fp16, var_12968_cast_fp16))[name = tensor("op_13507_cast_fp16")]; + tensor var_13508_to_fp16 = const()[name = tensor("op_13508_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1399_cast_fp16 = mul(x = var_13507_cast_fp16, y = var_13508_to_fp16)[name = tensor("aw_chunk_1399_cast_fp16")]; + tensor var_13511_equation_0 = const()[name = tensor("op_13511_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13511_cast_fp16 = einsum(equation = var_13511_equation_0, values = (var_13173_cast_fp16, var_12975_cast_fp16))[name = tensor("op_13511_cast_fp16")]; + tensor var_13512_to_fp16 = const()[name = tensor("op_13512_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1401_cast_fp16 = mul(x = var_13511_cast_fp16, y = var_13512_to_fp16)[name = tensor("aw_chunk_1401_cast_fp16")]; + tensor var_13515_equation_0 = const()[name = tensor("op_13515_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13515_cast_fp16 = einsum(equation = var_13515_equation_0, values = (var_13173_cast_fp16, var_12982_cast_fp16))[name = tensor("op_13515_cast_fp16")]; + tensor var_13516_to_fp16 = const()[name = tensor("op_13516_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1403_cast_fp16 = mul(x = var_13515_cast_fp16, y = var_13516_to_fp16)[name = tensor("aw_chunk_1403_cast_fp16")]; + tensor var_13519_equation_0 = const()[name = tensor("op_13519_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13519_cast_fp16 = einsum(equation = var_13519_equation_0, values = (var_13173_cast_fp16, var_12989_cast_fp16))[name = tensor("op_13519_cast_fp16")]; + tensor var_13520_to_fp16 = const()[name = tensor("op_13520_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1405_cast_fp16 = mul(x = var_13519_cast_fp16, y = var_13520_to_fp16)[name = tensor("aw_chunk_1405_cast_fp16")]; + tensor var_13523_equation_0 = const()[name = tensor("op_13523_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13523_cast_fp16 = einsum(equation = var_13523_equation_0, values = (var_13173_cast_fp16, var_12996_cast_fp16))[name = tensor("op_13523_cast_fp16")]; + tensor var_13524_to_fp16 = const()[name = tensor("op_13524_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1407_cast_fp16 = mul(x = var_13523_cast_fp16, y = var_13524_to_fp16)[name = tensor("aw_chunk_1407_cast_fp16")]; + tensor var_13527_equation_0 = const()[name = tensor("op_13527_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13527_cast_fp16 = einsum(equation = var_13527_equation_0, values = (var_13177_cast_fp16, var_13003_cast_fp16))[name = tensor("op_13527_cast_fp16")]; + tensor var_13528_to_fp16 = const()[name = tensor("op_13528_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1409_cast_fp16 = mul(x = var_13527_cast_fp16, y = var_13528_to_fp16)[name = tensor("aw_chunk_1409_cast_fp16")]; + tensor var_13531_equation_0 = const()[name = tensor("op_13531_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13531_cast_fp16 = einsum(equation = var_13531_equation_0, values = (var_13177_cast_fp16, var_13010_cast_fp16))[name = tensor("op_13531_cast_fp16")]; + tensor var_13532_to_fp16 = const()[name = tensor("op_13532_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1411_cast_fp16 = mul(x = var_13531_cast_fp16, y = var_13532_to_fp16)[name = tensor("aw_chunk_1411_cast_fp16")]; + tensor var_13535_equation_0 = const()[name = tensor("op_13535_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13535_cast_fp16 = einsum(equation = var_13535_equation_0, values = (var_13177_cast_fp16, var_13017_cast_fp16))[name = tensor("op_13535_cast_fp16")]; + tensor var_13536_to_fp16 = const()[name = tensor("op_13536_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1413_cast_fp16 = mul(x = var_13535_cast_fp16, y = var_13536_to_fp16)[name = tensor("aw_chunk_1413_cast_fp16")]; + tensor var_13539_equation_0 = const()[name = tensor("op_13539_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13539_cast_fp16 = einsum(equation = var_13539_equation_0, values = (var_13177_cast_fp16, var_13024_cast_fp16))[name = tensor("op_13539_cast_fp16")]; + tensor var_13540_to_fp16 = const()[name = tensor("op_13540_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1415_cast_fp16 = mul(x = var_13539_cast_fp16, y = var_13540_to_fp16)[name = tensor("aw_chunk_1415_cast_fp16")]; + tensor var_13543_equation_0 = const()[name = tensor("op_13543_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13543_cast_fp16 = einsum(equation = var_13543_equation_0, values = (var_13181_cast_fp16, var_13031_cast_fp16))[name = tensor("op_13543_cast_fp16")]; + tensor var_13544_to_fp16 = const()[name = tensor("op_13544_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1417_cast_fp16 = mul(x = var_13543_cast_fp16, y = var_13544_to_fp16)[name = tensor("aw_chunk_1417_cast_fp16")]; + tensor var_13547_equation_0 = const()[name = tensor("op_13547_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13547_cast_fp16 = einsum(equation = var_13547_equation_0, values = (var_13181_cast_fp16, var_13038_cast_fp16))[name = tensor("op_13547_cast_fp16")]; + tensor var_13548_to_fp16 = const()[name = tensor("op_13548_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1419_cast_fp16 = mul(x = var_13547_cast_fp16, y = var_13548_to_fp16)[name = tensor("aw_chunk_1419_cast_fp16")]; + tensor var_13551_equation_0 = const()[name = tensor("op_13551_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13551_cast_fp16 = einsum(equation = var_13551_equation_0, values = (var_13181_cast_fp16, var_13045_cast_fp16))[name = tensor("op_13551_cast_fp16")]; + tensor var_13552_to_fp16 = const()[name = tensor("op_13552_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1421_cast_fp16 = mul(x = var_13551_cast_fp16, y = var_13552_to_fp16)[name = tensor("aw_chunk_1421_cast_fp16")]; + tensor var_13555_equation_0 = const()[name = tensor("op_13555_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13555_cast_fp16 = einsum(equation = var_13555_equation_0, values = (var_13181_cast_fp16, var_13052_cast_fp16))[name = tensor("op_13555_cast_fp16")]; + tensor var_13556_to_fp16 = const()[name = tensor("op_13556_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1423_cast_fp16 = mul(x = var_13555_cast_fp16, y = var_13556_to_fp16)[name = tensor("aw_chunk_1423_cast_fp16")]; + tensor var_13559_equation_0 = const()[name = tensor("op_13559_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13559_cast_fp16 = einsum(equation = var_13559_equation_0, values = (var_13185_cast_fp16, var_13059_cast_fp16))[name = tensor("op_13559_cast_fp16")]; + tensor var_13560_to_fp16 = const()[name = tensor("op_13560_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1425_cast_fp16 = mul(x = var_13559_cast_fp16, y = var_13560_to_fp16)[name = tensor("aw_chunk_1425_cast_fp16")]; + tensor var_13563_equation_0 = const()[name = tensor("op_13563_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13563_cast_fp16 = einsum(equation = var_13563_equation_0, values = (var_13185_cast_fp16, var_13066_cast_fp16))[name = tensor("op_13563_cast_fp16")]; + tensor var_13564_to_fp16 = const()[name = tensor("op_13564_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1427_cast_fp16 = mul(x = var_13563_cast_fp16, y = var_13564_to_fp16)[name = tensor("aw_chunk_1427_cast_fp16")]; + tensor var_13567_equation_0 = const()[name = tensor("op_13567_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13567_cast_fp16 = einsum(equation = var_13567_equation_0, values = (var_13185_cast_fp16, var_13073_cast_fp16))[name = tensor("op_13567_cast_fp16")]; + tensor var_13568_to_fp16 = const()[name = tensor("op_13568_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1429_cast_fp16 = mul(x = var_13567_cast_fp16, y = var_13568_to_fp16)[name = tensor("aw_chunk_1429_cast_fp16")]; + tensor var_13571_equation_0 = const()[name = tensor("op_13571_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13571_cast_fp16 = einsum(equation = var_13571_equation_0, values = (var_13185_cast_fp16, var_13080_cast_fp16))[name = tensor("op_13571_cast_fp16")]; + tensor var_13572_to_fp16 = const()[name = tensor("op_13572_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1431_cast_fp16 = mul(x = var_13571_cast_fp16, y = var_13572_to_fp16)[name = tensor("aw_chunk_1431_cast_fp16")]; + tensor var_13575_equation_0 = const()[name = tensor("op_13575_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13575_cast_fp16 = einsum(equation = var_13575_equation_0, values = (var_13189_cast_fp16, var_13087_cast_fp16))[name = tensor("op_13575_cast_fp16")]; + tensor var_13576_to_fp16 = const()[name = tensor("op_13576_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1433_cast_fp16 = mul(x = var_13575_cast_fp16, y = var_13576_to_fp16)[name = tensor("aw_chunk_1433_cast_fp16")]; + tensor var_13579_equation_0 = const()[name = tensor("op_13579_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13579_cast_fp16 = einsum(equation = var_13579_equation_0, values = (var_13189_cast_fp16, var_13094_cast_fp16))[name = tensor("op_13579_cast_fp16")]; + tensor var_13580_to_fp16 = const()[name = tensor("op_13580_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1435_cast_fp16 = mul(x = var_13579_cast_fp16, y = var_13580_to_fp16)[name = tensor("aw_chunk_1435_cast_fp16")]; + tensor var_13583_equation_0 = const()[name = tensor("op_13583_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13583_cast_fp16 = einsum(equation = var_13583_equation_0, values = (var_13189_cast_fp16, var_13101_cast_fp16))[name = tensor("op_13583_cast_fp16")]; + tensor var_13584_to_fp16 = const()[name = tensor("op_13584_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1437_cast_fp16 = mul(x = var_13583_cast_fp16, y = var_13584_to_fp16)[name = tensor("aw_chunk_1437_cast_fp16")]; + tensor var_13587_equation_0 = const()[name = tensor("op_13587_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_13587_cast_fp16 = einsum(equation = var_13587_equation_0, values = (var_13189_cast_fp16, var_13108_cast_fp16))[name = tensor("op_13587_cast_fp16")]; + tensor var_13588_to_fp16 = const()[name = tensor("op_13588_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1439_cast_fp16 = mul(x = var_13587_cast_fp16, y = var_13588_to_fp16)[name = tensor("aw_chunk_1439_cast_fp16")]; + tensor var_13590_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1281_cast_fp16)[name = tensor("op_13590_cast_fp16")]; + tensor var_13591_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1283_cast_fp16)[name = tensor("op_13591_cast_fp16")]; + tensor var_13592_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1285_cast_fp16)[name = tensor("op_13592_cast_fp16")]; + tensor var_13593_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1287_cast_fp16)[name = tensor("op_13593_cast_fp16")]; + tensor var_13594_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1289_cast_fp16)[name = tensor("op_13594_cast_fp16")]; + tensor var_13595_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1291_cast_fp16)[name = tensor("op_13595_cast_fp16")]; + tensor var_13596_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1293_cast_fp16)[name = tensor("op_13596_cast_fp16")]; + tensor var_13597_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1295_cast_fp16)[name = tensor("op_13597_cast_fp16")]; + tensor var_13598_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1297_cast_fp16)[name = tensor("op_13598_cast_fp16")]; + tensor var_13599_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1299_cast_fp16)[name = tensor("op_13599_cast_fp16")]; + tensor var_13600_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1301_cast_fp16)[name = tensor("op_13600_cast_fp16")]; + tensor var_13601_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1303_cast_fp16)[name = tensor("op_13601_cast_fp16")]; + tensor var_13602_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1305_cast_fp16)[name = tensor("op_13602_cast_fp16")]; + tensor var_13603_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1307_cast_fp16)[name = tensor("op_13603_cast_fp16")]; + tensor var_13604_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1309_cast_fp16)[name = tensor("op_13604_cast_fp16")]; + tensor var_13605_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1311_cast_fp16)[name = tensor("op_13605_cast_fp16")]; + tensor var_13606_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1313_cast_fp16)[name = tensor("op_13606_cast_fp16")]; + tensor var_13607_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1315_cast_fp16)[name = tensor("op_13607_cast_fp16")]; + tensor var_13608_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1317_cast_fp16)[name = tensor("op_13608_cast_fp16")]; + tensor var_13609_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1319_cast_fp16)[name = tensor("op_13609_cast_fp16")]; + tensor var_13610_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1321_cast_fp16)[name = tensor("op_13610_cast_fp16")]; + tensor var_13611_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1323_cast_fp16)[name = tensor("op_13611_cast_fp16")]; + tensor var_13612_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1325_cast_fp16)[name = tensor("op_13612_cast_fp16")]; + tensor var_13613_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1327_cast_fp16)[name = tensor("op_13613_cast_fp16")]; + tensor var_13614_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1329_cast_fp16)[name = tensor("op_13614_cast_fp16")]; + tensor var_13615_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1331_cast_fp16)[name = tensor("op_13615_cast_fp16")]; + tensor var_13616_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1333_cast_fp16)[name = tensor("op_13616_cast_fp16")]; + tensor var_13617_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1335_cast_fp16)[name = tensor("op_13617_cast_fp16")]; + tensor var_13618_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1337_cast_fp16)[name = tensor("op_13618_cast_fp16")]; + tensor var_13619_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1339_cast_fp16)[name = tensor("op_13619_cast_fp16")]; + tensor var_13620_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1341_cast_fp16)[name = tensor("op_13620_cast_fp16")]; + tensor var_13621_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1343_cast_fp16)[name = tensor("op_13621_cast_fp16")]; + tensor var_13622_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1345_cast_fp16)[name = tensor("op_13622_cast_fp16")]; + tensor var_13623_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1347_cast_fp16)[name = tensor("op_13623_cast_fp16")]; + tensor var_13624_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1349_cast_fp16)[name = tensor("op_13624_cast_fp16")]; + tensor var_13625_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1351_cast_fp16)[name = tensor("op_13625_cast_fp16")]; + tensor var_13626_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1353_cast_fp16)[name = tensor("op_13626_cast_fp16")]; + tensor var_13627_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1355_cast_fp16)[name = tensor("op_13627_cast_fp16")]; + tensor var_13628_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1357_cast_fp16)[name = tensor("op_13628_cast_fp16")]; + tensor var_13629_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1359_cast_fp16)[name = tensor("op_13629_cast_fp16")]; + tensor var_13630_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1361_cast_fp16)[name = tensor("op_13630_cast_fp16")]; + tensor var_13631_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1363_cast_fp16)[name = tensor("op_13631_cast_fp16")]; + tensor var_13632_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1365_cast_fp16)[name = tensor("op_13632_cast_fp16")]; + tensor var_13633_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1367_cast_fp16)[name = tensor("op_13633_cast_fp16")]; + tensor var_13634_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1369_cast_fp16)[name = tensor("op_13634_cast_fp16")]; + tensor var_13635_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1371_cast_fp16)[name = tensor("op_13635_cast_fp16")]; + tensor var_13636_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1373_cast_fp16)[name = tensor("op_13636_cast_fp16")]; + tensor var_13637_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1375_cast_fp16)[name = tensor("op_13637_cast_fp16")]; + tensor var_13638_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1377_cast_fp16)[name = tensor("op_13638_cast_fp16")]; + tensor var_13639_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1379_cast_fp16)[name = tensor("op_13639_cast_fp16")]; + tensor var_13640_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1381_cast_fp16)[name = tensor("op_13640_cast_fp16")]; + tensor var_13641_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1383_cast_fp16)[name = tensor("op_13641_cast_fp16")]; + tensor var_13642_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1385_cast_fp16)[name = tensor("op_13642_cast_fp16")]; + tensor var_13643_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1387_cast_fp16)[name = tensor("op_13643_cast_fp16")]; + tensor var_13644_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1389_cast_fp16)[name = tensor("op_13644_cast_fp16")]; + tensor var_13645_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1391_cast_fp16)[name = tensor("op_13645_cast_fp16")]; + tensor var_13646_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1393_cast_fp16)[name = tensor("op_13646_cast_fp16")]; + tensor var_13647_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1395_cast_fp16)[name = tensor("op_13647_cast_fp16")]; + tensor var_13648_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1397_cast_fp16)[name = tensor("op_13648_cast_fp16")]; + tensor var_13649_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1399_cast_fp16)[name = tensor("op_13649_cast_fp16")]; + tensor var_13650_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1401_cast_fp16)[name = tensor("op_13650_cast_fp16")]; + tensor var_13651_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1403_cast_fp16)[name = tensor("op_13651_cast_fp16")]; + tensor var_13652_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1405_cast_fp16)[name = tensor("op_13652_cast_fp16")]; + tensor var_13653_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1407_cast_fp16)[name = tensor("op_13653_cast_fp16")]; + tensor var_13654_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1409_cast_fp16)[name = tensor("op_13654_cast_fp16")]; + tensor var_13655_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1411_cast_fp16)[name = tensor("op_13655_cast_fp16")]; + tensor var_13656_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1413_cast_fp16)[name = tensor("op_13656_cast_fp16")]; + tensor var_13657_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1415_cast_fp16)[name = tensor("op_13657_cast_fp16")]; + tensor var_13658_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1417_cast_fp16)[name = tensor("op_13658_cast_fp16")]; + tensor var_13659_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1419_cast_fp16)[name = tensor("op_13659_cast_fp16")]; + tensor var_13660_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1421_cast_fp16)[name = tensor("op_13660_cast_fp16")]; + tensor var_13661_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1423_cast_fp16)[name = tensor("op_13661_cast_fp16")]; + tensor var_13662_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1425_cast_fp16)[name = tensor("op_13662_cast_fp16")]; + tensor var_13663_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1427_cast_fp16)[name = tensor("op_13663_cast_fp16")]; + tensor var_13664_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1429_cast_fp16)[name = tensor("op_13664_cast_fp16")]; + tensor var_13665_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1431_cast_fp16)[name = tensor("op_13665_cast_fp16")]; + tensor var_13666_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1433_cast_fp16)[name = tensor("op_13666_cast_fp16")]; + tensor var_13667_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1435_cast_fp16)[name = tensor("op_13667_cast_fp16")]; + tensor var_13668_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1437_cast_fp16)[name = tensor("op_13668_cast_fp16")]; + tensor var_13669_cast_fp16 = softmax(axis = var_12415, x = aw_chunk_1439_cast_fp16)[name = tensor("op_13669_cast_fp16")]; + tensor var_13671_equation_0 = const()[name = tensor("op_13671_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13671_cast_fp16 = einsum(equation = var_13671_equation_0, values = (var_13191_cast_fp16, var_13590_cast_fp16))[name = tensor("op_13671_cast_fp16")]; + tensor var_13673_equation_0 = const()[name = tensor("op_13673_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13673_cast_fp16 = einsum(equation = var_13673_equation_0, values = (var_13191_cast_fp16, var_13591_cast_fp16))[name = tensor("op_13673_cast_fp16")]; + tensor var_13675_equation_0 = const()[name = tensor("op_13675_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13675_cast_fp16 = einsum(equation = var_13675_equation_0, values = (var_13191_cast_fp16, var_13592_cast_fp16))[name = tensor("op_13675_cast_fp16")]; + tensor var_13677_equation_0 = const()[name = tensor("op_13677_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13677_cast_fp16 = einsum(equation = var_13677_equation_0, values = (var_13191_cast_fp16, var_13593_cast_fp16))[name = tensor("op_13677_cast_fp16")]; + tensor var_13679_equation_0 = const()[name = tensor("op_13679_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13679_cast_fp16 = einsum(equation = var_13679_equation_0, values = (var_13195_cast_fp16, var_13594_cast_fp16))[name = tensor("op_13679_cast_fp16")]; + tensor var_13681_equation_0 = const()[name = tensor("op_13681_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13681_cast_fp16 = einsum(equation = var_13681_equation_0, values = (var_13195_cast_fp16, var_13595_cast_fp16))[name = tensor("op_13681_cast_fp16")]; + tensor var_13683_equation_0 = const()[name = tensor("op_13683_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13683_cast_fp16 = einsum(equation = var_13683_equation_0, values = (var_13195_cast_fp16, var_13596_cast_fp16))[name = tensor("op_13683_cast_fp16")]; + tensor var_13685_equation_0 = const()[name = tensor("op_13685_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13685_cast_fp16 = einsum(equation = var_13685_equation_0, values = (var_13195_cast_fp16, var_13597_cast_fp16))[name = tensor("op_13685_cast_fp16")]; + tensor var_13687_equation_0 = const()[name = tensor("op_13687_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13687_cast_fp16 = einsum(equation = var_13687_equation_0, values = (var_13199_cast_fp16, var_13598_cast_fp16))[name = tensor("op_13687_cast_fp16")]; + tensor var_13689_equation_0 = const()[name = tensor("op_13689_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13689_cast_fp16 = einsum(equation = var_13689_equation_0, values = (var_13199_cast_fp16, var_13599_cast_fp16))[name = tensor("op_13689_cast_fp16")]; + tensor var_13691_equation_0 = const()[name = tensor("op_13691_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13691_cast_fp16 = einsum(equation = var_13691_equation_0, values = (var_13199_cast_fp16, var_13600_cast_fp16))[name = tensor("op_13691_cast_fp16")]; + tensor var_13693_equation_0 = const()[name = tensor("op_13693_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13693_cast_fp16 = einsum(equation = var_13693_equation_0, values = (var_13199_cast_fp16, var_13601_cast_fp16))[name = tensor("op_13693_cast_fp16")]; + tensor var_13695_equation_0 = const()[name = tensor("op_13695_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13695_cast_fp16 = einsum(equation = var_13695_equation_0, values = (var_13203_cast_fp16, var_13602_cast_fp16))[name = tensor("op_13695_cast_fp16")]; + tensor var_13697_equation_0 = const()[name = tensor("op_13697_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13697_cast_fp16 = einsum(equation = var_13697_equation_0, values = (var_13203_cast_fp16, var_13603_cast_fp16))[name = tensor("op_13697_cast_fp16")]; + tensor var_13699_equation_0 = const()[name = tensor("op_13699_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13699_cast_fp16 = einsum(equation = var_13699_equation_0, values = (var_13203_cast_fp16, var_13604_cast_fp16))[name = tensor("op_13699_cast_fp16")]; + tensor var_13701_equation_0 = const()[name = tensor("op_13701_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13701_cast_fp16 = einsum(equation = var_13701_equation_0, values = (var_13203_cast_fp16, var_13605_cast_fp16))[name = tensor("op_13701_cast_fp16")]; + tensor var_13703_equation_0 = const()[name = tensor("op_13703_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13703_cast_fp16 = einsum(equation = var_13703_equation_0, values = (var_13207_cast_fp16, var_13606_cast_fp16))[name = tensor("op_13703_cast_fp16")]; + tensor var_13705_equation_0 = const()[name = tensor("op_13705_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13705_cast_fp16 = einsum(equation = var_13705_equation_0, values = (var_13207_cast_fp16, var_13607_cast_fp16))[name = tensor("op_13705_cast_fp16")]; + tensor var_13707_equation_0 = const()[name = tensor("op_13707_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13707_cast_fp16 = einsum(equation = var_13707_equation_0, values = (var_13207_cast_fp16, var_13608_cast_fp16))[name = tensor("op_13707_cast_fp16")]; + tensor var_13709_equation_0 = const()[name = tensor("op_13709_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13709_cast_fp16 = einsum(equation = var_13709_equation_0, values = (var_13207_cast_fp16, var_13609_cast_fp16))[name = tensor("op_13709_cast_fp16")]; + tensor var_13711_equation_0 = const()[name = tensor("op_13711_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13711_cast_fp16 = einsum(equation = var_13711_equation_0, values = (var_13211_cast_fp16, var_13610_cast_fp16))[name = tensor("op_13711_cast_fp16")]; + tensor var_13713_equation_0 = const()[name = tensor("op_13713_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13713_cast_fp16 = einsum(equation = var_13713_equation_0, values = (var_13211_cast_fp16, var_13611_cast_fp16))[name = tensor("op_13713_cast_fp16")]; + tensor var_13715_equation_0 = const()[name = tensor("op_13715_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13715_cast_fp16 = einsum(equation = var_13715_equation_0, values = (var_13211_cast_fp16, var_13612_cast_fp16))[name = tensor("op_13715_cast_fp16")]; + tensor var_13717_equation_0 = const()[name = tensor("op_13717_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13717_cast_fp16 = einsum(equation = var_13717_equation_0, values = (var_13211_cast_fp16, var_13613_cast_fp16))[name = tensor("op_13717_cast_fp16")]; + tensor var_13719_equation_0 = const()[name = tensor("op_13719_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13719_cast_fp16 = einsum(equation = var_13719_equation_0, values = (var_13215_cast_fp16, var_13614_cast_fp16))[name = tensor("op_13719_cast_fp16")]; + tensor var_13721_equation_0 = const()[name = tensor("op_13721_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13721_cast_fp16 = einsum(equation = var_13721_equation_0, values = (var_13215_cast_fp16, var_13615_cast_fp16))[name = tensor("op_13721_cast_fp16")]; + tensor var_13723_equation_0 = const()[name = tensor("op_13723_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13723_cast_fp16 = einsum(equation = var_13723_equation_0, values = (var_13215_cast_fp16, var_13616_cast_fp16))[name = tensor("op_13723_cast_fp16")]; + tensor var_13725_equation_0 = const()[name = tensor("op_13725_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13725_cast_fp16 = einsum(equation = var_13725_equation_0, values = (var_13215_cast_fp16, var_13617_cast_fp16))[name = tensor("op_13725_cast_fp16")]; + tensor var_13727_equation_0 = const()[name = tensor("op_13727_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13727_cast_fp16 = einsum(equation = var_13727_equation_0, values = (var_13219_cast_fp16, var_13618_cast_fp16))[name = tensor("op_13727_cast_fp16")]; + tensor var_13729_equation_0 = const()[name = tensor("op_13729_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13729_cast_fp16 = einsum(equation = var_13729_equation_0, values = (var_13219_cast_fp16, var_13619_cast_fp16))[name = tensor("op_13729_cast_fp16")]; + tensor var_13731_equation_0 = const()[name = tensor("op_13731_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13731_cast_fp16 = einsum(equation = var_13731_equation_0, values = (var_13219_cast_fp16, var_13620_cast_fp16))[name = tensor("op_13731_cast_fp16")]; + tensor var_13733_equation_0 = const()[name = tensor("op_13733_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13733_cast_fp16 = einsum(equation = var_13733_equation_0, values = (var_13219_cast_fp16, var_13621_cast_fp16))[name = tensor("op_13733_cast_fp16")]; + tensor var_13735_equation_0 = const()[name = tensor("op_13735_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13735_cast_fp16 = einsum(equation = var_13735_equation_0, values = (var_13223_cast_fp16, var_13622_cast_fp16))[name = tensor("op_13735_cast_fp16")]; + tensor var_13737_equation_0 = const()[name = tensor("op_13737_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13737_cast_fp16 = einsum(equation = var_13737_equation_0, values = (var_13223_cast_fp16, var_13623_cast_fp16))[name = tensor("op_13737_cast_fp16")]; + tensor var_13739_equation_0 = const()[name = tensor("op_13739_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13739_cast_fp16 = einsum(equation = var_13739_equation_0, values = (var_13223_cast_fp16, var_13624_cast_fp16))[name = tensor("op_13739_cast_fp16")]; + tensor var_13741_equation_0 = const()[name = tensor("op_13741_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13741_cast_fp16 = einsum(equation = var_13741_equation_0, values = (var_13223_cast_fp16, var_13625_cast_fp16))[name = tensor("op_13741_cast_fp16")]; + tensor var_13743_equation_0 = const()[name = tensor("op_13743_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13743_cast_fp16 = einsum(equation = var_13743_equation_0, values = (var_13227_cast_fp16, var_13626_cast_fp16))[name = tensor("op_13743_cast_fp16")]; + tensor var_13745_equation_0 = const()[name = tensor("op_13745_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13745_cast_fp16 = einsum(equation = var_13745_equation_0, values = (var_13227_cast_fp16, var_13627_cast_fp16))[name = tensor("op_13745_cast_fp16")]; + tensor var_13747_equation_0 = const()[name = tensor("op_13747_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13747_cast_fp16 = einsum(equation = var_13747_equation_0, values = (var_13227_cast_fp16, var_13628_cast_fp16))[name = tensor("op_13747_cast_fp16")]; + tensor var_13749_equation_0 = const()[name = tensor("op_13749_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13749_cast_fp16 = einsum(equation = var_13749_equation_0, values = (var_13227_cast_fp16, var_13629_cast_fp16))[name = tensor("op_13749_cast_fp16")]; + tensor var_13751_equation_0 = const()[name = tensor("op_13751_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13751_cast_fp16 = einsum(equation = var_13751_equation_0, values = (var_13231_cast_fp16, var_13630_cast_fp16))[name = tensor("op_13751_cast_fp16")]; + tensor var_13753_equation_0 = const()[name = tensor("op_13753_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13753_cast_fp16 = einsum(equation = var_13753_equation_0, values = (var_13231_cast_fp16, var_13631_cast_fp16))[name = tensor("op_13753_cast_fp16")]; + tensor var_13755_equation_0 = const()[name = tensor("op_13755_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13755_cast_fp16 = einsum(equation = var_13755_equation_0, values = (var_13231_cast_fp16, var_13632_cast_fp16))[name = tensor("op_13755_cast_fp16")]; + tensor var_13757_equation_0 = const()[name = tensor("op_13757_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13757_cast_fp16 = einsum(equation = var_13757_equation_0, values = (var_13231_cast_fp16, var_13633_cast_fp16))[name = tensor("op_13757_cast_fp16")]; + tensor var_13759_equation_0 = const()[name = tensor("op_13759_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13759_cast_fp16 = einsum(equation = var_13759_equation_0, values = (var_13235_cast_fp16, var_13634_cast_fp16))[name = tensor("op_13759_cast_fp16")]; + tensor var_13761_equation_0 = const()[name = tensor("op_13761_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13761_cast_fp16 = einsum(equation = var_13761_equation_0, values = (var_13235_cast_fp16, var_13635_cast_fp16))[name = tensor("op_13761_cast_fp16")]; + tensor var_13763_equation_0 = const()[name = tensor("op_13763_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13763_cast_fp16 = einsum(equation = var_13763_equation_0, values = (var_13235_cast_fp16, var_13636_cast_fp16))[name = tensor("op_13763_cast_fp16")]; + tensor var_13765_equation_0 = const()[name = tensor("op_13765_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13765_cast_fp16 = einsum(equation = var_13765_equation_0, values = (var_13235_cast_fp16, var_13637_cast_fp16))[name = tensor("op_13765_cast_fp16")]; + tensor var_13767_equation_0 = const()[name = tensor("op_13767_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13767_cast_fp16 = einsum(equation = var_13767_equation_0, values = (var_13239_cast_fp16, var_13638_cast_fp16))[name = tensor("op_13767_cast_fp16")]; + tensor var_13769_equation_0 = const()[name = tensor("op_13769_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13769_cast_fp16 = einsum(equation = var_13769_equation_0, values = (var_13239_cast_fp16, var_13639_cast_fp16))[name = tensor("op_13769_cast_fp16")]; + tensor var_13771_equation_0 = const()[name = tensor("op_13771_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13771_cast_fp16 = einsum(equation = var_13771_equation_0, values = (var_13239_cast_fp16, var_13640_cast_fp16))[name = tensor("op_13771_cast_fp16")]; + tensor var_13773_equation_0 = const()[name = tensor("op_13773_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13773_cast_fp16 = einsum(equation = var_13773_equation_0, values = (var_13239_cast_fp16, var_13641_cast_fp16))[name = tensor("op_13773_cast_fp16")]; + tensor var_13775_equation_0 = const()[name = tensor("op_13775_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13775_cast_fp16 = einsum(equation = var_13775_equation_0, values = (var_13243_cast_fp16, var_13642_cast_fp16))[name = tensor("op_13775_cast_fp16")]; + tensor var_13777_equation_0 = const()[name = tensor("op_13777_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13777_cast_fp16 = einsum(equation = var_13777_equation_0, values = (var_13243_cast_fp16, var_13643_cast_fp16))[name = tensor("op_13777_cast_fp16")]; + tensor var_13779_equation_0 = const()[name = tensor("op_13779_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13779_cast_fp16 = einsum(equation = var_13779_equation_0, values = (var_13243_cast_fp16, var_13644_cast_fp16))[name = tensor("op_13779_cast_fp16")]; + tensor var_13781_equation_0 = const()[name = tensor("op_13781_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13781_cast_fp16 = einsum(equation = var_13781_equation_0, values = (var_13243_cast_fp16, var_13645_cast_fp16))[name = tensor("op_13781_cast_fp16")]; + tensor var_13783_equation_0 = const()[name = tensor("op_13783_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13783_cast_fp16 = einsum(equation = var_13783_equation_0, values = (var_13247_cast_fp16, var_13646_cast_fp16))[name = tensor("op_13783_cast_fp16")]; + tensor var_13785_equation_0 = const()[name = tensor("op_13785_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13785_cast_fp16 = einsum(equation = var_13785_equation_0, values = (var_13247_cast_fp16, var_13647_cast_fp16))[name = tensor("op_13785_cast_fp16")]; + tensor var_13787_equation_0 = const()[name = tensor("op_13787_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13787_cast_fp16 = einsum(equation = var_13787_equation_0, values = (var_13247_cast_fp16, var_13648_cast_fp16))[name = tensor("op_13787_cast_fp16")]; + tensor var_13789_equation_0 = const()[name = tensor("op_13789_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13789_cast_fp16 = einsum(equation = var_13789_equation_0, values = (var_13247_cast_fp16, var_13649_cast_fp16))[name = tensor("op_13789_cast_fp16")]; + tensor var_13791_equation_0 = const()[name = tensor("op_13791_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13791_cast_fp16 = einsum(equation = var_13791_equation_0, values = (var_13251_cast_fp16, var_13650_cast_fp16))[name = tensor("op_13791_cast_fp16")]; + tensor var_13793_equation_0 = const()[name = tensor("op_13793_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13793_cast_fp16 = einsum(equation = var_13793_equation_0, values = (var_13251_cast_fp16, var_13651_cast_fp16))[name = tensor("op_13793_cast_fp16")]; + tensor var_13795_equation_0 = const()[name = tensor("op_13795_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13795_cast_fp16 = einsum(equation = var_13795_equation_0, values = (var_13251_cast_fp16, var_13652_cast_fp16))[name = tensor("op_13795_cast_fp16")]; + tensor var_13797_equation_0 = const()[name = tensor("op_13797_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13797_cast_fp16 = einsum(equation = var_13797_equation_0, values = (var_13251_cast_fp16, var_13653_cast_fp16))[name = tensor("op_13797_cast_fp16")]; + tensor var_13799_equation_0 = const()[name = tensor("op_13799_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13799_cast_fp16 = einsum(equation = var_13799_equation_0, values = (var_13255_cast_fp16, var_13654_cast_fp16))[name = tensor("op_13799_cast_fp16")]; + tensor var_13801_equation_0 = const()[name = tensor("op_13801_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13801_cast_fp16 = einsum(equation = var_13801_equation_0, values = (var_13255_cast_fp16, var_13655_cast_fp16))[name = tensor("op_13801_cast_fp16")]; + tensor var_13803_equation_0 = const()[name = tensor("op_13803_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13803_cast_fp16 = einsum(equation = var_13803_equation_0, values = (var_13255_cast_fp16, var_13656_cast_fp16))[name = tensor("op_13803_cast_fp16")]; + tensor var_13805_equation_0 = const()[name = tensor("op_13805_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13805_cast_fp16 = einsum(equation = var_13805_equation_0, values = (var_13255_cast_fp16, var_13657_cast_fp16))[name = tensor("op_13805_cast_fp16")]; + tensor var_13807_equation_0 = const()[name = tensor("op_13807_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13807_cast_fp16 = einsum(equation = var_13807_equation_0, values = (var_13259_cast_fp16, var_13658_cast_fp16))[name = tensor("op_13807_cast_fp16")]; + tensor var_13809_equation_0 = const()[name = tensor("op_13809_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13809_cast_fp16 = einsum(equation = var_13809_equation_0, values = (var_13259_cast_fp16, var_13659_cast_fp16))[name = tensor("op_13809_cast_fp16")]; + tensor var_13811_equation_0 = const()[name = tensor("op_13811_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13811_cast_fp16 = einsum(equation = var_13811_equation_0, values = (var_13259_cast_fp16, var_13660_cast_fp16))[name = tensor("op_13811_cast_fp16")]; + tensor var_13813_equation_0 = const()[name = tensor("op_13813_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13813_cast_fp16 = einsum(equation = var_13813_equation_0, values = (var_13259_cast_fp16, var_13661_cast_fp16))[name = tensor("op_13813_cast_fp16")]; + tensor var_13815_equation_0 = const()[name = tensor("op_13815_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13815_cast_fp16 = einsum(equation = var_13815_equation_0, values = (var_13263_cast_fp16, var_13662_cast_fp16))[name = tensor("op_13815_cast_fp16")]; + tensor var_13817_equation_0 = const()[name = tensor("op_13817_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13817_cast_fp16 = einsum(equation = var_13817_equation_0, values = (var_13263_cast_fp16, var_13663_cast_fp16))[name = tensor("op_13817_cast_fp16")]; + tensor var_13819_equation_0 = const()[name = tensor("op_13819_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13819_cast_fp16 = einsum(equation = var_13819_equation_0, values = (var_13263_cast_fp16, var_13664_cast_fp16))[name = tensor("op_13819_cast_fp16")]; + tensor var_13821_equation_0 = const()[name = tensor("op_13821_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13821_cast_fp16 = einsum(equation = var_13821_equation_0, values = (var_13263_cast_fp16, var_13665_cast_fp16))[name = tensor("op_13821_cast_fp16")]; + tensor var_13823_equation_0 = const()[name = tensor("op_13823_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13823_cast_fp16 = einsum(equation = var_13823_equation_0, values = (var_13267_cast_fp16, var_13666_cast_fp16))[name = tensor("op_13823_cast_fp16")]; + tensor var_13825_equation_0 = const()[name = tensor("op_13825_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13825_cast_fp16 = einsum(equation = var_13825_equation_0, values = (var_13267_cast_fp16, var_13667_cast_fp16))[name = tensor("op_13825_cast_fp16")]; + tensor var_13827_equation_0 = const()[name = tensor("op_13827_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13827_cast_fp16 = einsum(equation = var_13827_equation_0, values = (var_13267_cast_fp16, var_13668_cast_fp16))[name = tensor("op_13827_cast_fp16")]; + tensor var_13829_equation_0 = const()[name = tensor("op_13829_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_13829_cast_fp16 = einsum(equation = var_13829_equation_0, values = (var_13267_cast_fp16, var_13669_cast_fp16))[name = tensor("op_13829_cast_fp16")]; + tensor var_13831_interleave_0 = const()[name = tensor("op_13831_interleave_0"), val = tensor(false)]; + tensor var_13831_cast_fp16 = concat(axis = var_12390, interleave = var_13831_interleave_0, values = (var_13671_cast_fp16, var_13673_cast_fp16, var_13675_cast_fp16, var_13677_cast_fp16))[name = tensor("op_13831_cast_fp16")]; + tensor var_13833_interleave_0 = const()[name = tensor("op_13833_interleave_0"), val = tensor(false)]; + tensor var_13833_cast_fp16 = concat(axis = var_12390, interleave = var_13833_interleave_0, values = (var_13679_cast_fp16, var_13681_cast_fp16, var_13683_cast_fp16, var_13685_cast_fp16))[name = tensor("op_13833_cast_fp16")]; + tensor var_13835_interleave_0 = const()[name = tensor("op_13835_interleave_0"), val = tensor(false)]; + tensor var_13835_cast_fp16 = concat(axis = var_12390, interleave = var_13835_interleave_0, values = (var_13687_cast_fp16, var_13689_cast_fp16, var_13691_cast_fp16, var_13693_cast_fp16))[name = tensor("op_13835_cast_fp16")]; + tensor var_13837_interleave_0 = const()[name = tensor("op_13837_interleave_0"), val = tensor(false)]; + tensor var_13837_cast_fp16 = concat(axis = var_12390, interleave = var_13837_interleave_0, values = (var_13695_cast_fp16, var_13697_cast_fp16, var_13699_cast_fp16, var_13701_cast_fp16))[name = tensor("op_13837_cast_fp16")]; + tensor var_13839_interleave_0 = const()[name = tensor("op_13839_interleave_0"), val = tensor(false)]; + tensor var_13839_cast_fp16 = concat(axis = var_12390, interleave = var_13839_interleave_0, values = (var_13703_cast_fp16, var_13705_cast_fp16, var_13707_cast_fp16, var_13709_cast_fp16))[name = tensor("op_13839_cast_fp16")]; + tensor var_13841_interleave_0 = const()[name = tensor("op_13841_interleave_0"), val = tensor(false)]; + tensor var_13841_cast_fp16 = concat(axis = var_12390, interleave = var_13841_interleave_0, values = (var_13711_cast_fp16, var_13713_cast_fp16, var_13715_cast_fp16, var_13717_cast_fp16))[name = tensor("op_13841_cast_fp16")]; + tensor var_13843_interleave_0 = const()[name = tensor("op_13843_interleave_0"), val = tensor(false)]; + tensor var_13843_cast_fp16 = concat(axis = var_12390, interleave = var_13843_interleave_0, values = (var_13719_cast_fp16, var_13721_cast_fp16, var_13723_cast_fp16, var_13725_cast_fp16))[name = tensor("op_13843_cast_fp16")]; + tensor var_13845_interleave_0 = const()[name = tensor("op_13845_interleave_0"), val = tensor(false)]; + tensor var_13845_cast_fp16 = concat(axis = var_12390, interleave = var_13845_interleave_0, values = (var_13727_cast_fp16, var_13729_cast_fp16, var_13731_cast_fp16, var_13733_cast_fp16))[name = tensor("op_13845_cast_fp16")]; + tensor var_13847_interleave_0 = const()[name = tensor("op_13847_interleave_0"), val = tensor(false)]; + tensor var_13847_cast_fp16 = concat(axis = var_12390, interleave = var_13847_interleave_0, values = (var_13735_cast_fp16, var_13737_cast_fp16, var_13739_cast_fp16, var_13741_cast_fp16))[name = tensor("op_13847_cast_fp16")]; + tensor var_13849_interleave_0 = const()[name = tensor("op_13849_interleave_0"), val = tensor(false)]; + tensor var_13849_cast_fp16 = concat(axis = var_12390, interleave = var_13849_interleave_0, values = (var_13743_cast_fp16, var_13745_cast_fp16, var_13747_cast_fp16, var_13749_cast_fp16))[name = tensor("op_13849_cast_fp16")]; + tensor var_13851_interleave_0 = const()[name = tensor("op_13851_interleave_0"), val = tensor(false)]; + tensor var_13851_cast_fp16 = concat(axis = var_12390, interleave = var_13851_interleave_0, values = (var_13751_cast_fp16, var_13753_cast_fp16, var_13755_cast_fp16, var_13757_cast_fp16))[name = tensor("op_13851_cast_fp16")]; + tensor var_13853_interleave_0 = const()[name = tensor("op_13853_interleave_0"), val = tensor(false)]; + tensor var_13853_cast_fp16 = concat(axis = var_12390, interleave = var_13853_interleave_0, values = (var_13759_cast_fp16, var_13761_cast_fp16, var_13763_cast_fp16, var_13765_cast_fp16))[name = tensor("op_13853_cast_fp16")]; + tensor var_13855_interleave_0 = const()[name = tensor("op_13855_interleave_0"), val = tensor(false)]; + tensor var_13855_cast_fp16 = concat(axis = var_12390, interleave = var_13855_interleave_0, values = (var_13767_cast_fp16, var_13769_cast_fp16, var_13771_cast_fp16, var_13773_cast_fp16))[name = tensor("op_13855_cast_fp16")]; + tensor var_13857_interleave_0 = const()[name = tensor("op_13857_interleave_0"), val = tensor(false)]; + tensor var_13857_cast_fp16 = concat(axis = var_12390, interleave = var_13857_interleave_0, values = (var_13775_cast_fp16, var_13777_cast_fp16, var_13779_cast_fp16, var_13781_cast_fp16))[name = tensor("op_13857_cast_fp16")]; + tensor var_13859_interleave_0 = const()[name = tensor("op_13859_interleave_0"), val = tensor(false)]; + tensor var_13859_cast_fp16 = concat(axis = var_12390, interleave = var_13859_interleave_0, values = (var_13783_cast_fp16, var_13785_cast_fp16, var_13787_cast_fp16, var_13789_cast_fp16))[name = tensor("op_13859_cast_fp16")]; + tensor var_13861_interleave_0 = const()[name = tensor("op_13861_interleave_0"), val = tensor(false)]; + tensor var_13861_cast_fp16 = concat(axis = var_12390, interleave = var_13861_interleave_0, values = (var_13791_cast_fp16, var_13793_cast_fp16, var_13795_cast_fp16, var_13797_cast_fp16))[name = tensor("op_13861_cast_fp16")]; + tensor var_13863_interleave_0 = const()[name = tensor("op_13863_interleave_0"), val = tensor(false)]; + tensor var_13863_cast_fp16 = concat(axis = var_12390, interleave = var_13863_interleave_0, values = (var_13799_cast_fp16, var_13801_cast_fp16, var_13803_cast_fp16, var_13805_cast_fp16))[name = tensor("op_13863_cast_fp16")]; + tensor var_13865_interleave_0 = const()[name = tensor("op_13865_interleave_0"), val = tensor(false)]; + tensor var_13865_cast_fp16 = concat(axis = var_12390, interleave = var_13865_interleave_0, values = (var_13807_cast_fp16, var_13809_cast_fp16, var_13811_cast_fp16, var_13813_cast_fp16))[name = tensor("op_13865_cast_fp16")]; + tensor var_13867_interleave_0 = const()[name = tensor("op_13867_interleave_0"), val = tensor(false)]; + tensor var_13867_cast_fp16 = concat(axis = var_12390, interleave = var_13867_interleave_0, values = (var_13815_cast_fp16, var_13817_cast_fp16, var_13819_cast_fp16, var_13821_cast_fp16))[name = tensor("op_13867_cast_fp16")]; + tensor var_13869_interleave_0 = const()[name = tensor("op_13869_interleave_0"), val = tensor(false)]; + tensor var_13869_cast_fp16 = concat(axis = var_12390, interleave = var_13869_interleave_0, values = (var_13823_cast_fp16, var_13825_cast_fp16, var_13827_cast_fp16, var_13829_cast_fp16))[name = tensor("op_13869_cast_fp16")]; + tensor input_65_interleave_0 = const()[name = tensor("input_65_interleave_0"), val = tensor(false)]; + tensor input_65_cast_fp16 = concat(axis = var_12415, interleave = input_65_interleave_0, values = (var_13831_cast_fp16, var_13833_cast_fp16, var_13835_cast_fp16, var_13837_cast_fp16, var_13839_cast_fp16, var_13841_cast_fp16, var_13843_cast_fp16, var_13845_cast_fp16, var_13847_cast_fp16, var_13849_cast_fp16, var_13851_cast_fp16, var_13853_cast_fp16, var_13855_cast_fp16, var_13857_cast_fp16, var_13859_cast_fp16, var_13861_cast_fp16, var_13863_cast_fp16, var_13865_cast_fp16, var_13867_cast_fp16, var_13869_cast_fp16))[name = tensor("input_65_cast_fp16")]; + tensor var_13874 = const()[name = tensor("op_13874"), val = tensor([1, 1])]; + tensor var_13876 = const()[name = tensor("op_13876"), val = tensor([1, 1])]; + tensor obj_35_pad_type_0 = const()[name = tensor("obj_35_pad_type_0"), val = tensor("custom")]; + tensor obj_35_pad_0 = const()[name = tensor("obj_35_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_8_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_8_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(338962880)))]; + tensor layers_8_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_8_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(342239744)))]; + tensor obj_35_cast_fp16 = conv(bias = layers_8_self_attn_o_proj_bias_to_fp16, dilations = var_13876, groups = var_12415, pad = obj_35_pad_0, pad_type = obj_35_pad_type_0, strides = var_13874, weight = layers_8_self_attn_o_proj_weight_to_fp16, x = input_65_cast_fp16)[name = tensor("obj_35_cast_fp16")]; + tensor inputs_35_cast_fp16 = add(x = inputs_33_cast_fp16, y = obj_35_cast_fp16)[name = tensor("inputs_35_cast_fp16")]; + tensor var_13882 = const()[name = tensor("op_13882"), val = tensor([1])]; + tensor channels_mean_35_cast_fp16 = reduce_mean(axes = var_13882, keep_dims = var_12416, x = inputs_35_cast_fp16)[name = tensor("channels_mean_35_cast_fp16")]; + tensor zero_mean_35_cast_fp16 = sub(x = inputs_35_cast_fp16, y = channels_mean_35_cast_fp16)[name = tensor("zero_mean_35_cast_fp16")]; + tensor zero_mean_sq_35_cast_fp16 = mul(x = zero_mean_35_cast_fp16, y = zero_mean_35_cast_fp16)[name = tensor("zero_mean_sq_35_cast_fp16")]; + tensor var_13886 = const()[name = tensor("op_13886"), val = tensor([1])]; + tensor var_13887_cast_fp16 = reduce_mean(axes = var_13886, keep_dims = var_12416, x = zero_mean_sq_35_cast_fp16)[name = tensor("op_13887_cast_fp16")]; + tensor var_13888_to_fp16 = const()[name = tensor("op_13888_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_13889_cast_fp16 = add(x = var_13887_cast_fp16, y = var_13888_to_fp16)[name = tensor("op_13889_cast_fp16")]; + tensor denom_35_epsilon_0_to_fp16 = const()[name = tensor("denom_35_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_35_cast_fp16 = rsqrt(epsilon = denom_35_epsilon_0_to_fp16, x = var_13889_cast_fp16)[name = tensor("denom_35_cast_fp16")]; + tensor out_35_cast_fp16 = mul(x = zero_mean_35_cast_fp16, y = denom_35_cast_fp16)[name = tensor("out_35_cast_fp16")]; + tensor input_67_gamma_0_to_fp16 = const()[name = tensor("input_67_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(342242368)))]; + tensor input_67_beta_0_to_fp16 = const()[name = tensor("input_67_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(342244992)))]; + tensor input_67_epsilon_0_to_fp16 = const()[name = tensor("input_67_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_67_cast_fp16 = batch_norm(beta = input_67_beta_0_to_fp16, epsilon = input_67_epsilon_0_to_fp16, gamma = input_67_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_35_cast_fp16)[name = tensor("input_67_cast_fp16")]; + tensor var_13900 = const()[name = tensor("op_13900"), val = tensor([1, 1])]; + tensor var_13902 = const()[name = tensor("op_13902"), val = tensor([1, 1])]; + tensor input_69_pad_type_0 = const()[name = tensor("input_69_pad_type_0"), val = tensor("custom")]; + tensor input_69_pad_0 = const()[name = tensor("input_69_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_8_fc1_weight_to_fp16 = const()[name = tensor("layers_8_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(342247616)))]; + tensor layers_8_fc1_bias_to_fp16 = const()[name = tensor("layers_8_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(355354880)))]; + tensor input_69_cast_fp16 = conv(bias = layers_8_fc1_bias_to_fp16, dilations = var_13902, groups = var_12415, pad = input_69_pad_0, pad_type = input_69_pad_type_0, strides = var_13900, weight = layers_8_fc1_weight_to_fp16, x = input_67_cast_fp16)[name = tensor("input_69_cast_fp16")]; + tensor input_71_mode_0 = const()[name = tensor("input_71_mode_0"), val = tensor("EXACT")]; + tensor input_71_cast_fp16 = gelu(mode = input_71_mode_0, x = input_69_cast_fp16)[name = tensor("input_71_cast_fp16")]; + tensor var_13908 = const()[name = tensor("op_13908"), val = tensor([1, 1])]; + tensor var_13910 = const()[name = tensor("op_13910"), val = tensor([1, 1])]; + tensor hidden_states_21_pad_type_0 = const()[name = tensor("hidden_states_21_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_21_pad_0 = const()[name = tensor("hidden_states_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_8_fc2_weight_to_fp16 = const()[name = tensor("layers_8_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(355365184)))]; + tensor layers_8_fc2_bias_to_fp16 = const()[name = tensor("layers_8_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(368472448)))]; + tensor hidden_states_21_cast_fp16 = conv(bias = layers_8_fc2_bias_to_fp16, dilations = var_13910, groups = var_12415, pad = hidden_states_21_pad_0, pad_type = hidden_states_21_pad_type_0, strides = var_13908, weight = layers_8_fc2_weight_to_fp16, x = input_71_cast_fp16)[name = tensor("hidden_states_21_cast_fp16")]; + tensor inputs_37_cast_fp16 = add(x = inputs_35_cast_fp16, y = hidden_states_21_cast_fp16)[name = tensor("inputs_37_cast_fp16")]; + tensor var_13917 = const()[name = tensor("op_13917"), val = tensor(3)]; + tensor var_13942 = const()[name = tensor("op_13942"), val = tensor(1)]; + tensor var_13943 = const()[name = tensor("op_13943"), val = tensor(true)]; + tensor var_13953 = const()[name = tensor("op_13953"), val = tensor([1])]; + tensor channels_mean_37_cast_fp16 = reduce_mean(axes = var_13953, keep_dims = var_13943, x = inputs_37_cast_fp16)[name = tensor("channels_mean_37_cast_fp16")]; + tensor zero_mean_37_cast_fp16 = sub(x = inputs_37_cast_fp16, y = channels_mean_37_cast_fp16)[name = tensor("zero_mean_37_cast_fp16")]; + tensor zero_mean_sq_37_cast_fp16 = mul(x = zero_mean_37_cast_fp16, y = zero_mean_37_cast_fp16)[name = tensor("zero_mean_sq_37_cast_fp16")]; + tensor var_13957 = const()[name = tensor("op_13957"), val = tensor([1])]; + tensor var_13958_cast_fp16 = reduce_mean(axes = var_13957, keep_dims = var_13943, x = zero_mean_sq_37_cast_fp16)[name = tensor("op_13958_cast_fp16")]; + tensor var_13959_to_fp16 = const()[name = tensor("op_13959_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_13960_cast_fp16 = add(x = var_13958_cast_fp16, y = var_13959_to_fp16)[name = tensor("op_13960_cast_fp16")]; + tensor denom_37_epsilon_0_to_fp16 = const()[name = tensor("denom_37_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_37_cast_fp16 = rsqrt(epsilon = denom_37_epsilon_0_to_fp16, x = var_13960_cast_fp16)[name = tensor("denom_37_cast_fp16")]; + tensor out_37_cast_fp16 = mul(x = zero_mean_37_cast_fp16, y = denom_37_cast_fp16)[name = tensor("out_37_cast_fp16")]; + tensor obj_37_gamma_0_to_fp16 = const()[name = tensor("obj_37_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(368475072)))]; + tensor obj_37_beta_0_to_fp16 = const()[name = tensor("obj_37_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(368477696)))]; + tensor obj_37_epsilon_0_to_fp16 = const()[name = tensor("obj_37_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_37_cast_fp16 = batch_norm(beta = obj_37_beta_0_to_fp16, epsilon = obj_37_epsilon_0_to_fp16, gamma = obj_37_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_37_cast_fp16)[name = tensor("obj_37_cast_fp16")]; + tensor var_13975 = const()[name = tensor("op_13975"), val = tensor([1, 1])]; + tensor var_13977 = const()[name = tensor("op_13977"), val = tensor([1, 1])]; + tensor query_19_pad_type_0 = const()[name = tensor("query_19_pad_type_0"), val = tensor("custom")]; + tensor query_19_pad_0 = const()[name = tensor("query_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_9_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_9_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(368480320)))]; + tensor layers_9_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_9_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(371757184)))]; + tensor query_19_cast_fp16 = conv(bias = layers_9_self_attn_q_proj_bias_to_fp16, dilations = var_13977, groups = var_13942, pad = query_19_pad_0, pad_type = query_19_pad_type_0, strides = var_13975, weight = layers_9_self_attn_q_proj_weight_to_fp16, x = obj_37_cast_fp16)[name = tensor("query_19_cast_fp16")]; + tensor var_13981 = const()[name = tensor("op_13981"), val = tensor([1, 1])]; + tensor var_13983 = const()[name = tensor("op_13983"), val = tensor([1, 1])]; + tensor key_19_pad_type_0 = const()[name = tensor("key_19_pad_type_0"), val = tensor("custom")]; + tensor key_19_pad_0 = const()[name = tensor("key_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_9_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_9_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(371759808)))]; + tensor key_19_cast_fp16 = conv(dilations = var_13983, groups = var_13942, pad = key_19_pad_0, pad_type = key_19_pad_type_0, strides = var_13981, weight = layers_9_self_attn_k_proj_weight_to_fp16, x = obj_37_cast_fp16)[name = tensor("key_19_cast_fp16")]; + tensor var_13988 = const()[name = tensor("op_13988"), val = tensor([1, 1])]; + tensor var_13990 = const()[name = tensor("op_13990"), val = tensor([1, 1])]; + tensor value_19_pad_type_0 = const()[name = tensor("value_19_pad_type_0"), val = tensor("custom")]; + tensor value_19_pad_0 = const()[name = tensor("value_19_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_9_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_9_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(375036672)))]; + tensor layers_9_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_9_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(378313536)))]; + tensor value_19_cast_fp16 = conv(bias = layers_9_self_attn_v_proj_bias_to_fp16, dilations = var_13990, groups = var_13942, pad = value_19_pad_0, pad_type = value_19_pad_type_0, strides = var_13988, weight = layers_9_self_attn_v_proj_weight_to_fp16, x = obj_37_cast_fp16)[name = tensor("value_19_cast_fp16")]; + tensor var_13997_begin_0 = const()[name = tensor("op_13997_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_13997_end_0 = const()[name = tensor("op_13997_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_13997_end_mask_0 = const()[name = tensor("op_13997_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_13997_cast_fp16 = slice_by_index(begin = var_13997_begin_0, end = var_13997_end_0, end_mask = var_13997_end_mask_0, x = query_19_cast_fp16)[name = tensor("op_13997_cast_fp16")]; + tensor var_14001_begin_0 = const()[name = tensor("op_14001_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_14001_end_0 = const()[name = tensor("op_14001_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_14001_end_mask_0 = const()[name = tensor("op_14001_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_14001_cast_fp16 = slice_by_index(begin = var_14001_begin_0, end = var_14001_end_0, end_mask = var_14001_end_mask_0, x = query_19_cast_fp16)[name = tensor("op_14001_cast_fp16")]; + tensor var_14005_begin_0 = const()[name = tensor("op_14005_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_14005_end_0 = const()[name = tensor("op_14005_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_14005_end_mask_0 = const()[name = tensor("op_14005_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_14005_cast_fp16 = slice_by_index(begin = var_14005_begin_0, end = var_14005_end_0, end_mask = var_14005_end_mask_0, x = query_19_cast_fp16)[name = tensor("op_14005_cast_fp16")]; + tensor var_14009_begin_0 = const()[name = tensor("op_14009_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_14009_end_0 = const()[name = tensor("op_14009_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_14009_end_mask_0 = const()[name = tensor("op_14009_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_14009_cast_fp16 = slice_by_index(begin = var_14009_begin_0, end = var_14009_end_0, end_mask = var_14009_end_mask_0, x = query_19_cast_fp16)[name = tensor("op_14009_cast_fp16")]; + tensor var_14013_begin_0 = const()[name = tensor("op_14013_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_14013_end_0 = const()[name = tensor("op_14013_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_14013_end_mask_0 = const()[name = tensor("op_14013_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_14013_cast_fp16 = slice_by_index(begin = var_14013_begin_0, end = var_14013_end_0, end_mask = var_14013_end_mask_0, x = query_19_cast_fp16)[name = tensor("op_14013_cast_fp16")]; + tensor var_14017_begin_0 = const()[name = tensor("op_14017_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_14017_end_0 = const()[name = tensor("op_14017_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_14017_end_mask_0 = const()[name = tensor("op_14017_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_14017_cast_fp16 = slice_by_index(begin = var_14017_begin_0, end = var_14017_end_0, end_mask = var_14017_end_mask_0, x = query_19_cast_fp16)[name = tensor("op_14017_cast_fp16")]; + tensor var_14021_begin_0 = const()[name = tensor("op_14021_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_14021_end_0 = const()[name = tensor("op_14021_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_14021_end_mask_0 = const()[name = tensor("op_14021_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_14021_cast_fp16 = slice_by_index(begin = var_14021_begin_0, end = var_14021_end_0, end_mask = var_14021_end_mask_0, x = query_19_cast_fp16)[name = tensor("op_14021_cast_fp16")]; + tensor var_14025_begin_0 = const()[name = tensor("op_14025_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_14025_end_0 = const()[name = tensor("op_14025_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_14025_end_mask_0 = const()[name = tensor("op_14025_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_14025_cast_fp16 = slice_by_index(begin = var_14025_begin_0, end = var_14025_end_0, end_mask = var_14025_end_mask_0, x = query_19_cast_fp16)[name = tensor("op_14025_cast_fp16")]; + tensor var_14029_begin_0 = const()[name = tensor("op_14029_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_14029_end_0 = const()[name = tensor("op_14029_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_14029_end_mask_0 = const()[name = tensor("op_14029_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_14029_cast_fp16 = slice_by_index(begin = var_14029_begin_0, end = var_14029_end_0, end_mask = var_14029_end_mask_0, x = query_19_cast_fp16)[name = tensor("op_14029_cast_fp16")]; + tensor var_14033_begin_0 = const()[name = tensor("op_14033_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_14033_end_0 = const()[name = tensor("op_14033_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_14033_end_mask_0 = const()[name = tensor("op_14033_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_14033_cast_fp16 = slice_by_index(begin = var_14033_begin_0, end = var_14033_end_0, end_mask = var_14033_end_mask_0, x = query_19_cast_fp16)[name = tensor("op_14033_cast_fp16")]; + tensor var_14037_begin_0 = const()[name = tensor("op_14037_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_14037_end_0 = const()[name = tensor("op_14037_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_14037_end_mask_0 = const()[name = tensor("op_14037_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_14037_cast_fp16 = slice_by_index(begin = var_14037_begin_0, end = var_14037_end_0, end_mask = var_14037_end_mask_0, x = query_19_cast_fp16)[name = tensor("op_14037_cast_fp16")]; + tensor var_14041_begin_0 = const()[name = tensor("op_14041_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_14041_end_0 = const()[name = tensor("op_14041_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_14041_end_mask_0 = const()[name = tensor("op_14041_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_14041_cast_fp16 = slice_by_index(begin = var_14041_begin_0, end = var_14041_end_0, end_mask = var_14041_end_mask_0, x = query_19_cast_fp16)[name = tensor("op_14041_cast_fp16")]; + tensor var_14045_begin_0 = const()[name = tensor("op_14045_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_14045_end_0 = const()[name = tensor("op_14045_end_0"), val = tensor([1, 832, 1, 1500])]; + tensor var_14045_end_mask_0 = const()[name = tensor("op_14045_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_14045_cast_fp16 = slice_by_index(begin = var_14045_begin_0, end = var_14045_end_0, end_mask = var_14045_end_mask_0, x = query_19_cast_fp16)[name = tensor("op_14045_cast_fp16")]; + tensor var_14049_begin_0 = const()[name = tensor("op_14049_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_14049_end_0 = const()[name = tensor("op_14049_end_0"), val = tensor([1, 896, 1, 1500])]; + tensor var_14049_end_mask_0 = const()[name = tensor("op_14049_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_14049_cast_fp16 = slice_by_index(begin = var_14049_begin_0, end = var_14049_end_0, end_mask = var_14049_end_mask_0, x = query_19_cast_fp16)[name = tensor("op_14049_cast_fp16")]; + tensor var_14053_begin_0 = const()[name = tensor("op_14053_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_14053_end_0 = const()[name = tensor("op_14053_end_0"), val = tensor([1, 960, 1, 1500])]; + tensor var_14053_end_mask_0 = const()[name = tensor("op_14053_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_14053_cast_fp16 = slice_by_index(begin = var_14053_begin_0, end = var_14053_end_0, end_mask = var_14053_end_mask_0, x = query_19_cast_fp16)[name = tensor("op_14053_cast_fp16")]; + tensor var_14057_begin_0 = const()[name = tensor("op_14057_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_14057_end_0 = const()[name = tensor("op_14057_end_0"), val = tensor([1, 1024, 1, 1500])]; + tensor var_14057_end_mask_0 = const()[name = tensor("op_14057_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_14057_cast_fp16 = slice_by_index(begin = var_14057_begin_0, end = var_14057_end_0, end_mask = var_14057_end_mask_0, x = query_19_cast_fp16)[name = tensor("op_14057_cast_fp16")]; + tensor var_14061_begin_0 = const()[name = tensor("op_14061_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_14061_end_0 = const()[name = tensor("op_14061_end_0"), val = tensor([1, 1088, 1, 1500])]; + tensor var_14061_end_mask_0 = const()[name = tensor("op_14061_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_14061_cast_fp16 = slice_by_index(begin = var_14061_begin_0, end = var_14061_end_0, end_mask = var_14061_end_mask_0, x = query_19_cast_fp16)[name = tensor("op_14061_cast_fp16")]; + tensor var_14065_begin_0 = const()[name = tensor("op_14065_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_14065_end_0 = const()[name = tensor("op_14065_end_0"), val = tensor([1, 1152, 1, 1500])]; + tensor var_14065_end_mask_0 = const()[name = tensor("op_14065_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_14065_cast_fp16 = slice_by_index(begin = var_14065_begin_0, end = var_14065_end_0, end_mask = var_14065_end_mask_0, x = query_19_cast_fp16)[name = tensor("op_14065_cast_fp16")]; + tensor var_14069_begin_0 = const()[name = tensor("op_14069_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_14069_end_0 = const()[name = tensor("op_14069_end_0"), val = tensor([1, 1216, 1, 1500])]; + tensor var_14069_end_mask_0 = const()[name = tensor("op_14069_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_14069_cast_fp16 = slice_by_index(begin = var_14069_begin_0, end = var_14069_end_0, end_mask = var_14069_end_mask_0, x = query_19_cast_fp16)[name = tensor("op_14069_cast_fp16")]; + tensor var_14073_begin_0 = const()[name = tensor("op_14073_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_14073_end_0 = const()[name = tensor("op_14073_end_0"), val = tensor([1, 1280, 1, 1500])]; + tensor var_14073_end_mask_0 = const()[name = tensor("op_14073_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_14073_cast_fp16 = slice_by_index(begin = var_14073_begin_0, end = var_14073_end_0, end_mask = var_14073_end_mask_0, x = query_19_cast_fp16)[name = tensor("op_14073_cast_fp16")]; + tensor var_14082_begin_0 = const()[name = tensor("op_14082_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_14082_end_0 = const()[name = tensor("op_14082_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_14082_end_mask_0 = const()[name = tensor("op_14082_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14082_cast_fp16 = slice_by_index(begin = var_14082_begin_0, end = var_14082_end_0, end_mask = var_14082_end_mask_0, x = var_13997_cast_fp16)[name = tensor("op_14082_cast_fp16")]; + tensor var_14089_begin_0 = const()[name = tensor("op_14089_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_14089_end_0 = const()[name = tensor("op_14089_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_14089_end_mask_0 = const()[name = tensor("op_14089_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14089_cast_fp16 = slice_by_index(begin = var_14089_begin_0, end = var_14089_end_0, end_mask = var_14089_end_mask_0, x = var_13997_cast_fp16)[name = tensor("op_14089_cast_fp16")]; + tensor var_14096_begin_0 = const()[name = tensor("op_14096_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_14096_end_0 = const()[name = tensor("op_14096_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_14096_end_mask_0 = const()[name = tensor("op_14096_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14096_cast_fp16 = slice_by_index(begin = var_14096_begin_0, end = var_14096_end_0, end_mask = var_14096_end_mask_0, x = var_13997_cast_fp16)[name = tensor("op_14096_cast_fp16")]; + tensor var_14103_begin_0 = const()[name = tensor("op_14103_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_14103_end_0 = const()[name = tensor("op_14103_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_14103_end_mask_0 = const()[name = tensor("op_14103_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14103_cast_fp16 = slice_by_index(begin = var_14103_begin_0, end = var_14103_end_0, end_mask = var_14103_end_mask_0, x = var_13997_cast_fp16)[name = tensor("op_14103_cast_fp16")]; + tensor var_14110_begin_0 = const()[name = tensor("op_14110_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_14110_end_0 = const()[name = tensor("op_14110_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_14110_end_mask_0 = const()[name = tensor("op_14110_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14110_cast_fp16 = slice_by_index(begin = var_14110_begin_0, end = var_14110_end_0, end_mask = var_14110_end_mask_0, x = var_14001_cast_fp16)[name = tensor("op_14110_cast_fp16")]; + tensor var_14117_begin_0 = const()[name = tensor("op_14117_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_14117_end_0 = const()[name = tensor("op_14117_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_14117_end_mask_0 = const()[name = tensor("op_14117_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14117_cast_fp16 = slice_by_index(begin = var_14117_begin_0, end = var_14117_end_0, end_mask = var_14117_end_mask_0, x = var_14001_cast_fp16)[name = tensor("op_14117_cast_fp16")]; + tensor var_14124_begin_0 = const()[name = tensor("op_14124_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_14124_end_0 = const()[name = tensor("op_14124_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_14124_end_mask_0 = const()[name = tensor("op_14124_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14124_cast_fp16 = slice_by_index(begin = var_14124_begin_0, end = var_14124_end_0, end_mask = var_14124_end_mask_0, x = var_14001_cast_fp16)[name = tensor("op_14124_cast_fp16")]; + tensor var_14131_begin_0 = const()[name = tensor("op_14131_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_14131_end_0 = const()[name = tensor("op_14131_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_14131_end_mask_0 = const()[name = tensor("op_14131_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14131_cast_fp16 = slice_by_index(begin = var_14131_begin_0, end = var_14131_end_0, end_mask = var_14131_end_mask_0, x = var_14001_cast_fp16)[name = tensor("op_14131_cast_fp16")]; + tensor var_14138_begin_0 = const()[name = tensor("op_14138_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_14138_end_0 = const()[name = tensor("op_14138_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_14138_end_mask_0 = const()[name = tensor("op_14138_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14138_cast_fp16 = slice_by_index(begin = var_14138_begin_0, end = var_14138_end_0, end_mask = var_14138_end_mask_0, x = var_14005_cast_fp16)[name = tensor("op_14138_cast_fp16")]; + tensor var_14145_begin_0 = const()[name = tensor("op_14145_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_14145_end_0 = const()[name = tensor("op_14145_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_14145_end_mask_0 = const()[name = tensor("op_14145_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14145_cast_fp16 = slice_by_index(begin = var_14145_begin_0, end = var_14145_end_0, end_mask = var_14145_end_mask_0, x = var_14005_cast_fp16)[name = tensor("op_14145_cast_fp16")]; + tensor var_14152_begin_0 = const()[name = tensor("op_14152_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_14152_end_0 = const()[name = tensor("op_14152_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_14152_end_mask_0 = const()[name = tensor("op_14152_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14152_cast_fp16 = slice_by_index(begin = var_14152_begin_0, end = var_14152_end_0, end_mask = var_14152_end_mask_0, x = var_14005_cast_fp16)[name = tensor("op_14152_cast_fp16")]; + tensor var_14159_begin_0 = const()[name = tensor("op_14159_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_14159_end_0 = const()[name = tensor("op_14159_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_14159_end_mask_0 = const()[name = tensor("op_14159_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14159_cast_fp16 = slice_by_index(begin = var_14159_begin_0, end = var_14159_end_0, end_mask = var_14159_end_mask_0, x = var_14005_cast_fp16)[name = tensor("op_14159_cast_fp16")]; + tensor var_14166_begin_0 = const()[name = tensor("op_14166_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_14166_end_0 = const()[name = tensor("op_14166_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_14166_end_mask_0 = const()[name = tensor("op_14166_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14166_cast_fp16 = slice_by_index(begin = var_14166_begin_0, end = var_14166_end_0, end_mask = var_14166_end_mask_0, x = var_14009_cast_fp16)[name = tensor("op_14166_cast_fp16")]; + tensor var_14173_begin_0 = const()[name = tensor("op_14173_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_14173_end_0 = const()[name = tensor("op_14173_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_14173_end_mask_0 = const()[name = tensor("op_14173_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14173_cast_fp16 = slice_by_index(begin = var_14173_begin_0, end = var_14173_end_0, end_mask = var_14173_end_mask_0, x = var_14009_cast_fp16)[name = tensor("op_14173_cast_fp16")]; + tensor var_14180_begin_0 = const()[name = tensor("op_14180_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_14180_end_0 = const()[name = tensor("op_14180_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_14180_end_mask_0 = const()[name = tensor("op_14180_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14180_cast_fp16 = slice_by_index(begin = var_14180_begin_0, end = var_14180_end_0, end_mask = var_14180_end_mask_0, x = var_14009_cast_fp16)[name = tensor("op_14180_cast_fp16")]; + tensor var_14187_begin_0 = const()[name = tensor("op_14187_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_14187_end_0 = const()[name = tensor("op_14187_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_14187_end_mask_0 = const()[name = tensor("op_14187_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14187_cast_fp16 = slice_by_index(begin = var_14187_begin_0, end = var_14187_end_0, end_mask = var_14187_end_mask_0, x = var_14009_cast_fp16)[name = tensor("op_14187_cast_fp16")]; + tensor var_14194_begin_0 = const()[name = tensor("op_14194_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_14194_end_0 = const()[name = tensor("op_14194_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_14194_end_mask_0 = const()[name = tensor("op_14194_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14194_cast_fp16 = slice_by_index(begin = var_14194_begin_0, end = var_14194_end_0, end_mask = var_14194_end_mask_0, x = var_14013_cast_fp16)[name = tensor("op_14194_cast_fp16")]; + tensor var_14201_begin_0 = const()[name = tensor("op_14201_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_14201_end_0 = const()[name = tensor("op_14201_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_14201_end_mask_0 = const()[name = tensor("op_14201_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14201_cast_fp16 = slice_by_index(begin = var_14201_begin_0, end = var_14201_end_0, end_mask = var_14201_end_mask_0, x = var_14013_cast_fp16)[name = tensor("op_14201_cast_fp16")]; + tensor var_14208_begin_0 = const()[name = tensor("op_14208_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_14208_end_0 = const()[name = tensor("op_14208_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_14208_end_mask_0 = const()[name = tensor("op_14208_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14208_cast_fp16 = slice_by_index(begin = var_14208_begin_0, end = var_14208_end_0, end_mask = var_14208_end_mask_0, x = var_14013_cast_fp16)[name = tensor("op_14208_cast_fp16")]; + tensor var_14215_begin_0 = const()[name = tensor("op_14215_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_14215_end_0 = const()[name = tensor("op_14215_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_14215_end_mask_0 = const()[name = tensor("op_14215_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14215_cast_fp16 = slice_by_index(begin = var_14215_begin_0, end = var_14215_end_0, end_mask = var_14215_end_mask_0, x = var_14013_cast_fp16)[name = tensor("op_14215_cast_fp16")]; + tensor var_14222_begin_0 = const()[name = tensor("op_14222_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_14222_end_0 = const()[name = tensor("op_14222_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_14222_end_mask_0 = const()[name = tensor("op_14222_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14222_cast_fp16 = slice_by_index(begin = var_14222_begin_0, end = var_14222_end_0, end_mask = var_14222_end_mask_0, x = var_14017_cast_fp16)[name = tensor("op_14222_cast_fp16")]; + tensor var_14229_begin_0 = const()[name = tensor("op_14229_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_14229_end_0 = const()[name = tensor("op_14229_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_14229_end_mask_0 = const()[name = tensor("op_14229_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14229_cast_fp16 = slice_by_index(begin = var_14229_begin_0, end = var_14229_end_0, end_mask = var_14229_end_mask_0, x = var_14017_cast_fp16)[name = tensor("op_14229_cast_fp16")]; + tensor var_14236_begin_0 = const()[name = tensor("op_14236_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_14236_end_0 = const()[name = tensor("op_14236_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_14236_end_mask_0 = const()[name = tensor("op_14236_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14236_cast_fp16 = slice_by_index(begin = var_14236_begin_0, end = var_14236_end_0, end_mask = var_14236_end_mask_0, x = var_14017_cast_fp16)[name = tensor("op_14236_cast_fp16")]; + tensor var_14243_begin_0 = const()[name = tensor("op_14243_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_14243_end_0 = const()[name = tensor("op_14243_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_14243_end_mask_0 = const()[name = tensor("op_14243_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14243_cast_fp16 = slice_by_index(begin = var_14243_begin_0, end = var_14243_end_0, end_mask = var_14243_end_mask_0, x = var_14017_cast_fp16)[name = tensor("op_14243_cast_fp16")]; + tensor var_14250_begin_0 = const()[name = tensor("op_14250_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_14250_end_0 = const()[name = tensor("op_14250_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_14250_end_mask_0 = const()[name = tensor("op_14250_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14250_cast_fp16 = slice_by_index(begin = var_14250_begin_0, end = var_14250_end_0, end_mask = var_14250_end_mask_0, x = var_14021_cast_fp16)[name = tensor("op_14250_cast_fp16")]; + tensor var_14257_begin_0 = const()[name = tensor("op_14257_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_14257_end_0 = const()[name = tensor("op_14257_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_14257_end_mask_0 = const()[name = tensor("op_14257_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14257_cast_fp16 = slice_by_index(begin = var_14257_begin_0, end = var_14257_end_0, end_mask = var_14257_end_mask_0, x = var_14021_cast_fp16)[name = tensor("op_14257_cast_fp16")]; + tensor var_14264_begin_0 = const()[name = tensor("op_14264_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_14264_end_0 = const()[name = tensor("op_14264_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_14264_end_mask_0 = const()[name = tensor("op_14264_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14264_cast_fp16 = slice_by_index(begin = var_14264_begin_0, end = var_14264_end_0, end_mask = var_14264_end_mask_0, x = var_14021_cast_fp16)[name = tensor("op_14264_cast_fp16")]; + tensor var_14271_begin_0 = const()[name = tensor("op_14271_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_14271_end_0 = const()[name = tensor("op_14271_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_14271_end_mask_0 = const()[name = tensor("op_14271_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14271_cast_fp16 = slice_by_index(begin = var_14271_begin_0, end = var_14271_end_0, end_mask = var_14271_end_mask_0, x = var_14021_cast_fp16)[name = tensor("op_14271_cast_fp16")]; + tensor var_14278_begin_0 = const()[name = tensor("op_14278_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_14278_end_0 = const()[name = tensor("op_14278_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_14278_end_mask_0 = const()[name = tensor("op_14278_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14278_cast_fp16 = slice_by_index(begin = var_14278_begin_0, end = var_14278_end_0, end_mask = var_14278_end_mask_0, x = var_14025_cast_fp16)[name = tensor("op_14278_cast_fp16")]; + tensor var_14285_begin_0 = const()[name = tensor("op_14285_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_14285_end_0 = const()[name = tensor("op_14285_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_14285_end_mask_0 = const()[name = tensor("op_14285_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14285_cast_fp16 = slice_by_index(begin = var_14285_begin_0, end = var_14285_end_0, end_mask = var_14285_end_mask_0, x = var_14025_cast_fp16)[name = tensor("op_14285_cast_fp16")]; + tensor var_14292_begin_0 = const()[name = tensor("op_14292_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_14292_end_0 = const()[name = tensor("op_14292_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_14292_end_mask_0 = const()[name = tensor("op_14292_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14292_cast_fp16 = slice_by_index(begin = var_14292_begin_0, end = var_14292_end_0, end_mask = var_14292_end_mask_0, x = var_14025_cast_fp16)[name = tensor("op_14292_cast_fp16")]; + tensor var_14299_begin_0 = const()[name = tensor("op_14299_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_14299_end_0 = const()[name = tensor("op_14299_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_14299_end_mask_0 = const()[name = tensor("op_14299_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14299_cast_fp16 = slice_by_index(begin = var_14299_begin_0, end = var_14299_end_0, end_mask = var_14299_end_mask_0, x = var_14025_cast_fp16)[name = tensor("op_14299_cast_fp16")]; + tensor var_14306_begin_0 = const()[name = tensor("op_14306_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_14306_end_0 = const()[name = tensor("op_14306_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_14306_end_mask_0 = const()[name = tensor("op_14306_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14306_cast_fp16 = slice_by_index(begin = var_14306_begin_0, end = var_14306_end_0, end_mask = var_14306_end_mask_0, x = var_14029_cast_fp16)[name = tensor("op_14306_cast_fp16")]; + tensor var_14313_begin_0 = const()[name = tensor("op_14313_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_14313_end_0 = const()[name = tensor("op_14313_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_14313_end_mask_0 = const()[name = tensor("op_14313_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14313_cast_fp16 = slice_by_index(begin = var_14313_begin_0, end = var_14313_end_0, end_mask = var_14313_end_mask_0, x = var_14029_cast_fp16)[name = tensor("op_14313_cast_fp16")]; + tensor var_14320_begin_0 = const()[name = tensor("op_14320_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_14320_end_0 = const()[name = tensor("op_14320_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_14320_end_mask_0 = const()[name = tensor("op_14320_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14320_cast_fp16 = slice_by_index(begin = var_14320_begin_0, end = var_14320_end_0, end_mask = var_14320_end_mask_0, x = var_14029_cast_fp16)[name = tensor("op_14320_cast_fp16")]; + tensor var_14327_begin_0 = const()[name = tensor("op_14327_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_14327_end_0 = const()[name = tensor("op_14327_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_14327_end_mask_0 = const()[name = tensor("op_14327_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14327_cast_fp16 = slice_by_index(begin = var_14327_begin_0, end = var_14327_end_0, end_mask = var_14327_end_mask_0, x = var_14029_cast_fp16)[name = tensor("op_14327_cast_fp16")]; + tensor var_14334_begin_0 = const()[name = tensor("op_14334_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_14334_end_0 = const()[name = tensor("op_14334_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_14334_end_mask_0 = const()[name = tensor("op_14334_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14334_cast_fp16 = slice_by_index(begin = var_14334_begin_0, end = var_14334_end_0, end_mask = var_14334_end_mask_0, x = var_14033_cast_fp16)[name = tensor("op_14334_cast_fp16")]; + tensor var_14341_begin_0 = const()[name = tensor("op_14341_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_14341_end_0 = const()[name = tensor("op_14341_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_14341_end_mask_0 = const()[name = tensor("op_14341_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14341_cast_fp16 = slice_by_index(begin = var_14341_begin_0, end = var_14341_end_0, end_mask = var_14341_end_mask_0, x = var_14033_cast_fp16)[name = tensor("op_14341_cast_fp16")]; + tensor var_14348_begin_0 = const()[name = tensor("op_14348_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_14348_end_0 = const()[name = tensor("op_14348_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_14348_end_mask_0 = const()[name = tensor("op_14348_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14348_cast_fp16 = slice_by_index(begin = var_14348_begin_0, end = var_14348_end_0, end_mask = var_14348_end_mask_0, x = var_14033_cast_fp16)[name = tensor("op_14348_cast_fp16")]; + tensor var_14355_begin_0 = const()[name = tensor("op_14355_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_14355_end_0 = const()[name = tensor("op_14355_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_14355_end_mask_0 = const()[name = tensor("op_14355_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14355_cast_fp16 = slice_by_index(begin = var_14355_begin_0, end = var_14355_end_0, end_mask = var_14355_end_mask_0, x = var_14033_cast_fp16)[name = tensor("op_14355_cast_fp16")]; + tensor var_14362_begin_0 = const()[name = tensor("op_14362_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_14362_end_0 = const()[name = tensor("op_14362_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_14362_end_mask_0 = const()[name = tensor("op_14362_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14362_cast_fp16 = slice_by_index(begin = var_14362_begin_0, end = var_14362_end_0, end_mask = var_14362_end_mask_0, x = var_14037_cast_fp16)[name = tensor("op_14362_cast_fp16")]; + tensor var_14369_begin_0 = const()[name = tensor("op_14369_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_14369_end_0 = const()[name = tensor("op_14369_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_14369_end_mask_0 = const()[name = tensor("op_14369_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14369_cast_fp16 = slice_by_index(begin = var_14369_begin_0, end = var_14369_end_0, end_mask = var_14369_end_mask_0, x = var_14037_cast_fp16)[name = tensor("op_14369_cast_fp16")]; + tensor var_14376_begin_0 = const()[name = tensor("op_14376_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_14376_end_0 = const()[name = tensor("op_14376_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_14376_end_mask_0 = const()[name = tensor("op_14376_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14376_cast_fp16 = slice_by_index(begin = var_14376_begin_0, end = var_14376_end_0, end_mask = var_14376_end_mask_0, x = var_14037_cast_fp16)[name = tensor("op_14376_cast_fp16")]; + tensor var_14383_begin_0 = const()[name = tensor("op_14383_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_14383_end_0 = const()[name = tensor("op_14383_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_14383_end_mask_0 = const()[name = tensor("op_14383_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14383_cast_fp16 = slice_by_index(begin = var_14383_begin_0, end = var_14383_end_0, end_mask = var_14383_end_mask_0, x = var_14037_cast_fp16)[name = tensor("op_14383_cast_fp16")]; + tensor var_14390_begin_0 = const()[name = tensor("op_14390_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_14390_end_0 = const()[name = tensor("op_14390_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_14390_end_mask_0 = const()[name = tensor("op_14390_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14390_cast_fp16 = slice_by_index(begin = var_14390_begin_0, end = var_14390_end_0, end_mask = var_14390_end_mask_0, x = var_14041_cast_fp16)[name = tensor("op_14390_cast_fp16")]; + tensor var_14397_begin_0 = const()[name = tensor("op_14397_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_14397_end_0 = const()[name = tensor("op_14397_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_14397_end_mask_0 = const()[name = tensor("op_14397_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14397_cast_fp16 = slice_by_index(begin = var_14397_begin_0, end = var_14397_end_0, end_mask = var_14397_end_mask_0, x = var_14041_cast_fp16)[name = tensor("op_14397_cast_fp16")]; + tensor var_14404_begin_0 = const()[name = tensor("op_14404_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_14404_end_0 = const()[name = tensor("op_14404_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_14404_end_mask_0 = const()[name = tensor("op_14404_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14404_cast_fp16 = slice_by_index(begin = var_14404_begin_0, end = var_14404_end_0, end_mask = var_14404_end_mask_0, x = var_14041_cast_fp16)[name = tensor("op_14404_cast_fp16")]; + tensor var_14411_begin_0 = const()[name = tensor("op_14411_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_14411_end_0 = const()[name = tensor("op_14411_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_14411_end_mask_0 = const()[name = tensor("op_14411_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14411_cast_fp16 = slice_by_index(begin = var_14411_begin_0, end = var_14411_end_0, end_mask = var_14411_end_mask_0, x = var_14041_cast_fp16)[name = tensor("op_14411_cast_fp16")]; + tensor var_14418_begin_0 = const()[name = tensor("op_14418_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_14418_end_0 = const()[name = tensor("op_14418_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_14418_end_mask_0 = const()[name = tensor("op_14418_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14418_cast_fp16 = slice_by_index(begin = var_14418_begin_0, end = var_14418_end_0, end_mask = var_14418_end_mask_0, x = var_14045_cast_fp16)[name = tensor("op_14418_cast_fp16")]; + tensor var_14425_begin_0 = const()[name = tensor("op_14425_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_14425_end_0 = const()[name = tensor("op_14425_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_14425_end_mask_0 = const()[name = tensor("op_14425_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14425_cast_fp16 = slice_by_index(begin = var_14425_begin_0, end = var_14425_end_0, end_mask = var_14425_end_mask_0, x = var_14045_cast_fp16)[name = tensor("op_14425_cast_fp16")]; + tensor var_14432_begin_0 = const()[name = tensor("op_14432_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_14432_end_0 = const()[name = tensor("op_14432_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_14432_end_mask_0 = const()[name = tensor("op_14432_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14432_cast_fp16 = slice_by_index(begin = var_14432_begin_0, end = var_14432_end_0, end_mask = var_14432_end_mask_0, x = var_14045_cast_fp16)[name = tensor("op_14432_cast_fp16")]; + tensor var_14439_begin_0 = const()[name = tensor("op_14439_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_14439_end_0 = const()[name = tensor("op_14439_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_14439_end_mask_0 = const()[name = tensor("op_14439_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14439_cast_fp16 = slice_by_index(begin = var_14439_begin_0, end = var_14439_end_0, end_mask = var_14439_end_mask_0, x = var_14045_cast_fp16)[name = tensor("op_14439_cast_fp16")]; + tensor var_14446_begin_0 = const()[name = tensor("op_14446_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_14446_end_0 = const()[name = tensor("op_14446_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_14446_end_mask_0 = const()[name = tensor("op_14446_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14446_cast_fp16 = slice_by_index(begin = var_14446_begin_0, end = var_14446_end_0, end_mask = var_14446_end_mask_0, x = var_14049_cast_fp16)[name = tensor("op_14446_cast_fp16")]; + tensor var_14453_begin_0 = const()[name = tensor("op_14453_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_14453_end_0 = const()[name = tensor("op_14453_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_14453_end_mask_0 = const()[name = tensor("op_14453_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14453_cast_fp16 = slice_by_index(begin = var_14453_begin_0, end = var_14453_end_0, end_mask = var_14453_end_mask_0, x = var_14049_cast_fp16)[name = tensor("op_14453_cast_fp16")]; + tensor var_14460_begin_0 = const()[name = tensor("op_14460_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_14460_end_0 = const()[name = tensor("op_14460_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_14460_end_mask_0 = const()[name = tensor("op_14460_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14460_cast_fp16 = slice_by_index(begin = var_14460_begin_0, end = var_14460_end_0, end_mask = var_14460_end_mask_0, x = var_14049_cast_fp16)[name = tensor("op_14460_cast_fp16")]; + tensor var_14467_begin_0 = const()[name = tensor("op_14467_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_14467_end_0 = const()[name = tensor("op_14467_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_14467_end_mask_0 = const()[name = tensor("op_14467_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14467_cast_fp16 = slice_by_index(begin = var_14467_begin_0, end = var_14467_end_0, end_mask = var_14467_end_mask_0, x = var_14049_cast_fp16)[name = tensor("op_14467_cast_fp16")]; + tensor var_14474_begin_0 = const()[name = tensor("op_14474_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_14474_end_0 = const()[name = tensor("op_14474_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_14474_end_mask_0 = const()[name = tensor("op_14474_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14474_cast_fp16 = slice_by_index(begin = var_14474_begin_0, end = var_14474_end_0, end_mask = var_14474_end_mask_0, x = var_14053_cast_fp16)[name = tensor("op_14474_cast_fp16")]; + tensor var_14481_begin_0 = const()[name = tensor("op_14481_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_14481_end_0 = const()[name = tensor("op_14481_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_14481_end_mask_0 = const()[name = tensor("op_14481_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14481_cast_fp16 = slice_by_index(begin = var_14481_begin_0, end = var_14481_end_0, end_mask = var_14481_end_mask_0, x = var_14053_cast_fp16)[name = tensor("op_14481_cast_fp16")]; + tensor var_14488_begin_0 = const()[name = tensor("op_14488_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_14488_end_0 = const()[name = tensor("op_14488_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_14488_end_mask_0 = const()[name = tensor("op_14488_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14488_cast_fp16 = slice_by_index(begin = var_14488_begin_0, end = var_14488_end_0, end_mask = var_14488_end_mask_0, x = var_14053_cast_fp16)[name = tensor("op_14488_cast_fp16")]; + tensor var_14495_begin_0 = const()[name = tensor("op_14495_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_14495_end_0 = const()[name = tensor("op_14495_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_14495_end_mask_0 = const()[name = tensor("op_14495_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14495_cast_fp16 = slice_by_index(begin = var_14495_begin_0, end = var_14495_end_0, end_mask = var_14495_end_mask_0, x = var_14053_cast_fp16)[name = tensor("op_14495_cast_fp16")]; + tensor var_14502_begin_0 = const()[name = tensor("op_14502_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_14502_end_0 = const()[name = tensor("op_14502_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_14502_end_mask_0 = const()[name = tensor("op_14502_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14502_cast_fp16 = slice_by_index(begin = var_14502_begin_0, end = var_14502_end_0, end_mask = var_14502_end_mask_0, x = var_14057_cast_fp16)[name = tensor("op_14502_cast_fp16")]; + tensor var_14509_begin_0 = const()[name = tensor("op_14509_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_14509_end_0 = const()[name = tensor("op_14509_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_14509_end_mask_0 = const()[name = tensor("op_14509_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14509_cast_fp16 = slice_by_index(begin = var_14509_begin_0, end = var_14509_end_0, end_mask = var_14509_end_mask_0, x = var_14057_cast_fp16)[name = tensor("op_14509_cast_fp16")]; + tensor var_14516_begin_0 = const()[name = tensor("op_14516_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_14516_end_0 = const()[name = tensor("op_14516_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_14516_end_mask_0 = const()[name = tensor("op_14516_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14516_cast_fp16 = slice_by_index(begin = var_14516_begin_0, end = var_14516_end_0, end_mask = var_14516_end_mask_0, x = var_14057_cast_fp16)[name = tensor("op_14516_cast_fp16")]; + tensor var_14523_begin_0 = const()[name = tensor("op_14523_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_14523_end_0 = const()[name = tensor("op_14523_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_14523_end_mask_0 = const()[name = tensor("op_14523_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14523_cast_fp16 = slice_by_index(begin = var_14523_begin_0, end = var_14523_end_0, end_mask = var_14523_end_mask_0, x = var_14057_cast_fp16)[name = tensor("op_14523_cast_fp16")]; + tensor var_14530_begin_0 = const()[name = tensor("op_14530_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_14530_end_0 = const()[name = tensor("op_14530_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_14530_end_mask_0 = const()[name = tensor("op_14530_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14530_cast_fp16 = slice_by_index(begin = var_14530_begin_0, end = var_14530_end_0, end_mask = var_14530_end_mask_0, x = var_14061_cast_fp16)[name = tensor("op_14530_cast_fp16")]; + tensor var_14537_begin_0 = const()[name = tensor("op_14537_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_14537_end_0 = const()[name = tensor("op_14537_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_14537_end_mask_0 = const()[name = tensor("op_14537_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14537_cast_fp16 = slice_by_index(begin = var_14537_begin_0, end = var_14537_end_0, end_mask = var_14537_end_mask_0, x = var_14061_cast_fp16)[name = tensor("op_14537_cast_fp16")]; + tensor var_14544_begin_0 = const()[name = tensor("op_14544_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_14544_end_0 = const()[name = tensor("op_14544_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_14544_end_mask_0 = const()[name = tensor("op_14544_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14544_cast_fp16 = slice_by_index(begin = var_14544_begin_0, end = var_14544_end_0, end_mask = var_14544_end_mask_0, x = var_14061_cast_fp16)[name = tensor("op_14544_cast_fp16")]; + tensor var_14551_begin_0 = const()[name = tensor("op_14551_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_14551_end_0 = const()[name = tensor("op_14551_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_14551_end_mask_0 = const()[name = tensor("op_14551_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14551_cast_fp16 = slice_by_index(begin = var_14551_begin_0, end = var_14551_end_0, end_mask = var_14551_end_mask_0, x = var_14061_cast_fp16)[name = tensor("op_14551_cast_fp16")]; + tensor var_14558_begin_0 = const()[name = tensor("op_14558_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_14558_end_0 = const()[name = tensor("op_14558_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_14558_end_mask_0 = const()[name = tensor("op_14558_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14558_cast_fp16 = slice_by_index(begin = var_14558_begin_0, end = var_14558_end_0, end_mask = var_14558_end_mask_0, x = var_14065_cast_fp16)[name = tensor("op_14558_cast_fp16")]; + tensor var_14565_begin_0 = const()[name = tensor("op_14565_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_14565_end_0 = const()[name = tensor("op_14565_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_14565_end_mask_0 = const()[name = tensor("op_14565_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14565_cast_fp16 = slice_by_index(begin = var_14565_begin_0, end = var_14565_end_0, end_mask = var_14565_end_mask_0, x = var_14065_cast_fp16)[name = tensor("op_14565_cast_fp16")]; + tensor var_14572_begin_0 = const()[name = tensor("op_14572_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_14572_end_0 = const()[name = tensor("op_14572_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_14572_end_mask_0 = const()[name = tensor("op_14572_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14572_cast_fp16 = slice_by_index(begin = var_14572_begin_0, end = var_14572_end_0, end_mask = var_14572_end_mask_0, x = var_14065_cast_fp16)[name = tensor("op_14572_cast_fp16")]; + tensor var_14579_begin_0 = const()[name = tensor("op_14579_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_14579_end_0 = const()[name = tensor("op_14579_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_14579_end_mask_0 = const()[name = tensor("op_14579_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14579_cast_fp16 = slice_by_index(begin = var_14579_begin_0, end = var_14579_end_0, end_mask = var_14579_end_mask_0, x = var_14065_cast_fp16)[name = tensor("op_14579_cast_fp16")]; + tensor var_14586_begin_0 = const()[name = tensor("op_14586_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_14586_end_0 = const()[name = tensor("op_14586_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_14586_end_mask_0 = const()[name = tensor("op_14586_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14586_cast_fp16 = slice_by_index(begin = var_14586_begin_0, end = var_14586_end_0, end_mask = var_14586_end_mask_0, x = var_14069_cast_fp16)[name = tensor("op_14586_cast_fp16")]; + tensor var_14593_begin_0 = const()[name = tensor("op_14593_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_14593_end_0 = const()[name = tensor("op_14593_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_14593_end_mask_0 = const()[name = tensor("op_14593_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14593_cast_fp16 = slice_by_index(begin = var_14593_begin_0, end = var_14593_end_0, end_mask = var_14593_end_mask_0, x = var_14069_cast_fp16)[name = tensor("op_14593_cast_fp16")]; + tensor var_14600_begin_0 = const()[name = tensor("op_14600_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_14600_end_0 = const()[name = tensor("op_14600_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_14600_end_mask_0 = const()[name = tensor("op_14600_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14600_cast_fp16 = slice_by_index(begin = var_14600_begin_0, end = var_14600_end_0, end_mask = var_14600_end_mask_0, x = var_14069_cast_fp16)[name = tensor("op_14600_cast_fp16")]; + tensor var_14607_begin_0 = const()[name = tensor("op_14607_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_14607_end_0 = const()[name = tensor("op_14607_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_14607_end_mask_0 = const()[name = tensor("op_14607_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14607_cast_fp16 = slice_by_index(begin = var_14607_begin_0, end = var_14607_end_0, end_mask = var_14607_end_mask_0, x = var_14069_cast_fp16)[name = tensor("op_14607_cast_fp16")]; + tensor var_14614_begin_0 = const()[name = tensor("op_14614_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_14614_end_0 = const()[name = tensor("op_14614_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_14614_end_mask_0 = const()[name = tensor("op_14614_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14614_cast_fp16 = slice_by_index(begin = var_14614_begin_0, end = var_14614_end_0, end_mask = var_14614_end_mask_0, x = var_14073_cast_fp16)[name = tensor("op_14614_cast_fp16")]; + tensor var_14621_begin_0 = const()[name = tensor("op_14621_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_14621_end_0 = const()[name = tensor("op_14621_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_14621_end_mask_0 = const()[name = tensor("op_14621_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14621_cast_fp16 = slice_by_index(begin = var_14621_begin_0, end = var_14621_end_0, end_mask = var_14621_end_mask_0, x = var_14073_cast_fp16)[name = tensor("op_14621_cast_fp16")]; + tensor var_14628_begin_0 = const()[name = tensor("op_14628_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_14628_end_0 = const()[name = tensor("op_14628_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_14628_end_mask_0 = const()[name = tensor("op_14628_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14628_cast_fp16 = slice_by_index(begin = var_14628_begin_0, end = var_14628_end_0, end_mask = var_14628_end_mask_0, x = var_14073_cast_fp16)[name = tensor("op_14628_cast_fp16")]; + tensor var_14635_begin_0 = const()[name = tensor("op_14635_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_14635_end_0 = const()[name = tensor("op_14635_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_14635_end_mask_0 = const()[name = tensor("op_14635_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14635_cast_fp16 = slice_by_index(begin = var_14635_begin_0, end = var_14635_end_0, end_mask = var_14635_end_mask_0, x = var_14073_cast_fp16)[name = tensor("op_14635_cast_fp16")]; + tensor k_19_perm_0 = const()[name = tensor("k_19_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_14640_begin_0 = const()[name = tensor("op_14640_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_14640_end_0 = const()[name = tensor("op_14640_end_0"), val = tensor([1, 1500, 1, 64])]; + tensor var_14640_end_mask_0 = const()[name = tensor("op_14640_end_mask_0"), val = tensor([true, true, true, false])]; + tensor transpose_22 = transpose(perm = k_19_perm_0, x = key_19_cast_fp16)[name = tensor("transpose_22")]; + tensor var_14640_cast_fp16 = slice_by_index(begin = var_14640_begin_0, end = var_14640_end_0, end_mask = var_14640_end_mask_0, x = transpose_22)[name = tensor("op_14640_cast_fp16")]; + tensor var_14644_begin_0 = const()[name = tensor("op_14644_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor var_14644_end_0 = const()[name = tensor("op_14644_end_0"), val = tensor([1, 1500, 1, 128])]; + tensor var_14644_end_mask_0 = const()[name = tensor("op_14644_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14644_cast_fp16 = slice_by_index(begin = var_14644_begin_0, end = var_14644_end_0, end_mask = var_14644_end_mask_0, x = transpose_22)[name = tensor("op_14644_cast_fp16")]; + tensor var_14648_begin_0 = const()[name = tensor("op_14648_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_14648_end_0 = const()[name = tensor("op_14648_end_0"), val = tensor([1, 1500, 1, 192])]; + tensor var_14648_end_mask_0 = const()[name = tensor("op_14648_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14648_cast_fp16 = slice_by_index(begin = var_14648_begin_0, end = var_14648_end_0, end_mask = var_14648_end_mask_0, x = transpose_22)[name = tensor("op_14648_cast_fp16")]; + tensor var_14652_begin_0 = const()[name = tensor("op_14652_begin_0"), val = tensor([0, 0, 0, 192])]; + tensor var_14652_end_0 = const()[name = tensor("op_14652_end_0"), val = tensor([1, 1500, 1, 256])]; + tensor var_14652_end_mask_0 = const()[name = tensor("op_14652_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14652_cast_fp16 = slice_by_index(begin = var_14652_begin_0, end = var_14652_end_0, end_mask = var_14652_end_mask_0, x = transpose_22)[name = tensor("op_14652_cast_fp16")]; + tensor var_14656_begin_0 = const()[name = tensor("op_14656_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_14656_end_0 = const()[name = tensor("op_14656_end_0"), val = tensor([1, 1500, 1, 320])]; + tensor var_14656_end_mask_0 = const()[name = tensor("op_14656_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14656_cast_fp16 = slice_by_index(begin = var_14656_begin_0, end = var_14656_end_0, end_mask = var_14656_end_mask_0, x = transpose_22)[name = tensor("op_14656_cast_fp16")]; + tensor var_14660_begin_0 = const()[name = tensor("op_14660_begin_0"), val = tensor([0, 0, 0, 320])]; + tensor var_14660_end_0 = const()[name = tensor("op_14660_end_0"), val = tensor([1, 1500, 1, 384])]; + tensor var_14660_end_mask_0 = const()[name = tensor("op_14660_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14660_cast_fp16 = slice_by_index(begin = var_14660_begin_0, end = var_14660_end_0, end_mask = var_14660_end_mask_0, x = transpose_22)[name = tensor("op_14660_cast_fp16")]; + tensor var_14664_begin_0 = const()[name = tensor("op_14664_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_14664_end_0 = const()[name = tensor("op_14664_end_0"), val = tensor([1, 1500, 1, 448])]; + tensor var_14664_end_mask_0 = const()[name = tensor("op_14664_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14664_cast_fp16 = slice_by_index(begin = var_14664_begin_0, end = var_14664_end_0, end_mask = var_14664_end_mask_0, x = transpose_22)[name = tensor("op_14664_cast_fp16")]; + tensor var_14668_begin_0 = const()[name = tensor("op_14668_begin_0"), val = tensor([0, 0, 0, 448])]; + tensor var_14668_end_0 = const()[name = tensor("op_14668_end_0"), val = tensor([1, 1500, 1, 512])]; + tensor var_14668_end_mask_0 = const()[name = tensor("op_14668_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14668_cast_fp16 = slice_by_index(begin = var_14668_begin_0, end = var_14668_end_0, end_mask = var_14668_end_mask_0, x = transpose_22)[name = tensor("op_14668_cast_fp16")]; + tensor var_14672_begin_0 = const()[name = tensor("op_14672_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_14672_end_0 = const()[name = tensor("op_14672_end_0"), val = tensor([1, 1500, 1, 576])]; + tensor var_14672_end_mask_0 = const()[name = tensor("op_14672_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14672_cast_fp16 = slice_by_index(begin = var_14672_begin_0, end = var_14672_end_0, end_mask = var_14672_end_mask_0, x = transpose_22)[name = tensor("op_14672_cast_fp16")]; + tensor var_14676_begin_0 = const()[name = tensor("op_14676_begin_0"), val = tensor([0, 0, 0, 576])]; + tensor var_14676_end_0 = const()[name = tensor("op_14676_end_0"), val = tensor([1, 1500, 1, 640])]; + tensor var_14676_end_mask_0 = const()[name = tensor("op_14676_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14676_cast_fp16 = slice_by_index(begin = var_14676_begin_0, end = var_14676_end_0, end_mask = var_14676_end_mask_0, x = transpose_22)[name = tensor("op_14676_cast_fp16")]; + tensor var_14680_begin_0 = const()[name = tensor("op_14680_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_14680_end_0 = const()[name = tensor("op_14680_end_0"), val = tensor([1, 1500, 1, 704])]; + tensor var_14680_end_mask_0 = const()[name = tensor("op_14680_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14680_cast_fp16 = slice_by_index(begin = var_14680_begin_0, end = var_14680_end_0, end_mask = var_14680_end_mask_0, x = transpose_22)[name = tensor("op_14680_cast_fp16")]; + tensor var_14684_begin_0 = const()[name = tensor("op_14684_begin_0"), val = tensor([0, 0, 0, 704])]; + tensor var_14684_end_0 = const()[name = tensor("op_14684_end_0"), val = tensor([1, 1500, 1, 768])]; + tensor var_14684_end_mask_0 = const()[name = tensor("op_14684_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14684_cast_fp16 = slice_by_index(begin = var_14684_begin_0, end = var_14684_end_0, end_mask = var_14684_end_mask_0, x = transpose_22)[name = tensor("op_14684_cast_fp16")]; + tensor var_14688_begin_0 = const()[name = tensor("op_14688_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_14688_end_0 = const()[name = tensor("op_14688_end_0"), val = tensor([1, 1500, 1, 832])]; + tensor var_14688_end_mask_0 = const()[name = tensor("op_14688_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14688_cast_fp16 = slice_by_index(begin = var_14688_begin_0, end = var_14688_end_0, end_mask = var_14688_end_mask_0, x = transpose_22)[name = tensor("op_14688_cast_fp16")]; + tensor var_14692_begin_0 = const()[name = tensor("op_14692_begin_0"), val = tensor([0, 0, 0, 832])]; + tensor var_14692_end_0 = const()[name = tensor("op_14692_end_0"), val = tensor([1, 1500, 1, 896])]; + tensor var_14692_end_mask_0 = const()[name = tensor("op_14692_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14692_cast_fp16 = slice_by_index(begin = var_14692_begin_0, end = var_14692_end_0, end_mask = var_14692_end_mask_0, x = transpose_22)[name = tensor("op_14692_cast_fp16")]; + tensor var_14696_begin_0 = const()[name = tensor("op_14696_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_14696_end_0 = const()[name = tensor("op_14696_end_0"), val = tensor([1, 1500, 1, 960])]; + tensor var_14696_end_mask_0 = const()[name = tensor("op_14696_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14696_cast_fp16 = slice_by_index(begin = var_14696_begin_0, end = var_14696_end_0, end_mask = var_14696_end_mask_0, x = transpose_22)[name = tensor("op_14696_cast_fp16")]; + tensor var_14700_begin_0 = const()[name = tensor("op_14700_begin_0"), val = tensor([0, 0, 0, 960])]; + tensor var_14700_end_0 = const()[name = tensor("op_14700_end_0"), val = tensor([1, 1500, 1, 1024])]; + tensor var_14700_end_mask_0 = const()[name = tensor("op_14700_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14700_cast_fp16 = slice_by_index(begin = var_14700_begin_0, end = var_14700_end_0, end_mask = var_14700_end_mask_0, x = transpose_22)[name = tensor("op_14700_cast_fp16")]; + tensor var_14704_begin_0 = const()[name = tensor("op_14704_begin_0"), val = tensor([0, 0, 0, 1024])]; + tensor var_14704_end_0 = const()[name = tensor("op_14704_end_0"), val = tensor([1, 1500, 1, 1088])]; + tensor var_14704_end_mask_0 = const()[name = tensor("op_14704_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14704_cast_fp16 = slice_by_index(begin = var_14704_begin_0, end = var_14704_end_0, end_mask = var_14704_end_mask_0, x = transpose_22)[name = tensor("op_14704_cast_fp16")]; + tensor var_14708_begin_0 = const()[name = tensor("op_14708_begin_0"), val = tensor([0, 0, 0, 1088])]; + tensor var_14708_end_0 = const()[name = tensor("op_14708_end_0"), val = tensor([1, 1500, 1, 1152])]; + tensor var_14708_end_mask_0 = const()[name = tensor("op_14708_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14708_cast_fp16 = slice_by_index(begin = var_14708_begin_0, end = var_14708_end_0, end_mask = var_14708_end_mask_0, x = transpose_22)[name = tensor("op_14708_cast_fp16")]; + tensor var_14712_begin_0 = const()[name = tensor("op_14712_begin_0"), val = tensor([0, 0, 0, 1152])]; + tensor var_14712_end_0 = const()[name = tensor("op_14712_end_0"), val = tensor([1, 1500, 1, 1216])]; + tensor var_14712_end_mask_0 = const()[name = tensor("op_14712_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14712_cast_fp16 = slice_by_index(begin = var_14712_begin_0, end = var_14712_end_0, end_mask = var_14712_end_mask_0, x = transpose_22)[name = tensor("op_14712_cast_fp16")]; + tensor var_14716_begin_0 = const()[name = tensor("op_14716_begin_0"), val = tensor([0, 0, 0, 1216])]; + tensor var_14716_end_0 = const()[name = tensor("op_14716_end_0"), val = tensor([1, 1500, 1, 1280])]; + tensor var_14716_end_mask_0 = const()[name = tensor("op_14716_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_14716_cast_fp16 = slice_by_index(begin = var_14716_begin_0, end = var_14716_end_0, end_mask = var_14716_end_mask_0, x = transpose_22)[name = tensor("op_14716_cast_fp16")]; + tensor var_14718_begin_0 = const()[name = tensor("op_14718_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_14718_end_0 = const()[name = tensor("op_14718_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_14718_end_mask_0 = const()[name = tensor("op_14718_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_14718_cast_fp16 = slice_by_index(begin = var_14718_begin_0, end = var_14718_end_0, end_mask = var_14718_end_mask_0, x = value_19_cast_fp16)[name = tensor("op_14718_cast_fp16")]; + tensor var_14722_begin_0 = const()[name = tensor("op_14722_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_14722_end_0 = const()[name = tensor("op_14722_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_14722_end_mask_0 = const()[name = tensor("op_14722_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_14722_cast_fp16 = slice_by_index(begin = var_14722_begin_0, end = var_14722_end_0, end_mask = var_14722_end_mask_0, x = value_19_cast_fp16)[name = tensor("op_14722_cast_fp16")]; + tensor var_14726_begin_0 = const()[name = tensor("op_14726_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_14726_end_0 = const()[name = tensor("op_14726_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_14726_end_mask_0 = const()[name = tensor("op_14726_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_14726_cast_fp16 = slice_by_index(begin = var_14726_begin_0, end = var_14726_end_0, end_mask = var_14726_end_mask_0, x = value_19_cast_fp16)[name = tensor("op_14726_cast_fp16")]; + tensor var_14730_begin_0 = const()[name = tensor("op_14730_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_14730_end_0 = const()[name = tensor("op_14730_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_14730_end_mask_0 = const()[name = tensor("op_14730_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_14730_cast_fp16 = slice_by_index(begin = var_14730_begin_0, end = var_14730_end_0, end_mask = var_14730_end_mask_0, x = value_19_cast_fp16)[name = tensor("op_14730_cast_fp16")]; + tensor var_14734_begin_0 = const()[name = tensor("op_14734_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_14734_end_0 = const()[name = tensor("op_14734_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_14734_end_mask_0 = const()[name = tensor("op_14734_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_14734_cast_fp16 = slice_by_index(begin = var_14734_begin_0, end = var_14734_end_0, end_mask = var_14734_end_mask_0, x = value_19_cast_fp16)[name = tensor("op_14734_cast_fp16")]; + tensor var_14738_begin_0 = const()[name = tensor("op_14738_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_14738_end_0 = const()[name = tensor("op_14738_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_14738_end_mask_0 = const()[name = tensor("op_14738_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_14738_cast_fp16 = slice_by_index(begin = var_14738_begin_0, end = var_14738_end_0, end_mask = var_14738_end_mask_0, x = value_19_cast_fp16)[name = tensor("op_14738_cast_fp16")]; + tensor var_14742_begin_0 = const()[name = tensor("op_14742_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_14742_end_0 = const()[name = tensor("op_14742_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_14742_end_mask_0 = const()[name = tensor("op_14742_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_14742_cast_fp16 = slice_by_index(begin = var_14742_begin_0, end = var_14742_end_0, end_mask = var_14742_end_mask_0, x = value_19_cast_fp16)[name = tensor("op_14742_cast_fp16")]; + tensor var_14746_begin_0 = const()[name = tensor("op_14746_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_14746_end_0 = const()[name = tensor("op_14746_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_14746_end_mask_0 = const()[name = tensor("op_14746_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_14746_cast_fp16 = slice_by_index(begin = var_14746_begin_0, end = var_14746_end_0, end_mask = var_14746_end_mask_0, x = value_19_cast_fp16)[name = tensor("op_14746_cast_fp16")]; + tensor var_14750_begin_0 = const()[name = tensor("op_14750_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_14750_end_0 = const()[name = tensor("op_14750_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_14750_end_mask_0 = const()[name = tensor("op_14750_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_14750_cast_fp16 = slice_by_index(begin = var_14750_begin_0, end = var_14750_end_0, end_mask = var_14750_end_mask_0, x = value_19_cast_fp16)[name = tensor("op_14750_cast_fp16")]; + tensor var_14754_begin_0 = const()[name = tensor("op_14754_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_14754_end_0 = const()[name = tensor("op_14754_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_14754_end_mask_0 = const()[name = tensor("op_14754_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_14754_cast_fp16 = slice_by_index(begin = var_14754_begin_0, end = var_14754_end_0, end_mask = var_14754_end_mask_0, x = value_19_cast_fp16)[name = tensor("op_14754_cast_fp16")]; + tensor var_14758_begin_0 = const()[name = tensor("op_14758_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_14758_end_0 = const()[name = tensor("op_14758_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_14758_end_mask_0 = const()[name = tensor("op_14758_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_14758_cast_fp16 = slice_by_index(begin = var_14758_begin_0, end = var_14758_end_0, end_mask = var_14758_end_mask_0, x = value_19_cast_fp16)[name = tensor("op_14758_cast_fp16")]; + tensor var_14762_begin_0 = const()[name = tensor("op_14762_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_14762_end_0 = const()[name = tensor("op_14762_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_14762_end_mask_0 = const()[name = tensor("op_14762_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_14762_cast_fp16 = slice_by_index(begin = var_14762_begin_0, end = var_14762_end_0, end_mask = var_14762_end_mask_0, x = value_19_cast_fp16)[name = tensor("op_14762_cast_fp16")]; + tensor var_14766_begin_0 = const()[name = tensor("op_14766_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_14766_end_0 = const()[name = tensor("op_14766_end_0"), val = tensor([1, 832, 1, 1500])]; + tensor var_14766_end_mask_0 = const()[name = tensor("op_14766_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_14766_cast_fp16 = slice_by_index(begin = var_14766_begin_0, end = var_14766_end_0, end_mask = var_14766_end_mask_0, x = value_19_cast_fp16)[name = tensor("op_14766_cast_fp16")]; + tensor var_14770_begin_0 = const()[name = tensor("op_14770_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_14770_end_0 = const()[name = tensor("op_14770_end_0"), val = tensor([1, 896, 1, 1500])]; + tensor var_14770_end_mask_0 = const()[name = tensor("op_14770_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_14770_cast_fp16 = slice_by_index(begin = var_14770_begin_0, end = var_14770_end_0, end_mask = var_14770_end_mask_0, x = value_19_cast_fp16)[name = tensor("op_14770_cast_fp16")]; + tensor var_14774_begin_0 = const()[name = tensor("op_14774_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_14774_end_0 = const()[name = tensor("op_14774_end_0"), val = tensor([1, 960, 1, 1500])]; + tensor var_14774_end_mask_0 = const()[name = tensor("op_14774_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_14774_cast_fp16 = slice_by_index(begin = var_14774_begin_0, end = var_14774_end_0, end_mask = var_14774_end_mask_0, x = value_19_cast_fp16)[name = tensor("op_14774_cast_fp16")]; + tensor var_14778_begin_0 = const()[name = tensor("op_14778_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_14778_end_0 = const()[name = tensor("op_14778_end_0"), val = tensor([1, 1024, 1, 1500])]; + tensor var_14778_end_mask_0 = const()[name = tensor("op_14778_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_14778_cast_fp16 = slice_by_index(begin = var_14778_begin_0, end = var_14778_end_0, end_mask = var_14778_end_mask_0, x = value_19_cast_fp16)[name = tensor("op_14778_cast_fp16")]; + tensor var_14782_begin_0 = const()[name = tensor("op_14782_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_14782_end_0 = const()[name = tensor("op_14782_end_0"), val = tensor([1, 1088, 1, 1500])]; + tensor var_14782_end_mask_0 = const()[name = tensor("op_14782_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_14782_cast_fp16 = slice_by_index(begin = var_14782_begin_0, end = var_14782_end_0, end_mask = var_14782_end_mask_0, x = value_19_cast_fp16)[name = tensor("op_14782_cast_fp16")]; + tensor var_14786_begin_0 = const()[name = tensor("op_14786_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_14786_end_0 = const()[name = tensor("op_14786_end_0"), val = tensor([1, 1152, 1, 1500])]; + tensor var_14786_end_mask_0 = const()[name = tensor("op_14786_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_14786_cast_fp16 = slice_by_index(begin = var_14786_begin_0, end = var_14786_end_0, end_mask = var_14786_end_mask_0, x = value_19_cast_fp16)[name = tensor("op_14786_cast_fp16")]; + tensor var_14790_begin_0 = const()[name = tensor("op_14790_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_14790_end_0 = const()[name = tensor("op_14790_end_0"), val = tensor([1, 1216, 1, 1500])]; + tensor var_14790_end_mask_0 = const()[name = tensor("op_14790_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_14790_cast_fp16 = slice_by_index(begin = var_14790_begin_0, end = var_14790_end_0, end_mask = var_14790_end_mask_0, x = value_19_cast_fp16)[name = tensor("op_14790_cast_fp16")]; + tensor var_14794_begin_0 = const()[name = tensor("op_14794_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_14794_end_0 = const()[name = tensor("op_14794_end_0"), val = tensor([1, 1280, 1, 1500])]; + tensor var_14794_end_mask_0 = const()[name = tensor("op_14794_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_14794_cast_fp16 = slice_by_index(begin = var_14794_begin_0, end = var_14794_end_0, end_mask = var_14794_end_mask_0, x = value_19_cast_fp16)[name = tensor("op_14794_cast_fp16")]; + tensor var_14798_equation_0 = const()[name = tensor("op_14798_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_14798_cast_fp16 = einsum(equation = var_14798_equation_0, values = (var_14640_cast_fp16, var_14082_cast_fp16))[name = tensor("op_14798_cast_fp16")]; + tensor var_14799_to_fp16 = const()[name = tensor("op_14799_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1441_cast_fp16 = mul(x = var_14798_cast_fp16, y = var_14799_to_fp16)[name = tensor("aw_chunk_1441_cast_fp16")]; + tensor var_14802_equation_0 = const()[name = tensor("op_14802_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_14802_cast_fp16 = einsum(equation = var_14802_equation_0, values = (var_14640_cast_fp16, var_14089_cast_fp16))[name = tensor("op_14802_cast_fp16")]; + tensor var_14803_to_fp16 = const()[name = tensor("op_14803_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1443_cast_fp16 = mul(x = var_14802_cast_fp16, y = var_14803_to_fp16)[name = tensor("aw_chunk_1443_cast_fp16")]; + tensor var_14806_equation_0 = const()[name = tensor("op_14806_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_14806_cast_fp16 = einsum(equation = var_14806_equation_0, values = (var_14640_cast_fp16, var_14096_cast_fp16))[name = tensor("op_14806_cast_fp16")]; + tensor var_14807_to_fp16 = const()[name = tensor("op_14807_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1445_cast_fp16 = mul(x = var_14806_cast_fp16, y = var_14807_to_fp16)[name = tensor("aw_chunk_1445_cast_fp16")]; + tensor var_14810_equation_0 = const()[name = tensor("op_14810_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_14810_cast_fp16 = einsum(equation = var_14810_equation_0, values = (var_14640_cast_fp16, var_14103_cast_fp16))[name = tensor("op_14810_cast_fp16")]; + tensor var_14811_to_fp16 = const()[name = tensor("op_14811_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1447_cast_fp16 = mul(x = var_14810_cast_fp16, y = var_14811_to_fp16)[name = tensor("aw_chunk_1447_cast_fp16")]; + tensor var_14814_equation_0 = const()[name = tensor("op_14814_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_14814_cast_fp16 = einsum(equation = var_14814_equation_0, values = (var_14644_cast_fp16, var_14110_cast_fp16))[name = tensor("op_14814_cast_fp16")]; + tensor var_14815_to_fp16 = const()[name = tensor("op_14815_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1449_cast_fp16 = mul(x = var_14814_cast_fp16, y = var_14815_to_fp16)[name = tensor("aw_chunk_1449_cast_fp16")]; + tensor var_14818_equation_0 = const()[name = tensor("op_14818_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_14818_cast_fp16 = einsum(equation = var_14818_equation_0, values = (var_14644_cast_fp16, var_14117_cast_fp16))[name = tensor("op_14818_cast_fp16")]; + tensor var_14819_to_fp16 = const()[name = tensor("op_14819_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1451_cast_fp16 = mul(x = var_14818_cast_fp16, y = var_14819_to_fp16)[name = tensor("aw_chunk_1451_cast_fp16")]; + tensor var_14822_equation_0 = const()[name = tensor("op_14822_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_14822_cast_fp16 = einsum(equation = var_14822_equation_0, values = (var_14644_cast_fp16, var_14124_cast_fp16))[name = tensor("op_14822_cast_fp16")]; + tensor var_14823_to_fp16 = const()[name = tensor("op_14823_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1453_cast_fp16 = mul(x = var_14822_cast_fp16, y = var_14823_to_fp16)[name = tensor("aw_chunk_1453_cast_fp16")]; + tensor var_14826_equation_0 = const()[name = tensor("op_14826_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_14826_cast_fp16 = einsum(equation = var_14826_equation_0, values = (var_14644_cast_fp16, var_14131_cast_fp16))[name = tensor("op_14826_cast_fp16")]; + tensor var_14827_to_fp16 = const()[name = tensor("op_14827_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1455_cast_fp16 = mul(x = var_14826_cast_fp16, y = var_14827_to_fp16)[name = tensor("aw_chunk_1455_cast_fp16")]; + tensor var_14830_equation_0 = const()[name = tensor("op_14830_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_14830_cast_fp16 = einsum(equation = var_14830_equation_0, values = (var_14648_cast_fp16, var_14138_cast_fp16))[name = tensor("op_14830_cast_fp16")]; + tensor var_14831_to_fp16 = const()[name = tensor("op_14831_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1457_cast_fp16 = mul(x = var_14830_cast_fp16, y = var_14831_to_fp16)[name = tensor("aw_chunk_1457_cast_fp16")]; + tensor var_14834_equation_0 = const()[name = tensor("op_14834_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_14834_cast_fp16 = einsum(equation = var_14834_equation_0, values = (var_14648_cast_fp16, var_14145_cast_fp16))[name = tensor("op_14834_cast_fp16")]; + tensor var_14835_to_fp16 = const()[name = tensor("op_14835_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1459_cast_fp16 = mul(x = var_14834_cast_fp16, y = var_14835_to_fp16)[name = tensor("aw_chunk_1459_cast_fp16")]; + tensor var_14838_equation_0 = const()[name = tensor("op_14838_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_14838_cast_fp16 = einsum(equation = var_14838_equation_0, values = (var_14648_cast_fp16, var_14152_cast_fp16))[name = tensor("op_14838_cast_fp16")]; + tensor var_14839_to_fp16 = const()[name = tensor("op_14839_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1461_cast_fp16 = mul(x = var_14838_cast_fp16, y = var_14839_to_fp16)[name = tensor("aw_chunk_1461_cast_fp16")]; + tensor var_14842_equation_0 = const()[name = tensor("op_14842_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_14842_cast_fp16 = einsum(equation = var_14842_equation_0, values = (var_14648_cast_fp16, var_14159_cast_fp16))[name = tensor("op_14842_cast_fp16")]; + tensor var_14843_to_fp16 = const()[name = tensor("op_14843_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1463_cast_fp16 = mul(x = var_14842_cast_fp16, y = var_14843_to_fp16)[name = tensor("aw_chunk_1463_cast_fp16")]; + tensor var_14846_equation_0 = const()[name = tensor("op_14846_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_14846_cast_fp16 = einsum(equation = var_14846_equation_0, values = (var_14652_cast_fp16, var_14166_cast_fp16))[name = tensor("op_14846_cast_fp16")]; + tensor var_14847_to_fp16 = const()[name = tensor("op_14847_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1465_cast_fp16 = mul(x = var_14846_cast_fp16, y = var_14847_to_fp16)[name = tensor("aw_chunk_1465_cast_fp16")]; + tensor var_14850_equation_0 = const()[name = tensor("op_14850_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_14850_cast_fp16 = einsum(equation = var_14850_equation_0, values = (var_14652_cast_fp16, var_14173_cast_fp16))[name = tensor("op_14850_cast_fp16")]; + tensor var_14851_to_fp16 = const()[name = tensor("op_14851_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1467_cast_fp16 = mul(x = var_14850_cast_fp16, y = var_14851_to_fp16)[name = tensor("aw_chunk_1467_cast_fp16")]; + tensor var_14854_equation_0 = const()[name = tensor("op_14854_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_14854_cast_fp16 = einsum(equation = var_14854_equation_0, values = (var_14652_cast_fp16, var_14180_cast_fp16))[name = tensor("op_14854_cast_fp16")]; + tensor var_14855_to_fp16 = const()[name = tensor("op_14855_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1469_cast_fp16 = mul(x = var_14854_cast_fp16, y = var_14855_to_fp16)[name = tensor("aw_chunk_1469_cast_fp16")]; + tensor var_14858_equation_0 = const()[name = tensor("op_14858_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_14858_cast_fp16 = einsum(equation = var_14858_equation_0, values = (var_14652_cast_fp16, var_14187_cast_fp16))[name = tensor("op_14858_cast_fp16")]; + tensor var_14859_to_fp16 = const()[name = tensor("op_14859_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1471_cast_fp16 = mul(x = var_14858_cast_fp16, y = var_14859_to_fp16)[name = tensor("aw_chunk_1471_cast_fp16")]; + tensor var_14862_equation_0 = const()[name = tensor("op_14862_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_14862_cast_fp16 = einsum(equation = var_14862_equation_0, values = (var_14656_cast_fp16, var_14194_cast_fp16))[name = tensor("op_14862_cast_fp16")]; + tensor var_14863_to_fp16 = const()[name = tensor("op_14863_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1473_cast_fp16 = mul(x = var_14862_cast_fp16, y = var_14863_to_fp16)[name = tensor("aw_chunk_1473_cast_fp16")]; + tensor var_14866_equation_0 = const()[name = tensor("op_14866_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_14866_cast_fp16 = einsum(equation = var_14866_equation_0, values = (var_14656_cast_fp16, var_14201_cast_fp16))[name = tensor("op_14866_cast_fp16")]; + tensor var_14867_to_fp16 = const()[name = tensor("op_14867_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1475_cast_fp16 = mul(x = var_14866_cast_fp16, y = var_14867_to_fp16)[name = tensor("aw_chunk_1475_cast_fp16")]; + tensor var_14870_equation_0 = const()[name = tensor("op_14870_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_14870_cast_fp16 = einsum(equation = var_14870_equation_0, values = (var_14656_cast_fp16, var_14208_cast_fp16))[name = tensor("op_14870_cast_fp16")]; + tensor var_14871_to_fp16 = const()[name = tensor("op_14871_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1477_cast_fp16 = mul(x = var_14870_cast_fp16, y = var_14871_to_fp16)[name = tensor("aw_chunk_1477_cast_fp16")]; + tensor var_14874_equation_0 = const()[name = tensor("op_14874_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_14874_cast_fp16 = einsum(equation = var_14874_equation_0, values = (var_14656_cast_fp16, var_14215_cast_fp16))[name = tensor("op_14874_cast_fp16")]; + tensor var_14875_to_fp16 = const()[name = tensor("op_14875_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1479_cast_fp16 = mul(x = var_14874_cast_fp16, y = var_14875_to_fp16)[name = tensor("aw_chunk_1479_cast_fp16")]; + tensor var_14878_equation_0 = const()[name = tensor("op_14878_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_14878_cast_fp16 = einsum(equation = var_14878_equation_0, values = (var_14660_cast_fp16, var_14222_cast_fp16))[name = tensor("op_14878_cast_fp16")]; + tensor var_14879_to_fp16 = const()[name = tensor("op_14879_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1481_cast_fp16 = mul(x = var_14878_cast_fp16, y = var_14879_to_fp16)[name = tensor("aw_chunk_1481_cast_fp16")]; + tensor var_14882_equation_0 = const()[name = tensor("op_14882_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_14882_cast_fp16 = einsum(equation = var_14882_equation_0, values = (var_14660_cast_fp16, var_14229_cast_fp16))[name = tensor("op_14882_cast_fp16")]; + tensor var_14883_to_fp16 = const()[name = tensor("op_14883_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1483_cast_fp16 = mul(x = var_14882_cast_fp16, y = var_14883_to_fp16)[name = tensor("aw_chunk_1483_cast_fp16")]; + tensor var_14886_equation_0 = const()[name = tensor("op_14886_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_14886_cast_fp16 = einsum(equation = var_14886_equation_0, values = (var_14660_cast_fp16, var_14236_cast_fp16))[name = tensor("op_14886_cast_fp16")]; + tensor var_14887_to_fp16 = const()[name = tensor("op_14887_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1485_cast_fp16 = mul(x = var_14886_cast_fp16, y = var_14887_to_fp16)[name = tensor("aw_chunk_1485_cast_fp16")]; + tensor var_14890_equation_0 = const()[name = tensor("op_14890_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_14890_cast_fp16 = einsum(equation = var_14890_equation_0, values = (var_14660_cast_fp16, var_14243_cast_fp16))[name = tensor("op_14890_cast_fp16")]; + tensor var_14891_to_fp16 = const()[name = tensor("op_14891_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1487_cast_fp16 = mul(x = var_14890_cast_fp16, y = var_14891_to_fp16)[name = tensor("aw_chunk_1487_cast_fp16")]; + tensor var_14894_equation_0 = const()[name = tensor("op_14894_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_14894_cast_fp16 = einsum(equation = var_14894_equation_0, values = (var_14664_cast_fp16, var_14250_cast_fp16))[name = tensor("op_14894_cast_fp16")]; + tensor var_14895_to_fp16 = const()[name = tensor("op_14895_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1489_cast_fp16 = mul(x = var_14894_cast_fp16, y = var_14895_to_fp16)[name = tensor("aw_chunk_1489_cast_fp16")]; + tensor var_14898_equation_0 = const()[name = tensor("op_14898_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_14898_cast_fp16 = einsum(equation = var_14898_equation_0, values = (var_14664_cast_fp16, var_14257_cast_fp16))[name = tensor("op_14898_cast_fp16")]; + tensor var_14899_to_fp16 = const()[name = tensor("op_14899_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1491_cast_fp16 = mul(x = var_14898_cast_fp16, y = var_14899_to_fp16)[name = tensor("aw_chunk_1491_cast_fp16")]; + tensor var_14902_equation_0 = const()[name = tensor("op_14902_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_14902_cast_fp16 = einsum(equation = var_14902_equation_0, values = (var_14664_cast_fp16, var_14264_cast_fp16))[name = tensor("op_14902_cast_fp16")]; + tensor var_14903_to_fp16 = const()[name = tensor("op_14903_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1493_cast_fp16 = mul(x = var_14902_cast_fp16, y = var_14903_to_fp16)[name = tensor("aw_chunk_1493_cast_fp16")]; + tensor var_14906_equation_0 = const()[name = tensor("op_14906_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_14906_cast_fp16 = einsum(equation = var_14906_equation_0, values = (var_14664_cast_fp16, var_14271_cast_fp16))[name = tensor("op_14906_cast_fp16")]; + tensor var_14907_to_fp16 = const()[name = tensor("op_14907_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1495_cast_fp16 = mul(x = var_14906_cast_fp16, y = var_14907_to_fp16)[name = tensor("aw_chunk_1495_cast_fp16")]; + tensor var_14910_equation_0 = const()[name = tensor("op_14910_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_14910_cast_fp16 = einsum(equation = var_14910_equation_0, values = (var_14668_cast_fp16, var_14278_cast_fp16))[name = tensor("op_14910_cast_fp16")]; + tensor var_14911_to_fp16 = const()[name = tensor("op_14911_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1497_cast_fp16 = mul(x = var_14910_cast_fp16, y = var_14911_to_fp16)[name = tensor("aw_chunk_1497_cast_fp16")]; + tensor var_14914_equation_0 = const()[name = tensor("op_14914_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_14914_cast_fp16 = einsum(equation = var_14914_equation_0, values = (var_14668_cast_fp16, var_14285_cast_fp16))[name = tensor("op_14914_cast_fp16")]; + tensor var_14915_to_fp16 = const()[name = tensor("op_14915_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1499_cast_fp16 = mul(x = var_14914_cast_fp16, y = var_14915_to_fp16)[name = tensor("aw_chunk_1499_cast_fp16")]; + tensor var_14918_equation_0 = const()[name = tensor("op_14918_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_14918_cast_fp16 = einsum(equation = var_14918_equation_0, values = (var_14668_cast_fp16, var_14292_cast_fp16))[name = tensor("op_14918_cast_fp16")]; + tensor var_14919_to_fp16 = const()[name = tensor("op_14919_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1501_cast_fp16 = mul(x = var_14918_cast_fp16, y = var_14919_to_fp16)[name = tensor("aw_chunk_1501_cast_fp16")]; + tensor var_14922_equation_0 = const()[name = tensor("op_14922_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_14922_cast_fp16 = einsum(equation = var_14922_equation_0, values = (var_14668_cast_fp16, var_14299_cast_fp16))[name = tensor("op_14922_cast_fp16")]; + tensor var_14923_to_fp16 = const()[name = tensor("op_14923_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1503_cast_fp16 = mul(x = var_14922_cast_fp16, y = var_14923_to_fp16)[name = tensor("aw_chunk_1503_cast_fp16")]; + tensor var_14926_equation_0 = const()[name = tensor("op_14926_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_14926_cast_fp16 = einsum(equation = var_14926_equation_0, values = (var_14672_cast_fp16, var_14306_cast_fp16))[name = tensor("op_14926_cast_fp16")]; + tensor var_14927_to_fp16 = const()[name = tensor("op_14927_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1505_cast_fp16 = mul(x = var_14926_cast_fp16, y = var_14927_to_fp16)[name = tensor("aw_chunk_1505_cast_fp16")]; + tensor var_14930_equation_0 = const()[name = tensor("op_14930_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_14930_cast_fp16 = einsum(equation = var_14930_equation_0, values = (var_14672_cast_fp16, var_14313_cast_fp16))[name = tensor("op_14930_cast_fp16")]; + tensor var_14931_to_fp16 = const()[name = tensor("op_14931_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1507_cast_fp16 = mul(x = var_14930_cast_fp16, y = var_14931_to_fp16)[name = tensor("aw_chunk_1507_cast_fp16")]; + tensor var_14934_equation_0 = const()[name = tensor("op_14934_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_14934_cast_fp16 = einsum(equation = var_14934_equation_0, values = (var_14672_cast_fp16, var_14320_cast_fp16))[name = tensor("op_14934_cast_fp16")]; + tensor var_14935_to_fp16 = const()[name = tensor("op_14935_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1509_cast_fp16 = mul(x = var_14934_cast_fp16, y = var_14935_to_fp16)[name = tensor("aw_chunk_1509_cast_fp16")]; + tensor var_14938_equation_0 = const()[name = tensor("op_14938_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_14938_cast_fp16 = einsum(equation = var_14938_equation_0, values = (var_14672_cast_fp16, var_14327_cast_fp16))[name = tensor("op_14938_cast_fp16")]; + tensor var_14939_to_fp16 = const()[name = tensor("op_14939_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1511_cast_fp16 = mul(x = var_14938_cast_fp16, y = var_14939_to_fp16)[name = tensor("aw_chunk_1511_cast_fp16")]; + tensor var_14942_equation_0 = const()[name = tensor("op_14942_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_14942_cast_fp16 = einsum(equation = var_14942_equation_0, values = (var_14676_cast_fp16, var_14334_cast_fp16))[name = tensor("op_14942_cast_fp16")]; + tensor var_14943_to_fp16 = const()[name = tensor("op_14943_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1513_cast_fp16 = mul(x = var_14942_cast_fp16, y = var_14943_to_fp16)[name = tensor("aw_chunk_1513_cast_fp16")]; + tensor var_14946_equation_0 = const()[name = tensor("op_14946_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_14946_cast_fp16 = einsum(equation = var_14946_equation_0, values = (var_14676_cast_fp16, var_14341_cast_fp16))[name = tensor("op_14946_cast_fp16")]; + tensor var_14947_to_fp16 = const()[name = tensor("op_14947_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1515_cast_fp16 = mul(x = var_14946_cast_fp16, y = var_14947_to_fp16)[name = tensor("aw_chunk_1515_cast_fp16")]; + tensor var_14950_equation_0 = const()[name = tensor("op_14950_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_14950_cast_fp16 = einsum(equation = var_14950_equation_0, values = (var_14676_cast_fp16, var_14348_cast_fp16))[name = tensor("op_14950_cast_fp16")]; + tensor var_14951_to_fp16 = const()[name = tensor("op_14951_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1517_cast_fp16 = mul(x = var_14950_cast_fp16, y = var_14951_to_fp16)[name = tensor("aw_chunk_1517_cast_fp16")]; + tensor var_14954_equation_0 = const()[name = tensor("op_14954_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_14954_cast_fp16 = einsum(equation = var_14954_equation_0, values = (var_14676_cast_fp16, var_14355_cast_fp16))[name = tensor("op_14954_cast_fp16")]; + tensor var_14955_to_fp16 = const()[name = tensor("op_14955_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1519_cast_fp16 = mul(x = var_14954_cast_fp16, y = var_14955_to_fp16)[name = tensor("aw_chunk_1519_cast_fp16")]; + tensor var_14958_equation_0 = const()[name = tensor("op_14958_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_14958_cast_fp16 = einsum(equation = var_14958_equation_0, values = (var_14680_cast_fp16, var_14362_cast_fp16))[name = tensor("op_14958_cast_fp16")]; + tensor var_14959_to_fp16 = const()[name = tensor("op_14959_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1521_cast_fp16 = mul(x = var_14958_cast_fp16, y = var_14959_to_fp16)[name = tensor("aw_chunk_1521_cast_fp16")]; + tensor var_14962_equation_0 = const()[name = tensor("op_14962_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_14962_cast_fp16 = einsum(equation = var_14962_equation_0, values = (var_14680_cast_fp16, var_14369_cast_fp16))[name = tensor("op_14962_cast_fp16")]; + tensor var_14963_to_fp16 = const()[name = tensor("op_14963_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1523_cast_fp16 = mul(x = var_14962_cast_fp16, y = var_14963_to_fp16)[name = tensor("aw_chunk_1523_cast_fp16")]; + tensor var_14966_equation_0 = const()[name = tensor("op_14966_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_14966_cast_fp16 = einsum(equation = var_14966_equation_0, values = (var_14680_cast_fp16, var_14376_cast_fp16))[name = tensor("op_14966_cast_fp16")]; + tensor var_14967_to_fp16 = const()[name = tensor("op_14967_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1525_cast_fp16 = mul(x = var_14966_cast_fp16, y = var_14967_to_fp16)[name = tensor("aw_chunk_1525_cast_fp16")]; + tensor var_14970_equation_0 = const()[name = tensor("op_14970_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_14970_cast_fp16 = einsum(equation = var_14970_equation_0, values = (var_14680_cast_fp16, var_14383_cast_fp16))[name = tensor("op_14970_cast_fp16")]; + tensor var_14971_to_fp16 = const()[name = tensor("op_14971_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1527_cast_fp16 = mul(x = var_14970_cast_fp16, y = var_14971_to_fp16)[name = tensor("aw_chunk_1527_cast_fp16")]; + tensor var_14974_equation_0 = const()[name = tensor("op_14974_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_14974_cast_fp16 = einsum(equation = var_14974_equation_0, values = (var_14684_cast_fp16, var_14390_cast_fp16))[name = tensor("op_14974_cast_fp16")]; + tensor var_14975_to_fp16 = const()[name = tensor("op_14975_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1529_cast_fp16 = mul(x = var_14974_cast_fp16, y = var_14975_to_fp16)[name = tensor("aw_chunk_1529_cast_fp16")]; + tensor var_14978_equation_0 = const()[name = tensor("op_14978_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_14978_cast_fp16 = einsum(equation = var_14978_equation_0, values = (var_14684_cast_fp16, var_14397_cast_fp16))[name = tensor("op_14978_cast_fp16")]; + tensor var_14979_to_fp16 = const()[name = tensor("op_14979_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1531_cast_fp16 = mul(x = var_14978_cast_fp16, y = var_14979_to_fp16)[name = tensor("aw_chunk_1531_cast_fp16")]; + tensor var_14982_equation_0 = const()[name = tensor("op_14982_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_14982_cast_fp16 = einsum(equation = var_14982_equation_0, values = (var_14684_cast_fp16, var_14404_cast_fp16))[name = tensor("op_14982_cast_fp16")]; + tensor var_14983_to_fp16 = const()[name = tensor("op_14983_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1533_cast_fp16 = mul(x = var_14982_cast_fp16, y = var_14983_to_fp16)[name = tensor("aw_chunk_1533_cast_fp16")]; + tensor var_14986_equation_0 = const()[name = tensor("op_14986_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_14986_cast_fp16 = einsum(equation = var_14986_equation_0, values = (var_14684_cast_fp16, var_14411_cast_fp16))[name = tensor("op_14986_cast_fp16")]; + tensor var_14987_to_fp16 = const()[name = tensor("op_14987_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1535_cast_fp16 = mul(x = var_14986_cast_fp16, y = var_14987_to_fp16)[name = tensor("aw_chunk_1535_cast_fp16")]; + tensor var_14990_equation_0 = const()[name = tensor("op_14990_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_14990_cast_fp16 = einsum(equation = var_14990_equation_0, values = (var_14688_cast_fp16, var_14418_cast_fp16))[name = tensor("op_14990_cast_fp16")]; + tensor var_14991_to_fp16 = const()[name = tensor("op_14991_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1537_cast_fp16 = mul(x = var_14990_cast_fp16, y = var_14991_to_fp16)[name = tensor("aw_chunk_1537_cast_fp16")]; + tensor var_14994_equation_0 = const()[name = tensor("op_14994_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_14994_cast_fp16 = einsum(equation = var_14994_equation_0, values = (var_14688_cast_fp16, var_14425_cast_fp16))[name = tensor("op_14994_cast_fp16")]; + tensor var_14995_to_fp16 = const()[name = tensor("op_14995_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1539_cast_fp16 = mul(x = var_14994_cast_fp16, y = var_14995_to_fp16)[name = tensor("aw_chunk_1539_cast_fp16")]; + tensor var_14998_equation_0 = const()[name = tensor("op_14998_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_14998_cast_fp16 = einsum(equation = var_14998_equation_0, values = (var_14688_cast_fp16, var_14432_cast_fp16))[name = tensor("op_14998_cast_fp16")]; + tensor var_14999_to_fp16 = const()[name = tensor("op_14999_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1541_cast_fp16 = mul(x = var_14998_cast_fp16, y = var_14999_to_fp16)[name = tensor("aw_chunk_1541_cast_fp16")]; + tensor var_15002_equation_0 = const()[name = tensor("op_15002_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_15002_cast_fp16 = einsum(equation = var_15002_equation_0, values = (var_14688_cast_fp16, var_14439_cast_fp16))[name = tensor("op_15002_cast_fp16")]; + tensor var_15003_to_fp16 = const()[name = tensor("op_15003_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1543_cast_fp16 = mul(x = var_15002_cast_fp16, y = var_15003_to_fp16)[name = tensor("aw_chunk_1543_cast_fp16")]; + tensor var_15006_equation_0 = const()[name = tensor("op_15006_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_15006_cast_fp16 = einsum(equation = var_15006_equation_0, values = (var_14692_cast_fp16, var_14446_cast_fp16))[name = tensor("op_15006_cast_fp16")]; + tensor var_15007_to_fp16 = const()[name = tensor("op_15007_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1545_cast_fp16 = mul(x = var_15006_cast_fp16, y = var_15007_to_fp16)[name = tensor("aw_chunk_1545_cast_fp16")]; + tensor var_15010_equation_0 = const()[name = tensor("op_15010_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_15010_cast_fp16 = einsum(equation = var_15010_equation_0, values = (var_14692_cast_fp16, var_14453_cast_fp16))[name = tensor("op_15010_cast_fp16")]; + tensor var_15011_to_fp16 = const()[name = tensor("op_15011_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1547_cast_fp16 = mul(x = var_15010_cast_fp16, y = var_15011_to_fp16)[name = tensor("aw_chunk_1547_cast_fp16")]; + tensor var_15014_equation_0 = const()[name = tensor("op_15014_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_15014_cast_fp16 = einsum(equation = var_15014_equation_0, values = (var_14692_cast_fp16, var_14460_cast_fp16))[name = tensor("op_15014_cast_fp16")]; + tensor var_15015_to_fp16 = const()[name = tensor("op_15015_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1549_cast_fp16 = mul(x = var_15014_cast_fp16, y = var_15015_to_fp16)[name = tensor("aw_chunk_1549_cast_fp16")]; + tensor var_15018_equation_0 = const()[name = tensor("op_15018_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_15018_cast_fp16 = einsum(equation = var_15018_equation_0, values = (var_14692_cast_fp16, var_14467_cast_fp16))[name = tensor("op_15018_cast_fp16")]; + tensor var_15019_to_fp16 = const()[name = tensor("op_15019_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1551_cast_fp16 = mul(x = var_15018_cast_fp16, y = var_15019_to_fp16)[name = tensor("aw_chunk_1551_cast_fp16")]; + tensor var_15022_equation_0 = const()[name = tensor("op_15022_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_15022_cast_fp16 = einsum(equation = var_15022_equation_0, values = (var_14696_cast_fp16, var_14474_cast_fp16))[name = tensor("op_15022_cast_fp16")]; + tensor var_15023_to_fp16 = const()[name = tensor("op_15023_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1553_cast_fp16 = mul(x = var_15022_cast_fp16, y = var_15023_to_fp16)[name = tensor("aw_chunk_1553_cast_fp16")]; + tensor var_15026_equation_0 = const()[name = tensor("op_15026_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_15026_cast_fp16 = einsum(equation = var_15026_equation_0, values = (var_14696_cast_fp16, var_14481_cast_fp16))[name = tensor("op_15026_cast_fp16")]; + tensor var_15027_to_fp16 = const()[name = tensor("op_15027_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1555_cast_fp16 = mul(x = var_15026_cast_fp16, y = var_15027_to_fp16)[name = tensor("aw_chunk_1555_cast_fp16")]; + tensor var_15030_equation_0 = const()[name = tensor("op_15030_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_15030_cast_fp16 = einsum(equation = var_15030_equation_0, values = (var_14696_cast_fp16, var_14488_cast_fp16))[name = tensor("op_15030_cast_fp16")]; + tensor var_15031_to_fp16 = const()[name = tensor("op_15031_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1557_cast_fp16 = mul(x = var_15030_cast_fp16, y = var_15031_to_fp16)[name = tensor("aw_chunk_1557_cast_fp16")]; + tensor var_15034_equation_0 = const()[name = tensor("op_15034_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_15034_cast_fp16 = einsum(equation = var_15034_equation_0, values = (var_14696_cast_fp16, var_14495_cast_fp16))[name = tensor("op_15034_cast_fp16")]; + tensor var_15035_to_fp16 = const()[name = tensor("op_15035_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1559_cast_fp16 = mul(x = var_15034_cast_fp16, y = var_15035_to_fp16)[name = tensor("aw_chunk_1559_cast_fp16")]; + tensor var_15038_equation_0 = const()[name = tensor("op_15038_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_15038_cast_fp16 = einsum(equation = var_15038_equation_0, values = (var_14700_cast_fp16, var_14502_cast_fp16))[name = tensor("op_15038_cast_fp16")]; + tensor var_15039_to_fp16 = const()[name = tensor("op_15039_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1561_cast_fp16 = mul(x = var_15038_cast_fp16, y = var_15039_to_fp16)[name = tensor("aw_chunk_1561_cast_fp16")]; + tensor var_15042_equation_0 = const()[name = tensor("op_15042_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_15042_cast_fp16 = einsum(equation = var_15042_equation_0, values = (var_14700_cast_fp16, var_14509_cast_fp16))[name = tensor("op_15042_cast_fp16")]; + tensor var_15043_to_fp16 = const()[name = tensor("op_15043_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1563_cast_fp16 = mul(x = var_15042_cast_fp16, y = var_15043_to_fp16)[name = tensor("aw_chunk_1563_cast_fp16")]; + tensor var_15046_equation_0 = const()[name = tensor("op_15046_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_15046_cast_fp16 = einsum(equation = var_15046_equation_0, values = (var_14700_cast_fp16, var_14516_cast_fp16))[name = tensor("op_15046_cast_fp16")]; + tensor var_15047_to_fp16 = const()[name = tensor("op_15047_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1565_cast_fp16 = mul(x = var_15046_cast_fp16, y = var_15047_to_fp16)[name = tensor("aw_chunk_1565_cast_fp16")]; + tensor var_15050_equation_0 = const()[name = tensor("op_15050_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_15050_cast_fp16 = einsum(equation = var_15050_equation_0, values = (var_14700_cast_fp16, var_14523_cast_fp16))[name = tensor("op_15050_cast_fp16")]; + tensor var_15051_to_fp16 = const()[name = tensor("op_15051_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1567_cast_fp16 = mul(x = var_15050_cast_fp16, y = var_15051_to_fp16)[name = tensor("aw_chunk_1567_cast_fp16")]; + tensor var_15054_equation_0 = const()[name = tensor("op_15054_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_15054_cast_fp16 = einsum(equation = var_15054_equation_0, values = (var_14704_cast_fp16, var_14530_cast_fp16))[name = tensor("op_15054_cast_fp16")]; + tensor var_15055_to_fp16 = const()[name = tensor("op_15055_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1569_cast_fp16 = mul(x = var_15054_cast_fp16, y = var_15055_to_fp16)[name = tensor("aw_chunk_1569_cast_fp16")]; + tensor var_15058_equation_0 = const()[name = tensor("op_15058_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_15058_cast_fp16 = einsum(equation = var_15058_equation_0, values = (var_14704_cast_fp16, var_14537_cast_fp16))[name = tensor("op_15058_cast_fp16")]; + tensor var_15059_to_fp16 = const()[name = tensor("op_15059_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1571_cast_fp16 = mul(x = var_15058_cast_fp16, y = var_15059_to_fp16)[name = tensor("aw_chunk_1571_cast_fp16")]; + tensor var_15062_equation_0 = const()[name = tensor("op_15062_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_15062_cast_fp16 = einsum(equation = var_15062_equation_0, values = (var_14704_cast_fp16, var_14544_cast_fp16))[name = tensor("op_15062_cast_fp16")]; + tensor var_15063_to_fp16 = const()[name = tensor("op_15063_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1573_cast_fp16 = mul(x = var_15062_cast_fp16, y = var_15063_to_fp16)[name = tensor("aw_chunk_1573_cast_fp16")]; + tensor var_15066_equation_0 = const()[name = tensor("op_15066_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_15066_cast_fp16 = einsum(equation = var_15066_equation_0, values = (var_14704_cast_fp16, var_14551_cast_fp16))[name = tensor("op_15066_cast_fp16")]; + tensor var_15067_to_fp16 = const()[name = tensor("op_15067_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1575_cast_fp16 = mul(x = var_15066_cast_fp16, y = var_15067_to_fp16)[name = tensor("aw_chunk_1575_cast_fp16")]; + tensor var_15070_equation_0 = const()[name = tensor("op_15070_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_15070_cast_fp16 = einsum(equation = var_15070_equation_0, values = (var_14708_cast_fp16, var_14558_cast_fp16))[name = tensor("op_15070_cast_fp16")]; + tensor var_15071_to_fp16 = const()[name = tensor("op_15071_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1577_cast_fp16 = mul(x = var_15070_cast_fp16, y = var_15071_to_fp16)[name = tensor("aw_chunk_1577_cast_fp16")]; + tensor var_15074_equation_0 = const()[name = tensor("op_15074_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_15074_cast_fp16 = einsum(equation = var_15074_equation_0, values = (var_14708_cast_fp16, var_14565_cast_fp16))[name = tensor("op_15074_cast_fp16")]; + tensor var_15075_to_fp16 = const()[name = tensor("op_15075_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1579_cast_fp16 = mul(x = var_15074_cast_fp16, y = var_15075_to_fp16)[name = tensor("aw_chunk_1579_cast_fp16")]; + tensor var_15078_equation_0 = const()[name = tensor("op_15078_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_15078_cast_fp16 = einsum(equation = var_15078_equation_0, values = (var_14708_cast_fp16, var_14572_cast_fp16))[name = tensor("op_15078_cast_fp16")]; + tensor var_15079_to_fp16 = const()[name = tensor("op_15079_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1581_cast_fp16 = mul(x = var_15078_cast_fp16, y = var_15079_to_fp16)[name = tensor("aw_chunk_1581_cast_fp16")]; + tensor var_15082_equation_0 = const()[name = tensor("op_15082_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_15082_cast_fp16 = einsum(equation = var_15082_equation_0, values = (var_14708_cast_fp16, var_14579_cast_fp16))[name = tensor("op_15082_cast_fp16")]; + tensor var_15083_to_fp16 = const()[name = tensor("op_15083_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1583_cast_fp16 = mul(x = var_15082_cast_fp16, y = var_15083_to_fp16)[name = tensor("aw_chunk_1583_cast_fp16")]; + tensor var_15086_equation_0 = const()[name = tensor("op_15086_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_15086_cast_fp16 = einsum(equation = var_15086_equation_0, values = (var_14712_cast_fp16, var_14586_cast_fp16))[name = tensor("op_15086_cast_fp16")]; + tensor var_15087_to_fp16 = const()[name = tensor("op_15087_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1585_cast_fp16 = mul(x = var_15086_cast_fp16, y = var_15087_to_fp16)[name = tensor("aw_chunk_1585_cast_fp16")]; + tensor var_15090_equation_0 = const()[name = tensor("op_15090_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_15090_cast_fp16 = einsum(equation = var_15090_equation_0, values = (var_14712_cast_fp16, var_14593_cast_fp16))[name = tensor("op_15090_cast_fp16")]; + tensor var_15091_to_fp16 = const()[name = tensor("op_15091_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1587_cast_fp16 = mul(x = var_15090_cast_fp16, y = var_15091_to_fp16)[name = tensor("aw_chunk_1587_cast_fp16")]; + tensor var_15094_equation_0 = const()[name = tensor("op_15094_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_15094_cast_fp16 = einsum(equation = var_15094_equation_0, values = (var_14712_cast_fp16, var_14600_cast_fp16))[name = tensor("op_15094_cast_fp16")]; + tensor var_15095_to_fp16 = const()[name = tensor("op_15095_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1589_cast_fp16 = mul(x = var_15094_cast_fp16, y = var_15095_to_fp16)[name = tensor("aw_chunk_1589_cast_fp16")]; + tensor var_15098_equation_0 = const()[name = tensor("op_15098_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_15098_cast_fp16 = einsum(equation = var_15098_equation_0, values = (var_14712_cast_fp16, var_14607_cast_fp16))[name = tensor("op_15098_cast_fp16")]; + tensor var_15099_to_fp16 = const()[name = tensor("op_15099_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1591_cast_fp16 = mul(x = var_15098_cast_fp16, y = var_15099_to_fp16)[name = tensor("aw_chunk_1591_cast_fp16")]; + tensor var_15102_equation_0 = const()[name = tensor("op_15102_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_15102_cast_fp16 = einsum(equation = var_15102_equation_0, values = (var_14716_cast_fp16, var_14614_cast_fp16))[name = tensor("op_15102_cast_fp16")]; + tensor var_15103_to_fp16 = const()[name = tensor("op_15103_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1593_cast_fp16 = mul(x = var_15102_cast_fp16, y = var_15103_to_fp16)[name = tensor("aw_chunk_1593_cast_fp16")]; + tensor var_15106_equation_0 = const()[name = tensor("op_15106_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_15106_cast_fp16 = einsum(equation = var_15106_equation_0, values = (var_14716_cast_fp16, var_14621_cast_fp16))[name = tensor("op_15106_cast_fp16")]; + tensor var_15107_to_fp16 = const()[name = tensor("op_15107_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1595_cast_fp16 = mul(x = var_15106_cast_fp16, y = var_15107_to_fp16)[name = tensor("aw_chunk_1595_cast_fp16")]; + tensor var_15110_equation_0 = const()[name = tensor("op_15110_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_15110_cast_fp16 = einsum(equation = var_15110_equation_0, values = (var_14716_cast_fp16, var_14628_cast_fp16))[name = tensor("op_15110_cast_fp16")]; + tensor var_15111_to_fp16 = const()[name = tensor("op_15111_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1597_cast_fp16 = mul(x = var_15110_cast_fp16, y = var_15111_to_fp16)[name = tensor("aw_chunk_1597_cast_fp16")]; + tensor var_15114_equation_0 = const()[name = tensor("op_15114_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_15114_cast_fp16 = einsum(equation = var_15114_equation_0, values = (var_14716_cast_fp16, var_14635_cast_fp16))[name = tensor("op_15114_cast_fp16")]; + tensor var_15115_to_fp16 = const()[name = tensor("op_15115_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1599_cast_fp16 = mul(x = var_15114_cast_fp16, y = var_15115_to_fp16)[name = tensor("aw_chunk_1599_cast_fp16")]; + tensor var_15117_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1441_cast_fp16)[name = tensor("op_15117_cast_fp16")]; + tensor var_15118_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1443_cast_fp16)[name = tensor("op_15118_cast_fp16")]; + tensor var_15119_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1445_cast_fp16)[name = tensor("op_15119_cast_fp16")]; + tensor var_15120_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1447_cast_fp16)[name = tensor("op_15120_cast_fp16")]; + tensor var_15121_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1449_cast_fp16)[name = tensor("op_15121_cast_fp16")]; + tensor var_15122_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1451_cast_fp16)[name = tensor("op_15122_cast_fp16")]; + tensor var_15123_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1453_cast_fp16)[name = tensor("op_15123_cast_fp16")]; + tensor var_15124_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1455_cast_fp16)[name = tensor("op_15124_cast_fp16")]; + tensor var_15125_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1457_cast_fp16)[name = tensor("op_15125_cast_fp16")]; + tensor var_15126_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1459_cast_fp16)[name = tensor("op_15126_cast_fp16")]; + tensor var_15127_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1461_cast_fp16)[name = tensor("op_15127_cast_fp16")]; + tensor var_15128_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1463_cast_fp16)[name = tensor("op_15128_cast_fp16")]; + tensor var_15129_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1465_cast_fp16)[name = tensor("op_15129_cast_fp16")]; + tensor var_15130_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1467_cast_fp16)[name = tensor("op_15130_cast_fp16")]; + tensor var_15131_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1469_cast_fp16)[name = tensor("op_15131_cast_fp16")]; + tensor var_15132_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1471_cast_fp16)[name = tensor("op_15132_cast_fp16")]; + tensor var_15133_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1473_cast_fp16)[name = tensor("op_15133_cast_fp16")]; + tensor var_15134_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1475_cast_fp16)[name = tensor("op_15134_cast_fp16")]; + tensor var_15135_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1477_cast_fp16)[name = tensor("op_15135_cast_fp16")]; + tensor var_15136_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1479_cast_fp16)[name = tensor("op_15136_cast_fp16")]; + tensor var_15137_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1481_cast_fp16)[name = tensor("op_15137_cast_fp16")]; + tensor var_15138_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1483_cast_fp16)[name = tensor("op_15138_cast_fp16")]; + tensor var_15139_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1485_cast_fp16)[name = tensor("op_15139_cast_fp16")]; + tensor var_15140_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1487_cast_fp16)[name = tensor("op_15140_cast_fp16")]; + tensor var_15141_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1489_cast_fp16)[name = tensor("op_15141_cast_fp16")]; + tensor var_15142_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1491_cast_fp16)[name = tensor("op_15142_cast_fp16")]; + tensor var_15143_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1493_cast_fp16)[name = tensor("op_15143_cast_fp16")]; + tensor var_15144_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1495_cast_fp16)[name = tensor("op_15144_cast_fp16")]; + tensor var_15145_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1497_cast_fp16)[name = tensor("op_15145_cast_fp16")]; + tensor var_15146_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1499_cast_fp16)[name = tensor("op_15146_cast_fp16")]; + tensor var_15147_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1501_cast_fp16)[name = tensor("op_15147_cast_fp16")]; + tensor var_15148_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1503_cast_fp16)[name = tensor("op_15148_cast_fp16")]; + tensor var_15149_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1505_cast_fp16)[name = tensor("op_15149_cast_fp16")]; + tensor var_15150_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1507_cast_fp16)[name = tensor("op_15150_cast_fp16")]; + tensor var_15151_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1509_cast_fp16)[name = tensor("op_15151_cast_fp16")]; + tensor var_15152_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1511_cast_fp16)[name = tensor("op_15152_cast_fp16")]; + tensor var_15153_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1513_cast_fp16)[name = tensor("op_15153_cast_fp16")]; + tensor var_15154_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1515_cast_fp16)[name = tensor("op_15154_cast_fp16")]; + tensor var_15155_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1517_cast_fp16)[name = tensor("op_15155_cast_fp16")]; + tensor var_15156_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1519_cast_fp16)[name = tensor("op_15156_cast_fp16")]; + tensor var_15157_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1521_cast_fp16)[name = tensor("op_15157_cast_fp16")]; + tensor var_15158_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1523_cast_fp16)[name = tensor("op_15158_cast_fp16")]; + tensor var_15159_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1525_cast_fp16)[name = tensor("op_15159_cast_fp16")]; + tensor var_15160_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1527_cast_fp16)[name = tensor("op_15160_cast_fp16")]; + tensor var_15161_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1529_cast_fp16)[name = tensor("op_15161_cast_fp16")]; + tensor var_15162_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1531_cast_fp16)[name = tensor("op_15162_cast_fp16")]; + tensor var_15163_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1533_cast_fp16)[name = tensor("op_15163_cast_fp16")]; + tensor var_15164_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1535_cast_fp16)[name = tensor("op_15164_cast_fp16")]; + tensor var_15165_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1537_cast_fp16)[name = tensor("op_15165_cast_fp16")]; + tensor var_15166_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1539_cast_fp16)[name = tensor("op_15166_cast_fp16")]; + tensor var_15167_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1541_cast_fp16)[name = tensor("op_15167_cast_fp16")]; + tensor var_15168_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1543_cast_fp16)[name = tensor("op_15168_cast_fp16")]; + tensor var_15169_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1545_cast_fp16)[name = tensor("op_15169_cast_fp16")]; + tensor var_15170_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1547_cast_fp16)[name = tensor("op_15170_cast_fp16")]; + tensor var_15171_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1549_cast_fp16)[name = tensor("op_15171_cast_fp16")]; + tensor var_15172_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1551_cast_fp16)[name = tensor("op_15172_cast_fp16")]; + tensor var_15173_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1553_cast_fp16)[name = tensor("op_15173_cast_fp16")]; + tensor var_15174_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1555_cast_fp16)[name = tensor("op_15174_cast_fp16")]; + tensor var_15175_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1557_cast_fp16)[name = tensor("op_15175_cast_fp16")]; + tensor var_15176_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1559_cast_fp16)[name = tensor("op_15176_cast_fp16")]; + tensor var_15177_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1561_cast_fp16)[name = tensor("op_15177_cast_fp16")]; + tensor var_15178_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1563_cast_fp16)[name = tensor("op_15178_cast_fp16")]; + tensor var_15179_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1565_cast_fp16)[name = tensor("op_15179_cast_fp16")]; + tensor var_15180_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1567_cast_fp16)[name = tensor("op_15180_cast_fp16")]; + tensor var_15181_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1569_cast_fp16)[name = tensor("op_15181_cast_fp16")]; + tensor var_15182_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1571_cast_fp16)[name = tensor("op_15182_cast_fp16")]; + tensor var_15183_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1573_cast_fp16)[name = tensor("op_15183_cast_fp16")]; + tensor var_15184_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1575_cast_fp16)[name = tensor("op_15184_cast_fp16")]; + tensor var_15185_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1577_cast_fp16)[name = tensor("op_15185_cast_fp16")]; + tensor var_15186_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1579_cast_fp16)[name = tensor("op_15186_cast_fp16")]; + tensor var_15187_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1581_cast_fp16)[name = tensor("op_15187_cast_fp16")]; + tensor var_15188_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1583_cast_fp16)[name = tensor("op_15188_cast_fp16")]; + tensor var_15189_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1585_cast_fp16)[name = tensor("op_15189_cast_fp16")]; + tensor var_15190_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1587_cast_fp16)[name = tensor("op_15190_cast_fp16")]; + tensor var_15191_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1589_cast_fp16)[name = tensor("op_15191_cast_fp16")]; + tensor var_15192_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1591_cast_fp16)[name = tensor("op_15192_cast_fp16")]; + tensor var_15193_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1593_cast_fp16)[name = tensor("op_15193_cast_fp16")]; + tensor var_15194_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1595_cast_fp16)[name = tensor("op_15194_cast_fp16")]; + tensor var_15195_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1597_cast_fp16)[name = tensor("op_15195_cast_fp16")]; + tensor var_15196_cast_fp16 = softmax(axis = var_13942, x = aw_chunk_1599_cast_fp16)[name = tensor("op_15196_cast_fp16")]; + tensor var_15198_equation_0 = const()[name = tensor("op_15198_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15198_cast_fp16 = einsum(equation = var_15198_equation_0, values = (var_14718_cast_fp16, var_15117_cast_fp16))[name = tensor("op_15198_cast_fp16")]; + tensor var_15200_equation_0 = const()[name = tensor("op_15200_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15200_cast_fp16 = einsum(equation = var_15200_equation_0, values = (var_14718_cast_fp16, var_15118_cast_fp16))[name = tensor("op_15200_cast_fp16")]; + tensor var_15202_equation_0 = const()[name = tensor("op_15202_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15202_cast_fp16 = einsum(equation = var_15202_equation_0, values = (var_14718_cast_fp16, var_15119_cast_fp16))[name = tensor("op_15202_cast_fp16")]; + tensor var_15204_equation_0 = const()[name = tensor("op_15204_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15204_cast_fp16 = einsum(equation = var_15204_equation_0, values = (var_14718_cast_fp16, var_15120_cast_fp16))[name = tensor("op_15204_cast_fp16")]; + tensor var_15206_equation_0 = const()[name = tensor("op_15206_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15206_cast_fp16 = einsum(equation = var_15206_equation_0, values = (var_14722_cast_fp16, var_15121_cast_fp16))[name = tensor("op_15206_cast_fp16")]; + tensor var_15208_equation_0 = const()[name = tensor("op_15208_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15208_cast_fp16 = einsum(equation = var_15208_equation_0, values = (var_14722_cast_fp16, var_15122_cast_fp16))[name = tensor("op_15208_cast_fp16")]; + tensor var_15210_equation_0 = const()[name = tensor("op_15210_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15210_cast_fp16 = einsum(equation = var_15210_equation_0, values = (var_14722_cast_fp16, var_15123_cast_fp16))[name = tensor("op_15210_cast_fp16")]; + tensor var_15212_equation_0 = const()[name = tensor("op_15212_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15212_cast_fp16 = einsum(equation = var_15212_equation_0, values = (var_14722_cast_fp16, var_15124_cast_fp16))[name = tensor("op_15212_cast_fp16")]; + tensor var_15214_equation_0 = const()[name = tensor("op_15214_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15214_cast_fp16 = einsum(equation = var_15214_equation_0, values = (var_14726_cast_fp16, var_15125_cast_fp16))[name = tensor("op_15214_cast_fp16")]; + tensor var_15216_equation_0 = const()[name = tensor("op_15216_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15216_cast_fp16 = einsum(equation = var_15216_equation_0, values = (var_14726_cast_fp16, var_15126_cast_fp16))[name = tensor("op_15216_cast_fp16")]; + tensor var_15218_equation_0 = const()[name = tensor("op_15218_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15218_cast_fp16 = einsum(equation = var_15218_equation_0, values = (var_14726_cast_fp16, var_15127_cast_fp16))[name = tensor("op_15218_cast_fp16")]; + tensor var_15220_equation_0 = const()[name = tensor("op_15220_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15220_cast_fp16 = einsum(equation = var_15220_equation_0, values = (var_14726_cast_fp16, var_15128_cast_fp16))[name = tensor("op_15220_cast_fp16")]; + tensor var_15222_equation_0 = const()[name = tensor("op_15222_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15222_cast_fp16 = einsum(equation = var_15222_equation_0, values = (var_14730_cast_fp16, var_15129_cast_fp16))[name = tensor("op_15222_cast_fp16")]; + tensor var_15224_equation_0 = const()[name = tensor("op_15224_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15224_cast_fp16 = einsum(equation = var_15224_equation_0, values = (var_14730_cast_fp16, var_15130_cast_fp16))[name = tensor("op_15224_cast_fp16")]; + tensor var_15226_equation_0 = const()[name = tensor("op_15226_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15226_cast_fp16 = einsum(equation = var_15226_equation_0, values = (var_14730_cast_fp16, var_15131_cast_fp16))[name = tensor("op_15226_cast_fp16")]; + tensor var_15228_equation_0 = const()[name = tensor("op_15228_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15228_cast_fp16 = einsum(equation = var_15228_equation_0, values = (var_14730_cast_fp16, var_15132_cast_fp16))[name = tensor("op_15228_cast_fp16")]; + tensor var_15230_equation_0 = const()[name = tensor("op_15230_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15230_cast_fp16 = einsum(equation = var_15230_equation_0, values = (var_14734_cast_fp16, var_15133_cast_fp16))[name = tensor("op_15230_cast_fp16")]; + tensor var_15232_equation_0 = const()[name = tensor("op_15232_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15232_cast_fp16 = einsum(equation = var_15232_equation_0, values = (var_14734_cast_fp16, var_15134_cast_fp16))[name = tensor("op_15232_cast_fp16")]; + tensor var_15234_equation_0 = const()[name = tensor("op_15234_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15234_cast_fp16 = einsum(equation = var_15234_equation_0, values = (var_14734_cast_fp16, var_15135_cast_fp16))[name = tensor("op_15234_cast_fp16")]; + tensor var_15236_equation_0 = const()[name = tensor("op_15236_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15236_cast_fp16 = einsum(equation = var_15236_equation_0, values = (var_14734_cast_fp16, var_15136_cast_fp16))[name = tensor("op_15236_cast_fp16")]; + tensor var_15238_equation_0 = const()[name = tensor("op_15238_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15238_cast_fp16 = einsum(equation = var_15238_equation_0, values = (var_14738_cast_fp16, var_15137_cast_fp16))[name = tensor("op_15238_cast_fp16")]; + tensor var_15240_equation_0 = const()[name = tensor("op_15240_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15240_cast_fp16 = einsum(equation = var_15240_equation_0, values = (var_14738_cast_fp16, var_15138_cast_fp16))[name = tensor("op_15240_cast_fp16")]; + tensor var_15242_equation_0 = const()[name = tensor("op_15242_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15242_cast_fp16 = einsum(equation = var_15242_equation_0, values = (var_14738_cast_fp16, var_15139_cast_fp16))[name = tensor("op_15242_cast_fp16")]; + tensor var_15244_equation_0 = const()[name = tensor("op_15244_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15244_cast_fp16 = einsum(equation = var_15244_equation_0, values = (var_14738_cast_fp16, var_15140_cast_fp16))[name = tensor("op_15244_cast_fp16")]; + tensor var_15246_equation_0 = const()[name = tensor("op_15246_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15246_cast_fp16 = einsum(equation = var_15246_equation_0, values = (var_14742_cast_fp16, var_15141_cast_fp16))[name = tensor("op_15246_cast_fp16")]; + tensor var_15248_equation_0 = const()[name = tensor("op_15248_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15248_cast_fp16 = einsum(equation = var_15248_equation_0, values = (var_14742_cast_fp16, var_15142_cast_fp16))[name = tensor("op_15248_cast_fp16")]; + tensor var_15250_equation_0 = const()[name = tensor("op_15250_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15250_cast_fp16 = einsum(equation = var_15250_equation_0, values = (var_14742_cast_fp16, var_15143_cast_fp16))[name = tensor("op_15250_cast_fp16")]; + tensor var_15252_equation_0 = const()[name = tensor("op_15252_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15252_cast_fp16 = einsum(equation = var_15252_equation_0, values = (var_14742_cast_fp16, var_15144_cast_fp16))[name = tensor("op_15252_cast_fp16")]; + tensor var_15254_equation_0 = const()[name = tensor("op_15254_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15254_cast_fp16 = einsum(equation = var_15254_equation_0, values = (var_14746_cast_fp16, var_15145_cast_fp16))[name = tensor("op_15254_cast_fp16")]; + tensor var_15256_equation_0 = const()[name = tensor("op_15256_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15256_cast_fp16 = einsum(equation = var_15256_equation_0, values = (var_14746_cast_fp16, var_15146_cast_fp16))[name = tensor("op_15256_cast_fp16")]; + tensor var_15258_equation_0 = const()[name = tensor("op_15258_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15258_cast_fp16 = einsum(equation = var_15258_equation_0, values = (var_14746_cast_fp16, var_15147_cast_fp16))[name = tensor("op_15258_cast_fp16")]; + tensor var_15260_equation_0 = const()[name = tensor("op_15260_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15260_cast_fp16 = einsum(equation = var_15260_equation_0, values = (var_14746_cast_fp16, var_15148_cast_fp16))[name = tensor("op_15260_cast_fp16")]; + tensor var_15262_equation_0 = const()[name = tensor("op_15262_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15262_cast_fp16 = einsum(equation = var_15262_equation_0, values = (var_14750_cast_fp16, var_15149_cast_fp16))[name = tensor("op_15262_cast_fp16")]; + tensor var_15264_equation_0 = const()[name = tensor("op_15264_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15264_cast_fp16 = einsum(equation = var_15264_equation_0, values = (var_14750_cast_fp16, var_15150_cast_fp16))[name = tensor("op_15264_cast_fp16")]; + tensor var_15266_equation_0 = const()[name = tensor("op_15266_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15266_cast_fp16 = einsum(equation = var_15266_equation_0, values = (var_14750_cast_fp16, var_15151_cast_fp16))[name = tensor("op_15266_cast_fp16")]; + tensor var_15268_equation_0 = const()[name = tensor("op_15268_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15268_cast_fp16 = einsum(equation = var_15268_equation_0, values = (var_14750_cast_fp16, var_15152_cast_fp16))[name = tensor("op_15268_cast_fp16")]; + tensor var_15270_equation_0 = const()[name = tensor("op_15270_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15270_cast_fp16 = einsum(equation = var_15270_equation_0, values = (var_14754_cast_fp16, var_15153_cast_fp16))[name = tensor("op_15270_cast_fp16")]; + tensor var_15272_equation_0 = const()[name = tensor("op_15272_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15272_cast_fp16 = einsum(equation = var_15272_equation_0, values = (var_14754_cast_fp16, var_15154_cast_fp16))[name = tensor("op_15272_cast_fp16")]; + tensor var_15274_equation_0 = const()[name = tensor("op_15274_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15274_cast_fp16 = einsum(equation = var_15274_equation_0, values = (var_14754_cast_fp16, var_15155_cast_fp16))[name = tensor("op_15274_cast_fp16")]; + tensor var_15276_equation_0 = const()[name = tensor("op_15276_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15276_cast_fp16 = einsum(equation = var_15276_equation_0, values = (var_14754_cast_fp16, var_15156_cast_fp16))[name = tensor("op_15276_cast_fp16")]; + tensor var_15278_equation_0 = const()[name = tensor("op_15278_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15278_cast_fp16 = einsum(equation = var_15278_equation_0, values = (var_14758_cast_fp16, var_15157_cast_fp16))[name = tensor("op_15278_cast_fp16")]; + tensor var_15280_equation_0 = const()[name = tensor("op_15280_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15280_cast_fp16 = einsum(equation = var_15280_equation_0, values = (var_14758_cast_fp16, var_15158_cast_fp16))[name = tensor("op_15280_cast_fp16")]; + tensor var_15282_equation_0 = const()[name = tensor("op_15282_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15282_cast_fp16 = einsum(equation = var_15282_equation_0, values = (var_14758_cast_fp16, var_15159_cast_fp16))[name = tensor("op_15282_cast_fp16")]; + tensor var_15284_equation_0 = const()[name = tensor("op_15284_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15284_cast_fp16 = einsum(equation = var_15284_equation_0, values = (var_14758_cast_fp16, var_15160_cast_fp16))[name = tensor("op_15284_cast_fp16")]; + tensor var_15286_equation_0 = const()[name = tensor("op_15286_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15286_cast_fp16 = einsum(equation = var_15286_equation_0, values = (var_14762_cast_fp16, var_15161_cast_fp16))[name = tensor("op_15286_cast_fp16")]; + tensor var_15288_equation_0 = const()[name = tensor("op_15288_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15288_cast_fp16 = einsum(equation = var_15288_equation_0, values = (var_14762_cast_fp16, var_15162_cast_fp16))[name = tensor("op_15288_cast_fp16")]; + tensor var_15290_equation_0 = const()[name = tensor("op_15290_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15290_cast_fp16 = einsum(equation = var_15290_equation_0, values = (var_14762_cast_fp16, var_15163_cast_fp16))[name = tensor("op_15290_cast_fp16")]; + tensor var_15292_equation_0 = const()[name = tensor("op_15292_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15292_cast_fp16 = einsum(equation = var_15292_equation_0, values = (var_14762_cast_fp16, var_15164_cast_fp16))[name = tensor("op_15292_cast_fp16")]; + tensor var_15294_equation_0 = const()[name = tensor("op_15294_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15294_cast_fp16 = einsum(equation = var_15294_equation_0, values = (var_14766_cast_fp16, var_15165_cast_fp16))[name = tensor("op_15294_cast_fp16")]; + tensor var_15296_equation_0 = const()[name = tensor("op_15296_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15296_cast_fp16 = einsum(equation = var_15296_equation_0, values = (var_14766_cast_fp16, var_15166_cast_fp16))[name = tensor("op_15296_cast_fp16")]; + tensor var_15298_equation_0 = const()[name = tensor("op_15298_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15298_cast_fp16 = einsum(equation = var_15298_equation_0, values = (var_14766_cast_fp16, var_15167_cast_fp16))[name = tensor("op_15298_cast_fp16")]; + tensor var_15300_equation_0 = const()[name = tensor("op_15300_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15300_cast_fp16 = einsum(equation = var_15300_equation_0, values = (var_14766_cast_fp16, var_15168_cast_fp16))[name = tensor("op_15300_cast_fp16")]; + tensor var_15302_equation_0 = const()[name = tensor("op_15302_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15302_cast_fp16 = einsum(equation = var_15302_equation_0, values = (var_14770_cast_fp16, var_15169_cast_fp16))[name = tensor("op_15302_cast_fp16")]; + tensor var_15304_equation_0 = const()[name = tensor("op_15304_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15304_cast_fp16 = einsum(equation = var_15304_equation_0, values = (var_14770_cast_fp16, var_15170_cast_fp16))[name = tensor("op_15304_cast_fp16")]; + tensor var_15306_equation_0 = const()[name = tensor("op_15306_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15306_cast_fp16 = einsum(equation = var_15306_equation_0, values = (var_14770_cast_fp16, var_15171_cast_fp16))[name = tensor("op_15306_cast_fp16")]; + tensor var_15308_equation_0 = const()[name = tensor("op_15308_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15308_cast_fp16 = einsum(equation = var_15308_equation_0, values = (var_14770_cast_fp16, var_15172_cast_fp16))[name = tensor("op_15308_cast_fp16")]; + tensor var_15310_equation_0 = const()[name = tensor("op_15310_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15310_cast_fp16 = einsum(equation = var_15310_equation_0, values = (var_14774_cast_fp16, var_15173_cast_fp16))[name = tensor("op_15310_cast_fp16")]; + tensor var_15312_equation_0 = const()[name = tensor("op_15312_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15312_cast_fp16 = einsum(equation = var_15312_equation_0, values = (var_14774_cast_fp16, var_15174_cast_fp16))[name = tensor("op_15312_cast_fp16")]; + tensor var_15314_equation_0 = const()[name = tensor("op_15314_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15314_cast_fp16 = einsum(equation = var_15314_equation_0, values = (var_14774_cast_fp16, var_15175_cast_fp16))[name = tensor("op_15314_cast_fp16")]; + tensor var_15316_equation_0 = const()[name = tensor("op_15316_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15316_cast_fp16 = einsum(equation = var_15316_equation_0, values = (var_14774_cast_fp16, var_15176_cast_fp16))[name = tensor("op_15316_cast_fp16")]; + tensor var_15318_equation_0 = const()[name = tensor("op_15318_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15318_cast_fp16 = einsum(equation = var_15318_equation_0, values = (var_14778_cast_fp16, var_15177_cast_fp16))[name = tensor("op_15318_cast_fp16")]; + tensor var_15320_equation_0 = const()[name = tensor("op_15320_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15320_cast_fp16 = einsum(equation = var_15320_equation_0, values = (var_14778_cast_fp16, var_15178_cast_fp16))[name = tensor("op_15320_cast_fp16")]; + tensor var_15322_equation_0 = const()[name = tensor("op_15322_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15322_cast_fp16 = einsum(equation = var_15322_equation_0, values = (var_14778_cast_fp16, var_15179_cast_fp16))[name = tensor("op_15322_cast_fp16")]; + tensor var_15324_equation_0 = const()[name = tensor("op_15324_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15324_cast_fp16 = einsum(equation = var_15324_equation_0, values = (var_14778_cast_fp16, var_15180_cast_fp16))[name = tensor("op_15324_cast_fp16")]; + tensor var_15326_equation_0 = const()[name = tensor("op_15326_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15326_cast_fp16 = einsum(equation = var_15326_equation_0, values = (var_14782_cast_fp16, var_15181_cast_fp16))[name = tensor("op_15326_cast_fp16")]; + tensor var_15328_equation_0 = const()[name = tensor("op_15328_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15328_cast_fp16 = einsum(equation = var_15328_equation_0, values = (var_14782_cast_fp16, var_15182_cast_fp16))[name = tensor("op_15328_cast_fp16")]; + tensor var_15330_equation_0 = const()[name = tensor("op_15330_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15330_cast_fp16 = einsum(equation = var_15330_equation_0, values = (var_14782_cast_fp16, var_15183_cast_fp16))[name = tensor("op_15330_cast_fp16")]; + tensor var_15332_equation_0 = const()[name = tensor("op_15332_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15332_cast_fp16 = einsum(equation = var_15332_equation_0, values = (var_14782_cast_fp16, var_15184_cast_fp16))[name = tensor("op_15332_cast_fp16")]; + tensor var_15334_equation_0 = const()[name = tensor("op_15334_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15334_cast_fp16 = einsum(equation = var_15334_equation_0, values = (var_14786_cast_fp16, var_15185_cast_fp16))[name = tensor("op_15334_cast_fp16")]; + tensor var_15336_equation_0 = const()[name = tensor("op_15336_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15336_cast_fp16 = einsum(equation = var_15336_equation_0, values = (var_14786_cast_fp16, var_15186_cast_fp16))[name = tensor("op_15336_cast_fp16")]; + tensor var_15338_equation_0 = const()[name = tensor("op_15338_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15338_cast_fp16 = einsum(equation = var_15338_equation_0, values = (var_14786_cast_fp16, var_15187_cast_fp16))[name = tensor("op_15338_cast_fp16")]; + tensor var_15340_equation_0 = const()[name = tensor("op_15340_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15340_cast_fp16 = einsum(equation = var_15340_equation_0, values = (var_14786_cast_fp16, var_15188_cast_fp16))[name = tensor("op_15340_cast_fp16")]; + tensor var_15342_equation_0 = const()[name = tensor("op_15342_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15342_cast_fp16 = einsum(equation = var_15342_equation_0, values = (var_14790_cast_fp16, var_15189_cast_fp16))[name = tensor("op_15342_cast_fp16")]; + tensor var_15344_equation_0 = const()[name = tensor("op_15344_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15344_cast_fp16 = einsum(equation = var_15344_equation_0, values = (var_14790_cast_fp16, var_15190_cast_fp16))[name = tensor("op_15344_cast_fp16")]; + tensor var_15346_equation_0 = const()[name = tensor("op_15346_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15346_cast_fp16 = einsum(equation = var_15346_equation_0, values = (var_14790_cast_fp16, var_15191_cast_fp16))[name = tensor("op_15346_cast_fp16")]; + tensor var_15348_equation_0 = const()[name = tensor("op_15348_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15348_cast_fp16 = einsum(equation = var_15348_equation_0, values = (var_14790_cast_fp16, var_15192_cast_fp16))[name = tensor("op_15348_cast_fp16")]; + tensor var_15350_equation_0 = const()[name = tensor("op_15350_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15350_cast_fp16 = einsum(equation = var_15350_equation_0, values = (var_14794_cast_fp16, var_15193_cast_fp16))[name = tensor("op_15350_cast_fp16")]; + tensor var_15352_equation_0 = const()[name = tensor("op_15352_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15352_cast_fp16 = einsum(equation = var_15352_equation_0, values = (var_14794_cast_fp16, var_15194_cast_fp16))[name = tensor("op_15352_cast_fp16")]; + tensor var_15354_equation_0 = const()[name = tensor("op_15354_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15354_cast_fp16 = einsum(equation = var_15354_equation_0, values = (var_14794_cast_fp16, var_15195_cast_fp16))[name = tensor("op_15354_cast_fp16")]; + tensor var_15356_equation_0 = const()[name = tensor("op_15356_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_15356_cast_fp16 = einsum(equation = var_15356_equation_0, values = (var_14794_cast_fp16, var_15196_cast_fp16))[name = tensor("op_15356_cast_fp16")]; + tensor var_15358_interleave_0 = const()[name = tensor("op_15358_interleave_0"), val = tensor(false)]; + tensor var_15358_cast_fp16 = concat(axis = var_13917, interleave = var_15358_interleave_0, values = (var_15198_cast_fp16, var_15200_cast_fp16, var_15202_cast_fp16, var_15204_cast_fp16))[name = tensor("op_15358_cast_fp16")]; + tensor var_15360_interleave_0 = const()[name = tensor("op_15360_interleave_0"), val = tensor(false)]; + tensor var_15360_cast_fp16 = concat(axis = var_13917, interleave = var_15360_interleave_0, values = (var_15206_cast_fp16, var_15208_cast_fp16, var_15210_cast_fp16, var_15212_cast_fp16))[name = tensor("op_15360_cast_fp16")]; + tensor var_15362_interleave_0 = const()[name = tensor("op_15362_interleave_0"), val = tensor(false)]; + tensor var_15362_cast_fp16 = concat(axis = var_13917, interleave = var_15362_interleave_0, values = (var_15214_cast_fp16, var_15216_cast_fp16, var_15218_cast_fp16, var_15220_cast_fp16))[name = tensor("op_15362_cast_fp16")]; + tensor var_15364_interleave_0 = const()[name = tensor("op_15364_interleave_0"), val = tensor(false)]; + tensor var_15364_cast_fp16 = concat(axis = var_13917, interleave = var_15364_interleave_0, values = (var_15222_cast_fp16, var_15224_cast_fp16, var_15226_cast_fp16, var_15228_cast_fp16))[name = tensor("op_15364_cast_fp16")]; + tensor var_15366_interleave_0 = const()[name = tensor("op_15366_interleave_0"), val = tensor(false)]; + tensor var_15366_cast_fp16 = concat(axis = var_13917, interleave = var_15366_interleave_0, values = (var_15230_cast_fp16, var_15232_cast_fp16, var_15234_cast_fp16, var_15236_cast_fp16))[name = tensor("op_15366_cast_fp16")]; + tensor var_15368_interleave_0 = const()[name = tensor("op_15368_interleave_0"), val = tensor(false)]; + tensor var_15368_cast_fp16 = concat(axis = var_13917, interleave = var_15368_interleave_0, values = (var_15238_cast_fp16, var_15240_cast_fp16, var_15242_cast_fp16, var_15244_cast_fp16))[name = tensor("op_15368_cast_fp16")]; + tensor var_15370_interleave_0 = const()[name = tensor("op_15370_interleave_0"), val = tensor(false)]; + tensor var_15370_cast_fp16 = concat(axis = var_13917, interleave = var_15370_interleave_0, values = (var_15246_cast_fp16, var_15248_cast_fp16, var_15250_cast_fp16, var_15252_cast_fp16))[name = tensor("op_15370_cast_fp16")]; + tensor var_15372_interleave_0 = const()[name = tensor("op_15372_interleave_0"), val = tensor(false)]; + tensor var_15372_cast_fp16 = concat(axis = var_13917, interleave = var_15372_interleave_0, values = (var_15254_cast_fp16, var_15256_cast_fp16, var_15258_cast_fp16, var_15260_cast_fp16))[name = tensor("op_15372_cast_fp16")]; + tensor var_15374_interleave_0 = const()[name = tensor("op_15374_interleave_0"), val = tensor(false)]; + tensor var_15374_cast_fp16 = concat(axis = var_13917, interleave = var_15374_interleave_0, values = (var_15262_cast_fp16, var_15264_cast_fp16, var_15266_cast_fp16, var_15268_cast_fp16))[name = tensor("op_15374_cast_fp16")]; + tensor var_15376_interleave_0 = const()[name = tensor("op_15376_interleave_0"), val = tensor(false)]; + tensor var_15376_cast_fp16 = concat(axis = var_13917, interleave = var_15376_interleave_0, values = (var_15270_cast_fp16, var_15272_cast_fp16, var_15274_cast_fp16, var_15276_cast_fp16))[name = tensor("op_15376_cast_fp16")]; + tensor var_15378_interleave_0 = const()[name = tensor("op_15378_interleave_0"), val = tensor(false)]; + tensor var_15378_cast_fp16 = concat(axis = var_13917, interleave = var_15378_interleave_0, values = (var_15278_cast_fp16, var_15280_cast_fp16, var_15282_cast_fp16, var_15284_cast_fp16))[name = tensor("op_15378_cast_fp16")]; + tensor var_15380_interleave_0 = const()[name = tensor("op_15380_interleave_0"), val = tensor(false)]; + tensor var_15380_cast_fp16 = concat(axis = var_13917, interleave = var_15380_interleave_0, values = (var_15286_cast_fp16, var_15288_cast_fp16, var_15290_cast_fp16, var_15292_cast_fp16))[name = tensor("op_15380_cast_fp16")]; + tensor var_15382_interleave_0 = const()[name = tensor("op_15382_interleave_0"), val = tensor(false)]; + tensor var_15382_cast_fp16 = concat(axis = var_13917, interleave = var_15382_interleave_0, values = (var_15294_cast_fp16, var_15296_cast_fp16, var_15298_cast_fp16, var_15300_cast_fp16))[name = tensor("op_15382_cast_fp16")]; + tensor var_15384_interleave_0 = const()[name = tensor("op_15384_interleave_0"), val = tensor(false)]; + tensor var_15384_cast_fp16 = concat(axis = var_13917, interleave = var_15384_interleave_0, values = (var_15302_cast_fp16, var_15304_cast_fp16, var_15306_cast_fp16, var_15308_cast_fp16))[name = tensor("op_15384_cast_fp16")]; + tensor var_15386_interleave_0 = const()[name = tensor("op_15386_interleave_0"), val = tensor(false)]; + tensor var_15386_cast_fp16 = concat(axis = var_13917, interleave = var_15386_interleave_0, values = (var_15310_cast_fp16, var_15312_cast_fp16, var_15314_cast_fp16, var_15316_cast_fp16))[name = tensor("op_15386_cast_fp16")]; + tensor var_15388_interleave_0 = const()[name = tensor("op_15388_interleave_0"), val = tensor(false)]; + tensor var_15388_cast_fp16 = concat(axis = var_13917, interleave = var_15388_interleave_0, values = (var_15318_cast_fp16, var_15320_cast_fp16, var_15322_cast_fp16, var_15324_cast_fp16))[name = tensor("op_15388_cast_fp16")]; + tensor var_15390_interleave_0 = const()[name = tensor("op_15390_interleave_0"), val = tensor(false)]; + tensor var_15390_cast_fp16 = concat(axis = var_13917, interleave = var_15390_interleave_0, values = (var_15326_cast_fp16, var_15328_cast_fp16, var_15330_cast_fp16, var_15332_cast_fp16))[name = tensor("op_15390_cast_fp16")]; + tensor var_15392_interleave_0 = const()[name = tensor("op_15392_interleave_0"), val = tensor(false)]; + tensor var_15392_cast_fp16 = concat(axis = var_13917, interleave = var_15392_interleave_0, values = (var_15334_cast_fp16, var_15336_cast_fp16, var_15338_cast_fp16, var_15340_cast_fp16))[name = tensor("op_15392_cast_fp16")]; + tensor var_15394_interleave_0 = const()[name = tensor("op_15394_interleave_0"), val = tensor(false)]; + tensor var_15394_cast_fp16 = concat(axis = var_13917, interleave = var_15394_interleave_0, values = (var_15342_cast_fp16, var_15344_cast_fp16, var_15346_cast_fp16, var_15348_cast_fp16))[name = tensor("op_15394_cast_fp16")]; + tensor var_15396_interleave_0 = const()[name = tensor("op_15396_interleave_0"), val = tensor(false)]; + tensor var_15396_cast_fp16 = concat(axis = var_13917, interleave = var_15396_interleave_0, values = (var_15350_cast_fp16, var_15352_cast_fp16, var_15354_cast_fp16, var_15356_cast_fp16))[name = tensor("op_15396_cast_fp16")]; + tensor input_73_interleave_0 = const()[name = tensor("input_73_interleave_0"), val = tensor(false)]; + tensor input_73_cast_fp16 = concat(axis = var_13942, interleave = input_73_interleave_0, values = (var_15358_cast_fp16, var_15360_cast_fp16, var_15362_cast_fp16, var_15364_cast_fp16, var_15366_cast_fp16, var_15368_cast_fp16, var_15370_cast_fp16, var_15372_cast_fp16, var_15374_cast_fp16, var_15376_cast_fp16, var_15378_cast_fp16, var_15380_cast_fp16, var_15382_cast_fp16, var_15384_cast_fp16, var_15386_cast_fp16, var_15388_cast_fp16, var_15390_cast_fp16, var_15392_cast_fp16, var_15394_cast_fp16, var_15396_cast_fp16))[name = tensor("input_73_cast_fp16")]; + tensor var_15401 = const()[name = tensor("op_15401"), val = tensor([1, 1])]; + tensor var_15403 = const()[name = tensor("op_15403"), val = tensor([1, 1])]; + tensor obj_39_pad_type_0 = const()[name = tensor("obj_39_pad_type_0"), val = tensor("custom")]; + tensor obj_39_pad_0 = const()[name = tensor("obj_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_9_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_9_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(378316160)))]; + tensor layers_9_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_9_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(381593024)))]; + tensor obj_39_cast_fp16 = conv(bias = layers_9_self_attn_o_proj_bias_to_fp16, dilations = var_15403, groups = var_13942, pad = obj_39_pad_0, pad_type = obj_39_pad_type_0, strides = var_15401, weight = layers_9_self_attn_o_proj_weight_to_fp16, x = input_73_cast_fp16)[name = tensor("obj_39_cast_fp16")]; + tensor inputs_39_cast_fp16 = add(x = inputs_37_cast_fp16, y = obj_39_cast_fp16)[name = tensor("inputs_39_cast_fp16")]; + tensor var_15409 = const()[name = tensor("op_15409"), val = tensor([1])]; + tensor channels_mean_39_cast_fp16 = reduce_mean(axes = var_15409, keep_dims = var_13943, x = inputs_39_cast_fp16)[name = tensor("channels_mean_39_cast_fp16")]; + tensor zero_mean_39_cast_fp16 = sub(x = inputs_39_cast_fp16, y = channels_mean_39_cast_fp16)[name = tensor("zero_mean_39_cast_fp16")]; + tensor zero_mean_sq_39_cast_fp16 = mul(x = zero_mean_39_cast_fp16, y = zero_mean_39_cast_fp16)[name = tensor("zero_mean_sq_39_cast_fp16")]; + tensor var_15413 = const()[name = tensor("op_15413"), val = tensor([1])]; + tensor var_15414_cast_fp16 = reduce_mean(axes = var_15413, keep_dims = var_13943, x = zero_mean_sq_39_cast_fp16)[name = tensor("op_15414_cast_fp16")]; + tensor var_15415_to_fp16 = const()[name = tensor("op_15415_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_15416_cast_fp16 = add(x = var_15414_cast_fp16, y = var_15415_to_fp16)[name = tensor("op_15416_cast_fp16")]; + tensor denom_39_epsilon_0_to_fp16 = const()[name = tensor("denom_39_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_39_cast_fp16 = rsqrt(epsilon = denom_39_epsilon_0_to_fp16, x = var_15416_cast_fp16)[name = tensor("denom_39_cast_fp16")]; + tensor out_39_cast_fp16 = mul(x = zero_mean_39_cast_fp16, y = denom_39_cast_fp16)[name = tensor("out_39_cast_fp16")]; + tensor input_75_gamma_0_to_fp16 = const()[name = tensor("input_75_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(381595648)))]; + tensor input_75_beta_0_to_fp16 = const()[name = tensor("input_75_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(381598272)))]; + tensor input_75_epsilon_0_to_fp16 = const()[name = tensor("input_75_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_75_cast_fp16 = batch_norm(beta = input_75_beta_0_to_fp16, epsilon = input_75_epsilon_0_to_fp16, gamma = input_75_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_39_cast_fp16)[name = tensor("input_75_cast_fp16")]; + tensor var_15427 = const()[name = tensor("op_15427"), val = tensor([1, 1])]; + tensor var_15429 = const()[name = tensor("op_15429"), val = tensor([1, 1])]; + tensor input_77_pad_type_0 = const()[name = tensor("input_77_pad_type_0"), val = tensor("custom")]; + tensor input_77_pad_0 = const()[name = tensor("input_77_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_9_fc1_weight_to_fp16 = const()[name = tensor("layers_9_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(381600896)))]; + tensor layers_9_fc1_bias_to_fp16 = const()[name = tensor("layers_9_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(394708160)))]; + tensor input_77_cast_fp16 = conv(bias = layers_9_fc1_bias_to_fp16, dilations = var_15429, groups = var_13942, pad = input_77_pad_0, pad_type = input_77_pad_type_0, strides = var_15427, weight = layers_9_fc1_weight_to_fp16, x = input_75_cast_fp16)[name = tensor("input_77_cast_fp16")]; + tensor input_79_mode_0 = const()[name = tensor("input_79_mode_0"), val = tensor("EXACT")]; + tensor input_79_cast_fp16 = gelu(mode = input_79_mode_0, x = input_77_cast_fp16)[name = tensor("input_79_cast_fp16")]; + tensor var_15435 = const()[name = tensor("op_15435"), val = tensor([1, 1])]; + tensor var_15437 = const()[name = tensor("op_15437"), val = tensor([1, 1])]; + tensor hidden_states_23_pad_type_0 = const()[name = tensor("hidden_states_23_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_23_pad_0 = const()[name = tensor("hidden_states_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_9_fc2_weight_to_fp16 = const()[name = tensor("layers_9_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(394718464)))]; + tensor layers_9_fc2_bias_to_fp16 = const()[name = tensor("layers_9_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(407825728)))]; + tensor hidden_states_23_cast_fp16 = conv(bias = layers_9_fc2_bias_to_fp16, dilations = var_15437, groups = var_13942, pad = hidden_states_23_pad_0, pad_type = hidden_states_23_pad_type_0, strides = var_15435, weight = layers_9_fc2_weight_to_fp16, x = input_79_cast_fp16)[name = tensor("hidden_states_23_cast_fp16")]; + tensor inputs_41_cast_fp16 = add(x = inputs_39_cast_fp16, y = hidden_states_23_cast_fp16)[name = tensor("inputs_41_cast_fp16")]; + tensor var_15444 = const()[name = tensor("op_15444"), val = tensor(3)]; + tensor var_15469 = const()[name = tensor("op_15469"), val = tensor(1)]; + tensor var_15470 = const()[name = tensor("op_15470"), val = tensor(true)]; + tensor var_15480 = const()[name = tensor("op_15480"), val = tensor([1])]; + tensor channels_mean_41_cast_fp16 = reduce_mean(axes = var_15480, keep_dims = var_15470, x = inputs_41_cast_fp16)[name = tensor("channels_mean_41_cast_fp16")]; + tensor zero_mean_41_cast_fp16 = sub(x = inputs_41_cast_fp16, y = channels_mean_41_cast_fp16)[name = tensor("zero_mean_41_cast_fp16")]; + tensor zero_mean_sq_41_cast_fp16 = mul(x = zero_mean_41_cast_fp16, y = zero_mean_41_cast_fp16)[name = tensor("zero_mean_sq_41_cast_fp16")]; + tensor var_15484 = const()[name = tensor("op_15484"), val = tensor([1])]; + tensor var_15485_cast_fp16 = reduce_mean(axes = var_15484, keep_dims = var_15470, x = zero_mean_sq_41_cast_fp16)[name = tensor("op_15485_cast_fp16")]; + tensor var_15486_to_fp16 = const()[name = tensor("op_15486_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_15487_cast_fp16 = add(x = var_15485_cast_fp16, y = var_15486_to_fp16)[name = tensor("op_15487_cast_fp16")]; + tensor denom_41_epsilon_0_to_fp16 = const()[name = tensor("denom_41_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_41_cast_fp16 = rsqrt(epsilon = denom_41_epsilon_0_to_fp16, x = var_15487_cast_fp16)[name = tensor("denom_41_cast_fp16")]; + tensor out_41_cast_fp16 = mul(x = zero_mean_41_cast_fp16, y = denom_41_cast_fp16)[name = tensor("out_41_cast_fp16")]; + tensor obj_41_gamma_0_to_fp16 = const()[name = tensor("obj_41_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(407828352)))]; + tensor obj_41_beta_0_to_fp16 = const()[name = tensor("obj_41_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(407830976)))]; + tensor obj_41_epsilon_0_to_fp16 = const()[name = tensor("obj_41_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_41_cast_fp16 = batch_norm(beta = obj_41_beta_0_to_fp16, epsilon = obj_41_epsilon_0_to_fp16, gamma = obj_41_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_41_cast_fp16)[name = tensor("obj_41_cast_fp16")]; + tensor var_15502 = const()[name = tensor("op_15502"), val = tensor([1, 1])]; + tensor var_15504 = const()[name = tensor("op_15504"), val = tensor([1, 1])]; + tensor query_21_pad_type_0 = const()[name = tensor("query_21_pad_type_0"), val = tensor("custom")]; + tensor query_21_pad_0 = const()[name = tensor("query_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_10_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_10_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(407833600)))]; + tensor layers_10_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_10_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(411110464)))]; + tensor query_21_cast_fp16 = conv(bias = layers_10_self_attn_q_proj_bias_to_fp16, dilations = var_15504, groups = var_15469, pad = query_21_pad_0, pad_type = query_21_pad_type_0, strides = var_15502, weight = layers_10_self_attn_q_proj_weight_to_fp16, x = obj_41_cast_fp16)[name = tensor("query_21_cast_fp16")]; + tensor var_15508 = const()[name = tensor("op_15508"), val = tensor([1, 1])]; + tensor var_15510 = const()[name = tensor("op_15510"), val = tensor([1, 1])]; + tensor key_21_pad_type_0 = const()[name = tensor("key_21_pad_type_0"), val = tensor("custom")]; + tensor key_21_pad_0 = const()[name = tensor("key_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_10_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_10_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(411113088)))]; + tensor key_21_cast_fp16 = conv(dilations = var_15510, groups = var_15469, pad = key_21_pad_0, pad_type = key_21_pad_type_0, strides = var_15508, weight = layers_10_self_attn_k_proj_weight_to_fp16, x = obj_41_cast_fp16)[name = tensor("key_21_cast_fp16")]; + tensor var_15515 = const()[name = tensor("op_15515"), val = tensor([1, 1])]; + tensor var_15517 = const()[name = tensor("op_15517"), val = tensor([1, 1])]; + tensor value_21_pad_type_0 = const()[name = tensor("value_21_pad_type_0"), val = tensor("custom")]; + tensor value_21_pad_0 = const()[name = tensor("value_21_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_10_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_10_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(414389952)))]; + tensor layers_10_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_10_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(417666816)))]; + tensor value_21_cast_fp16 = conv(bias = layers_10_self_attn_v_proj_bias_to_fp16, dilations = var_15517, groups = var_15469, pad = value_21_pad_0, pad_type = value_21_pad_type_0, strides = var_15515, weight = layers_10_self_attn_v_proj_weight_to_fp16, x = obj_41_cast_fp16)[name = tensor("value_21_cast_fp16")]; + tensor var_15524_begin_0 = const()[name = tensor("op_15524_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_15524_end_0 = const()[name = tensor("op_15524_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_15524_end_mask_0 = const()[name = tensor("op_15524_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_15524_cast_fp16 = slice_by_index(begin = var_15524_begin_0, end = var_15524_end_0, end_mask = var_15524_end_mask_0, x = query_21_cast_fp16)[name = tensor("op_15524_cast_fp16")]; + tensor var_15528_begin_0 = const()[name = tensor("op_15528_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_15528_end_0 = const()[name = tensor("op_15528_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_15528_end_mask_0 = const()[name = tensor("op_15528_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_15528_cast_fp16 = slice_by_index(begin = var_15528_begin_0, end = var_15528_end_0, end_mask = var_15528_end_mask_0, x = query_21_cast_fp16)[name = tensor("op_15528_cast_fp16")]; + tensor var_15532_begin_0 = const()[name = tensor("op_15532_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_15532_end_0 = const()[name = tensor("op_15532_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_15532_end_mask_0 = const()[name = tensor("op_15532_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_15532_cast_fp16 = slice_by_index(begin = var_15532_begin_0, end = var_15532_end_0, end_mask = var_15532_end_mask_0, x = query_21_cast_fp16)[name = tensor("op_15532_cast_fp16")]; + tensor var_15536_begin_0 = const()[name = tensor("op_15536_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_15536_end_0 = const()[name = tensor("op_15536_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_15536_end_mask_0 = const()[name = tensor("op_15536_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_15536_cast_fp16 = slice_by_index(begin = var_15536_begin_0, end = var_15536_end_0, end_mask = var_15536_end_mask_0, x = query_21_cast_fp16)[name = tensor("op_15536_cast_fp16")]; + tensor var_15540_begin_0 = const()[name = tensor("op_15540_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_15540_end_0 = const()[name = tensor("op_15540_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_15540_end_mask_0 = const()[name = tensor("op_15540_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_15540_cast_fp16 = slice_by_index(begin = var_15540_begin_0, end = var_15540_end_0, end_mask = var_15540_end_mask_0, x = query_21_cast_fp16)[name = tensor("op_15540_cast_fp16")]; + tensor var_15544_begin_0 = const()[name = tensor("op_15544_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_15544_end_0 = const()[name = tensor("op_15544_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_15544_end_mask_0 = const()[name = tensor("op_15544_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_15544_cast_fp16 = slice_by_index(begin = var_15544_begin_0, end = var_15544_end_0, end_mask = var_15544_end_mask_0, x = query_21_cast_fp16)[name = tensor("op_15544_cast_fp16")]; + tensor var_15548_begin_0 = const()[name = tensor("op_15548_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_15548_end_0 = const()[name = tensor("op_15548_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_15548_end_mask_0 = const()[name = tensor("op_15548_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_15548_cast_fp16 = slice_by_index(begin = var_15548_begin_0, end = var_15548_end_0, end_mask = var_15548_end_mask_0, x = query_21_cast_fp16)[name = tensor("op_15548_cast_fp16")]; + tensor var_15552_begin_0 = const()[name = tensor("op_15552_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_15552_end_0 = const()[name = tensor("op_15552_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_15552_end_mask_0 = const()[name = tensor("op_15552_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_15552_cast_fp16 = slice_by_index(begin = var_15552_begin_0, end = var_15552_end_0, end_mask = var_15552_end_mask_0, x = query_21_cast_fp16)[name = tensor("op_15552_cast_fp16")]; + tensor var_15556_begin_0 = const()[name = tensor("op_15556_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_15556_end_0 = const()[name = tensor("op_15556_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_15556_end_mask_0 = const()[name = tensor("op_15556_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_15556_cast_fp16 = slice_by_index(begin = var_15556_begin_0, end = var_15556_end_0, end_mask = var_15556_end_mask_0, x = query_21_cast_fp16)[name = tensor("op_15556_cast_fp16")]; + tensor var_15560_begin_0 = const()[name = tensor("op_15560_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_15560_end_0 = const()[name = tensor("op_15560_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_15560_end_mask_0 = const()[name = tensor("op_15560_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_15560_cast_fp16 = slice_by_index(begin = var_15560_begin_0, end = var_15560_end_0, end_mask = var_15560_end_mask_0, x = query_21_cast_fp16)[name = tensor("op_15560_cast_fp16")]; + tensor var_15564_begin_0 = const()[name = tensor("op_15564_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_15564_end_0 = const()[name = tensor("op_15564_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_15564_end_mask_0 = const()[name = tensor("op_15564_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_15564_cast_fp16 = slice_by_index(begin = var_15564_begin_0, end = var_15564_end_0, end_mask = var_15564_end_mask_0, x = query_21_cast_fp16)[name = tensor("op_15564_cast_fp16")]; + tensor var_15568_begin_0 = const()[name = tensor("op_15568_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_15568_end_0 = const()[name = tensor("op_15568_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_15568_end_mask_0 = const()[name = tensor("op_15568_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_15568_cast_fp16 = slice_by_index(begin = var_15568_begin_0, end = var_15568_end_0, end_mask = var_15568_end_mask_0, x = query_21_cast_fp16)[name = tensor("op_15568_cast_fp16")]; + tensor var_15572_begin_0 = const()[name = tensor("op_15572_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_15572_end_0 = const()[name = tensor("op_15572_end_0"), val = tensor([1, 832, 1, 1500])]; + tensor var_15572_end_mask_0 = const()[name = tensor("op_15572_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_15572_cast_fp16 = slice_by_index(begin = var_15572_begin_0, end = var_15572_end_0, end_mask = var_15572_end_mask_0, x = query_21_cast_fp16)[name = tensor("op_15572_cast_fp16")]; + tensor var_15576_begin_0 = const()[name = tensor("op_15576_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_15576_end_0 = const()[name = tensor("op_15576_end_0"), val = tensor([1, 896, 1, 1500])]; + tensor var_15576_end_mask_0 = const()[name = tensor("op_15576_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_15576_cast_fp16 = slice_by_index(begin = var_15576_begin_0, end = var_15576_end_0, end_mask = var_15576_end_mask_0, x = query_21_cast_fp16)[name = tensor("op_15576_cast_fp16")]; + tensor var_15580_begin_0 = const()[name = tensor("op_15580_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_15580_end_0 = const()[name = tensor("op_15580_end_0"), val = tensor([1, 960, 1, 1500])]; + tensor var_15580_end_mask_0 = const()[name = tensor("op_15580_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_15580_cast_fp16 = slice_by_index(begin = var_15580_begin_0, end = var_15580_end_0, end_mask = var_15580_end_mask_0, x = query_21_cast_fp16)[name = tensor("op_15580_cast_fp16")]; + tensor var_15584_begin_0 = const()[name = tensor("op_15584_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_15584_end_0 = const()[name = tensor("op_15584_end_0"), val = tensor([1, 1024, 1, 1500])]; + tensor var_15584_end_mask_0 = const()[name = tensor("op_15584_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_15584_cast_fp16 = slice_by_index(begin = var_15584_begin_0, end = var_15584_end_0, end_mask = var_15584_end_mask_0, x = query_21_cast_fp16)[name = tensor("op_15584_cast_fp16")]; + tensor var_15588_begin_0 = const()[name = tensor("op_15588_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_15588_end_0 = const()[name = tensor("op_15588_end_0"), val = tensor([1, 1088, 1, 1500])]; + tensor var_15588_end_mask_0 = const()[name = tensor("op_15588_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_15588_cast_fp16 = slice_by_index(begin = var_15588_begin_0, end = var_15588_end_0, end_mask = var_15588_end_mask_0, x = query_21_cast_fp16)[name = tensor("op_15588_cast_fp16")]; + tensor var_15592_begin_0 = const()[name = tensor("op_15592_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_15592_end_0 = const()[name = tensor("op_15592_end_0"), val = tensor([1, 1152, 1, 1500])]; + tensor var_15592_end_mask_0 = const()[name = tensor("op_15592_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_15592_cast_fp16 = slice_by_index(begin = var_15592_begin_0, end = var_15592_end_0, end_mask = var_15592_end_mask_0, x = query_21_cast_fp16)[name = tensor("op_15592_cast_fp16")]; + tensor var_15596_begin_0 = const()[name = tensor("op_15596_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_15596_end_0 = const()[name = tensor("op_15596_end_0"), val = tensor([1, 1216, 1, 1500])]; + tensor var_15596_end_mask_0 = const()[name = tensor("op_15596_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_15596_cast_fp16 = slice_by_index(begin = var_15596_begin_0, end = var_15596_end_0, end_mask = var_15596_end_mask_0, x = query_21_cast_fp16)[name = tensor("op_15596_cast_fp16")]; + tensor var_15600_begin_0 = const()[name = tensor("op_15600_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_15600_end_0 = const()[name = tensor("op_15600_end_0"), val = tensor([1, 1280, 1, 1500])]; + tensor var_15600_end_mask_0 = const()[name = tensor("op_15600_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_15600_cast_fp16 = slice_by_index(begin = var_15600_begin_0, end = var_15600_end_0, end_mask = var_15600_end_mask_0, x = query_21_cast_fp16)[name = tensor("op_15600_cast_fp16")]; + tensor var_15609_begin_0 = const()[name = tensor("op_15609_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_15609_end_0 = const()[name = tensor("op_15609_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_15609_end_mask_0 = const()[name = tensor("op_15609_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_15609_cast_fp16 = slice_by_index(begin = var_15609_begin_0, end = var_15609_end_0, end_mask = var_15609_end_mask_0, x = var_15524_cast_fp16)[name = tensor("op_15609_cast_fp16")]; + tensor var_15616_begin_0 = const()[name = tensor("op_15616_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_15616_end_0 = const()[name = tensor("op_15616_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_15616_end_mask_0 = const()[name = tensor("op_15616_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_15616_cast_fp16 = slice_by_index(begin = var_15616_begin_0, end = var_15616_end_0, end_mask = var_15616_end_mask_0, x = var_15524_cast_fp16)[name = tensor("op_15616_cast_fp16")]; + tensor var_15623_begin_0 = const()[name = tensor("op_15623_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_15623_end_0 = const()[name = tensor("op_15623_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_15623_end_mask_0 = const()[name = tensor("op_15623_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_15623_cast_fp16 = slice_by_index(begin = var_15623_begin_0, end = var_15623_end_0, end_mask = var_15623_end_mask_0, x = var_15524_cast_fp16)[name = tensor("op_15623_cast_fp16")]; + tensor var_15630_begin_0 = const()[name = tensor("op_15630_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_15630_end_0 = const()[name = tensor("op_15630_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_15630_end_mask_0 = const()[name = tensor("op_15630_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_15630_cast_fp16 = slice_by_index(begin = var_15630_begin_0, end = var_15630_end_0, end_mask = var_15630_end_mask_0, x = var_15524_cast_fp16)[name = tensor("op_15630_cast_fp16")]; + tensor var_15637_begin_0 = const()[name = tensor("op_15637_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_15637_end_0 = const()[name = tensor("op_15637_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_15637_end_mask_0 = const()[name = tensor("op_15637_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_15637_cast_fp16 = slice_by_index(begin = var_15637_begin_0, end = var_15637_end_0, end_mask = var_15637_end_mask_0, x = var_15528_cast_fp16)[name = tensor("op_15637_cast_fp16")]; + tensor var_15644_begin_0 = const()[name = tensor("op_15644_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_15644_end_0 = const()[name = tensor("op_15644_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_15644_end_mask_0 = const()[name = tensor("op_15644_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_15644_cast_fp16 = slice_by_index(begin = var_15644_begin_0, end = var_15644_end_0, end_mask = var_15644_end_mask_0, x = var_15528_cast_fp16)[name = tensor("op_15644_cast_fp16")]; + tensor var_15651_begin_0 = const()[name = tensor("op_15651_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_15651_end_0 = const()[name = tensor("op_15651_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_15651_end_mask_0 = const()[name = tensor("op_15651_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_15651_cast_fp16 = slice_by_index(begin = var_15651_begin_0, end = var_15651_end_0, end_mask = var_15651_end_mask_0, x = var_15528_cast_fp16)[name = tensor("op_15651_cast_fp16")]; + tensor var_15658_begin_0 = const()[name = tensor("op_15658_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_15658_end_0 = const()[name = tensor("op_15658_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_15658_end_mask_0 = const()[name = tensor("op_15658_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_15658_cast_fp16 = slice_by_index(begin = var_15658_begin_0, end = var_15658_end_0, end_mask = var_15658_end_mask_0, x = var_15528_cast_fp16)[name = tensor("op_15658_cast_fp16")]; + tensor var_15665_begin_0 = const()[name = tensor("op_15665_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_15665_end_0 = const()[name = tensor("op_15665_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_15665_end_mask_0 = const()[name = tensor("op_15665_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_15665_cast_fp16 = slice_by_index(begin = var_15665_begin_0, end = var_15665_end_0, end_mask = var_15665_end_mask_0, x = var_15532_cast_fp16)[name = tensor("op_15665_cast_fp16")]; + tensor var_15672_begin_0 = const()[name = tensor("op_15672_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_15672_end_0 = const()[name = tensor("op_15672_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_15672_end_mask_0 = const()[name = tensor("op_15672_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_15672_cast_fp16 = slice_by_index(begin = var_15672_begin_0, end = var_15672_end_0, end_mask = var_15672_end_mask_0, x = var_15532_cast_fp16)[name = tensor("op_15672_cast_fp16")]; + tensor var_15679_begin_0 = const()[name = tensor("op_15679_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_15679_end_0 = const()[name = tensor("op_15679_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_15679_end_mask_0 = const()[name = tensor("op_15679_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_15679_cast_fp16 = slice_by_index(begin = var_15679_begin_0, end = var_15679_end_0, end_mask = var_15679_end_mask_0, x = var_15532_cast_fp16)[name = tensor("op_15679_cast_fp16")]; + tensor var_15686_begin_0 = const()[name = tensor("op_15686_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_15686_end_0 = const()[name = tensor("op_15686_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_15686_end_mask_0 = const()[name = tensor("op_15686_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_15686_cast_fp16 = slice_by_index(begin = var_15686_begin_0, end = var_15686_end_0, end_mask = var_15686_end_mask_0, x = var_15532_cast_fp16)[name = tensor("op_15686_cast_fp16")]; + tensor var_15693_begin_0 = const()[name = tensor("op_15693_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_15693_end_0 = const()[name = tensor("op_15693_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_15693_end_mask_0 = const()[name = tensor("op_15693_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_15693_cast_fp16 = slice_by_index(begin = var_15693_begin_0, end = var_15693_end_0, end_mask = var_15693_end_mask_0, x = var_15536_cast_fp16)[name = tensor("op_15693_cast_fp16")]; + tensor var_15700_begin_0 = const()[name = tensor("op_15700_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_15700_end_0 = const()[name = tensor("op_15700_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_15700_end_mask_0 = const()[name = tensor("op_15700_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_15700_cast_fp16 = slice_by_index(begin = var_15700_begin_0, end = var_15700_end_0, end_mask = var_15700_end_mask_0, x = var_15536_cast_fp16)[name = tensor("op_15700_cast_fp16")]; + tensor var_15707_begin_0 = const()[name = tensor("op_15707_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_15707_end_0 = const()[name = tensor("op_15707_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_15707_end_mask_0 = const()[name = tensor("op_15707_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_15707_cast_fp16 = slice_by_index(begin = var_15707_begin_0, end = var_15707_end_0, end_mask = var_15707_end_mask_0, x = var_15536_cast_fp16)[name = tensor("op_15707_cast_fp16")]; + tensor var_15714_begin_0 = const()[name = tensor("op_15714_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_15714_end_0 = const()[name = tensor("op_15714_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_15714_end_mask_0 = const()[name = tensor("op_15714_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_15714_cast_fp16 = slice_by_index(begin = var_15714_begin_0, end = var_15714_end_0, end_mask = var_15714_end_mask_0, x = var_15536_cast_fp16)[name = tensor("op_15714_cast_fp16")]; + tensor var_15721_begin_0 = const()[name = tensor("op_15721_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_15721_end_0 = const()[name = tensor("op_15721_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_15721_end_mask_0 = const()[name = tensor("op_15721_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_15721_cast_fp16 = slice_by_index(begin = var_15721_begin_0, end = var_15721_end_0, end_mask = var_15721_end_mask_0, x = var_15540_cast_fp16)[name = tensor("op_15721_cast_fp16")]; + tensor var_15728_begin_0 = const()[name = tensor("op_15728_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_15728_end_0 = const()[name = tensor("op_15728_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_15728_end_mask_0 = const()[name = tensor("op_15728_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_15728_cast_fp16 = slice_by_index(begin = var_15728_begin_0, end = var_15728_end_0, end_mask = var_15728_end_mask_0, x = var_15540_cast_fp16)[name = tensor("op_15728_cast_fp16")]; + tensor var_15735_begin_0 = const()[name = tensor("op_15735_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_15735_end_0 = const()[name = tensor("op_15735_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_15735_end_mask_0 = const()[name = tensor("op_15735_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_15735_cast_fp16 = slice_by_index(begin = var_15735_begin_0, end = var_15735_end_0, end_mask = var_15735_end_mask_0, x = var_15540_cast_fp16)[name = tensor("op_15735_cast_fp16")]; + tensor var_15742_begin_0 = const()[name = tensor("op_15742_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_15742_end_0 = const()[name = tensor("op_15742_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_15742_end_mask_0 = const()[name = tensor("op_15742_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_15742_cast_fp16 = slice_by_index(begin = var_15742_begin_0, end = var_15742_end_0, end_mask = var_15742_end_mask_0, x = var_15540_cast_fp16)[name = tensor("op_15742_cast_fp16")]; + tensor var_15749_begin_0 = const()[name = tensor("op_15749_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_15749_end_0 = const()[name = tensor("op_15749_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_15749_end_mask_0 = const()[name = tensor("op_15749_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_15749_cast_fp16 = slice_by_index(begin = var_15749_begin_0, end = var_15749_end_0, end_mask = var_15749_end_mask_0, x = var_15544_cast_fp16)[name = tensor("op_15749_cast_fp16")]; + tensor var_15756_begin_0 = const()[name = tensor("op_15756_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_15756_end_0 = const()[name = tensor("op_15756_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_15756_end_mask_0 = const()[name = tensor("op_15756_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_15756_cast_fp16 = slice_by_index(begin = var_15756_begin_0, end = var_15756_end_0, end_mask = var_15756_end_mask_0, x = var_15544_cast_fp16)[name = tensor("op_15756_cast_fp16")]; + tensor var_15763_begin_0 = const()[name = tensor("op_15763_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_15763_end_0 = const()[name = tensor("op_15763_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_15763_end_mask_0 = const()[name = tensor("op_15763_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_15763_cast_fp16 = slice_by_index(begin = var_15763_begin_0, end = var_15763_end_0, end_mask = var_15763_end_mask_0, x = var_15544_cast_fp16)[name = tensor("op_15763_cast_fp16")]; + tensor var_15770_begin_0 = const()[name = tensor("op_15770_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_15770_end_0 = const()[name = tensor("op_15770_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_15770_end_mask_0 = const()[name = tensor("op_15770_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_15770_cast_fp16 = slice_by_index(begin = var_15770_begin_0, end = var_15770_end_0, end_mask = var_15770_end_mask_0, x = var_15544_cast_fp16)[name = tensor("op_15770_cast_fp16")]; + tensor var_15777_begin_0 = const()[name = tensor("op_15777_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_15777_end_0 = const()[name = tensor("op_15777_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_15777_end_mask_0 = const()[name = tensor("op_15777_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_15777_cast_fp16 = slice_by_index(begin = var_15777_begin_0, end = var_15777_end_0, end_mask = var_15777_end_mask_0, x = var_15548_cast_fp16)[name = tensor("op_15777_cast_fp16")]; + tensor var_15784_begin_0 = const()[name = tensor("op_15784_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_15784_end_0 = const()[name = tensor("op_15784_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_15784_end_mask_0 = const()[name = tensor("op_15784_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_15784_cast_fp16 = slice_by_index(begin = var_15784_begin_0, end = var_15784_end_0, end_mask = var_15784_end_mask_0, x = var_15548_cast_fp16)[name = tensor("op_15784_cast_fp16")]; + tensor var_15791_begin_0 = const()[name = tensor("op_15791_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_15791_end_0 = const()[name = tensor("op_15791_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_15791_end_mask_0 = const()[name = tensor("op_15791_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_15791_cast_fp16 = slice_by_index(begin = var_15791_begin_0, end = var_15791_end_0, end_mask = var_15791_end_mask_0, x = var_15548_cast_fp16)[name = tensor("op_15791_cast_fp16")]; + tensor var_15798_begin_0 = const()[name = tensor("op_15798_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_15798_end_0 = const()[name = tensor("op_15798_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_15798_end_mask_0 = const()[name = tensor("op_15798_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_15798_cast_fp16 = slice_by_index(begin = var_15798_begin_0, end = var_15798_end_0, end_mask = var_15798_end_mask_0, x = var_15548_cast_fp16)[name = tensor("op_15798_cast_fp16")]; + tensor var_15805_begin_0 = const()[name = tensor("op_15805_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_15805_end_0 = const()[name = tensor("op_15805_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_15805_end_mask_0 = const()[name = tensor("op_15805_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_15805_cast_fp16 = slice_by_index(begin = var_15805_begin_0, end = var_15805_end_0, end_mask = var_15805_end_mask_0, x = var_15552_cast_fp16)[name = tensor("op_15805_cast_fp16")]; + tensor var_15812_begin_0 = const()[name = tensor("op_15812_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_15812_end_0 = const()[name = tensor("op_15812_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_15812_end_mask_0 = const()[name = tensor("op_15812_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_15812_cast_fp16 = slice_by_index(begin = var_15812_begin_0, end = var_15812_end_0, end_mask = var_15812_end_mask_0, x = var_15552_cast_fp16)[name = tensor("op_15812_cast_fp16")]; + tensor var_15819_begin_0 = const()[name = tensor("op_15819_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_15819_end_0 = const()[name = tensor("op_15819_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_15819_end_mask_0 = const()[name = tensor("op_15819_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_15819_cast_fp16 = slice_by_index(begin = var_15819_begin_0, end = var_15819_end_0, end_mask = var_15819_end_mask_0, x = var_15552_cast_fp16)[name = tensor("op_15819_cast_fp16")]; + tensor var_15826_begin_0 = const()[name = tensor("op_15826_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_15826_end_0 = const()[name = tensor("op_15826_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_15826_end_mask_0 = const()[name = tensor("op_15826_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_15826_cast_fp16 = slice_by_index(begin = var_15826_begin_0, end = var_15826_end_0, end_mask = var_15826_end_mask_0, x = var_15552_cast_fp16)[name = tensor("op_15826_cast_fp16")]; + tensor var_15833_begin_0 = const()[name = tensor("op_15833_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_15833_end_0 = const()[name = tensor("op_15833_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_15833_end_mask_0 = const()[name = tensor("op_15833_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_15833_cast_fp16 = slice_by_index(begin = var_15833_begin_0, end = var_15833_end_0, end_mask = var_15833_end_mask_0, x = var_15556_cast_fp16)[name = tensor("op_15833_cast_fp16")]; + tensor var_15840_begin_0 = const()[name = tensor("op_15840_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_15840_end_0 = const()[name = tensor("op_15840_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_15840_end_mask_0 = const()[name = tensor("op_15840_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_15840_cast_fp16 = slice_by_index(begin = var_15840_begin_0, end = var_15840_end_0, end_mask = var_15840_end_mask_0, x = var_15556_cast_fp16)[name = tensor("op_15840_cast_fp16")]; + tensor var_15847_begin_0 = const()[name = tensor("op_15847_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_15847_end_0 = const()[name = tensor("op_15847_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_15847_end_mask_0 = const()[name = tensor("op_15847_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_15847_cast_fp16 = slice_by_index(begin = var_15847_begin_0, end = var_15847_end_0, end_mask = var_15847_end_mask_0, x = var_15556_cast_fp16)[name = tensor("op_15847_cast_fp16")]; + tensor var_15854_begin_0 = const()[name = tensor("op_15854_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_15854_end_0 = const()[name = tensor("op_15854_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_15854_end_mask_0 = const()[name = tensor("op_15854_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_15854_cast_fp16 = slice_by_index(begin = var_15854_begin_0, end = var_15854_end_0, end_mask = var_15854_end_mask_0, x = var_15556_cast_fp16)[name = tensor("op_15854_cast_fp16")]; + tensor var_15861_begin_0 = const()[name = tensor("op_15861_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_15861_end_0 = const()[name = tensor("op_15861_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_15861_end_mask_0 = const()[name = tensor("op_15861_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_15861_cast_fp16 = slice_by_index(begin = var_15861_begin_0, end = var_15861_end_0, end_mask = var_15861_end_mask_0, x = var_15560_cast_fp16)[name = tensor("op_15861_cast_fp16")]; + tensor var_15868_begin_0 = const()[name = tensor("op_15868_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_15868_end_0 = const()[name = tensor("op_15868_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_15868_end_mask_0 = const()[name = tensor("op_15868_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_15868_cast_fp16 = slice_by_index(begin = var_15868_begin_0, end = var_15868_end_0, end_mask = var_15868_end_mask_0, x = var_15560_cast_fp16)[name = tensor("op_15868_cast_fp16")]; + tensor var_15875_begin_0 = const()[name = tensor("op_15875_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_15875_end_0 = const()[name = tensor("op_15875_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_15875_end_mask_0 = const()[name = tensor("op_15875_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_15875_cast_fp16 = slice_by_index(begin = var_15875_begin_0, end = var_15875_end_0, end_mask = var_15875_end_mask_0, x = var_15560_cast_fp16)[name = tensor("op_15875_cast_fp16")]; + tensor var_15882_begin_0 = const()[name = tensor("op_15882_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_15882_end_0 = const()[name = tensor("op_15882_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_15882_end_mask_0 = const()[name = tensor("op_15882_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_15882_cast_fp16 = slice_by_index(begin = var_15882_begin_0, end = var_15882_end_0, end_mask = var_15882_end_mask_0, x = var_15560_cast_fp16)[name = tensor("op_15882_cast_fp16")]; + tensor var_15889_begin_0 = const()[name = tensor("op_15889_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_15889_end_0 = const()[name = tensor("op_15889_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_15889_end_mask_0 = const()[name = tensor("op_15889_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_15889_cast_fp16 = slice_by_index(begin = var_15889_begin_0, end = var_15889_end_0, end_mask = var_15889_end_mask_0, x = var_15564_cast_fp16)[name = tensor("op_15889_cast_fp16")]; + tensor var_15896_begin_0 = const()[name = tensor("op_15896_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_15896_end_0 = const()[name = tensor("op_15896_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_15896_end_mask_0 = const()[name = tensor("op_15896_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_15896_cast_fp16 = slice_by_index(begin = var_15896_begin_0, end = var_15896_end_0, end_mask = var_15896_end_mask_0, x = var_15564_cast_fp16)[name = tensor("op_15896_cast_fp16")]; + tensor var_15903_begin_0 = const()[name = tensor("op_15903_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_15903_end_0 = const()[name = tensor("op_15903_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_15903_end_mask_0 = const()[name = tensor("op_15903_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_15903_cast_fp16 = slice_by_index(begin = var_15903_begin_0, end = var_15903_end_0, end_mask = var_15903_end_mask_0, x = var_15564_cast_fp16)[name = tensor("op_15903_cast_fp16")]; + tensor var_15910_begin_0 = const()[name = tensor("op_15910_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_15910_end_0 = const()[name = tensor("op_15910_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_15910_end_mask_0 = const()[name = tensor("op_15910_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_15910_cast_fp16 = slice_by_index(begin = var_15910_begin_0, end = var_15910_end_0, end_mask = var_15910_end_mask_0, x = var_15564_cast_fp16)[name = tensor("op_15910_cast_fp16")]; + tensor var_15917_begin_0 = const()[name = tensor("op_15917_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_15917_end_0 = const()[name = tensor("op_15917_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_15917_end_mask_0 = const()[name = tensor("op_15917_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_15917_cast_fp16 = slice_by_index(begin = var_15917_begin_0, end = var_15917_end_0, end_mask = var_15917_end_mask_0, x = var_15568_cast_fp16)[name = tensor("op_15917_cast_fp16")]; + tensor var_15924_begin_0 = const()[name = tensor("op_15924_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_15924_end_0 = const()[name = tensor("op_15924_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_15924_end_mask_0 = const()[name = tensor("op_15924_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_15924_cast_fp16 = slice_by_index(begin = var_15924_begin_0, end = var_15924_end_0, end_mask = var_15924_end_mask_0, x = var_15568_cast_fp16)[name = tensor("op_15924_cast_fp16")]; + tensor var_15931_begin_0 = const()[name = tensor("op_15931_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_15931_end_0 = const()[name = tensor("op_15931_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_15931_end_mask_0 = const()[name = tensor("op_15931_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_15931_cast_fp16 = slice_by_index(begin = var_15931_begin_0, end = var_15931_end_0, end_mask = var_15931_end_mask_0, x = var_15568_cast_fp16)[name = tensor("op_15931_cast_fp16")]; + tensor var_15938_begin_0 = const()[name = tensor("op_15938_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_15938_end_0 = const()[name = tensor("op_15938_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_15938_end_mask_0 = const()[name = tensor("op_15938_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_15938_cast_fp16 = slice_by_index(begin = var_15938_begin_0, end = var_15938_end_0, end_mask = var_15938_end_mask_0, x = var_15568_cast_fp16)[name = tensor("op_15938_cast_fp16")]; + tensor var_15945_begin_0 = const()[name = tensor("op_15945_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_15945_end_0 = const()[name = tensor("op_15945_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_15945_end_mask_0 = const()[name = tensor("op_15945_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_15945_cast_fp16 = slice_by_index(begin = var_15945_begin_0, end = var_15945_end_0, end_mask = var_15945_end_mask_0, x = var_15572_cast_fp16)[name = tensor("op_15945_cast_fp16")]; + tensor var_15952_begin_0 = const()[name = tensor("op_15952_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_15952_end_0 = const()[name = tensor("op_15952_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_15952_end_mask_0 = const()[name = tensor("op_15952_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_15952_cast_fp16 = slice_by_index(begin = var_15952_begin_0, end = var_15952_end_0, end_mask = var_15952_end_mask_0, x = var_15572_cast_fp16)[name = tensor("op_15952_cast_fp16")]; + tensor var_15959_begin_0 = const()[name = tensor("op_15959_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_15959_end_0 = const()[name = tensor("op_15959_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_15959_end_mask_0 = const()[name = tensor("op_15959_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_15959_cast_fp16 = slice_by_index(begin = var_15959_begin_0, end = var_15959_end_0, end_mask = var_15959_end_mask_0, x = var_15572_cast_fp16)[name = tensor("op_15959_cast_fp16")]; + tensor var_15966_begin_0 = const()[name = tensor("op_15966_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_15966_end_0 = const()[name = tensor("op_15966_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_15966_end_mask_0 = const()[name = tensor("op_15966_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_15966_cast_fp16 = slice_by_index(begin = var_15966_begin_0, end = var_15966_end_0, end_mask = var_15966_end_mask_0, x = var_15572_cast_fp16)[name = tensor("op_15966_cast_fp16")]; + tensor var_15973_begin_0 = const()[name = tensor("op_15973_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_15973_end_0 = const()[name = tensor("op_15973_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_15973_end_mask_0 = const()[name = tensor("op_15973_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_15973_cast_fp16 = slice_by_index(begin = var_15973_begin_0, end = var_15973_end_0, end_mask = var_15973_end_mask_0, x = var_15576_cast_fp16)[name = tensor("op_15973_cast_fp16")]; + tensor var_15980_begin_0 = const()[name = tensor("op_15980_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_15980_end_0 = const()[name = tensor("op_15980_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_15980_end_mask_0 = const()[name = tensor("op_15980_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_15980_cast_fp16 = slice_by_index(begin = var_15980_begin_0, end = var_15980_end_0, end_mask = var_15980_end_mask_0, x = var_15576_cast_fp16)[name = tensor("op_15980_cast_fp16")]; + tensor var_15987_begin_0 = const()[name = tensor("op_15987_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_15987_end_0 = const()[name = tensor("op_15987_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_15987_end_mask_0 = const()[name = tensor("op_15987_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_15987_cast_fp16 = slice_by_index(begin = var_15987_begin_0, end = var_15987_end_0, end_mask = var_15987_end_mask_0, x = var_15576_cast_fp16)[name = tensor("op_15987_cast_fp16")]; + tensor var_15994_begin_0 = const()[name = tensor("op_15994_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_15994_end_0 = const()[name = tensor("op_15994_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_15994_end_mask_0 = const()[name = tensor("op_15994_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_15994_cast_fp16 = slice_by_index(begin = var_15994_begin_0, end = var_15994_end_0, end_mask = var_15994_end_mask_0, x = var_15576_cast_fp16)[name = tensor("op_15994_cast_fp16")]; + tensor var_16001_begin_0 = const()[name = tensor("op_16001_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_16001_end_0 = const()[name = tensor("op_16001_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_16001_end_mask_0 = const()[name = tensor("op_16001_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_16001_cast_fp16 = slice_by_index(begin = var_16001_begin_0, end = var_16001_end_0, end_mask = var_16001_end_mask_0, x = var_15580_cast_fp16)[name = tensor("op_16001_cast_fp16")]; + tensor var_16008_begin_0 = const()[name = tensor("op_16008_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_16008_end_0 = const()[name = tensor("op_16008_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_16008_end_mask_0 = const()[name = tensor("op_16008_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_16008_cast_fp16 = slice_by_index(begin = var_16008_begin_0, end = var_16008_end_0, end_mask = var_16008_end_mask_0, x = var_15580_cast_fp16)[name = tensor("op_16008_cast_fp16")]; + tensor var_16015_begin_0 = const()[name = tensor("op_16015_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_16015_end_0 = const()[name = tensor("op_16015_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_16015_end_mask_0 = const()[name = tensor("op_16015_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_16015_cast_fp16 = slice_by_index(begin = var_16015_begin_0, end = var_16015_end_0, end_mask = var_16015_end_mask_0, x = var_15580_cast_fp16)[name = tensor("op_16015_cast_fp16")]; + tensor var_16022_begin_0 = const()[name = tensor("op_16022_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_16022_end_0 = const()[name = tensor("op_16022_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_16022_end_mask_0 = const()[name = tensor("op_16022_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_16022_cast_fp16 = slice_by_index(begin = var_16022_begin_0, end = var_16022_end_0, end_mask = var_16022_end_mask_0, x = var_15580_cast_fp16)[name = tensor("op_16022_cast_fp16")]; + tensor var_16029_begin_0 = const()[name = tensor("op_16029_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_16029_end_0 = const()[name = tensor("op_16029_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_16029_end_mask_0 = const()[name = tensor("op_16029_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_16029_cast_fp16 = slice_by_index(begin = var_16029_begin_0, end = var_16029_end_0, end_mask = var_16029_end_mask_0, x = var_15584_cast_fp16)[name = tensor("op_16029_cast_fp16")]; + tensor var_16036_begin_0 = const()[name = tensor("op_16036_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_16036_end_0 = const()[name = tensor("op_16036_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_16036_end_mask_0 = const()[name = tensor("op_16036_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_16036_cast_fp16 = slice_by_index(begin = var_16036_begin_0, end = var_16036_end_0, end_mask = var_16036_end_mask_0, x = var_15584_cast_fp16)[name = tensor("op_16036_cast_fp16")]; + tensor var_16043_begin_0 = const()[name = tensor("op_16043_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_16043_end_0 = const()[name = tensor("op_16043_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_16043_end_mask_0 = const()[name = tensor("op_16043_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_16043_cast_fp16 = slice_by_index(begin = var_16043_begin_0, end = var_16043_end_0, end_mask = var_16043_end_mask_0, x = var_15584_cast_fp16)[name = tensor("op_16043_cast_fp16")]; + tensor var_16050_begin_0 = const()[name = tensor("op_16050_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_16050_end_0 = const()[name = tensor("op_16050_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_16050_end_mask_0 = const()[name = tensor("op_16050_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_16050_cast_fp16 = slice_by_index(begin = var_16050_begin_0, end = var_16050_end_0, end_mask = var_16050_end_mask_0, x = var_15584_cast_fp16)[name = tensor("op_16050_cast_fp16")]; + tensor var_16057_begin_0 = const()[name = tensor("op_16057_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_16057_end_0 = const()[name = tensor("op_16057_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_16057_end_mask_0 = const()[name = tensor("op_16057_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_16057_cast_fp16 = slice_by_index(begin = var_16057_begin_0, end = var_16057_end_0, end_mask = var_16057_end_mask_0, x = var_15588_cast_fp16)[name = tensor("op_16057_cast_fp16")]; + tensor var_16064_begin_0 = const()[name = tensor("op_16064_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_16064_end_0 = const()[name = tensor("op_16064_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_16064_end_mask_0 = const()[name = tensor("op_16064_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_16064_cast_fp16 = slice_by_index(begin = var_16064_begin_0, end = var_16064_end_0, end_mask = var_16064_end_mask_0, x = var_15588_cast_fp16)[name = tensor("op_16064_cast_fp16")]; + tensor var_16071_begin_0 = const()[name = tensor("op_16071_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_16071_end_0 = const()[name = tensor("op_16071_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_16071_end_mask_0 = const()[name = tensor("op_16071_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_16071_cast_fp16 = slice_by_index(begin = var_16071_begin_0, end = var_16071_end_0, end_mask = var_16071_end_mask_0, x = var_15588_cast_fp16)[name = tensor("op_16071_cast_fp16")]; + tensor var_16078_begin_0 = const()[name = tensor("op_16078_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_16078_end_0 = const()[name = tensor("op_16078_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_16078_end_mask_0 = const()[name = tensor("op_16078_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_16078_cast_fp16 = slice_by_index(begin = var_16078_begin_0, end = var_16078_end_0, end_mask = var_16078_end_mask_0, x = var_15588_cast_fp16)[name = tensor("op_16078_cast_fp16")]; + tensor var_16085_begin_0 = const()[name = tensor("op_16085_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_16085_end_0 = const()[name = tensor("op_16085_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_16085_end_mask_0 = const()[name = tensor("op_16085_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_16085_cast_fp16 = slice_by_index(begin = var_16085_begin_0, end = var_16085_end_0, end_mask = var_16085_end_mask_0, x = var_15592_cast_fp16)[name = tensor("op_16085_cast_fp16")]; + tensor var_16092_begin_0 = const()[name = tensor("op_16092_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_16092_end_0 = const()[name = tensor("op_16092_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_16092_end_mask_0 = const()[name = tensor("op_16092_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_16092_cast_fp16 = slice_by_index(begin = var_16092_begin_0, end = var_16092_end_0, end_mask = var_16092_end_mask_0, x = var_15592_cast_fp16)[name = tensor("op_16092_cast_fp16")]; + tensor var_16099_begin_0 = const()[name = tensor("op_16099_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_16099_end_0 = const()[name = tensor("op_16099_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_16099_end_mask_0 = const()[name = tensor("op_16099_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_16099_cast_fp16 = slice_by_index(begin = var_16099_begin_0, end = var_16099_end_0, end_mask = var_16099_end_mask_0, x = var_15592_cast_fp16)[name = tensor("op_16099_cast_fp16")]; + tensor var_16106_begin_0 = const()[name = tensor("op_16106_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_16106_end_0 = const()[name = tensor("op_16106_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_16106_end_mask_0 = const()[name = tensor("op_16106_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_16106_cast_fp16 = slice_by_index(begin = var_16106_begin_0, end = var_16106_end_0, end_mask = var_16106_end_mask_0, x = var_15592_cast_fp16)[name = tensor("op_16106_cast_fp16")]; + tensor var_16113_begin_0 = const()[name = tensor("op_16113_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_16113_end_0 = const()[name = tensor("op_16113_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_16113_end_mask_0 = const()[name = tensor("op_16113_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_16113_cast_fp16 = slice_by_index(begin = var_16113_begin_0, end = var_16113_end_0, end_mask = var_16113_end_mask_0, x = var_15596_cast_fp16)[name = tensor("op_16113_cast_fp16")]; + tensor var_16120_begin_0 = const()[name = tensor("op_16120_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_16120_end_0 = const()[name = tensor("op_16120_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_16120_end_mask_0 = const()[name = tensor("op_16120_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_16120_cast_fp16 = slice_by_index(begin = var_16120_begin_0, end = var_16120_end_0, end_mask = var_16120_end_mask_0, x = var_15596_cast_fp16)[name = tensor("op_16120_cast_fp16")]; + tensor var_16127_begin_0 = const()[name = tensor("op_16127_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_16127_end_0 = const()[name = tensor("op_16127_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_16127_end_mask_0 = const()[name = tensor("op_16127_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_16127_cast_fp16 = slice_by_index(begin = var_16127_begin_0, end = var_16127_end_0, end_mask = var_16127_end_mask_0, x = var_15596_cast_fp16)[name = tensor("op_16127_cast_fp16")]; + tensor var_16134_begin_0 = const()[name = tensor("op_16134_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_16134_end_0 = const()[name = tensor("op_16134_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_16134_end_mask_0 = const()[name = tensor("op_16134_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_16134_cast_fp16 = slice_by_index(begin = var_16134_begin_0, end = var_16134_end_0, end_mask = var_16134_end_mask_0, x = var_15596_cast_fp16)[name = tensor("op_16134_cast_fp16")]; + tensor var_16141_begin_0 = const()[name = tensor("op_16141_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_16141_end_0 = const()[name = tensor("op_16141_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_16141_end_mask_0 = const()[name = tensor("op_16141_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_16141_cast_fp16 = slice_by_index(begin = var_16141_begin_0, end = var_16141_end_0, end_mask = var_16141_end_mask_0, x = var_15600_cast_fp16)[name = tensor("op_16141_cast_fp16")]; + tensor var_16148_begin_0 = const()[name = tensor("op_16148_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_16148_end_0 = const()[name = tensor("op_16148_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_16148_end_mask_0 = const()[name = tensor("op_16148_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_16148_cast_fp16 = slice_by_index(begin = var_16148_begin_0, end = var_16148_end_0, end_mask = var_16148_end_mask_0, x = var_15600_cast_fp16)[name = tensor("op_16148_cast_fp16")]; + tensor var_16155_begin_0 = const()[name = tensor("op_16155_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_16155_end_0 = const()[name = tensor("op_16155_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_16155_end_mask_0 = const()[name = tensor("op_16155_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_16155_cast_fp16 = slice_by_index(begin = var_16155_begin_0, end = var_16155_end_0, end_mask = var_16155_end_mask_0, x = var_15600_cast_fp16)[name = tensor("op_16155_cast_fp16")]; + tensor var_16162_begin_0 = const()[name = tensor("op_16162_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_16162_end_0 = const()[name = tensor("op_16162_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_16162_end_mask_0 = const()[name = tensor("op_16162_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_16162_cast_fp16 = slice_by_index(begin = var_16162_begin_0, end = var_16162_end_0, end_mask = var_16162_end_mask_0, x = var_15600_cast_fp16)[name = tensor("op_16162_cast_fp16")]; + tensor k_21_perm_0 = const()[name = tensor("k_21_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_16167_begin_0 = const()[name = tensor("op_16167_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_16167_end_0 = const()[name = tensor("op_16167_end_0"), val = tensor([1, 1500, 1, 64])]; + tensor var_16167_end_mask_0 = const()[name = tensor("op_16167_end_mask_0"), val = tensor([true, true, true, false])]; + tensor transpose_21 = transpose(perm = k_21_perm_0, x = key_21_cast_fp16)[name = tensor("transpose_21")]; + tensor var_16167_cast_fp16 = slice_by_index(begin = var_16167_begin_0, end = var_16167_end_0, end_mask = var_16167_end_mask_0, x = transpose_21)[name = tensor("op_16167_cast_fp16")]; + tensor var_16171_begin_0 = const()[name = tensor("op_16171_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor var_16171_end_0 = const()[name = tensor("op_16171_end_0"), val = tensor([1, 1500, 1, 128])]; + tensor var_16171_end_mask_0 = const()[name = tensor("op_16171_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_16171_cast_fp16 = slice_by_index(begin = var_16171_begin_0, end = var_16171_end_0, end_mask = var_16171_end_mask_0, x = transpose_21)[name = tensor("op_16171_cast_fp16")]; + tensor var_16175_begin_0 = const()[name = tensor("op_16175_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_16175_end_0 = const()[name = tensor("op_16175_end_0"), val = tensor([1, 1500, 1, 192])]; + tensor var_16175_end_mask_0 = const()[name = tensor("op_16175_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_16175_cast_fp16 = slice_by_index(begin = var_16175_begin_0, end = var_16175_end_0, end_mask = var_16175_end_mask_0, x = transpose_21)[name = tensor("op_16175_cast_fp16")]; + tensor var_16179_begin_0 = const()[name = tensor("op_16179_begin_0"), val = tensor([0, 0, 0, 192])]; + tensor var_16179_end_0 = const()[name = tensor("op_16179_end_0"), val = tensor([1, 1500, 1, 256])]; + tensor var_16179_end_mask_0 = const()[name = tensor("op_16179_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_16179_cast_fp16 = slice_by_index(begin = var_16179_begin_0, end = var_16179_end_0, end_mask = var_16179_end_mask_0, x = transpose_21)[name = tensor("op_16179_cast_fp16")]; + tensor var_16183_begin_0 = const()[name = tensor("op_16183_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_16183_end_0 = const()[name = tensor("op_16183_end_0"), val = tensor([1, 1500, 1, 320])]; + tensor var_16183_end_mask_0 = const()[name = tensor("op_16183_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_16183_cast_fp16 = slice_by_index(begin = var_16183_begin_0, end = var_16183_end_0, end_mask = var_16183_end_mask_0, x = transpose_21)[name = tensor("op_16183_cast_fp16")]; + tensor var_16187_begin_0 = const()[name = tensor("op_16187_begin_0"), val = tensor([0, 0, 0, 320])]; + tensor var_16187_end_0 = const()[name = tensor("op_16187_end_0"), val = tensor([1, 1500, 1, 384])]; + tensor var_16187_end_mask_0 = const()[name = tensor("op_16187_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_16187_cast_fp16 = slice_by_index(begin = var_16187_begin_0, end = var_16187_end_0, end_mask = var_16187_end_mask_0, x = transpose_21)[name = tensor("op_16187_cast_fp16")]; + tensor var_16191_begin_0 = const()[name = tensor("op_16191_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_16191_end_0 = const()[name = tensor("op_16191_end_0"), val = tensor([1, 1500, 1, 448])]; + tensor var_16191_end_mask_0 = const()[name = tensor("op_16191_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_16191_cast_fp16 = slice_by_index(begin = var_16191_begin_0, end = var_16191_end_0, end_mask = var_16191_end_mask_0, x = transpose_21)[name = tensor("op_16191_cast_fp16")]; + tensor var_16195_begin_0 = const()[name = tensor("op_16195_begin_0"), val = tensor([0, 0, 0, 448])]; + tensor var_16195_end_0 = const()[name = tensor("op_16195_end_0"), val = tensor([1, 1500, 1, 512])]; + tensor var_16195_end_mask_0 = const()[name = tensor("op_16195_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_16195_cast_fp16 = slice_by_index(begin = var_16195_begin_0, end = var_16195_end_0, end_mask = var_16195_end_mask_0, x = transpose_21)[name = tensor("op_16195_cast_fp16")]; + tensor var_16199_begin_0 = const()[name = tensor("op_16199_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_16199_end_0 = const()[name = tensor("op_16199_end_0"), val = tensor([1, 1500, 1, 576])]; + tensor var_16199_end_mask_0 = const()[name = tensor("op_16199_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_16199_cast_fp16 = slice_by_index(begin = var_16199_begin_0, end = var_16199_end_0, end_mask = var_16199_end_mask_0, x = transpose_21)[name = tensor("op_16199_cast_fp16")]; + tensor var_16203_begin_0 = const()[name = tensor("op_16203_begin_0"), val = tensor([0, 0, 0, 576])]; + tensor var_16203_end_0 = const()[name = tensor("op_16203_end_0"), val = tensor([1, 1500, 1, 640])]; + tensor var_16203_end_mask_0 = const()[name = tensor("op_16203_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_16203_cast_fp16 = slice_by_index(begin = var_16203_begin_0, end = var_16203_end_0, end_mask = var_16203_end_mask_0, x = transpose_21)[name = tensor("op_16203_cast_fp16")]; + tensor var_16207_begin_0 = const()[name = tensor("op_16207_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_16207_end_0 = const()[name = tensor("op_16207_end_0"), val = tensor([1, 1500, 1, 704])]; + tensor var_16207_end_mask_0 = const()[name = tensor("op_16207_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_16207_cast_fp16 = slice_by_index(begin = var_16207_begin_0, end = var_16207_end_0, end_mask = var_16207_end_mask_0, x = transpose_21)[name = tensor("op_16207_cast_fp16")]; + tensor var_16211_begin_0 = const()[name = tensor("op_16211_begin_0"), val = tensor([0, 0, 0, 704])]; + tensor var_16211_end_0 = const()[name = tensor("op_16211_end_0"), val = tensor([1, 1500, 1, 768])]; + tensor var_16211_end_mask_0 = const()[name = tensor("op_16211_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_16211_cast_fp16 = slice_by_index(begin = var_16211_begin_0, end = var_16211_end_0, end_mask = var_16211_end_mask_0, x = transpose_21)[name = tensor("op_16211_cast_fp16")]; + tensor var_16215_begin_0 = const()[name = tensor("op_16215_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_16215_end_0 = const()[name = tensor("op_16215_end_0"), val = tensor([1, 1500, 1, 832])]; + tensor var_16215_end_mask_0 = const()[name = tensor("op_16215_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_16215_cast_fp16 = slice_by_index(begin = var_16215_begin_0, end = var_16215_end_0, end_mask = var_16215_end_mask_0, x = transpose_21)[name = tensor("op_16215_cast_fp16")]; + tensor var_16219_begin_0 = const()[name = tensor("op_16219_begin_0"), val = tensor([0, 0, 0, 832])]; + tensor var_16219_end_0 = const()[name = tensor("op_16219_end_0"), val = tensor([1, 1500, 1, 896])]; + tensor var_16219_end_mask_0 = const()[name = tensor("op_16219_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_16219_cast_fp16 = slice_by_index(begin = var_16219_begin_0, end = var_16219_end_0, end_mask = var_16219_end_mask_0, x = transpose_21)[name = tensor("op_16219_cast_fp16")]; + tensor var_16223_begin_0 = const()[name = tensor("op_16223_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_16223_end_0 = const()[name = tensor("op_16223_end_0"), val = tensor([1, 1500, 1, 960])]; + tensor var_16223_end_mask_0 = const()[name = tensor("op_16223_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_16223_cast_fp16 = slice_by_index(begin = var_16223_begin_0, end = var_16223_end_0, end_mask = var_16223_end_mask_0, x = transpose_21)[name = tensor("op_16223_cast_fp16")]; + tensor var_16227_begin_0 = const()[name = tensor("op_16227_begin_0"), val = tensor([0, 0, 0, 960])]; + tensor var_16227_end_0 = const()[name = tensor("op_16227_end_0"), val = tensor([1, 1500, 1, 1024])]; + tensor var_16227_end_mask_0 = const()[name = tensor("op_16227_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_16227_cast_fp16 = slice_by_index(begin = var_16227_begin_0, end = var_16227_end_0, end_mask = var_16227_end_mask_0, x = transpose_21)[name = tensor("op_16227_cast_fp16")]; + tensor var_16231_begin_0 = const()[name = tensor("op_16231_begin_0"), val = tensor([0, 0, 0, 1024])]; + tensor var_16231_end_0 = const()[name = tensor("op_16231_end_0"), val = tensor([1, 1500, 1, 1088])]; + tensor var_16231_end_mask_0 = const()[name = tensor("op_16231_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_16231_cast_fp16 = slice_by_index(begin = var_16231_begin_0, end = var_16231_end_0, end_mask = var_16231_end_mask_0, x = transpose_21)[name = tensor("op_16231_cast_fp16")]; + tensor var_16235_begin_0 = const()[name = tensor("op_16235_begin_0"), val = tensor([0, 0, 0, 1088])]; + tensor var_16235_end_0 = const()[name = tensor("op_16235_end_0"), val = tensor([1, 1500, 1, 1152])]; + tensor var_16235_end_mask_0 = const()[name = tensor("op_16235_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_16235_cast_fp16 = slice_by_index(begin = var_16235_begin_0, end = var_16235_end_0, end_mask = var_16235_end_mask_0, x = transpose_21)[name = tensor("op_16235_cast_fp16")]; + tensor var_16239_begin_0 = const()[name = tensor("op_16239_begin_0"), val = tensor([0, 0, 0, 1152])]; + tensor var_16239_end_0 = const()[name = tensor("op_16239_end_0"), val = tensor([1, 1500, 1, 1216])]; + tensor var_16239_end_mask_0 = const()[name = tensor("op_16239_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_16239_cast_fp16 = slice_by_index(begin = var_16239_begin_0, end = var_16239_end_0, end_mask = var_16239_end_mask_0, x = transpose_21)[name = tensor("op_16239_cast_fp16")]; + tensor var_16243_begin_0 = const()[name = tensor("op_16243_begin_0"), val = tensor([0, 0, 0, 1216])]; + tensor var_16243_end_0 = const()[name = tensor("op_16243_end_0"), val = tensor([1, 1500, 1, 1280])]; + tensor var_16243_end_mask_0 = const()[name = tensor("op_16243_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_16243_cast_fp16 = slice_by_index(begin = var_16243_begin_0, end = var_16243_end_0, end_mask = var_16243_end_mask_0, x = transpose_21)[name = tensor("op_16243_cast_fp16")]; + tensor var_16245_begin_0 = const()[name = tensor("op_16245_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_16245_end_0 = const()[name = tensor("op_16245_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_16245_end_mask_0 = const()[name = tensor("op_16245_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_16245_cast_fp16 = slice_by_index(begin = var_16245_begin_0, end = var_16245_end_0, end_mask = var_16245_end_mask_0, x = value_21_cast_fp16)[name = tensor("op_16245_cast_fp16")]; + tensor var_16249_begin_0 = const()[name = tensor("op_16249_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_16249_end_0 = const()[name = tensor("op_16249_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_16249_end_mask_0 = const()[name = tensor("op_16249_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_16249_cast_fp16 = slice_by_index(begin = var_16249_begin_0, end = var_16249_end_0, end_mask = var_16249_end_mask_0, x = value_21_cast_fp16)[name = tensor("op_16249_cast_fp16")]; + tensor var_16253_begin_0 = const()[name = tensor("op_16253_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_16253_end_0 = const()[name = tensor("op_16253_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_16253_end_mask_0 = const()[name = tensor("op_16253_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_16253_cast_fp16 = slice_by_index(begin = var_16253_begin_0, end = var_16253_end_0, end_mask = var_16253_end_mask_0, x = value_21_cast_fp16)[name = tensor("op_16253_cast_fp16")]; + tensor var_16257_begin_0 = const()[name = tensor("op_16257_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_16257_end_0 = const()[name = tensor("op_16257_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_16257_end_mask_0 = const()[name = tensor("op_16257_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_16257_cast_fp16 = slice_by_index(begin = var_16257_begin_0, end = var_16257_end_0, end_mask = var_16257_end_mask_0, x = value_21_cast_fp16)[name = tensor("op_16257_cast_fp16")]; + tensor var_16261_begin_0 = const()[name = tensor("op_16261_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_16261_end_0 = const()[name = tensor("op_16261_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_16261_end_mask_0 = const()[name = tensor("op_16261_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_16261_cast_fp16 = slice_by_index(begin = var_16261_begin_0, end = var_16261_end_0, end_mask = var_16261_end_mask_0, x = value_21_cast_fp16)[name = tensor("op_16261_cast_fp16")]; + tensor var_16265_begin_0 = const()[name = tensor("op_16265_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_16265_end_0 = const()[name = tensor("op_16265_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_16265_end_mask_0 = const()[name = tensor("op_16265_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_16265_cast_fp16 = slice_by_index(begin = var_16265_begin_0, end = var_16265_end_0, end_mask = var_16265_end_mask_0, x = value_21_cast_fp16)[name = tensor("op_16265_cast_fp16")]; + tensor var_16269_begin_0 = const()[name = tensor("op_16269_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_16269_end_0 = const()[name = tensor("op_16269_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_16269_end_mask_0 = const()[name = tensor("op_16269_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_16269_cast_fp16 = slice_by_index(begin = var_16269_begin_0, end = var_16269_end_0, end_mask = var_16269_end_mask_0, x = value_21_cast_fp16)[name = tensor("op_16269_cast_fp16")]; + tensor var_16273_begin_0 = const()[name = tensor("op_16273_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_16273_end_0 = const()[name = tensor("op_16273_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_16273_end_mask_0 = const()[name = tensor("op_16273_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_16273_cast_fp16 = slice_by_index(begin = var_16273_begin_0, end = var_16273_end_0, end_mask = var_16273_end_mask_0, x = value_21_cast_fp16)[name = tensor("op_16273_cast_fp16")]; + tensor var_16277_begin_0 = const()[name = tensor("op_16277_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_16277_end_0 = const()[name = tensor("op_16277_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_16277_end_mask_0 = const()[name = tensor("op_16277_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_16277_cast_fp16 = slice_by_index(begin = var_16277_begin_0, end = var_16277_end_0, end_mask = var_16277_end_mask_0, x = value_21_cast_fp16)[name = tensor("op_16277_cast_fp16")]; + tensor var_16281_begin_0 = const()[name = tensor("op_16281_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_16281_end_0 = const()[name = tensor("op_16281_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_16281_end_mask_0 = const()[name = tensor("op_16281_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_16281_cast_fp16 = slice_by_index(begin = var_16281_begin_0, end = var_16281_end_0, end_mask = var_16281_end_mask_0, x = value_21_cast_fp16)[name = tensor("op_16281_cast_fp16")]; + tensor var_16285_begin_0 = const()[name = tensor("op_16285_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_16285_end_0 = const()[name = tensor("op_16285_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_16285_end_mask_0 = const()[name = tensor("op_16285_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_16285_cast_fp16 = slice_by_index(begin = var_16285_begin_0, end = var_16285_end_0, end_mask = var_16285_end_mask_0, x = value_21_cast_fp16)[name = tensor("op_16285_cast_fp16")]; + tensor var_16289_begin_0 = const()[name = tensor("op_16289_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_16289_end_0 = const()[name = tensor("op_16289_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_16289_end_mask_0 = const()[name = tensor("op_16289_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_16289_cast_fp16 = slice_by_index(begin = var_16289_begin_0, end = var_16289_end_0, end_mask = var_16289_end_mask_0, x = value_21_cast_fp16)[name = tensor("op_16289_cast_fp16")]; + tensor var_16293_begin_0 = const()[name = tensor("op_16293_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_16293_end_0 = const()[name = tensor("op_16293_end_0"), val = tensor([1, 832, 1, 1500])]; + tensor var_16293_end_mask_0 = const()[name = tensor("op_16293_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_16293_cast_fp16 = slice_by_index(begin = var_16293_begin_0, end = var_16293_end_0, end_mask = var_16293_end_mask_0, x = value_21_cast_fp16)[name = tensor("op_16293_cast_fp16")]; + tensor var_16297_begin_0 = const()[name = tensor("op_16297_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_16297_end_0 = const()[name = tensor("op_16297_end_0"), val = tensor([1, 896, 1, 1500])]; + tensor var_16297_end_mask_0 = const()[name = tensor("op_16297_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_16297_cast_fp16 = slice_by_index(begin = var_16297_begin_0, end = var_16297_end_0, end_mask = var_16297_end_mask_0, x = value_21_cast_fp16)[name = tensor("op_16297_cast_fp16")]; + tensor var_16301_begin_0 = const()[name = tensor("op_16301_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_16301_end_0 = const()[name = tensor("op_16301_end_0"), val = tensor([1, 960, 1, 1500])]; + tensor var_16301_end_mask_0 = const()[name = tensor("op_16301_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_16301_cast_fp16 = slice_by_index(begin = var_16301_begin_0, end = var_16301_end_0, end_mask = var_16301_end_mask_0, x = value_21_cast_fp16)[name = tensor("op_16301_cast_fp16")]; + tensor var_16305_begin_0 = const()[name = tensor("op_16305_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_16305_end_0 = const()[name = tensor("op_16305_end_0"), val = tensor([1, 1024, 1, 1500])]; + tensor var_16305_end_mask_0 = const()[name = tensor("op_16305_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_16305_cast_fp16 = slice_by_index(begin = var_16305_begin_0, end = var_16305_end_0, end_mask = var_16305_end_mask_0, x = value_21_cast_fp16)[name = tensor("op_16305_cast_fp16")]; + tensor var_16309_begin_0 = const()[name = tensor("op_16309_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_16309_end_0 = const()[name = tensor("op_16309_end_0"), val = tensor([1, 1088, 1, 1500])]; + tensor var_16309_end_mask_0 = const()[name = tensor("op_16309_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_16309_cast_fp16 = slice_by_index(begin = var_16309_begin_0, end = var_16309_end_0, end_mask = var_16309_end_mask_0, x = value_21_cast_fp16)[name = tensor("op_16309_cast_fp16")]; + tensor var_16313_begin_0 = const()[name = tensor("op_16313_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_16313_end_0 = const()[name = tensor("op_16313_end_0"), val = tensor([1, 1152, 1, 1500])]; + tensor var_16313_end_mask_0 = const()[name = tensor("op_16313_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_16313_cast_fp16 = slice_by_index(begin = var_16313_begin_0, end = var_16313_end_0, end_mask = var_16313_end_mask_0, x = value_21_cast_fp16)[name = tensor("op_16313_cast_fp16")]; + tensor var_16317_begin_0 = const()[name = tensor("op_16317_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_16317_end_0 = const()[name = tensor("op_16317_end_0"), val = tensor([1, 1216, 1, 1500])]; + tensor var_16317_end_mask_0 = const()[name = tensor("op_16317_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_16317_cast_fp16 = slice_by_index(begin = var_16317_begin_0, end = var_16317_end_0, end_mask = var_16317_end_mask_0, x = value_21_cast_fp16)[name = tensor("op_16317_cast_fp16")]; + tensor var_16321_begin_0 = const()[name = tensor("op_16321_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_16321_end_0 = const()[name = tensor("op_16321_end_0"), val = tensor([1, 1280, 1, 1500])]; + tensor var_16321_end_mask_0 = const()[name = tensor("op_16321_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_16321_cast_fp16 = slice_by_index(begin = var_16321_begin_0, end = var_16321_end_0, end_mask = var_16321_end_mask_0, x = value_21_cast_fp16)[name = tensor("op_16321_cast_fp16")]; + tensor var_16325_equation_0 = const()[name = tensor("op_16325_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16325_cast_fp16 = einsum(equation = var_16325_equation_0, values = (var_16167_cast_fp16, var_15609_cast_fp16))[name = tensor("op_16325_cast_fp16")]; + tensor var_16326_to_fp16 = const()[name = tensor("op_16326_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1601_cast_fp16 = mul(x = var_16325_cast_fp16, y = var_16326_to_fp16)[name = tensor("aw_chunk_1601_cast_fp16")]; + tensor var_16329_equation_0 = const()[name = tensor("op_16329_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16329_cast_fp16 = einsum(equation = var_16329_equation_0, values = (var_16167_cast_fp16, var_15616_cast_fp16))[name = tensor("op_16329_cast_fp16")]; + tensor var_16330_to_fp16 = const()[name = tensor("op_16330_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1603_cast_fp16 = mul(x = var_16329_cast_fp16, y = var_16330_to_fp16)[name = tensor("aw_chunk_1603_cast_fp16")]; + tensor var_16333_equation_0 = const()[name = tensor("op_16333_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16333_cast_fp16 = einsum(equation = var_16333_equation_0, values = (var_16167_cast_fp16, var_15623_cast_fp16))[name = tensor("op_16333_cast_fp16")]; + tensor var_16334_to_fp16 = const()[name = tensor("op_16334_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1605_cast_fp16 = mul(x = var_16333_cast_fp16, y = var_16334_to_fp16)[name = tensor("aw_chunk_1605_cast_fp16")]; + tensor var_16337_equation_0 = const()[name = tensor("op_16337_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16337_cast_fp16 = einsum(equation = var_16337_equation_0, values = (var_16167_cast_fp16, var_15630_cast_fp16))[name = tensor("op_16337_cast_fp16")]; + tensor var_16338_to_fp16 = const()[name = tensor("op_16338_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1607_cast_fp16 = mul(x = var_16337_cast_fp16, y = var_16338_to_fp16)[name = tensor("aw_chunk_1607_cast_fp16")]; + tensor var_16341_equation_0 = const()[name = tensor("op_16341_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16341_cast_fp16 = einsum(equation = var_16341_equation_0, values = (var_16171_cast_fp16, var_15637_cast_fp16))[name = tensor("op_16341_cast_fp16")]; + tensor var_16342_to_fp16 = const()[name = tensor("op_16342_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1609_cast_fp16 = mul(x = var_16341_cast_fp16, y = var_16342_to_fp16)[name = tensor("aw_chunk_1609_cast_fp16")]; + tensor var_16345_equation_0 = const()[name = tensor("op_16345_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16345_cast_fp16 = einsum(equation = var_16345_equation_0, values = (var_16171_cast_fp16, var_15644_cast_fp16))[name = tensor("op_16345_cast_fp16")]; + tensor var_16346_to_fp16 = const()[name = tensor("op_16346_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1611_cast_fp16 = mul(x = var_16345_cast_fp16, y = var_16346_to_fp16)[name = tensor("aw_chunk_1611_cast_fp16")]; + tensor var_16349_equation_0 = const()[name = tensor("op_16349_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16349_cast_fp16 = einsum(equation = var_16349_equation_0, values = (var_16171_cast_fp16, var_15651_cast_fp16))[name = tensor("op_16349_cast_fp16")]; + tensor var_16350_to_fp16 = const()[name = tensor("op_16350_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1613_cast_fp16 = mul(x = var_16349_cast_fp16, y = var_16350_to_fp16)[name = tensor("aw_chunk_1613_cast_fp16")]; + tensor var_16353_equation_0 = const()[name = tensor("op_16353_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16353_cast_fp16 = einsum(equation = var_16353_equation_0, values = (var_16171_cast_fp16, var_15658_cast_fp16))[name = tensor("op_16353_cast_fp16")]; + tensor var_16354_to_fp16 = const()[name = tensor("op_16354_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1615_cast_fp16 = mul(x = var_16353_cast_fp16, y = var_16354_to_fp16)[name = tensor("aw_chunk_1615_cast_fp16")]; + tensor var_16357_equation_0 = const()[name = tensor("op_16357_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16357_cast_fp16 = einsum(equation = var_16357_equation_0, values = (var_16175_cast_fp16, var_15665_cast_fp16))[name = tensor("op_16357_cast_fp16")]; + tensor var_16358_to_fp16 = const()[name = tensor("op_16358_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1617_cast_fp16 = mul(x = var_16357_cast_fp16, y = var_16358_to_fp16)[name = tensor("aw_chunk_1617_cast_fp16")]; + tensor var_16361_equation_0 = const()[name = tensor("op_16361_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16361_cast_fp16 = einsum(equation = var_16361_equation_0, values = (var_16175_cast_fp16, var_15672_cast_fp16))[name = tensor("op_16361_cast_fp16")]; + tensor var_16362_to_fp16 = const()[name = tensor("op_16362_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1619_cast_fp16 = mul(x = var_16361_cast_fp16, y = var_16362_to_fp16)[name = tensor("aw_chunk_1619_cast_fp16")]; + tensor var_16365_equation_0 = const()[name = tensor("op_16365_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16365_cast_fp16 = einsum(equation = var_16365_equation_0, values = (var_16175_cast_fp16, var_15679_cast_fp16))[name = tensor("op_16365_cast_fp16")]; + tensor var_16366_to_fp16 = const()[name = tensor("op_16366_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1621_cast_fp16 = mul(x = var_16365_cast_fp16, y = var_16366_to_fp16)[name = tensor("aw_chunk_1621_cast_fp16")]; + tensor var_16369_equation_0 = const()[name = tensor("op_16369_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16369_cast_fp16 = einsum(equation = var_16369_equation_0, values = (var_16175_cast_fp16, var_15686_cast_fp16))[name = tensor("op_16369_cast_fp16")]; + tensor var_16370_to_fp16 = const()[name = tensor("op_16370_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1623_cast_fp16 = mul(x = var_16369_cast_fp16, y = var_16370_to_fp16)[name = tensor("aw_chunk_1623_cast_fp16")]; + tensor var_16373_equation_0 = const()[name = tensor("op_16373_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16373_cast_fp16 = einsum(equation = var_16373_equation_0, values = (var_16179_cast_fp16, var_15693_cast_fp16))[name = tensor("op_16373_cast_fp16")]; + tensor var_16374_to_fp16 = const()[name = tensor("op_16374_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1625_cast_fp16 = mul(x = var_16373_cast_fp16, y = var_16374_to_fp16)[name = tensor("aw_chunk_1625_cast_fp16")]; + tensor var_16377_equation_0 = const()[name = tensor("op_16377_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16377_cast_fp16 = einsum(equation = var_16377_equation_0, values = (var_16179_cast_fp16, var_15700_cast_fp16))[name = tensor("op_16377_cast_fp16")]; + tensor var_16378_to_fp16 = const()[name = tensor("op_16378_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1627_cast_fp16 = mul(x = var_16377_cast_fp16, y = var_16378_to_fp16)[name = tensor("aw_chunk_1627_cast_fp16")]; + tensor var_16381_equation_0 = const()[name = tensor("op_16381_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16381_cast_fp16 = einsum(equation = var_16381_equation_0, values = (var_16179_cast_fp16, var_15707_cast_fp16))[name = tensor("op_16381_cast_fp16")]; + tensor var_16382_to_fp16 = const()[name = tensor("op_16382_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1629_cast_fp16 = mul(x = var_16381_cast_fp16, y = var_16382_to_fp16)[name = tensor("aw_chunk_1629_cast_fp16")]; + tensor var_16385_equation_0 = const()[name = tensor("op_16385_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16385_cast_fp16 = einsum(equation = var_16385_equation_0, values = (var_16179_cast_fp16, var_15714_cast_fp16))[name = tensor("op_16385_cast_fp16")]; + tensor var_16386_to_fp16 = const()[name = tensor("op_16386_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1631_cast_fp16 = mul(x = var_16385_cast_fp16, y = var_16386_to_fp16)[name = tensor("aw_chunk_1631_cast_fp16")]; + tensor var_16389_equation_0 = const()[name = tensor("op_16389_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16389_cast_fp16 = einsum(equation = var_16389_equation_0, values = (var_16183_cast_fp16, var_15721_cast_fp16))[name = tensor("op_16389_cast_fp16")]; + tensor var_16390_to_fp16 = const()[name = tensor("op_16390_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1633_cast_fp16 = mul(x = var_16389_cast_fp16, y = var_16390_to_fp16)[name = tensor("aw_chunk_1633_cast_fp16")]; + tensor var_16393_equation_0 = const()[name = tensor("op_16393_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16393_cast_fp16 = einsum(equation = var_16393_equation_0, values = (var_16183_cast_fp16, var_15728_cast_fp16))[name = tensor("op_16393_cast_fp16")]; + tensor var_16394_to_fp16 = const()[name = tensor("op_16394_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1635_cast_fp16 = mul(x = var_16393_cast_fp16, y = var_16394_to_fp16)[name = tensor("aw_chunk_1635_cast_fp16")]; + tensor var_16397_equation_0 = const()[name = tensor("op_16397_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16397_cast_fp16 = einsum(equation = var_16397_equation_0, values = (var_16183_cast_fp16, var_15735_cast_fp16))[name = tensor("op_16397_cast_fp16")]; + tensor var_16398_to_fp16 = const()[name = tensor("op_16398_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1637_cast_fp16 = mul(x = var_16397_cast_fp16, y = var_16398_to_fp16)[name = tensor("aw_chunk_1637_cast_fp16")]; + tensor var_16401_equation_0 = const()[name = tensor("op_16401_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16401_cast_fp16 = einsum(equation = var_16401_equation_0, values = (var_16183_cast_fp16, var_15742_cast_fp16))[name = tensor("op_16401_cast_fp16")]; + tensor var_16402_to_fp16 = const()[name = tensor("op_16402_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1639_cast_fp16 = mul(x = var_16401_cast_fp16, y = var_16402_to_fp16)[name = tensor("aw_chunk_1639_cast_fp16")]; + tensor var_16405_equation_0 = const()[name = tensor("op_16405_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16405_cast_fp16 = einsum(equation = var_16405_equation_0, values = (var_16187_cast_fp16, var_15749_cast_fp16))[name = tensor("op_16405_cast_fp16")]; + tensor var_16406_to_fp16 = const()[name = tensor("op_16406_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1641_cast_fp16 = mul(x = var_16405_cast_fp16, y = var_16406_to_fp16)[name = tensor("aw_chunk_1641_cast_fp16")]; + tensor var_16409_equation_0 = const()[name = tensor("op_16409_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16409_cast_fp16 = einsum(equation = var_16409_equation_0, values = (var_16187_cast_fp16, var_15756_cast_fp16))[name = tensor("op_16409_cast_fp16")]; + tensor var_16410_to_fp16 = const()[name = tensor("op_16410_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1643_cast_fp16 = mul(x = var_16409_cast_fp16, y = var_16410_to_fp16)[name = tensor("aw_chunk_1643_cast_fp16")]; + tensor var_16413_equation_0 = const()[name = tensor("op_16413_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16413_cast_fp16 = einsum(equation = var_16413_equation_0, values = (var_16187_cast_fp16, var_15763_cast_fp16))[name = tensor("op_16413_cast_fp16")]; + tensor var_16414_to_fp16 = const()[name = tensor("op_16414_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1645_cast_fp16 = mul(x = var_16413_cast_fp16, y = var_16414_to_fp16)[name = tensor("aw_chunk_1645_cast_fp16")]; + tensor var_16417_equation_0 = const()[name = tensor("op_16417_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16417_cast_fp16 = einsum(equation = var_16417_equation_0, values = (var_16187_cast_fp16, var_15770_cast_fp16))[name = tensor("op_16417_cast_fp16")]; + tensor var_16418_to_fp16 = const()[name = tensor("op_16418_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1647_cast_fp16 = mul(x = var_16417_cast_fp16, y = var_16418_to_fp16)[name = tensor("aw_chunk_1647_cast_fp16")]; + tensor var_16421_equation_0 = const()[name = tensor("op_16421_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16421_cast_fp16 = einsum(equation = var_16421_equation_0, values = (var_16191_cast_fp16, var_15777_cast_fp16))[name = tensor("op_16421_cast_fp16")]; + tensor var_16422_to_fp16 = const()[name = tensor("op_16422_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1649_cast_fp16 = mul(x = var_16421_cast_fp16, y = var_16422_to_fp16)[name = tensor("aw_chunk_1649_cast_fp16")]; + tensor var_16425_equation_0 = const()[name = tensor("op_16425_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16425_cast_fp16 = einsum(equation = var_16425_equation_0, values = (var_16191_cast_fp16, var_15784_cast_fp16))[name = tensor("op_16425_cast_fp16")]; + tensor var_16426_to_fp16 = const()[name = tensor("op_16426_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1651_cast_fp16 = mul(x = var_16425_cast_fp16, y = var_16426_to_fp16)[name = tensor("aw_chunk_1651_cast_fp16")]; + tensor var_16429_equation_0 = const()[name = tensor("op_16429_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16429_cast_fp16 = einsum(equation = var_16429_equation_0, values = (var_16191_cast_fp16, var_15791_cast_fp16))[name = tensor("op_16429_cast_fp16")]; + tensor var_16430_to_fp16 = const()[name = tensor("op_16430_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1653_cast_fp16 = mul(x = var_16429_cast_fp16, y = var_16430_to_fp16)[name = tensor("aw_chunk_1653_cast_fp16")]; + tensor var_16433_equation_0 = const()[name = tensor("op_16433_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16433_cast_fp16 = einsum(equation = var_16433_equation_0, values = (var_16191_cast_fp16, var_15798_cast_fp16))[name = tensor("op_16433_cast_fp16")]; + tensor var_16434_to_fp16 = const()[name = tensor("op_16434_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1655_cast_fp16 = mul(x = var_16433_cast_fp16, y = var_16434_to_fp16)[name = tensor("aw_chunk_1655_cast_fp16")]; + tensor var_16437_equation_0 = const()[name = tensor("op_16437_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16437_cast_fp16 = einsum(equation = var_16437_equation_0, values = (var_16195_cast_fp16, var_15805_cast_fp16))[name = tensor("op_16437_cast_fp16")]; + tensor var_16438_to_fp16 = const()[name = tensor("op_16438_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1657_cast_fp16 = mul(x = var_16437_cast_fp16, y = var_16438_to_fp16)[name = tensor("aw_chunk_1657_cast_fp16")]; + tensor var_16441_equation_0 = const()[name = tensor("op_16441_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16441_cast_fp16 = einsum(equation = var_16441_equation_0, values = (var_16195_cast_fp16, var_15812_cast_fp16))[name = tensor("op_16441_cast_fp16")]; + tensor var_16442_to_fp16 = const()[name = tensor("op_16442_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1659_cast_fp16 = mul(x = var_16441_cast_fp16, y = var_16442_to_fp16)[name = tensor("aw_chunk_1659_cast_fp16")]; + tensor var_16445_equation_0 = const()[name = tensor("op_16445_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16445_cast_fp16 = einsum(equation = var_16445_equation_0, values = (var_16195_cast_fp16, var_15819_cast_fp16))[name = tensor("op_16445_cast_fp16")]; + tensor var_16446_to_fp16 = const()[name = tensor("op_16446_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1661_cast_fp16 = mul(x = var_16445_cast_fp16, y = var_16446_to_fp16)[name = tensor("aw_chunk_1661_cast_fp16")]; + tensor var_16449_equation_0 = const()[name = tensor("op_16449_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16449_cast_fp16 = einsum(equation = var_16449_equation_0, values = (var_16195_cast_fp16, var_15826_cast_fp16))[name = tensor("op_16449_cast_fp16")]; + tensor var_16450_to_fp16 = const()[name = tensor("op_16450_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1663_cast_fp16 = mul(x = var_16449_cast_fp16, y = var_16450_to_fp16)[name = tensor("aw_chunk_1663_cast_fp16")]; + tensor var_16453_equation_0 = const()[name = tensor("op_16453_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16453_cast_fp16 = einsum(equation = var_16453_equation_0, values = (var_16199_cast_fp16, var_15833_cast_fp16))[name = tensor("op_16453_cast_fp16")]; + tensor var_16454_to_fp16 = const()[name = tensor("op_16454_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1665_cast_fp16 = mul(x = var_16453_cast_fp16, y = var_16454_to_fp16)[name = tensor("aw_chunk_1665_cast_fp16")]; + tensor var_16457_equation_0 = const()[name = tensor("op_16457_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16457_cast_fp16 = einsum(equation = var_16457_equation_0, values = (var_16199_cast_fp16, var_15840_cast_fp16))[name = tensor("op_16457_cast_fp16")]; + tensor var_16458_to_fp16 = const()[name = tensor("op_16458_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1667_cast_fp16 = mul(x = var_16457_cast_fp16, y = var_16458_to_fp16)[name = tensor("aw_chunk_1667_cast_fp16")]; + tensor var_16461_equation_0 = const()[name = tensor("op_16461_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16461_cast_fp16 = einsum(equation = var_16461_equation_0, values = (var_16199_cast_fp16, var_15847_cast_fp16))[name = tensor("op_16461_cast_fp16")]; + tensor var_16462_to_fp16 = const()[name = tensor("op_16462_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1669_cast_fp16 = mul(x = var_16461_cast_fp16, y = var_16462_to_fp16)[name = tensor("aw_chunk_1669_cast_fp16")]; + tensor var_16465_equation_0 = const()[name = tensor("op_16465_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16465_cast_fp16 = einsum(equation = var_16465_equation_0, values = (var_16199_cast_fp16, var_15854_cast_fp16))[name = tensor("op_16465_cast_fp16")]; + tensor var_16466_to_fp16 = const()[name = tensor("op_16466_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1671_cast_fp16 = mul(x = var_16465_cast_fp16, y = var_16466_to_fp16)[name = tensor("aw_chunk_1671_cast_fp16")]; + tensor var_16469_equation_0 = const()[name = tensor("op_16469_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16469_cast_fp16 = einsum(equation = var_16469_equation_0, values = (var_16203_cast_fp16, var_15861_cast_fp16))[name = tensor("op_16469_cast_fp16")]; + tensor var_16470_to_fp16 = const()[name = tensor("op_16470_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1673_cast_fp16 = mul(x = var_16469_cast_fp16, y = var_16470_to_fp16)[name = tensor("aw_chunk_1673_cast_fp16")]; + tensor var_16473_equation_0 = const()[name = tensor("op_16473_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16473_cast_fp16 = einsum(equation = var_16473_equation_0, values = (var_16203_cast_fp16, var_15868_cast_fp16))[name = tensor("op_16473_cast_fp16")]; + tensor var_16474_to_fp16 = const()[name = tensor("op_16474_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1675_cast_fp16 = mul(x = var_16473_cast_fp16, y = var_16474_to_fp16)[name = tensor("aw_chunk_1675_cast_fp16")]; + tensor var_16477_equation_0 = const()[name = tensor("op_16477_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16477_cast_fp16 = einsum(equation = var_16477_equation_0, values = (var_16203_cast_fp16, var_15875_cast_fp16))[name = tensor("op_16477_cast_fp16")]; + tensor var_16478_to_fp16 = const()[name = tensor("op_16478_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1677_cast_fp16 = mul(x = var_16477_cast_fp16, y = var_16478_to_fp16)[name = tensor("aw_chunk_1677_cast_fp16")]; + tensor var_16481_equation_0 = const()[name = tensor("op_16481_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16481_cast_fp16 = einsum(equation = var_16481_equation_0, values = (var_16203_cast_fp16, var_15882_cast_fp16))[name = tensor("op_16481_cast_fp16")]; + tensor var_16482_to_fp16 = const()[name = tensor("op_16482_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1679_cast_fp16 = mul(x = var_16481_cast_fp16, y = var_16482_to_fp16)[name = tensor("aw_chunk_1679_cast_fp16")]; + tensor var_16485_equation_0 = const()[name = tensor("op_16485_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16485_cast_fp16 = einsum(equation = var_16485_equation_0, values = (var_16207_cast_fp16, var_15889_cast_fp16))[name = tensor("op_16485_cast_fp16")]; + tensor var_16486_to_fp16 = const()[name = tensor("op_16486_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1681_cast_fp16 = mul(x = var_16485_cast_fp16, y = var_16486_to_fp16)[name = tensor("aw_chunk_1681_cast_fp16")]; + tensor var_16489_equation_0 = const()[name = tensor("op_16489_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16489_cast_fp16 = einsum(equation = var_16489_equation_0, values = (var_16207_cast_fp16, var_15896_cast_fp16))[name = tensor("op_16489_cast_fp16")]; + tensor var_16490_to_fp16 = const()[name = tensor("op_16490_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1683_cast_fp16 = mul(x = var_16489_cast_fp16, y = var_16490_to_fp16)[name = tensor("aw_chunk_1683_cast_fp16")]; + tensor var_16493_equation_0 = const()[name = tensor("op_16493_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16493_cast_fp16 = einsum(equation = var_16493_equation_0, values = (var_16207_cast_fp16, var_15903_cast_fp16))[name = tensor("op_16493_cast_fp16")]; + tensor var_16494_to_fp16 = const()[name = tensor("op_16494_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1685_cast_fp16 = mul(x = var_16493_cast_fp16, y = var_16494_to_fp16)[name = tensor("aw_chunk_1685_cast_fp16")]; + tensor var_16497_equation_0 = const()[name = tensor("op_16497_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16497_cast_fp16 = einsum(equation = var_16497_equation_0, values = (var_16207_cast_fp16, var_15910_cast_fp16))[name = tensor("op_16497_cast_fp16")]; + tensor var_16498_to_fp16 = const()[name = tensor("op_16498_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1687_cast_fp16 = mul(x = var_16497_cast_fp16, y = var_16498_to_fp16)[name = tensor("aw_chunk_1687_cast_fp16")]; + tensor var_16501_equation_0 = const()[name = tensor("op_16501_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16501_cast_fp16 = einsum(equation = var_16501_equation_0, values = (var_16211_cast_fp16, var_15917_cast_fp16))[name = tensor("op_16501_cast_fp16")]; + tensor var_16502_to_fp16 = const()[name = tensor("op_16502_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1689_cast_fp16 = mul(x = var_16501_cast_fp16, y = var_16502_to_fp16)[name = tensor("aw_chunk_1689_cast_fp16")]; + tensor var_16505_equation_0 = const()[name = tensor("op_16505_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16505_cast_fp16 = einsum(equation = var_16505_equation_0, values = (var_16211_cast_fp16, var_15924_cast_fp16))[name = tensor("op_16505_cast_fp16")]; + tensor var_16506_to_fp16 = const()[name = tensor("op_16506_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1691_cast_fp16 = mul(x = var_16505_cast_fp16, y = var_16506_to_fp16)[name = tensor("aw_chunk_1691_cast_fp16")]; + tensor var_16509_equation_0 = const()[name = tensor("op_16509_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16509_cast_fp16 = einsum(equation = var_16509_equation_0, values = (var_16211_cast_fp16, var_15931_cast_fp16))[name = tensor("op_16509_cast_fp16")]; + tensor var_16510_to_fp16 = const()[name = tensor("op_16510_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1693_cast_fp16 = mul(x = var_16509_cast_fp16, y = var_16510_to_fp16)[name = tensor("aw_chunk_1693_cast_fp16")]; + tensor var_16513_equation_0 = const()[name = tensor("op_16513_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16513_cast_fp16 = einsum(equation = var_16513_equation_0, values = (var_16211_cast_fp16, var_15938_cast_fp16))[name = tensor("op_16513_cast_fp16")]; + tensor var_16514_to_fp16 = const()[name = tensor("op_16514_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1695_cast_fp16 = mul(x = var_16513_cast_fp16, y = var_16514_to_fp16)[name = tensor("aw_chunk_1695_cast_fp16")]; + tensor var_16517_equation_0 = const()[name = tensor("op_16517_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16517_cast_fp16 = einsum(equation = var_16517_equation_0, values = (var_16215_cast_fp16, var_15945_cast_fp16))[name = tensor("op_16517_cast_fp16")]; + tensor var_16518_to_fp16 = const()[name = tensor("op_16518_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1697_cast_fp16 = mul(x = var_16517_cast_fp16, y = var_16518_to_fp16)[name = tensor("aw_chunk_1697_cast_fp16")]; + tensor var_16521_equation_0 = const()[name = tensor("op_16521_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16521_cast_fp16 = einsum(equation = var_16521_equation_0, values = (var_16215_cast_fp16, var_15952_cast_fp16))[name = tensor("op_16521_cast_fp16")]; + tensor var_16522_to_fp16 = const()[name = tensor("op_16522_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1699_cast_fp16 = mul(x = var_16521_cast_fp16, y = var_16522_to_fp16)[name = tensor("aw_chunk_1699_cast_fp16")]; + tensor var_16525_equation_0 = const()[name = tensor("op_16525_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16525_cast_fp16 = einsum(equation = var_16525_equation_0, values = (var_16215_cast_fp16, var_15959_cast_fp16))[name = tensor("op_16525_cast_fp16")]; + tensor var_16526_to_fp16 = const()[name = tensor("op_16526_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1701_cast_fp16 = mul(x = var_16525_cast_fp16, y = var_16526_to_fp16)[name = tensor("aw_chunk_1701_cast_fp16")]; + tensor var_16529_equation_0 = const()[name = tensor("op_16529_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16529_cast_fp16 = einsum(equation = var_16529_equation_0, values = (var_16215_cast_fp16, var_15966_cast_fp16))[name = tensor("op_16529_cast_fp16")]; + tensor var_16530_to_fp16 = const()[name = tensor("op_16530_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1703_cast_fp16 = mul(x = var_16529_cast_fp16, y = var_16530_to_fp16)[name = tensor("aw_chunk_1703_cast_fp16")]; + tensor var_16533_equation_0 = const()[name = tensor("op_16533_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16533_cast_fp16 = einsum(equation = var_16533_equation_0, values = (var_16219_cast_fp16, var_15973_cast_fp16))[name = tensor("op_16533_cast_fp16")]; + tensor var_16534_to_fp16 = const()[name = tensor("op_16534_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1705_cast_fp16 = mul(x = var_16533_cast_fp16, y = var_16534_to_fp16)[name = tensor("aw_chunk_1705_cast_fp16")]; + tensor var_16537_equation_0 = const()[name = tensor("op_16537_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16537_cast_fp16 = einsum(equation = var_16537_equation_0, values = (var_16219_cast_fp16, var_15980_cast_fp16))[name = tensor("op_16537_cast_fp16")]; + tensor var_16538_to_fp16 = const()[name = tensor("op_16538_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1707_cast_fp16 = mul(x = var_16537_cast_fp16, y = var_16538_to_fp16)[name = tensor("aw_chunk_1707_cast_fp16")]; + tensor var_16541_equation_0 = const()[name = tensor("op_16541_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16541_cast_fp16 = einsum(equation = var_16541_equation_0, values = (var_16219_cast_fp16, var_15987_cast_fp16))[name = tensor("op_16541_cast_fp16")]; + tensor var_16542_to_fp16 = const()[name = tensor("op_16542_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1709_cast_fp16 = mul(x = var_16541_cast_fp16, y = var_16542_to_fp16)[name = tensor("aw_chunk_1709_cast_fp16")]; + tensor var_16545_equation_0 = const()[name = tensor("op_16545_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16545_cast_fp16 = einsum(equation = var_16545_equation_0, values = (var_16219_cast_fp16, var_15994_cast_fp16))[name = tensor("op_16545_cast_fp16")]; + tensor var_16546_to_fp16 = const()[name = tensor("op_16546_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1711_cast_fp16 = mul(x = var_16545_cast_fp16, y = var_16546_to_fp16)[name = tensor("aw_chunk_1711_cast_fp16")]; + tensor var_16549_equation_0 = const()[name = tensor("op_16549_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16549_cast_fp16 = einsum(equation = var_16549_equation_0, values = (var_16223_cast_fp16, var_16001_cast_fp16))[name = tensor("op_16549_cast_fp16")]; + tensor var_16550_to_fp16 = const()[name = tensor("op_16550_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1713_cast_fp16 = mul(x = var_16549_cast_fp16, y = var_16550_to_fp16)[name = tensor("aw_chunk_1713_cast_fp16")]; + tensor var_16553_equation_0 = const()[name = tensor("op_16553_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16553_cast_fp16 = einsum(equation = var_16553_equation_0, values = (var_16223_cast_fp16, var_16008_cast_fp16))[name = tensor("op_16553_cast_fp16")]; + tensor var_16554_to_fp16 = const()[name = tensor("op_16554_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1715_cast_fp16 = mul(x = var_16553_cast_fp16, y = var_16554_to_fp16)[name = tensor("aw_chunk_1715_cast_fp16")]; + tensor var_16557_equation_0 = const()[name = tensor("op_16557_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16557_cast_fp16 = einsum(equation = var_16557_equation_0, values = (var_16223_cast_fp16, var_16015_cast_fp16))[name = tensor("op_16557_cast_fp16")]; + tensor var_16558_to_fp16 = const()[name = tensor("op_16558_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1717_cast_fp16 = mul(x = var_16557_cast_fp16, y = var_16558_to_fp16)[name = tensor("aw_chunk_1717_cast_fp16")]; + tensor var_16561_equation_0 = const()[name = tensor("op_16561_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16561_cast_fp16 = einsum(equation = var_16561_equation_0, values = (var_16223_cast_fp16, var_16022_cast_fp16))[name = tensor("op_16561_cast_fp16")]; + tensor var_16562_to_fp16 = const()[name = tensor("op_16562_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1719_cast_fp16 = mul(x = var_16561_cast_fp16, y = var_16562_to_fp16)[name = tensor("aw_chunk_1719_cast_fp16")]; + tensor var_16565_equation_0 = const()[name = tensor("op_16565_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16565_cast_fp16 = einsum(equation = var_16565_equation_0, values = (var_16227_cast_fp16, var_16029_cast_fp16))[name = tensor("op_16565_cast_fp16")]; + tensor var_16566_to_fp16 = const()[name = tensor("op_16566_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1721_cast_fp16 = mul(x = var_16565_cast_fp16, y = var_16566_to_fp16)[name = tensor("aw_chunk_1721_cast_fp16")]; + tensor var_16569_equation_0 = const()[name = tensor("op_16569_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16569_cast_fp16 = einsum(equation = var_16569_equation_0, values = (var_16227_cast_fp16, var_16036_cast_fp16))[name = tensor("op_16569_cast_fp16")]; + tensor var_16570_to_fp16 = const()[name = tensor("op_16570_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1723_cast_fp16 = mul(x = var_16569_cast_fp16, y = var_16570_to_fp16)[name = tensor("aw_chunk_1723_cast_fp16")]; + tensor var_16573_equation_0 = const()[name = tensor("op_16573_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16573_cast_fp16 = einsum(equation = var_16573_equation_0, values = (var_16227_cast_fp16, var_16043_cast_fp16))[name = tensor("op_16573_cast_fp16")]; + tensor var_16574_to_fp16 = const()[name = tensor("op_16574_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1725_cast_fp16 = mul(x = var_16573_cast_fp16, y = var_16574_to_fp16)[name = tensor("aw_chunk_1725_cast_fp16")]; + tensor var_16577_equation_0 = const()[name = tensor("op_16577_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16577_cast_fp16 = einsum(equation = var_16577_equation_0, values = (var_16227_cast_fp16, var_16050_cast_fp16))[name = tensor("op_16577_cast_fp16")]; + tensor var_16578_to_fp16 = const()[name = tensor("op_16578_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1727_cast_fp16 = mul(x = var_16577_cast_fp16, y = var_16578_to_fp16)[name = tensor("aw_chunk_1727_cast_fp16")]; + tensor var_16581_equation_0 = const()[name = tensor("op_16581_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16581_cast_fp16 = einsum(equation = var_16581_equation_0, values = (var_16231_cast_fp16, var_16057_cast_fp16))[name = tensor("op_16581_cast_fp16")]; + tensor var_16582_to_fp16 = const()[name = tensor("op_16582_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1729_cast_fp16 = mul(x = var_16581_cast_fp16, y = var_16582_to_fp16)[name = tensor("aw_chunk_1729_cast_fp16")]; + tensor var_16585_equation_0 = const()[name = tensor("op_16585_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16585_cast_fp16 = einsum(equation = var_16585_equation_0, values = (var_16231_cast_fp16, var_16064_cast_fp16))[name = tensor("op_16585_cast_fp16")]; + tensor var_16586_to_fp16 = const()[name = tensor("op_16586_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1731_cast_fp16 = mul(x = var_16585_cast_fp16, y = var_16586_to_fp16)[name = tensor("aw_chunk_1731_cast_fp16")]; + tensor var_16589_equation_0 = const()[name = tensor("op_16589_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16589_cast_fp16 = einsum(equation = var_16589_equation_0, values = (var_16231_cast_fp16, var_16071_cast_fp16))[name = tensor("op_16589_cast_fp16")]; + tensor var_16590_to_fp16 = const()[name = tensor("op_16590_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1733_cast_fp16 = mul(x = var_16589_cast_fp16, y = var_16590_to_fp16)[name = tensor("aw_chunk_1733_cast_fp16")]; + tensor var_16593_equation_0 = const()[name = tensor("op_16593_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16593_cast_fp16 = einsum(equation = var_16593_equation_0, values = (var_16231_cast_fp16, var_16078_cast_fp16))[name = tensor("op_16593_cast_fp16")]; + tensor var_16594_to_fp16 = const()[name = tensor("op_16594_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1735_cast_fp16 = mul(x = var_16593_cast_fp16, y = var_16594_to_fp16)[name = tensor("aw_chunk_1735_cast_fp16")]; + tensor var_16597_equation_0 = const()[name = tensor("op_16597_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16597_cast_fp16 = einsum(equation = var_16597_equation_0, values = (var_16235_cast_fp16, var_16085_cast_fp16))[name = tensor("op_16597_cast_fp16")]; + tensor var_16598_to_fp16 = const()[name = tensor("op_16598_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1737_cast_fp16 = mul(x = var_16597_cast_fp16, y = var_16598_to_fp16)[name = tensor("aw_chunk_1737_cast_fp16")]; + tensor var_16601_equation_0 = const()[name = tensor("op_16601_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16601_cast_fp16 = einsum(equation = var_16601_equation_0, values = (var_16235_cast_fp16, var_16092_cast_fp16))[name = tensor("op_16601_cast_fp16")]; + tensor var_16602_to_fp16 = const()[name = tensor("op_16602_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1739_cast_fp16 = mul(x = var_16601_cast_fp16, y = var_16602_to_fp16)[name = tensor("aw_chunk_1739_cast_fp16")]; + tensor var_16605_equation_0 = const()[name = tensor("op_16605_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16605_cast_fp16 = einsum(equation = var_16605_equation_0, values = (var_16235_cast_fp16, var_16099_cast_fp16))[name = tensor("op_16605_cast_fp16")]; + tensor var_16606_to_fp16 = const()[name = tensor("op_16606_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1741_cast_fp16 = mul(x = var_16605_cast_fp16, y = var_16606_to_fp16)[name = tensor("aw_chunk_1741_cast_fp16")]; + tensor var_16609_equation_0 = const()[name = tensor("op_16609_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16609_cast_fp16 = einsum(equation = var_16609_equation_0, values = (var_16235_cast_fp16, var_16106_cast_fp16))[name = tensor("op_16609_cast_fp16")]; + tensor var_16610_to_fp16 = const()[name = tensor("op_16610_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1743_cast_fp16 = mul(x = var_16609_cast_fp16, y = var_16610_to_fp16)[name = tensor("aw_chunk_1743_cast_fp16")]; + tensor var_16613_equation_0 = const()[name = tensor("op_16613_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16613_cast_fp16 = einsum(equation = var_16613_equation_0, values = (var_16239_cast_fp16, var_16113_cast_fp16))[name = tensor("op_16613_cast_fp16")]; + tensor var_16614_to_fp16 = const()[name = tensor("op_16614_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1745_cast_fp16 = mul(x = var_16613_cast_fp16, y = var_16614_to_fp16)[name = tensor("aw_chunk_1745_cast_fp16")]; + tensor var_16617_equation_0 = const()[name = tensor("op_16617_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16617_cast_fp16 = einsum(equation = var_16617_equation_0, values = (var_16239_cast_fp16, var_16120_cast_fp16))[name = tensor("op_16617_cast_fp16")]; + tensor var_16618_to_fp16 = const()[name = tensor("op_16618_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1747_cast_fp16 = mul(x = var_16617_cast_fp16, y = var_16618_to_fp16)[name = tensor("aw_chunk_1747_cast_fp16")]; + tensor var_16621_equation_0 = const()[name = tensor("op_16621_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16621_cast_fp16 = einsum(equation = var_16621_equation_0, values = (var_16239_cast_fp16, var_16127_cast_fp16))[name = tensor("op_16621_cast_fp16")]; + tensor var_16622_to_fp16 = const()[name = tensor("op_16622_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1749_cast_fp16 = mul(x = var_16621_cast_fp16, y = var_16622_to_fp16)[name = tensor("aw_chunk_1749_cast_fp16")]; + tensor var_16625_equation_0 = const()[name = tensor("op_16625_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16625_cast_fp16 = einsum(equation = var_16625_equation_0, values = (var_16239_cast_fp16, var_16134_cast_fp16))[name = tensor("op_16625_cast_fp16")]; + tensor var_16626_to_fp16 = const()[name = tensor("op_16626_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1751_cast_fp16 = mul(x = var_16625_cast_fp16, y = var_16626_to_fp16)[name = tensor("aw_chunk_1751_cast_fp16")]; + tensor var_16629_equation_0 = const()[name = tensor("op_16629_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16629_cast_fp16 = einsum(equation = var_16629_equation_0, values = (var_16243_cast_fp16, var_16141_cast_fp16))[name = tensor("op_16629_cast_fp16")]; + tensor var_16630_to_fp16 = const()[name = tensor("op_16630_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1753_cast_fp16 = mul(x = var_16629_cast_fp16, y = var_16630_to_fp16)[name = tensor("aw_chunk_1753_cast_fp16")]; + tensor var_16633_equation_0 = const()[name = tensor("op_16633_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16633_cast_fp16 = einsum(equation = var_16633_equation_0, values = (var_16243_cast_fp16, var_16148_cast_fp16))[name = tensor("op_16633_cast_fp16")]; + tensor var_16634_to_fp16 = const()[name = tensor("op_16634_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1755_cast_fp16 = mul(x = var_16633_cast_fp16, y = var_16634_to_fp16)[name = tensor("aw_chunk_1755_cast_fp16")]; + tensor var_16637_equation_0 = const()[name = tensor("op_16637_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16637_cast_fp16 = einsum(equation = var_16637_equation_0, values = (var_16243_cast_fp16, var_16155_cast_fp16))[name = tensor("op_16637_cast_fp16")]; + tensor var_16638_to_fp16 = const()[name = tensor("op_16638_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1757_cast_fp16 = mul(x = var_16637_cast_fp16, y = var_16638_to_fp16)[name = tensor("aw_chunk_1757_cast_fp16")]; + tensor var_16641_equation_0 = const()[name = tensor("op_16641_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_16641_cast_fp16 = einsum(equation = var_16641_equation_0, values = (var_16243_cast_fp16, var_16162_cast_fp16))[name = tensor("op_16641_cast_fp16")]; + tensor var_16642_to_fp16 = const()[name = tensor("op_16642_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1759_cast_fp16 = mul(x = var_16641_cast_fp16, y = var_16642_to_fp16)[name = tensor("aw_chunk_1759_cast_fp16")]; + tensor var_16644_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1601_cast_fp16)[name = tensor("op_16644_cast_fp16")]; + tensor var_16645_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1603_cast_fp16)[name = tensor("op_16645_cast_fp16")]; + tensor var_16646_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1605_cast_fp16)[name = tensor("op_16646_cast_fp16")]; + tensor var_16647_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1607_cast_fp16)[name = tensor("op_16647_cast_fp16")]; + tensor var_16648_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1609_cast_fp16)[name = tensor("op_16648_cast_fp16")]; + tensor var_16649_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1611_cast_fp16)[name = tensor("op_16649_cast_fp16")]; + tensor var_16650_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1613_cast_fp16)[name = tensor("op_16650_cast_fp16")]; + tensor var_16651_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1615_cast_fp16)[name = tensor("op_16651_cast_fp16")]; + tensor var_16652_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1617_cast_fp16)[name = tensor("op_16652_cast_fp16")]; + tensor var_16653_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1619_cast_fp16)[name = tensor("op_16653_cast_fp16")]; + tensor var_16654_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1621_cast_fp16)[name = tensor("op_16654_cast_fp16")]; + tensor var_16655_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1623_cast_fp16)[name = tensor("op_16655_cast_fp16")]; + tensor var_16656_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1625_cast_fp16)[name = tensor("op_16656_cast_fp16")]; + tensor var_16657_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1627_cast_fp16)[name = tensor("op_16657_cast_fp16")]; + tensor var_16658_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1629_cast_fp16)[name = tensor("op_16658_cast_fp16")]; + tensor var_16659_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1631_cast_fp16)[name = tensor("op_16659_cast_fp16")]; + tensor var_16660_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1633_cast_fp16)[name = tensor("op_16660_cast_fp16")]; + tensor var_16661_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1635_cast_fp16)[name = tensor("op_16661_cast_fp16")]; + tensor var_16662_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1637_cast_fp16)[name = tensor("op_16662_cast_fp16")]; + tensor var_16663_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1639_cast_fp16)[name = tensor("op_16663_cast_fp16")]; + tensor var_16664_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1641_cast_fp16)[name = tensor("op_16664_cast_fp16")]; + tensor var_16665_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1643_cast_fp16)[name = tensor("op_16665_cast_fp16")]; + tensor var_16666_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1645_cast_fp16)[name = tensor("op_16666_cast_fp16")]; + tensor var_16667_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1647_cast_fp16)[name = tensor("op_16667_cast_fp16")]; + tensor var_16668_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1649_cast_fp16)[name = tensor("op_16668_cast_fp16")]; + tensor var_16669_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1651_cast_fp16)[name = tensor("op_16669_cast_fp16")]; + tensor var_16670_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1653_cast_fp16)[name = tensor("op_16670_cast_fp16")]; + tensor var_16671_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1655_cast_fp16)[name = tensor("op_16671_cast_fp16")]; + tensor var_16672_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1657_cast_fp16)[name = tensor("op_16672_cast_fp16")]; + tensor var_16673_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1659_cast_fp16)[name = tensor("op_16673_cast_fp16")]; + tensor var_16674_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1661_cast_fp16)[name = tensor("op_16674_cast_fp16")]; + tensor var_16675_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1663_cast_fp16)[name = tensor("op_16675_cast_fp16")]; + tensor var_16676_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1665_cast_fp16)[name = tensor("op_16676_cast_fp16")]; + tensor var_16677_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1667_cast_fp16)[name = tensor("op_16677_cast_fp16")]; + tensor var_16678_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1669_cast_fp16)[name = tensor("op_16678_cast_fp16")]; + tensor var_16679_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1671_cast_fp16)[name = tensor("op_16679_cast_fp16")]; + tensor var_16680_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1673_cast_fp16)[name = tensor("op_16680_cast_fp16")]; + tensor var_16681_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1675_cast_fp16)[name = tensor("op_16681_cast_fp16")]; + tensor var_16682_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1677_cast_fp16)[name = tensor("op_16682_cast_fp16")]; + tensor var_16683_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1679_cast_fp16)[name = tensor("op_16683_cast_fp16")]; + tensor var_16684_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1681_cast_fp16)[name = tensor("op_16684_cast_fp16")]; + tensor var_16685_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1683_cast_fp16)[name = tensor("op_16685_cast_fp16")]; + tensor var_16686_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1685_cast_fp16)[name = tensor("op_16686_cast_fp16")]; + tensor var_16687_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1687_cast_fp16)[name = tensor("op_16687_cast_fp16")]; + tensor var_16688_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1689_cast_fp16)[name = tensor("op_16688_cast_fp16")]; + tensor var_16689_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1691_cast_fp16)[name = tensor("op_16689_cast_fp16")]; + tensor var_16690_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1693_cast_fp16)[name = tensor("op_16690_cast_fp16")]; + tensor var_16691_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1695_cast_fp16)[name = tensor("op_16691_cast_fp16")]; + tensor var_16692_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1697_cast_fp16)[name = tensor("op_16692_cast_fp16")]; + tensor var_16693_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1699_cast_fp16)[name = tensor("op_16693_cast_fp16")]; + tensor var_16694_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1701_cast_fp16)[name = tensor("op_16694_cast_fp16")]; + tensor var_16695_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1703_cast_fp16)[name = tensor("op_16695_cast_fp16")]; + tensor var_16696_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1705_cast_fp16)[name = tensor("op_16696_cast_fp16")]; + tensor var_16697_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1707_cast_fp16)[name = tensor("op_16697_cast_fp16")]; + tensor var_16698_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1709_cast_fp16)[name = tensor("op_16698_cast_fp16")]; + tensor var_16699_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1711_cast_fp16)[name = tensor("op_16699_cast_fp16")]; + tensor var_16700_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1713_cast_fp16)[name = tensor("op_16700_cast_fp16")]; + tensor var_16701_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1715_cast_fp16)[name = tensor("op_16701_cast_fp16")]; + tensor var_16702_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1717_cast_fp16)[name = tensor("op_16702_cast_fp16")]; + tensor var_16703_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1719_cast_fp16)[name = tensor("op_16703_cast_fp16")]; + tensor var_16704_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1721_cast_fp16)[name = tensor("op_16704_cast_fp16")]; + tensor var_16705_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1723_cast_fp16)[name = tensor("op_16705_cast_fp16")]; + tensor var_16706_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1725_cast_fp16)[name = tensor("op_16706_cast_fp16")]; + tensor var_16707_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1727_cast_fp16)[name = tensor("op_16707_cast_fp16")]; + tensor var_16708_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1729_cast_fp16)[name = tensor("op_16708_cast_fp16")]; + tensor var_16709_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1731_cast_fp16)[name = tensor("op_16709_cast_fp16")]; + tensor var_16710_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1733_cast_fp16)[name = tensor("op_16710_cast_fp16")]; + tensor var_16711_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1735_cast_fp16)[name = tensor("op_16711_cast_fp16")]; + tensor var_16712_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1737_cast_fp16)[name = tensor("op_16712_cast_fp16")]; + tensor var_16713_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1739_cast_fp16)[name = tensor("op_16713_cast_fp16")]; + tensor var_16714_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1741_cast_fp16)[name = tensor("op_16714_cast_fp16")]; + tensor var_16715_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1743_cast_fp16)[name = tensor("op_16715_cast_fp16")]; + tensor var_16716_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1745_cast_fp16)[name = tensor("op_16716_cast_fp16")]; + tensor var_16717_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1747_cast_fp16)[name = tensor("op_16717_cast_fp16")]; + tensor var_16718_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1749_cast_fp16)[name = tensor("op_16718_cast_fp16")]; + tensor var_16719_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1751_cast_fp16)[name = tensor("op_16719_cast_fp16")]; + tensor var_16720_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1753_cast_fp16)[name = tensor("op_16720_cast_fp16")]; + tensor var_16721_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1755_cast_fp16)[name = tensor("op_16721_cast_fp16")]; + tensor var_16722_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1757_cast_fp16)[name = tensor("op_16722_cast_fp16")]; + tensor var_16723_cast_fp16 = softmax(axis = var_15469, x = aw_chunk_1759_cast_fp16)[name = tensor("op_16723_cast_fp16")]; + tensor var_16725_equation_0 = const()[name = tensor("op_16725_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16725_cast_fp16 = einsum(equation = var_16725_equation_0, values = (var_16245_cast_fp16, var_16644_cast_fp16))[name = tensor("op_16725_cast_fp16")]; + tensor var_16727_equation_0 = const()[name = tensor("op_16727_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16727_cast_fp16 = einsum(equation = var_16727_equation_0, values = (var_16245_cast_fp16, var_16645_cast_fp16))[name = tensor("op_16727_cast_fp16")]; + tensor var_16729_equation_0 = const()[name = tensor("op_16729_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16729_cast_fp16 = einsum(equation = var_16729_equation_0, values = (var_16245_cast_fp16, var_16646_cast_fp16))[name = tensor("op_16729_cast_fp16")]; + tensor var_16731_equation_0 = const()[name = tensor("op_16731_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16731_cast_fp16 = einsum(equation = var_16731_equation_0, values = (var_16245_cast_fp16, var_16647_cast_fp16))[name = tensor("op_16731_cast_fp16")]; + tensor var_16733_equation_0 = const()[name = tensor("op_16733_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16733_cast_fp16 = einsum(equation = var_16733_equation_0, values = (var_16249_cast_fp16, var_16648_cast_fp16))[name = tensor("op_16733_cast_fp16")]; + tensor var_16735_equation_0 = const()[name = tensor("op_16735_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16735_cast_fp16 = einsum(equation = var_16735_equation_0, values = (var_16249_cast_fp16, var_16649_cast_fp16))[name = tensor("op_16735_cast_fp16")]; + tensor var_16737_equation_0 = const()[name = tensor("op_16737_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16737_cast_fp16 = einsum(equation = var_16737_equation_0, values = (var_16249_cast_fp16, var_16650_cast_fp16))[name = tensor("op_16737_cast_fp16")]; + tensor var_16739_equation_0 = const()[name = tensor("op_16739_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16739_cast_fp16 = einsum(equation = var_16739_equation_0, values = (var_16249_cast_fp16, var_16651_cast_fp16))[name = tensor("op_16739_cast_fp16")]; + tensor var_16741_equation_0 = const()[name = tensor("op_16741_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16741_cast_fp16 = einsum(equation = var_16741_equation_0, values = (var_16253_cast_fp16, var_16652_cast_fp16))[name = tensor("op_16741_cast_fp16")]; + tensor var_16743_equation_0 = const()[name = tensor("op_16743_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16743_cast_fp16 = einsum(equation = var_16743_equation_0, values = (var_16253_cast_fp16, var_16653_cast_fp16))[name = tensor("op_16743_cast_fp16")]; + tensor var_16745_equation_0 = const()[name = tensor("op_16745_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16745_cast_fp16 = einsum(equation = var_16745_equation_0, values = (var_16253_cast_fp16, var_16654_cast_fp16))[name = tensor("op_16745_cast_fp16")]; + tensor var_16747_equation_0 = const()[name = tensor("op_16747_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16747_cast_fp16 = einsum(equation = var_16747_equation_0, values = (var_16253_cast_fp16, var_16655_cast_fp16))[name = tensor("op_16747_cast_fp16")]; + tensor var_16749_equation_0 = const()[name = tensor("op_16749_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16749_cast_fp16 = einsum(equation = var_16749_equation_0, values = (var_16257_cast_fp16, var_16656_cast_fp16))[name = tensor("op_16749_cast_fp16")]; + tensor var_16751_equation_0 = const()[name = tensor("op_16751_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16751_cast_fp16 = einsum(equation = var_16751_equation_0, values = (var_16257_cast_fp16, var_16657_cast_fp16))[name = tensor("op_16751_cast_fp16")]; + tensor var_16753_equation_0 = const()[name = tensor("op_16753_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16753_cast_fp16 = einsum(equation = var_16753_equation_0, values = (var_16257_cast_fp16, var_16658_cast_fp16))[name = tensor("op_16753_cast_fp16")]; + tensor var_16755_equation_0 = const()[name = tensor("op_16755_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16755_cast_fp16 = einsum(equation = var_16755_equation_0, values = (var_16257_cast_fp16, var_16659_cast_fp16))[name = tensor("op_16755_cast_fp16")]; + tensor var_16757_equation_0 = const()[name = tensor("op_16757_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16757_cast_fp16 = einsum(equation = var_16757_equation_0, values = (var_16261_cast_fp16, var_16660_cast_fp16))[name = tensor("op_16757_cast_fp16")]; + tensor var_16759_equation_0 = const()[name = tensor("op_16759_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16759_cast_fp16 = einsum(equation = var_16759_equation_0, values = (var_16261_cast_fp16, var_16661_cast_fp16))[name = tensor("op_16759_cast_fp16")]; + tensor var_16761_equation_0 = const()[name = tensor("op_16761_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16761_cast_fp16 = einsum(equation = var_16761_equation_0, values = (var_16261_cast_fp16, var_16662_cast_fp16))[name = tensor("op_16761_cast_fp16")]; + tensor var_16763_equation_0 = const()[name = tensor("op_16763_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16763_cast_fp16 = einsum(equation = var_16763_equation_0, values = (var_16261_cast_fp16, var_16663_cast_fp16))[name = tensor("op_16763_cast_fp16")]; + tensor var_16765_equation_0 = const()[name = tensor("op_16765_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16765_cast_fp16 = einsum(equation = var_16765_equation_0, values = (var_16265_cast_fp16, var_16664_cast_fp16))[name = tensor("op_16765_cast_fp16")]; + tensor var_16767_equation_0 = const()[name = tensor("op_16767_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16767_cast_fp16 = einsum(equation = var_16767_equation_0, values = (var_16265_cast_fp16, var_16665_cast_fp16))[name = tensor("op_16767_cast_fp16")]; + tensor var_16769_equation_0 = const()[name = tensor("op_16769_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16769_cast_fp16 = einsum(equation = var_16769_equation_0, values = (var_16265_cast_fp16, var_16666_cast_fp16))[name = tensor("op_16769_cast_fp16")]; + tensor var_16771_equation_0 = const()[name = tensor("op_16771_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16771_cast_fp16 = einsum(equation = var_16771_equation_0, values = (var_16265_cast_fp16, var_16667_cast_fp16))[name = tensor("op_16771_cast_fp16")]; + tensor var_16773_equation_0 = const()[name = tensor("op_16773_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16773_cast_fp16 = einsum(equation = var_16773_equation_0, values = (var_16269_cast_fp16, var_16668_cast_fp16))[name = tensor("op_16773_cast_fp16")]; + tensor var_16775_equation_0 = const()[name = tensor("op_16775_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16775_cast_fp16 = einsum(equation = var_16775_equation_0, values = (var_16269_cast_fp16, var_16669_cast_fp16))[name = tensor("op_16775_cast_fp16")]; + tensor var_16777_equation_0 = const()[name = tensor("op_16777_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16777_cast_fp16 = einsum(equation = var_16777_equation_0, values = (var_16269_cast_fp16, var_16670_cast_fp16))[name = tensor("op_16777_cast_fp16")]; + tensor var_16779_equation_0 = const()[name = tensor("op_16779_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16779_cast_fp16 = einsum(equation = var_16779_equation_0, values = (var_16269_cast_fp16, var_16671_cast_fp16))[name = tensor("op_16779_cast_fp16")]; + tensor var_16781_equation_0 = const()[name = tensor("op_16781_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16781_cast_fp16 = einsum(equation = var_16781_equation_0, values = (var_16273_cast_fp16, var_16672_cast_fp16))[name = tensor("op_16781_cast_fp16")]; + tensor var_16783_equation_0 = const()[name = tensor("op_16783_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16783_cast_fp16 = einsum(equation = var_16783_equation_0, values = (var_16273_cast_fp16, var_16673_cast_fp16))[name = tensor("op_16783_cast_fp16")]; + tensor var_16785_equation_0 = const()[name = tensor("op_16785_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16785_cast_fp16 = einsum(equation = var_16785_equation_0, values = (var_16273_cast_fp16, var_16674_cast_fp16))[name = tensor("op_16785_cast_fp16")]; + tensor var_16787_equation_0 = const()[name = tensor("op_16787_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16787_cast_fp16 = einsum(equation = var_16787_equation_0, values = (var_16273_cast_fp16, var_16675_cast_fp16))[name = tensor("op_16787_cast_fp16")]; + tensor var_16789_equation_0 = const()[name = tensor("op_16789_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16789_cast_fp16 = einsum(equation = var_16789_equation_0, values = (var_16277_cast_fp16, var_16676_cast_fp16))[name = tensor("op_16789_cast_fp16")]; + tensor var_16791_equation_0 = const()[name = tensor("op_16791_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16791_cast_fp16 = einsum(equation = var_16791_equation_0, values = (var_16277_cast_fp16, var_16677_cast_fp16))[name = tensor("op_16791_cast_fp16")]; + tensor var_16793_equation_0 = const()[name = tensor("op_16793_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16793_cast_fp16 = einsum(equation = var_16793_equation_0, values = (var_16277_cast_fp16, var_16678_cast_fp16))[name = tensor("op_16793_cast_fp16")]; + tensor var_16795_equation_0 = const()[name = tensor("op_16795_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16795_cast_fp16 = einsum(equation = var_16795_equation_0, values = (var_16277_cast_fp16, var_16679_cast_fp16))[name = tensor("op_16795_cast_fp16")]; + tensor var_16797_equation_0 = const()[name = tensor("op_16797_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16797_cast_fp16 = einsum(equation = var_16797_equation_0, values = (var_16281_cast_fp16, var_16680_cast_fp16))[name = tensor("op_16797_cast_fp16")]; + tensor var_16799_equation_0 = const()[name = tensor("op_16799_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16799_cast_fp16 = einsum(equation = var_16799_equation_0, values = (var_16281_cast_fp16, var_16681_cast_fp16))[name = tensor("op_16799_cast_fp16")]; + tensor var_16801_equation_0 = const()[name = tensor("op_16801_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16801_cast_fp16 = einsum(equation = var_16801_equation_0, values = (var_16281_cast_fp16, var_16682_cast_fp16))[name = tensor("op_16801_cast_fp16")]; + tensor var_16803_equation_0 = const()[name = tensor("op_16803_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16803_cast_fp16 = einsum(equation = var_16803_equation_0, values = (var_16281_cast_fp16, var_16683_cast_fp16))[name = tensor("op_16803_cast_fp16")]; + tensor var_16805_equation_0 = const()[name = tensor("op_16805_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16805_cast_fp16 = einsum(equation = var_16805_equation_0, values = (var_16285_cast_fp16, var_16684_cast_fp16))[name = tensor("op_16805_cast_fp16")]; + tensor var_16807_equation_0 = const()[name = tensor("op_16807_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16807_cast_fp16 = einsum(equation = var_16807_equation_0, values = (var_16285_cast_fp16, var_16685_cast_fp16))[name = tensor("op_16807_cast_fp16")]; + tensor var_16809_equation_0 = const()[name = tensor("op_16809_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16809_cast_fp16 = einsum(equation = var_16809_equation_0, values = (var_16285_cast_fp16, var_16686_cast_fp16))[name = tensor("op_16809_cast_fp16")]; + tensor var_16811_equation_0 = const()[name = tensor("op_16811_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16811_cast_fp16 = einsum(equation = var_16811_equation_0, values = (var_16285_cast_fp16, var_16687_cast_fp16))[name = tensor("op_16811_cast_fp16")]; + tensor var_16813_equation_0 = const()[name = tensor("op_16813_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16813_cast_fp16 = einsum(equation = var_16813_equation_0, values = (var_16289_cast_fp16, var_16688_cast_fp16))[name = tensor("op_16813_cast_fp16")]; + tensor var_16815_equation_0 = const()[name = tensor("op_16815_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16815_cast_fp16 = einsum(equation = var_16815_equation_0, values = (var_16289_cast_fp16, var_16689_cast_fp16))[name = tensor("op_16815_cast_fp16")]; + tensor var_16817_equation_0 = const()[name = tensor("op_16817_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16817_cast_fp16 = einsum(equation = var_16817_equation_0, values = (var_16289_cast_fp16, var_16690_cast_fp16))[name = tensor("op_16817_cast_fp16")]; + tensor var_16819_equation_0 = const()[name = tensor("op_16819_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16819_cast_fp16 = einsum(equation = var_16819_equation_0, values = (var_16289_cast_fp16, var_16691_cast_fp16))[name = tensor("op_16819_cast_fp16")]; + tensor var_16821_equation_0 = const()[name = tensor("op_16821_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16821_cast_fp16 = einsum(equation = var_16821_equation_0, values = (var_16293_cast_fp16, var_16692_cast_fp16))[name = tensor("op_16821_cast_fp16")]; + tensor var_16823_equation_0 = const()[name = tensor("op_16823_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16823_cast_fp16 = einsum(equation = var_16823_equation_0, values = (var_16293_cast_fp16, var_16693_cast_fp16))[name = tensor("op_16823_cast_fp16")]; + tensor var_16825_equation_0 = const()[name = tensor("op_16825_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16825_cast_fp16 = einsum(equation = var_16825_equation_0, values = (var_16293_cast_fp16, var_16694_cast_fp16))[name = tensor("op_16825_cast_fp16")]; + tensor var_16827_equation_0 = const()[name = tensor("op_16827_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16827_cast_fp16 = einsum(equation = var_16827_equation_0, values = (var_16293_cast_fp16, var_16695_cast_fp16))[name = tensor("op_16827_cast_fp16")]; + tensor var_16829_equation_0 = const()[name = tensor("op_16829_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16829_cast_fp16 = einsum(equation = var_16829_equation_0, values = (var_16297_cast_fp16, var_16696_cast_fp16))[name = tensor("op_16829_cast_fp16")]; + tensor var_16831_equation_0 = const()[name = tensor("op_16831_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16831_cast_fp16 = einsum(equation = var_16831_equation_0, values = (var_16297_cast_fp16, var_16697_cast_fp16))[name = tensor("op_16831_cast_fp16")]; + tensor var_16833_equation_0 = const()[name = tensor("op_16833_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16833_cast_fp16 = einsum(equation = var_16833_equation_0, values = (var_16297_cast_fp16, var_16698_cast_fp16))[name = tensor("op_16833_cast_fp16")]; + tensor var_16835_equation_0 = const()[name = tensor("op_16835_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16835_cast_fp16 = einsum(equation = var_16835_equation_0, values = (var_16297_cast_fp16, var_16699_cast_fp16))[name = tensor("op_16835_cast_fp16")]; + tensor var_16837_equation_0 = const()[name = tensor("op_16837_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16837_cast_fp16 = einsum(equation = var_16837_equation_0, values = (var_16301_cast_fp16, var_16700_cast_fp16))[name = tensor("op_16837_cast_fp16")]; + tensor var_16839_equation_0 = const()[name = tensor("op_16839_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16839_cast_fp16 = einsum(equation = var_16839_equation_0, values = (var_16301_cast_fp16, var_16701_cast_fp16))[name = tensor("op_16839_cast_fp16")]; + tensor var_16841_equation_0 = const()[name = tensor("op_16841_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16841_cast_fp16 = einsum(equation = var_16841_equation_0, values = (var_16301_cast_fp16, var_16702_cast_fp16))[name = tensor("op_16841_cast_fp16")]; + tensor var_16843_equation_0 = const()[name = tensor("op_16843_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16843_cast_fp16 = einsum(equation = var_16843_equation_0, values = (var_16301_cast_fp16, var_16703_cast_fp16))[name = tensor("op_16843_cast_fp16")]; + tensor var_16845_equation_0 = const()[name = tensor("op_16845_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16845_cast_fp16 = einsum(equation = var_16845_equation_0, values = (var_16305_cast_fp16, var_16704_cast_fp16))[name = tensor("op_16845_cast_fp16")]; + tensor var_16847_equation_0 = const()[name = tensor("op_16847_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16847_cast_fp16 = einsum(equation = var_16847_equation_0, values = (var_16305_cast_fp16, var_16705_cast_fp16))[name = tensor("op_16847_cast_fp16")]; + tensor var_16849_equation_0 = const()[name = tensor("op_16849_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16849_cast_fp16 = einsum(equation = var_16849_equation_0, values = (var_16305_cast_fp16, var_16706_cast_fp16))[name = tensor("op_16849_cast_fp16")]; + tensor var_16851_equation_0 = const()[name = tensor("op_16851_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16851_cast_fp16 = einsum(equation = var_16851_equation_0, values = (var_16305_cast_fp16, var_16707_cast_fp16))[name = tensor("op_16851_cast_fp16")]; + tensor var_16853_equation_0 = const()[name = tensor("op_16853_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16853_cast_fp16 = einsum(equation = var_16853_equation_0, values = (var_16309_cast_fp16, var_16708_cast_fp16))[name = tensor("op_16853_cast_fp16")]; + tensor var_16855_equation_0 = const()[name = tensor("op_16855_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16855_cast_fp16 = einsum(equation = var_16855_equation_0, values = (var_16309_cast_fp16, var_16709_cast_fp16))[name = tensor("op_16855_cast_fp16")]; + tensor var_16857_equation_0 = const()[name = tensor("op_16857_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16857_cast_fp16 = einsum(equation = var_16857_equation_0, values = (var_16309_cast_fp16, var_16710_cast_fp16))[name = tensor("op_16857_cast_fp16")]; + tensor var_16859_equation_0 = const()[name = tensor("op_16859_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16859_cast_fp16 = einsum(equation = var_16859_equation_0, values = (var_16309_cast_fp16, var_16711_cast_fp16))[name = tensor("op_16859_cast_fp16")]; + tensor var_16861_equation_0 = const()[name = tensor("op_16861_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16861_cast_fp16 = einsum(equation = var_16861_equation_0, values = (var_16313_cast_fp16, var_16712_cast_fp16))[name = tensor("op_16861_cast_fp16")]; + tensor var_16863_equation_0 = const()[name = tensor("op_16863_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16863_cast_fp16 = einsum(equation = var_16863_equation_0, values = (var_16313_cast_fp16, var_16713_cast_fp16))[name = tensor("op_16863_cast_fp16")]; + tensor var_16865_equation_0 = const()[name = tensor("op_16865_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16865_cast_fp16 = einsum(equation = var_16865_equation_0, values = (var_16313_cast_fp16, var_16714_cast_fp16))[name = tensor("op_16865_cast_fp16")]; + tensor var_16867_equation_0 = const()[name = tensor("op_16867_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16867_cast_fp16 = einsum(equation = var_16867_equation_0, values = (var_16313_cast_fp16, var_16715_cast_fp16))[name = tensor("op_16867_cast_fp16")]; + tensor var_16869_equation_0 = const()[name = tensor("op_16869_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16869_cast_fp16 = einsum(equation = var_16869_equation_0, values = (var_16317_cast_fp16, var_16716_cast_fp16))[name = tensor("op_16869_cast_fp16")]; + tensor var_16871_equation_0 = const()[name = tensor("op_16871_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16871_cast_fp16 = einsum(equation = var_16871_equation_0, values = (var_16317_cast_fp16, var_16717_cast_fp16))[name = tensor("op_16871_cast_fp16")]; + tensor var_16873_equation_0 = const()[name = tensor("op_16873_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16873_cast_fp16 = einsum(equation = var_16873_equation_0, values = (var_16317_cast_fp16, var_16718_cast_fp16))[name = tensor("op_16873_cast_fp16")]; + tensor var_16875_equation_0 = const()[name = tensor("op_16875_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16875_cast_fp16 = einsum(equation = var_16875_equation_0, values = (var_16317_cast_fp16, var_16719_cast_fp16))[name = tensor("op_16875_cast_fp16")]; + tensor var_16877_equation_0 = const()[name = tensor("op_16877_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16877_cast_fp16 = einsum(equation = var_16877_equation_0, values = (var_16321_cast_fp16, var_16720_cast_fp16))[name = tensor("op_16877_cast_fp16")]; + tensor var_16879_equation_0 = const()[name = tensor("op_16879_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16879_cast_fp16 = einsum(equation = var_16879_equation_0, values = (var_16321_cast_fp16, var_16721_cast_fp16))[name = tensor("op_16879_cast_fp16")]; + tensor var_16881_equation_0 = const()[name = tensor("op_16881_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16881_cast_fp16 = einsum(equation = var_16881_equation_0, values = (var_16321_cast_fp16, var_16722_cast_fp16))[name = tensor("op_16881_cast_fp16")]; + tensor var_16883_equation_0 = const()[name = tensor("op_16883_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_16883_cast_fp16 = einsum(equation = var_16883_equation_0, values = (var_16321_cast_fp16, var_16723_cast_fp16))[name = tensor("op_16883_cast_fp16")]; + tensor var_16885_interleave_0 = const()[name = tensor("op_16885_interleave_0"), val = tensor(false)]; + tensor var_16885_cast_fp16 = concat(axis = var_15444, interleave = var_16885_interleave_0, values = (var_16725_cast_fp16, var_16727_cast_fp16, var_16729_cast_fp16, var_16731_cast_fp16))[name = tensor("op_16885_cast_fp16")]; + tensor var_16887_interleave_0 = const()[name = tensor("op_16887_interleave_0"), val = tensor(false)]; + tensor var_16887_cast_fp16 = concat(axis = var_15444, interleave = var_16887_interleave_0, values = (var_16733_cast_fp16, var_16735_cast_fp16, var_16737_cast_fp16, var_16739_cast_fp16))[name = tensor("op_16887_cast_fp16")]; + tensor var_16889_interleave_0 = const()[name = tensor("op_16889_interleave_0"), val = tensor(false)]; + tensor var_16889_cast_fp16 = concat(axis = var_15444, interleave = var_16889_interleave_0, values = (var_16741_cast_fp16, var_16743_cast_fp16, var_16745_cast_fp16, var_16747_cast_fp16))[name = tensor("op_16889_cast_fp16")]; + tensor var_16891_interleave_0 = const()[name = tensor("op_16891_interleave_0"), val = tensor(false)]; + tensor var_16891_cast_fp16 = concat(axis = var_15444, interleave = var_16891_interleave_0, values = (var_16749_cast_fp16, var_16751_cast_fp16, var_16753_cast_fp16, var_16755_cast_fp16))[name = tensor("op_16891_cast_fp16")]; + tensor var_16893_interleave_0 = const()[name = tensor("op_16893_interleave_0"), val = tensor(false)]; + tensor var_16893_cast_fp16 = concat(axis = var_15444, interleave = var_16893_interleave_0, values = (var_16757_cast_fp16, var_16759_cast_fp16, var_16761_cast_fp16, var_16763_cast_fp16))[name = tensor("op_16893_cast_fp16")]; + tensor var_16895_interleave_0 = const()[name = tensor("op_16895_interleave_0"), val = tensor(false)]; + tensor var_16895_cast_fp16 = concat(axis = var_15444, interleave = var_16895_interleave_0, values = (var_16765_cast_fp16, var_16767_cast_fp16, var_16769_cast_fp16, var_16771_cast_fp16))[name = tensor("op_16895_cast_fp16")]; + tensor var_16897_interleave_0 = const()[name = tensor("op_16897_interleave_0"), val = tensor(false)]; + tensor var_16897_cast_fp16 = concat(axis = var_15444, interleave = var_16897_interleave_0, values = (var_16773_cast_fp16, var_16775_cast_fp16, var_16777_cast_fp16, var_16779_cast_fp16))[name = tensor("op_16897_cast_fp16")]; + tensor var_16899_interleave_0 = const()[name = tensor("op_16899_interleave_0"), val = tensor(false)]; + tensor var_16899_cast_fp16 = concat(axis = var_15444, interleave = var_16899_interleave_0, values = (var_16781_cast_fp16, var_16783_cast_fp16, var_16785_cast_fp16, var_16787_cast_fp16))[name = tensor("op_16899_cast_fp16")]; + tensor var_16901_interleave_0 = const()[name = tensor("op_16901_interleave_0"), val = tensor(false)]; + tensor var_16901_cast_fp16 = concat(axis = var_15444, interleave = var_16901_interleave_0, values = (var_16789_cast_fp16, var_16791_cast_fp16, var_16793_cast_fp16, var_16795_cast_fp16))[name = tensor("op_16901_cast_fp16")]; + tensor var_16903_interleave_0 = const()[name = tensor("op_16903_interleave_0"), val = tensor(false)]; + tensor var_16903_cast_fp16 = concat(axis = var_15444, interleave = var_16903_interleave_0, values = (var_16797_cast_fp16, var_16799_cast_fp16, var_16801_cast_fp16, var_16803_cast_fp16))[name = tensor("op_16903_cast_fp16")]; + tensor var_16905_interleave_0 = const()[name = tensor("op_16905_interleave_0"), val = tensor(false)]; + tensor var_16905_cast_fp16 = concat(axis = var_15444, interleave = var_16905_interleave_0, values = (var_16805_cast_fp16, var_16807_cast_fp16, var_16809_cast_fp16, var_16811_cast_fp16))[name = tensor("op_16905_cast_fp16")]; + tensor var_16907_interleave_0 = const()[name = tensor("op_16907_interleave_0"), val = tensor(false)]; + tensor var_16907_cast_fp16 = concat(axis = var_15444, interleave = var_16907_interleave_0, values = (var_16813_cast_fp16, var_16815_cast_fp16, var_16817_cast_fp16, var_16819_cast_fp16))[name = tensor("op_16907_cast_fp16")]; + tensor var_16909_interleave_0 = const()[name = tensor("op_16909_interleave_0"), val = tensor(false)]; + tensor var_16909_cast_fp16 = concat(axis = var_15444, interleave = var_16909_interleave_0, values = (var_16821_cast_fp16, var_16823_cast_fp16, var_16825_cast_fp16, var_16827_cast_fp16))[name = tensor("op_16909_cast_fp16")]; + tensor var_16911_interleave_0 = const()[name = tensor("op_16911_interleave_0"), val = tensor(false)]; + tensor var_16911_cast_fp16 = concat(axis = var_15444, interleave = var_16911_interleave_0, values = (var_16829_cast_fp16, var_16831_cast_fp16, var_16833_cast_fp16, var_16835_cast_fp16))[name = tensor("op_16911_cast_fp16")]; + tensor var_16913_interleave_0 = const()[name = tensor("op_16913_interleave_0"), val = tensor(false)]; + tensor var_16913_cast_fp16 = concat(axis = var_15444, interleave = var_16913_interleave_0, values = (var_16837_cast_fp16, var_16839_cast_fp16, var_16841_cast_fp16, var_16843_cast_fp16))[name = tensor("op_16913_cast_fp16")]; + tensor var_16915_interleave_0 = const()[name = tensor("op_16915_interleave_0"), val = tensor(false)]; + tensor var_16915_cast_fp16 = concat(axis = var_15444, interleave = var_16915_interleave_0, values = (var_16845_cast_fp16, var_16847_cast_fp16, var_16849_cast_fp16, var_16851_cast_fp16))[name = tensor("op_16915_cast_fp16")]; + tensor var_16917_interleave_0 = const()[name = tensor("op_16917_interleave_0"), val = tensor(false)]; + tensor var_16917_cast_fp16 = concat(axis = var_15444, interleave = var_16917_interleave_0, values = (var_16853_cast_fp16, var_16855_cast_fp16, var_16857_cast_fp16, var_16859_cast_fp16))[name = tensor("op_16917_cast_fp16")]; + tensor var_16919_interleave_0 = const()[name = tensor("op_16919_interleave_0"), val = tensor(false)]; + tensor var_16919_cast_fp16 = concat(axis = var_15444, interleave = var_16919_interleave_0, values = (var_16861_cast_fp16, var_16863_cast_fp16, var_16865_cast_fp16, var_16867_cast_fp16))[name = tensor("op_16919_cast_fp16")]; + tensor var_16921_interleave_0 = const()[name = tensor("op_16921_interleave_0"), val = tensor(false)]; + tensor var_16921_cast_fp16 = concat(axis = var_15444, interleave = var_16921_interleave_0, values = (var_16869_cast_fp16, var_16871_cast_fp16, var_16873_cast_fp16, var_16875_cast_fp16))[name = tensor("op_16921_cast_fp16")]; + tensor var_16923_interleave_0 = const()[name = tensor("op_16923_interleave_0"), val = tensor(false)]; + tensor var_16923_cast_fp16 = concat(axis = var_15444, interleave = var_16923_interleave_0, values = (var_16877_cast_fp16, var_16879_cast_fp16, var_16881_cast_fp16, var_16883_cast_fp16))[name = tensor("op_16923_cast_fp16")]; + tensor input_81_interleave_0 = const()[name = tensor("input_81_interleave_0"), val = tensor(false)]; + tensor input_81_cast_fp16 = concat(axis = var_15469, interleave = input_81_interleave_0, values = (var_16885_cast_fp16, var_16887_cast_fp16, var_16889_cast_fp16, var_16891_cast_fp16, var_16893_cast_fp16, var_16895_cast_fp16, var_16897_cast_fp16, var_16899_cast_fp16, var_16901_cast_fp16, var_16903_cast_fp16, var_16905_cast_fp16, var_16907_cast_fp16, var_16909_cast_fp16, var_16911_cast_fp16, var_16913_cast_fp16, var_16915_cast_fp16, var_16917_cast_fp16, var_16919_cast_fp16, var_16921_cast_fp16, var_16923_cast_fp16))[name = tensor("input_81_cast_fp16")]; + tensor var_16928 = const()[name = tensor("op_16928"), val = tensor([1, 1])]; + tensor var_16930 = const()[name = tensor("op_16930"), val = tensor([1, 1])]; + tensor obj_43_pad_type_0 = const()[name = tensor("obj_43_pad_type_0"), val = tensor("custom")]; + tensor obj_43_pad_0 = const()[name = tensor("obj_43_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_10_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_10_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(417669440)))]; + tensor layers_10_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_10_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(420946304)))]; + tensor obj_43_cast_fp16 = conv(bias = layers_10_self_attn_o_proj_bias_to_fp16, dilations = var_16930, groups = var_15469, pad = obj_43_pad_0, pad_type = obj_43_pad_type_0, strides = var_16928, weight = layers_10_self_attn_o_proj_weight_to_fp16, x = input_81_cast_fp16)[name = tensor("obj_43_cast_fp16")]; + tensor inputs_43_cast_fp16 = add(x = inputs_41_cast_fp16, y = obj_43_cast_fp16)[name = tensor("inputs_43_cast_fp16")]; + tensor var_16936 = const()[name = tensor("op_16936"), val = tensor([1])]; + tensor channels_mean_43_cast_fp16 = reduce_mean(axes = var_16936, keep_dims = var_15470, x = inputs_43_cast_fp16)[name = tensor("channels_mean_43_cast_fp16")]; + tensor zero_mean_43_cast_fp16 = sub(x = inputs_43_cast_fp16, y = channels_mean_43_cast_fp16)[name = tensor("zero_mean_43_cast_fp16")]; + tensor zero_mean_sq_43_cast_fp16 = mul(x = zero_mean_43_cast_fp16, y = zero_mean_43_cast_fp16)[name = tensor("zero_mean_sq_43_cast_fp16")]; + tensor var_16940 = const()[name = tensor("op_16940"), val = tensor([1])]; + tensor var_16941_cast_fp16 = reduce_mean(axes = var_16940, keep_dims = var_15470, x = zero_mean_sq_43_cast_fp16)[name = tensor("op_16941_cast_fp16")]; + tensor var_16942_to_fp16 = const()[name = tensor("op_16942_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_16943_cast_fp16 = add(x = var_16941_cast_fp16, y = var_16942_to_fp16)[name = tensor("op_16943_cast_fp16")]; + tensor denom_43_epsilon_0_to_fp16 = const()[name = tensor("denom_43_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_43_cast_fp16 = rsqrt(epsilon = denom_43_epsilon_0_to_fp16, x = var_16943_cast_fp16)[name = tensor("denom_43_cast_fp16")]; + tensor out_43_cast_fp16 = mul(x = zero_mean_43_cast_fp16, y = denom_43_cast_fp16)[name = tensor("out_43_cast_fp16")]; + tensor input_83_gamma_0_to_fp16 = const()[name = tensor("input_83_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(420948928)))]; + tensor input_83_beta_0_to_fp16 = const()[name = tensor("input_83_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(420951552)))]; + tensor input_83_epsilon_0_to_fp16 = const()[name = tensor("input_83_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_83_cast_fp16 = batch_norm(beta = input_83_beta_0_to_fp16, epsilon = input_83_epsilon_0_to_fp16, gamma = input_83_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_43_cast_fp16)[name = tensor("input_83_cast_fp16")]; + tensor var_16954 = const()[name = tensor("op_16954"), val = tensor([1, 1])]; + tensor var_16956 = const()[name = tensor("op_16956"), val = tensor([1, 1])]; + tensor input_85_pad_type_0 = const()[name = tensor("input_85_pad_type_0"), val = tensor("custom")]; + tensor input_85_pad_0 = const()[name = tensor("input_85_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_10_fc1_weight_to_fp16 = const()[name = tensor("layers_10_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(420954176)))]; + tensor layers_10_fc1_bias_to_fp16 = const()[name = tensor("layers_10_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(434061440)))]; + tensor input_85_cast_fp16 = conv(bias = layers_10_fc1_bias_to_fp16, dilations = var_16956, groups = var_15469, pad = input_85_pad_0, pad_type = input_85_pad_type_0, strides = var_16954, weight = layers_10_fc1_weight_to_fp16, x = input_83_cast_fp16)[name = tensor("input_85_cast_fp16")]; + tensor input_87_mode_0 = const()[name = tensor("input_87_mode_0"), val = tensor("EXACT")]; + tensor input_87_cast_fp16 = gelu(mode = input_87_mode_0, x = input_85_cast_fp16)[name = tensor("input_87_cast_fp16")]; + tensor var_16962 = const()[name = tensor("op_16962"), val = tensor([1, 1])]; + tensor var_16964 = const()[name = tensor("op_16964"), val = tensor([1, 1])]; + tensor hidden_states_25_pad_type_0 = const()[name = tensor("hidden_states_25_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_25_pad_0 = const()[name = tensor("hidden_states_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_10_fc2_weight_to_fp16 = const()[name = tensor("layers_10_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(434071744)))]; + tensor layers_10_fc2_bias_to_fp16 = const()[name = tensor("layers_10_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(447179008)))]; + tensor hidden_states_25_cast_fp16 = conv(bias = layers_10_fc2_bias_to_fp16, dilations = var_16964, groups = var_15469, pad = hidden_states_25_pad_0, pad_type = hidden_states_25_pad_type_0, strides = var_16962, weight = layers_10_fc2_weight_to_fp16, x = input_87_cast_fp16)[name = tensor("hidden_states_25_cast_fp16")]; + tensor inputs_45_cast_fp16 = add(x = inputs_43_cast_fp16, y = hidden_states_25_cast_fp16)[name = tensor("inputs_45_cast_fp16")]; + tensor var_16971 = const()[name = tensor("op_16971"), val = tensor(3)]; + tensor var_16996 = const()[name = tensor("op_16996"), val = tensor(1)]; + tensor var_16997 = const()[name = tensor("op_16997"), val = tensor(true)]; + tensor var_17007 = const()[name = tensor("op_17007"), val = tensor([1])]; + tensor channels_mean_45_cast_fp16 = reduce_mean(axes = var_17007, keep_dims = var_16997, x = inputs_45_cast_fp16)[name = tensor("channels_mean_45_cast_fp16")]; + tensor zero_mean_45_cast_fp16 = sub(x = inputs_45_cast_fp16, y = channels_mean_45_cast_fp16)[name = tensor("zero_mean_45_cast_fp16")]; + tensor zero_mean_sq_45_cast_fp16 = mul(x = zero_mean_45_cast_fp16, y = zero_mean_45_cast_fp16)[name = tensor("zero_mean_sq_45_cast_fp16")]; + tensor var_17011 = const()[name = tensor("op_17011"), val = tensor([1])]; + tensor var_17012_cast_fp16 = reduce_mean(axes = var_17011, keep_dims = var_16997, x = zero_mean_sq_45_cast_fp16)[name = tensor("op_17012_cast_fp16")]; + tensor var_17013_to_fp16 = const()[name = tensor("op_17013_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_17014_cast_fp16 = add(x = var_17012_cast_fp16, y = var_17013_to_fp16)[name = tensor("op_17014_cast_fp16")]; + tensor denom_45_epsilon_0_to_fp16 = const()[name = tensor("denom_45_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_45_cast_fp16 = rsqrt(epsilon = denom_45_epsilon_0_to_fp16, x = var_17014_cast_fp16)[name = tensor("denom_45_cast_fp16")]; + tensor out_45_cast_fp16 = mul(x = zero_mean_45_cast_fp16, y = denom_45_cast_fp16)[name = tensor("out_45_cast_fp16")]; + tensor obj_45_gamma_0_to_fp16 = const()[name = tensor("obj_45_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(447181632)))]; + tensor obj_45_beta_0_to_fp16 = const()[name = tensor("obj_45_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(447184256)))]; + tensor obj_45_epsilon_0_to_fp16 = const()[name = tensor("obj_45_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_45_cast_fp16 = batch_norm(beta = obj_45_beta_0_to_fp16, epsilon = obj_45_epsilon_0_to_fp16, gamma = obj_45_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_45_cast_fp16)[name = tensor("obj_45_cast_fp16")]; + tensor var_17029 = const()[name = tensor("op_17029"), val = tensor([1, 1])]; + tensor var_17031 = const()[name = tensor("op_17031"), val = tensor([1, 1])]; + tensor query_23_pad_type_0 = const()[name = tensor("query_23_pad_type_0"), val = tensor("custom")]; + tensor query_23_pad_0 = const()[name = tensor("query_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_11_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_11_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(447186880)))]; + tensor layers_11_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_11_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(450463744)))]; + tensor query_23_cast_fp16 = conv(bias = layers_11_self_attn_q_proj_bias_to_fp16, dilations = var_17031, groups = var_16996, pad = query_23_pad_0, pad_type = query_23_pad_type_0, strides = var_17029, weight = layers_11_self_attn_q_proj_weight_to_fp16, x = obj_45_cast_fp16)[name = tensor("query_23_cast_fp16")]; + tensor var_17035 = const()[name = tensor("op_17035"), val = tensor([1, 1])]; + tensor var_17037 = const()[name = tensor("op_17037"), val = tensor([1, 1])]; + tensor key_23_pad_type_0 = const()[name = tensor("key_23_pad_type_0"), val = tensor("custom")]; + tensor key_23_pad_0 = const()[name = tensor("key_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_11_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_11_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(450466368)))]; + tensor key_23_cast_fp16 = conv(dilations = var_17037, groups = var_16996, pad = key_23_pad_0, pad_type = key_23_pad_type_0, strides = var_17035, weight = layers_11_self_attn_k_proj_weight_to_fp16, x = obj_45_cast_fp16)[name = tensor("key_23_cast_fp16")]; + tensor var_17042 = const()[name = tensor("op_17042"), val = tensor([1, 1])]; + tensor var_17044 = const()[name = tensor("op_17044"), val = tensor([1, 1])]; + tensor value_23_pad_type_0 = const()[name = tensor("value_23_pad_type_0"), val = tensor("custom")]; + tensor value_23_pad_0 = const()[name = tensor("value_23_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_11_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_11_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(453743232)))]; + tensor layers_11_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_11_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(457020096)))]; + tensor value_23_cast_fp16 = conv(bias = layers_11_self_attn_v_proj_bias_to_fp16, dilations = var_17044, groups = var_16996, pad = value_23_pad_0, pad_type = value_23_pad_type_0, strides = var_17042, weight = layers_11_self_attn_v_proj_weight_to_fp16, x = obj_45_cast_fp16)[name = tensor("value_23_cast_fp16")]; + tensor var_17051_begin_0 = const()[name = tensor("op_17051_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_17051_end_0 = const()[name = tensor("op_17051_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_17051_end_mask_0 = const()[name = tensor("op_17051_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_17051_cast_fp16 = slice_by_index(begin = var_17051_begin_0, end = var_17051_end_0, end_mask = var_17051_end_mask_0, x = query_23_cast_fp16)[name = tensor("op_17051_cast_fp16")]; + tensor var_17055_begin_0 = const()[name = tensor("op_17055_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_17055_end_0 = const()[name = tensor("op_17055_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_17055_end_mask_0 = const()[name = tensor("op_17055_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_17055_cast_fp16 = slice_by_index(begin = var_17055_begin_0, end = var_17055_end_0, end_mask = var_17055_end_mask_0, x = query_23_cast_fp16)[name = tensor("op_17055_cast_fp16")]; + tensor var_17059_begin_0 = const()[name = tensor("op_17059_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_17059_end_0 = const()[name = tensor("op_17059_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_17059_end_mask_0 = const()[name = tensor("op_17059_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_17059_cast_fp16 = slice_by_index(begin = var_17059_begin_0, end = var_17059_end_0, end_mask = var_17059_end_mask_0, x = query_23_cast_fp16)[name = tensor("op_17059_cast_fp16")]; + tensor var_17063_begin_0 = const()[name = tensor("op_17063_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_17063_end_0 = const()[name = tensor("op_17063_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_17063_end_mask_0 = const()[name = tensor("op_17063_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_17063_cast_fp16 = slice_by_index(begin = var_17063_begin_0, end = var_17063_end_0, end_mask = var_17063_end_mask_0, x = query_23_cast_fp16)[name = tensor("op_17063_cast_fp16")]; + tensor var_17067_begin_0 = const()[name = tensor("op_17067_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_17067_end_0 = const()[name = tensor("op_17067_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_17067_end_mask_0 = const()[name = tensor("op_17067_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_17067_cast_fp16 = slice_by_index(begin = var_17067_begin_0, end = var_17067_end_0, end_mask = var_17067_end_mask_0, x = query_23_cast_fp16)[name = tensor("op_17067_cast_fp16")]; + tensor var_17071_begin_0 = const()[name = tensor("op_17071_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_17071_end_0 = const()[name = tensor("op_17071_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_17071_end_mask_0 = const()[name = tensor("op_17071_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_17071_cast_fp16 = slice_by_index(begin = var_17071_begin_0, end = var_17071_end_0, end_mask = var_17071_end_mask_0, x = query_23_cast_fp16)[name = tensor("op_17071_cast_fp16")]; + tensor var_17075_begin_0 = const()[name = tensor("op_17075_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_17075_end_0 = const()[name = tensor("op_17075_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_17075_end_mask_0 = const()[name = tensor("op_17075_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_17075_cast_fp16 = slice_by_index(begin = var_17075_begin_0, end = var_17075_end_0, end_mask = var_17075_end_mask_0, x = query_23_cast_fp16)[name = tensor("op_17075_cast_fp16")]; + tensor var_17079_begin_0 = const()[name = tensor("op_17079_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_17079_end_0 = const()[name = tensor("op_17079_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_17079_end_mask_0 = const()[name = tensor("op_17079_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_17079_cast_fp16 = slice_by_index(begin = var_17079_begin_0, end = var_17079_end_0, end_mask = var_17079_end_mask_0, x = query_23_cast_fp16)[name = tensor("op_17079_cast_fp16")]; + tensor var_17083_begin_0 = const()[name = tensor("op_17083_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_17083_end_0 = const()[name = tensor("op_17083_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_17083_end_mask_0 = const()[name = tensor("op_17083_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_17083_cast_fp16 = slice_by_index(begin = var_17083_begin_0, end = var_17083_end_0, end_mask = var_17083_end_mask_0, x = query_23_cast_fp16)[name = tensor("op_17083_cast_fp16")]; + tensor var_17087_begin_0 = const()[name = tensor("op_17087_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_17087_end_0 = const()[name = tensor("op_17087_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_17087_end_mask_0 = const()[name = tensor("op_17087_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_17087_cast_fp16 = slice_by_index(begin = var_17087_begin_0, end = var_17087_end_0, end_mask = var_17087_end_mask_0, x = query_23_cast_fp16)[name = tensor("op_17087_cast_fp16")]; + tensor var_17091_begin_0 = const()[name = tensor("op_17091_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_17091_end_0 = const()[name = tensor("op_17091_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_17091_end_mask_0 = const()[name = tensor("op_17091_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_17091_cast_fp16 = slice_by_index(begin = var_17091_begin_0, end = var_17091_end_0, end_mask = var_17091_end_mask_0, x = query_23_cast_fp16)[name = tensor("op_17091_cast_fp16")]; + tensor var_17095_begin_0 = const()[name = tensor("op_17095_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_17095_end_0 = const()[name = tensor("op_17095_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_17095_end_mask_0 = const()[name = tensor("op_17095_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_17095_cast_fp16 = slice_by_index(begin = var_17095_begin_0, end = var_17095_end_0, end_mask = var_17095_end_mask_0, x = query_23_cast_fp16)[name = tensor("op_17095_cast_fp16")]; + tensor var_17099_begin_0 = const()[name = tensor("op_17099_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_17099_end_0 = const()[name = tensor("op_17099_end_0"), val = tensor([1, 832, 1, 1500])]; + tensor var_17099_end_mask_0 = const()[name = tensor("op_17099_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_17099_cast_fp16 = slice_by_index(begin = var_17099_begin_0, end = var_17099_end_0, end_mask = var_17099_end_mask_0, x = query_23_cast_fp16)[name = tensor("op_17099_cast_fp16")]; + tensor var_17103_begin_0 = const()[name = tensor("op_17103_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_17103_end_0 = const()[name = tensor("op_17103_end_0"), val = tensor([1, 896, 1, 1500])]; + tensor var_17103_end_mask_0 = const()[name = tensor("op_17103_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_17103_cast_fp16 = slice_by_index(begin = var_17103_begin_0, end = var_17103_end_0, end_mask = var_17103_end_mask_0, x = query_23_cast_fp16)[name = tensor("op_17103_cast_fp16")]; + tensor var_17107_begin_0 = const()[name = tensor("op_17107_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_17107_end_0 = const()[name = tensor("op_17107_end_0"), val = tensor([1, 960, 1, 1500])]; + tensor var_17107_end_mask_0 = const()[name = tensor("op_17107_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_17107_cast_fp16 = slice_by_index(begin = var_17107_begin_0, end = var_17107_end_0, end_mask = var_17107_end_mask_0, x = query_23_cast_fp16)[name = tensor("op_17107_cast_fp16")]; + tensor var_17111_begin_0 = const()[name = tensor("op_17111_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_17111_end_0 = const()[name = tensor("op_17111_end_0"), val = tensor([1, 1024, 1, 1500])]; + tensor var_17111_end_mask_0 = const()[name = tensor("op_17111_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_17111_cast_fp16 = slice_by_index(begin = var_17111_begin_0, end = var_17111_end_0, end_mask = var_17111_end_mask_0, x = query_23_cast_fp16)[name = tensor("op_17111_cast_fp16")]; + tensor var_17115_begin_0 = const()[name = tensor("op_17115_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_17115_end_0 = const()[name = tensor("op_17115_end_0"), val = tensor([1, 1088, 1, 1500])]; + tensor var_17115_end_mask_0 = const()[name = tensor("op_17115_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_17115_cast_fp16 = slice_by_index(begin = var_17115_begin_0, end = var_17115_end_0, end_mask = var_17115_end_mask_0, x = query_23_cast_fp16)[name = tensor("op_17115_cast_fp16")]; + tensor var_17119_begin_0 = const()[name = tensor("op_17119_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_17119_end_0 = const()[name = tensor("op_17119_end_0"), val = tensor([1, 1152, 1, 1500])]; + tensor var_17119_end_mask_0 = const()[name = tensor("op_17119_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_17119_cast_fp16 = slice_by_index(begin = var_17119_begin_0, end = var_17119_end_0, end_mask = var_17119_end_mask_0, x = query_23_cast_fp16)[name = tensor("op_17119_cast_fp16")]; + tensor var_17123_begin_0 = const()[name = tensor("op_17123_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_17123_end_0 = const()[name = tensor("op_17123_end_0"), val = tensor([1, 1216, 1, 1500])]; + tensor var_17123_end_mask_0 = const()[name = tensor("op_17123_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_17123_cast_fp16 = slice_by_index(begin = var_17123_begin_0, end = var_17123_end_0, end_mask = var_17123_end_mask_0, x = query_23_cast_fp16)[name = tensor("op_17123_cast_fp16")]; + tensor var_17127_begin_0 = const()[name = tensor("op_17127_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_17127_end_0 = const()[name = tensor("op_17127_end_0"), val = tensor([1, 1280, 1, 1500])]; + tensor var_17127_end_mask_0 = const()[name = tensor("op_17127_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_17127_cast_fp16 = slice_by_index(begin = var_17127_begin_0, end = var_17127_end_0, end_mask = var_17127_end_mask_0, x = query_23_cast_fp16)[name = tensor("op_17127_cast_fp16")]; + tensor var_17136_begin_0 = const()[name = tensor("op_17136_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_17136_end_0 = const()[name = tensor("op_17136_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_17136_end_mask_0 = const()[name = tensor("op_17136_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17136_cast_fp16 = slice_by_index(begin = var_17136_begin_0, end = var_17136_end_0, end_mask = var_17136_end_mask_0, x = var_17051_cast_fp16)[name = tensor("op_17136_cast_fp16")]; + tensor var_17143_begin_0 = const()[name = tensor("op_17143_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_17143_end_0 = const()[name = tensor("op_17143_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_17143_end_mask_0 = const()[name = tensor("op_17143_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17143_cast_fp16 = slice_by_index(begin = var_17143_begin_0, end = var_17143_end_0, end_mask = var_17143_end_mask_0, x = var_17051_cast_fp16)[name = tensor("op_17143_cast_fp16")]; + tensor var_17150_begin_0 = const()[name = tensor("op_17150_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_17150_end_0 = const()[name = tensor("op_17150_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_17150_end_mask_0 = const()[name = tensor("op_17150_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17150_cast_fp16 = slice_by_index(begin = var_17150_begin_0, end = var_17150_end_0, end_mask = var_17150_end_mask_0, x = var_17051_cast_fp16)[name = tensor("op_17150_cast_fp16")]; + tensor var_17157_begin_0 = const()[name = tensor("op_17157_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_17157_end_0 = const()[name = tensor("op_17157_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_17157_end_mask_0 = const()[name = tensor("op_17157_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17157_cast_fp16 = slice_by_index(begin = var_17157_begin_0, end = var_17157_end_0, end_mask = var_17157_end_mask_0, x = var_17051_cast_fp16)[name = tensor("op_17157_cast_fp16")]; + tensor var_17164_begin_0 = const()[name = tensor("op_17164_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_17164_end_0 = const()[name = tensor("op_17164_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_17164_end_mask_0 = const()[name = tensor("op_17164_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17164_cast_fp16 = slice_by_index(begin = var_17164_begin_0, end = var_17164_end_0, end_mask = var_17164_end_mask_0, x = var_17055_cast_fp16)[name = tensor("op_17164_cast_fp16")]; + tensor var_17171_begin_0 = const()[name = tensor("op_17171_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_17171_end_0 = const()[name = tensor("op_17171_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_17171_end_mask_0 = const()[name = tensor("op_17171_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17171_cast_fp16 = slice_by_index(begin = var_17171_begin_0, end = var_17171_end_0, end_mask = var_17171_end_mask_0, x = var_17055_cast_fp16)[name = tensor("op_17171_cast_fp16")]; + tensor var_17178_begin_0 = const()[name = tensor("op_17178_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_17178_end_0 = const()[name = tensor("op_17178_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_17178_end_mask_0 = const()[name = tensor("op_17178_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17178_cast_fp16 = slice_by_index(begin = var_17178_begin_0, end = var_17178_end_0, end_mask = var_17178_end_mask_0, x = var_17055_cast_fp16)[name = tensor("op_17178_cast_fp16")]; + tensor var_17185_begin_0 = const()[name = tensor("op_17185_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_17185_end_0 = const()[name = tensor("op_17185_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_17185_end_mask_0 = const()[name = tensor("op_17185_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17185_cast_fp16 = slice_by_index(begin = var_17185_begin_0, end = var_17185_end_0, end_mask = var_17185_end_mask_0, x = var_17055_cast_fp16)[name = tensor("op_17185_cast_fp16")]; + tensor var_17192_begin_0 = const()[name = tensor("op_17192_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_17192_end_0 = const()[name = tensor("op_17192_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_17192_end_mask_0 = const()[name = tensor("op_17192_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17192_cast_fp16 = slice_by_index(begin = var_17192_begin_0, end = var_17192_end_0, end_mask = var_17192_end_mask_0, x = var_17059_cast_fp16)[name = tensor("op_17192_cast_fp16")]; + tensor var_17199_begin_0 = const()[name = tensor("op_17199_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_17199_end_0 = const()[name = tensor("op_17199_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_17199_end_mask_0 = const()[name = tensor("op_17199_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17199_cast_fp16 = slice_by_index(begin = var_17199_begin_0, end = var_17199_end_0, end_mask = var_17199_end_mask_0, x = var_17059_cast_fp16)[name = tensor("op_17199_cast_fp16")]; + tensor var_17206_begin_0 = const()[name = tensor("op_17206_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_17206_end_0 = const()[name = tensor("op_17206_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_17206_end_mask_0 = const()[name = tensor("op_17206_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17206_cast_fp16 = slice_by_index(begin = var_17206_begin_0, end = var_17206_end_0, end_mask = var_17206_end_mask_0, x = var_17059_cast_fp16)[name = tensor("op_17206_cast_fp16")]; + tensor var_17213_begin_0 = const()[name = tensor("op_17213_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_17213_end_0 = const()[name = tensor("op_17213_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_17213_end_mask_0 = const()[name = tensor("op_17213_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17213_cast_fp16 = slice_by_index(begin = var_17213_begin_0, end = var_17213_end_0, end_mask = var_17213_end_mask_0, x = var_17059_cast_fp16)[name = tensor("op_17213_cast_fp16")]; + tensor var_17220_begin_0 = const()[name = tensor("op_17220_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_17220_end_0 = const()[name = tensor("op_17220_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_17220_end_mask_0 = const()[name = tensor("op_17220_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17220_cast_fp16 = slice_by_index(begin = var_17220_begin_0, end = var_17220_end_0, end_mask = var_17220_end_mask_0, x = var_17063_cast_fp16)[name = tensor("op_17220_cast_fp16")]; + tensor var_17227_begin_0 = const()[name = tensor("op_17227_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_17227_end_0 = const()[name = tensor("op_17227_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_17227_end_mask_0 = const()[name = tensor("op_17227_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17227_cast_fp16 = slice_by_index(begin = var_17227_begin_0, end = var_17227_end_0, end_mask = var_17227_end_mask_0, x = var_17063_cast_fp16)[name = tensor("op_17227_cast_fp16")]; + tensor var_17234_begin_0 = const()[name = tensor("op_17234_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_17234_end_0 = const()[name = tensor("op_17234_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_17234_end_mask_0 = const()[name = tensor("op_17234_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17234_cast_fp16 = slice_by_index(begin = var_17234_begin_0, end = var_17234_end_0, end_mask = var_17234_end_mask_0, x = var_17063_cast_fp16)[name = tensor("op_17234_cast_fp16")]; + tensor var_17241_begin_0 = const()[name = tensor("op_17241_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_17241_end_0 = const()[name = tensor("op_17241_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_17241_end_mask_0 = const()[name = tensor("op_17241_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17241_cast_fp16 = slice_by_index(begin = var_17241_begin_0, end = var_17241_end_0, end_mask = var_17241_end_mask_0, x = var_17063_cast_fp16)[name = tensor("op_17241_cast_fp16")]; + tensor var_17248_begin_0 = const()[name = tensor("op_17248_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_17248_end_0 = const()[name = tensor("op_17248_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_17248_end_mask_0 = const()[name = tensor("op_17248_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17248_cast_fp16 = slice_by_index(begin = var_17248_begin_0, end = var_17248_end_0, end_mask = var_17248_end_mask_0, x = var_17067_cast_fp16)[name = tensor("op_17248_cast_fp16")]; + tensor var_17255_begin_0 = const()[name = tensor("op_17255_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_17255_end_0 = const()[name = tensor("op_17255_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_17255_end_mask_0 = const()[name = tensor("op_17255_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17255_cast_fp16 = slice_by_index(begin = var_17255_begin_0, end = var_17255_end_0, end_mask = var_17255_end_mask_0, x = var_17067_cast_fp16)[name = tensor("op_17255_cast_fp16")]; + tensor var_17262_begin_0 = const()[name = tensor("op_17262_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_17262_end_0 = const()[name = tensor("op_17262_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_17262_end_mask_0 = const()[name = tensor("op_17262_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17262_cast_fp16 = slice_by_index(begin = var_17262_begin_0, end = var_17262_end_0, end_mask = var_17262_end_mask_0, x = var_17067_cast_fp16)[name = tensor("op_17262_cast_fp16")]; + tensor var_17269_begin_0 = const()[name = tensor("op_17269_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_17269_end_0 = const()[name = tensor("op_17269_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_17269_end_mask_0 = const()[name = tensor("op_17269_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17269_cast_fp16 = slice_by_index(begin = var_17269_begin_0, end = var_17269_end_0, end_mask = var_17269_end_mask_0, x = var_17067_cast_fp16)[name = tensor("op_17269_cast_fp16")]; + tensor var_17276_begin_0 = const()[name = tensor("op_17276_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_17276_end_0 = const()[name = tensor("op_17276_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_17276_end_mask_0 = const()[name = tensor("op_17276_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17276_cast_fp16 = slice_by_index(begin = var_17276_begin_0, end = var_17276_end_0, end_mask = var_17276_end_mask_0, x = var_17071_cast_fp16)[name = tensor("op_17276_cast_fp16")]; + tensor var_17283_begin_0 = const()[name = tensor("op_17283_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_17283_end_0 = const()[name = tensor("op_17283_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_17283_end_mask_0 = const()[name = tensor("op_17283_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17283_cast_fp16 = slice_by_index(begin = var_17283_begin_0, end = var_17283_end_0, end_mask = var_17283_end_mask_0, x = var_17071_cast_fp16)[name = tensor("op_17283_cast_fp16")]; + tensor var_17290_begin_0 = const()[name = tensor("op_17290_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_17290_end_0 = const()[name = tensor("op_17290_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_17290_end_mask_0 = const()[name = tensor("op_17290_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17290_cast_fp16 = slice_by_index(begin = var_17290_begin_0, end = var_17290_end_0, end_mask = var_17290_end_mask_0, x = var_17071_cast_fp16)[name = tensor("op_17290_cast_fp16")]; + tensor var_17297_begin_0 = const()[name = tensor("op_17297_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_17297_end_0 = const()[name = tensor("op_17297_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_17297_end_mask_0 = const()[name = tensor("op_17297_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17297_cast_fp16 = slice_by_index(begin = var_17297_begin_0, end = var_17297_end_0, end_mask = var_17297_end_mask_0, x = var_17071_cast_fp16)[name = tensor("op_17297_cast_fp16")]; + tensor var_17304_begin_0 = const()[name = tensor("op_17304_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_17304_end_0 = const()[name = tensor("op_17304_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_17304_end_mask_0 = const()[name = tensor("op_17304_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17304_cast_fp16 = slice_by_index(begin = var_17304_begin_0, end = var_17304_end_0, end_mask = var_17304_end_mask_0, x = var_17075_cast_fp16)[name = tensor("op_17304_cast_fp16")]; + tensor var_17311_begin_0 = const()[name = tensor("op_17311_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_17311_end_0 = const()[name = tensor("op_17311_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_17311_end_mask_0 = const()[name = tensor("op_17311_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17311_cast_fp16 = slice_by_index(begin = var_17311_begin_0, end = var_17311_end_0, end_mask = var_17311_end_mask_0, x = var_17075_cast_fp16)[name = tensor("op_17311_cast_fp16")]; + tensor var_17318_begin_0 = const()[name = tensor("op_17318_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_17318_end_0 = const()[name = tensor("op_17318_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_17318_end_mask_0 = const()[name = tensor("op_17318_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17318_cast_fp16 = slice_by_index(begin = var_17318_begin_0, end = var_17318_end_0, end_mask = var_17318_end_mask_0, x = var_17075_cast_fp16)[name = tensor("op_17318_cast_fp16")]; + tensor var_17325_begin_0 = const()[name = tensor("op_17325_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_17325_end_0 = const()[name = tensor("op_17325_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_17325_end_mask_0 = const()[name = tensor("op_17325_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17325_cast_fp16 = slice_by_index(begin = var_17325_begin_0, end = var_17325_end_0, end_mask = var_17325_end_mask_0, x = var_17075_cast_fp16)[name = tensor("op_17325_cast_fp16")]; + tensor var_17332_begin_0 = const()[name = tensor("op_17332_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_17332_end_0 = const()[name = tensor("op_17332_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_17332_end_mask_0 = const()[name = tensor("op_17332_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17332_cast_fp16 = slice_by_index(begin = var_17332_begin_0, end = var_17332_end_0, end_mask = var_17332_end_mask_0, x = var_17079_cast_fp16)[name = tensor("op_17332_cast_fp16")]; + tensor var_17339_begin_0 = const()[name = tensor("op_17339_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_17339_end_0 = const()[name = tensor("op_17339_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_17339_end_mask_0 = const()[name = tensor("op_17339_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17339_cast_fp16 = slice_by_index(begin = var_17339_begin_0, end = var_17339_end_0, end_mask = var_17339_end_mask_0, x = var_17079_cast_fp16)[name = tensor("op_17339_cast_fp16")]; + tensor var_17346_begin_0 = const()[name = tensor("op_17346_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_17346_end_0 = const()[name = tensor("op_17346_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_17346_end_mask_0 = const()[name = tensor("op_17346_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17346_cast_fp16 = slice_by_index(begin = var_17346_begin_0, end = var_17346_end_0, end_mask = var_17346_end_mask_0, x = var_17079_cast_fp16)[name = tensor("op_17346_cast_fp16")]; + tensor var_17353_begin_0 = const()[name = tensor("op_17353_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_17353_end_0 = const()[name = tensor("op_17353_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_17353_end_mask_0 = const()[name = tensor("op_17353_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17353_cast_fp16 = slice_by_index(begin = var_17353_begin_0, end = var_17353_end_0, end_mask = var_17353_end_mask_0, x = var_17079_cast_fp16)[name = tensor("op_17353_cast_fp16")]; + tensor var_17360_begin_0 = const()[name = tensor("op_17360_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_17360_end_0 = const()[name = tensor("op_17360_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_17360_end_mask_0 = const()[name = tensor("op_17360_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17360_cast_fp16 = slice_by_index(begin = var_17360_begin_0, end = var_17360_end_0, end_mask = var_17360_end_mask_0, x = var_17083_cast_fp16)[name = tensor("op_17360_cast_fp16")]; + tensor var_17367_begin_0 = const()[name = tensor("op_17367_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_17367_end_0 = const()[name = tensor("op_17367_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_17367_end_mask_0 = const()[name = tensor("op_17367_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17367_cast_fp16 = slice_by_index(begin = var_17367_begin_0, end = var_17367_end_0, end_mask = var_17367_end_mask_0, x = var_17083_cast_fp16)[name = tensor("op_17367_cast_fp16")]; + tensor var_17374_begin_0 = const()[name = tensor("op_17374_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_17374_end_0 = const()[name = tensor("op_17374_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_17374_end_mask_0 = const()[name = tensor("op_17374_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17374_cast_fp16 = slice_by_index(begin = var_17374_begin_0, end = var_17374_end_0, end_mask = var_17374_end_mask_0, x = var_17083_cast_fp16)[name = tensor("op_17374_cast_fp16")]; + tensor var_17381_begin_0 = const()[name = tensor("op_17381_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_17381_end_0 = const()[name = tensor("op_17381_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_17381_end_mask_0 = const()[name = tensor("op_17381_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17381_cast_fp16 = slice_by_index(begin = var_17381_begin_0, end = var_17381_end_0, end_mask = var_17381_end_mask_0, x = var_17083_cast_fp16)[name = tensor("op_17381_cast_fp16")]; + tensor var_17388_begin_0 = const()[name = tensor("op_17388_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_17388_end_0 = const()[name = tensor("op_17388_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_17388_end_mask_0 = const()[name = tensor("op_17388_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17388_cast_fp16 = slice_by_index(begin = var_17388_begin_0, end = var_17388_end_0, end_mask = var_17388_end_mask_0, x = var_17087_cast_fp16)[name = tensor("op_17388_cast_fp16")]; + tensor var_17395_begin_0 = const()[name = tensor("op_17395_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_17395_end_0 = const()[name = tensor("op_17395_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_17395_end_mask_0 = const()[name = tensor("op_17395_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17395_cast_fp16 = slice_by_index(begin = var_17395_begin_0, end = var_17395_end_0, end_mask = var_17395_end_mask_0, x = var_17087_cast_fp16)[name = tensor("op_17395_cast_fp16")]; + tensor var_17402_begin_0 = const()[name = tensor("op_17402_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_17402_end_0 = const()[name = tensor("op_17402_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_17402_end_mask_0 = const()[name = tensor("op_17402_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17402_cast_fp16 = slice_by_index(begin = var_17402_begin_0, end = var_17402_end_0, end_mask = var_17402_end_mask_0, x = var_17087_cast_fp16)[name = tensor("op_17402_cast_fp16")]; + tensor var_17409_begin_0 = const()[name = tensor("op_17409_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_17409_end_0 = const()[name = tensor("op_17409_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_17409_end_mask_0 = const()[name = tensor("op_17409_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17409_cast_fp16 = slice_by_index(begin = var_17409_begin_0, end = var_17409_end_0, end_mask = var_17409_end_mask_0, x = var_17087_cast_fp16)[name = tensor("op_17409_cast_fp16")]; + tensor var_17416_begin_0 = const()[name = tensor("op_17416_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_17416_end_0 = const()[name = tensor("op_17416_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_17416_end_mask_0 = const()[name = tensor("op_17416_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17416_cast_fp16 = slice_by_index(begin = var_17416_begin_0, end = var_17416_end_0, end_mask = var_17416_end_mask_0, x = var_17091_cast_fp16)[name = tensor("op_17416_cast_fp16")]; + tensor var_17423_begin_0 = const()[name = tensor("op_17423_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_17423_end_0 = const()[name = tensor("op_17423_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_17423_end_mask_0 = const()[name = tensor("op_17423_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17423_cast_fp16 = slice_by_index(begin = var_17423_begin_0, end = var_17423_end_0, end_mask = var_17423_end_mask_0, x = var_17091_cast_fp16)[name = tensor("op_17423_cast_fp16")]; + tensor var_17430_begin_0 = const()[name = tensor("op_17430_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_17430_end_0 = const()[name = tensor("op_17430_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_17430_end_mask_0 = const()[name = tensor("op_17430_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17430_cast_fp16 = slice_by_index(begin = var_17430_begin_0, end = var_17430_end_0, end_mask = var_17430_end_mask_0, x = var_17091_cast_fp16)[name = tensor("op_17430_cast_fp16")]; + tensor var_17437_begin_0 = const()[name = tensor("op_17437_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_17437_end_0 = const()[name = tensor("op_17437_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_17437_end_mask_0 = const()[name = tensor("op_17437_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17437_cast_fp16 = slice_by_index(begin = var_17437_begin_0, end = var_17437_end_0, end_mask = var_17437_end_mask_0, x = var_17091_cast_fp16)[name = tensor("op_17437_cast_fp16")]; + tensor var_17444_begin_0 = const()[name = tensor("op_17444_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_17444_end_0 = const()[name = tensor("op_17444_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_17444_end_mask_0 = const()[name = tensor("op_17444_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17444_cast_fp16 = slice_by_index(begin = var_17444_begin_0, end = var_17444_end_0, end_mask = var_17444_end_mask_0, x = var_17095_cast_fp16)[name = tensor("op_17444_cast_fp16")]; + tensor var_17451_begin_0 = const()[name = tensor("op_17451_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_17451_end_0 = const()[name = tensor("op_17451_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_17451_end_mask_0 = const()[name = tensor("op_17451_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17451_cast_fp16 = slice_by_index(begin = var_17451_begin_0, end = var_17451_end_0, end_mask = var_17451_end_mask_0, x = var_17095_cast_fp16)[name = tensor("op_17451_cast_fp16")]; + tensor var_17458_begin_0 = const()[name = tensor("op_17458_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_17458_end_0 = const()[name = tensor("op_17458_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_17458_end_mask_0 = const()[name = tensor("op_17458_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17458_cast_fp16 = slice_by_index(begin = var_17458_begin_0, end = var_17458_end_0, end_mask = var_17458_end_mask_0, x = var_17095_cast_fp16)[name = tensor("op_17458_cast_fp16")]; + tensor var_17465_begin_0 = const()[name = tensor("op_17465_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_17465_end_0 = const()[name = tensor("op_17465_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_17465_end_mask_0 = const()[name = tensor("op_17465_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17465_cast_fp16 = slice_by_index(begin = var_17465_begin_0, end = var_17465_end_0, end_mask = var_17465_end_mask_0, x = var_17095_cast_fp16)[name = tensor("op_17465_cast_fp16")]; + tensor var_17472_begin_0 = const()[name = tensor("op_17472_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_17472_end_0 = const()[name = tensor("op_17472_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_17472_end_mask_0 = const()[name = tensor("op_17472_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17472_cast_fp16 = slice_by_index(begin = var_17472_begin_0, end = var_17472_end_0, end_mask = var_17472_end_mask_0, x = var_17099_cast_fp16)[name = tensor("op_17472_cast_fp16")]; + tensor var_17479_begin_0 = const()[name = tensor("op_17479_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_17479_end_0 = const()[name = tensor("op_17479_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_17479_end_mask_0 = const()[name = tensor("op_17479_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17479_cast_fp16 = slice_by_index(begin = var_17479_begin_0, end = var_17479_end_0, end_mask = var_17479_end_mask_0, x = var_17099_cast_fp16)[name = tensor("op_17479_cast_fp16")]; + tensor var_17486_begin_0 = const()[name = tensor("op_17486_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_17486_end_0 = const()[name = tensor("op_17486_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_17486_end_mask_0 = const()[name = tensor("op_17486_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17486_cast_fp16 = slice_by_index(begin = var_17486_begin_0, end = var_17486_end_0, end_mask = var_17486_end_mask_0, x = var_17099_cast_fp16)[name = tensor("op_17486_cast_fp16")]; + tensor var_17493_begin_0 = const()[name = tensor("op_17493_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_17493_end_0 = const()[name = tensor("op_17493_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_17493_end_mask_0 = const()[name = tensor("op_17493_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17493_cast_fp16 = slice_by_index(begin = var_17493_begin_0, end = var_17493_end_0, end_mask = var_17493_end_mask_0, x = var_17099_cast_fp16)[name = tensor("op_17493_cast_fp16")]; + tensor var_17500_begin_0 = const()[name = tensor("op_17500_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_17500_end_0 = const()[name = tensor("op_17500_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_17500_end_mask_0 = const()[name = tensor("op_17500_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17500_cast_fp16 = slice_by_index(begin = var_17500_begin_0, end = var_17500_end_0, end_mask = var_17500_end_mask_0, x = var_17103_cast_fp16)[name = tensor("op_17500_cast_fp16")]; + tensor var_17507_begin_0 = const()[name = tensor("op_17507_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_17507_end_0 = const()[name = tensor("op_17507_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_17507_end_mask_0 = const()[name = tensor("op_17507_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17507_cast_fp16 = slice_by_index(begin = var_17507_begin_0, end = var_17507_end_0, end_mask = var_17507_end_mask_0, x = var_17103_cast_fp16)[name = tensor("op_17507_cast_fp16")]; + tensor var_17514_begin_0 = const()[name = tensor("op_17514_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_17514_end_0 = const()[name = tensor("op_17514_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_17514_end_mask_0 = const()[name = tensor("op_17514_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17514_cast_fp16 = slice_by_index(begin = var_17514_begin_0, end = var_17514_end_0, end_mask = var_17514_end_mask_0, x = var_17103_cast_fp16)[name = tensor("op_17514_cast_fp16")]; + tensor var_17521_begin_0 = const()[name = tensor("op_17521_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_17521_end_0 = const()[name = tensor("op_17521_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_17521_end_mask_0 = const()[name = tensor("op_17521_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17521_cast_fp16 = slice_by_index(begin = var_17521_begin_0, end = var_17521_end_0, end_mask = var_17521_end_mask_0, x = var_17103_cast_fp16)[name = tensor("op_17521_cast_fp16")]; + tensor var_17528_begin_0 = const()[name = tensor("op_17528_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_17528_end_0 = const()[name = tensor("op_17528_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_17528_end_mask_0 = const()[name = tensor("op_17528_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17528_cast_fp16 = slice_by_index(begin = var_17528_begin_0, end = var_17528_end_0, end_mask = var_17528_end_mask_0, x = var_17107_cast_fp16)[name = tensor("op_17528_cast_fp16")]; + tensor var_17535_begin_0 = const()[name = tensor("op_17535_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_17535_end_0 = const()[name = tensor("op_17535_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_17535_end_mask_0 = const()[name = tensor("op_17535_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17535_cast_fp16 = slice_by_index(begin = var_17535_begin_0, end = var_17535_end_0, end_mask = var_17535_end_mask_0, x = var_17107_cast_fp16)[name = tensor("op_17535_cast_fp16")]; + tensor var_17542_begin_0 = const()[name = tensor("op_17542_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_17542_end_0 = const()[name = tensor("op_17542_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_17542_end_mask_0 = const()[name = tensor("op_17542_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17542_cast_fp16 = slice_by_index(begin = var_17542_begin_0, end = var_17542_end_0, end_mask = var_17542_end_mask_0, x = var_17107_cast_fp16)[name = tensor("op_17542_cast_fp16")]; + tensor var_17549_begin_0 = const()[name = tensor("op_17549_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_17549_end_0 = const()[name = tensor("op_17549_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_17549_end_mask_0 = const()[name = tensor("op_17549_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17549_cast_fp16 = slice_by_index(begin = var_17549_begin_0, end = var_17549_end_0, end_mask = var_17549_end_mask_0, x = var_17107_cast_fp16)[name = tensor("op_17549_cast_fp16")]; + tensor var_17556_begin_0 = const()[name = tensor("op_17556_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_17556_end_0 = const()[name = tensor("op_17556_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_17556_end_mask_0 = const()[name = tensor("op_17556_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17556_cast_fp16 = slice_by_index(begin = var_17556_begin_0, end = var_17556_end_0, end_mask = var_17556_end_mask_0, x = var_17111_cast_fp16)[name = tensor("op_17556_cast_fp16")]; + tensor var_17563_begin_0 = const()[name = tensor("op_17563_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_17563_end_0 = const()[name = tensor("op_17563_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_17563_end_mask_0 = const()[name = tensor("op_17563_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17563_cast_fp16 = slice_by_index(begin = var_17563_begin_0, end = var_17563_end_0, end_mask = var_17563_end_mask_0, x = var_17111_cast_fp16)[name = tensor("op_17563_cast_fp16")]; + tensor var_17570_begin_0 = const()[name = tensor("op_17570_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_17570_end_0 = const()[name = tensor("op_17570_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_17570_end_mask_0 = const()[name = tensor("op_17570_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17570_cast_fp16 = slice_by_index(begin = var_17570_begin_0, end = var_17570_end_0, end_mask = var_17570_end_mask_0, x = var_17111_cast_fp16)[name = tensor("op_17570_cast_fp16")]; + tensor var_17577_begin_0 = const()[name = tensor("op_17577_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_17577_end_0 = const()[name = tensor("op_17577_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_17577_end_mask_0 = const()[name = tensor("op_17577_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17577_cast_fp16 = slice_by_index(begin = var_17577_begin_0, end = var_17577_end_0, end_mask = var_17577_end_mask_0, x = var_17111_cast_fp16)[name = tensor("op_17577_cast_fp16")]; + tensor var_17584_begin_0 = const()[name = tensor("op_17584_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_17584_end_0 = const()[name = tensor("op_17584_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_17584_end_mask_0 = const()[name = tensor("op_17584_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17584_cast_fp16 = slice_by_index(begin = var_17584_begin_0, end = var_17584_end_0, end_mask = var_17584_end_mask_0, x = var_17115_cast_fp16)[name = tensor("op_17584_cast_fp16")]; + tensor var_17591_begin_0 = const()[name = tensor("op_17591_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_17591_end_0 = const()[name = tensor("op_17591_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_17591_end_mask_0 = const()[name = tensor("op_17591_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17591_cast_fp16 = slice_by_index(begin = var_17591_begin_0, end = var_17591_end_0, end_mask = var_17591_end_mask_0, x = var_17115_cast_fp16)[name = tensor("op_17591_cast_fp16")]; + tensor var_17598_begin_0 = const()[name = tensor("op_17598_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_17598_end_0 = const()[name = tensor("op_17598_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_17598_end_mask_0 = const()[name = tensor("op_17598_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17598_cast_fp16 = slice_by_index(begin = var_17598_begin_0, end = var_17598_end_0, end_mask = var_17598_end_mask_0, x = var_17115_cast_fp16)[name = tensor("op_17598_cast_fp16")]; + tensor var_17605_begin_0 = const()[name = tensor("op_17605_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_17605_end_0 = const()[name = tensor("op_17605_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_17605_end_mask_0 = const()[name = tensor("op_17605_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17605_cast_fp16 = slice_by_index(begin = var_17605_begin_0, end = var_17605_end_0, end_mask = var_17605_end_mask_0, x = var_17115_cast_fp16)[name = tensor("op_17605_cast_fp16")]; + tensor var_17612_begin_0 = const()[name = tensor("op_17612_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_17612_end_0 = const()[name = tensor("op_17612_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_17612_end_mask_0 = const()[name = tensor("op_17612_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17612_cast_fp16 = slice_by_index(begin = var_17612_begin_0, end = var_17612_end_0, end_mask = var_17612_end_mask_0, x = var_17119_cast_fp16)[name = tensor("op_17612_cast_fp16")]; + tensor var_17619_begin_0 = const()[name = tensor("op_17619_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_17619_end_0 = const()[name = tensor("op_17619_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_17619_end_mask_0 = const()[name = tensor("op_17619_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17619_cast_fp16 = slice_by_index(begin = var_17619_begin_0, end = var_17619_end_0, end_mask = var_17619_end_mask_0, x = var_17119_cast_fp16)[name = tensor("op_17619_cast_fp16")]; + tensor var_17626_begin_0 = const()[name = tensor("op_17626_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_17626_end_0 = const()[name = tensor("op_17626_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_17626_end_mask_0 = const()[name = tensor("op_17626_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17626_cast_fp16 = slice_by_index(begin = var_17626_begin_0, end = var_17626_end_0, end_mask = var_17626_end_mask_0, x = var_17119_cast_fp16)[name = tensor("op_17626_cast_fp16")]; + tensor var_17633_begin_0 = const()[name = tensor("op_17633_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_17633_end_0 = const()[name = tensor("op_17633_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_17633_end_mask_0 = const()[name = tensor("op_17633_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17633_cast_fp16 = slice_by_index(begin = var_17633_begin_0, end = var_17633_end_0, end_mask = var_17633_end_mask_0, x = var_17119_cast_fp16)[name = tensor("op_17633_cast_fp16")]; + tensor var_17640_begin_0 = const()[name = tensor("op_17640_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_17640_end_0 = const()[name = tensor("op_17640_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_17640_end_mask_0 = const()[name = tensor("op_17640_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17640_cast_fp16 = slice_by_index(begin = var_17640_begin_0, end = var_17640_end_0, end_mask = var_17640_end_mask_0, x = var_17123_cast_fp16)[name = tensor("op_17640_cast_fp16")]; + tensor var_17647_begin_0 = const()[name = tensor("op_17647_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_17647_end_0 = const()[name = tensor("op_17647_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_17647_end_mask_0 = const()[name = tensor("op_17647_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17647_cast_fp16 = slice_by_index(begin = var_17647_begin_0, end = var_17647_end_0, end_mask = var_17647_end_mask_0, x = var_17123_cast_fp16)[name = tensor("op_17647_cast_fp16")]; + tensor var_17654_begin_0 = const()[name = tensor("op_17654_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_17654_end_0 = const()[name = tensor("op_17654_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_17654_end_mask_0 = const()[name = tensor("op_17654_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17654_cast_fp16 = slice_by_index(begin = var_17654_begin_0, end = var_17654_end_0, end_mask = var_17654_end_mask_0, x = var_17123_cast_fp16)[name = tensor("op_17654_cast_fp16")]; + tensor var_17661_begin_0 = const()[name = tensor("op_17661_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_17661_end_0 = const()[name = tensor("op_17661_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_17661_end_mask_0 = const()[name = tensor("op_17661_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17661_cast_fp16 = slice_by_index(begin = var_17661_begin_0, end = var_17661_end_0, end_mask = var_17661_end_mask_0, x = var_17123_cast_fp16)[name = tensor("op_17661_cast_fp16")]; + tensor var_17668_begin_0 = const()[name = tensor("op_17668_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_17668_end_0 = const()[name = tensor("op_17668_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_17668_end_mask_0 = const()[name = tensor("op_17668_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17668_cast_fp16 = slice_by_index(begin = var_17668_begin_0, end = var_17668_end_0, end_mask = var_17668_end_mask_0, x = var_17127_cast_fp16)[name = tensor("op_17668_cast_fp16")]; + tensor var_17675_begin_0 = const()[name = tensor("op_17675_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_17675_end_0 = const()[name = tensor("op_17675_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_17675_end_mask_0 = const()[name = tensor("op_17675_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17675_cast_fp16 = slice_by_index(begin = var_17675_begin_0, end = var_17675_end_0, end_mask = var_17675_end_mask_0, x = var_17127_cast_fp16)[name = tensor("op_17675_cast_fp16")]; + tensor var_17682_begin_0 = const()[name = tensor("op_17682_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_17682_end_0 = const()[name = tensor("op_17682_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_17682_end_mask_0 = const()[name = tensor("op_17682_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17682_cast_fp16 = slice_by_index(begin = var_17682_begin_0, end = var_17682_end_0, end_mask = var_17682_end_mask_0, x = var_17127_cast_fp16)[name = tensor("op_17682_cast_fp16")]; + tensor var_17689_begin_0 = const()[name = tensor("op_17689_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_17689_end_0 = const()[name = tensor("op_17689_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_17689_end_mask_0 = const()[name = tensor("op_17689_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17689_cast_fp16 = slice_by_index(begin = var_17689_begin_0, end = var_17689_end_0, end_mask = var_17689_end_mask_0, x = var_17127_cast_fp16)[name = tensor("op_17689_cast_fp16")]; + tensor k_23_perm_0 = const()[name = tensor("k_23_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_17694_begin_0 = const()[name = tensor("op_17694_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_17694_end_0 = const()[name = tensor("op_17694_end_0"), val = tensor([1, 1500, 1, 64])]; + tensor var_17694_end_mask_0 = const()[name = tensor("op_17694_end_mask_0"), val = tensor([true, true, true, false])]; + tensor transpose_20 = transpose(perm = k_23_perm_0, x = key_23_cast_fp16)[name = tensor("transpose_20")]; + tensor var_17694_cast_fp16 = slice_by_index(begin = var_17694_begin_0, end = var_17694_end_0, end_mask = var_17694_end_mask_0, x = transpose_20)[name = tensor("op_17694_cast_fp16")]; + tensor var_17698_begin_0 = const()[name = tensor("op_17698_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor var_17698_end_0 = const()[name = tensor("op_17698_end_0"), val = tensor([1, 1500, 1, 128])]; + tensor var_17698_end_mask_0 = const()[name = tensor("op_17698_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17698_cast_fp16 = slice_by_index(begin = var_17698_begin_0, end = var_17698_end_0, end_mask = var_17698_end_mask_0, x = transpose_20)[name = tensor("op_17698_cast_fp16")]; + tensor var_17702_begin_0 = const()[name = tensor("op_17702_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_17702_end_0 = const()[name = tensor("op_17702_end_0"), val = tensor([1, 1500, 1, 192])]; + tensor var_17702_end_mask_0 = const()[name = tensor("op_17702_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17702_cast_fp16 = slice_by_index(begin = var_17702_begin_0, end = var_17702_end_0, end_mask = var_17702_end_mask_0, x = transpose_20)[name = tensor("op_17702_cast_fp16")]; + tensor var_17706_begin_0 = const()[name = tensor("op_17706_begin_0"), val = tensor([0, 0, 0, 192])]; + tensor var_17706_end_0 = const()[name = tensor("op_17706_end_0"), val = tensor([1, 1500, 1, 256])]; + tensor var_17706_end_mask_0 = const()[name = tensor("op_17706_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17706_cast_fp16 = slice_by_index(begin = var_17706_begin_0, end = var_17706_end_0, end_mask = var_17706_end_mask_0, x = transpose_20)[name = tensor("op_17706_cast_fp16")]; + tensor var_17710_begin_0 = const()[name = tensor("op_17710_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_17710_end_0 = const()[name = tensor("op_17710_end_0"), val = tensor([1, 1500, 1, 320])]; + tensor var_17710_end_mask_0 = const()[name = tensor("op_17710_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17710_cast_fp16 = slice_by_index(begin = var_17710_begin_0, end = var_17710_end_0, end_mask = var_17710_end_mask_0, x = transpose_20)[name = tensor("op_17710_cast_fp16")]; + tensor var_17714_begin_0 = const()[name = tensor("op_17714_begin_0"), val = tensor([0, 0, 0, 320])]; + tensor var_17714_end_0 = const()[name = tensor("op_17714_end_0"), val = tensor([1, 1500, 1, 384])]; + tensor var_17714_end_mask_0 = const()[name = tensor("op_17714_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17714_cast_fp16 = slice_by_index(begin = var_17714_begin_0, end = var_17714_end_0, end_mask = var_17714_end_mask_0, x = transpose_20)[name = tensor("op_17714_cast_fp16")]; + tensor var_17718_begin_0 = const()[name = tensor("op_17718_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_17718_end_0 = const()[name = tensor("op_17718_end_0"), val = tensor([1, 1500, 1, 448])]; + tensor var_17718_end_mask_0 = const()[name = tensor("op_17718_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17718_cast_fp16 = slice_by_index(begin = var_17718_begin_0, end = var_17718_end_0, end_mask = var_17718_end_mask_0, x = transpose_20)[name = tensor("op_17718_cast_fp16")]; + tensor var_17722_begin_0 = const()[name = tensor("op_17722_begin_0"), val = tensor([0, 0, 0, 448])]; + tensor var_17722_end_0 = const()[name = tensor("op_17722_end_0"), val = tensor([1, 1500, 1, 512])]; + tensor var_17722_end_mask_0 = const()[name = tensor("op_17722_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17722_cast_fp16 = slice_by_index(begin = var_17722_begin_0, end = var_17722_end_0, end_mask = var_17722_end_mask_0, x = transpose_20)[name = tensor("op_17722_cast_fp16")]; + tensor var_17726_begin_0 = const()[name = tensor("op_17726_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_17726_end_0 = const()[name = tensor("op_17726_end_0"), val = tensor([1, 1500, 1, 576])]; + tensor var_17726_end_mask_0 = const()[name = tensor("op_17726_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17726_cast_fp16 = slice_by_index(begin = var_17726_begin_0, end = var_17726_end_0, end_mask = var_17726_end_mask_0, x = transpose_20)[name = tensor("op_17726_cast_fp16")]; + tensor var_17730_begin_0 = const()[name = tensor("op_17730_begin_0"), val = tensor([0, 0, 0, 576])]; + tensor var_17730_end_0 = const()[name = tensor("op_17730_end_0"), val = tensor([1, 1500, 1, 640])]; + tensor var_17730_end_mask_0 = const()[name = tensor("op_17730_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17730_cast_fp16 = slice_by_index(begin = var_17730_begin_0, end = var_17730_end_0, end_mask = var_17730_end_mask_0, x = transpose_20)[name = tensor("op_17730_cast_fp16")]; + tensor var_17734_begin_0 = const()[name = tensor("op_17734_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_17734_end_0 = const()[name = tensor("op_17734_end_0"), val = tensor([1, 1500, 1, 704])]; + tensor var_17734_end_mask_0 = const()[name = tensor("op_17734_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17734_cast_fp16 = slice_by_index(begin = var_17734_begin_0, end = var_17734_end_0, end_mask = var_17734_end_mask_0, x = transpose_20)[name = tensor("op_17734_cast_fp16")]; + tensor var_17738_begin_0 = const()[name = tensor("op_17738_begin_0"), val = tensor([0, 0, 0, 704])]; + tensor var_17738_end_0 = const()[name = tensor("op_17738_end_0"), val = tensor([1, 1500, 1, 768])]; + tensor var_17738_end_mask_0 = const()[name = tensor("op_17738_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17738_cast_fp16 = slice_by_index(begin = var_17738_begin_0, end = var_17738_end_0, end_mask = var_17738_end_mask_0, x = transpose_20)[name = tensor("op_17738_cast_fp16")]; + tensor var_17742_begin_0 = const()[name = tensor("op_17742_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_17742_end_0 = const()[name = tensor("op_17742_end_0"), val = tensor([1, 1500, 1, 832])]; + tensor var_17742_end_mask_0 = const()[name = tensor("op_17742_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17742_cast_fp16 = slice_by_index(begin = var_17742_begin_0, end = var_17742_end_0, end_mask = var_17742_end_mask_0, x = transpose_20)[name = tensor("op_17742_cast_fp16")]; + tensor var_17746_begin_0 = const()[name = tensor("op_17746_begin_0"), val = tensor([0, 0, 0, 832])]; + tensor var_17746_end_0 = const()[name = tensor("op_17746_end_0"), val = tensor([1, 1500, 1, 896])]; + tensor var_17746_end_mask_0 = const()[name = tensor("op_17746_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17746_cast_fp16 = slice_by_index(begin = var_17746_begin_0, end = var_17746_end_0, end_mask = var_17746_end_mask_0, x = transpose_20)[name = tensor("op_17746_cast_fp16")]; + tensor var_17750_begin_0 = const()[name = tensor("op_17750_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_17750_end_0 = const()[name = tensor("op_17750_end_0"), val = tensor([1, 1500, 1, 960])]; + tensor var_17750_end_mask_0 = const()[name = tensor("op_17750_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17750_cast_fp16 = slice_by_index(begin = var_17750_begin_0, end = var_17750_end_0, end_mask = var_17750_end_mask_0, x = transpose_20)[name = tensor("op_17750_cast_fp16")]; + tensor var_17754_begin_0 = const()[name = tensor("op_17754_begin_0"), val = tensor([0, 0, 0, 960])]; + tensor var_17754_end_0 = const()[name = tensor("op_17754_end_0"), val = tensor([1, 1500, 1, 1024])]; + tensor var_17754_end_mask_0 = const()[name = tensor("op_17754_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17754_cast_fp16 = slice_by_index(begin = var_17754_begin_0, end = var_17754_end_0, end_mask = var_17754_end_mask_0, x = transpose_20)[name = tensor("op_17754_cast_fp16")]; + tensor var_17758_begin_0 = const()[name = tensor("op_17758_begin_0"), val = tensor([0, 0, 0, 1024])]; + tensor var_17758_end_0 = const()[name = tensor("op_17758_end_0"), val = tensor([1, 1500, 1, 1088])]; + tensor var_17758_end_mask_0 = const()[name = tensor("op_17758_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17758_cast_fp16 = slice_by_index(begin = var_17758_begin_0, end = var_17758_end_0, end_mask = var_17758_end_mask_0, x = transpose_20)[name = tensor("op_17758_cast_fp16")]; + tensor var_17762_begin_0 = const()[name = tensor("op_17762_begin_0"), val = tensor([0, 0, 0, 1088])]; + tensor var_17762_end_0 = const()[name = tensor("op_17762_end_0"), val = tensor([1, 1500, 1, 1152])]; + tensor var_17762_end_mask_0 = const()[name = tensor("op_17762_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17762_cast_fp16 = slice_by_index(begin = var_17762_begin_0, end = var_17762_end_0, end_mask = var_17762_end_mask_0, x = transpose_20)[name = tensor("op_17762_cast_fp16")]; + tensor var_17766_begin_0 = const()[name = tensor("op_17766_begin_0"), val = tensor([0, 0, 0, 1152])]; + tensor var_17766_end_0 = const()[name = tensor("op_17766_end_0"), val = tensor([1, 1500, 1, 1216])]; + tensor var_17766_end_mask_0 = const()[name = tensor("op_17766_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17766_cast_fp16 = slice_by_index(begin = var_17766_begin_0, end = var_17766_end_0, end_mask = var_17766_end_mask_0, x = transpose_20)[name = tensor("op_17766_cast_fp16")]; + tensor var_17770_begin_0 = const()[name = tensor("op_17770_begin_0"), val = tensor([0, 0, 0, 1216])]; + tensor var_17770_end_0 = const()[name = tensor("op_17770_end_0"), val = tensor([1, 1500, 1, 1280])]; + tensor var_17770_end_mask_0 = const()[name = tensor("op_17770_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_17770_cast_fp16 = slice_by_index(begin = var_17770_begin_0, end = var_17770_end_0, end_mask = var_17770_end_mask_0, x = transpose_20)[name = tensor("op_17770_cast_fp16")]; + tensor var_17772_begin_0 = const()[name = tensor("op_17772_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_17772_end_0 = const()[name = tensor("op_17772_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_17772_end_mask_0 = const()[name = tensor("op_17772_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_17772_cast_fp16 = slice_by_index(begin = var_17772_begin_0, end = var_17772_end_0, end_mask = var_17772_end_mask_0, x = value_23_cast_fp16)[name = tensor("op_17772_cast_fp16")]; + tensor var_17776_begin_0 = const()[name = tensor("op_17776_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_17776_end_0 = const()[name = tensor("op_17776_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_17776_end_mask_0 = const()[name = tensor("op_17776_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_17776_cast_fp16 = slice_by_index(begin = var_17776_begin_0, end = var_17776_end_0, end_mask = var_17776_end_mask_0, x = value_23_cast_fp16)[name = tensor("op_17776_cast_fp16")]; + tensor var_17780_begin_0 = const()[name = tensor("op_17780_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_17780_end_0 = const()[name = tensor("op_17780_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_17780_end_mask_0 = const()[name = tensor("op_17780_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_17780_cast_fp16 = slice_by_index(begin = var_17780_begin_0, end = var_17780_end_0, end_mask = var_17780_end_mask_0, x = value_23_cast_fp16)[name = tensor("op_17780_cast_fp16")]; + tensor var_17784_begin_0 = const()[name = tensor("op_17784_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_17784_end_0 = const()[name = tensor("op_17784_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_17784_end_mask_0 = const()[name = tensor("op_17784_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_17784_cast_fp16 = slice_by_index(begin = var_17784_begin_0, end = var_17784_end_0, end_mask = var_17784_end_mask_0, x = value_23_cast_fp16)[name = tensor("op_17784_cast_fp16")]; + tensor var_17788_begin_0 = const()[name = tensor("op_17788_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_17788_end_0 = const()[name = tensor("op_17788_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_17788_end_mask_0 = const()[name = tensor("op_17788_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_17788_cast_fp16 = slice_by_index(begin = var_17788_begin_0, end = var_17788_end_0, end_mask = var_17788_end_mask_0, x = value_23_cast_fp16)[name = tensor("op_17788_cast_fp16")]; + tensor var_17792_begin_0 = const()[name = tensor("op_17792_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_17792_end_0 = const()[name = tensor("op_17792_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_17792_end_mask_0 = const()[name = tensor("op_17792_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_17792_cast_fp16 = slice_by_index(begin = var_17792_begin_0, end = var_17792_end_0, end_mask = var_17792_end_mask_0, x = value_23_cast_fp16)[name = tensor("op_17792_cast_fp16")]; + tensor var_17796_begin_0 = const()[name = tensor("op_17796_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_17796_end_0 = const()[name = tensor("op_17796_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_17796_end_mask_0 = const()[name = tensor("op_17796_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_17796_cast_fp16 = slice_by_index(begin = var_17796_begin_0, end = var_17796_end_0, end_mask = var_17796_end_mask_0, x = value_23_cast_fp16)[name = tensor("op_17796_cast_fp16")]; + tensor var_17800_begin_0 = const()[name = tensor("op_17800_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_17800_end_0 = const()[name = tensor("op_17800_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_17800_end_mask_0 = const()[name = tensor("op_17800_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_17800_cast_fp16 = slice_by_index(begin = var_17800_begin_0, end = var_17800_end_0, end_mask = var_17800_end_mask_0, x = value_23_cast_fp16)[name = tensor("op_17800_cast_fp16")]; + tensor var_17804_begin_0 = const()[name = tensor("op_17804_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_17804_end_0 = const()[name = tensor("op_17804_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_17804_end_mask_0 = const()[name = tensor("op_17804_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_17804_cast_fp16 = slice_by_index(begin = var_17804_begin_0, end = var_17804_end_0, end_mask = var_17804_end_mask_0, x = value_23_cast_fp16)[name = tensor("op_17804_cast_fp16")]; + tensor var_17808_begin_0 = const()[name = tensor("op_17808_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_17808_end_0 = const()[name = tensor("op_17808_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_17808_end_mask_0 = const()[name = tensor("op_17808_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_17808_cast_fp16 = slice_by_index(begin = var_17808_begin_0, end = var_17808_end_0, end_mask = var_17808_end_mask_0, x = value_23_cast_fp16)[name = tensor("op_17808_cast_fp16")]; + tensor var_17812_begin_0 = const()[name = tensor("op_17812_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_17812_end_0 = const()[name = tensor("op_17812_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_17812_end_mask_0 = const()[name = tensor("op_17812_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_17812_cast_fp16 = slice_by_index(begin = var_17812_begin_0, end = var_17812_end_0, end_mask = var_17812_end_mask_0, x = value_23_cast_fp16)[name = tensor("op_17812_cast_fp16")]; + tensor var_17816_begin_0 = const()[name = tensor("op_17816_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_17816_end_0 = const()[name = tensor("op_17816_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_17816_end_mask_0 = const()[name = tensor("op_17816_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_17816_cast_fp16 = slice_by_index(begin = var_17816_begin_0, end = var_17816_end_0, end_mask = var_17816_end_mask_0, x = value_23_cast_fp16)[name = tensor("op_17816_cast_fp16")]; + tensor var_17820_begin_0 = const()[name = tensor("op_17820_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_17820_end_0 = const()[name = tensor("op_17820_end_0"), val = tensor([1, 832, 1, 1500])]; + tensor var_17820_end_mask_0 = const()[name = tensor("op_17820_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_17820_cast_fp16 = slice_by_index(begin = var_17820_begin_0, end = var_17820_end_0, end_mask = var_17820_end_mask_0, x = value_23_cast_fp16)[name = tensor("op_17820_cast_fp16")]; + tensor var_17824_begin_0 = const()[name = tensor("op_17824_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_17824_end_0 = const()[name = tensor("op_17824_end_0"), val = tensor([1, 896, 1, 1500])]; + tensor var_17824_end_mask_0 = const()[name = tensor("op_17824_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_17824_cast_fp16 = slice_by_index(begin = var_17824_begin_0, end = var_17824_end_0, end_mask = var_17824_end_mask_0, x = value_23_cast_fp16)[name = tensor("op_17824_cast_fp16")]; + tensor var_17828_begin_0 = const()[name = tensor("op_17828_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_17828_end_0 = const()[name = tensor("op_17828_end_0"), val = tensor([1, 960, 1, 1500])]; + tensor var_17828_end_mask_0 = const()[name = tensor("op_17828_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_17828_cast_fp16 = slice_by_index(begin = var_17828_begin_0, end = var_17828_end_0, end_mask = var_17828_end_mask_0, x = value_23_cast_fp16)[name = tensor("op_17828_cast_fp16")]; + tensor var_17832_begin_0 = const()[name = tensor("op_17832_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_17832_end_0 = const()[name = tensor("op_17832_end_0"), val = tensor([1, 1024, 1, 1500])]; + tensor var_17832_end_mask_0 = const()[name = tensor("op_17832_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_17832_cast_fp16 = slice_by_index(begin = var_17832_begin_0, end = var_17832_end_0, end_mask = var_17832_end_mask_0, x = value_23_cast_fp16)[name = tensor("op_17832_cast_fp16")]; + tensor var_17836_begin_0 = const()[name = tensor("op_17836_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_17836_end_0 = const()[name = tensor("op_17836_end_0"), val = tensor([1, 1088, 1, 1500])]; + tensor var_17836_end_mask_0 = const()[name = tensor("op_17836_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_17836_cast_fp16 = slice_by_index(begin = var_17836_begin_0, end = var_17836_end_0, end_mask = var_17836_end_mask_0, x = value_23_cast_fp16)[name = tensor("op_17836_cast_fp16")]; + tensor var_17840_begin_0 = const()[name = tensor("op_17840_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_17840_end_0 = const()[name = tensor("op_17840_end_0"), val = tensor([1, 1152, 1, 1500])]; + tensor var_17840_end_mask_0 = const()[name = tensor("op_17840_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_17840_cast_fp16 = slice_by_index(begin = var_17840_begin_0, end = var_17840_end_0, end_mask = var_17840_end_mask_0, x = value_23_cast_fp16)[name = tensor("op_17840_cast_fp16")]; + tensor var_17844_begin_0 = const()[name = tensor("op_17844_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_17844_end_0 = const()[name = tensor("op_17844_end_0"), val = tensor([1, 1216, 1, 1500])]; + tensor var_17844_end_mask_0 = const()[name = tensor("op_17844_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_17844_cast_fp16 = slice_by_index(begin = var_17844_begin_0, end = var_17844_end_0, end_mask = var_17844_end_mask_0, x = value_23_cast_fp16)[name = tensor("op_17844_cast_fp16")]; + tensor var_17848_begin_0 = const()[name = tensor("op_17848_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_17848_end_0 = const()[name = tensor("op_17848_end_0"), val = tensor([1, 1280, 1, 1500])]; + tensor var_17848_end_mask_0 = const()[name = tensor("op_17848_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_17848_cast_fp16 = slice_by_index(begin = var_17848_begin_0, end = var_17848_end_0, end_mask = var_17848_end_mask_0, x = value_23_cast_fp16)[name = tensor("op_17848_cast_fp16")]; + tensor var_17852_equation_0 = const()[name = tensor("op_17852_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_17852_cast_fp16 = einsum(equation = var_17852_equation_0, values = (var_17694_cast_fp16, var_17136_cast_fp16))[name = tensor("op_17852_cast_fp16")]; + tensor var_17853_to_fp16 = const()[name = tensor("op_17853_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1761_cast_fp16 = mul(x = var_17852_cast_fp16, y = var_17853_to_fp16)[name = tensor("aw_chunk_1761_cast_fp16")]; + tensor var_17856_equation_0 = const()[name = tensor("op_17856_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_17856_cast_fp16 = einsum(equation = var_17856_equation_0, values = (var_17694_cast_fp16, var_17143_cast_fp16))[name = tensor("op_17856_cast_fp16")]; + tensor var_17857_to_fp16 = const()[name = tensor("op_17857_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1763_cast_fp16 = mul(x = var_17856_cast_fp16, y = var_17857_to_fp16)[name = tensor("aw_chunk_1763_cast_fp16")]; + tensor var_17860_equation_0 = const()[name = tensor("op_17860_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_17860_cast_fp16 = einsum(equation = var_17860_equation_0, values = (var_17694_cast_fp16, var_17150_cast_fp16))[name = tensor("op_17860_cast_fp16")]; + tensor var_17861_to_fp16 = const()[name = tensor("op_17861_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1765_cast_fp16 = mul(x = var_17860_cast_fp16, y = var_17861_to_fp16)[name = tensor("aw_chunk_1765_cast_fp16")]; + tensor var_17864_equation_0 = const()[name = tensor("op_17864_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_17864_cast_fp16 = einsum(equation = var_17864_equation_0, values = (var_17694_cast_fp16, var_17157_cast_fp16))[name = tensor("op_17864_cast_fp16")]; + tensor var_17865_to_fp16 = const()[name = tensor("op_17865_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1767_cast_fp16 = mul(x = var_17864_cast_fp16, y = var_17865_to_fp16)[name = tensor("aw_chunk_1767_cast_fp16")]; + tensor var_17868_equation_0 = const()[name = tensor("op_17868_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_17868_cast_fp16 = einsum(equation = var_17868_equation_0, values = (var_17698_cast_fp16, var_17164_cast_fp16))[name = tensor("op_17868_cast_fp16")]; + tensor var_17869_to_fp16 = const()[name = tensor("op_17869_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1769_cast_fp16 = mul(x = var_17868_cast_fp16, y = var_17869_to_fp16)[name = tensor("aw_chunk_1769_cast_fp16")]; + tensor var_17872_equation_0 = const()[name = tensor("op_17872_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_17872_cast_fp16 = einsum(equation = var_17872_equation_0, values = (var_17698_cast_fp16, var_17171_cast_fp16))[name = tensor("op_17872_cast_fp16")]; + tensor var_17873_to_fp16 = const()[name = tensor("op_17873_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1771_cast_fp16 = mul(x = var_17872_cast_fp16, y = var_17873_to_fp16)[name = tensor("aw_chunk_1771_cast_fp16")]; + tensor var_17876_equation_0 = const()[name = tensor("op_17876_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_17876_cast_fp16 = einsum(equation = var_17876_equation_0, values = (var_17698_cast_fp16, var_17178_cast_fp16))[name = tensor("op_17876_cast_fp16")]; + tensor var_17877_to_fp16 = const()[name = tensor("op_17877_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1773_cast_fp16 = mul(x = var_17876_cast_fp16, y = var_17877_to_fp16)[name = tensor("aw_chunk_1773_cast_fp16")]; + tensor var_17880_equation_0 = const()[name = tensor("op_17880_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_17880_cast_fp16 = einsum(equation = var_17880_equation_0, values = (var_17698_cast_fp16, var_17185_cast_fp16))[name = tensor("op_17880_cast_fp16")]; + tensor var_17881_to_fp16 = const()[name = tensor("op_17881_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1775_cast_fp16 = mul(x = var_17880_cast_fp16, y = var_17881_to_fp16)[name = tensor("aw_chunk_1775_cast_fp16")]; + tensor var_17884_equation_0 = const()[name = tensor("op_17884_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_17884_cast_fp16 = einsum(equation = var_17884_equation_0, values = (var_17702_cast_fp16, var_17192_cast_fp16))[name = tensor("op_17884_cast_fp16")]; + tensor var_17885_to_fp16 = const()[name = tensor("op_17885_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1777_cast_fp16 = mul(x = var_17884_cast_fp16, y = var_17885_to_fp16)[name = tensor("aw_chunk_1777_cast_fp16")]; + tensor var_17888_equation_0 = const()[name = tensor("op_17888_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_17888_cast_fp16 = einsum(equation = var_17888_equation_0, values = (var_17702_cast_fp16, var_17199_cast_fp16))[name = tensor("op_17888_cast_fp16")]; + tensor var_17889_to_fp16 = const()[name = tensor("op_17889_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1779_cast_fp16 = mul(x = var_17888_cast_fp16, y = var_17889_to_fp16)[name = tensor("aw_chunk_1779_cast_fp16")]; + tensor var_17892_equation_0 = const()[name = tensor("op_17892_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_17892_cast_fp16 = einsum(equation = var_17892_equation_0, values = (var_17702_cast_fp16, var_17206_cast_fp16))[name = tensor("op_17892_cast_fp16")]; + tensor var_17893_to_fp16 = const()[name = tensor("op_17893_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1781_cast_fp16 = mul(x = var_17892_cast_fp16, y = var_17893_to_fp16)[name = tensor("aw_chunk_1781_cast_fp16")]; + tensor var_17896_equation_0 = const()[name = tensor("op_17896_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_17896_cast_fp16 = einsum(equation = var_17896_equation_0, values = (var_17702_cast_fp16, var_17213_cast_fp16))[name = tensor("op_17896_cast_fp16")]; + tensor var_17897_to_fp16 = const()[name = tensor("op_17897_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1783_cast_fp16 = mul(x = var_17896_cast_fp16, y = var_17897_to_fp16)[name = tensor("aw_chunk_1783_cast_fp16")]; + tensor var_17900_equation_0 = const()[name = tensor("op_17900_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_17900_cast_fp16 = einsum(equation = var_17900_equation_0, values = (var_17706_cast_fp16, var_17220_cast_fp16))[name = tensor("op_17900_cast_fp16")]; + tensor var_17901_to_fp16 = const()[name = tensor("op_17901_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1785_cast_fp16 = mul(x = var_17900_cast_fp16, y = var_17901_to_fp16)[name = tensor("aw_chunk_1785_cast_fp16")]; + tensor var_17904_equation_0 = const()[name = tensor("op_17904_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_17904_cast_fp16 = einsum(equation = var_17904_equation_0, values = (var_17706_cast_fp16, var_17227_cast_fp16))[name = tensor("op_17904_cast_fp16")]; + tensor var_17905_to_fp16 = const()[name = tensor("op_17905_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1787_cast_fp16 = mul(x = var_17904_cast_fp16, y = var_17905_to_fp16)[name = tensor("aw_chunk_1787_cast_fp16")]; + tensor var_17908_equation_0 = const()[name = tensor("op_17908_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_17908_cast_fp16 = einsum(equation = var_17908_equation_0, values = (var_17706_cast_fp16, var_17234_cast_fp16))[name = tensor("op_17908_cast_fp16")]; + tensor var_17909_to_fp16 = const()[name = tensor("op_17909_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1789_cast_fp16 = mul(x = var_17908_cast_fp16, y = var_17909_to_fp16)[name = tensor("aw_chunk_1789_cast_fp16")]; + tensor var_17912_equation_0 = const()[name = tensor("op_17912_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_17912_cast_fp16 = einsum(equation = var_17912_equation_0, values = (var_17706_cast_fp16, var_17241_cast_fp16))[name = tensor("op_17912_cast_fp16")]; + tensor var_17913_to_fp16 = const()[name = tensor("op_17913_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1791_cast_fp16 = mul(x = var_17912_cast_fp16, y = var_17913_to_fp16)[name = tensor("aw_chunk_1791_cast_fp16")]; + tensor var_17916_equation_0 = const()[name = tensor("op_17916_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_17916_cast_fp16 = einsum(equation = var_17916_equation_0, values = (var_17710_cast_fp16, var_17248_cast_fp16))[name = tensor("op_17916_cast_fp16")]; + tensor var_17917_to_fp16 = const()[name = tensor("op_17917_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1793_cast_fp16 = mul(x = var_17916_cast_fp16, y = var_17917_to_fp16)[name = tensor("aw_chunk_1793_cast_fp16")]; + tensor var_17920_equation_0 = const()[name = tensor("op_17920_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_17920_cast_fp16 = einsum(equation = var_17920_equation_0, values = (var_17710_cast_fp16, var_17255_cast_fp16))[name = tensor("op_17920_cast_fp16")]; + tensor var_17921_to_fp16 = const()[name = tensor("op_17921_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1795_cast_fp16 = mul(x = var_17920_cast_fp16, y = var_17921_to_fp16)[name = tensor("aw_chunk_1795_cast_fp16")]; + tensor var_17924_equation_0 = const()[name = tensor("op_17924_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_17924_cast_fp16 = einsum(equation = var_17924_equation_0, values = (var_17710_cast_fp16, var_17262_cast_fp16))[name = tensor("op_17924_cast_fp16")]; + tensor var_17925_to_fp16 = const()[name = tensor("op_17925_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1797_cast_fp16 = mul(x = var_17924_cast_fp16, y = var_17925_to_fp16)[name = tensor("aw_chunk_1797_cast_fp16")]; + tensor var_17928_equation_0 = const()[name = tensor("op_17928_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_17928_cast_fp16 = einsum(equation = var_17928_equation_0, values = (var_17710_cast_fp16, var_17269_cast_fp16))[name = tensor("op_17928_cast_fp16")]; + tensor var_17929_to_fp16 = const()[name = tensor("op_17929_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1799_cast_fp16 = mul(x = var_17928_cast_fp16, y = var_17929_to_fp16)[name = tensor("aw_chunk_1799_cast_fp16")]; + tensor var_17932_equation_0 = const()[name = tensor("op_17932_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_17932_cast_fp16 = einsum(equation = var_17932_equation_0, values = (var_17714_cast_fp16, var_17276_cast_fp16))[name = tensor("op_17932_cast_fp16")]; + tensor var_17933_to_fp16 = const()[name = tensor("op_17933_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1801_cast_fp16 = mul(x = var_17932_cast_fp16, y = var_17933_to_fp16)[name = tensor("aw_chunk_1801_cast_fp16")]; + tensor var_17936_equation_0 = const()[name = tensor("op_17936_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_17936_cast_fp16 = einsum(equation = var_17936_equation_0, values = (var_17714_cast_fp16, var_17283_cast_fp16))[name = tensor("op_17936_cast_fp16")]; + tensor var_17937_to_fp16 = const()[name = tensor("op_17937_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1803_cast_fp16 = mul(x = var_17936_cast_fp16, y = var_17937_to_fp16)[name = tensor("aw_chunk_1803_cast_fp16")]; + tensor var_17940_equation_0 = const()[name = tensor("op_17940_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_17940_cast_fp16 = einsum(equation = var_17940_equation_0, values = (var_17714_cast_fp16, var_17290_cast_fp16))[name = tensor("op_17940_cast_fp16")]; + tensor var_17941_to_fp16 = const()[name = tensor("op_17941_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1805_cast_fp16 = mul(x = var_17940_cast_fp16, y = var_17941_to_fp16)[name = tensor("aw_chunk_1805_cast_fp16")]; + tensor var_17944_equation_0 = const()[name = tensor("op_17944_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_17944_cast_fp16 = einsum(equation = var_17944_equation_0, values = (var_17714_cast_fp16, var_17297_cast_fp16))[name = tensor("op_17944_cast_fp16")]; + tensor var_17945_to_fp16 = const()[name = tensor("op_17945_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1807_cast_fp16 = mul(x = var_17944_cast_fp16, y = var_17945_to_fp16)[name = tensor("aw_chunk_1807_cast_fp16")]; + tensor var_17948_equation_0 = const()[name = tensor("op_17948_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_17948_cast_fp16 = einsum(equation = var_17948_equation_0, values = (var_17718_cast_fp16, var_17304_cast_fp16))[name = tensor("op_17948_cast_fp16")]; + tensor var_17949_to_fp16 = const()[name = tensor("op_17949_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1809_cast_fp16 = mul(x = var_17948_cast_fp16, y = var_17949_to_fp16)[name = tensor("aw_chunk_1809_cast_fp16")]; + tensor var_17952_equation_0 = const()[name = tensor("op_17952_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_17952_cast_fp16 = einsum(equation = var_17952_equation_0, values = (var_17718_cast_fp16, var_17311_cast_fp16))[name = tensor("op_17952_cast_fp16")]; + tensor var_17953_to_fp16 = const()[name = tensor("op_17953_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1811_cast_fp16 = mul(x = var_17952_cast_fp16, y = var_17953_to_fp16)[name = tensor("aw_chunk_1811_cast_fp16")]; + tensor var_17956_equation_0 = const()[name = tensor("op_17956_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_17956_cast_fp16 = einsum(equation = var_17956_equation_0, values = (var_17718_cast_fp16, var_17318_cast_fp16))[name = tensor("op_17956_cast_fp16")]; + tensor var_17957_to_fp16 = const()[name = tensor("op_17957_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1813_cast_fp16 = mul(x = var_17956_cast_fp16, y = var_17957_to_fp16)[name = tensor("aw_chunk_1813_cast_fp16")]; + tensor var_17960_equation_0 = const()[name = tensor("op_17960_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_17960_cast_fp16 = einsum(equation = var_17960_equation_0, values = (var_17718_cast_fp16, var_17325_cast_fp16))[name = tensor("op_17960_cast_fp16")]; + tensor var_17961_to_fp16 = const()[name = tensor("op_17961_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1815_cast_fp16 = mul(x = var_17960_cast_fp16, y = var_17961_to_fp16)[name = tensor("aw_chunk_1815_cast_fp16")]; + tensor var_17964_equation_0 = const()[name = tensor("op_17964_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_17964_cast_fp16 = einsum(equation = var_17964_equation_0, values = (var_17722_cast_fp16, var_17332_cast_fp16))[name = tensor("op_17964_cast_fp16")]; + tensor var_17965_to_fp16 = const()[name = tensor("op_17965_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1817_cast_fp16 = mul(x = var_17964_cast_fp16, y = var_17965_to_fp16)[name = tensor("aw_chunk_1817_cast_fp16")]; + tensor var_17968_equation_0 = const()[name = tensor("op_17968_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_17968_cast_fp16 = einsum(equation = var_17968_equation_0, values = (var_17722_cast_fp16, var_17339_cast_fp16))[name = tensor("op_17968_cast_fp16")]; + tensor var_17969_to_fp16 = const()[name = tensor("op_17969_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1819_cast_fp16 = mul(x = var_17968_cast_fp16, y = var_17969_to_fp16)[name = tensor("aw_chunk_1819_cast_fp16")]; + tensor var_17972_equation_0 = const()[name = tensor("op_17972_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_17972_cast_fp16 = einsum(equation = var_17972_equation_0, values = (var_17722_cast_fp16, var_17346_cast_fp16))[name = tensor("op_17972_cast_fp16")]; + tensor var_17973_to_fp16 = const()[name = tensor("op_17973_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1821_cast_fp16 = mul(x = var_17972_cast_fp16, y = var_17973_to_fp16)[name = tensor("aw_chunk_1821_cast_fp16")]; + tensor var_17976_equation_0 = const()[name = tensor("op_17976_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_17976_cast_fp16 = einsum(equation = var_17976_equation_0, values = (var_17722_cast_fp16, var_17353_cast_fp16))[name = tensor("op_17976_cast_fp16")]; + tensor var_17977_to_fp16 = const()[name = tensor("op_17977_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1823_cast_fp16 = mul(x = var_17976_cast_fp16, y = var_17977_to_fp16)[name = tensor("aw_chunk_1823_cast_fp16")]; + tensor var_17980_equation_0 = const()[name = tensor("op_17980_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_17980_cast_fp16 = einsum(equation = var_17980_equation_0, values = (var_17726_cast_fp16, var_17360_cast_fp16))[name = tensor("op_17980_cast_fp16")]; + tensor var_17981_to_fp16 = const()[name = tensor("op_17981_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1825_cast_fp16 = mul(x = var_17980_cast_fp16, y = var_17981_to_fp16)[name = tensor("aw_chunk_1825_cast_fp16")]; + tensor var_17984_equation_0 = const()[name = tensor("op_17984_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_17984_cast_fp16 = einsum(equation = var_17984_equation_0, values = (var_17726_cast_fp16, var_17367_cast_fp16))[name = tensor("op_17984_cast_fp16")]; + tensor var_17985_to_fp16 = const()[name = tensor("op_17985_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1827_cast_fp16 = mul(x = var_17984_cast_fp16, y = var_17985_to_fp16)[name = tensor("aw_chunk_1827_cast_fp16")]; + tensor var_17988_equation_0 = const()[name = tensor("op_17988_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_17988_cast_fp16 = einsum(equation = var_17988_equation_0, values = (var_17726_cast_fp16, var_17374_cast_fp16))[name = tensor("op_17988_cast_fp16")]; + tensor var_17989_to_fp16 = const()[name = tensor("op_17989_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1829_cast_fp16 = mul(x = var_17988_cast_fp16, y = var_17989_to_fp16)[name = tensor("aw_chunk_1829_cast_fp16")]; + tensor var_17992_equation_0 = const()[name = tensor("op_17992_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_17992_cast_fp16 = einsum(equation = var_17992_equation_0, values = (var_17726_cast_fp16, var_17381_cast_fp16))[name = tensor("op_17992_cast_fp16")]; + tensor var_17993_to_fp16 = const()[name = tensor("op_17993_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1831_cast_fp16 = mul(x = var_17992_cast_fp16, y = var_17993_to_fp16)[name = tensor("aw_chunk_1831_cast_fp16")]; + tensor var_17996_equation_0 = const()[name = tensor("op_17996_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_17996_cast_fp16 = einsum(equation = var_17996_equation_0, values = (var_17730_cast_fp16, var_17388_cast_fp16))[name = tensor("op_17996_cast_fp16")]; + tensor var_17997_to_fp16 = const()[name = tensor("op_17997_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1833_cast_fp16 = mul(x = var_17996_cast_fp16, y = var_17997_to_fp16)[name = tensor("aw_chunk_1833_cast_fp16")]; + tensor var_18000_equation_0 = const()[name = tensor("op_18000_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_18000_cast_fp16 = einsum(equation = var_18000_equation_0, values = (var_17730_cast_fp16, var_17395_cast_fp16))[name = tensor("op_18000_cast_fp16")]; + tensor var_18001_to_fp16 = const()[name = tensor("op_18001_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1835_cast_fp16 = mul(x = var_18000_cast_fp16, y = var_18001_to_fp16)[name = tensor("aw_chunk_1835_cast_fp16")]; + tensor var_18004_equation_0 = const()[name = tensor("op_18004_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_18004_cast_fp16 = einsum(equation = var_18004_equation_0, values = (var_17730_cast_fp16, var_17402_cast_fp16))[name = tensor("op_18004_cast_fp16")]; + tensor var_18005_to_fp16 = const()[name = tensor("op_18005_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1837_cast_fp16 = mul(x = var_18004_cast_fp16, y = var_18005_to_fp16)[name = tensor("aw_chunk_1837_cast_fp16")]; + tensor var_18008_equation_0 = const()[name = tensor("op_18008_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_18008_cast_fp16 = einsum(equation = var_18008_equation_0, values = (var_17730_cast_fp16, var_17409_cast_fp16))[name = tensor("op_18008_cast_fp16")]; + tensor var_18009_to_fp16 = const()[name = tensor("op_18009_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1839_cast_fp16 = mul(x = var_18008_cast_fp16, y = var_18009_to_fp16)[name = tensor("aw_chunk_1839_cast_fp16")]; + tensor var_18012_equation_0 = const()[name = tensor("op_18012_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_18012_cast_fp16 = einsum(equation = var_18012_equation_0, values = (var_17734_cast_fp16, var_17416_cast_fp16))[name = tensor("op_18012_cast_fp16")]; + tensor var_18013_to_fp16 = const()[name = tensor("op_18013_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1841_cast_fp16 = mul(x = var_18012_cast_fp16, y = var_18013_to_fp16)[name = tensor("aw_chunk_1841_cast_fp16")]; + tensor var_18016_equation_0 = const()[name = tensor("op_18016_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_18016_cast_fp16 = einsum(equation = var_18016_equation_0, values = (var_17734_cast_fp16, var_17423_cast_fp16))[name = tensor("op_18016_cast_fp16")]; + tensor var_18017_to_fp16 = const()[name = tensor("op_18017_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1843_cast_fp16 = mul(x = var_18016_cast_fp16, y = var_18017_to_fp16)[name = tensor("aw_chunk_1843_cast_fp16")]; + tensor var_18020_equation_0 = const()[name = tensor("op_18020_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_18020_cast_fp16 = einsum(equation = var_18020_equation_0, values = (var_17734_cast_fp16, var_17430_cast_fp16))[name = tensor("op_18020_cast_fp16")]; + tensor var_18021_to_fp16 = const()[name = tensor("op_18021_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1845_cast_fp16 = mul(x = var_18020_cast_fp16, y = var_18021_to_fp16)[name = tensor("aw_chunk_1845_cast_fp16")]; + tensor var_18024_equation_0 = const()[name = tensor("op_18024_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_18024_cast_fp16 = einsum(equation = var_18024_equation_0, values = (var_17734_cast_fp16, var_17437_cast_fp16))[name = tensor("op_18024_cast_fp16")]; + tensor var_18025_to_fp16 = const()[name = tensor("op_18025_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1847_cast_fp16 = mul(x = var_18024_cast_fp16, y = var_18025_to_fp16)[name = tensor("aw_chunk_1847_cast_fp16")]; + tensor var_18028_equation_0 = const()[name = tensor("op_18028_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_18028_cast_fp16 = einsum(equation = var_18028_equation_0, values = (var_17738_cast_fp16, var_17444_cast_fp16))[name = tensor("op_18028_cast_fp16")]; + tensor var_18029_to_fp16 = const()[name = tensor("op_18029_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1849_cast_fp16 = mul(x = var_18028_cast_fp16, y = var_18029_to_fp16)[name = tensor("aw_chunk_1849_cast_fp16")]; + tensor var_18032_equation_0 = const()[name = tensor("op_18032_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_18032_cast_fp16 = einsum(equation = var_18032_equation_0, values = (var_17738_cast_fp16, var_17451_cast_fp16))[name = tensor("op_18032_cast_fp16")]; + tensor var_18033_to_fp16 = const()[name = tensor("op_18033_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1851_cast_fp16 = mul(x = var_18032_cast_fp16, y = var_18033_to_fp16)[name = tensor("aw_chunk_1851_cast_fp16")]; + tensor var_18036_equation_0 = const()[name = tensor("op_18036_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_18036_cast_fp16 = einsum(equation = var_18036_equation_0, values = (var_17738_cast_fp16, var_17458_cast_fp16))[name = tensor("op_18036_cast_fp16")]; + tensor var_18037_to_fp16 = const()[name = tensor("op_18037_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1853_cast_fp16 = mul(x = var_18036_cast_fp16, y = var_18037_to_fp16)[name = tensor("aw_chunk_1853_cast_fp16")]; + tensor var_18040_equation_0 = const()[name = tensor("op_18040_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_18040_cast_fp16 = einsum(equation = var_18040_equation_0, values = (var_17738_cast_fp16, var_17465_cast_fp16))[name = tensor("op_18040_cast_fp16")]; + tensor var_18041_to_fp16 = const()[name = tensor("op_18041_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1855_cast_fp16 = mul(x = var_18040_cast_fp16, y = var_18041_to_fp16)[name = tensor("aw_chunk_1855_cast_fp16")]; + tensor var_18044_equation_0 = const()[name = tensor("op_18044_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_18044_cast_fp16 = einsum(equation = var_18044_equation_0, values = (var_17742_cast_fp16, var_17472_cast_fp16))[name = tensor("op_18044_cast_fp16")]; + tensor var_18045_to_fp16 = const()[name = tensor("op_18045_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1857_cast_fp16 = mul(x = var_18044_cast_fp16, y = var_18045_to_fp16)[name = tensor("aw_chunk_1857_cast_fp16")]; + tensor var_18048_equation_0 = const()[name = tensor("op_18048_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_18048_cast_fp16 = einsum(equation = var_18048_equation_0, values = (var_17742_cast_fp16, var_17479_cast_fp16))[name = tensor("op_18048_cast_fp16")]; + tensor var_18049_to_fp16 = const()[name = tensor("op_18049_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1859_cast_fp16 = mul(x = var_18048_cast_fp16, y = var_18049_to_fp16)[name = tensor("aw_chunk_1859_cast_fp16")]; + tensor var_18052_equation_0 = const()[name = tensor("op_18052_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_18052_cast_fp16 = einsum(equation = var_18052_equation_0, values = (var_17742_cast_fp16, var_17486_cast_fp16))[name = tensor("op_18052_cast_fp16")]; + tensor var_18053_to_fp16 = const()[name = tensor("op_18053_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1861_cast_fp16 = mul(x = var_18052_cast_fp16, y = var_18053_to_fp16)[name = tensor("aw_chunk_1861_cast_fp16")]; + tensor var_18056_equation_0 = const()[name = tensor("op_18056_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_18056_cast_fp16 = einsum(equation = var_18056_equation_0, values = (var_17742_cast_fp16, var_17493_cast_fp16))[name = tensor("op_18056_cast_fp16")]; + tensor var_18057_to_fp16 = const()[name = tensor("op_18057_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1863_cast_fp16 = mul(x = var_18056_cast_fp16, y = var_18057_to_fp16)[name = tensor("aw_chunk_1863_cast_fp16")]; + tensor var_18060_equation_0 = const()[name = tensor("op_18060_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_18060_cast_fp16 = einsum(equation = var_18060_equation_0, values = (var_17746_cast_fp16, var_17500_cast_fp16))[name = tensor("op_18060_cast_fp16")]; + tensor var_18061_to_fp16 = const()[name = tensor("op_18061_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1865_cast_fp16 = mul(x = var_18060_cast_fp16, y = var_18061_to_fp16)[name = tensor("aw_chunk_1865_cast_fp16")]; + tensor var_18064_equation_0 = const()[name = tensor("op_18064_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_18064_cast_fp16 = einsum(equation = var_18064_equation_0, values = (var_17746_cast_fp16, var_17507_cast_fp16))[name = tensor("op_18064_cast_fp16")]; + tensor var_18065_to_fp16 = const()[name = tensor("op_18065_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1867_cast_fp16 = mul(x = var_18064_cast_fp16, y = var_18065_to_fp16)[name = tensor("aw_chunk_1867_cast_fp16")]; + tensor var_18068_equation_0 = const()[name = tensor("op_18068_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_18068_cast_fp16 = einsum(equation = var_18068_equation_0, values = (var_17746_cast_fp16, var_17514_cast_fp16))[name = tensor("op_18068_cast_fp16")]; + tensor var_18069_to_fp16 = const()[name = tensor("op_18069_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1869_cast_fp16 = mul(x = var_18068_cast_fp16, y = var_18069_to_fp16)[name = tensor("aw_chunk_1869_cast_fp16")]; + tensor var_18072_equation_0 = const()[name = tensor("op_18072_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_18072_cast_fp16 = einsum(equation = var_18072_equation_0, values = (var_17746_cast_fp16, var_17521_cast_fp16))[name = tensor("op_18072_cast_fp16")]; + tensor var_18073_to_fp16 = const()[name = tensor("op_18073_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1871_cast_fp16 = mul(x = var_18072_cast_fp16, y = var_18073_to_fp16)[name = tensor("aw_chunk_1871_cast_fp16")]; + tensor var_18076_equation_0 = const()[name = tensor("op_18076_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_18076_cast_fp16 = einsum(equation = var_18076_equation_0, values = (var_17750_cast_fp16, var_17528_cast_fp16))[name = tensor("op_18076_cast_fp16")]; + tensor var_18077_to_fp16 = const()[name = tensor("op_18077_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1873_cast_fp16 = mul(x = var_18076_cast_fp16, y = var_18077_to_fp16)[name = tensor("aw_chunk_1873_cast_fp16")]; + tensor var_18080_equation_0 = const()[name = tensor("op_18080_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_18080_cast_fp16 = einsum(equation = var_18080_equation_0, values = (var_17750_cast_fp16, var_17535_cast_fp16))[name = tensor("op_18080_cast_fp16")]; + tensor var_18081_to_fp16 = const()[name = tensor("op_18081_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1875_cast_fp16 = mul(x = var_18080_cast_fp16, y = var_18081_to_fp16)[name = tensor("aw_chunk_1875_cast_fp16")]; + tensor var_18084_equation_0 = const()[name = tensor("op_18084_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_18084_cast_fp16 = einsum(equation = var_18084_equation_0, values = (var_17750_cast_fp16, var_17542_cast_fp16))[name = tensor("op_18084_cast_fp16")]; + tensor var_18085_to_fp16 = const()[name = tensor("op_18085_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1877_cast_fp16 = mul(x = var_18084_cast_fp16, y = var_18085_to_fp16)[name = tensor("aw_chunk_1877_cast_fp16")]; + tensor var_18088_equation_0 = const()[name = tensor("op_18088_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_18088_cast_fp16 = einsum(equation = var_18088_equation_0, values = (var_17750_cast_fp16, var_17549_cast_fp16))[name = tensor("op_18088_cast_fp16")]; + tensor var_18089_to_fp16 = const()[name = tensor("op_18089_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1879_cast_fp16 = mul(x = var_18088_cast_fp16, y = var_18089_to_fp16)[name = tensor("aw_chunk_1879_cast_fp16")]; + tensor var_18092_equation_0 = const()[name = tensor("op_18092_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_18092_cast_fp16 = einsum(equation = var_18092_equation_0, values = (var_17754_cast_fp16, var_17556_cast_fp16))[name = tensor("op_18092_cast_fp16")]; + tensor var_18093_to_fp16 = const()[name = tensor("op_18093_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1881_cast_fp16 = mul(x = var_18092_cast_fp16, y = var_18093_to_fp16)[name = tensor("aw_chunk_1881_cast_fp16")]; + tensor var_18096_equation_0 = const()[name = tensor("op_18096_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_18096_cast_fp16 = einsum(equation = var_18096_equation_0, values = (var_17754_cast_fp16, var_17563_cast_fp16))[name = tensor("op_18096_cast_fp16")]; + tensor var_18097_to_fp16 = const()[name = tensor("op_18097_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1883_cast_fp16 = mul(x = var_18096_cast_fp16, y = var_18097_to_fp16)[name = tensor("aw_chunk_1883_cast_fp16")]; + tensor var_18100_equation_0 = const()[name = tensor("op_18100_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_18100_cast_fp16 = einsum(equation = var_18100_equation_0, values = (var_17754_cast_fp16, var_17570_cast_fp16))[name = tensor("op_18100_cast_fp16")]; + tensor var_18101_to_fp16 = const()[name = tensor("op_18101_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1885_cast_fp16 = mul(x = var_18100_cast_fp16, y = var_18101_to_fp16)[name = tensor("aw_chunk_1885_cast_fp16")]; + tensor var_18104_equation_0 = const()[name = tensor("op_18104_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_18104_cast_fp16 = einsum(equation = var_18104_equation_0, values = (var_17754_cast_fp16, var_17577_cast_fp16))[name = tensor("op_18104_cast_fp16")]; + tensor var_18105_to_fp16 = const()[name = tensor("op_18105_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1887_cast_fp16 = mul(x = var_18104_cast_fp16, y = var_18105_to_fp16)[name = tensor("aw_chunk_1887_cast_fp16")]; + tensor var_18108_equation_0 = const()[name = tensor("op_18108_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_18108_cast_fp16 = einsum(equation = var_18108_equation_0, values = (var_17758_cast_fp16, var_17584_cast_fp16))[name = tensor("op_18108_cast_fp16")]; + tensor var_18109_to_fp16 = const()[name = tensor("op_18109_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1889_cast_fp16 = mul(x = var_18108_cast_fp16, y = var_18109_to_fp16)[name = tensor("aw_chunk_1889_cast_fp16")]; + tensor var_18112_equation_0 = const()[name = tensor("op_18112_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_18112_cast_fp16 = einsum(equation = var_18112_equation_0, values = (var_17758_cast_fp16, var_17591_cast_fp16))[name = tensor("op_18112_cast_fp16")]; + tensor var_18113_to_fp16 = const()[name = tensor("op_18113_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1891_cast_fp16 = mul(x = var_18112_cast_fp16, y = var_18113_to_fp16)[name = tensor("aw_chunk_1891_cast_fp16")]; + tensor var_18116_equation_0 = const()[name = tensor("op_18116_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_18116_cast_fp16 = einsum(equation = var_18116_equation_0, values = (var_17758_cast_fp16, var_17598_cast_fp16))[name = tensor("op_18116_cast_fp16")]; + tensor var_18117_to_fp16 = const()[name = tensor("op_18117_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1893_cast_fp16 = mul(x = var_18116_cast_fp16, y = var_18117_to_fp16)[name = tensor("aw_chunk_1893_cast_fp16")]; + tensor var_18120_equation_0 = const()[name = tensor("op_18120_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_18120_cast_fp16 = einsum(equation = var_18120_equation_0, values = (var_17758_cast_fp16, var_17605_cast_fp16))[name = tensor("op_18120_cast_fp16")]; + tensor var_18121_to_fp16 = const()[name = tensor("op_18121_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1895_cast_fp16 = mul(x = var_18120_cast_fp16, y = var_18121_to_fp16)[name = tensor("aw_chunk_1895_cast_fp16")]; + tensor var_18124_equation_0 = const()[name = tensor("op_18124_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_18124_cast_fp16 = einsum(equation = var_18124_equation_0, values = (var_17762_cast_fp16, var_17612_cast_fp16))[name = tensor("op_18124_cast_fp16")]; + tensor var_18125_to_fp16 = const()[name = tensor("op_18125_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1897_cast_fp16 = mul(x = var_18124_cast_fp16, y = var_18125_to_fp16)[name = tensor("aw_chunk_1897_cast_fp16")]; + tensor var_18128_equation_0 = const()[name = tensor("op_18128_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_18128_cast_fp16 = einsum(equation = var_18128_equation_0, values = (var_17762_cast_fp16, var_17619_cast_fp16))[name = tensor("op_18128_cast_fp16")]; + tensor var_18129_to_fp16 = const()[name = tensor("op_18129_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1899_cast_fp16 = mul(x = var_18128_cast_fp16, y = var_18129_to_fp16)[name = tensor("aw_chunk_1899_cast_fp16")]; + tensor var_18132_equation_0 = const()[name = tensor("op_18132_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_18132_cast_fp16 = einsum(equation = var_18132_equation_0, values = (var_17762_cast_fp16, var_17626_cast_fp16))[name = tensor("op_18132_cast_fp16")]; + tensor var_18133_to_fp16 = const()[name = tensor("op_18133_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1901_cast_fp16 = mul(x = var_18132_cast_fp16, y = var_18133_to_fp16)[name = tensor("aw_chunk_1901_cast_fp16")]; + tensor var_18136_equation_0 = const()[name = tensor("op_18136_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_18136_cast_fp16 = einsum(equation = var_18136_equation_0, values = (var_17762_cast_fp16, var_17633_cast_fp16))[name = tensor("op_18136_cast_fp16")]; + tensor var_18137_to_fp16 = const()[name = tensor("op_18137_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1903_cast_fp16 = mul(x = var_18136_cast_fp16, y = var_18137_to_fp16)[name = tensor("aw_chunk_1903_cast_fp16")]; + tensor var_18140_equation_0 = const()[name = tensor("op_18140_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_18140_cast_fp16 = einsum(equation = var_18140_equation_0, values = (var_17766_cast_fp16, var_17640_cast_fp16))[name = tensor("op_18140_cast_fp16")]; + tensor var_18141_to_fp16 = const()[name = tensor("op_18141_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1905_cast_fp16 = mul(x = var_18140_cast_fp16, y = var_18141_to_fp16)[name = tensor("aw_chunk_1905_cast_fp16")]; + tensor var_18144_equation_0 = const()[name = tensor("op_18144_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_18144_cast_fp16 = einsum(equation = var_18144_equation_0, values = (var_17766_cast_fp16, var_17647_cast_fp16))[name = tensor("op_18144_cast_fp16")]; + tensor var_18145_to_fp16 = const()[name = tensor("op_18145_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1907_cast_fp16 = mul(x = var_18144_cast_fp16, y = var_18145_to_fp16)[name = tensor("aw_chunk_1907_cast_fp16")]; + tensor var_18148_equation_0 = const()[name = tensor("op_18148_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_18148_cast_fp16 = einsum(equation = var_18148_equation_0, values = (var_17766_cast_fp16, var_17654_cast_fp16))[name = tensor("op_18148_cast_fp16")]; + tensor var_18149_to_fp16 = const()[name = tensor("op_18149_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1909_cast_fp16 = mul(x = var_18148_cast_fp16, y = var_18149_to_fp16)[name = tensor("aw_chunk_1909_cast_fp16")]; + tensor var_18152_equation_0 = const()[name = tensor("op_18152_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_18152_cast_fp16 = einsum(equation = var_18152_equation_0, values = (var_17766_cast_fp16, var_17661_cast_fp16))[name = tensor("op_18152_cast_fp16")]; + tensor var_18153_to_fp16 = const()[name = tensor("op_18153_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1911_cast_fp16 = mul(x = var_18152_cast_fp16, y = var_18153_to_fp16)[name = tensor("aw_chunk_1911_cast_fp16")]; + tensor var_18156_equation_0 = const()[name = tensor("op_18156_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_18156_cast_fp16 = einsum(equation = var_18156_equation_0, values = (var_17770_cast_fp16, var_17668_cast_fp16))[name = tensor("op_18156_cast_fp16")]; + tensor var_18157_to_fp16 = const()[name = tensor("op_18157_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1913_cast_fp16 = mul(x = var_18156_cast_fp16, y = var_18157_to_fp16)[name = tensor("aw_chunk_1913_cast_fp16")]; + tensor var_18160_equation_0 = const()[name = tensor("op_18160_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_18160_cast_fp16 = einsum(equation = var_18160_equation_0, values = (var_17770_cast_fp16, var_17675_cast_fp16))[name = tensor("op_18160_cast_fp16")]; + tensor var_18161_to_fp16 = const()[name = tensor("op_18161_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1915_cast_fp16 = mul(x = var_18160_cast_fp16, y = var_18161_to_fp16)[name = tensor("aw_chunk_1915_cast_fp16")]; + tensor var_18164_equation_0 = const()[name = tensor("op_18164_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_18164_cast_fp16 = einsum(equation = var_18164_equation_0, values = (var_17770_cast_fp16, var_17682_cast_fp16))[name = tensor("op_18164_cast_fp16")]; + tensor var_18165_to_fp16 = const()[name = tensor("op_18165_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1917_cast_fp16 = mul(x = var_18164_cast_fp16, y = var_18165_to_fp16)[name = tensor("aw_chunk_1917_cast_fp16")]; + tensor var_18168_equation_0 = const()[name = tensor("op_18168_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_18168_cast_fp16 = einsum(equation = var_18168_equation_0, values = (var_17770_cast_fp16, var_17689_cast_fp16))[name = tensor("op_18168_cast_fp16")]; + tensor var_18169_to_fp16 = const()[name = tensor("op_18169_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1919_cast_fp16 = mul(x = var_18168_cast_fp16, y = var_18169_to_fp16)[name = tensor("aw_chunk_1919_cast_fp16")]; + tensor var_18171_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1761_cast_fp16)[name = tensor("op_18171_cast_fp16")]; + tensor var_18172_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1763_cast_fp16)[name = tensor("op_18172_cast_fp16")]; + tensor var_18173_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1765_cast_fp16)[name = tensor("op_18173_cast_fp16")]; + tensor var_18174_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1767_cast_fp16)[name = tensor("op_18174_cast_fp16")]; + tensor var_18175_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1769_cast_fp16)[name = tensor("op_18175_cast_fp16")]; + tensor var_18176_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1771_cast_fp16)[name = tensor("op_18176_cast_fp16")]; + tensor var_18177_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1773_cast_fp16)[name = tensor("op_18177_cast_fp16")]; + tensor var_18178_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1775_cast_fp16)[name = tensor("op_18178_cast_fp16")]; + tensor var_18179_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1777_cast_fp16)[name = tensor("op_18179_cast_fp16")]; + tensor var_18180_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1779_cast_fp16)[name = tensor("op_18180_cast_fp16")]; + tensor var_18181_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1781_cast_fp16)[name = tensor("op_18181_cast_fp16")]; + tensor var_18182_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1783_cast_fp16)[name = tensor("op_18182_cast_fp16")]; + tensor var_18183_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1785_cast_fp16)[name = tensor("op_18183_cast_fp16")]; + tensor var_18184_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1787_cast_fp16)[name = tensor("op_18184_cast_fp16")]; + tensor var_18185_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1789_cast_fp16)[name = tensor("op_18185_cast_fp16")]; + tensor var_18186_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1791_cast_fp16)[name = tensor("op_18186_cast_fp16")]; + tensor var_18187_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1793_cast_fp16)[name = tensor("op_18187_cast_fp16")]; + tensor var_18188_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1795_cast_fp16)[name = tensor("op_18188_cast_fp16")]; + tensor var_18189_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1797_cast_fp16)[name = tensor("op_18189_cast_fp16")]; + tensor var_18190_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1799_cast_fp16)[name = tensor("op_18190_cast_fp16")]; + tensor var_18191_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1801_cast_fp16)[name = tensor("op_18191_cast_fp16")]; + tensor var_18192_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1803_cast_fp16)[name = tensor("op_18192_cast_fp16")]; + tensor var_18193_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1805_cast_fp16)[name = tensor("op_18193_cast_fp16")]; + tensor var_18194_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1807_cast_fp16)[name = tensor("op_18194_cast_fp16")]; + tensor var_18195_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1809_cast_fp16)[name = tensor("op_18195_cast_fp16")]; + tensor var_18196_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1811_cast_fp16)[name = tensor("op_18196_cast_fp16")]; + tensor var_18197_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1813_cast_fp16)[name = tensor("op_18197_cast_fp16")]; + tensor var_18198_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1815_cast_fp16)[name = tensor("op_18198_cast_fp16")]; + tensor var_18199_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1817_cast_fp16)[name = tensor("op_18199_cast_fp16")]; + tensor var_18200_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1819_cast_fp16)[name = tensor("op_18200_cast_fp16")]; + tensor var_18201_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1821_cast_fp16)[name = tensor("op_18201_cast_fp16")]; + tensor var_18202_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1823_cast_fp16)[name = tensor("op_18202_cast_fp16")]; + tensor var_18203_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1825_cast_fp16)[name = tensor("op_18203_cast_fp16")]; + tensor var_18204_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1827_cast_fp16)[name = tensor("op_18204_cast_fp16")]; + tensor var_18205_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1829_cast_fp16)[name = tensor("op_18205_cast_fp16")]; + tensor var_18206_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1831_cast_fp16)[name = tensor("op_18206_cast_fp16")]; + tensor var_18207_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1833_cast_fp16)[name = tensor("op_18207_cast_fp16")]; + tensor var_18208_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1835_cast_fp16)[name = tensor("op_18208_cast_fp16")]; + tensor var_18209_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1837_cast_fp16)[name = tensor("op_18209_cast_fp16")]; + tensor var_18210_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1839_cast_fp16)[name = tensor("op_18210_cast_fp16")]; + tensor var_18211_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1841_cast_fp16)[name = tensor("op_18211_cast_fp16")]; + tensor var_18212_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1843_cast_fp16)[name = tensor("op_18212_cast_fp16")]; + tensor var_18213_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1845_cast_fp16)[name = tensor("op_18213_cast_fp16")]; + tensor var_18214_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1847_cast_fp16)[name = tensor("op_18214_cast_fp16")]; + tensor var_18215_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1849_cast_fp16)[name = tensor("op_18215_cast_fp16")]; + tensor var_18216_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1851_cast_fp16)[name = tensor("op_18216_cast_fp16")]; + tensor var_18217_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1853_cast_fp16)[name = tensor("op_18217_cast_fp16")]; + tensor var_18218_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1855_cast_fp16)[name = tensor("op_18218_cast_fp16")]; + tensor var_18219_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1857_cast_fp16)[name = tensor("op_18219_cast_fp16")]; + tensor var_18220_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1859_cast_fp16)[name = tensor("op_18220_cast_fp16")]; + tensor var_18221_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1861_cast_fp16)[name = tensor("op_18221_cast_fp16")]; + tensor var_18222_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1863_cast_fp16)[name = tensor("op_18222_cast_fp16")]; + tensor var_18223_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1865_cast_fp16)[name = tensor("op_18223_cast_fp16")]; + tensor var_18224_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1867_cast_fp16)[name = tensor("op_18224_cast_fp16")]; + tensor var_18225_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1869_cast_fp16)[name = tensor("op_18225_cast_fp16")]; + tensor var_18226_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1871_cast_fp16)[name = tensor("op_18226_cast_fp16")]; + tensor var_18227_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1873_cast_fp16)[name = tensor("op_18227_cast_fp16")]; + tensor var_18228_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1875_cast_fp16)[name = tensor("op_18228_cast_fp16")]; + tensor var_18229_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1877_cast_fp16)[name = tensor("op_18229_cast_fp16")]; + tensor var_18230_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1879_cast_fp16)[name = tensor("op_18230_cast_fp16")]; + tensor var_18231_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1881_cast_fp16)[name = tensor("op_18231_cast_fp16")]; + tensor var_18232_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1883_cast_fp16)[name = tensor("op_18232_cast_fp16")]; + tensor var_18233_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1885_cast_fp16)[name = tensor("op_18233_cast_fp16")]; + tensor var_18234_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1887_cast_fp16)[name = tensor("op_18234_cast_fp16")]; + tensor var_18235_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1889_cast_fp16)[name = tensor("op_18235_cast_fp16")]; + tensor var_18236_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1891_cast_fp16)[name = tensor("op_18236_cast_fp16")]; + tensor var_18237_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1893_cast_fp16)[name = tensor("op_18237_cast_fp16")]; + tensor var_18238_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1895_cast_fp16)[name = tensor("op_18238_cast_fp16")]; + tensor var_18239_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1897_cast_fp16)[name = tensor("op_18239_cast_fp16")]; + tensor var_18240_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1899_cast_fp16)[name = tensor("op_18240_cast_fp16")]; + tensor var_18241_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1901_cast_fp16)[name = tensor("op_18241_cast_fp16")]; + tensor var_18242_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1903_cast_fp16)[name = tensor("op_18242_cast_fp16")]; + tensor var_18243_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1905_cast_fp16)[name = tensor("op_18243_cast_fp16")]; + tensor var_18244_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1907_cast_fp16)[name = tensor("op_18244_cast_fp16")]; + tensor var_18245_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1909_cast_fp16)[name = tensor("op_18245_cast_fp16")]; + tensor var_18246_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1911_cast_fp16)[name = tensor("op_18246_cast_fp16")]; + tensor var_18247_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1913_cast_fp16)[name = tensor("op_18247_cast_fp16")]; + tensor var_18248_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1915_cast_fp16)[name = tensor("op_18248_cast_fp16")]; + tensor var_18249_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1917_cast_fp16)[name = tensor("op_18249_cast_fp16")]; + tensor var_18250_cast_fp16 = softmax(axis = var_16996, x = aw_chunk_1919_cast_fp16)[name = tensor("op_18250_cast_fp16")]; + tensor var_18252_equation_0 = const()[name = tensor("op_18252_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18252_cast_fp16 = einsum(equation = var_18252_equation_0, values = (var_17772_cast_fp16, var_18171_cast_fp16))[name = tensor("op_18252_cast_fp16")]; + tensor var_18254_equation_0 = const()[name = tensor("op_18254_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18254_cast_fp16 = einsum(equation = var_18254_equation_0, values = (var_17772_cast_fp16, var_18172_cast_fp16))[name = tensor("op_18254_cast_fp16")]; + tensor var_18256_equation_0 = const()[name = tensor("op_18256_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18256_cast_fp16 = einsum(equation = var_18256_equation_0, values = (var_17772_cast_fp16, var_18173_cast_fp16))[name = tensor("op_18256_cast_fp16")]; + tensor var_18258_equation_0 = const()[name = tensor("op_18258_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18258_cast_fp16 = einsum(equation = var_18258_equation_0, values = (var_17772_cast_fp16, var_18174_cast_fp16))[name = tensor("op_18258_cast_fp16")]; + tensor var_18260_equation_0 = const()[name = tensor("op_18260_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18260_cast_fp16 = einsum(equation = var_18260_equation_0, values = (var_17776_cast_fp16, var_18175_cast_fp16))[name = tensor("op_18260_cast_fp16")]; + tensor var_18262_equation_0 = const()[name = tensor("op_18262_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18262_cast_fp16 = einsum(equation = var_18262_equation_0, values = (var_17776_cast_fp16, var_18176_cast_fp16))[name = tensor("op_18262_cast_fp16")]; + tensor var_18264_equation_0 = const()[name = tensor("op_18264_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18264_cast_fp16 = einsum(equation = var_18264_equation_0, values = (var_17776_cast_fp16, var_18177_cast_fp16))[name = tensor("op_18264_cast_fp16")]; + tensor var_18266_equation_0 = const()[name = tensor("op_18266_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18266_cast_fp16 = einsum(equation = var_18266_equation_0, values = (var_17776_cast_fp16, var_18178_cast_fp16))[name = tensor("op_18266_cast_fp16")]; + tensor var_18268_equation_0 = const()[name = tensor("op_18268_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18268_cast_fp16 = einsum(equation = var_18268_equation_0, values = (var_17780_cast_fp16, var_18179_cast_fp16))[name = tensor("op_18268_cast_fp16")]; + tensor var_18270_equation_0 = const()[name = tensor("op_18270_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18270_cast_fp16 = einsum(equation = var_18270_equation_0, values = (var_17780_cast_fp16, var_18180_cast_fp16))[name = tensor("op_18270_cast_fp16")]; + tensor var_18272_equation_0 = const()[name = tensor("op_18272_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18272_cast_fp16 = einsum(equation = var_18272_equation_0, values = (var_17780_cast_fp16, var_18181_cast_fp16))[name = tensor("op_18272_cast_fp16")]; + tensor var_18274_equation_0 = const()[name = tensor("op_18274_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18274_cast_fp16 = einsum(equation = var_18274_equation_0, values = (var_17780_cast_fp16, var_18182_cast_fp16))[name = tensor("op_18274_cast_fp16")]; + tensor var_18276_equation_0 = const()[name = tensor("op_18276_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18276_cast_fp16 = einsum(equation = var_18276_equation_0, values = (var_17784_cast_fp16, var_18183_cast_fp16))[name = tensor("op_18276_cast_fp16")]; + tensor var_18278_equation_0 = const()[name = tensor("op_18278_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18278_cast_fp16 = einsum(equation = var_18278_equation_0, values = (var_17784_cast_fp16, var_18184_cast_fp16))[name = tensor("op_18278_cast_fp16")]; + tensor var_18280_equation_0 = const()[name = tensor("op_18280_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18280_cast_fp16 = einsum(equation = var_18280_equation_0, values = (var_17784_cast_fp16, var_18185_cast_fp16))[name = tensor("op_18280_cast_fp16")]; + tensor var_18282_equation_0 = const()[name = tensor("op_18282_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18282_cast_fp16 = einsum(equation = var_18282_equation_0, values = (var_17784_cast_fp16, var_18186_cast_fp16))[name = tensor("op_18282_cast_fp16")]; + tensor var_18284_equation_0 = const()[name = tensor("op_18284_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18284_cast_fp16 = einsum(equation = var_18284_equation_0, values = (var_17788_cast_fp16, var_18187_cast_fp16))[name = tensor("op_18284_cast_fp16")]; + tensor var_18286_equation_0 = const()[name = tensor("op_18286_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18286_cast_fp16 = einsum(equation = var_18286_equation_0, values = (var_17788_cast_fp16, var_18188_cast_fp16))[name = tensor("op_18286_cast_fp16")]; + tensor var_18288_equation_0 = const()[name = tensor("op_18288_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18288_cast_fp16 = einsum(equation = var_18288_equation_0, values = (var_17788_cast_fp16, var_18189_cast_fp16))[name = tensor("op_18288_cast_fp16")]; + tensor var_18290_equation_0 = const()[name = tensor("op_18290_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18290_cast_fp16 = einsum(equation = var_18290_equation_0, values = (var_17788_cast_fp16, var_18190_cast_fp16))[name = tensor("op_18290_cast_fp16")]; + tensor var_18292_equation_0 = const()[name = tensor("op_18292_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18292_cast_fp16 = einsum(equation = var_18292_equation_0, values = (var_17792_cast_fp16, var_18191_cast_fp16))[name = tensor("op_18292_cast_fp16")]; + tensor var_18294_equation_0 = const()[name = tensor("op_18294_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18294_cast_fp16 = einsum(equation = var_18294_equation_0, values = (var_17792_cast_fp16, var_18192_cast_fp16))[name = tensor("op_18294_cast_fp16")]; + tensor var_18296_equation_0 = const()[name = tensor("op_18296_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18296_cast_fp16 = einsum(equation = var_18296_equation_0, values = (var_17792_cast_fp16, var_18193_cast_fp16))[name = tensor("op_18296_cast_fp16")]; + tensor var_18298_equation_0 = const()[name = tensor("op_18298_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18298_cast_fp16 = einsum(equation = var_18298_equation_0, values = (var_17792_cast_fp16, var_18194_cast_fp16))[name = tensor("op_18298_cast_fp16")]; + tensor var_18300_equation_0 = const()[name = tensor("op_18300_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18300_cast_fp16 = einsum(equation = var_18300_equation_0, values = (var_17796_cast_fp16, var_18195_cast_fp16))[name = tensor("op_18300_cast_fp16")]; + tensor var_18302_equation_0 = const()[name = tensor("op_18302_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18302_cast_fp16 = einsum(equation = var_18302_equation_0, values = (var_17796_cast_fp16, var_18196_cast_fp16))[name = tensor("op_18302_cast_fp16")]; + tensor var_18304_equation_0 = const()[name = tensor("op_18304_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18304_cast_fp16 = einsum(equation = var_18304_equation_0, values = (var_17796_cast_fp16, var_18197_cast_fp16))[name = tensor("op_18304_cast_fp16")]; + tensor var_18306_equation_0 = const()[name = tensor("op_18306_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18306_cast_fp16 = einsum(equation = var_18306_equation_0, values = (var_17796_cast_fp16, var_18198_cast_fp16))[name = tensor("op_18306_cast_fp16")]; + tensor var_18308_equation_0 = const()[name = tensor("op_18308_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18308_cast_fp16 = einsum(equation = var_18308_equation_0, values = (var_17800_cast_fp16, var_18199_cast_fp16))[name = tensor("op_18308_cast_fp16")]; + tensor var_18310_equation_0 = const()[name = tensor("op_18310_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18310_cast_fp16 = einsum(equation = var_18310_equation_0, values = (var_17800_cast_fp16, var_18200_cast_fp16))[name = tensor("op_18310_cast_fp16")]; + tensor var_18312_equation_0 = const()[name = tensor("op_18312_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18312_cast_fp16 = einsum(equation = var_18312_equation_0, values = (var_17800_cast_fp16, var_18201_cast_fp16))[name = tensor("op_18312_cast_fp16")]; + tensor var_18314_equation_0 = const()[name = tensor("op_18314_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18314_cast_fp16 = einsum(equation = var_18314_equation_0, values = (var_17800_cast_fp16, var_18202_cast_fp16))[name = tensor("op_18314_cast_fp16")]; + tensor var_18316_equation_0 = const()[name = tensor("op_18316_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18316_cast_fp16 = einsum(equation = var_18316_equation_0, values = (var_17804_cast_fp16, var_18203_cast_fp16))[name = tensor("op_18316_cast_fp16")]; + tensor var_18318_equation_0 = const()[name = tensor("op_18318_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18318_cast_fp16 = einsum(equation = var_18318_equation_0, values = (var_17804_cast_fp16, var_18204_cast_fp16))[name = tensor("op_18318_cast_fp16")]; + tensor var_18320_equation_0 = const()[name = tensor("op_18320_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18320_cast_fp16 = einsum(equation = var_18320_equation_0, values = (var_17804_cast_fp16, var_18205_cast_fp16))[name = tensor("op_18320_cast_fp16")]; + tensor var_18322_equation_0 = const()[name = tensor("op_18322_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18322_cast_fp16 = einsum(equation = var_18322_equation_0, values = (var_17804_cast_fp16, var_18206_cast_fp16))[name = tensor("op_18322_cast_fp16")]; + tensor var_18324_equation_0 = const()[name = tensor("op_18324_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18324_cast_fp16 = einsum(equation = var_18324_equation_0, values = (var_17808_cast_fp16, var_18207_cast_fp16))[name = tensor("op_18324_cast_fp16")]; + tensor var_18326_equation_0 = const()[name = tensor("op_18326_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18326_cast_fp16 = einsum(equation = var_18326_equation_0, values = (var_17808_cast_fp16, var_18208_cast_fp16))[name = tensor("op_18326_cast_fp16")]; + tensor var_18328_equation_0 = const()[name = tensor("op_18328_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18328_cast_fp16 = einsum(equation = var_18328_equation_0, values = (var_17808_cast_fp16, var_18209_cast_fp16))[name = tensor("op_18328_cast_fp16")]; + tensor var_18330_equation_0 = const()[name = tensor("op_18330_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18330_cast_fp16 = einsum(equation = var_18330_equation_0, values = (var_17808_cast_fp16, var_18210_cast_fp16))[name = tensor("op_18330_cast_fp16")]; + tensor var_18332_equation_0 = const()[name = tensor("op_18332_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18332_cast_fp16 = einsum(equation = var_18332_equation_0, values = (var_17812_cast_fp16, var_18211_cast_fp16))[name = tensor("op_18332_cast_fp16")]; + tensor var_18334_equation_0 = const()[name = tensor("op_18334_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18334_cast_fp16 = einsum(equation = var_18334_equation_0, values = (var_17812_cast_fp16, var_18212_cast_fp16))[name = tensor("op_18334_cast_fp16")]; + tensor var_18336_equation_0 = const()[name = tensor("op_18336_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18336_cast_fp16 = einsum(equation = var_18336_equation_0, values = (var_17812_cast_fp16, var_18213_cast_fp16))[name = tensor("op_18336_cast_fp16")]; + tensor var_18338_equation_0 = const()[name = tensor("op_18338_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18338_cast_fp16 = einsum(equation = var_18338_equation_0, values = (var_17812_cast_fp16, var_18214_cast_fp16))[name = tensor("op_18338_cast_fp16")]; + tensor var_18340_equation_0 = const()[name = tensor("op_18340_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18340_cast_fp16 = einsum(equation = var_18340_equation_0, values = (var_17816_cast_fp16, var_18215_cast_fp16))[name = tensor("op_18340_cast_fp16")]; + tensor var_18342_equation_0 = const()[name = tensor("op_18342_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18342_cast_fp16 = einsum(equation = var_18342_equation_0, values = (var_17816_cast_fp16, var_18216_cast_fp16))[name = tensor("op_18342_cast_fp16")]; + tensor var_18344_equation_0 = const()[name = tensor("op_18344_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18344_cast_fp16 = einsum(equation = var_18344_equation_0, values = (var_17816_cast_fp16, var_18217_cast_fp16))[name = tensor("op_18344_cast_fp16")]; + tensor var_18346_equation_0 = const()[name = tensor("op_18346_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18346_cast_fp16 = einsum(equation = var_18346_equation_0, values = (var_17816_cast_fp16, var_18218_cast_fp16))[name = tensor("op_18346_cast_fp16")]; + tensor var_18348_equation_0 = const()[name = tensor("op_18348_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18348_cast_fp16 = einsum(equation = var_18348_equation_0, values = (var_17820_cast_fp16, var_18219_cast_fp16))[name = tensor("op_18348_cast_fp16")]; + tensor var_18350_equation_0 = const()[name = tensor("op_18350_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18350_cast_fp16 = einsum(equation = var_18350_equation_0, values = (var_17820_cast_fp16, var_18220_cast_fp16))[name = tensor("op_18350_cast_fp16")]; + tensor var_18352_equation_0 = const()[name = tensor("op_18352_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18352_cast_fp16 = einsum(equation = var_18352_equation_0, values = (var_17820_cast_fp16, var_18221_cast_fp16))[name = tensor("op_18352_cast_fp16")]; + tensor var_18354_equation_0 = const()[name = tensor("op_18354_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18354_cast_fp16 = einsum(equation = var_18354_equation_0, values = (var_17820_cast_fp16, var_18222_cast_fp16))[name = tensor("op_18354_cast_fp16")]; + tensor var_18356_equation_0 = const()[name = tensor("op_18356_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18356_cast_fp16 = einsum(equation = var_18356_equation_0, values = (var_17824_cast_fp16, var_18223_cast_fp16))[name = tensor("op_18356_cast_fp16")]; + tensor var_18358_equation_0 = const()[name = tensor("op_18358_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18358_cast_fp16 = einsum(equation = var_18358_equation_0, values = (var_17824_cast_fp16, var_18224_cast_fp16))[name = tensor("op_18358_cast_fp16")]; + tensor var_18360_equation_0 = const()[name = tensor("op_18360_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18360_cast_fp16 = einsum(equation = var_18360_equation_0, values = (var_17824_cast_fp16, var_18225_cast_fp16))[name = tensor("op_18360_cast_fp16")]; + tensor var_18362_equation_0 = const()[name = tensor("op_18362_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18362_cast_fp16 = einsum(equation = var_18362_equation_0, values = (var_17824_cast_fp16, var_18226_cast_fp16))[name = tensor("op_18362_cast_fp16")]; + tensor var_18364_equation_0 = const()[name = tensor("op_18364_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18364_cast_fp16 = einsum(equation = var_18364_equation_0, values = (var_17828_cast_fp16, var_18227_cast_fp16))[name = tensor("op_18364_cast_fp16")]; + tensor var_18366_equation_0 = const()[name = tensor("op_18366_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18366_cast_fp16 = einsum(equation = var_18366_equation_0, values = (var_17828_cast_fp16, var_18228_cast_fp16))[name = tensor("op_18366_cast_fp16")]; + tensor var_18368_equation_0 = const()[name = tensor("op_18368_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18368_cast_fp16 = einsum(equation = var_18368_equation_0, values = (var_17828_cast_fp16, var_18229_cast_fp16))[name = tensor("op_18368_cast_fp16")]; + tensor var_18370_equation_0 = const()[name = tensor("op_18370_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18370_cast_fp16 = einsum(equation = var_18370_equation_0, values = (var_17828_cast_fp16, var_18230_cast_fp16))[name = tensor("op_18370_cast_fp16")]; + tensor var_18372_equation_0 = const()[name = tensor("op_18372_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18372_cast_fp16 = einsum(equation = var_18372_equation_0, values = (var_17832_cast_fp16, var_18231_cast_fp16))[name = tensor("op_18372_cast_fp16")]; + tensor var_18374_equation_0 = const()[name = tensor("op_18374_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18374_cast_fp16 = einsum(equation = var_18374_equation_0, values = (var_17832_cast_fp16, var_18232_cast_fp16))[name = tensor("op_18374_cast_fp16")]; + tensor var_18376_equation_0 = const()[name = tensor("op_18376_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18376_cast_fp16 = einsum(equation = var_18376_equation_0, values = (var_17832_cast_fp16, var_18233_cast_fp16))[name = tensor("op_18376_cast_fp16")]; + tensor var_18378_equation_0 = const()[name = tensor("op_18378_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18378_cast_fp16 = einsum(equation = var_18378_equation_0, values = (var_17832_cast_fp16, var_18234_cast_fp16))[name = tensor("op_18378_cast_fp16")]; + tensor var_18380_equation_0 = const()[name = tensor("op_18380_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18380_cast_fp16 = einsum(equation = var_18380_equation_0, values = (var_17836_cast_fp16, var_18235_cast_fp16))[name = tensor("op_18380_cast_fp16")]; + tensor var_18382_equation_0 = const()[name = tensor("op_18382_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18382_cast_fp16 = einsum(equation = var_18382_equation_0, values = (var_17836_cast_fp16, var_18236_cast_fp16))[name = tensor("op_18382_cast_fp16")]; + tensor var_18384_equation_0 = const()[name = tensor("op_18384_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18384_cast_fp16 = einsum(equation = var_18384_equation_0, values = (var_17836_cast_fp16, var_18237_cast_fp16))[name = tensor("op_18384_cast_fp16")]; + tensor var_18386_equation_0 = const()[name = tensor("op_18386_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18386_cast_fp16 = einsum(equation = var_18386_equation_0, values = (var_17836_cast_fp16, var_18238_cast_fp16))[name = tensor("op_18386_cast_fp16")]; + tensor var_18388_equation_0 = const()[name = tensor("op_18388_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18388_cast_fp16 = einsum(equation = var_18388_equation_0, values = (var_17840_cast_fp16, var_18239_cast_fp16))[name = tensor("op_18388_cast_fp16")]; + tensor var_18390_equation_0 = const()[name = tensor("op_18390_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18390_cast_fp16 = einsum(equation = var_18390_equation_0, values = (var_17840_cast_fp16, var_18240_cast_fp16))[name = tensor("op_18390_cast_fp16")]; + tensor var_18392_equation_0 = const()[name = tensor("op_18392_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18392_cast_fp16 = einsum(equation = var_18392_equation_0, values = (var_17840_cast_fp16, var_18241_cast_fp16))[name = tensor("op_18392_cast_fp16")]; + tensor var_18394_equation_0 = const()[name = tensor("op_18394_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18394_cast_fp16 = einsum(equation = var_18394_equation_0, values = (var_17840_cast_fp16, var_18242_cast_fp16))[name = tensor("op_18394_cast_fp16")]; + tensor var_18396_equation_0 = const()[name = tensor("op_18396_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18396_cast_fp16 = einsum(equation = var_18396_equation_0, values = (var_17844_cast_fp16, var_18243_cast_fp16))[name = tensor("op_18396_cast_fp16")]; + tensor var_18398_equation_0 = const()[name = tensor("op_18398_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18398_cast_fp16 = einsum(equation = var_18398_equation_0, values = (var_17844_cast_fp16, var_18244_cast_fp16))[name = tensor("op_18398_cast_fp16")]; + tensor var_18400_equation_0 = const()[name = tensor("op_18400_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18400_cast_fp16 = einsum(equation = var_18400_equation_0, values = (var_17844_cast_fp16, var_18245_cast_fp16))[name = tensor("op_18400_cast_fp16")]; + tensor var_18402_equation_0 = const()[name = tensor("op_18402_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18402_cast_fp16 = einsum(equation = var_18402_equation_0, values = (var_17844_cast_fp16, var_18246_cast_fp16))[name = tensor("op_18402_cast_fp16")]; + tensor var_18404_equation_0 = const()[name = tensor("op_18404_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18404_cast_fp16 = einsum(equation = var_18404_equation_0, values = (var_17848_cast_fp16, var_18247_cast_fp16))[name = tensor("op_18404_cast_fp16")]; + tensor var_18406_equation_0 = const()[name = tensor("op_18406_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18406_cast_fp16 = einsum(equation = var_18406_equation_0, values = (var_17848_cast_fp16, var_18248_cast_fp16))[name = tensor("op_18406_cast_fp16")]; + tensor var_18408_equation_0 = const()[name = tensor("op_18408_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18408_cast_fp16 = einsum(equation = var_18408_equation_0, values = (var_17848_cast_fp16, var_18249_cast_fp16))[name = tensor("op_18408_cast_fp16")]; + tensor var_18410_equation_0 = const()[name = tensor("op_18410_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_18410_cast_fp16 = einsum(equation = var_18410_equation_0, values = (var_17848_cast_fp16, var_18250_cast_fp16))[name = tensor("op_18410_cast_fp16")]; + tensor var_18412_interleave_0 = const()[name = tensor("op_18412_interleave_0"), val = tensor(false)]; + tensor var_18412_cast_fp16 = concat(axis = var_16971, interleave = var_18412_interleave_0, values = (var_18252_cast_fp16, var_18254_cast_fp16, var_18256_cast_fp16, var_18258_cast_fp16))[name = tensor("op_18412_cast_fp16")]; + tensor var_18414_interleave_0 = const()[name = tensor("op_18414_interleave_0"), val = tensor(false)]; + tensor var_18414_cast_fp16 = concat(axis = var_16971, interleave = var_18414_interleave_0, values = (var_18260_cast_fp16, var_18262_cast_fp16, var_18264_cast_fp16, var_18266_cast_fp16))[name = tensor("op_18414_cast_fp16")]; + tensor var_18416_interleave_0 = const()[name = tensor("op_18416_interleave_0"), val = tensor(false)]; + tensor var_18416_cast_fp16 = concat(axis = var_16971, interleave = var_18416_interleave_0, values = (var_18268_cast_fp16, var_18270_cast_fp16, var_18272_cast_fp16, var_18274_cast_fp16))[name = tensor("op_18416_cast_fp16")]; + tensor var_18418_interleave_0 = const()[name = tensor("op_18418_interleave_0"), val = tensor(false)]; + tensor var_18418_cast_fp16 = concat(axis = var_16971, interleave = var_18418_interleave_0, values = (var_18276_cast_fp16, var_18278_cast_fp16, var_18280_cast_fp16, var_18282_cast_fp16))[name = tensor("op_18418_cast_fp16")]; + tensor var_18420_interleave_0 = const()[name = tensor("op_18420_interleave_0"), val = tensor(false)]; + tensor var_18420_cast_fp16 = concat(axis = var_16971, interleave = var_18420_interleave_0, values = (var_18284_cast_fp16, var_18286_cast_fp16, var_18288_cast_fp16, var_18290_cast_fp16))[name = tensor("op_18420_cast_fp16")]; + tensor var_18422_interleave_0 = const()[name = tensor("op_18422_interleave_0"), val = tensor(false)]; + tensor var_18422_cast_fp16 = concat(axis = var_16971, interleave = var_18422_interleave_0, values = (var_18292_cast_fp16, var_18294_cast_fp16, var_18296_cast_fp16, var_18298_cast_fp16))[name = tensor("op_18422_cast_fp16")]; + tensor var_18424_interleave_0 = const()[name = tensor("op_18424_interleave_0"), val = tensor(false)]; + tensor var_18424_cast_fp16 = concat(axis = var_16971, interleave = var_18424_interleave_0, values = (var_18300_cast_fp16, var_18302_cast_fp16, var_18304_cast_fp16, var_18306_cast_fp16))[name = tensor("op_18424_cast_fp16")]; + tensor var_18426_interleave_0 = const()[name = tensor("op_18426_interleave_0"), val = tensor(false)]; + tensor var_18426_cast_fp16 = concat(axis = var_16971, interleave = var_18426_interleave_0, values = (var_18308_cast_fp16, var_18310_cast_fp16, var_18312_cast_fp16, var_18314_cast_fp16))[name = tensor("op_18426_cast_fp16")]; + tensor var_18428_interleave_0 = const()[name = tensor("op_18428_interleave_0"), val = tensor(false)]; + tensor var_18428_cast_fp16 = concat(axis = var_16971, interleave = var_18428_interleave_0, values = (var_18316_cast_fp16, var_18318_cast_fp16, var_18320_cast_fp16, var_18322_cast_fp16))[name = tensor("op_18428_cast_fp16")]; + tensor var_18430_interleave_0 = const()[name = tensor("op_18430_interleave_0"), val = tensor(false)]; + tensor var_18430_cast_fp16 = concat(axis = var_16971, interleave = var_18430_interleave_0, values = (var_18324_cast_fp16, var_18326_cast_fp16, var_18328_cast_fp16, var_18330_cast_fp16))[name = tensor("op_18430_cast_fp16")]; + tensor var_18432_interleave_0 = const()[name = tensor("op_18432_interleave_0"), val = tensor(false)]; + tensor var_18432_cast_fp16 = concat(axis = var_16971, interleave = var_18432_interleave_0, values = (var_18332_cast_fp16, var_18334_cast_fp16, var_18336_cast_fp16, var_18338_cast_fp16))[name = tensor("op_18432_cast_fp16")]; + tensor var_18434_interleave_0 = const()[name = tensor("op_18434_interleave_0"), val = tensor(false)]; + tensor var_18434_cast_fp16 = concat(axis = var_16971, interleave = var_18434_interleave_0, values = (var_18340_cast_fp16, var_18342_cast_fp16, var_18344_cast_fp16, var_18346_cast_fp16))[name = tensor("op_18434_cast_fp16")]; + tensor var_18436_interleave_0 = const()[name = tensor("op_18436_interleave_0"), val = tensor(false)]; + tensor var_18436_cast_fp16 = concat(axis = var_16971, interleave = var_18436_interleave_0, values = (var_18348_cast_fp16, var_18350_cast_fp16, var_18352_cast_fp16, var_18354_cast_fp16))[name = tensor("op_18436_cast_fp16")]; + tensor var_18438_interleave_0 = const()[name = tensor("op_18438_interleave_0"), val = tensor(false)]; + tensor var_18438_cast_fp16 = concat(axis = var_16971, interleave = var_18438_interleave_0, values = (var_18356_cast_fp16, var_18358_cast_fp16, var_18360_cast_fp16, var_18362_cast_fp16))[name = tensor("op_18438_cast_fp16")]; + tensor var_18440_interleave_0 = const()[name = tensor("op_18440_interleave_0"), val = tensor(false)]; + tensor var_18440_cast_fp16 = concat(axis = var_16971, interleave = var_18440_interleave_0, values = (var_18364_cast_fp16, var_18366_cast_fp16, var_18368_cast_fp16, var_18370_cast_fp16))[name = tensor("op_18440_cast_fp16")]; + tensor var_18442_interleave_0 = const()[name = tensor("op_18442_interleave_0"), val = tensor(false)]; + tensor var_18442_cast_fp16 = concat(axis = var_16971, interleave = var_18442_interleave_0, values = (var_18372_cast_fp16, var_18374_cast_fp16, var_18376_cast_fp16, var_18378_cast_fp16))[name = tensor("op_18442_cast_fp16")]; + tensor var_18444_interleave_0 = const()[name = tensor("op_18444_interleave_0"), val = tensor(false)]; + tensor var_18444_cast_fp16 = concat(axis = var_16971, interleave = var_18444_interleave_0, values = (var_18380_cast_fp16, var_18382_cast_fp16, var_18384_cast_fp16, var_18386_cast_fp16))[name = tensor("op_18444_cast_fp16")]; + tensor var_18446_interleave_0 = const()[name = tensor("op_18446_interleave_0"), val = tensor(false)]; + tensor var_18446_cast_fp16 = concat(axis = var_16971, interleave = var_18446_interleave_0, values = (var_18388_cast_fp16, var_18390_cast_fp16, var_18392_cast_fp16, var_18394_cast_fp16))[name = tensor("op_18446_cast_fp16")]; + tensor var_18448_interleave_0 = const()[name = tensor("op_18448_interleave_0"), val = tensor(false)]; + tensor var_18448_cast_fp16 = concat(axis = var_16971, interleave = var_18448_interleave_0, values = (var_18396_cast_fp16, var_18398_cast_fp16, var_18400_cast_fp16, var_18402_cast_fp16))[name = tensor("op_18448_cast_fp16")]; + tensor var_18450_interleave_0 = const()[name = tensor("op_18450_interleave_0"), val = tensor(false)]; + tensor var_18450_cast_fp16 = concat(axis = var_16971, interleave = var_18450_interleave_0, values = (var_18404_cast_fp16, var_18406_cast_fp16, var_18408_cast_fp16, var_18410_cast_fp16))[name = tensor("op_18450_cast_fp16")]; + tensor input_89_interleave_0 = const()[name = tensor("input_89_interleave_0"), val = tensor(false)]; + tensor input_89_cast_fp16 = concat(axis = var_16996, interleave = input_89_interleave_0, values = (var_18412_cast_fp16, var_18414_cast_fp16, var_18416_cast_fp16, var_18418_cast_fp16, var_18420_cast_fp16, var_18422_cast_fp16, var_18424_cast_fp16, var_18426_cast_fp16, var_18428_cast_fp16, var_18430_cast_fp16, var_18432_cast_fp16, var_18434_cast_fp16, var_18436_cast_fp16, var_18438_cast_fp16, var_18440_cast_fp16, var_18442_cast_fp16, var_18444_cast_fp16, var_18446_cast_fp16, var_18448_cast_fp16, var_18450_cast_fp16))[name = tensor("input_89_cast_fp16")]; + tensor var_18455 = const()[name = tensor("op_18455"), val = tensor([1, 1])]; + tensor var_18457 = const()[name = tensor("op_18457"), val = tensor([1, 1])]; + tensor obj_47_pad_type_0 = const()[name = tensor("obj_47_pad_type_0"), val = tensor("custom")]; + tensor obj_47_pad_0 = const()[name = tensor("obj_47_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_11_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_11_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(457022720)))]; + tensor layers_11_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_11_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(460299584)))]; + tensor obj_47_cast_fp16 = conv(bias = layers_11_self_attn_o_proj_bias_to_fp16, dilations = var_18457, groups = var_16996, pad = obj_47_pad_0, pad_type = obj_47_pad_type_0, strides = var_18455, weight = layers_11_self_attn_o_proj_weight_to_fp16, x = input_89_cast_fp16)[name = tensor("obj_47_cast_fp16")]; + tensor inputs_47_cast_fp16 = add(x = inputs_45_cast_fp16, y = obj_47_cast_fp16)[name = tensor("inputs_47_cast_fp16")]; + tensor var_18463 = const()[name = tensor("op_18463"), val = tensor([1])]; + tensor channels_mean_47_cast_fp16 = reduce_mean(axes = var_18463, keep_dims = var_16997, x = inputs_47_cast_fp16)[name = tensor("channels_mean_47_cast_fp16")]; + tensor zero_mean_47_cast_fp16 = sub(x = inputs_47_cast_fp16, y = channels_mean_47_cast_fp16)[name = tensor("zero_mean_47_cast_fp16")]; + tensor zero_mean_sq_47_cast_fp16 = mul(x = zero_mean_47_cast_fp16, y = zero_mean_47_cast_fp16)[name = tensor("zero_mean_sq_47_cast_fp16")]; + tensor var_18467 = const()[name = tensor("op_18467"), val = tensor([1])]; + tensor var_18468_cast_fp16 = reduce_mean(axes = var_18467, keep_dims = var_16997, x = zero_mean_sq_47_cast_fp16)[name = tensor("op_18468_cast_fp16")]; + tensor var_18469_to_fp16 = const()[name = tensor("op_18469_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_18470_cast_fp16 = add(x = var_18468_cast_fp16, y = var_18469_to_fp16)[name = tensor("op_18470_cast_fp16")]; + tensor denom_47_epsilon_0_to_fp16 = const()[name = tensor("denom_47_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_47_cast_fp16 = rsqrt(epsilon = denom_47_epsilon_0_to_fp16, x = var_18470_cast_fp16)[name = tensor("denom_47_cast_fp16")]; + tensor out_47_cast_fp16 = mul(x = zero_mean_47_cast_fp16, y = denom_47_cast_fp16)[name = tensor("out_47_cast_fp16")]; + tensor input_91_gamma_0_to_fp16 = const()[name = tensor("input_91_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(460302208)))]; + tensor input_91_beta_0_to_fp16 = const()[name = tensor("input_91_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(460304832)))]; + tensor input_91_epsilon_0_to_fp16 = const()[name = tensor("input_91_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_91_cast_fp16 = batch_norm(beta = input_91_beta_0_to_fp16, epsilon = input_91_epsilon_0_to_fp16, gamma = input_91_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_47_cast_fp16)[name = tensor("input_91_cast_fp16")]; + tensor var_18481 = const()[name = tensor("op_18481"), val = tensor([1, 1])]; + tensor var_18483 = const()[name = tensor("op_18483"), val = tensor([1, 1])]; + tensor input_93_pad_type_0 = const()[name = tensor("input_93_pad_type_0"), val = tensor("custom")]; + tensor input_93_pad_0 = const()[name = tensor("input_93_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_11_fc1_weight_to_fp16 = const()[name = tensor("layers_11_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(460307456)))]; + tensor layers_11_fc1_bias_to_fp16 = const()[name = tensor("layers_11_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(473414720)))]; + tensor input_93_cast_fp16 = conv(bias = layers_11_fc1_bias_to_fp16, dilations = var_18483, groups = var_16996, pad = input_93_pad_0, pad_type = input_93_pad_type_0, strides = var_18481, weight = layers_11_fc1_weight_to_fp16, x = input_91_cast_fp16)[name = tensor("input_93_cast_fp16")]; + tensor input_95_mode_0 = const()[name = tensor("input_95_mode_0"), val = tensor("EXACT")]; + tensor input_95_cast_fp16 = gelu(mode = input_95_mode_0, x = input_93_cast_fp16)[name = tensor("input_95_cast_fp16")]; + tensor var_18489 = const()[name = tensor("op_18489"), val = tensor([1, 1])]; + tensor var_18491 = const()[name = tensor("op_18491"), val = tensor([1, 1])]; + tensor hidden_states_27_pad_type_0 = const()[name = tensor("hidden_states_27_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_27_pad_0 = const()[name = tensor("hidden_states_27_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_11_fc2_weight_to_fp16 = const()[name = tensor("layers_11_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(473425024)))]; + tensor layers_11_fc2_bias_to_fp16 = const()[name = tensor("layers_11_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(486532288)))]; + tensor hidden_states_27_cast_fp16 = conv(bias = layers_11_fc2_bias_to_fp16, dilations = var_18491, groups = var_16996, pad = hidden_states_27_pad_0, pad_type = hidden_states_27_pad_type_0, strides = var_18489, weight = layers_11_fc2_weight_to_fp16, x = input_95_cast_fp16)[name = tensor("hidden_states_27_cast_fp16")]; + tensor inputs_49_cast_fp16 = add(x = inputs_47_cast_fp16, y = hidden_states_27_cast_fp16)[name = tensor("inputs_49_cast_fp16")]; + tensor var_18498 = const()[name = tensor("op_18498"), val = tensor(3)]; + tensor var_18523 = const()[name = tensor("op_18523"), val = tensor(1)]; + tensor var_18524 = const()[name = tensor("op_18524"), val = tensor(true)]; + tensor var_18534 = const()[name = tensor("op_18534"), val = tensor([1])]; + tensor channels_mean_49_cast_fp16 = reduce_mean(axes = var_18534, keep_dims = var_18524, x = inputs_49_cast_fp16)[name = tensor("channels_mean_49_cast_fp16")]; + tensor zero_mean_49_cast_fp16 = sub(x = inputs_49_cast_fp16, y = channels_mean_49_cast_fp16)[name = tensor("zero_mean_49_cast_fp16")]; + tensor zero_mean_sq_49_cast_fp16 = mul(x = zero_mean_49_cast_fp16, y = zero_mean_49_cast_fp16)[name = tensor("zero_mean_sq_49_cast_fp16")]; + tensor var_18538 = const()[name = tensor("op_18538"), val = tensor([1])]; + tensor var_18539_cast_fp16 = reduce_mean(axes = var_18538, keep_dims = var_18524, x = zero_mean_sq_49_cast_fp16)[name = tensor("op_18539_cast_fp16")]; + tensor var_18540_to_fp16 = const()[name = tensor("op_18540_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_18541_cast_fp16 = add(x = var_18539_cast_fp16, y = var_18540_to_fp16)[name = tensor("op_18541_cast_fp16")]; + tensor denom_49_epsilon_0_to_fp16 = const()[name = tensor("denom_49_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_49_cast_fp16 = rsqrt(epsilon = denom_49_epsilon_0_to_fp16, x = var_18541_cast_fp16)[name = tensor("denom_49_cast_fp16")]; + tensor out_49_cast_fp16 = mul(x = zero_mean_49_cast_fp16, y = denom_49_cast_fp16)[name = tensor("out_49_cast_fp16")]; + tensor obj_49_gamma_0_to_fp16 = const()[name = tensor("obj_49_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(486534912)))]; + tensor obj_49_beta_0_to_fp16 = const()[name = tensor("obj_49_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(486537536)))]; + tensor obj_49_epsilon_0_to_fp16 = const()[name = tensor("obj_49_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_49_cast_fp16 = batch_norm(beta = obj_49_beta_0_to_fp16, epsilon = obj_49_epsilon_0_to_fp16, gamma = obj_49_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_49_cast_fp16)[name = tensor("obj_49_cast_fp16")]; + tensor var_18556 = const()[name = tensor("op_18556"), val = tensor([1, 1])]; + tensor var_18558 = const()[name = tensor("op_18558"), val = tensor([1, 1])]; + tensor query_25_pad_type_0 = const()[name = tensor("query_25_pad_type_0"), val = tensor("custom")]; + tensor query_25_pad_0 = const()[name = tensor("query_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_12_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_12_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(486540160)))]; + tensor layers_12_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_12_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(489817024)))]; + tensor query_25_cast_fp16 = conv(bias = layers_12_self_attn_q_proj_bias_to_fp16, dilations = var_18558, groups = var_18523, pad = query_25_pad_0, pad_type = query_25_pad_type_0, strides = var_18556, weight = layers_12_self_attn_q_proj_weight_to_fp16, x = obj_49_cast_fp16)[name = tensor("query_25_cast_fp16")]; + tensor var_18562 = const()[name = tensor("op_18562"), val = tensor([1, 1])]; + tensor var_18564 = const()[name = tensor("op_18564"), val = tensor([1, 1])]; + tensor key_25_pad_type_0 = const()[name = tensor("key_25_pad_type_0"), val = tensor("custom")]; + tensor key_25_pad_0 = const()[name = tensor("key_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_12_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_12_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(489819648)))]; + tensor key_25_cast_fp16 = conv(dilations = var_18564, groups = var_18523, pad = key_25_pad_0, pad_type = key_25_pad_type_0, strides = var_18562, weight = layers_12_self_attn_k_proj_weight_to_fp16, x = obj_49_cast_fp16)[name = tensor("key_25_cast_fp16")]; + tensor var_18569 = const()[name = tensor("op_18569"), val = tensor([1, 1])]; + tensor var_18571 = const()[name = tensor("op_18571"), val = tensor([1, 1])]; + tensor value_25_pad_type_0 = const()[name = tensor("value_25_pad_type_0"), val = tensor("custom")]; + tensor value_25_pad_0 = const()[name = tensor("value_25_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_12_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_12_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(493096512)))]; + tensor layers_12_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_12_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(496373376)))]; + tensor value_25_cast_fp16 = conv(bias = layers_12_self_attn_v_proj_bias_to_fp16, dilations = var_18571, groups = var_18523, pad = value_25_pad_0, pad_type = value_25_pad_type_0, strides = var_18569, weight = layers_12_self_attn_v_proj_weight_to_fp16, x = obj_49_cast_fp16)[name = tensor("value_25_cast_fp16")]; + tensor var_18578_begin_0 = const()[name = tensor("op_18578_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_18578_end_0 = const()[name = tensor("op_18578_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_18578_end_mask_0 = const()[name = tensor("op_18578_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_18578_cast_fp16 = slice_by_index(begin = var_18578_begin_0, end = var_18578_end_0, end_mask = var_18578_end_mask_0, x = query_25_cast_fp16)[name = tensor("op_18578_cast_fp16")]; + tensor var_18582_begin_0 = const()[name = tensor("op_18582_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_18582_end_0 = const()[name = tensor("op_18582_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_18582_end_mask_0 = const()[name = tensor("op_18582_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_18582_cast_fp16 = slice_by_index(begin = var_18582_begin_0, end = var_18582_end_0, end_mask = var_18582_end_mask_0, x = query_25_cast_fp16)[name = tensor("op_18582_cast_fp16")]; + tensor var_18586_begin_0 = const()[name = tensor("op_18586_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_18586_end_0 = const()[name = tensor("op_18586_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_18586_end_mask_0 = const()[name = tensor("op_18586_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_18586_cast_fp16 = slice_by_index(begin = var_18586_begin_0, end = var_18586_end_0, end_mask = var_18586_end_mask_0, x = query_25_cast_fp16)[name = tensor("op_18586_cast_fp16")]; + tensor var_18590_begin_0 = const()[name = tensor("op_18590_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_18590_end_0 = const()[name = tensor("op_18590_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_18590_end_mask_0 = const()[name = tensor("op_18590_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_18590_cast_fp16 = slice_by_index(begin = var_18590_begin_0, end = var_18590_end_0, end_mask = var_18590_end_mask_0, x = query_25_cast_fp16)[name = tensor("op_18590_cast_fp16")]; + tensor var_18594_begin_0 = const()[name = tensor("op_18594_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_18594_end_0 = const()[name = tensor("op_18594_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_18594_end_mask_0 = const()[name = tensor("op_18594_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_18594_cast_fp16 = slice_by_index(begin = var_18594_begin_0, end = var_18594_end_0, end_mask = var_18594_end_mask_0, x = query_25_cast_fp16)[name = tensor("op_18594_cast_fp16")]; + tensor var_18598_begin_0 = const()[name = tensor("op_18598_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_18598_end_0 = const()[name = tensor("op_18598_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_18598_end_mask_0 = const()[name = tensor("op_18598_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_18598_cast_fp16 = slice_by_index(begin = var_18598_begin_0, end = var_18598_end_0, end_mask = var_18598_end_mask_0, x = query_25_cast_fp16)[name = tensor("op_18598_cast_fp16")]; + tensor var_18602_begin_0 = const()[name = tensor("op_18602_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_18602_end_0 = const()[name = tensor("op_18602_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_18602_end_mask_0 = const()[name = tensor("op_18602_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_18602_cast_fp16 = slice_by_index(begin = var_18602_begin_0, end = var_18602_end_0, end_mask = var_18602_end_mask_0, x = query_25_cast_fp16)[name = tensor("op_18602_cast_fp16")]; + tensor var_18606_begin_0 = const()[name = tensor("op_18606_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_18606_end_0 = const()[name = tensor("op_18606_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_18606_end_mask_0 = const()[name = tensor("op_18606_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_18606_cast_fp16 = slice_by_index(begin = var_18606_begin_0, end = var_18606_end_0, end_mask = var_18606_end_mask_0, x = query_25_cast_fp16)[name = tensor("op_18606_cast_fp16")]; + tensor var_18610_begin_0 = const()[name = tensor("op_18610_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_18610_end_0 = const()[name = tensor("op_18610_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_18610_end_mask_0 = const()[name = tensor("op_18610_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_18610_cast_fp16 = slice_by_index(begin = var_18610_begin_0, end = var_18610_end_0, end_mask = var_18610_end_mask_0, x = query_25_cast_fp16)[name = tensor("op_18610_cast_fp16")]; + tensor var_18614_begin_0 = const()[name = tensor("op_18614_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_18614_end_0 = const()[name = tensor("op_18614_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_18614_end_mask_0 = const()[name = tensor("op_18614_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_18614_cast_fp16 = slice_by_index(begin = var_18614_begin_0, end = var_18614_end_0, end_mask = var_18614_end_mask_0, x = query_25_cast_fp16)[name = tensor("op_18614_cast_fp16")]; + tensor var_18618_begin_0 = const()[name = tensor("op_18618_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_18618_end_0 = const()[name = tensor("op_18618_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_18618_end_mask_0 = const()[name = tensor("op_18618_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_18618_cast_fp16 = slice_by_index(begin = var_18618_begin_0, end = var_18618_end_0, end_mask = var_18618_end_mask_0, x = query_25_cast_fp16)[name = tensor("op_18618_cast_fp16")]; + tensor var_18622_begin_0 = const()[name = tensor("op_18622_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_18622_end_0 = const()[name = tensor("op_18622_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_18622_end_mask_0 = const()[name = tensor("op_18622_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_18622_cast_fp16 = slice_by_index(begin = var_18622_begin_0, end = var_18622_end_0, end_mask = var_18622_end_mask_0, x = query_25_cast_fp16)[name = tensor("op_18622_cast_fp16")]; + tensor var_18626_begin_0 = const()[name = tensor("op_18626_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_18626_end_0 = const()[name = tensor("op_18626_end_0"), val = tensor([1, 832, 1, 1500])]; + tensor var_18626_end_mask_0 = const()[name = tensor("op_18626_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_18626_cast_fp16 = slice_by_index(begin = var_18626_begin_0, end = var_18626_end_0, end_mask = var_18626_end_mask_0, x = query_25_cast_fp16)[name = tensor("op_18626_cast_fp16")]; + tensor var_18630_begin_0 = const()[name = tensor("op_18630_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_18630_end_0 = const()[name = tensor("op_18630_end_0"), val = tensor([1, 896, 1, 1500])]; + tensor var_18630_end_mask_0 = const()[name = tensor("op_18630_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_18630_cast_fp16 = slice_by_index(begin = var_18630_begin_0, end = var_18630_end_0, end_mask = var_18630_end_mask_0, x = query_25_cast_fp16)[name = tensor("op_18630_cast_fp16")]; + tensor var_18634_begin_0 = const()[name = tensor("op_18634_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_18634_end_0 = const()[name = tensor("op_18634_end_0"), val = tensor([1, 960, 1, 1500])]; + tensor var_18634_end_mask_0 = const()[name = tensor("op_18634_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_18634_cast_fp16 = slice_by_index(begin = var_18634_begin_0, end = var_18634_end_0, end_mask = var_18634_end_mask_0, x = query_25_cast_fp16)[name = tensor("op_18634_cast_fp16")]; + tensor var_18638_begin_0 = const()[name = tensor("op_18638_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_18638_end_0 = const()[name = tensor("op_18638_end_0"), val = tensor([1, 1024, 1, 1500])]; + tensor var_18638_end_mask_0 = const()[name = tensor("op_18638_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_18638_cast_fp16 = slice_by_index(begin = var_18638_begin_0, end = var_18638_end_0, end_mask = var_18638_end_mask_0, x = query_25_cast_fp16)[name = tensor("op_18638_cast_fp16")]; + tensor var_18642_begin_0 = const()[name = tensor("op_18642_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_18642_end_0 = const()[name = tensor("op_18642_end_0"), val = tensor([1, 1088, 1, 1500])]; + tensor var_18642_end_mask_0 = const()[name = tensor("op_18642_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_18642_cast_fp16 = slice_by_index(begin = var_18642_begin_0, end = var_18642_end_0, end_mask = var_18642_end_mask_0, x = query_25_cast_fp16)[name = tensor("op_18642_cast_fp16")]; + tensor var_18646_begin_0 = const()[name = tensor("op_18646_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_18646_end_0 = const()[name = tensor("op_18646_end_0"), val = tensor([1, 1152, 1, 1500])]; + tensor var_18646_end_mask_0 = const()[name = tensor("op_18646_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_18646_cast_fp16 = slice_by_index(begin = var_18646_begin_0, end = var_18646_end_0, end_mask = var_18646_end_mask_0, x = query_25_cast_fp16)[name = tensor("op_18646_cast_fp16")]; + tensor var_18650_begin_0 = const()[name = tensor("op_18650_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_18650_end_0 = const()[name = tensor("op_18650_end_0"), val = tensor([1, 1216, 1, 1500])]; + tensor var_18650_end_mask_0 = const()[name = tensor("op_18650_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_18650_cast_fp16 = slice_by_index(begin = var_18650_begin_0, end = var_18650_end_0, end_mask = var_18650_end_mask_0, x = query_25_cast_fp16)[name = tensor("op_18650_cast_fp16")]; + tensor var_18654_begin_0 = const()[name = tensor("op_18654_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_18654_end_0 = const()[name = tensor("op_18654_end_0"), val = tensor([1, 1280, 1, 1500])]; + tensor var_18654_end_mask_0 = const()[name = tensor("op_18654_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_18654_cast_fp16 = slice_by_index(begin = var_18654_begin_0, end = var_18654_end_0, end_mask = var_18654_end_mask_0, x = query_25_cast_fp16)[name = tensor("op_18654_cast_fp16")]; + tensor var_18663_begin_0 = const()[name = tensor("op_18663_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_18663_end_0 = const()[name = tensor("op_18663_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_18663_end_mask_0 = const()[name = tensor("op_18663_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_18663_cast_fp16 = slice_by_index(begin = var_18663_begin_0, end = var_18663_end_0, end_mask = var_18663_end_mask_0, x = var_18578_cast_fp16)[name = tensor("op_18663_cast_fp16")]; + tensor var_18670_begin_0 = const()[name = tensor("op_18670_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_18670_end_0 = const()[name = tensor("op_18670_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_18670_end_mask_0 = const()[name = tensor("op_18670_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_18670_cast_fp16 = slice_by_index(begin = var_18670_begin_0, end = var_18670_end_0, end_mask = var_18670_end_mask_0, x = var_18578_cast_fp16)[name = tensor("op_18670_cast_fp16")]; + tensor var_18677_begin_0 = const()[name = tensor("op_18677_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_18677_end_0 = const()[name = tensor("op_18677_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_18677_end_mask_0 = const()[name = tensor("op_18677_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_18677_cast_fp16 = slice_by_index(begin = var_18677_begin_0, end = var_18677_end_0, end_mask = var_18677_end_mask_0, x = var_18578_cast_fp16)[name = tensor("op_18677_cast_fp16")]; + tensor var_18684_begin_0 = const()[name = tensor("op_18684_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_18684_end_0 = const()[name = tensor("op_18684_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_18684_end_mask_0 = const()[name = tensor("op_18684_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_18684_cast_fp16 = slice_by_index(begin = var_18684_begin_0, end = var_18684_end_0, end_mask = var_18684_end_mask_0, x = var_18578_cast_fp16)[name = tensor("op_18684_cast_fp16")]; + tensor var_18691_begin_0 = const()[name = tensor("op_18691_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_18691_end_0 = const()[name = tensor("op_18691_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_18691_end_mask_0 = const()[name = tensor("op_18691_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_18691_cast_fp16 = slice_by_index(begin = var_18691_begin_0, end = var_18691_end_0, end_mask = var_18691_end_mask_0, x = var_18582_cast_fp16)[name = tensor("op_18691_cast_fp16")]; + tensor var_18698_begin_0 = const()[name = tensor("op_18698_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_18698_end_0 = const()[name = tensor("op_18698_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_18698_end_mask_0 = const()[name = tensor("op_18698_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_18698_cast_fp16 = slice_by_index(begin = var_18698_begin_0, end = var_18698_end_0, end_mask = var_18698_end_mask_0, x = var_18582_cast_fp16)[name = tensor("op_18698_cast_fp16")]; + tensor var_18705_begin_0 = const()[name = tensor("op_18705_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_18705_end_0 = const()[name = tensor("op_18705_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_18705_end_mask_0 = const()[name = tensor("op_18705_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_18705_cast_fp16 = slice_by_index(begin = var_18705_begin_0, end = var_18705_end_0, end_mask = var_18705_end_mask_0, x = var_18582_cast_fp16)[name = tensor("op_18705_cast_fp16")]; + tensor var_18712_begin_0 = const()[name = tensor("op_18712_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_18712_end_0 = const()[name = tensor("op_18712_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_18712_end_mask_0 = const()[name = tensor("op_18712_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_18712_cast_fp16 = slice_by_index(begin = var_18712_begin_0, end = var_18712_end_0, end_mask = var_18712_end_mask_0, x = var_18582_cast_fp16)[name = tensor("op_18712_cast_fp16")]; + tensor var_18719_begin_0 = const()[name = tensor("op_18719_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_18719_end_0 = const()[name = tensor("op_18719_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_18719_end_mask_0 = const()[name = tensor("op_18719_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_18719_cast_fp16 = slice_by_index(begin = var_18719_begin_0, end = var_18719_end_0, end_mask = var_18719_end_mask_0, x = var_18586_cast_fp16)[name = tensor("op_18719_cast_fp16")]; + tensor var_18726_begin_0 = const()[name = tensor("op_18726_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_18726_end_0 = const()[name = tensor("op_18726_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_18726_end_mask_0 = const()[name = tensor("op_18726_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_18726_cast_fp16 = slice_by_index(begin = var_18726_begin_0, end = var_18726_end_0, end_mask = var_18726_end_mask_0, x = var_18586_cast_fp16)[name = tensor("op_18726_cast_fp16")]; + tensor var_18733_begin_0 = const()[name = tensor("op_18733_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_18733_end_0 = const()[name = tensor("op_18733_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_18733_end_mask_0 = const()[name = tensor("op_18733_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_18733_cast_fp16 = slice_by_index(begin = var_18733_begin_0, end = var_18733_end_0, end_mask = var_18733_end_mask_0, x = var_18586_cast_fp16)[name = tensor("op_18733_cast_fp16")]; + tensor var_18740_begin_0 = const()[name = tensor("op_18740_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_18740_end_0 = const()[name = tensor("op_18740_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_18740_end_mask_0 = const()[name = tensor("op_18740_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_18740_cast_fp16 = slice_by_index(begin = var_18740_begin_0, end = var_18740_end_0, end_mask = var_18740_end_mask_0, x = var_18586_cast_fp16)[name = tensor("op_18740_cast_fp16")]; + tensor var_18747_begin_0 = const()[name = tensor("op_18747_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_18747_end_0 = const()[name = tensor("op_18747_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_18747_end_mask_0 = const()[name = tensor("op_18747_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_18747_cast_fp16 = slice_by_index(begin = var_18747_begin_0, end = var_18747_end_0, end_mask = var_18747_end_mask_0, x = var_18590_cast_fp16)[name = tensor("op_18747_cast_fp16")]; + tensor var_18754_begin_0 = const()[name = tensor("op_18754_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_18754_end_0 = const()[name = tensor("op_18754_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_18754_end_mask_0 = const()[name = tensor("op_18754_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_18754_cast_fp16 = slice_by_index(begin = var_18754_begin_0, end = var_18754_end_0, end_mask = var_18754_end_mask_0, x = var_18590_cast_fp16)[name = tensor("op_18754_cast_fp16")]; + tensor var_18761_begin_0 = const()[name = tensor("op_18761_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_18761_end_0 = const()[name = tensor("op_18761_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_18761_end_mask_0 = const()[name = tensor("op_18761_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_18761_cast_fp16 = slice_by_index(begin = var_18761_begin_0, end = var_18761_end_0, end_mask = var_18761_end_mask_0, x = var_18590_cast_fp16)[name = tensor("op_18761_cast_fp16")]; + tensor var_18768_begin_0 = const()[name = tensor("op_18768_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_18768_end_0 = const()[name = tensor("op_18768_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_18768_end_mask_0 = const()[name = tensor("op_18768_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_18768_cast_fp16 = slice_by_index(begin = var_18768_begin_0, end = var_18768_end_0, end_mask = var_18768_end_mask_0, x = var_18590_cast_fp16)[name = tensor("op_18768_cast_fp16")]; + tensor var_18775_begin_0 = const()[name = tensor("op_18775_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_18775_end_0 = const()[name = tensor("op_18775_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_18775_end_mask_0 = const()[name = tensor("op_18775_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_18775_cast_fp16 = slice_by_index(begin = var_18775_begin_0, end = var_18775_end_0, end_mask = var_18775_end_mask_0, x = var_18594_cast_fp16)[name = tensor("op_18775_cast_fp16")]; + tensor var_18782_begin_0 = const()[name = tensor("op_18782_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_18782_end_0 = const()[name = tensor("op_18782_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_18782_end_mask_0 = const()[name = tensor("op_18782_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_18782_cast_fp16 = slice_by_index(begin = var_18782_begin_0, end = var_18782_end_0, end_mask = var_18782_end_mask_0, x = var_18594_cast_fp16)[name = tensor("op_18782_cast_fp16")]; + tensor var_18789_begin_0 = const()[name = tensor("op_18789_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_18789_end_0 = const()[name = tensor("op_18789_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_18789_end_mask_0 = const()[name = tensor("op_18789_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_18789_cast_fp16 = slice_by_index(begin = var_18789_begin_0, end = var_18789_end_0, end_mask = var_18789_end_mask_0, x = var_18594_cast_fp16)[name = tensor("op_18789_cast_fp16")]; + tensor var_18796_begin_0 = const()[name = tensor("op_18796_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_18796_end_0 = const()[name = tensor("op_18796_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_18796_end_mask_0 = const()[name = tensor("op_18796_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_18796_cast_fp16 = slice_by_index(begin = var_18796_begin_0, end = var_18796_end_0, end_mask = var_18796_end_mask_0, x = var_18594_cast_fp16)[name = tensor("op_18796_cast_fp16")]; + tensor var_18803_begin_0 = const()[name = tensor("op_18803_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_18803_end_0 = const()[name = tensor("op_18803_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_18803_end_mask_0 = const()[name = tensor("op_18803_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_18803_cast_fp16 = slice_by_index(begin = var_18803_begin_0, end = var_18803_end_0, end_mask = var_18803_end_mask_0, x = var_18598_cast_fp16)[name = tensor("op_18803_cast_fp16")]; + tensor var_18810_begin_0 = const()[name = tensor("op_18810_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_18810_end_0 = const()[name = tensor("op_18810_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_18810_end_mask_0 = const()[name = tensor("op_18810_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_18810_cast_fp16 = slice_by_index(begin = var_18810_begin_0, end = var_18810_end_0, end_mask = var_18810_end_mask_0, x = var_18598_cast_fp16)[name = tensor("op_18810_cast_fp16")]; + tensor var_18817_begin_0 = const()[name = tensor("op_18817_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_18817_end_0 = const()[name = tensor("op_18817_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_18817_end_mask_0 = const()[name = tensor("op_18817_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_18817_cast_fp16 = slice_by_index(begin = var_18817_begin_0, end = var_18817_end_0, end_mask = var_18817_end_mask_0, x = var_18598_cast_fp16)[name = tensor("op_18817_cast_fp16")]; + tensor var_18824_begin_0 = const()[name = tensor("op_18824_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_18824_end_0 = const()[name = tensor("op_18824_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_18824_end_mask_0 = const()[name = tensor("op_18824_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_18824_cast_fp16 = slice_by_index(begin = var_18824_begin_0, end = var_18824_end_0, end_mask = var_18824_end_mask_0, x = var_18598_cast_fp16)[name = tensor("op_18824_cast_fp16")]; + tensor var_18831_begin_0 = const()[name = tensor("op_18831_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_18831_end_0 = const()[name = tensor("op_18831_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_18831_end_mask_0 = const()[name = tensor("op_18831_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_18831_cast_fp16 = slice_by_index(begin = var_18831_begin_0, end = var_18831_end_0, end_mask = var_18831_end_mask_0, x = var_18602_cast_fp16)[name = tensor("op_18831_cast_fp16")]; + tensor var_18838_begin_0 = const()[name = tensor("op_18838_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_18838_end_0 = const()[name = tensor("op_18838_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_18838_end_mask_0 = const()[name = tensor("op_18838_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_18838_cast_fp16 = slice_by_index(begin = var_18838_begin_0, end = var_18838_end_0, end_mask = var_18838_end_mask_0, x = var_18602_cast_fp16)[name = tensor("op_18838_cast_fp16")]; + tensor var_18845_begin_0 = const()[name = tensor("op_18845_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_18845_end_0 = const()[name = tensor("op_18845_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_18845_end_mask_0 = const()[name = tensor("op_18845_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_18845_cast_fp16 = slice_by_index(begin = var_18845_begin_0, end = var_18845_end_0, end_mask = var_18845_end_mask_0, x = var_18602_cast_fp16)[name = tensor("op_18845_cast_fp16")]; + tensor var_18852_begin_0 = const()[name = tensor("op_18852_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_18852_end_0 = const()[name = tensor("op_18852_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_18852_end_mask_0 = const()[name = tensor("op_18852_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_18852_cast_fp16 = slice_by_index(begin = var_18852_begin_0, end = var_18852_end_0, end_mask = var_18852_end_mask_0, x = var_18602_cast_fp16)[name = tensor("op_18852_cast_fp16")]; + tensor var_18859_begin_0 = const()[name = tensor("op_18859_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_18859_end_0 = const()[name = tensor("op_18859_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_18859_end_mask_0 = const()[name = tensor("op_18859_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_18859_cast_fp16 = slice_by_index(begin = var_18859_begin_0, end = var_18859_end_0, end_mask = var_18859_end_mask_0, x = var_18606_cast_fp16)[name = tensor("op_18859_cast_fp16")]; + tensor var_18866_begin_0 = const()[name = tensor("op_18866_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_18866_end_0 = const()[name = tensor("op_18866_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_18866_end_mask_0 = const()[name = tensor("op_18866_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_18866_cast_fp16 = slice_by_index(begin = var_18866_begin_0, end = var_18866_end_0, end_mask = var_18866_end_mask_0, x = var_18606_cast_fp16)[name = tensor("op_18866_cast_fp16")]; + tensor var_18873_begin_0 = const()[name = tensor("op_18873_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_18873_end_0 = const()[name = tensor("op_18873_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_18873_end_mask_0 = const()[name = tensor("op_18873_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_18873_cast_fp16 = slice_by_index(begin = var_18873_begin_0, end = var_18873_end_0, end_mask = var_18873_end_mask_0, x = var_18606_cast_fp16)[name = tensor("op_18873_cast_fp16")]; + tensor var_18880_begin_0 = const()[name = tensor("op_18880_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_18880_end_0 = const()[name = tensor("op_18880_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_18880_end_mask_0 = const()[name = tensor("op_18880_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_18880_cast_fp16 = slice_by_index(begin = var_18880_begin_0, end = var_18880_end_0, end_mask = var_18880_end_mask_0, x = var_18606_cast_fp16)[name = tensor("op_18880_cast_fp16")]; + tensor var_18887_begin_0 = const()[name = tensor("op_18887_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_18887_end_0 = const()[name = tensor("op_18887_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_18887_end_mask_0 = const()[name = tensor("op_18887_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_18887_cast_fp16 = slice_by_index(begin = var_18887_begin_0, end = var_18887_end_0, end_mask = var_18887_end_mask_0, x = var_18610_cast_fp16)[name = tensor("op_18887_cast_fp16")]; + tensor var_18894_begin_0 = const()[name = tensor("op_18894_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_18894_end_0 = const()[name = tensor("op_18894_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_18894_end_mask_0 = const()[name = tensor("op_18894_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_18894_cast_fp16 = slice_by_index(begin = var_18894_begin_0, end = var_18894_end_0, end_mask = var_18894_end_mask_0, x = var_18610_cast_fp16)[name = tensor("op_18894_cast_fp16")]; + tensor var_18901_begin_0 = const()[name = tensor("op_18901_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_18901_end_0 = const()[name = tensor("op_18901_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_18901_end_mask_0 = const()[name = tensor("op_18901_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_18901_cast_fp16 = slice_by_index(begin = var_18901_begin_0, end = var_18901_end_0, end_mask = var_18901_end_mask_0, x = var_18610_cast_fp16)[name = tensor("op_18901_cast_fp16")]; + tensor var_18908_begin_0 = const()[name = tensor("op_18908_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_18908_end_0 = const()[name = tensor("op_18908_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_18908_end_mask_0 = const()[name = tensor("op_18908_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_18908_cast_fp16 = slice_by_index(begin = var_18908_begin_0, end = var_18908_end_0, end_mask = var_18908_end_mask_0, x = var_18610_cast_fp16)[name = tensor("op_18908_cast_fp16")]; + tensor var_18915_begin_0 = const()[name = tensor("op_18915_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_18915_end_0 = const()[name = tensor("op_18915_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_18915_end_mask_0 = const()[name = tensor("op_18915_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_18915_cast_fp16 = slice_by_index(begin = var_18915_begin_0, end = var_18915_end_0, end_mask = var_18915_end_mask_0, x = var_18614_cast_fp16)[name = tensor("op_18915_cast_fp16")]; + tensor var_18922_begin_0 = const()[name = tensor("op_18922_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_18922_end_0 = const()[name = tensor("op_18922_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_18922_end_mask_0 = const()[name = tensor("op_18922_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_18922_cast_fp16 = slice_by_index(begin = var_18922_begin_0, end = var_18922_end_0, end_mask = var_18922_end_mask_0, x = var_18614_cast_fp16)[name = tensor("op_18922_cast_fp16")]; + tensor var_18929_begin_0 = const()[name = tensor("op_18929_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_18929_end_0 = const()[name = tensor("op_18929_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_18929_end_mask_0 = const()[name = tensor("op_18929_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_18929_cast_fp16 = slice_by_index(begin = var_18929_begin_0, end = var_18929_end_0, end_mask = var_18929_end_mask_0, x = var_18614_cast_fp16)[name = tensor("op_18929_cast_fp16")]; + tensor var_18936_begin_0 = const()[name = tensor("op_18936_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_18936_end_0 = const()[name = tensor("op_18936_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_18936_end_mask_0 = const()[name = tensor("op_18936_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_18936_cast_fp16 = slice_by_index(begin = var_18936_begin_0, end = var_18936_end_0, end_mask = var_18936_end_mask_0, x = var_18614_cast_fp16)[name = tensor("op_18936_cast_fp16")]; + tensor var_18943_begin_0 = const()[name = tensor("op_18943_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_18943_end_0 = const()[name = tensor("op_18943_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_18943_end_mask_0 = const()[name = tensor("op_18943_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_18943_cast_fp16 = slice_by_index(begin = var_18943_begin_0, end = var_18943_end_0, end_mask = var_18943_end_mask_0, x = var_18618_cast_fp16)[name = tensor("op_18943_cast_fp16")]; + tensor var_18950_begin_0 = const()[name = tensor("op_18950_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_18950_end_0 = const()[name = tensor("op_18950_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_18950_end_mask_0 = const()[name = tensor("op_18950_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_18950_cast_fp16 = slice_by_index(begin = var_18950_begin_0, end = var_18950_end_0, end_mask = var_18950_end_mask_0, x = var_18618_cast_fp16)[name = tensor("op_18950_cast_fp16")]; + tensor var_18957_begin_0 = const()[name = tensor("op_18957_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_18957_end_0 = const()[name = tensor("op_18957_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_18957_end_mask_0 = const()[name = tensor("op_18957_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_18957_cast_fp16 = slice_by_index(begin = var_18957_begin_0, end = var_18957_end_0, end_mask = var_18957_end_mask_0, x = var_18618_cast_fp16)[name = tensor("op_18957_cast_fp16")]; + tensor var_18964_begin_0 = const()[name = tensor("op_18964_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_18964_end_0 = const()[name = tensor("op_18964_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_18964_end_mask_0 = const()[name = tensor("op_18964_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_18964_cast_fp16 = slice_by_index(begin = var_18964_begin_0, end = var_18964_end_0, end_mask = var_18964_end_mask_0, x = var_18618_cast_fp16)[name = tensor("op_18964_cast_fp16")]; + tensor var_18971_begin_0 = const()[name = tensor("op_18971_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_18971_end_0 = const()[name = tensor("op_18971_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_18971_end_mask_0 = const()[name = tensor("op_18971_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_18971_cast_fp16 = slice_by_index(begin = var_18971_begin_0, end = var_18971_end_0, end_mask = var_18971_end_mask_0, x = var_18622_cast_fp16)[name = tensor("op_18971_cast_fp16")]; + tensor var_18978_begin_0 = const()[name = tensor("op_18978_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_18978_end_0 = const()[name = tensor("op_18978_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_18978_end_mask_0 = const()[name = tensor("op_18978_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_18978_cast_fp16 = slice_by_index(begin = var_18978_begin_0, end = var_18978_end_0, end_mask = var_18978_end_mask_0, x = var_18622_cast_fp16)[name = tensor("op_18978_cast_fp16")]; + tensor var_18985_begin_0 = const()[name = tensor("op_18985_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_18985_end_0 = const()[name = tensor("op_18985_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_18985_end_mask_0 = const()[name = tensor("op_18985_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_18985_cast_fp16 = slice_by_index(begin = var_18985_begin_0, end = var_18985_end_0, end_mask = var_18985_end_mask_0, x = var_18622_cast_fp16)[name = tensor("op_18985_cast_fp16")]; + tensor var_18992_begin_0 = const()[name = tensor("op_18992_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_18992_end_0 = const()[name = tensor("op_18992_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_18992_end_mask_0 = const()[name = tensor("op_18992_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_18992_cast_fp16 = slice_by_index(begin = var_18992_begin_0, end = var_18992_end_0, end_mask = var_18992_end_mask_0, x = var_18622_cast_fp16)[name = tensor("op_18992_cast_fp16")]; + tensor var_18999_begin_0 = const()[name = tensor("op_18999_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_18999_end_0 = const()[name = tensor("op_18999_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_18999_end_mask_0 = const()[name = tensor("op_18999_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_18999_cast_fp16 = slice_by_index(begin = var_18999_begin_0, end = var_18999_end_0, end_mask = var_18999_end_mask_0, x = var_18626_cast_fp16)[name = tensor("op_18999_cast_fp16")]; + tensor var_19006_begin_0 = const()[name = tensor("op_19006_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_19006_end_0 = const()[name = tensor("op_19006_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_19006_end_mask_0 = const()[name = tensor("op_19006_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_19006_cast_fp16 = slice_by_index(begin = var_19006_begin_0, end = var_19006_end_0, end_mask = var_19006_end_mask_0, x = var_18626_cast_fp16)[name = tensor("op_19006_cast_fp16")]; + tensor var_19013_begin_0 = const()[name = tensor("op_19013_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_19013_end_0 = const()[name = tensor("op_19013_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_19013_end_mask_0 = const()[name = tensor("op_19013_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_19013_cast_fp16 = slice_by_index(begin = var_19013_begin_0, end = var_19013_end_0, end_mask = var_19013_end_mask_0, x = var_18626_cast_fp16)[name = tensor("op_19013_cast_fp16")]; + tensor var_19020_begin_0 = const()[name = tensor("op_19020_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_19020_end_0 = const()[name = tensor("op_19020_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_19020_end_mask_0 = const()[name = tensor("op_19020_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_19020_cast_fp16 = slice_by_index(begin = var_19020_begin_0, end = var_19020_end_0, end_mask = var_19020_end_mask_0, x = var_18626_cast_fp16)[name = tensor("op_19020_cast_fp16")]; + tensor var_19027_begin_0 = const()[name = tensor("op_19027_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_19027_end_0 = const()[name = tensor("op_19027_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_19027_end_mask_0 = const()[name = tensor("op_19027_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_19027_cast_fp16 = slice_by_index(begin = var_19027_begin_0, end = var_19027_end_0, end_mask = var_19027_end_mask_0, x = var_18630_cast_fp16)[name = tensor("op_19027_cast_fp16")]; + tensor var_19034_begin_0 = const()[name = tensor("op_19034_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_19034_end_0 = const()[name = tensor("op_19034_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_19034_end_mask_0 = const()[name = tensor("op_19034_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_19034_cast_fp16 = slice_by_index(begin = var_19034_begin_0, end = var_19034_end_0, end_mask = var_19034_end_mask_0, x = var_18630_cast_fp16)[name = tensor("op_19034_cast_fp16")]; + tensor var_19041_begin_0 = const()[name = tensor("op_19041_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_19041_end_0 = const()[name = tensor("op_19041_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_19041_end_mask_0 = const()[name = tensor("op_19041_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_19041_cast_fp16 = slice_by_index(begin = var_19041_begin_0, end = var_19041_end_0, end_mask = var_19041_end_mask_0, x = var_18630_cast_fp16)[name = tensor("op_19041_cast_fp16")]; + tensor var_19048_begin_0 = const()[name = tensor("op_19048_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_19048_end_0 = const()[name = tensor("op_19048_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_19048_end_mask_0 = const()[name = tensor("op_19048_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_19048_cast_fp16 = slice_by_index(begin = var_19048_begin_0, end = var_19048_end_0, end_mask = var_19048_end_mask_0, x = var_18630_cast_fp16)[name = tensor("op_19048_cast_fp16")]; + tensor var_19055_begin_0 = const()[name = tensor("op_19055_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_19055_end_0 = const()[name = tensor("op_19055_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_19055_end_mask_0 = const()[name = tensor("op_19055_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_19055_cast_fp16 = slice_by_index(begin = var_19055_begin_0, end = var_19055_end_0, end_mask = var_19055_end_mask_0, x = var_18634_cast_fp16)[name = tensor("op_19055_cast_fp16")]; + tensor var_19062_begin_0 = const()[name = tensor("op_19062_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_19062_end_0 = const()[name = tensor("op_19062_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_19062_end_mask_0 = const()[name = tensor("op_19062_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_19062_cast_fp16 = slice_by_index(begin = var_19062_begin_0, end = var_19062_end_0, end_mask = var_19062_end_mask_0, x = var_18634_cast_fp16)[name = tensor("op_19062_cast_fp16")]; + tensor var_19069_begin_0 = const()[name = tensor("op_19069_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_19069_end_0 = const()[name = tensor("op_19069_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_19069_end_mask_0 = const()[name = tensor("op_19069_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_19069_cast_fp16 = slice_by_index(begin = var_19069_begin_0, end = var_19069_end_0, end_mask = var_19069_end_mask_0, x = var_18634_cast_fp16)[name = tensor("op_19069_cast_fp16")]; + tensor var_19076_begin_0 = const()[name = tensor("op_19076_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_19076_end_0 = const()[name = tensor("op_19076_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_19076_end_mask_0 = const()[name = tensor("op_19076_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_19076_cast_fp16 = slice_by_index(begin = var_19076_begin_0, end = var_19076_end_0, end_mask = var_19076_end_mask_0, x = var_18634_cast_fp16)[name = tensor("op_19076_cast_fp16")]; + tensor var_19083_begin_0 = const()[name = tensor("op_19083_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_19083_end_0 = const()[name = tensor("op_19083_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_19083_end_mask_0 = const()[name = tensor("op_19083_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_19083_cast_fp16 = slice_by_index(begin = var_19083_begin_0, end = var_19083_end_0, end_mask = var_19083_end_mask_0, x = var_18638_cast_fp16)[name = tensor("op_19083_cast_fp16")]; + tensor var_19090_begin_0 = const()[name = tensor("op_19090_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_19090_end_0 = const()[name = tensor("op_19090_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_19090_end_mask_0 = const()[name = tensor("op_19090_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_19090_cast_fp16 = slice_by_index(begin = var_19090_begin_0, end = var_19090_end_0, end_mask = var_19090_end_mask_0, x = var_18638_cast_fp16)[name = tensor("op_19090_cast_fp16")]; + tensor var_19097_begin_0 = const()[name = tensor("op_19097_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_19097_end_0 = const()[name = tensor("op_19097_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_19097_end_mask_0 = const()[name = tensor("op_19097_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_19097_cast_fp16 = slice_by_index(begin = var_19097_begin_0, end = var_19097_end_0, end_mask = var_19097_end_mask_0, x = var_18638_cast_fp16)[name = tensor("op_19097_cast_fp16")]; + tensor var_19104_begin_0 = const()[name = tensor("op_19104_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_19104_end_0 = const()[name = tensor("op_19104_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_19104_end_mask_0 = const()[name = tensor("op_19104_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_19104_cast_fp16 = slice_by_index(begin = var_19104_begin_0, end = var_19104_end_0, end_mask = var_19104_end_mask_0, x = var_18638_cast_fp16)[name = tensor("op_19104_cast_fp16")]; + tensor var_19111_begin_0 = const()[name = tensor("op_19111_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_19111_end_0 = const()[name = tensor("op_19111_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_19111_end_mask_0 = const()[name = tensor("op_19111_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_19111_cast_fp16 = slice_by_index(begin = var_19111_begin_0, end = var_19111_end_0, end_mask = var_19111_end_mask_0, x = var_18642_cast_fp16)[name = tensor("op_19111_cast_fp16")]; + tensor var_19118_begin_0 = const()[name = tensor("op_19118_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_19118_end_0 = const()[name = tensor("op_19118_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_19118_end_mask_0 = const()[name = tensor("op_19118_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_19118_cast_fp16 = slice_by_index(begin = var_19118_begin_0, end = var_19118_end_0, end_mask = var_19118_end_mask_0, x = var_18642_cast_fp16)[name = tensor("op_19118_cast_fp16")]; + tensor var_19125_begin_0 = const()[name = tensor("op_19125_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_19125_end_0 = const()[name = tensor("op_19125_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_19125_end_mask_0 = const()[name = tensor("op_19125_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_19125_cast_fp16 = slice_by_index(begin = var_19125_begin_0, end = var_19125_end_0, end_mask = var_19125_end_mask_0, x = var_18642_cast_fp16)[name = tensor("op_19125_cast_fp16")]; + tensor var_19132_begin_0 = const()[name = tensor("op_19132_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_19132_end_0 = const()[name = tensor("op_19132_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_19132_end_mask_0 = const()[name = tensor("op_19132_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_19132_cast_fp16 = slice_by_index(begin = var_19132_begin_0, end = var_19132_end_0, end_mask = var_19132_end_mask_0, x = var_18642_cast_fp16)[name = tensor("op_19132_cast_fp16")]; + tensor var_19139_begin_0 = const()[name = tensor("op_19139_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_19139_end_0 = const()[name = tensor("op_19139_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_19139_end_mask_0 = const()[name = tensor("op_19139_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_19139_cast_fp16 = slice_by_index(begin = var_19139_begin_0, end = var_19139_end_0, end_mask = var_19139_end_mask_0, x = var_18646_cast_fp16)[name = tensor("op_19139_cast_fp16")]; + tensor var_19146_begin_0 = const()[name = tensor("op_19146_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_19146_end_0 = const()[name = tensor("op_19146_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_19146_end_mask_0 = const()[name = tensor("op_19146_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_19146_cast_fp16 = slice_by_index(begin = var_19146_begin_0, end = var_19146_end_0, end_mask = var_19146_end_mask_0, x = var_18646_cast_fp16)[name = tensor("op_19146_cast_fp16")]; + tensor var_19153_begin_0 = const()[name = tensor("op_19153_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_19153_end_0 = const()[name = tensor("op_19153_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_19153_end_mask_0 = const()[name = tensor("op_19153_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_19153_cast_fp16 = slice_by_index(begin = var_19153_begin_0, end = var_19153_end_0, end_mask = var_19153_end_mask_0, x = var_18646_cast_fp16)[name = tensor("op_19153_cast_fp16")]; + tensor var_19160_begin_0 = const()[name = tensor("op_19160_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_19160_end_0 = const()[name = tensor("op_19160_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_19160_end_mask_0 = const()[name = tensor("op_19160_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_19160_cast_fp16 = slice_by_index(begin = var_19160_begin_0, end = var_19160_end_0, end_mask = var_19160_end_mask_0, x = var_18646_cast_fp16)[name = tensor("op_19160_cast_fp16")]; + tensor var_19167_begin_0 = const()[name = tensor("op_19167_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_19167_end_0 = const()[name = tensor("op_19167_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_19167_end_mask_0 = const()[name = tensor("op_19167_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_19167_cast_fp16 = slice_by_index(begin = var_19167_begin_0, end = var_19167_end_0, end_mask = var_19167_end_mask_0, x = var_18650_cast_fp16)[name = tensor("op_19167_cast_fp16")]; + tensor var_19174_begin_0 = const()[name = tensor("op_19174_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_19174_end_0 = const()[name = tensor("op_19174_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_19174_end_mask_0 = const()[name = tensor("op_19174_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_19174_cast_fp16 = slice_by_index(begin = var_19174_begin_0, end = var_19174_end_0, end_mask = var_19174_end_mask_0, x = var_18650_cast_fp16)[name = tensor("op_19174_cast_fp16")]; + tensor var_19181_begin_0 = const()[name = tensor("op_19181_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_19181_end_0 = const()[name = tensor("op_19181_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_19181_end_mask_0 = const()[name = tensor("op_19181_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_19181_cast_fp16 = slice_by_index(begin = var_19181_begin_0, end = var_19181_end_0, end_mask = var_19181_end_mask_0, x = var_18650_cast_fp16)[name = tensor("op_19181_cast_fp16")]; + tensor var_19188_begin_0 = const()[name = tensor("op_19188_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_19188_end_0 = const()[name = tensor("op_19188_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_19188_end_mask_0 = const()[name = tensor("op_19188_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_19188_cast_fp16 = slice_by_index(begin = var_19188_begin_0, end = var_19188_end_0, end_mask = var_19188_end_mask_0, x = var_18650_cast_fp16)[name = tensor("op_19188_cast_fp16")]; + tensor var_19195_begin_0 = const()[name = tensor("op_19195_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_19195_end_0 = const()[name = tensor("op_19195_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_19195_end_mask_0 = const()[name = tensor("op_19195_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_19195_cast_fp16 = slice_by_index(begin = var_19195_begin_0, end = var_19195_end_0, end_mask = var_19195_end_mask_0, x = var_18654_cast_fp16)[name = tensor("op_19195_cast_fp16")]; + tensor var_19202_begin_0 = const()[name = tensor("op_19202_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_19202_end_0 = const()[name = tensor("op_19202_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_19202_end_mask_0 = const()[name = tensor("op_19202_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_19202_cast_fp16 = slice_by_index(begin = var_19202_begin_0, end = var_19202_end_0, end_mask = var_19202_end_mask_0, x = var_18654_cast_fp16)[name = tensor("op_19202_cast_fp16")]; + tensor var_19209_begin_0 = const()[name = tensor("op_19209_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_19209_end_0 = const()[name = tensor("op_19209_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_19209_end_mask_0 = const()[name = tensor("op_19209_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_19209_cast_fp16 = slice_by_index(begin = var_19209_begin_0, end = var_19209_end_0, end_mask = var_19209_end_mask_0, x = var_18654_cast_fp16)[name = tensor("op_19209_cast_fp16")]; + tensor var_19216_begin_0 = const()[name = tensor("op_19216_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_19216_end_0 = const()[name = tensor("op_19216_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_19216_end_mask_0 = const()[name = tensor("op_19216_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_19216_cast_fp16 = slice_by_index(begin = var_19216_begin_0, end = var_19216_end_0, end_mask = var_19216_end_mask_0, x = var_18654_cast_fp16)[name = tensor("op_19216_cast_fp16")]; + tensor k_25_perm_0 = const()[name = tensor("k_25_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_19221_begin_0 = const()[name = tensor("op_19221_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_19221_end_0 = const()[name = tensor("op_19221_end_0"), val = tensor([1, 1500, 1, 64])]; + tensor var_19221_end_mask_0 = const()[name = tensor("op_19221_end_mask_0"), val = tensor([true, true, true, false])]; + tensor transpose_19 = transpose(perm = k_25_perm_0, x = key_25_cast_fp16)[name = tensor("transpose_19")]; + tensor var_19221_cast_fp16 = slice_by_index(begin = var_19221_begin_0, end = var_19221_end_0, end_mask = var_19221_end_mask_0, x = transpose_19)[name = tensor("op_19221_cast_fp16")]; + tensor var_19225_begin_0 = const()[name = tensor("op_19225_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor var_19225_end_0 = const()[name = tensor("op_19225_end_0"), val = tensor([1, 1500, 1, 128])]; + tensor var_19225_end_mask_0 = const()[name = tensor("op_19225_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_19225_cast_fp16 = slice_by_index(begin = var_19225_begin_0, end = var_19225_end_0, end_mask = var_19225_end_mask_0, x = transpose_19)[name = tensor("op_19225_cast_fp16")]; + tensor var_19229_begin_0 = const()[name = tensor("op_19229_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_19229_end_0 = const()[name = tensor("op_19229_end_0"), val = tensor([1, 1500, 1, 192])]; + tensor var_19229_end_mask_0 = const()[name = tensor("op_19229_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_19229_cast_fp16 = slice_by_index(begin = var_19229_begin_0, end = var_19229_end_0, end_mask = var_19229_end_mask_0, x = transpose_19)[name = tensor("op_19229_cast_fp16")]; + tensor var_19233_begin_0 = const()[name = tensor("op_19233_begin_0"), val = tensor([0, 0, 0, 192])]; + tensor var_19233_end_0 = const()[name = tensor("op_19233_end_0"), val = tensor([1, 1500, 1, 256])]; + tensor var_19233_end_mask_0 = const()[name = tensor("op_19233_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_19233_cast_fp16 = slice_by_index(begin = var_19233_begin_0, end = var_19233_end_0, end_mask = var_19233_end_mask_0, x = transpose_19)[name = tensor("op_19233_cast_fp16")]; + tensor var_19237_begin_0 = const()[name = tensor("op_19237_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_19237_end_0 = const()[name = tensor("op_19237_end_0"), val = tensor([1, 1500, 1, 320])]; + tensor var_19237_end_mask_0 = const()[name = tensor("op_19237_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_19237_cast_fp16 = slice_by_index(begin = var_19237_begin_0, end = var_19237_end_0, end_mask = var_19237_end_mask_0, x = transpose_19)[name = tensor("op_19237_cast_fp16")]; + tensor var_19241_begin_0 = const()[name = tensor("op_19241_begin_0"), val = tensor([0, 0, 0, 320])]; + tensor var_19241_end_0 = const()[name = tensor("op_19241_end_0"), val = tensor([1, 1500, 1, 384])]; + tensor var_19241_end_mask_0 = const()[name = tensor("op_19241_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_19241_cast_fp16 = slice_by_index(begin = var_19241_begin_0, end = var_19241_end_0, end_mask = var_19241_end_mask_0, x = transpose_19)[name = tensor("op_19241_cast_fp16")]; + tensor var_19245_begin_0 = const()[name = tensor("op_19245_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_19245_end_0 = const()[name = tensor("op_19245_end_0"), val = tensor([1, 1500, 1, 448])]; + tensor var_19245_end_mask_0 = const()[name = tensor("op_19245_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_19245_cast_fp16 = slice_by_index(begin = var_19245_begin_0, end = var_19245_end_0, end_mask = var_19245_end_mask_0, x = transpose_19)[name = tensor("op_19245_cast_fp16")]; + tensor var_19249_begin_0 = const()[name = tensor("op_19249_begin_0"), val = tensor([0, 0, 0, 448])]; + tensor var_19249_end_0 = const()[name = tensor("op_19249_end_0"), val = tensor([1, 1500, 1, 512])]; + tensor var_19249_end_mask_0 = const()[name = tensor("op_19249_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_19249_cast_fp16 = slice_by_index(begin = var_19249_begin_0, end = var_19249_end_0, end_mask = var_19249_end_mask_0, x = transpose_19)[name = tensor("op_19249_cast_fp16")]; + tensor var_19253_begin_0 = const()[name = tensor("op_19253_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_19253_end_0 = const()[name = tensor("op_19253_end_0"), val = tensor([1, 1500, 1, 576])]; + tensor var_19253_end_mask_0 = const()[name = tensor("op_19253_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_19253_cast_fp16 = slice_by_index(begin = var_19253_begin_0, end = var_19253_end_0, end_mask = var_19253_end_mask_0, x = transpose_19)[name = tensor("op_19253_cast_fp16")]; + tensor var_19257_begin_0 = const()[name = tensor("op_19257_begin_0"), val = tensor([0, 0, 0, 576])]; + tensor var_19257_end_0 = const()[name = tensor("op_19257_end_0"), val = tensor([1, 1500, 1, 640])]; + tensor var_19257_end_mask_0 = const()[name = tensor("op_19257_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_19257_cast_fp16 = slice_by_index(begin = var_19257_begin_0, end = var_19257_end_0, end_mask = var_19257_end_mask_0, x = transpose_19)[name = tensor("op_19257_cast_fp16")]; + tensor var_19261_begin_0 = const()[name = tensor("op_19261_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_19261_end_0 = const()[name = tensor("op_19261_end_0"), val = tensor([1, 1500, 1, 704])]; + tensor var_19261_end_mask_0 = const()[name = tensor("op_19261_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_19261_cast_fp16 = slice_by_index(begin = var_19261_begin_0, end = var_19261_end_0, end_mask = var_19261_end_mask_0, x = transpose_19)[name = tensor("op_19261_cast_fp16")]; + tensor var_19265_begin_0 = const()[name = tensor("op_19265_begin_0"), val = tensor([0, 0, 0, 704])]; + tensor var_19265_end_0 = const()[name = tensor("op_19265_end_0"), val = tensor([1, 1500, 1, 768])]; + tensor var_19265_end_mask_0 = const()[name = tensor("op_19265_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_19265_cast_fp16 = slice_by_index(begin = var_19265_begin_0, end = var_19265_end_0, end_mask = var_19265_end_mask_0, x = transpose_19)[name = tensor("op_19265_cast_fp16")]; + tensor var_19269_begin_0 = const()[name = tensor("op_19269_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_19269_end_0 = const()[name = tensor("op_19269_end_0"), val = tensor([1, 1500, 1, 832])]; + tensor var_19269_end_mask_0 = const()[name = tensor("op_19269_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_19269_cast_fp16 = slice_by_index(begin = var_19269_begin_0, end = var_19269_end_0, end_mask = var_19269_end_mask_0, x = transpose_19)[name = tensor("op_19269_cast_fp16")]; + tensor var_19273_begin_0 = const()[name = tensor("op_19273_begin_0"), val = tensor([0, 0, 0, 832])]; + tensor var_19273_end_0 = const()[name = tensor("op_19273_end_0"), val = tensor([1, 1500, 1, 896])]; + tensor var_19273_end_mask_0 = const()[name = tensor("op_19273_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_19273_cast_fp16 = slice_by_index(begin = var_19273_begin_0, end = var_19273_end_0, end_mask = var_19273_end_mask_0, x = transpose_19)[name = tensor("op_19273_cast_fp16")]; + tensor var_19277_begin_0 = const()[name = tensor("op_19277_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_19277_end_0 = const()[name = tensor("op_19277_end_0"), val = tensor([1, 1500, 1, 960])]; + tensor var_19277_end_mask_0 = const()[name = tensor("op_19277_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_19277_cast_fp16 = slice_by_index(begin = var_19277_begin_0, end = var_19277_end_0, end_mask = var_19277_end_mask_0, x = transpose_19)[name = tensor("op_19277_cast_fp16")]; + tensor var_19281_begin_0 = const()[name = tensor("op_19281_begin_0"), val = tensor([0, 0, 0, 960])]; + tensor var_19281_end_0 = const()[name = tensor("op_19281_end_0"), val = tensor([1, 1500, 1, 1024])]; + tensor var_19281_end_mask_0 = const()[name = tensor("op_19281_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_19281_cast_fp16 = slice_by_index(begin = var_19281_begin_0, end = var_19281_end_0, end_mask = var_19281_end_mask_0, x = transpose_19)[name = tensor("op_19281_cast_fp16")]; + tensor var_19285_begin_0 = const()[name = tensor("op_19285_begin_0"), val = tensor([0, 0, 0, 1024])]; + tensor var_19285_end_0 = const()[name = tensor("op_19285_end_0"), val = tensor([1, 1500, 1, 1088])]; + tensor var_19285_end_mask_0 = const()[name = tensor("op_19285_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_19285_cast_fp16 = slice_by_index(begin = var_19285_begin_0, end = var_19285_end_0, end_mask = var_19285_end_mask_0, x = transpose_19)[name = tensor("op_19285_cast_fp16")]; + tensor var_19289_begin_0 = const()[name = tensor("op_19289_begin_0"), val = tensor([0, 0, 0, 1088])]; + tensor var_19289_end_0 = const()[name = tensor("op_19289_end_0"), val = tensor([1, 1500, 1, 1152])]; + tensor var_19289_end_mask_0 = const()[name = tensor("op_19289_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_19289_cast_fp16 = slice_by_index(begin = var_19289_begin_0, end = var_19289_end_0, end_mask = var_19289_end_mask_0, x = transpose_19)[name = tensor("op_19289_cast_fp16")]; + tensor var_19293_begin_0 = const()[name = tensor("op_19293_begin_0"), val = tensor([0, 0, 0, 1152])]; + tensor var_19293_end_0 = const()[name = tensor("op_19293_end_0"), val = tensor([1, 1500, 1, 1216])]; + tensor var_19293_end_mask_0 = const()[name = tensor("op_19293_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_19293_cast_fp16 = slice_by_index(begin = var_19293_begin_0, end = var_19293_end_0, end_mask = var_19293_end_mask_0, x = transpose_19)[name = tensor("op_19293_cast_fp16")]; + tensor var_19297_begin_0 = const()[name = tensor("op_19297_begin_0"), val = tensor([0, 0, 0, 1216])]; + tensor var_19297_end_0 = const()[name = tensor("op_19297_end_0"), val = tensor([1, 1500, 1, 1280])]; + tensor var_19297_end_mask_0 = const()[name = tensor("op_19297_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_19297_cast_fp16 = slice_by_index(begin = var_19297_begin_0, end = var_19297_end_0, end_mask = var_19297_end_mask_0, x = transpose_19)[name = tensor("op_19297_cast_fp16")]; + tensor var_19299_begin_0 = const()[name = tensor("op_19299_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_19299_end_0 = const()[name = tensor("op_19299_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_19299_end_mask_0 = const()[name = tensor("op_19299_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_19299_cast_fp16 = slice_by_index(begin = var_19299_begin_0, end = var_19299_end_0, end_mask = var_19299_end_mask_0, x = value_25_cast_fp16)[name = tensor("op_19299_cast_fp16")]; + tensor var_19303_begin_0 = const()[name = tensor("op_19303_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_19303_end_0 = const()[name = tensor("op_19303_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_19303_end_mask_0 = const()[name = tensor("op_19303_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_19303_cast_fp16 = slice_by_index(begin = var_19303_begin_0, end = var_19303_end_0, end_mask = var_19303_end_mask_0, x = value_25_cast_fp16)[name = tensor("op_19303_cast_fp16")]; + tensor var_19307_begin_0 = const()[name = tensor("op_19307_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_19307_end_0 = const()[name = tensor("op_19307_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_19307_end_mask_0 = const()[name = tensor("op_19307_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_19307_cast_fp16 = slice_by_index(begin = var_19307_begin_0, end = var_19307_end_0, end_mask = var_19307_end_mask_0, x = value_25_cast_fp16)[name = tensor("op_19307_cast_fp16")]; + tensor var_19311_begin_0 = const()[name = tensor("op_19311_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_19311_end_0 = const()[name = tensor("op_19311_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_19311_end_mask_0 = const()[name = tensor("op_19311_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_19311_cast_fp16 = slice_by_index(begin = var_19311_begin_0, end = var_19311_end_0, end_mask = var_19311_end_mask_0, x = value_25_cast_fp16)[name = tensor("op_19311_cast_fp16")]; + tensor var_19315_begin_0 = const()[name = tensor("op_19315_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_19315_end_0 = const()[name = tensor("op_19315_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_19315_end_mask_0 = const()[name = tensor("op_19315_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_19315_cast_fp16 = slice_by_index(begin = var_19315_begin_0, end = var_19315_end_0, end_mask = var_19315_end_mask_0, x = value_25_cast_fp16)[name = tensor("op_19315_cast_fp16")]; + tensor var_19319_begin_0 = const()[name = tensor("op_19319_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_19319_end_0 = const()[name = tensor("op_19319_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_19319_end_mask_0 = const()[name = tensor("op_19319_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_19319_cast_fp16 = slice_by_index(begin = var_19319_begin_0, end = var_19319_end_0, end_mask = var_19319_end_mask_0, x = value_25_cast_fp16)[name = tensor("op_19319_cast_fp16")]; + tensor var_19323_begin_0 = const()[name = tensor("op_19323_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_19323_end_0 = const()[name = tensor("op_19323_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_19323_end_mask_0 = const()[name = tensor("op_19323_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_19323_cast_fp16 = slice_by_index(begin = var_19323_begin_0, end = var_19323_end_0, end_mask = var_19323_end_mask_0, x = value_25_cast_fp16)[name = tensor("op_19323_cast_fp16")]; + tensor var_19327_begin_0 = const()[name = tensor("op_19327_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_19327_end_0 = const()[name = tensor("op_19327_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_19327_end_mask_0 = const()[name = tensor("op_19327_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_19327_cast_fp16 = slice_by_index(begin = var_19327_begin_0, end = var_19327_end_0, end_mask = var_19327_end_mask_0, x = value_25_cast_fp16)[name = tensor("op_19327_cast_fp16")]; + tensor var_19331_begin_0 = const()[name = tensor("op_19331_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_19331_end_0 = const()[name = tensor("op_19331_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_19331_end_mask_0 = const()[name = tensor("op_19331_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_19331_cast_fp16 = slice_by_index(begin = var_19331_begin_0, end = var_19331_end_0, end_mask = var_19331_end_mask_0, x = value_25_cast_fp16)[name = tensor("op_19331_cast_fp16")]; + tensor var_19335_begin_0 = const()[name = tensor("op_19335_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_19335_end_0 = const()[name = tensor("op_19335_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_19335_end_mask_0 = const()[name = tensor("op_19335_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_19335_cast_fp16 = slice_by_index(begin = var_19335_begin_0, end = var_19335_end_0, end_mask = var_19335_end_mask_0, x = value_25_cast_fp16)[name = tensor("op_19335_cast_fp16")]; + tensor var_19339_begin_0 = const()[name = tensor("op_19339_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_19339_end_0 = const()[name = tensor("op_19339_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_19339_end_mask_0 = const()[name = tensor("op_19339_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_19339_cast_fp16 = slice_by_index(begin = var_19339_begin_0, end = var_19339_end_0, end_mask = var_19339_end_mask_0, x = value_25_cast_fp16)[name = tensor("op_19339_cast_fp16")]; + tensor var_19343_begin_0 = const()[name = tensor("op_19343_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_19343_end_0 = const()[name = tensor("op_19343_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_19343_end_mask_0 = const()[name = tensor("op_19343_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_19343_cast_fp16 = slice_by_index(begin = var_19343_begin_0, end = var_19343_end_0, end_mask = var_19343_end_mask_0, x = value_25_cast_fp16)[name = tensor("op_19343_cast_fp16")]; + tensor var_19347_begin_0 = const()[name = tensor("op_19347_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_19347_end_0 = const()[name = tensor("op_19347_end_0"), val = tensor([1, 832, 1, 1500])]; + tensor var_19347_end_mask_0 = const()[name = tensor("op_19347_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_19347_cast_fp16 = slice_by_index(begin = var_19347_begin_0, end = var_19347_end_0, end_mask = var_19347_end_mask_0, x = value_25_cast_fp16)[name = tensor("op_19347_cast_fp16")]; + tensor var_19351_begin_0 = const()[name = tensor("op_19351_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_19351_end_0 = const()[name = tensor("op_19351_end_0"), val = tensor([1, 896, 1, 1500])]; + tensor var_19351_end_mask_0 = const()[name = tensor("op_19351_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_19351_cast_fp16 = slice_by_index(begin = var_19351_begin_0, end = var_19351_end_0, end_mask = var_19351_end_mask_0, x = value_25_cast_fp16)[name = tensor("op_19351_cast_fp16")]; + tensor var_19355_begin_0 = const()[name = tensor("op_19355_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_19355_end_0 = const()[name = tensor("op_19355_end_0"), val = tensor([1, 960, 1, 1500])]; + tensor var_19355_end_mask_0 = const()[name = tensor("op_19355_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_19355_cast_fp16 = slice_by_index(begin = var_19355_begin_0, end = var_19355_end_0, end_mask = var_19355_end_mask_0, x = value_25_cast_fp16)[name = tensor("op_19355_cast_fp16")]; + tensor var_19359_begin_0 = const()[name = tensor("op_19359_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_19359_end_0 = const()[name = tensor("op_19359_end_0"), val = tensor([1, 1024, 1, 1500])]; + tensor var_19359_end_mask_0 = const()[name = tensor("op_19359_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_19359_cast_fp16 = slice_by_index(begin = var_19359_begin_0, end = var_19359_end_0, end_mask = var_19359_end_mask_0, x = value_25_cast_fp16)[name = tensor("op_19359_cast_fp16")]; + tensor var_19363_begin_0 = const()[name = tensor("op_19363_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_19363_end_0 = const()[name = tensor("op_19363_end_0"), val = tensor([1, 1088, 1, 1500])]; + tensor var_19363_end_mask_0 = const()[name = tensor("op_19363_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_19363_cast_fp16 = slice_by_index(begin = var_19363_begin_0, end = var_19363_end_0, end_mask = var_19363_end_mask_0, x = value_25_cast_fp16)[name = tensor("op_19363_cast_fp16")]; + tensor var_19367_begin_0 = const()[name = tensor("op_19367_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_19367_end_0 = const()[name = tensor("op_19367_end_0"), val = tensor([1, 1152, 1, 1500])]; + tensor var_19367_end_mask_0 = const()[name = tensor("op_19367_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_19367_cast_fp16 = slice_by_index(begin = var_19367_begin_0, end = var_19367_end_0, end_mask = var_19367_end_mask_0, x = value_25_cast_fp16)[name = tensor("op_19367_cast_fp16")]; + tensor var_19371_begin_0 = const()[name = tensor("op_19371_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_19371_end_0 = const()[name = tensor("op_19371_end_0"), val = tensor([1, 1216, 1, 1500])]; + tensor var_19371_end_mask_0 = const()[name = tensor("op_19371_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_19371_cast_fp16 = slice_by_index(begin = var_19371_begin_0, end = var_19371_end_0, end_mask = var_19371_end_mask_0, x = value_25_cast_fp16)[name = tensor("op_19371_cast_fp16")]; + tensor var_19375_begin_0 = const()[name = tensor("op_19375_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_19375_end_0 = const()[name = tensor("op_19375_end_0"), val = tensor([1, 1280, 1, 1500])]; + tensor var_19375_end_mask_0 = const()[name = tensor("op_19375_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_19375_cast_fp16 = slice_by_index(begin = var_19375_begin_0, end = var_19375_end_0, end_mask = var_19375_end_mask_0, x = value_25_cast_fp16)[name = tensor("op_19375_cast_fp16")]; + tensor var_19379_equation_0 = const()[name = tensor("op_19379_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19379_cast_fp16 = einsum(equation = var_19379_equation_0, values = (var_19221_cast_fp16, var_18663_cast_fp16))[name = tensor("op_19379_cast_fp16")]; + tensor var_19380_to_fp16 = const()[name = tensor("op_19380_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1921_cast_fp16 = mul(x = var_19379_cast_fp16, y = var_19380_to_fp16)[name = tensor("aw_chunk_1921_cast_fp16")]; + tensor var_19383_equation_0 = const()[name = tensor("op_19383_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19383_cast_fp16 = einsum(equation = var_19383_equation_0, values = (var_19221_cast_fp16, var_18670_cast_fp16))[name = tensor("op_19383_cast_fp16")]; + tensor var_19384_to_fp16 = const()[name = tensor("op_19384_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1923_cast_fp16 = mul(x = var_19383_cast_fp16, y = var_19384_to_fp16)[name = tensor("aw_chunk_1923_cast_fp16")]; + tensor var_19387_equation_0 = const()[name = tensor("op_19387_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19387_cast_fp16 = einsum(equation = var_19387_equation_0, values = (var_19221_cast_fp16, var_18677_cast_fp16))[name = tensor("op_19387_cast_fp16")]; + tensor var_19388_to_fp16 = const()[name = tensor("op_19388_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1925_cast_fp16 = mul(x = var_19387_cast_fp16, y = var_19388_to_fp16)[name = tensor("aw_chunk_1925_cast_fp16")]; + tensor var_19391_equation_0 = const()[name = tensor("op_19391_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19391_cast_fp16 = einsum(equation = var_19391_equation_0, values = (var_19221_cast_fp16, var_18684_cast_fp16))[name = tensor("op_19391_cast_fp16")]; + tensor var_19392_to_fp16 = const()[name = tensor("op_19392_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1927_cast_fp16 = mul(x = var_19391_cast_fp16, y = var_19392_to_fp16)[name = tensor("aw_chunk_1927_cast_fp16")]; + tensor var_19395_equation_0 = const()[name = tensor("op_19395_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19395_cast_fp16 = einsum(equation = var_19395_equation_0, values = (var_19225_cast_fp16, var_18691_cast_fp16))[name = tensor("op_19395_cast_fp16")]; + tensor var_19396_to_fp16 = const()[name = tensor("op_19396_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1929_cast_fp16 = mul(x = var_19395_cast_fp16, y = var_19396_to_fp16)[name = tensor("aw_chunk_1929_cast_fp16")]; + tensor var_19399_equation_0 = const()[name = tensor("op_19399_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19399_cast_fp16 = einsum(equation = var_19399_equation_0, values = (var_19225_cast_fp16, var_18698_cast_fp16))[name = tensor("op_19399_cast_fp16")]; + tensor var_19400_to_fp16 = const()[name = tensor("op_19400_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1931_cast_fp16 = mul(x = var_19399_cast_fp16, y = var_19400_to_fp16)[name = tensor("aw_chunk_1931_cast_fp16")]; + tensor var_19403_equation_0 = const()[name = tensor("op_19403_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19403_cast_fp16 = einsum(equation = var_19403_equation_0, values = (var_19225_cast_fp16, var_18705_cast_fp16))[name = tensor("op_19403_cast_fp16")]; + tensor var_19404_to_fp16 = const()[name = tensor("op_19404_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1933_cast_fp16 = mul(x = var_19403_cast_fp16, y = var_19404_to_fp16)[name = tensor("aw_chunk_1933_cast_fp16")]; + tensor var_19407_equation_0 = const()[name = tensor("op_19407_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19407_cast_fp16 = einsum(equation = var_19407_equation_0, values = (var_19225_cast_fp16, var_18712_cast_fp16))[name = tensor("op_19407_cast_fp16")]; + tensor var_19408_to_fp16 = const()[name = tensor("op_19408_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1935_cast_fp16 = mul(x = var_19407_cast_fp16, y = var_19408_to_fp16)[name = tensor("aw_chunk_1935_cast_fp16")]; + tensor var_19411_equation_0 = const()[name = tensor("op_19411_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19411_cast_fp16 = einsum(equation = var_19411_equation_0, values = (var_19229_cast_fp16, var_18719_cast_fp16))[name = tensor("op_19411_cast_fp16")]; + tensor var_19412_to_fp16 = const()[name = tensor("op_19412_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1937_cast_fp16 = mul(x = var_19411_cast_fp16, y = var_19412_to_fp16)[name = tensor("aw_chunk_1937_cast_fp16")]; + tensor var_19415_equation_0 = const()[name = tensor("op_19415_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19415_cast_fp16 = einsum(equation = var_19415_equation_0, values = (var_19229_cast_fp16, var_18726_cast_fp16))[name = tensor("op_19415_cast_fp16")]; + tensor var_19416_to_fp16 = const()[name = tensor("op_19416_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1939_cast_fp16 = mul(x = var_19415_cast_fp16, y = var_19416_to_fp16)[name = tensor("aw_chunk_1939_cast_fp16")]; + tensor var_19419_equation_0 = const()[name = tensor("op_19419_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19419_cast_fp16 = einsum(equation = var_19419_equation_0, values = (var_19229_cast_fp16, var_18733_cast_fp16))[name = tensor("op_19419_cast_fp16")]; + tensor var_19420_to_fp16 = const()[name = tensor("op_19420_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1941_cast_fp16 = mul(x = var_19419_cast_fp16, y = var_19420_to_fp16)[name = tensor("aw_chunk_1941_cast_fp16")]; + tensor var_19423_equation_0 = const()[name = tensor("op_19423_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19423_cast_fp16 = einsum(equation = var_19423_equation_0, values = (var_19229_cast_fp16, var_18740_cast_fp16))[name = tensor("op_19423_cast_fp16")]; + tensor var_19424_to_fp16 = const()[name = tensor("op_19424_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1943_cast_fp16 = mul(x = var_19423_cast_fp16, y = var_19424_to_fp16)[name = tensor("aw_chunk_1943_cast_fp16")]; + tensor var_19427_equation_0 = const()[name = tensor("op_19427_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19427_cast_fp16 = einsum(equation = var_19427_equation_0, values = (var_19233_cast_fp16, var_18747_cast_fp16))[name = tensor("op_19427_cast_fp16")]; + tensor var_19428_to_fp16 = const()[name = tensor("op_19428_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1945_cast_fp16 = mul(x = var_19427_cast_fp16, y = var_19428_to_fp16)[name = tensor("aw_chunk_1945_cast_fp16")]; + tensor var_19431_equation_0 = const()[name = tensor("op_19431_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19431_cast_fp16 = einsum(equation = var_19431_equation_0, values = (var_19233_cast_fp16, var_18754_cast_fp16))[name = tensor("op_19431_cast_fp16")]; + tensor var_19432_to_fp16 = const()[name = tensor("op_19432_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1947_cast_fp16 = mul(x = var_19431_cast_fp16, y = var_19432_to_fp16)[name = tensor("aw_chunk_1947_cast_fp16")]; + tensor var_19435_equation_0 = const()[name = tensor("op_19435_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19435_cast_fp16 = einsum(equation = var_19435_equation_0, values = (var_19233_cast_fp16, var_18761_cast_fp16))[name = tensor("op_19435_cast_fp16")]; + tensor var_19436_to_fp16 = const()[name = tensor("op_19436_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1949_cast_fp16 = mul(x = var_19435_cast_fp16, y = var_19436_to_fp16)[name = tensor("aw_chunk_1949_cast_fp16")]; + tensor var_19439_equation_0 = const()[name = tensor("op_19439_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19439_cast_fp16 = einsum(equation = var_19439_equation_0, values = (var_19233_cast_fp16, var_18768_cast_fp16))[name = tensor("op_19439_cast_fp16")]; + tensor var_19440_to_fp16 = const()[name = tensor("op_19440_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1951_cast_fp16 = mul(x = var_19439_cast_fp16, y = var_19440_to_fp16)[name = tensor("aw_chunk_1951_cast_fp16")]; + tensor var_19443_equation_0 = const()[name = tensor("op_19443_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19443_cast_fp16 = einsum(equation = var_19443_equation_0, values = (var_19237_cast_fp16, var_18775_cast_fp16))[name = tensor("op_19443_cast_fp16")]; + tensor var_19444_to_fp16 = const()[name = tensor("op_19444_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1953_cast_fp16 = mul(x = var_19443_cast_fp16, y = var_19444_to_fp16)[name = tensor("aw_chunk_1953_cast_fp16")]; + tensor var_19447_equation_0 = const()[name = tensor("op_19447_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19447_cast_fp16 = einsum(equation = var_19447_equation_0, values = (var_19237_cast_fp16, var_18782_cast_fp16))[name = tensor("op_19447_cast_fp16")]; + tensor var_19448_to_fp16 = const()[name = tensor("op_19448_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1955_cast_fp16 = mul(x = var_19447_cast_fp16, y = var_19448_to_fp16)[name = tensor("aw_chunk_1955_cast_fp16")]; + tensor var_19451_equation_0 = const()[name = tensor("op_19451_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19451_cast_fp16 = einsum(equation = var_19451_equation_0, values = (var_19237_cast_fp16, var_18789_cast_fp16))[name = tensor("op_19451_cast_fp16")]; + tensor var_19452_to_fp16 = const()[name = tensor("op_19452_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1957_cast_fp16 = mul(x = var_19451_cast_fp16, y = var_19452_to_fp16)[name = tensor("aw_chunk_1957_cast_fp16")]; + tensor var_19455_equation_0 = const()[name = tensor("op_19455_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19455_cast_fp16 = einsum(equation = var_19455_equation_0, values = (var_19237_cast_fp16, var_18796_cast_fp16))[name = tensor("op_19455_cast_fp16")]; + tensor var_19456_to_fp16 = const()[name = tensor("op_19456_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1959_cast_fp16 = mul(x = var_19455_cast_fp16, y = var_19456_to_fp16)[name = tensor("aw_chunk_1959_cast_fp16")]; + tensor var_19459_equation_0 = const()[name = tensor("op_19459_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19459_cast_fp16 = einsum(equation = var_19459_equation_0, values = (var_19241_cast_fp16, var_18803_cast_fp16))[name = tensor("op_19459_cast_fp16")]; + tensor var_19460_to_fp16 = const()[name = tensor("op_19460_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1961_cast_fp16 = mul(x = var_19459_cast_fp16, y = var_19460_to_fp16)[name = tensor("aw_chunk_1961_cast_fp16")]; + tensor var_19463_equation_0 = const()[name = tensor("op_19463_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19463_cast_fp16 = einsum(equation = var_19463_equation_0, values = (var_19241_cast_fp16, var_18810_cast_fp16))[name = tensor("op_19463_cast_fp16")]; + tensor var_19464_to_fp16 = const()[name = tensor("op_19464_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1963_cast_fp16 = mul(x = var_19463_cast_fp16, y = var_19464_to_fp16)[name = tensor("aw_chunk_1963_cast_fp16")]; + tensor var_19467_equation_0 = const()[name = tensor("op_19467_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19467_cast_fp16 = einsum(equation = var_19467_equation_0, values = (var_19241_cast_fp16, var_18817_cast_fp16))[name = tensor("op_19467_cast_fp16")]; + tensor var_19468_to_fp16 = const()[name = tensor("op_19468_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1965_cast_fp16 = mul(x = var_19467_cast_fp16, y = var_19468_to_fp16)[name = tensor("aw_chunk_1965_cast_fp16")]; + tensor var_19471_equation_0 = const()[name = tensor("op_19471_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19471_cast_fp16 = einsum(equation = var_19471_equation_0, values = (var_19241_cast_fp16, var_18824_cast_fp16))[name = tensor("op_19471_cast_fp16")]; + tensor var_19472_to_fp16 = const()[name = tensor("op_19472_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1967_cast_fp16 = mul(x = var_19471_cast_fp16, y = var_19472_to_fp16)[name = tensor("aw_chunk_1967_cast_fp16")]; + tensor var_19475_equation_0 = const()[name = tensor("op_19475_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19475_cast_fp16 = einsum(equation = var_19475_equation_0, values = (var_19245_cast_fp16, var_18831_cast_fp16))[name = tensor("op_19475_cast_fp16")]; + tensor var_19476_to_fp16 = const()[name = tensor("op_19476_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1969_cast_fp16 = mul(x = var_19475_cast_fp16, y = var_19476_to_fp16)[name = tensor("aw_chunk_1969_cast_fp16")]; + tensor var_19479_equation_0 = const()[name = tensor("op_19479_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19479_cast_fp16 = einsum(equation = var_19479_equation_0, values = (var_19245_cast_fp16, var_18838_cast_fp16))[name = tensor("op_19479_cast_fp16")]; + tensor var_19480_to_fp16 = const()[name = tensor("op_19480_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1971_cast_fp16 = mul(x = var_19479_cast_fp16, y = var_19480_to_fp16)[name = tensor("aw_chunk_1971_cast_fp16")]; + tensor var_19483_equation_0 = const()[name = tensor("op_19483_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19483_cast_fp16 = einsum(equation = var_19483_equation_0, values = (var_19245_cast_fp16, var_18845_cast_fp16))[name = tensor("op_19483_cast_fp16")]; + tensor var_19484_to_fp16 = const()[name = tensor("op_19484_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1973_cast_fp16 = mul(x = var_19483_cast_fp16, y = var_19484_to_fp16)[name = tensor("aw_chunk_1973_cast_fp16")]; + tensor var_19487_equation_0 = const()[name = tensor("op_19487_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19487_cast_fp16 = einsum(equation = var_19487_equation_0, values = (var_19245_cast_fp16, var_18852_cast_fp16))[name = tensor("op_19487_cast_fp16")]; + tensor var_19488_to_fp16 = const()[name = tensor("op_19488_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1975_cast_fp16 = mul(x = var_19487_cast_fp16, y = var_19488_to_fp16)[name = tensor("aw_chunk_1975_cast_fp16")]; + tensor var_19491_equation_0 = const()[name = tensor("op_19491_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19491_cast_fp16 = einsum(equation = var_19491_equation_0, values = (var_19249_cast_fp16, var_18859_cast_fp16))[name = tensor("op_19491_cast_fp16")]; + tensor var_19492_to_fp16 = const()[name = tensor("op_19492_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1977_cast_fp16 = mul(x = var_19491_cast_fp16, y = var_19492_to_fp16)[name = tensor("aw_chunk_1977_cast_fp16")]; + tensor var_19495_equation_0 = const()[name = tensor("op_19495_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19495_cast_fp16 = einsum(equation = var_19495_equation_0, values = (var_19249_cast_fp16, var_18866_cast_fp16))[name = tensor("op_19495_cast_fp16")]; + tensor var_19496_to_fp16 = const()[name = tensor("op_19496_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1979_cast_fp16 = mul(x = var_19495_cast_fp16, y = var_19496_to_fp16)[name = tensor("aw_chunk_1979_cast_fp16")]; + tensor var_19499_equation_0 = const()[name = tensor("op_19499_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19499_cast_fp16 = einsum(equation = var_19499_equation_0, values = (var_19249_cast_fp16, var_18873_cast_fp16))[name = tensor("op_19499_cast_fp16")]; + tensor var_19500_to_fp16 = const()[name = tensor("op_19500_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1981_cast_fp16 = mul(x = var_19499_cast_fp16, y = var_19500_to_fp16)[name = tensor("aw_chunk_1981_cast_fp16")]; + tensor var_19503_equation_0 = const()[name = tensor("op_19503_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19503_cast_fp16 = einsum(equation = var_19503_equation_0, values = (var_19249_cast_fp16, var_18880_cast_fp16))[name = tensor("op_19503_cast_fp16")]; + tensor var_19504_to_fp16 = const()[name = tensor("op_19504_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1983_cast_fp16 = mul(x = var_19503_cast_fp16, y = var_19504_to_fp16)[name = tensor("aw_chunk_1983_cast_fp16")]; + tensor var_19507_equation_0 = const()[name = tensor("op_19507_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19507_cast_fp16 = einsum(equation = var_19507_equation_0, values = (var_19253_cast_fp16, var_18887_cast_fp16))[name = tensor("op_19507_cast_fp16")]; + tensor var_19508_to_fp16 = const()[name = tensor("op_19508_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1985_cast_fp16 = mul(x = var_19507_cast_fp16, y = var_19508_to_fp16)[name = tensor("aw_chunk_1985_cast_fp16")]; + tensor var_19511_equation_0 = const()[name = tensor("op_19511_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19511_cast_fp16 = einsum(equation = var_19511_equation_0, values = (var_19253_cast_fp16, var_18894_cast_fp16))[name = tensor("op_19511_cast_fp16")]; + tensor var_19512_to_fp16 = const()[name = tensor("op_19512_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1987_cast_fp16 = mul(x = var_19511_cast_fp16, y = var_19512_to_fp16)[name = tensor("aw_chunk_1987_cast_fp16")]; + tensor var_19515_equation_0 = const()[name = tensor("op_19515_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19515_cast_fp16 = einsum(equation = var_19515_equation_0, values = (var_19253_cast_fp16, var_18901_cast_fp16))[name = tensor("op_19515_cast_fp16")]; + tensor var_19516_to_fp16 = const()[name = tensor("op_19516_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1989_cast_fp16 = mul(x = var_19515_cast_fp16, y = var_19516_to_fp16)[name = tensor("aw_chunk_1989_cast_fp16")]; + tensor var_19519_equation_0 = const()[name = tensor("op_19519_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19519_cast_fp16 = einsum(equation = var_19519_equation_0, values = (var_19253_cast_fp16, var_18908_cast_fp16))[name = tensor("op_19519_cast_fp16")]; + tensor var_19520_to_fp16 = const()[name = tensor("op_19520_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1991_cast_fp16 = mul(x = var_19519_cast_fp16, y = var_19520_to_fp16)[name = tensor("aw_chunk_1991_cast_fp16")]; + tensor var_19523_equation_0 = const()[name = tensor("op_19523_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19523_cast_fp16 = einsum(equation = var_19523_equation_0, values = (var_19257_cast_fp16, var_18915_cast_fp16))[name = tensor("op_19523_cast_fp16")]; + tensor var_19524_to_fp16 = const()[name = tensor("op_19524_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1993_cast_fp16 = mul(x = var_19523_cast_fp16, y = var_19524_to_fp16)[name = tensor("aw_chunk_1993_cast_fp16")]; + tensor var_19527_equation_0 = const()[name = tensor("op_19527_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19527_cast_fp16 = einsum(equation = var_19527_equation_0, values = (var_19257_cast_fp16, var_18922_cast_fp16))[name = tensor("op_19527_cast_fp16")]; + tensor var_19528_to_fp16 = const()[name = tensor("op_19528_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1995_cast_fp16 = mul(x = var_19527_cast_fp16, y = var_19528_to_fp16)[name = tensor("aw_chunk_1995_cast_fp16")]; + tensor var_19531_equation_0 = const()[name = tensor("op_19531_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19531_cast_fp16 = einsum(equation = var_19531_equation_0, values = (var_19257_cast_fp16, var_18929_cast_fp16))[name = tensor("op_19531_cast_fp16")]; + tensor var_19532_to_fp16 = const()[name = tensor("op_19532_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1997_cast_fp16 = mul(x = var_19531_cast_fp16, y = var_19532_to_fp16)[name = tensor("aw_chunk_1997_cast_fp16")]; + tensor var_19535_equation_0 = const()[name = tensor("op_19535_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19535_cast_fp16 = einsum(equation = var_19535_equation_0, values = (var_19257_cast_fp16, var_18936_cast_fp16))[name = tensor("op_19535_cast_fp16")]; + tensor var_19536_to_fp16 = const()[name = tensor("op_19536_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_1999_cast_fp16 = mul(x = var_19535_cast_fp16, y = var_19536_to_fp16)[name = tensor("aw_chunk_1999_cast_fp16")]; + tensor var_19539_equation_0 = const()[name = tensor("op_19539_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19539_cast_fp16 = einsum(equation = var_19539_equation_0, values = (var_19261_cast_fp16, var_18943_cast_fp16))[name = tensor("op_19539_cast_fp16")]; + tensor var_19540_to_fp16 = const()[name = tensor("op_19540_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2001_cast_fp16 = mul(x = var_19539_cast_fp16, y = var_19540_to_fp16)[name = tensor("aw_chunk_2001_cast_fp16")]; + tensor var_19543_equation_0 = const()[name = tensor("op_19543_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19543_cast_fp16 = einsum(equation = var_19543_equation_0, values = (var_19261_cast_fp16, var_18950_cast_fp16))[name = tensor("op_19543_cast_fp16")]; + tensor var_19544_to_fp16 = const()[name = tensor("op_19544_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2003_cast_fp16 = mul(x = var_19543_cast_fp16, y = var_19544_to_fp16)[name = tensor("aw_chunk_2003_cast_fp16")]; + tensor var_19547_equation_0 = const()[name = tensor("op_19547_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19547_cast_fp16 = einsum(equation = var_19547_equation_0, values = (var_19261_cast_fp16, var_18957_cast_fp16))[name = tensor("op_19547_cast_fp16")]; + tensor var_19548_to_fp16 = const()[name = tensor("op_19548_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2005_cast_fp16 = mul(x = var_19547_cast_fp16, y = var_19548_to_fp16)[name = tensor("aw_chunk_2005_cast_fp16")]; + tensor var_19551_equation_0 = const()[name = tensor("op_19551_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19551_cast_fp16 = einsum(equation = var_19551_equation_0, values = (var_19261_cast_fp16, var_18964_cast_fp16))[name = tensor("op_19551_cast_fp16")]; + tensor var_19552_to_fp16 = const()[name = tensor("op_19552_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2007_cast_fp16 = mul(x = var_19551_cast_fp16, y = var_19552_to_fp16)[name = tensor("aw_chunk_2007_cast_fp16")]; + tensor var_19555_equation_0 = const()[name = tensor("op_19555_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19555_cast_fp16 = einsum(equation = var_19555_equation_0, values = (var_19265_cast_fp16, var_18971_cast_fp16))[name = tensor("op_19555_cast_fp16")]; + tensor var_19556_to_fp16 = const()[name = tensor("op_19556_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2009_cast_fp16 = mul(x = var_19555_cast_fp16, y = var_19556_to_fp16)[name = tensor("aw_chunk_2009_cast_fp16")]; + tensor var_19559_equation_0 = const()[name = tensor("op_19559_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19559_cast_fp16 = einsum(equation = var_19559_equation_0, values = (var_19265_cast_fp16, var_18978_cast_fp16))[name = tensor("op_19559_cast_fp16")]; + tensor var_19560_to_fp16 = const()[name = tensor("op_19560_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2011_cast_fp16 = mul(x = var_19559_cast_fp16, y = var_19560_to_fp16)[name = tensor("aw_chunk_2011_cast_fp16")]; + tensor var_19563_equation_0 = const()[name = tensor("op_19563_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19563_cast_fp16 = einsum(equation = var_19563_equation_0, values = (var_19265_cast_fp16, var_18985_cast_fp16))[name = tensor("op_19563_cast_fp16")]; + tensor var_19564_to_fp16 = const()[name = tensor("op_19564_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2013_cast_fp16 = mul(x = var_19563_cast_fp16, y = var_19564_to_fp16)[name = tensor("aw_chunk_2013_cast_fp16")]; + tensor var_19567_equation_0 = const()[name = tensor("op_19567_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19567_cast_fp16 = einsum(equation = var_19567_equation_0, values = (var_19265_cast_fp16, var_18992_cast_fp16))[name = tensor("op_19567_cast_fp16")]; + tensor var_19568_to_fp16 = const()[name = tensor("op_19568_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2015_cast_fp16 = mul(x = var_19567_cast_fp16, y = var_19568_to_fp16)[name = tensor("aw_chunk_2015_cast_fp16")]; + tensor var_19571_equation_0 = const()[name = tensor("op_19571_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19571_cast_fp16 = einsum(equation = var_19571_equation_0, values = (var_19269_cast_fp16, var_18999_cast_fp16))[name = tensor("op_19571_cast_fp16")]; + tensor var_19572_to_fp16 = const()[name = tensor("op_19572_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2017_cast_fp16 = mul(x = var_19571_cast_fp16, y = var_19572_to_fp16)[name = tensor("aw_chunk_2017_cast_fp16")]; + tensor var_19575_equation_0 = const()[name = tensor("op_19575_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19575_cast_fp16 = einsum(equation = var_19575_equation_0, values = (var_19269_cast_fp16, var_19006_cast_fp16))[name = tensor("op_19575_cast_fp16")]; + tensor var_19576_to_fp16 = const()[name = tensor("op_19576_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2019_cast_fp16 = mul(x = var_19575_cast_fp16, y = var_19576_to_fp16)[name = tensor("aw_chunk_2019_cast_fp16")]; + tensor var_19579_equation_0 = const()[name = tensor("op_19579_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19579_cast_fp16 = einsum(equation = var_19579_equation_0, values = (var_19269_cast_fp16, var_19013_cast_fp16))[name = tensor("op_19579_cast_fp16")]; + tensor var_19580_to_fp16 = const()[name = tensor("op_19580_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2021_cast_fp16 = mul(x = var_19579_cast_fp16, y = var_19580_to_fp16)[name = tensor("aw_chunk_2021_cast_fp16")]; + tensor var_19583_equation_0 = const()[name = tensor("op_19583_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19583_cast_fp16 = einsum(equation = var_19583_equation_0, values = (var_19269_cast_fp16, var_19020_cast_fp16))[name = tensor("op_19583_cast_fp16")]; + tensor var_19584_to_fp16 = const()[name = tensor("op_19584_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2023_cast_fp16 = mul(x = var_19583_cast_fp16, y = var_19584_to_fp16)[name = tensor("aw_chunk_2023_cast_fp16")]; + tensor var_19587_equation_0 = const()[name = tensor("op_19587_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19587_cast_fp16 = einsum(equation = var_19587_equation_0, values = (var_19273_cast_fp16, var_19027_cast_fp16))[name = tensor("op_19587_cast_fp16")]; + tensor var_19588_to_fp16 = const()[name = tensor("op_19588_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2025_cast_fp16 = mul(x = var_19587_cast_fp16, y = var_19588_to_fp16)[name = tensor("aw_chunk_2025_cast_fp16")]; + tensor var_19591_equation_0 = const()[name = tensor("op_19591_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19591_cast_fp16 = einsum(equation = var_19591_equation_0, values = (var_19273_cast_fp16, var_19034_cast_fp16))[name = tensor("op_19591_cast_fp16")]; + tensor var_19592_to_fp16 = const()[name = tensor("op_19592_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2027_cast_fp16 = mul(x = var_19591_cast_fp16, y = var_19592_to_fp16)[name = tensor("aw_chunk_2027_cast_fp16")]; + tensor var_19595_equation_0 = const()[name = tensor("op_19595_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19595_cast_fp16 = einsum(equation = var_19595_equation_0, values = (var_19273_cast_fp16, var_19041_cast_fp16))[name = tensor("op_19595_cast_fp16")]; + tensor var_19596_to_fp16 = const()[name = tensor("op_19596_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2029_cast_fp16 = mul(x = var_19595_cast_fp16, y = var_19596_to_fp16)[name = tensor("aw_chunk_2029_cast_fp16")]; + tensor var_19599_equation_0 = const()[name = tensor("op_19599_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19599_cast_fp16 = einsum(equation = var_19599_equation_0, values = (var_19273_cast_fp16, var_19048_cast_fp16))[name = tensor("op_19599_cast_fp16")]; + tensor var_19600_to_fp16 = const()[name = tensor("op_19600_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2031_cast_fp16 = mul(x = var_19599_cast_fp16, y = var_19600_to_fp16)[name = tensor("aw_chunk_2031_cast_fp16")]; + tensor var_19603_equation_0 = const()[name = tensor("op_19603_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19603_cast_fp16 = einsum(equation = var_19603_equation_0, values = (var_19277_cast_fp16, var_19055_cast_fp16))[name = tensor("op_19603_cast_fp16")]; + tensor var_19604_to_fp16 = const()[name = tensor("op_19604_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2033_cast_fp16 = mul(x = var_19603_cast_fp16, y = var_19604_to_fp16)[name = tensor("aw_chunk_2033_cast_fp16")]; + tensor var_19607_equation_0 = const()[name = tensor("op_19607_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19607_cast_fp16 = einsum(equation = var_19607_equation_0, values = (var_19277_cast_fp16, var_19062_cast_fp16))[name = tensor("op_19607_cast_fp16")]; + tensor var_19608_to_fp16 = const()[name = tensor("op_19608_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2035_cast_fp16 = mul(x = var_19607_cast_fp16, y = var_19608_to_fp16)[name = tensor("aw_chunk_2035_cast_fp16")]; + tensor var_19611_equation_0 = const()[name = tensor("op_19611_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19611_cast_fp16 = einsum(equation = var_19611_equation_0, values = (var_19277_cast_fp16, var_19069_cast_fp16))[name = tensor("op_19611_cast_fp16")]; + tensor var_19612_to_fp16 = const()[name = tensor("op_19612_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2037_cast_fp16 = mul(x = var_19611_cast_fp16, y = var_19612_to_fp16)[name = tensor("aw_chunk_2037_cast_fp16")]; + tensor var_19615_equation_0 = const()[name = tensor("op_19615_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19615_cast_fp16 = einsum(equation = var_19615_equation_0, values = (var_19277_cast_fp16, var_19076_cast_fp16))[name = tensor("op_19615_cast_fp16")]; + tensor var_19616_to_fp16 = const()[name = tensor("op_19616_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2039_cast_fp16 = mul(x = var_19615_cast_fp16, y = var_19616_to_fp16)[name = tensor("aw_chunk_2039_cast_fp16")]; + tensor var_19619_equation_0 = const()[name = tensor("op_19619_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19619_cast_fp16 = einsum(equation = var_19619_equation_0, values = (var_19281_cast_fp16, var_19083_cast_fp16))[name = tensor("op_19619_cast_fp16")]; + tensor var_19620_to_fp16 = const()[name = tensor("op_19620_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2041_cast_fp16 = mul(x = var_19619_cast_fp16, y = var_19620_to_fp16)[name = tensor("aw_chunk_2041_cast_fp16")]; + tensor var_19623_equation_0 = const()[name = tensor("op_19623_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19623_cast_fp16 = einsum(equation = var_19623_equation_0, values = (var_19281_cast_fp16, var_19090_cast_fp16))[name = tensor("op_19623_cast_fp16")]; + tensor var_19624_to_fp16 = const()[name = tensor("op_19624_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2043_cast_fp16 = mul(x = var_19623_cast_fp16, y = var_19624_to_fp16)[name = tensor("aw_chunk_2043_cast_fp16")]; + tensor var_19627_equation_0 = const()[name = tensor("op_19627_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19627_cast_fp16 = einsum(equation = var_19627_equation_0, values = (var_19281_cast_fp16, var_19097_cast_fp16))[name = tensor("op_19627_cast_fp16")]; + tensor var_19628_to_fp16 = const()[name = tensor("op_19628_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2045_cast_fp16 = mul(x = var_19627_cast_fp16, y = var_19628_to_fp16)[name = tensor("aw_chunk_2045_cast_fp16")]; + tensor var_19631_equation_0 = const()[name = tensor("op_19631_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19631_cast_fp16 = einsum(equation = var_19631_equation_0, values = (var_19281_cast_fp16, var_19104_cast_fp16))[name = tensor("op_19631_cast_fp16")]; + tensor var_19632_to_fp16 = const()[name = tensor("op_19632_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2047_cast_fp16 = mul(x = var_19631_cast_fp16, y = var_19632_to_fp16)[name = tensor("aw_chunk_2047_cast_fp16")]; + tensor var_19635_equation_0 = const()[name = tensor("op_19635_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19635_cast_fp16 = einsum(equation = var_19635_equation_0, values = (var_19285_cast_fp16, var_19111_cast_fp16))[name = tensor("op_19635_cast_fp16")]; + tensor var_19636_to_fp16 = const()[name = tensor("op_19636_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2049_cast_fp16 = mul(x = var_19635_cast_fp16, y = var_19636_to_fp16)[name = tensor("aw_chunk_2049_cast_fp16")]; + tensor var_19639_equation_0 = const()[name = tensor("op_19639_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19639_cast_fp16 = einsum(equation = var_19639_equation_0, values = (var_19285_cast_fp16, var_19118_cast_fp16))[name = tensor("op_19639_cast_fp16")]; + tensor var_19640_to_fp16 = const()[name = tensor("op_19640_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2051_cast_fp16 = mul(x = var_19639_cast_fp16, y = var_19640_to_fp16)[name = tensor("aw_chunk_2051_cast_fp16")]; + tensor var_19643_equation_0 = const()[name = tensor("op_19643_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19643_cast_fp16 = einsum(equation = var_19643_equation_0, values = (var_19285_cast_fp16, var_19125_cast_fp16))[name = tensor("op_19643_cast_fp16")]; + tensor var_19644_to_fp16 = const()[name = tensor("op_19644_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2053_cast_fp16 = mul(x = var_19643_cast_fp16, y = var_19644_to_fp16)[name = tensor("aw_chunk_2053_cast_fp16")]; + tensor var_19647_equation_0 = const()[name = tensor("op_19647_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19647_cast_fp16 = einsum(equation = var_19647_equation_0, values = (var_19285_cast_fp16, var_19132_cast_fp16))[name = tensor("op_19647_cast_fp16")]; + tensor var_19648_to_fp16 = const()[name = tensor("op_19648_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2055_cast_fp16 = mul(x = var_19647_cast_fp16, y = var_19648_to_fp16)[name = tensor("aw_chunk_2055_cast_fp16")]; + tensor var_19651_equation_0 = const()[name = tensor("op_19651_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19651_cast_fp16 = einsum(equation = var_19651_equation_0, values = (var_19289_cast_fp16, var_19139_cast_fp16))[name = tensor("op_19651_cast_fp16")]; + tensor var_19652_to_fp16 = const()[name = tensor("op_19652_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2057_cast_fp16 = mul(x = var_19651_cast_fp16, y = var_19652_to_fp16)[name = tensor("aw_chunk_2057_cast_fp16")]; + tensor var_19655_equation_0 = const()[name = tensor("op_19655_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19655_cast_fp16 = einsum(equation = var_19655_equation_0, values = (var_19289_cast_fp16, var_19146_cast_fp16))[name = tensor("op_19655_cast_fp16")]; + tensor var_19656_to_fp16 = const()[name = tensor("op_19656_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2059_cast_fp16 = mul(x = var_19655_cast_fp16, y = var_19656_to_fp16)[name = tensor("aw_chunk_2059_cast_fp16")]; + tensor var_19659_equation_0 = const()[name = tensor("op_19659_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19659_cast_fp16 = einsum(equation = var_19659_equation_0, values = (var_19289_cast_fp16, var_19153_cast_fp16))[name = tensor("op_19659_cast_fp16")]; + tensor var_19660_to_fp16 = const()[name = tensor("op_19660_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2061_cast_fp16 = mul(x = var_19659_cast_fp16, y = var_19660_to_fp16)[name = tensor("aw_chunk_2061_cast_fp16")]; + tensor var_19663_equation_0 = const()[name = tensor("op_19663_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19663_cast_fp16 = einsum(equation = var_19663_equation_0, values = (var_19289_cast_fp16, var_19160_cast_fp16))[name = tensor("op_19663_cast_fp16")]; + tensor var_19664_to_fp16 = const()[name = tensor("op_19664_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2063_cast_fp16 = mul(x = var_19663_cast_fp16, y = var_19664_to_fp16)[name = tensor("aw_chunk_2063_cast_fp16")]; + tensor var_19667_equation_0 = const()[name = tensor("op_19667_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19667_cast_fp16 = einsum(equation = var_19667_equation_0, values = (var_19293_cast_fp16, var_19167_cast_fp16))[name = tensor("op_19667_cast_fp16")]; + tensor var_19668_to_fp16 = const()[name = tensor("op_19668_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2065_cast_fp16 = mul(x = var_19667_cast_fp16, y = var_19668_to_fp16)[name = tensor("aw_chunk_2065_cast_fp16")]; + tensor var_19671_equation_0 = const()[name = tensor("op_19671_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19671_cast_fp16 = einsum(equation = var_19671_equation_0, values = (var_19293_cast_fp16, var_19174_cast_fp16))[name = tensor("op_19671_cast_fp16")]; + tensor var_19672_to_fp16 = const()[name = tensor("op_19672_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2067_cast_fp16 = mul(x = var_19671_cast_fp16, y = var_19672_to_fp16)[name = tensor("aw_chunk_2067_cast_fp16")]; + tensor var_19675_equation_0 = const()[name = tensor("op_19675_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19675_cast_fp16 = einsum(equation = var_19675_equation_0, values = (var_19293_cast_fp16, var_19181_cast_fp16))[name = tensor("op_19675_cast_fp16")]; + tensor var_19676_to_fp16 = const()[name = tensor("op_19676_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2069_cast_fp16 = mul(x = var_19675_cast_fp16, y = var_19676_to_fp16)[name = tensor("aw_chunk_2069_cast_fp16")]; + tensor var_19679_equation_0 = const()[name = tensor("op_19679_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19679_cast_fp16 = einsum(equation = var_19679_equation_0, values = (var_19293_cast_fp16, var_19188_cast_fp16))[name = tensor("op_19679_cast_fp16")]; + tensor var_19680_to_fp16 = const()[name = tensor("op_19680_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2071_cast_fp16 = mul(x = var_19679_cast_fp16, y = var_19680_to_fp16)[name = tensor("aw_chunk_2071_cast_fp16")]; + tensor var_19683_equation_0 = const()[name = tensor("op_19683_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19683_cast_fp16 = einsum(equation = var_19683_equation_0, values = (var_19297_cast_fp16, var_19195_cast_fp16))[name = tensor("op_19683_cast_fp16")]; + tensor var_19684_to_fp16 = const()[name = tensor("op_19684_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2073_cast_fp16 = mul(x = var_19683_cast_fp16, y = var_19684_to_fp16)[name = tensor("aw_chunk_2073_cast_fp16")]; + tensor var_19687_equation_0 = const()[name = tensor("op_19687_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19687_cast_fp16 = einsum(equation = var_19687_equation_0, values = (var_19297_cast_fp16, var_19202_cast_fp16))[name = tensor("op_19687_cast_fp16")]; + tensor var_19688_to_fp16 = const()[name = tensor("op_19688_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2075_cast_fp16 = mul(x = var_19687_cast_fp16, y = var_19688_to_fp16)[name = tensor("aw_chunk_2075_cast_fp16")]; + tensor var_19691_equation_0 = const()[name = tensor("op_19691_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19691_cast_fp16 = einsum(equation = var_19691_equation_0, values = (var_19297_cast_fp16, var_19209_cast_fp16))[name = tensor("op_19691_cast_fp16")]; + tensor var_19692_to_fp16 = const()[name = tensor("op_19692_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2077_cast_fp16 = mul(x = var_19691_cast_fp16, y = var_19692_to_fp16)[name = tensor("aw_chunk_2077_cast_fp16")]; + tensor var_19695_equation_0 = const()[name = tensor("op_19695_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_19695_cast_fp16 = einsum(equation = var_19695_equation_0, values = (var_19297_cast_fp16, var_19216_cast_fp16))[name = tensor("op_19695_cast_fp16")]; + tensor var_19696_to_fp16 = const()[name = tensor("op_19696_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2079_cast_fp16 = mul(x = var_19695_cast_fp16, y = var_19696_to_fp16)[name = tensor("aw_chunk_2079_cast_fp16")]; + tensor var_19698_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_1921_cast_fp16)[name = tensor("op_19698_cast_fp16")]; + tensor var_19699_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_1923_cast_fp16)[name = tensor("op_19699_cast_fp16")]; + tensor var_19700_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_1925_cast_fp16)[name = tensor("op_19700_cast_fp16")]; + tensor var_19701_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_1927_cast_fp16)[name = tensor("op_19701_cast_fp16")]; + tensor var_19702_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_1929_cast_fp16)[name = tensor("op_19702_cast_fp16")]; + tensor var_19703_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_1931_cast_fp16)[name = tensor("op_19703_cast_fp16")]; + tensor var_19704_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_1933_cast_fp16)[name = tensor("op_19704_cast_fp16")]; + tensor var_19705_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_1935_cast_fp16)[name = tensor("op_19705_cast_fp16")]; + tensor var_19706_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_1937_cast_fp16)[name = tensor("op_19706_cast_fp16")]; + tensor var_19707_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_1939_cast_fp16)[name = tensor("op_19707_cast_fp16")]; + tensor var_19708_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_1941_cast_fp16)[name = tensor("op_19708_cast_fp16")]; + tensor var_19709_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_1943_cast_fp16)[name = tensor("op_19709_cast_fp16")]; + tensor var_19710_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_1945_cast_fp16)[name = tensor("op_19710_cast_fp16")]; + tensor var_19711_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_1947_cast_fp16)[name = tensor("op_19711_cast_fp16")]; + tensor var_19712_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_1949_cast_fp16)[name = tensor("op_19712_cast_fp16")]; + tensor var_19713_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_1951_cast_fp16)[name = tensor("op_19713_cast_fp16")]; + tensor var_19714_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_1953_cast_fp16)[name = tensor("op_19714_cast_fp16")]; + tensor var_19715_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_1955_cast_fp16)[name = tensor("op_19715_cast_fp16")]; + tensor var_19716_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_1957_cast_fp16)[name = tensor("op_19716_cast_fp16")]; + tensor var_19717_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_1959_cast_fp16)[name = tensor("op_19717_cast_fp16")]; + tensor var_19718_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_1961_cast_fp16)[name = tensor("op_19718_cast_fp16")]; + tensor var_19719_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_1963_cast_fp16)[name = tensor("op_19719_cast_fp16")]; + tensor var_19720_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_1965_cast_fp16)[name = tensor("op_19720_cast_fp16")]; + tensor var_19721_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_1967_cast_fp16)[name = tensor("op_19721_cast_fp16")]; + tensor var_19722_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_1969_cast_fp16)[name = tensor("op_19722_cast_fp16")]; + tensor var_19723_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_1971_cast_fp16)[name = tensor("op_19723_cast_fp16")]; + tensor var_19724_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_1973_cast_fp16)[name = tensor("op_19724_cast_fp16")]; + tensor var_19725_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_1975_cast_fp16)[name = tensor("op_19725_cast_fp16")]; + tensor var_19726_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_1977_cast_fp16)[name = tensor("op_19726_cast_fp16")]; + tensor var_19727_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_1979_cast_fp16)[name = tensor("op_19727_cast_fp16")]; + tensor var_19728_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_1981_cast_fp16)[name = tensor("op_19728_cast_fp16")]; + tensor var_19729_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_1983_cast_fp16)[name = tensor("op_19729_cast_fp16")]; + tensor var_19730_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_1985_cast_fp16)[name = tensor("op_19730_cast_fp16")]; + tensor var_19731_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_1987_cast_fp16)[name = tensor("op_19731_cast_fp16")]; + tensor var_19732_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_1989_cast_fp16)[name = tensor("op_19732_cast_fp16")]; + tensor var_19733_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_1991_cast_fp16)[name = tensor("op_19733_cast_fp16")]; + tensor var_19734_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_1993_cast_fp16)[name = tensor("op_19734_cast_fp16")]; + tensor var_19735_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_1995_cast_fp16)[name = tensor("op_19735_cast_fp16")]; + tensor var_19736_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_1997_cast_fp16)[name = tensor("op_19736_cast_fp16")]; + tensor var_19737_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_1999_cast_fp16)[name = tensor("op_19737_cast_fp16")]; + tensor var_19738_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_2001_cast_fp16)[name = tensor("op_19738_cast_fp16")]; + tensor var_19739_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_2003_cast_fp16)[name = tensor("op_19739_cast_fp16")]; + tensor var_19740_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_2005_cast_fp16)[name = tensor("op_19740_cast_fp16")]; + tensor var_19741_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_2007_cast_fp16)[name = tensor("op_19741_cast_fp16")]; + tensor var_19742_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_2009_cast_fp16)[name = tensor("op_19742_cast_fp16")]; + tensor var_19743_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_2011_cast_fp16)[name = tensor("op_19743_cast_fp16")]; + tensor var_19744_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_2013_cast_fp16)[name = tensor("op_19744_cast_fp16")]; + tensor var_19745_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_2015_cast_fp16)[name = tensor("op_19745_cast_fp16")]; + tensor var_19746_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_2017_cast_fp16)[name = tensor("op_19746_cast_fp16")]; + tensor var_19747_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_2019_cast_fp16)[name = tensor("op_19747_cast_fp16")]; + tensor var_19748_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_2021_cast_fp16)[name = tensor("op_19748_cast_fp16")]; + tensor var_19749_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_2023_cast_fp16)[name = tensor("op_19749_cast_fp16")]; + tensor var_19750_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_2025_cast_fp16)[name = tensor("op_19750_cast_fp16")]; + tensor var_19751_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_2027_cast_fp16)[name = tensor("op_19751_cast_fp16")]; + tensor var_19752_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_2029_cast_fp16)[name = tensor("op_19752_cast_fp16")]; + tensor var_19753_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_2031_cast_fp16)[name = tensor("op_19753_cast_fp16")]; + tensor var_19754_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_2033_cast_fp16)[name = tensor("op_19754_cast_fp16")]; + tensor var_19755_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_2035_cast_fp16)[name = tensor("op_19755_cast_fp16")]; + tensor var_19756_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_2037_cast_fp16)[name = tensor("op_19756_cast_fp16")]; + tensor var_19757_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_2039_cast_fp16)[name = tensor("op_19757_cast_fp16")]; + tensor var_19758_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_2041_cast_fp16)[name = tensor("op_19758_cast_fp16")]; + tensor var_19759_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_2043_cast_fp16)[name = tensor("op_19759_cast_fp16")]; + tensor var_19760_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_2045_cast_fp16)[name = tensor("op_19760_cast_fp16")]; + tensor var_19761_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_2047_cast_fp16)[name = tensor("op_19761_cast_fp16")]; + tensor var_19762_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_2049_cast_fp16)[name = tensor("op_19762_cast_fp16")]; + tensor var_19763_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_2051_cast_fp16)[name = tensor("op_19763_cast_fp16")]; + tensor var_19764_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_2053_cast_fp16)[name = tensor("op_19764_cast_fp16")]; + tensor var_19765_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_2055_cast_fp16)[name = tensor("op_19765_cast_fp16")]; + tensor var_19766_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_2057_cast_fp16)[name = tensor("op_19766_cast_fp16")]; + tensor var_19767_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_2059_cast_fp16)[name = tensor("op_19767_cast_fp16")]; + tensor var_19768_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_2061_cast_fp16)[name = tensor("op_19768_cast_fp16")]; + tensor var_19769_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_2063_cast_fp16)[name = tensor("op_19769_cast_fp16")]; + tensor var_19770_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_2065_cast_fp16)[name = tensor("op_19770_cast_fp16")]; + tensor var_19771_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_2067_cast_fp16)[name = tensor("op_19771_cast_fp16")]; + tensor var_19772_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_2069_cast_fp16)[name = tensor("op_19772_cast_fp16")]; + tensor var_19773_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_2071_cast_fp16)[name = tensor("op_19773_cast_fp16")]; + tensor var_19774_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_2073_cast_fp16)[name = tensor("op_19774_cast_fp16")]; + tensor var_19775_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_2075_cast_fp16)[name = tensor("op_19775_cast_fp16")]; + tensor var_19776_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_2077_cast_fp16)[name = tensor("op_19776_cast_fp16")]; + tensor var_19777_cast_fp16 = softmax(axis = var_18523, x = aw_chunk_2079_cast_fp16)[name = tensor("op_19777_cast_fp16")]; + tensor var_19779_equation_0 = const()[name = tensor("op_19779_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19779_cast_fp16 = einsum(equation = var_19779_equation_0, values = (var_19299_cast_fp16, var_19698_cast_fp16))[name = tensor("op_19779_cast_fp16")]; + tensor var_19781_equation_0 = const()[name = tensor("op_19781_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19781_cast_fp16 = einsum(equation = var_19781_equation_0, values = (var_19299_cast_fp16, var_19699_cast_fp16))[name = tensor("op_19781_cast_fp16")]; + tensor var_19783_equation_0 = const()[name = tensor("op_19783_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19783_cast_fp16 = einsum(equation = var_19783_equation_0, values = (var_19299_cast_fp16, var_19700_cast_fp16))[name = tensor("op_19783_cast_fp16")]; + tensor var_19785_equation_0 = const()[name = tensor("op_19785_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19785_cast_fp16 = einsum(equation = var_19785_equation_0, values = (var_19299_cast_fp16, var_19701_cast_fp16))[name = tensor("op_19785_cast_fp16")]; + tensor var_19787_equation_0 = const()[name = tensor("op_19787_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19787_cast_fp16 = einsum(equation = var_19787_equation_0, values = (var_19303_cast_fp16, var_19702_cast_fp16))[name = tensor("op_19787_cast_fp16")]; + tensor var_19789_equation_0 = const()[name = tensor("op_19789_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19789_cast_fp16 = einsum(equation = var_19789_equation_0, values = (var_19303_cast_fp16, var_19703_cast_fp16))[name = tensor("op_19789_cast_fp16")]; + tensor var_19791_equation_0 = const()[name = tensor("op_19791_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19791_cast_fp16 = einsum(equation = var_19791_equation_0, values = (var_19303_cast_fp16, var_19704_cast_fp16))[name = tensor("op_19791_cast_fp16")]; + tensor var_19793_equation_0 = const()[name = tensor("op_19793_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19793_cast_fp16 = einsum(equation = var_19793_equation_0, values = (var_19303_cast_fp16, var_19705_cast_fp16))[name = tensor("op_19793_cast_fp16")]; + tensor var_19795_equation_0 = const()[name = tensor("op_19795_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19795_cast_fp16 = einsum(equation = var_19795_equation_0, values = (var_19307_cast_fp16, var_19706_cast_fp16))[name = tensor("op_19795_cast_fp16")]; + tensor var_19797_equation_0 = const()[name = tensor("op_19797_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19797_cast_fp16 = einsum(equation = var_19797_equation_0, values = (var_19307_cast_fp16, var_19707_cast_fp16))[name = tensor("op_19797_cast_fp16")]; + tensor var_19799_equation_0 = const()[name = tensor("op_19799_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19799_cast_fp16 = einsum(equation = var_19799_equation_0, values = (var_19307_cast_fp16, var_19708_cast_fp16))[name = tensor("op_19799_cast_fp16")]; + tensor var_19801_equation_0 = const()[name = tensor("op_19801_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19801_cast_fp16 = einsum(equation = var_19801_equation_0, values = (var_19307_cast_fp16, var_19709_cast_fp16))[name = tensor("op_19801_cast_fp16")]; + tensor var_19803_equation_0 = const()[name = tensor("op_19803_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19803_cast_fp16 = einsum(equation = var_19803_equation_0, values = (var_19311_cast_fp16, var_19710_cast_fp16))[name = tensor("op_19803_cast_fp16")]; + tensor var_19805_equation_0 = const()[name = tensor("op_19805_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19805_cast_fp16 = einsum(equation = var_19805_equation_0, values = (var_19311_cast_fp16, var_19711_cast_fp16))[name = tensor("op_19805_cast_fp16")]; + tensor var_19807_equation_0 = const()[name = tensor("op_19807_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19807_cast_fp16 = einsum(equation = var_19807_equation_0, values = (var_19311_cast_fp16, var_19712_cast_fp16))[name = tensor("op_19807_cast_fp16")]; + tensor var_19809_equation_0 = const()[name = tensor("op_19809_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19809_cast_fp16 = einsum(equation = var_19809_equation_0, values = (var_19311_cast_fp16, var_19713_cast_fp16))[name = tensor("op_19809_cast_fp16")]; + tensor var_19811_equation_0 = const()[name = tensor("op_19811_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19811_cast_fp16 = einsum(equation = var_19811_equation_0, values = (var_19315_cast_fp16, var_19714_cast_fp16))[name = tensor("op_19811_cast_fp16")]; + tensor var_19813_equation_0 = const()[name = tensor("op_19813_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19813_cast_fp16 = einsum(equation = var_19813_equation_0, values = (var_19315_cast_fp16, var_19715_cast_fp16))[name = tensor("op_19813_cast_fp16")]; + tensor var_19815_equation_0 = const()[name = tensor("op_19815_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19815_cast_fp16 = einsum(equation = var_19815_equation_0, values = (var_19315_cast_fp16, var_19716_cast_fp16))[name = tensor("op_19815_cast_fp16")]; + tensor var_19817_equation_0 = const()[name = tensor("op_19817_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19817_cast_fp16 = einsum(equation = var_19817_equation_0, values = (var_19315_cast_fp16, var_19717_cast_fp16))[name = tensor("op_19817_cast_fp16")]; + tensor var_19819_equation_0 = const()[name = tensor("op_19819_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19819_cast_fp16 = einsum(equation = var_19819_equation_0, values = (var_19319_cast_fp16, var_19718_cast_fp16))[name = tensor("op_19819_cast_fp16")]; + tensor var_19821_equation_0 = const()[name = tensor("op_19821_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19821_cast_fp16 = einsum(equation = var_19821_equation_0, values = (var_19319_cast_fp16, var_19719_cast_fp16))[name = tensor("op_19821_cast_fp16")]; + tensor var_19823_equation_0 = const()[name = tensor("op_19823_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19823_cast_fp16 = einsum(equation = var_19823_equation_0, values = (var_19319_cast_fp16, var_19720_cast_fp16))[name = tensor("op_19823_cast_fp16")]; + tensor var_19825_equation_0 = const()[name = tensor("op_19825_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19825_cast_fp16 = einsum(equation = var_19825_equation_0, values = (var_19319_cast_fp16, var_19721_cast_fp16))[name = tensor("op_19825_cast_fp16")]; + tensor var_19827_equation_0 = const()[name = tensor("op_19827_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19827_cast_fp16 = einsum(equation = var_19827_equation_0, values = (var_19323_cast_fp16, var_19722_cast_fp16))[name = tensor("op_19827_cast_fp16")]; + tensor var_19829_equation_0 = const()[name = tensor("op_19829_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19829_cast_fp16 = einsum(equation = var_19829_equation_0, values = (var_19323_cast_fp16, var_19723_cast_fp16))[name = tensor("op_19829_cast_fp16")]; + tensor var_19831_equation_0 = const()[name = tensor("op_19831_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19831_cast_fp16 = einsum(equation = var_19831_equation_0, values = (var_19323_cast_fp16, var_19724_cast_fp16))[name = tensor("op_19831_cast_fp16")]; + tensor var_19833_equation_0 = const()[name = tensor("op_19833_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19833_cast_fp16 = einsum(equation = var_19833_equation_0, values = (var_19323_cast_fp16, var_19725_cast_fp16))[name = tensor("op_19833_cast_fp16")]; + tensor var_19835_equation_0 = const()[name = tensor("op_19835_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19835_cast_fp16 = einsum(equation = var_19835_equation_0, values = (var_19327_cast_fp16, var_19726_cast_fp16))[name = tensor("op_19835_cast_fp16")]; + tensor var_19837_equation_0 = const()[name = tensor("op_19837_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19837_cast_fp16 = einsum(equation = var_19837_equation_0, values = (var_19327_cast_fp16, var_19727_cast_fp16))[name = tensor("op_19837_cast_fp16")]; + tensor var_19839_equation_0 = const()[name = tensor("op_19839_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19839_cast_fp16 = einsum(equation = var_19839_equation_0, values = (var_19327_cast_fp16, var_19728_cast_fp16))[name = tensor("op_19839_cast_fp16")]; + tensor var_19841_equation_0 = const()[name = tensor("op_19841_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19841_cast_fp16 = einsum(equation = var_19841_equation_0, values = (var_19327_cast_fp16, var_19729_cast_fp16))[name = tensor("op_19841_cast_fp16")]; + tensor var_19843_equation_0 = const()[name = tensor("op_19843_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19843_cast_fp16 = einsum(equation = var_19843_equation_0, values = (var_19331_cast_fp16, var_19730_cast_fp16))[name = tensor("op_19843_cast_fp16")]; + tensor var_19845_equation_0 = const()[name = tensor("op_19845_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19845_cast_fp16 = einsum(equation = var_19845_equation_0, values = (var_19331_cast_fp16, var_19731_cast_fp16))[name = tensor("op_19845_cast_fp16")]; + tensor var_19847_equation_0 = const()[name = tensor("op_19847_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19847_cast_fp16 = einsum(equation = var_19847_equation_0, values = (var_19331_cast_fp16, var_19732_cast_fp16))[name = tensor("op_19847_cast_fp16")]; + tensor var_19849_equation_0 = const()[name = tensor("op_19849_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19849_cast_fp16 = einsum(equation = var_19849_equation_0, values = (var_19331_cast_fp16, var_19733_cast_fp16))[name = tensor("op_19849_cast_fp16")]; + tensor var_19851_equation_0 = const()[name = tensor("op_19851_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19851_cast_fp16 = einsum(equation = var_19851_equation_0, values = (var_19335_cast_fp16, var_19734_cast_fp16))[name = tensor("op_19851_cast_fp16")]; + tensor var_19853_equation_0 = const()[name = tensor("op_19853_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19853_cast_fp16 = einsum(equation = var_19853_equation_0, values = (var_19335_cast_fp16, var_19735_cast_fp16))[name = tensor("op_19853_cast_fp16")]; + tensor var_19855_equation_0 = const()[name = tensor("op_19855_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19855_cast_fp16 = einsum(equation = var_19855_equation_0, values = (var_19335_cast_fp16, var_19736_cast_fp16))[name = tensor("op_19855_cast_fp16")]; + tensor var_19857_equation_0 = const()[name = tensor("op_19857_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19857_cast_fp16 = einsum(equation = var_19857_equation_0, values = (var_19335_cast_fp16, var_19737_cast_fp16))[name = tensor("op_19857_cast_fp16")]; + tensor var_19859_equation_0 = const()[name = tensor("op_19859_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19859_cast_fp16 = einsum(equation = var_19859_equation_0, values = (var_19339_cast_fp16, var_19738_cast_fp16))[name = tensor("op_19859_cast_fp16")]; + tensor var_19861_equation_0 = const()[name = tensor("op_19861_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19861_cast_fp16 = einsum(equation = var_19861_equation_0, values = (var_19339_cast_fp16, var_19739_cast_fp16))[name = tensor("op_19861_cast_fp16")]; + tensor var_19863_equation_0 = const()[name = tensor("op_19863_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19863_cast_fp16 = einsum(equation = var_19863_equation_0, values = (var_19339_cast_fp16, var_19740_cast_fp16))[name = tensor("op_19863_cast_fp16")]; + tensor var_19865_equation_0 = const()[name = tensor("op_19865_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19865_cast_fp16 = einsum(equation = var_19865_equation_0, values = (var_19339_cast_fp16, var_19741_cast_fp16))[name = tensor("op_19865_cast_fp16")]; + tensor var_19867_equation_0 = const()[name = tensor("op_19867_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19867_cast_fp16 = einsum(equation = var_19867_equation_0, values = (var_19343_cast_fp16, var_19742_cast_fp16))[name = tensor("op_19867_cast_fp16")]; + tensor var_19869_equation_0 = const()[name = tensor("op_19869_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19869_cast_fp16 = einsum(equation = var_19869_equation_0, values = (var_19343_cast_fp16, var_19743_cast_fp16))[name = tensor("op_19869_cast_fp16")]; + tensor var_19871_equation_0 = const()[name = tensor("op_19871_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19871_cast_fp16 = einsum(equation = var_19871_equation_0, values = (var_19343_cast_fp16, var_19744_cast_fp16))[name = tensor("op_19871_cast_fp16")]; + tensor var_19873_equation_0 = const()[name = tensor("op_19873_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19873_cast_fp16 = einsum(equation = var_19873_equation_0, values = (var_19343_cast_fp16, var_19745_cast_fp16))[name = tensor("op_19873_cast_fp16")]; + tensor var_19875_equation_0 = const()[name = tensor("op_19875_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19875_cast_fp16 = einsum(equation = var_19875_equation_0, values = (var_19347_cast_fp16, var_19746_cast_fp16))[name = tensor("op_19875_cast_fp16")]; + tensor var_19877_equation_0 = const()[name = tensor("op_19877_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19877_cast_fp16 = einsum(equation = var_19877_equation_0, values = (var_19347_cast_fp16, var_19747_cast_fp16))[name = tensor("op_19877_cast_fp16")]; + tensor var_19879_equation_0 = const()[name = tensor("op_19879_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19879_cast_fp16 = einsum(equation = var_19879_equation_0, values = (var_19347_cast_fp16, var_19748_cast_fp16))[name = tensor("op_19879_cast_fp16")]; + tensor var_19881_equation_0 = const()[name = tensor("op_19881_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19881_cast_fp16 = einsum(equation = var_19881_equation_0, values = (var_19347_cast_fp16, var_19749_cast_fp16))[name = tensor("op_19881_cast_fp16")]; + tensor var_19883_equation_0 = const()[name = tensor("op_19883_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19883_cast_fp16 = einsum(equation = var_19883_equation_0, values = (var_19351_cast_fp16, var_19750_cast_fp16))[name = tensor("op_19883_cast_fp16")]; + tensor var_19885_equation_0 = const()[name = tensor("op_19885_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19885_cast_fp16 = einsum(equation = var_19885_equation_0, values = (var_19351_cast_fp16, var_19751_cast_fp16))[name = tensor("op_19885_cast_fp16")]; + tensor var_19887_equation_0 = const()[name = tensor("op_19887_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19887_cast_fp16 = einsum(equation = var_19887_equation_0, values = (var_19351_cast_fp16, var_19752_cast_fp16))[name = tensor("op_19887_cast_fp16")]; + tensor var_19889_equation_0 = const()[name = tensor("op_19889_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19889_cast_fp16 = einsum(equation = var_19889_equation_0, values = (var_19351_cast_fp16, var_19753_cast_fp16))[name = tensor("op_19889_cast_fp16")]; + tensor var_19891_equation_0 = const()[name = tensor("op_19891_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19891_cast_fp16 = einsum(equation = var_19891_equation_0, values = (var_19355_cast_fp16, var_19754_cast_fp16))[name = tensor("op_19891_cast_fp16")]; + tensor var_19893_equation_0 = const()[name = tensor("op_19893_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19893_cast_fp16 = einsum(equation = var_19893_equation_0, values = (var_19355_cast_fp16, var_19755_cast_fp16))[name = tensor("op_19893_cast_fp16")]; + tensor var_19895_equation_0 = const()[name = tensor("op_19895_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19895_cast_fp16 = einsum(equation = var_19895_equation_0, values = (var_19355_cast_fp16, var_19756_cast_fp16))[name = tensor("op_19895_cast_fp16")]; + tensor var_19897_equation_0 = const()[name = tensor("op_19897_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19897_cast_fp16 = einsum(equation = var_19897_equation_0, values = (var_19355_cast_fp16, var_19757_cast_fp16))[name = tensor("op_19897_cast_fp16")]; + tensor var_19899_equation_0 = const()[name = tensor("op_19899_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19899_cast_fp16 = einsum(equation = var_19899_equation_0, values = (var_19359_cast_fp16, var_19758_cast_fp16))[name = tensor("op_19899_cast_fp16")]; + tensor var_19901_equation_0 = const()[name = tensor("op_19901_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19901_cast_fp16 = einsum(equation = var_19901_equation_0, values = (var_19359_cast_fp16, var_19759_cast_fp16))[name = tensor("op_19901_cast_fp16")]; + tensor var_19903_equation_0 = const()[name = tensor("op_19903_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19903_cast_fp16 = einsum(equation = var_19903_equation_0, values = (var_19359_cast_fp16, var_19760_cast_fp16))[name = tensor("op_19903_cast_fp16")]; + tensor var_19905_equation_0 = const()[name = tensor("op_19905_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19905_cast_fp16 = einsum(equation = var_19905_equation_0, values = (var_19359_cast_fp16, var_19761_cast_fp16))[name = tensor("op_19905_cast_fp16")]; + tensor var_19907_equation_0 = const()[name = tensor("op_19907_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19907_cast_fp16 = einsum(equation = var_19907_equation_0, values = (var_19363_cast_fp16, var_19762_cast_fp16))[name = tensor("op_19907_cast_fp16")]; + tensor var_19909_equation_0 = const()[name = tensor("op_19909_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19909_cast_fp16 = einsum(equation = var_19909_equation_0, values = (var_19363_cast_fp16, var_19763_cast_fp16))[name = tensor("op_19909_cast_fp16")]; + tensor var_19911_equation_0 = const()[name = tensor("op_19911_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19911_cast_fp16 = einsum(equation = var_19911_equation_0, values = (var_19363_cast_fp16, var_19764_cast_fp16))[name = tensor("op_19911_cast_fp16")]; + tensor var_19913_equation_0 = const()[name = tensor("op_19913_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19913_cast_fp16 = einsum(equation = var_19913_equation_0, values = (var_19363_cast_fp16, var_19765_cast_fp16))[name = tensor("op_19913_cast_fp16")]; + tensor var_19915_equation_0 = const()[name = tensor("op_19915_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19915_cast_fp16 = einsum(equation = var_19915_equation_0, values = (var_19367_cast_fp16, var_19766_cast_fp16))[name = tensor("op_19915_cast_fp16")]; + tensor var_19917_equation_0 = const()[name = tensor("op_19917_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19917_cast_fp16 = einsum(equation = var_19917_equation_0, values = (var_19367_cast_fp16, var_19767_cast_fp16))[name = tensor("op_19917_cast_fp16")]; + tensor var_19919_equation_0 = const()[name = tensor("op_19919_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19919_cast_fp16 = einsum(equation = var_19919_equation_0, values = (var_19367_cast_fp16, var_19768_cast_fp16))[name = tensor("op_19919_cast_fp16")]; + tensor var_19921_equation_0 = const()[name = tensor("op_19921_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19921_cast_fp16 = einsum(equation = var_19921_equation_0, values = (var_19367_cast_fp16, var_19769_cast_fp16))[name = tensor("op_19921_cast_fp16")]; + tensor var_19923_equation_0 = const()[name = tensor("op_19923_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19923_cast_fp16 = einsum(equation = var_19923_equation_0, values = (var_19371_cast_fp16, var_19770_cast_fp16))[name = tensor("op_19923_cast_fp16")]; + tensor var_19925_equation_0 = const()[name = tensor("op_19925_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19925_cast_fp16 = einsum(equation = var_19925_equation_0, values = (var_19371_cast_fp16, var_19771_cast_fp16))[name = tensor("op_19925_cast_fp16")]; + tensor var_19927_equation_0 = const()[name = tensor("op_19927_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19927_cast_fp16 = einsum(equation = var_19927_equation_0, values = (var_19371_cast_fp16, var_19772_cast_fp16))[name = tensor("op_19927_cast_fp16")]; + tensor var_19929_equation_0 = const()[name = tensor("op_19929_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19929_cast_fp16 = einsum(equation = var_19929_equation_0, values = (var_19371_cast_fp16, var_19773_cast_fp16))[name = tensor("op_19929_cast_fp16")]; + tensor var_19931_equation_0 = const()[name = tensor("op_19931_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19931_cast_fp16 = einsum(equation = var_19931_equation_0, values = (var_19375_cast_fp16, var_19774_cast_fp16))[name = tensor("op_19931_cast_fp16")]; + tensor var_19933_equation_0 = const()[name = tensor("op_19933_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19933_cast_fp16 = einsum(equation = var_19933_equation_0, values = (var_19375_cast_fp16, var_19775_cast_fp16))[name = tensor("op_19933_cast_fp16")]; + tensor var_19935_equation_0 = const()[name = tensor("op_19935_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19935_cast_fp16 = einsum(equation = var_19935_equation_0, values = (var_19375_cast_fp16, var_19776_cast_fp16))[name = tensor("op_19935_cast_fp16")]; + tensor var_19937_equation_0 = const()[name = tensor("op_19937_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_19937_cast_fp16 = einsum(equation = var_19937_equation_0, values = (var_19375_cast_fp16, var_19777_cast_fp16))[name = tensor("op_19937_cast_fp16")]; + tensor var_19939_interleave_0 = const()[name = tensor("op_19939_interleave_0"), val = tensor(false)]; + tensor var_19939_cast_fp16 = concat(axis = var_18498, interleave = var_19939_interleave_0, values = (var_19779_cast_fp16, var_19781_cast_fp16, var_19783_cast_fp16, var_19785_cast_fp16))[name = tensor("op_19939_cast_fp16")]; + tensor var_19941_interleave_0 = const()[name = tensor("op_19941_interleave_0"), val = tensor(false)]; + tensor var_19941_cast_fp16 = concat(axis = var_18498, interleave = var_19941_interleave_0, values = (var_19787_cast_fp16, var_19789_cast_fp16, var_19791_cast_fp16, var_19793_cast_fp16))[name = tensor("op_19941_cast_fp16")]; + tensor var_19943_interleave_0 = const()[name = tensor("op_19943_interleave_0"), val = tensor(false)]; + tensor var_19943_cast_fp16 = concat(axis = var_18498, interleave = var_19943_interleave_0, values = (var_19795_cast_fp16, var_19797_cast_fp16, var_19799_cast_fp16, var_19801_cast_fp16))[name = tensor("op_19943_cast_fp16")]; + tensor var_19945_interleave_0 = const()[name = tensor("op_19945_interleave_0"), val = tensor(false)]; + tensor var_19945_cast_fp16 = concat(axis = var_18498, interleave = var_19945_interleave_0, values = (var_19803_cast_fp16, var_19805_cast_fp16, var_19807_cast_fp16, var_19809_cast_fp16))[name = tensor("op_19945_cast_fp16")]; + tensor var_19947_interleave_0 = const()[name = tensor("op_19947_interleave_0"), val = tensor(false)]; + tensor var_19947_cast_fp16 = concat(axis = var_18498, interleave = var_19947_interleave_0, values = (var_19811_cast_fp16, var_19813_cast_fp16, var_19815_cast_fp16, var_19817_cast_fp16))[name = tensor("op_19947_cast_fp16")]; + tensor var_19949_interleave_0 = const()[name = tensor("op_19949_interleave_0"), val = tensor(false)]; + tensor var_19949_cast_fp16 = concat(axis = var_18498, interleave = var_19949_interleave_0, values = (var_19819_cast_fp16, var_19821_cast_fp16, var_19823_cast_fp16, var_19825_cast_fp16))[name = tensor("op_19949_cast_fp16")]; + tensor var_19951_interleave_0 = const()[name = tensor("op_19951_interleave_0"), val = tensor(false)]; + tensor var_19951_cast_fp16 = concat(axis = var_18498, interleave = var_19951_interleave_0, values = (var_19827_cast_fp16, var_19829_cast_fp16, var_19831_cast_fp16, var_19833_cast_fp16))[name = tensor("op_19951_cast_fp16")]; + tensor var_19953_interleave_0 = const()[name = tensor("op_19953_interleave_0"), val = tensor(false)]; + tensor var_19953_cast_fp16 = concat(axis = var_18498, interleave = var_19953_interleave_0, values = (var_19835_cast_fp16, var_19837_cast_fp16, var_19839_cast_fp16, var_19841_cast_fp16))[name = tensor("op_19953_cast_fp16")]; + tensor var_19955_interleave_0 = const()[name = tensor("op_19955_interleave_0"), val = tensor(false)]; + tensor var_19955_cast_fp16 = concat(axis = var_18498, interleave = var_19955_interleave_0, values = (var_19843_cast_fp16, var_19845_cast_fp16, var_19847_cast_fp16, var_19849_cast_fp16))[name = tensor("op_19955_cast_fp16")]; + tensor var_19957_interleave_0 = const()[name = tensor("op_19957_interleave_0"), val = tensor(false)]; + tensor var_19957_cast_fp16 = concat(axis = var_18498, interleave = var_19957_interleave_0, values = (var_19851_cast_fp16, var_19853_cast_fp16, var_19855_cast_fp16, var_19857_cast_fp16))[name = tensor("op_19957_cast_fp16")]; + tensor var_19959_interleave_0 = const()[name = tensor("op_19959_interleave_0"), val = tensor(false)]; + tensor var_19959_cast_fp16 = concat(axis = var_18498, interleave = var_19959_interleave_0, values = (var_19859_cast_fp16, var_19861_cast_fp16, var_19863_cast_fp16, var_19865_cast_fp16))[name = tensor("op_19959_cast_fp16")]; + tensor var_19961_interleave_0 = const()[name = tensor("op_19961_interleave_0"), val = tensor(false)]; + tensor var_19961_cast_fp16 = concat(axis = var_18498, interleave = var_19961_interleave_0, values = (var_19867_cast_fp16, var_19869_cast_fp16, var_19871_cast_fp16, var_19873_cast_fp16))[name = tensor("op_19961_cast_fp16")]; + tensor var_19963_interleave_0 = const()[name = tensor("op_19963_interleave_0"), val = tensor(false)]; + tensor var_19963_cast_fp16 = concat(axis = var_18498, interleave = var_19963_interleave_0, values = (var_19875_cast_fp16, var_19877_cast_fp16, var_19879_cast_fp16, var_19881_cast_fp16))[name = tensor("op_19963_cast_fp16")]; + tensor var_19965_interleave_0 = const()[name = tensor("op_19965_interleave_0"), val = tensor(false)]; + tensor var_19965_cast_fp16 = concat(axis = var_18498, interleave = var_19965_interleave_0, values = (var_19883_cast_fp16, var_19885_cast_fp16, var_19887_cast_fp16, var_19889_cast_fp16))[name = tensor("op_19965_cast_fp16")]; + tensor var_19967_interleave_0 = const()[name = tensor("op_19967_interleave_0"), val = tensor(false)]; + tensor var_19967_cast_fp16 = concat(axis = var_18498, interleave = var_19967_interleave_0, values = (var_19891_cast_fp16, var_19893_cast_fp16, var_19895_cast_fp16, var_19897_cast_fp16))[name = tensor("op_19967_cast_fp16")]; + tensor var_19969_interleave_0 = const()[name = tensor("op_19969_interleave_0"), val = tensor(false)]; + tensor var_19969_cast_fp16 = concat(axis = var_18498, interleave = var_19969_interleave_0, values = (var_19899_cast_fp16, var_19901_cast_fp16, var_19903_cast_fp16, var_19905_cast_fp16))[name = tensor("op_19969_cast_fp16")]; + tensor var_19971_interleave_0 = const()[name = tensor("op_19971_interleave_0"), val = tensor(false)]; + tensor var_19971_cast_fp16 = concat(axis = var_18498, interleave = var_19971_interleave_0, values = (var_19907_cast_fp16, var_19909_cast_fp16, var_19911_cast_fp16, var_19913_cast_fp16))[name = tensor("op_19971_cast_fp16")]; + tensor var_19973_interleave_0 = const()[name = tensor("op_19973_interleave_0"), val = tensor(false)]; + tensor var_19973_cast_fp16 = concat(axis = var_18498, interleave = var_19973_interleave_0, values = (var_19915_cast_fp16, var_19917_cast_fp16, var_19919_cast_fp16, var_19921_cast_fp16))[name = tensor("op_19973_cast_fp16")]; + tensor var_19975_interleave_0 = const()[name = tensor("op_19975_interleave_0"), val = tensor(false)]; + tensor var_19975_cast_fp16 = concat(axis = var_18498, interleave = var_19975_interleave_0, values = (var_19923_cast_fp16, var_19925_cast_fp16, var_19927_cast_fp16, var_19929_cast_fp16))[name = tensor("op_19975_cast_fp16")]; + tensor var_19977_interleave_0 = const()[name = tensor("op_19977_interleave_0"), val = tensor(false)]; + tensor var_19977_cast_fp16 = concat(axis = var_18498, interleave = var_19977_interleave_0, values = (var_19931_cast_fp16, var_19933_cast_fp16, var_19935_cast_fp16, var_19937_cast_fp16))[name = tensor("op_19977_cast_fp16")]; + tensor input_97_interleave_0 = const()[name = tensor("input_97_interleave_0"), val = tensor(false)]; + tensor input_97_cast_fp16 = concat(axis = var_18523, interleave = input_97_interleave_0, values = (var_19939_cast_fp16, var_19941_cast_fp16, var_19943_cast_fp16, var_19945_cast_fp16, var_19947_cast_fp16, var_19949_cast_fp16, var_19951_cast_fp16, var_19953_cast_fp16, var_19955_cast_fp16, var_19957_cast_fp16, var_19959_cast_fp16, var_19961_cast_fp16, var_19963_cast_fp16, var_19965_cast_fp16, var_19967_cast_fp16, var_19969_cast_fp16, var_19971_cast_fp16, var_19973_cast_fp16, var_19975_cast_fp16, var_19977_cast_fp16))[name = tensor("input_97_cast_fp16")]; + tensor var_19982 = const()[name = tensor("op_19982"), val = tensor([1, 1])]; + tensor var_19984 = const()[name = tensor("op_19984"), val = tensor([1, 1])]; + tensor obj_51_pad_type_0 = const()[name = tensor("obj_51_pad_type_0"), val = tensor("custom")]; + tensor obj_51_pad_0 = const()[name = tensor("obj_51_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_12_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_12_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(496376000)))]; + tensor layers_12_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_12_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(499652864)))]; + tensor obj_51_cast_fp16 = conv(bias = layers_12_self_attn_o_proj_bias_to_fp16, dilations = var_19984, groups = var_18523, pad = obj_51_pad_0, pad_type = obj_51_pad_type_0, strides = var_19982, weight = layers_12_self_attn_o_proj_weight_to_fp16, x = input_97_cast_fp16)[name = tensor("obj_51_cast_fp16")]; + tensor inputs_51_cast_fp16 = add(x = inputs_49_cast_fp16, y = obj_51_cast_fp16)[name = tensor("inputs_51_cast_fp16")]; + tensor var_19990 = const()[name = tensor("op_19990"), val = tensor([1])]; + tensor channels_mean_51_cast_fp16 = reduce_mean(axes = var_19990, keep_dims = var_18524, x = inputs_51_cast_fp16)[name = tensor("channels_mean_51_cast_fp16")]; + tensor zero_mean_51_cast_fp16 = sub(x = inputs_51_cast_fp16, y = channels_mean_51_cast_fp16)[name = tensor("zero_mean_51_cast_fp16")]; + tensor zero_mean_sq_51_cast_fp16 = mul(x = zero_mean_51_cast_fp16, y = zero_mean_51_cast_fp16)[name = tensor("zero_mean_sq_51_cast_fp16")]; + tensor var_19994 = const()[name = tensor("op_19994"), val = tensor([1])]; + tensor var_19995_cast_fp16 = reduce_mean(axes = var_19994, keep_dims = var_18524, x = zero_mean_sq_51_cast_fp16)[name = tensor("op_19995_cast_fp16")]; + tensor var_19996_to_fp16 = const()[name = tensor("op_19996_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_19997_cast_fp16 = add(x = var_19995_cast_fp16, y = var_19996_to_fp16)[name = tensor("op_19997_cast_fp16")]; + tensor denom_51_epsilon_0_to_fp16 = const()[name = tensor("denom_51_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_51_cast_fp16 = rsqrt(epsilon = denom_51_epsilon_0_to_fp16, x = var_19997_cast_fp16)[name = tensor("denom_51_cast_fp16")]; + tensor out_51_cast_fp16 = mul(x = zero_mean_51_cast_fp16, y = denom_51_cast_fp16)[name = tensor("out_51_cast_fp16")]; + tensor input_99_gamma_0_to_fp16 = const()[name = tensor("input_99_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(499655488)))]; + tensor input_99_beta_0_to_fp16 = const()[name = tensor("input_99_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(499658112)))]; + tensor input_99_epsilon_0_to_fp16 = const()[name = tensor("input_99_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_99_cast_fp16 = batch_norm(beta = input_99_beta_0_to_fp16, epsilon = input_99_epsilon_0_to_fp16, gamma = input_99_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_51_cast_fp16)[name = tensor("input_99_cast_fp16")]; + tensor var_20008 = const()[name = tensor("op_20008"), val = tensor([1, 1])]; + tensor var_20010 = const()[name = tensor("op_20010"), val = tensor([1, 1])]; + tensor input_101_pad_type_0 = const()[name = tensor("input_101_pad_type_0"), val = tensor("custom")]; + tensor input_101_pad_0 = const()[name = tensor("input_101_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_12_fc1_weight_to_fp16 = const()[name = tensor("layers_12_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(499660736)))]; + tensor layers_12_fc1_bias_to_fp16 = const()[name = tensor("layers_12_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(512768000)))]; + tensor input_101_cast_fp16 = conv(bias = layers_12_fc1_bias_to_fp16, dilations = var_20010, groups = var_18523, pad = input_101_pad_0, pad_type = input_101_pad_type_0, strides = var_20008, weight = layers_12_fc1_weight_to_fp16, x = input_99_cast_fp16)[name = tensor("input_101_cast_fp16")]; + tensor input_103_mode_0 = const()[name = tensor("input_103_mode_0"), val = tensor("EXACT")]; + tensor input_103_cast_fp16 = gelu(mode = input_103_mode_0, x = input_101_cast_fp16)[name = tensor("input_103_cast_fp16")]; + tensor var_20016 = const()[name = tensor("op_20016"), val = tensor([1, 1])]; + tensor var_20018 = const()[name = tensor("op_20018"), val = tensor([1, 1])]; + tensor hidden_states_29_pad_type_0 = const()[name = tensor("hidden_states_29_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_29_pad_0 = const()[name = tensor("hidden_states_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_12_fc2_weight_to_fp16 = const()[name = tensor("layers_12_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(512778304)))]; + tensor layers_12_fc2_bias_to_fp16 = const()[name = tensor("layers_12_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(525885568)))]; + tensor hidden_states_29_cast_fp16 = conv(bias = layers_12_fc2_bias_to_fp16, dilations = var_20018, groups = var_18523, pad = hidden_states_29_pad_0, pad_type = hidden_states_29_pad_type_0, strides = var_20016, weight = layers_12_fc2_weight_to_fp16, x = input_103_cast_fp16)[name = tensor("hidden_states_29_cast_fp16")]; + tensor inputs_53_cast_fp16 = add(x = inputs_51_cast_fp16, y = hidden_states_29_cast_fp16)[name = tensor("inputs_53_cast_fp16")]; + tensor var_20025 = const()[name = tensor("op_20025"), val = tensor(3)]; + tensor var_20050 = const()[name = tensor("op_20050"), val = tensor(1)]; + tensor var_20051 = const()[name = tensor("op_20051"), val = tensor(true)]; + tensor var_20061 = const()[name = tensor("op_20061"), val = tensor([1])]; + tensor channels_mean_53_cast_fp16 = reduce_mean(axes = var_20061, keep_dims = var_20051, x = inputs_53_cast_fp16)[name = tensor("channels_mean_53_cast_fp16")]; + tensor zero_mean_53_cast_fp16 = sub(x = inputs_53_cast_fp16, y = channels_mean_53_cast_fp16)[name = tensor("zero_mean_53_cast_fp16")]; + tensor zero_mean_sq_53_cast_fp16 = mul(x = zero_mean_53_cast_fp16, y = zero_mean_53_cast_fp16)[name = tensor("zero_mean_sq_53_cast_fp16")]; + tensor var_20065 = const()[name = tensor("op_20065"), val = tensor([1])]; + tensor var_20066_cast_fp16 = reduce_mean(axes = var_20065, keep_dims = var_20051, x = zero_mean_sq_53_cast_fp16)[name = tensor("op_20066_cast_fp16")]; + tensor var_20067_to_fp16 = const()[name = tensor("op_20067_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_20068_cast_fp16 = add(x = var_20066_cast_fp16, y = var_20067_to_fp16)[name = tensor("op_20068_cast_fp16")]; + tensor denom_53_epsilon_0_to_fp16 = const()[name = tensor("denom_53_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_53_cast_fp16 = rsqrt(epsilon = denom_53_epsilon_0_to_fp16, x = var_20068_cast_fp16)[name = tensor("denom_53_cast_fp16")]; + tensor out_53_cast_fp16 = mul(x = zero_mean_53_cast_fp16, y = denom_53_cast_fp16)[name = tensor("out_53_cast_fp16")]; + tensor obj_53_gamma_0_to_fp16 = const()[name = tensor("obj_53_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(525888192)))]; + tensor obj_53_beta_0_to_fp16 = const()[name = tensor("obj_53_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(525890816)))]; + tensor obj_53_epsilon_0_to_fp16 = const()[name = tensor("obj_53_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_53_cast_fp16 = batch_norm(beta = obj_53_beta_0_to_fp16, epsilon = obj_53_epsilon_0_to_fp16, gamma = obj_53_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_53_cast_fp16)[name = tensor("obj_53_cast_fp16")]; + tensor var_20083 = const()[name = tensor("op_20083"), val = tensor([1, 1])]; + tensor var_20085 = const()[name = tensor("op_20085"), val = tensor([1, 1])]; + tensor query_27_pad_type_0 = const()[name = tensor("query_27_pad_type_0"), val = tensor("custom")]; + tensor query_27_pad_0 = const()[name = tensor("query_27_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_13_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_13_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(525893440)))]; + tensor layers_13_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_13_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(529170304)))]; + tensor query_27_cast_fp16 = conv(bias = layers_13_self_attn_q_proj_bias_to_fp16, dilations = var_20085, groups = var_20050, pad = query_27_pad_0, pad_type = query_27_pad_type_0, strides = var_20083, weight = layers_13_self_attn_q_proj_weight_to_fp16, x = obj_53_cast_fp16)[name = tensor("query_27_cast_fp16")]; + tensor var_20089 = const()[name = tensor("op_20089"), val = tensor([1, 1])]; + tensor var_20091 = const()[name = tensor("op_20091"), val = tensor([1, 1])]; + tensor key_27_pad_type_0 = const()[name = tensor("key_27_pad_type_0"), val = tensor("custom")]; + tensor key_27_pad_0 = const()[name = tensor("key_27_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_13_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_13_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(529172928)))]; + tensor key_27_cast_fp16 = conv(dilations = var_20091, groups = var_20050, pad = key_27_pad_0, pad_type = key_27_pad_type_0, strides = var_20089, weight = layers_13_self_attn_k_proj_weight_to_fp16, x = obj_53_cast_fp16)[name = tensor("key_27_cast_fp16")]; + tensor var_20096 = const()[name = tensor("op_20096"), val = tensor([1, 1])]; + tensor var_20098 = const()[name = tensor("op_20098"), val = tensor([1, 1])]; + tensor value_27_pad_type_0 = const()[name = tensor("value_27_pad_type_0"), val = tensor("custom")]; + tensor value_27_pad_0 = const()[name = tensor("value_27_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_13_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_13_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(532449792)))]; + tensor layers_13_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_13_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(535726656)))]; + tensor value_27_cast_fp16 = conv(bias = layers_13_self_attn_v_proj_bias_to_fp16, dilations = var_20098, groups = var_20050, pad = value_27_pad_0, pad_type = value_27_pad_type_0, strides = var_20096, weight = layers_13_self_attn_v_proj_weight_to_fp16, x = obj_53_cast_fp16)[name = tensor("value_27_cast_fp16")]; + tensor var_20105_begin_0 = const()[name = tensor("op_20105_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_20105_end_0 = const()[name = tensor("op_20105_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_20105_end_mask_0 = const()[name = tensor("op_20105_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_20105_cast_fp16 = slice_by_index(begin = var_20105_begin_0, end = var_20105_end_0, end_mask = var_20105_end_mask_0, x = query_27_cast_fp16)[name = tensor("op_20105_cast_fp16")]; + tensor var_20109_begin_0 = const()[name = tensor("op_20109_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_20109_end_0 = const()[name = tensor("op_20109_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_20109_end_mask_0 = const()[name = tensor("op_20109_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_20109_cast_fp16 = slice_by_index(begin = var_20109_begin_0, end = var_20109_end_0, end_mask = var_20109_end_mask_0, x = query_27_cast_fp16)[name = tensor("op_20109_cast_fp16")]; + tensor var_20113_begin_0 = const()[name = tensor("op_20113_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_20113_end_0 = const()[name = tensor("op_20113_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_20113_end_mask_0 = const()[name = tensor("op_20113_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_20113_cast_fp16 = slice_by_index(begin = var_20113_begin_0, end = var_20113_end_0, end_mask = var_20113_end_mask_0, x = query_27_cast_fp16)[name = tensor("op_20113_cast_fp16")]; + tensor var_20117_begin_0 = const()[name = tensor("op_20117_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_20117_end_0 = const()[name = tensor("op_20117_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_20117_end_mask_0 = const()[name = tensor("op_20117_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_20117_cast_fp16 = slice_by_index(begin = var_20117_begin_0, end = var_20117_end_0, end_mask = var_20117_end_mask_0, x = query_27_cast_fp16)[name = tensor("op_20117_cast_fp16")]; + tensor var_20121_begin_0 = const()[name = tensor("op_20121_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_20121_end_0 = const()[name = tensor("op_20121_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_20121_end_mask_0 = const()[name = tensor("op_20121_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_20121_cast_fp16 = slice_by_index(begin = var_20121_begin_0, end = var_20121_end_0, end_mask = var_20121_end_mask_0, x = query_27_cast_fp16)[name = tensor("op_20121_cast_fp16")]; + tensor var_20125_begin_0 = const()[name = tensor("op_20125_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_20125_end_0 = const()[name = tensor("op_20125_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_20125_end_mask_0 = const()[name = tensor("op_20125_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_20125_cast_fp16 = slice_by_index(begin = var_20125_begin_0, end = var_20125_end_0, end_mask = var_20125_end_mask_0, x = query_27_cast_fp16)[name = tensor("op_20125_cast_fp16")]; + tensor var_20129_begin_0 = const()[name = tensor("op_20129_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_20129_end_0 = const()[name = tensor("op_20129_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_20129_end_mask_0 = const()[name = tensor("op_20129_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_20129_cast_fp16 = slice_by_index(begin = var_20129_begin_0, end = var_20129_end_0, end_mask = var_20129_end_mask_0, x = query_27_cast_fp16)[name = tensor("op_20129_cast_fp16")]; + tensor var_20133_begin_0 = const()[name = tensor("op_20133_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_20133_end_0 = const()[name = tensor("op_20133_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_20133_end_mask_0 = const()[name = tensor("op_20133_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_20133_cast_fp16 = slice_by_index(begin = var_20133_begin_0, end = var_20133_end_0, end_mask = var_20133_end_mask_0, x = query_27_cast_fp16)[name = tensor("op_20133_cast_fp16")]; + tensor var_20137_begin_0 = const()[name = tensor("op_20137_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_20137_end_0 = const()[name = tensor("op_20137_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_20137_end_mask_0 = const()[name = tensor("op_20137_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_20137_cast_fp16 = slice_by_index(begin = var_20137_begin_0, end = var_20137_end_0, end_mask = var_20137_end_mask_0, x = query_27_cast_fp16)[name = tensor("op_20137_cast_fp16")]; + tensor var_20141_begin_0 = const()[name = tensor("op_20141_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_20141_end_0 = const()[name = tensor("op_20141_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_20141_end_mask_0 = const()[name = tensor("op_20141_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_20141_cast_fp16 = slice_by_index(begin = var_20141_begin_0, end = var_20141_end_0, end_mask = var_20141_end_mask_0, x = query_27_cast_fp16)[name = tensor("op_20141_cast_fp16")]; + tensor var_20145_begin_0 = const()[name = tensor("op_20145_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_20145_end_0 = const()[name = tensor("op_20145_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_20145_end_mask_0 = const()[name = tensor("op_20145_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_20145_cast_fp16 = slice_by_index(begin = var_20145_begin_0, end = var_20145_end_0, end_mask = var_20145_end_mask_0, x = query_27_cast_fp16)[name = tensor("op_20145_cast_fp16")]; + tensor var_20149_begin_0 = const()[name = tensor("op_20149_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_20149_end_0 = const()[name = tensor("op_20149_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_20149_end_mask_0 = const()[name = tensor("op_20149_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_20149_cast_fp16 = slice_by_index(begin = var_20149_begin_0, end = var_20149_end_0, end_mask = var_20149_end_mask_0, x = query_27_cast_fp16)[name = tensor("op_20149_cast_fp16")]; + tensor var_20153_begin_0 = const()[name = tensor("op_20153_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_20153_end_0 = const()[name = tensor("op_20153_end_0"), val = tensor([1, 832, 1, 1500])]; + tensor var_20153_end_mask_0 = const()[name = tensor("op_20153_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_20153_cast_fp16 = slice_by_index(begin = var_20153_begin_0, end = var_20153_end_0, end_mask = var_20153_end_mask_0, x = query_27_cast_fp16)[name = tensor("op_20153_cast_fp16")]; + tensor var_20157_begin_0 = const()[name = tensor("op_20157_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_20157_end_0 = const()[name = tensor("op_20157_end_0"), val = tensor([1, 896, 1, 1500])]; + tensor var_20157_end_mask_0 = const()[name = tensor("op_20157_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_20157_cast_fp16 = slice_by_index(begin = var_20157_begin_0, end = var_20157_end_0, end_mask = var_20157_end_mask_0, x = query_27_cast_fp16)[name = tensor("op_20157_cast_fp16")]; + tensor var_20161_begin_0 = const()[name = tensor("op_20161_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_20161_end_0 = const()[name = tensor("op_20161_end_0"), val = tensor([1, 960, 1, 1500])]; + tensor var_20161_end_mask_0 = const()[name = tensor("op_20161_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_20161_cast_fp16 = slice_by_index(begin = var_20161_begin_0, end = var_20161_end_0, end_mask = var_20161_end_mask_0, x = query_27_cast_fp16)[name = tensor("op_20161_cast_fp16")]; + tensor var_20165_begin_0 = const()[name = tensor("op_20165_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_20165_end_0 = const()[name = tensor("op_20165_end_0"), val = tensor([1, 1024, 1, 1500])]; + tensor var_20165_end_mask_0 = const()[name = tensor("op_20165_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_20165_cast_fp16 = slice_by_index(begin = var_20165_begin_0, end = var_20165_end_0, end_mask = var_20165_end_mask_0, x = query_27_cast_fp16)[name = tensor("op_20165_cast_fp16")]; + tensor var_20169_begin_0 = const()[name = tensor("op_20169_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_20169_end_0 = const()[name = tensor("op_20169_end_0"), val = tensor([1, 1088, 1, 1500])]; + tensor var_20169_end_mask_0 = const()[name = tensor("op_20169_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_20169_cast_fp16 = slice_by_index(begin = var_20169_begin_0, end = var_20169_end_0, end_mask = var_20169_end_mask_0, x = query_27_cast_fp16)[name = tensor("op_20169_cast_fp16")]; + tensor var_20173_begin_0 = const()[name = tensor("op_20173_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_20173_end_0 = const()[name = tensor("op_20173_end_0"), val = tensor([1, 1152, 1, 1500])]; + tensor var_20173_end_mask_0 = const()[name = tensor("op_20173_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_20173_cast_fp16 = slice_by_index(begin = var_20173_begin_0, end = var_20173_end_0, end_mask = var_20173_end_mask_0, x = query_27_cast_fp16)[name = tensor("op_20173_cast_fp16")]; + tensor var_20177_begin_0 = const()[name = tensor("op_20177_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_20177_end_0 = const()[name = tensor("op_20177_end_0"), val = tensor([1, 1216, 1, 1500])]; + tensor var_20177_end_mask_0 = const()[name = tensor("op_20177_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_20177_cast_fp16 = slice_by_index(begin = var_20177_begin_0, end = var_20177_end_0, end_mask = var_20177_end_mask_0, x = query_27_cast_fp16)[name = tensor("op_20177_cast_fp16")]; + tensor var_20181_begin_0 = const()[name = tensor("op_20181_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_20181_end_0 = const()[name = tensor("op_20181_end_0"), val = tensor([1, 1280, 1, 1500])]; + tensor var_20181_end_mask_0 = const()[name = tensor("op_20181_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_20181_cast_fp16 = slice_by_index(begin = var_20181_begin_0, end = var_20181_end_0, end_mask = var_20181_end_mask_0, x = query_27_cast_fp16)[name = tensor("op_20181_cast_fp16")]; + tensor var_20190_begin_0 = const()[name = tensor("op_20190_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_20190_end_0 = const()[name = tensor("op_20190_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_20190_end_mask_0 = const()[name = tensor("op_20190_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20190_cast_fp16 = slice_by_index(begin = var_20190_begin_0, end = var_20190_end_0, end_mask = var_20190_end_mask_0, x = var_20105_cast_fp16)[name = tensor("op_20190_cast_fp16")]; + tensor var_20197_begin_0 = const()[name = tensor("op_20197_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_20197_end_0 = const()[name = tensor("op_20197_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_20197_end_mask_0 = const()[name = tensor("op_20197_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20197_cast_fp16 = slice_by_index(begin = var_20197_begin_0, end = var_20197_end_0, end_mask = var_20197_end_mask_0, x = var_20105_cast_fp16)[name = tensor("op_20197_cast_fp16")]; + tensor var_20204_begin_0 = const()[name = tensor("op_20204_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_20204_end_0 = const()[name = tensor("op_20204_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_20204_end_mask_0 = const()[name = tensor("op_20204_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20204_cast_fp16 = slice_by_index(begin = var_20204_begin_0, end = var_20204_end_0, end_mask = var_20204_end_mask_0, x = var_20105_cast_fp16)[name = tensor("op_20204_cast_fp16")]; + tensor var_20211_begin_0 = const()[name = tensor("op_20211_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_20211_end_0 = const()[name = tensor("op_20211_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_20211_end_mask_0 = const()[name = tensor("op_20211_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20211_cast_fp16 = slice_by_index(begin = var_20211_begin_0, end = var_20211_end_0, end_mask = var_20211_end_mask_0, x = var_20105_cast_fp16)[name = tensor("op_20211_cast_fp16")]; + tensor var_20218_begin_0 = const()[name = tensor("op_20218_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_20218_end_0 = const()[name = tensor("op_20218_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_20218_end_mask_0 = const()[name = tensor("op_20218_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20218_cast_fp16 = slice_by_index(begin = var_20218_begin_0, end = var_20218_end_0, end_mask = var_20218_end_mask_0, x = var_20109_cast_fp16)[name = tensor("op_20218_cast_fp16")]; + tensor var_20225_begin_0 = const()[name = tensor("op_20225_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_20225_end_0 = const()[name = tensor("op_20225_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_20225_end_mask_0 = const()[name = tensor("op_20225_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20225_cast_fp16 = slice_by_index(begin = var_20225_begin_0, end = var_20225_end_0, end_mask = var_20225_end_mask_0, x = var_20109_cast_fp16)[name = tensor("op_20225_cast_fp16")]; + tensor var_20232_begin_0 = const()[name = tensor("op_20232_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_20232_end_0 = const()[name = tensor("op_20232_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_20232_end_mask_0 = const()[name = tensor("op_20232_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20232_cast_fp16 = slice_by_index(begin = var_20232_begin_0, end = var_20232_end_0, end_mask = var_20232_end_mask_0, x = var_20109_cast_fp16)[name = tensor("op_20232_cast_fp16")]; + tensor var_20239_begin_0 = const()[name = tensor("op_20239_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_20239_end_0 = const()[name = tensor("op_20239_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_20239_end_mask_0 = const()[name = tensor("op_20239_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20239_cast_fp16 = slice_by_index(begin = var_20239_begin_0, end = var_20239_end_0, end_mask = var_20239_end_mask_0, x = var_20109_cast_fp16)[name = tensor("op_20239_cast_fp16")]; + tensor var_20246_begin_0 = const()[name = tensor("op_20246_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_20246_end_0 = const()[name = tensor("op_20246_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_20246_end_mask_0 = const()[name = tensor("op_20246_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20246_cast_fp16 = slice_by_index(begin = var_20246_begin_0, end = var_20246_end_0, end_mask = var_20246_end_mask_0, x = var_20113_cast_fp16)[name = tensor("op_20246_cast_fp16")]; + tensor var_20253_begin_0 = const()[name = tensor("op_20253_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_20253_end_0 = const()[name = tensor("op_20253_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_20253_end_mask_0 = const()[name = tensor("op_20253_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20253_cast_fp16 = slice_by_index(begin = var_20253_begin_0, end = var_20253_end_0, end_mask = var_20253_end_mask_0, x = var_20113_cast_fp16)[name = tensor("op_20253_cast_fp16")]; + tensor var_20260_begin_0 = const()[name = tensor("op_20260_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_20260_end_0 = const()[name = tensor("op_20260_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_20260_end_mask_0 = const()[name = tensor("op_20260_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20260_cast_fp16 = slice_by_index(begin = var_20260_begin_0, end = var_20260_end_0, end_mask = var_20260_end_mask_0, x = var_20113_cast_fp16)[name = tensor("op_20260_cast_fp16")]; + tensor var_20267_begin_0 = const()[name = tensor("op_20267_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_20267_end_0 = const()[name = tensor("op_20267_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_20267_end_mask_0 = const()[name = tensor("op_20267_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20267_cast_fp16 = slice_by_index(begin = var_20267_begin_0, end = var_20267_end_0, end_mask = var_20267_end_mask_0, x = var_20113_cast_fp16)[name = tensor("op_20267_cast_fp16")]; + tensor var_20274_begin_0 = const()[name = tensor("op_20274_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_20274_end_0 = const()[name = tensor("op_20274_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_20274_end_mask_0 = const()[name = tensor("op_20274_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20274_cast_fp16 = slice_by_index(begin = var_20274_begin_0, end = var_20274_end_0, end_mask = var_20274_end_mask_0, x = var_20117_cast_fp16)[name = tensor("op_20274_cast_fp16")]; + tensor var_20281_begin_0 = const()[name = tensor("op_20281_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_20281_end_0 = const()[name = tensor("op_20281_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_20281_end_mask_0 = const()[name = tensor("op_20281_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20281_cast_fp16 = slice_by_index(begin = var_20281_begin_0, end = var_20281_end_0, end_mask = var_20281_end_mask_0, x = var_20117_cast_fp16)[name = tensor("op_20281_cast_fp16")]; + tensor var_20288_begin_0 = const()[name = tensor("op_20288_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_20288_end_0 = const()[name = tensor("op_20288_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_20288_end_mask_0 = const()[name = tensor("op_20288_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20288_cast_fp16 = slice_by_index(begin = var_20288_begin_0, end = var_20288_end_0, end_mask = var_20288_end_mask_0, x = var_20117_cast_fp16)[name = tensor("op_20288_cast_fp16")]; + tensor var_20295_begin_0 = const()[name = tensor("op_20295_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_20295_end_0 = const()[name = tensor("op_20295_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_20295_end_mask_0 = const()[name = tensor("op_20295_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20295_cast_fp16 = slice_by_index(begin = var_20295_begin_0, end = var_20295_end_0, end_mask = var_20295_end_mask_0, x = var_20117_cast_fp16)[name = tensor("op_20295_cast_fp16")]; + tensor var_20302_begin_0 = const()[name = tensor("op_20302_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_20302_end_0 = const()[name = tensor("op_20302_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_20302_end_mask_0 = const()[name = tensor("op_20302_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20302_cast_fp16 = slice_by_index(begin = var_20302_begin_0, end = var_20302_end_0, end_mask = var_20302_end_mask_0, x = var_20121_cast_fp16)[name = tensor("op_20302_cast_fp16")]; + tensor var_20309_begin_0 = const()[name = tensor("op_20309_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_20309_end_0 = const()[name = tensor("op_20309_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_20309_end_mask_0 = const()[name = tensor("op_20309_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20309_cast_fp16 = slice_by_index(begin = var_20309_begin_0, end = var_20309_end_0, end_mask = var_20309_end_mask_0, x = var_20121_cast_fp16)[name = tensor("op_20309_cast_fp16")]; + tensor var_20316_begin_0 = const()[name = tensor("op_20316_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_20316_end_0 = const()[name = tensor("op_20316_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_20316_end_mask_0 = const()[name = tensor("op_20316_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20316_cast_fp16 = slice_by_index(begin = var_20316_begin_0, end = var_20316_end_0, end_mask = var_20316_end_mask_0, x = var_20121_cast_fp16)[name = tensor("op_20316_cast_fp16")]; + tensor var_20323_begin_0 = const()[name = tensor("op_20323_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_20323_end_0 = const()[name = tensor("op_20323_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_20323_end_mask_0 = const()[name = tensor("op_20323_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20323_cast_fp16 = slice_by_index(begin = var_20323_begin_0, end = var_20323_end_0, end_mask = var_20323_end_mask_0, x = var_20121_cast_fp16)[name = tensor("op_20323_cast_fp16")]; + tensor var_20330_begin_0 = const()[name = tensor("op_20330_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_20330_end_0 = const()[name = tensor("op_20330_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_20330_end_mask_0 = const()[name = tensor("op_20330_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20330_cast_fp16 = slice_by_index(begin = var_20330_begin_0, end = var_20330_end_0, end_mask = var_20330_end_mask_0, x = var_20125_cast_fp16)[name = tensor("op_20330_cast_fp16")]; + tensor var_20337_begin_0 = const()[name = tensor("op_20337_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_20337_end_0 = const()[name = tensor("op_20337_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_20337_end_mask_0 = const()[name = tensor("op_20337_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20337_cast_fp16 = slice_by_index(begin = var_20337_begin_0, end = var_20337_end_0, end_mask = var_20337_end_mask_0, x = var_20125_cast_fp16)[name = tensor("op_20337_cast_fp16")]; + tensor var_20344_begin_0 = const()[name = tensor("op_20344_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_20344_end_0 = const()[name = tensor("op_20344_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_20344_end_mask_0 = const()[name = tensor("op_20344_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20344_cast_fp16 = slice_by_index(begin = var_20344_begin_0, end = var_20344_end_0, end_mask = var_20344_end_mask_0, x = var_20125_cast_fp16)[name = tensor("op_20344_cast_fp16")]; + tensor var_20351_begin_0 = const()[name = tensor("op_20351_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_20351_end_0 = const()[name = tensor("op_20351_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_20351_end_mask_0 = const()[name = tensor("op_20351_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20351_cast_fp16 = slice_by_index(begin = var_20351_begin_0, end = var_20351_end_0, end_mask = var_20351_end_mask_0, x = var_20125_cast_fp16)[name = tensor("op_20351_cast_fp16")]; + tensor var_20358_begin_0 = const()[name = tensor("op_20358_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_20358_end_0 = const()[name = tensor("op_20358_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_20358_end_mask_0 = const()[name = tensor("op_20358_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20358_cast_fp16 = slice_by_index(begin = var_20358_begin_0, end = var_20358_end_0, end_mask = var_20358_end_mask_0, x = var_20129_cast_fp16)[name = tensor("op_20358_cast_fp16")]; + tensor var_20365_begin_0 = const()[name = tensor("op_20365_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_20365_end_0 = const()[name = tensor("op_20365_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_20365_end_mask_0 = const()[name = tensor("op_20365_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20365_cast_fp16 = slice_by_index(begin = var_20365_begin_0, end = var_20365_end_0, end_mask = var_20365_end_mask_0, x = var_20129_cast_fp16)[name = tensor("op_20365_cast_fp16")]; + tensor var_20372_begin_0 = const()[name = tensor("op_20372_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_20372_end_0 = const()[name = tensor("op_20372_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_20372_end_mask_0 = const()[name = tensor("op_20372_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20372_cast_fp16 = slice_by_index(begin = var_20372_begin_0, end = var_20372_end_0, end_mask = var_20372_end_mask_0, x = var_20129_cast_fp16)[name = tensor("op_20372_cast_fp16")]; + tensor var_20379_begin_0 = const()[name = tensor("op_20379_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_20379_end_0 = const()[name = tensor("op_20379_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_20379_end_mask_0 = const()[name = tensor("op_20379_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20379_cast_fp16 = slice_by_index(begin = var_20379_begin_0, end = var_20379_end_0, end_mask = var_20379_end_mask_0, x = var_20129_cast_fp16)[name = tensor("op_20379_cast_fp16")]; + tensor var_20386_begin_0 = const()[name = tensor("op_20386_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_20386_end_0 = const()[name = tensor("op_20386_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_20386_end_mask_0 = const()[name = tensor("op_20386_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20386_cast_fp16 = slice_by_index(begin = var_20386_begin_0, end = var_20386_end_0, end_mask = var_20386_end_mask_0, x = var_20133_cast_fp16)[name = tensor("op_20386_cast_fp16")]; + tensor var_20393_begin_0 = const()[name = tensor("op_20393_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_20393_end_0 = const()[name = tensor("op_20393_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_20393_end_mask_0 = const()[name = tensor("op_20393_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20393_cast_fp16 = slice_by_index(begin = var_20393_begin_0, end = var_20393_end_0, end_mask = var_20393_end_mask_0, x = var_20133_cast_fp16)[name = tensor("op_20393_cast_fp16")]; + tensor var_20400_begin_0 = const()[name = tensor("op_20400_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_20400_end_0 = const()[name = tensor("op_20400_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_20400_end_mask_0 = const()[name = tensor("op_20400_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20400_cast_fp16 = slice_by_index(begin = var_20400_begin_0, end = var_20400_end_0, end_mask = var_20400_end_mask_0, x = var_20133_cast_fp16)[name = tensor("op_20400_cast_fp16")]; + tensor var_20407_begin_0 = const()[name = tensor("op_20407_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_20407_end_0 = const()[name = tensor("op_20407_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_20407_end_mask_0 = const()[name = tensor("op_20407_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20407_cast_fp16 = slice_by_index(begin = var_20407_begin_0, end = var_20407_end_0, end_mask = var_20407_end_mask_0, x = var_20133_cast_fp16)[name = tensor("op_20407_cast_fp16")]; + tensor var_20414_begin_0 = const()[name = tensor("op_20414_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_20414_end_0 = const()[name = tensor("op_20414_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_20414_end_mask_0 = const()[name = tensor("op_20414_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20414_cast_fp16 = slice_by_index(begin = var_20414_begin_0, end = var_20414_end_0, end_mask = var_20414_end_mask_0, x = var_20137_cast_fp16)[name = tensor("op_20414_cast_fp16")]; + tensor var_20421_begin_0 = const()[name = tensor("op_20421_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_20421_end_0 = const()[name = tensor("op_20421_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_20421_end_mask_0 = const()[name = tensor("op_20421_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20421_cast_fp16 = slice_by_index(begin = var_20421_begin_0, end = var_20421_end_0, end_mask = var_20421_end_mask_0, x = var_20137_cast_fp16)[name = tensor("op_20421_cast_fp16")]; + tensor var_20428_begin_0 = const()[name = tensor("op_20428_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_20428_end_0 = const()[name = tensor("op_20428_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_20428_end_mask_0 = const()[name = tensor("op_20428_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20428_cast_fp16 = slice_by_index(begin = var_20428_begin_0, end = var_20428_end_0, end_mask = var_20428_end_mask_0, x = var_20137_cast_fp16)[name = tensor("op_20428_cast_fp16")]; + tensor var_20435_begin_0 = const()[name = tensor("op_20435_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_20435_end_0 = const()[name = tensor("op_20435_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_20435_end_mask_0 = const()[name = tensor("op_20435_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20435_cast_fp16 = slice_by_index(begin = var_20435_begin_0, end = var_20435_end_0, end_mask = var_20435_end_mask_0, x = var_20137_cast_fp16)[name = tensor("op_20435_cast_fp16")]; + tensor var_20442_begin_0 = const()[name = tensor("op_20442_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_20442_end_0 = const()[name = tensor("op_20442_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_20442_end_mask_0 = const()[name = tensor("op_20442_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20442_cast_fp16 = slice_by_index(begin = var_20442_begin_0, end = var_20442_end_0, end_mask = var_20442_end_mask_0, x = var_20141_cast_fp16)[name = tensor("op_20442_cast_fp16")]; + tensor var_20449_begin_0 = const()[name = tensor("op_20449_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_20449_end_0 = const()[name = tensor("op_20449_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_20449_end_mask_0 = const()[name = tensor("op_20449_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20449_cast_fp16 = slice_by_index(begin = var_20449_begin_0, end = var_20449_end_0, end_mask = var_20449_end_mask_0, x = var_20141_cast_fp16)[name = tensor("op_20449_cast_fp16")]; + tensor var_20456_begin_0 = const()[name = tensor("op_20456_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_20456_end_0 = const()[name = tensor("op_20456_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_20456_end_mask_0 = const()[name = tensor("op_20456_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20456_cast_fp16 = slice_by_index(begin = var_20456_begin_0, end = var_20456_end_0, end_mask = var_20456_end_mask_0, x = var_20141_cast_fp16)[name = tensor("op_20456_cast_fp16")]; + tensor var_20463_begin_0 = const()[name = tensor("op_20463_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_20463_end_0 = const()[name = tensor("op_20463_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_20463_end_mask_0 = const()[name = tensor("op_20463_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20463_cast_fp16 = slice_by_index(begin = var_20463_begin_0, end = var_20463_end_0, end_mask = var_20463_end_mask_0, x = var_20141_cast_fp16)[name = tensor("op_20463_cast_fp16")]; + tensor var_20470_begin_0 = const()[name = tensor("op_20470_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_20470_end_0 = const()[name = tensor("op_20470_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_20470_end_mask_0 = const()[name = tensor("op_20470_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20470_cast_fp16 = slice_by_index(begin = var_20470_begin_0, end = var_20470_end_0, end_mask = var_20470_end_mask_0, x = var_20145_cast_fp16)[name = tensor("op_20470_cast_fp16")]; + tensor var_20477_begin_0 = const()[name = tensor("op_20477_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_20477_end_0 = const()[name = tensor("op_20477_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_20477_end_mask_0 = const()[name = tensor("op_20477_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20477_cast_fp16 = slice_by_index(begin = var_20477_begin_0, end = var_20477_end_0, end_mask = var_20477_end_mask_0, x = var_20145_cast_fp16)[name = tensor("op_20477_cast_fp16")]; + tensor var_20484_begin_0 = const()[name = tensor("op_20484_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_20484_end_0 = const()[name = tensor("op_20484_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_20484_end_mask_0 = const()[name = tensor("op_20484_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20484_cast_fp16 = slice_by_index(begin = var_20484_begin_0, end = var_20484_end_0, end_mask = var_20484_end_mask_0, x = var_20145_cast_fp16)[name = tensor("op_20484_cast_fp16")]; + tensor var_20491_begin_0 = const()[name = tensor("op_20491_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_20491_end_0 = const()[name = tensor("op_20491_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_20491_end_mask_0 = const()[name = tensor("op_20491_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20491_cast_fp16 = slice_by_index(begin = var_20491_begin_0, end = var_20491_end_0, end_mask = var_20491_end_mask_0, x = var_20145_cast_fp16)[name = tensor("op_20491_cast_fp16")]; + tensor var_20498_begin_0 = const()[name = tensor("op_20498_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_20498_end_0 = const()[name = tensor("op_20498_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_20498_end_mask_0 = const()[name = tensor("op_20498_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20498_cast_fp16 = slice_by_index(begin = var_20498_begin_0, end = var_20498_end_0, end_mask = var_20498_end_mask_0, x = var_20149_cast_fp16)[name = tensor("op_20498_cast_fp16")]; + tensor var_20505_begin_0 = const()[name = tensor("op_20505_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_20505_end_0 = const()[name = tensor("op_20505_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_20505_end_mask_0 = const()[name = tensor("op_20505_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20505_cast_fp16 = slice_by_index(begin = var_20505_begin_0, end = var_20505_end_0, end_mask = var_20505_end_mask_0, x = var_20149_cast_fp16)[name = tensor("op_20505_cast_fp16")]; + tensor var_20512_begin_0 = const()[name = tensor("op_20512_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_20512_end_0 = const()[name = tensor("op_20512_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_20512_end_mask_0 = const()[name = tensor("op_20512_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20512_cast_fp16 = slice_by_index(begin = var_20512_begin_0, end = var_20512_end_0, end_mask = var_20512_end_mask_0, x = var_20149_cast_fp16)[name = tensor("op_20512_cast_fp16")]; + tensor var_20519_begin_0 = const()[name = tensor("op_20519_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_20519_end_0 = const()[name = tensor("op_20519_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_20519_end_mask_0 = const()[name = tensor("op_20519_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20519_cast_fp16 = slice_by_index(begin = var_20519_begin_0, end = var_20519_end_0, end_mask = var_20519_end_mask_0, x = var_20149_cast_fp16)[name = tensor("op_20519_cast_fp16")]; + tensor var_20526_begin_0 = const()[name = tensor("op_20526_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_20526_end_0 = const()[name = tensor("op_20526_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_20526_end_mask_0 = const()[name = tensor("op_20526_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20526_cast_fp16 = slice_by_index(begin = var_20526_begin_0, end = var_20526_end_0, end_mask = var_20526_end_mask_0, x = var_20153_cast_fp16)[name = tensor("op_20526_cast_fp16")]; + tensor var_20533_begin_0 = const()[name = tensor("op_20533_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_20533_end_0 = const()[name = tensor("op_20533_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_20533_end_mask_0 = const()[name = tensor("op_20533_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20533_cast_fp16 = slice_by_index(begin = var_20533_begin_0, end = var_20533_end_0, end_mask = var_20533_end_mask_0, x = var_20153_cast_fp16)[name = tensor("op_20533_cast_fp16")]; + tensor var_20540_begin_0 = const()[name = tensor("op_20540_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_20540_end_0 = const()[name = tensor("op_20540_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_20540_end_mask_0 = const()[name = tensor("op_20540_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20540_cast_fp16 = slice_by_index(begin = var_20540_begin_0, end = var_20540_end_0, end_mask = var_20540_end_mask_0, x = var_20153_cast_fp16)[name = tensor("op_20540_cast_fp16")]; + tensor var_20547_begin_0 = const()[name = tensor("op_20547_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_20547_end_0 = const()[name = tensor("op_20547_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_20547_end_mask_0 = const()[name = tensor("op_20547_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20547_cast_fp16 = slice_by_index(begin = var_20547_begin_0, end = var_20547_end_0, end_mask = var_20547_end_mask_0, x = var_20153_cast_fp16)[name = tensor("op_20547_cast_fp16")]; + tensor var_20554_begin_0 = const()[name = tensor("op_20554_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_20554_end_0 = const()[name = tensor("op_20554_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_20554_end_mask_0 = const()[name = tensor("op_20554_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20554_cast_fp16 = slice_by_index(begin = var_20554_begin_0, end = var_20554_end_0, end_mask = var_20554_end_mask_0, x = var_20157_cast_fp16)[name = tensor("op_20554_cast_fp16")]; + tensor var_20561_begin_0 = const()[name = tensor("op_20561_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_20561_end_0 = const()[name = tensor("op_20561_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_20561_end_mask_0 = const()[name = tensor("op_20561_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20561_cast_fp16 = slice_by_index(begin = var_20561_begin_0, end = var_20561_end_0, end_mask = var_20561_end_mask_0, x = var_20157_cast_fp16)[name = tensor("op_20561_cast_fp16")]; + tensor var_20568_begin_0 = const()[name = tensor("op_20568_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_20568_end_0 = const()[name = tensor("op_20568_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_20568_end_mask_0 = const()[name = tensor("op_20568_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20568_cast_fp16 = slice_by_index(begin = var_20568_begin_0, end = var_20568_end_0, end_mask = var_20568_end_mask_0, x = var_20157_cast_fp16)[name = tensor("op_20568_cast_fp16")]; + tensor var_20575_begin_0 = const()[name = tensor("op_20575_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_20575_end_0 = const()[name = tensor("op_20575_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_20575_end_mask_0 = const()[name = tensor("op_20575_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20575_cast_fp16 = slice_by_index(begin = var_20575_begin_0, end = var_20575_end_0, end_mask = var_20575_end_mask_0, x = var_20157_cast_fp16)[name = tensor("op_20575_cast_fp16")]; + tensor var_20582_begin_0 = const()[name = tensor("op_20582_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_20582_end_0 = const()[name = tensor("op_20582_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_20582_end_mask_0 = const()[name = tensor("op_20582_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20582_cast_fp16 = slice_by_index(begin = var_20582_begin_0, end = var_20582_end_0, end_mask = var_20582_end_mask_0, x = var_20161_cast_fp16)[name = tensor("op_20582_cast_fp16")]; + tensor var_20589_begin_0 = const()[name = tensor("op_20589_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_20589_end_0 = const()[name = tensor("op_20589_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_20589_end_mask_0 = const()[name = tensor("op_20589_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20589_cast_fp16 = slice_by_index(begin = var_20589_begin_0, end = var_20589_end_0, end_mask = var_20589_end_mask_0, x = var_20161_cast_fp16)[name = tensor("op_20589_cast_fp16")]; + tensor var_20596_begin_0 = const()[name = tensor("op_20596_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_20596_end_0 = const()[name = tensor("op_20596_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_20596_end_mask_0 = const()[name = tensor("op_20596_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20596_cast_fp16 = slice_by_index(begin = var_20596_begin_0, end = var_20596_end_0, end_mask = var_20596_end_mask_0, x = var_20161_cast_fp16)[name = tensor("op_20596_cast_fp16")]; + tensor var_20603_begin_0 = const()[name = tensor("op_20603_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_20603_end_0 = const()[name = tensor("op_20603_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_20603_end_mask_0 = const()[name = tensor("op_20603_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20603_cast_fp16 = slice_by_index(begin = var_20603_begin_0, end = var_20603_end_0, end_mask = var_20603_end_mask_0, x = var_20161_cast_fp16)[name = tensor("op_20603_cast_fp16")]; + tensor var_20610_begin_0 = const()[name = tensor("op_20610_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_20610_end_0 = const()[name = tensor("op_20610_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_20610_end_mask_0 = const()[name = tensor("op_20610_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20610_cast_fp16 = slice_by_index(begin = var_20610_begin_0, end = var_20610_end_0, end_mask = var_20610_end_mask_0, x = var_20165_cast_fp16)[name = tensor("op_20610_cast_fp16")]; + tensor var_20617_begin_0 = const()[name = tensor("op_20617_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_20617_end_0 = const()[name = tensor("op_20617_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_20617_end_mask_0 = const()[name = tensor("op_20617_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20617_cast_fp16 = slice_by_index(begin = var_20617_begin_0, end = var_20617_end_0, end_mask = var_20617_end_mask_0, x = var_20165_cast_fp16)[name = tensor("op_20617_cast_fp16")]; + tensor var_20624_begin_0 = const()[name = tensor("op_20624_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_20624_end_0 = const()[name = tensor("op_20624_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_20624_end_mask_0 = const()[name = tensor("op_20624_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20624_cast_fp16 = slice_by_index(begin = var_20624_begin_0, end = var_20624_end_0, end_mask = var_20624_end_mask_0, x = var_20165_cast_fp16)[name = tensor("op_20624_cast_fp16")]; + tensor var_20631_begin_0 = const()[name = tensor("op_20631_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_20631_end_0 = const()[name = tensor("op_20631_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_20631_end_mask_0 = const()[name = tensor("op_20631_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20631_cast_fp16 = slice_by_index(begin = var_20631_begin_0, end = var_20631_end_0, end_mask = var_20631_end_mask_0, x = var_20165_cast_fp16)[name = tensor("op_20631_cast_fp16")]; + tensor var_20638_begin_0 = const()[name = tensor("op_20638_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_20638_end_0 = const()[name = tensor("op_20638_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_20638_end_mask_0 = const()[name = tensor("op_20638_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20638_cast_fp16 = slice_by_index(begin = var_20638_begin_0, end = var_20638_end_0, end_mask = var_20638_end_mask_0, x = var_20169_cast_fp16)[name = tensor("op_20638_cast_fp16")]; + tensor var_20645_begin_0 = const()[name = tensor("op_20645_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_20645_end_0 = const()[name = tensor("op_20645_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_20645_end_mask_0 = const()[name = tensor("op_20645_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20645_cast_fp16 = slice_by_index(begin = var_20645_begin_0, end = var_20645_end_0, end_mask = var_20645_end_mask_0, x = var_20169_cast_fp16)[name = tensor("op_20645_cast_fp16")]; + tensor var_20652_begin_0 = const()[name = tensor("op_20652_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_20652_end_0 = const()[name = tensor("op_20652_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_20652_end_mask_0 = const()[name = tensor("op_20652_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20652_cast_fp16 = slice_by_index(begin = var_20652_begin_0, end = var_20652_end_0, end_mask = var_20652_end_mask_0, x = var_20169_cast_fp16)[name = tensor("op_20652_cast_fp16")]; + tensor var_20659_begin_0 = const()[name = tensor("op_20659_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_20659_end_0 = const()[name = tensor("op_20659_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_20659_end_mask_0 = const()[name = tensor("op_20659_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20659_cast_fp16 = slice_by_index(begin = var_20659_begin_0, end = var_20659_end_0, end_mask = var_20659_end_mask_0, x = var_20169_cast_fp16)[name = tensor("op_20659_cast_fp16")]; + tensor var_20666_begin_0 = const()[name = tensor("op_20666_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_20666_end_0 = const()[name = tensor("op_20666_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_20666_end_mask_0 = const()[name = tensor("op_20666_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20666_cast_fp16 = slice_by_index(begin = var_20666_begin_0, end = var_20666_end_0, end_mask = var_20666_end_mask_0, x = var_20173_cast_fp16)[name = tensor("op_20666_cast_fp16")]; + tensor var_20673_begin_0 = const()[name = tensor("op_20673_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_20673_end_0 = const()[name = tensor("op_20673_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_20673_end_mask_0 = const()[name = tensor("op_20673_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20673_cast_fp16 = slice_by_index(begin = var_20673_begin_0, end = var_20673_end_0, end_mask = var_20673_end_mask_0, x = var_20173_cast_fp16)[name = tensor("op_20673_cast_fp16")]; + tensor var_20680_begin_0 = const()[name = tensor("op_20680_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_20680_end_0 = const()[name = tensor("op_20680_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_20680_end_mask_0 = const()[name = tensor("op_20680_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20680_cast_fp16 = slice_by_index(begin = var_20680_begin_0, end = var_20680_end_0, end_mask = var_20680_end_mask_0, x = var_20173_cast_fp16)[name = tensor("op_20680_cast_fp16")]; + tensor var_20687_begin_0 = const()[name = tensor("op_20687_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_20687_end_0 = const()[name = tensor("op_20687_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_20687_end_mask_0 = const()[name = tensor("op_20687_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20687_cast_fp16 = slice_by_index(begin = var_20687_begin_0, end = var_20687_end_0, end_mask = var_20687_end_mask_0, x = var_20173_cast_fp16)[name = tensor("op_20687_cast_fp16")]; + tensor var_20694_begin_0 = const()[name = tensor("op_20694_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_20694_end_0 = const()[name = tensor("op_20694_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_20694_end_mask_0 = const()[name = tensor("op_20694_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20694_cast_fp16 = slice_by_index(begin = var_20694_begin_0, end = var_20694_end_0, end_mask = var_20694_end_mask_0, x = var_20177_cast_fp16)[name = tensor("op_20694_cast_fp16")]; + tensor var_20701_begin_0 = const()[name = tensor("op_20701_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_20701_end_0 = const()[name = tensor("op_20701_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_20701_end_mask_0 = const()[name = tensor("op_20701_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20701_cast_fp16 = slice_by_index(begin = var_20701_begin_0, end = var_20701_end_0, end_mask = var_20701_end_mask_0, x = var_20177_cast_fp16)[name = tensor("op_20701_cast_fp16")]; + tensor var_20708_begin_0 = const()[name = tensor("op_20708_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_20708_end_0 = const()[name = tensor("op_20708_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_20708_end_mask_0 = const()[name = tensor("op_20708_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20708_cast_fp16 = slice_by_index(begin = var_20708_begin_0, end = var_20708_end_0, end_mask = var_20708_end_mask_0, x = var_20177_cast_fp16)[name = tensor("op_20708_cast_fp16")]; + tensor var_20715_begin_0 = const()[name = tensor("op_20715_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_20715_end_0 = const()[name = tensor("op_20715_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_20715_end_mask_0 = const()[name = tensor("op_20715_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20715_cast_fp16 = slice_by_index(begin = var_20715_begin_0, end = var_20715_end_0, end_mask = var_20715_end_mask_0, x = var_20177_cast_fp16)[name = tensor("op_20715_cast_fp16")]; + tensor var_20722_begin_0 = const()[name = tensor("op_20722_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_20722_end_0 = const()[name = tensor("op_20722_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_20722_end_mask_0 = const()[name = tensor("op_20722_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20722_cast_fp16 = slice_by_index(begin = var_20722_begin_0, end = var_20722_end_0, end_mask = var_20722_end_mask_0, x = var_20181_cast_fp16)[name = tensor("op_20722_cast_fp16")]; + tensor var_20729_begin_0 = const()[name = tensor("op_20729_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_20729_end_0 = const()[name = tensor("op_20729_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_20729_end_mask_0 = const()[name = tensor("op_20729_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20729_cast_fp16 = slice_by_index(begin = var_20729_begin_0, end = var_20729_end_0, end_mask = var_20729_end_mask_0, x = var_20181_cast_fp16)[name = tensor("op_20729_cast_fp16")]; + tensor var_20736_begin_0 = const()[name = tensor("op_20736_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_20736_end_0 = const()[name = tensor("op_20736_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_20736_end_mask_0 = const()[name = tensor("op_20736_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20736_cast_fp16 = slice_by_index(begin = var_20736_begin_0, end = var_20736_end_0, end_mask = var_20736_end_mask_0, x = var_20181_cast_fp16)[name = tensor("op_20736_cast_fp16")]; + tensor var_20743_begin_0 = const()[name = tensor("op_20743_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_20743_end_0 = const()[name = tensor("op_20743_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_20743_end_mask_0 = const()[name = tensor("op_20743_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20743_cast_fp16 = slice_by_index(begin = var_20743_begin_0, end = var_20743_end_0, end_mask = var_20743_end_mask_0, x = var_20181_cast_fp16)[name = tensor("op_20743_cast_fp16")]; + tensor k_27_perm_0 = const()[name = tensor("k_27_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_20748_begin_0 = const()[name = tensor("op_20748_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_20748_end_0 = const()[name = tensor("op_20748_end_0"), val = tensor([1, 1500, 1, 64])]; + tensor var_20748_end_mask_0 = const()[name = tensor("op_20748_end_mask_0"), val = tensor([true, true, true, false])]; + tensor transpose_18 = transpose(perm = k_27_perm_0, x = key_27_cast_fp16)[name = tensor("transpose_18")]; + tensor var_20748_cast_fp16 = slice_by_index(begin = var_20748_begin_0, end = var_20748_end_0, end_mask = var_20748_end_mask_0, x = transpose_18)[name = tensor("op_20748_cast_fp16")]; + tensor var_20752_begin_0 = const()[name = tensor("op_20752_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor var_20752_end_0 = const()[name = tensor("op_20752_end_0"), val = tensor([1, 1500, 1, 128])]; + tensor var_20752_end_mask_0 = const()[name = tensor("op_20752_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20752_cast_fp16 = slice_by_index(begin = var_20752_begin_0, end = var_20752_end_0, end_mask = var_20752_end_mask_0, x = transpose_18)[name = tensor("op_20752_cast_fp16")]; + tensor var_20756_begin_0 = const()[name = tensor("op_20756_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_20756_end_0 = const()[name = tensor("op_20756_end_0"), val = tensor([1, 1500, 1, 192])]; + tensor var_20756_end_mask_0 = const()[name = tensor("op_20756_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20756_cast_fp16 = slice_by_index(begin = var_20756_begin_0, end = var_20756_end_0, end_mask = var_20756_end_mask_0, x = transpose_18)[name = tensor("op_20756_cast_fp16")]; + tensor var_20760_begin_0 = const()[name = tensor("op_20760_begin_0"), val = tensor([0, 0, 0, 192])]; + tensor var_20760_end_0 = const()[name = tensor("op_20760_end_0"), val = tensor([1, 1500, 1, 256])]; + tensor var_20760_end_mask_0 = const()[name = tensor("op_20760_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20760_cast_fp16 = slice_by_index(begin = var_20760_begin_0, end = var_20760_end_0, end_mask = var_20760_end_mask_0, x = transpose_18)[name = tensor("op_20760_cast_fp16")]; + tensor var_20764_begin_0 = const()[name = tensor("op_20764_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_20764_end_0 = const()[name = tensor("op_20764_end_0"), val = tensor([1, 1500, 1, 320])]; + tensor var_20764_end_mask_0 = const()[name = tensor("op_20764_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20764_cast_fp16 = slice_by_index(begin = var_20764_begin_0, end = var_20764_end_0, end_mask = var_20764_end_mask_0, x = transpose_18)[name = tensor("op_20764_cast_fp16")]; + tensor var_20768_begin_0 = const()[name = tensor("op_20768_begin_0"), val = tensor([0, 0, 0, 320])]; + tensor var_20768_end_0 = const()[name = tensor("op_20768_end_0"), val = tensor([1, 1500, 1, 384])]; + tensor var_20768_end_mask_0 = const()[name = tensor("op_20768_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20768_cast_fp16 = slice_by_index(begin = var_20768_begin_0, end = var_20768_end_0, end_mask = var_20768_end_mask_0, x = transpose_18)[name = tensor("op_20768_cast_fp16")]; + tensor var_20772_begin_0 = const()[name = tensor("op_20772_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_20772_end_0 = const()[name = tensor("op_20772_end_0"), val = tensor([1, 1500, 1, 448])]; + tensor var_20772_end_mask_0 = const()[name = tensor("op_20772_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20772_cast_fp16 = slice_by_index(begin = var_20772_begin_0, end = var_20772_end_0, end_mask = var_20772_end_mask_0, x = transpose_18)[name = tensor("op_20772_cast_fp16")]; + tensor var_20776_begin_0 = const()[name = tensor("op_20776_begin_0"), val = tensor([0, 0, 0, 448])]; + tensor var_20776_end_0 = const()[name = tensor("op_20776_end_0"), val = tensor([1, 1500, 1, 512])]; + tensor var_20776_end_mask_0 = const()[name = tensor("op_20776_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20776_cast_fp16 = slice_by_index(begin = var_20776_begin_0, end = var_20776_end_0, end_mask = var_20776_end_mask_0, x = transpose_18)[name = tensor("op_20776_cast_fp16")]; + tensor var_20780_begin_0 = const()[name = tensor("op_20780_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_20780_end_0 = const()[name = tensor("op_20780_end_0"), val = tensor([1, 1500, 1, 576])]; + tensor var_20780_end_mask_0 = const()[name = tensor("op_20780_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20780_cast_fp16 = slice_by_index(begin = var_20780_begin_0, end = var_20780_end_0, end_mask = var_20780_end_mask_0, x = transpose_18)[name = tensor("op_20780_cast_fp16")]; + tensor var_20784_begin_0 = const()[name = tensor("op_20784_begin_0"), val = tensor([0, 0, 0, 576])]; + tensor var_20784_end_0 = const()[name = tensor("op_20784_end_0"), val = tensor([1, 1500, 1, 640])]; + tensor var_20784_end_mask_0 = const()[name = tensor("op_20784_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20784_cast_fp16 = slice_by_index(begin = var_20784_begin_0, end = var_20784_end_0, end_mask = var_20784_end_mask_0, x = transpose_18)[name = tensor("op_20784_cast_fp16")]; + tensor var_20788_begin_0 = const()[name = tensor("op_20788_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_20788_end_0 = const()[name = tensor("op_20788_end_0"), val = tensor([1, 1500, 1, 704])]; + tensor var_20788_end_mask_0 = const()[name = tensor("op_20788_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20788_cast_fp16 = slice_by_index(begin = var_20788_begin_0, end = var_20788_end_0, end_mask = var_20788_end_mask_0, x = transpose_18)[name = tensor("op_20788_cast_fp16")]; + tensor var_20792_begin_0 = const()[name = tensor("op_20792_begin_0"), val = tensor([0, 0, 0, 704])]; + tensor var_20792_end_0 = const()[name = tensor("op_20792_end_0"), val = tensor([1, 1500, 1, 768])]; + tensor var_20792_end_mask_0 = const()[name = tensor("op_20792_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20792_cast_fp16 = slice_by_index(begin = var_20792_begin_0, end = var_20792_end_0, end_mask = var_20792_end_mask_0, x = transpose_18)[name = tensor("op_20792_cast_fp16")]; + tensor var_20796_begin_0 = const()[name = tensor("op_20796_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_20796_end_0 = const()[name = tensor("op_20796_end_0"), val = tensor([1, 1500, 1, 832])]; + tensor var_20796_end_mask_0 = const()[name = tensor("op_20796_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20796_cast_fp16 = slice_by_index(begin = var_20796_begin_0, end = var_20796_end_0, end_mask = var_20796_end_mask_0, x = transpose_18)[name = tensor("op_20796_cast_fp16")]; + tensor var_20800_begin_0 = const()[name = tensor("op_20800_begin_0"), val = tensor([0, 0, 0, 832])]; + tensor var_20800_end_0 = const()[name = tensor("op_20800_end_0"), val = tensor([1, 1500, 1, 896])]; + tensor var_20800_end_mask_0 = const()[name = tensor("op_20800_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20800_cast_fp16 = slice_by_index(begin = var_20800_begin_0, end = var_20800_end_0, end_mask = var_20800_end_mask_0, x = transpose_18)[name = tensor("op_20800_cast_fp16")]; + tensor var_20804_begin_0 = const()[name = tensor("op_20804_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_20804_end_0 = const()[name = tensor("op_20804_end_0"), val = tensor([1, 1500, 1, 960])]; + tensor var_20804_end_mask_0 = const()[name = tensor("op_20804_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20804_cast_fp16 = slice_by_index(begin = var_20804_begin_0, end = var_20804_end_0, end_mask = var_20804_end_mask_0, x = transpose_18)[name = tensor("op_20804_cast_fp16")]; + tensor var_20808_begin_0 = const()[name = tensor("op_20808_begin_0"), val = tensor([0, 0, 0, 960])]; + tensor var_20808_end_0 = const()[name = tensor("op_20808_end_0"), val = tensor([1, 1500, 1, 1024])]; + tensor var_20808_end_mask_0 = const()[name = tensor("op_20808_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20808_cast_fp16 = slice_by_index(begin = var_20808_begin_0, end = var_20808_end_0, end_mask = var_20808_end_mask_0, x = transpose_18)[name = tensor("op_20808_cast_fp16")]; + tensor var_20812_begin_0 = const()[name = tensor("op_20812_begin_0"), val = tensor([0, 0, 0, 1024])]; + tensor var_20812_end_0 = const()[name = tensor("op_20812_end_0"), val = tensor([1, 1500, 1, 1088])]; + tensor var_20812_end_mask_0 = const()[name = tensor("op_20812_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20812_cast_fp16 = slice_by_index(begin = var_20812_begin_0, end = var_20812_end_0, end_mask = var_20812_end_mask_0, x = transpose_18)[name = tensor("op_20812_cast_fp16")]; + tensor var_20816_begin_0 = const()[name = tensor("op_20816_begin_0"), val = tensor([0, 0, 0, 1088])]; + tensor var_20816_end_0 = const()[name = tensor("op_20816_end_0"), val = tensor([1, 1500, 1, 1152])]; + tensor var_20816_end_mask_0 = const()[name = tensor("op_20816_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20816_cast_fp16 = slice_by_index(begin = var_20816_begin_0, end = var_20816_end_0, end_mask = var_20816_end_mask_0, x = transpose_18)[name = tensor("op_20816_cast_fp16")]; + tensor var_20820_begin_0 = const()[name = tensor("op_20820_begin_0"), val = tensor([0, 0, 0, 1152])]; + tensor var_20820_end_0 = const()[name = tensor("op_20820_end_0"), val = tensor([1, 1500, 1, 1216])]; + tensor var_20820_end_mask_0 = const()[name = tensor("op_20820_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20820_cast_fp16 = slice_by_index(begin = var_20820_begin_0, end = var_20820_end_0, end_mask = var_20820_end_mask_0, x = transpose_18)[name = tensor("op_20820_cast_fp16")]; + tensor var_20824_begin_0 = const()[name = tensor("op_20824_begin_0"), val = tensor([0, 0, 0, 1216])]; + tensor var_20824_end_0 = const()[name = tensor("op_20824_end_0"), val = tensor([1, 1500, 1, 1280])]; + tensor var_20824_end_mask_0 = const()[name = tensor("op_20824_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_20824_cast_fp16 = slice_by_index(begin = var_20824_begin_0, end = var_20824_end_0, end_mask = var_20824_end_mask_0, x = transpose_18)[name = tensor("op_20824_cast_fp16")]; + tensor var_20826_begin_0 = const()[name = tensor("op_20826_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_20826_end_0 = const()[name = tensor("op_20826_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_20826_end_mask_0 = const()[name = tensor("op_20826_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_20826_cast_fp16 = slice_by_index(begin = var_20826_begin_0, end = var_20826_end_0, end_mask = var_20826_end_mask_0, x = value_27_cast_fp16)[name = tensor("op_20826_cast_fp16")]; + tensor var_20830_begin_0 = const()[name = tensor("op_20830_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_20830_end_0 = const()[name = tensor("op_20830_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_20830_end_mask_0 = const()[name = tensor("op_20830_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_20830_cast_fp16 = slice_by_index(begin = var_20830_begin_0, end = var_20830_end_0, end_mask = var_20830_end_mask_0, x = value_27_cast_fp16)[name = tensor("op_20830_cast_fp16")]; + tensor var_20834_begin_0 = const()[name = tensor("op_20834_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_20834_end_0 = const()[name = tensor("op_20834_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_20834_end_mask_0 = const()[name = tensor("op_20834_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_20834_cast_fp16 = slice_by_index(begin = var_20834_begin_0, end = var_20834_end_0, end_mask = var_20834_end_mask_0, x = value_27_cast_fp16)[name = tensor("op_20834_cast_fp16")]; + tensor var_20838_begin_0 = const()[name = tensor("op_20838_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_20838_end_0 = const()[name = tensor("op_20838_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_20838_end_mask_0 = const()[name = tensor("op_20838_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_20838_cast_fp16 = slice_by_index(begin = var_20838_begin_0, end = var_20838_end_0, end_mask = var_20838_end_mask_0, x = value_27_cast_fp16)[name = tensor("op_20838_cast_fp16")]; + tensor var_20842_begin_0 = const()[name = tensor("op_20842_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_20842_end_0 = const()[name = tensor("op_20842_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_20842_end_mask_0 = const()[name = tensor("op_20842_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_20842_cast_fp16 = slice_by_index(begin = var_20842_begin_0, end = var_20842_end_0, end_mask = var_20842_end_mask_0, x = value_27_cast_fp16)[name = tensor("op_20842_cast_fp16")]; + tensor var_20846_begin_0 = const()[name = tensor("op_20846_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_20846_end_0 = const()[name = tensor("op_20846_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_20846_end_mask_0 = const()[name = tensor("op_20846_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_20846_cast_fp16 = slice_by_index(begin = var_20846_begin_0, end = var_20846_end_0, end_mask = var_20846_end_mask_0, x = value_27_cast_fp16)[name = tensor("op_20846_cast_fp16")]; + tensor var_20850_begin_0 = const()[name = tensor("op_20850_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_20850_end_0 = const()[name = tensor("op_20850_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_20850_end_mask_0 = const()[name = tensor("op_20850_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_20850_cast_fp16 = slice_by_index(begin = var_20850_begin_0, end = var_20850_end_0, end_mask = var_20850_end_mask_0, x = value_27_cast_fp16)[name = tensor("op_20850_cast_fp16")]; + tensor var_20854_begin_0 = const()[name = tensor("op_20854_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_20854_end_0 = const()[name = tensor("op_20854_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_20854_end_mask_0 = const()[name = tensor("op_20854_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_20854_cast_fp16 = slice_by_index(begin = var_20854_begin_0, end = var_20854_end_0, end_mask = var_20854_end_mask_0, x = value_27_cast_fp16)[name = tensor("op_20854_cast_fp16")]; + tensor var_20858_begin_0 = const()[name = tensor("op_20858_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_20858_end_0 = const()[name = tensor("op_20858_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_20858_end_mask_0 = const()[name = tensor("op_20858_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_20858_cast_fp16 = slice_by_index(begin = var_20858_begin_0, end = var_20858_end_0, end_mask = var_20858_end_mask_0, x = value_27_cast_fp16)[name = tensor("op_20858_cast_fp16")]; + tensor var_20862_begin_0 = const()[name = tensor("op_20862_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_20862_end_0 = const()[name = tensor("op_20862_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_20862_end_mask_0 = const()[name = tensor("op_20862_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_20862_cast_fp16 = slice_by_index(begin = var_20862_begin_0, end = var_20862_end_0, end_mask = var_20862_end_mask_0, x = value_27_cast_fp16)[name = tensor("op_20862_cast_fp16")]; + tensor var_20866_begin_0 = const()[name = tensor("op_20866_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_20866_end_0 = const()[name = tensor("op_20866_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_20866_end_mask_0 = const()[name = tensor("op_20866_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_20866_cast_fp16 = slice_by_index(begin = var_20866_begin_0, end = var_20866_end_0, end_mask = var_20866_end_mask_0, x = value_27_cast_fp16)[name = tensor("op_20866_cast_fp16")]; + tensor var_20870_begin_0 = const()[name = tensor("op_20870_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_20870_end_0 = const()[name = tensor("op_20870_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_20870_end_mask_0 = const()[name = tensor("op_20870_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_20870_cast_fp16 = slice_by_index(begin = var_20870_begin_0, end = var_20870_end_0, end_mask = var_20870_end_mask_0, x = value_27_cast_fp16)[name = tensor("op_20870_cast_fp16")]; + tensor var_20874_begin_0 = const()[name = tensor("op_20874_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_20874_end_0 = const()[name = tensor("op_20874_end_0"), val = tensor([1, 832, 1, 1500])]; + tensor var_20874_end_mask_0 = const()[name = tensor("op_20874_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_20874_cast_fp16 = slice_by_index(begin = var_20874_begin_0, end = var_20874_end_0, end_mask = var_20874_end_mask_0, x = value_27_cast_fp16)[name = tensor("op_20874_cast_fp16")]; + tensor var_20878_begin_0 = const()[name = tensor("op_20878_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_20878_end_0 = const()[name = tensor("op_20878_end_0"), val = tensor([1, 896, 1, 1500])]; + tensor var_20878_end_mask_0 = const()[name = tensor("op_20878_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_20878_cast_fp16 = slice_by_index(begin = var_20878_begin_0, end = var_20878_end_0, end_mask = var_20878_end_mask_0, x = value_27_cast_fp16)[name = tensor("op_20878_cast_fp16")]; + tensor var_20882_begin_0 = const()[name = tensor("op_20882_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_20882_end_0 = const()[name = tensor("op_20882_end_0"), val = tensor([1, 960, 1, 1500])]; + tensor var_20882_end_mask_0 = const()[name = tensor("op_20882_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_20882_cast_fp16 = slice_by_index(begin = var_20882_begin_0, end = var_20882_end_0, end_mask = var_20882_end_mask_0, x = value_27_cast_fp16)[name = tensor("op_20882_cast_fp16")]; + tensor var_20886_begin_0 = const()[name = tensor("op_20886_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_20886_end_0 = const()[name = tensor("op_20886_end_0"), val = tensor([1, 1024, 1, 1500])]; + tensor var_20886_end_mask_0 = const()[name = tensor("op_20886_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_20886_cast_fp16 = slice_by_index(begin = var_20886_begin_0, end = var_20886_end_0, end_mask = var_20886_end_mask_0, x = value_27_cast_fp16)[name = tensor("op_20886_cast_fp16")]; + tensor var_20890_begin_0 = const()[name = tensor("op_20890_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_20890_end_0 = const()[name = tensor("op_20890_end_0"), val = tensor([1, 1088, 1, 1500])]; + tensor var_20890_end_mask_0 = const()[name = tensor("op_20890_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_20890_cast_fp16 = slice_by_index(begin = var_20890_begin_0, end = var_20890_end_0, end_mask = var_20890_end_mask_0, x = value_27_cast_fp16)[name = tensor("op_20890_cast_fp16")]; + tensor var_20894_begin_0 = const()[name = tensor("op_20894_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_20894_end_0 = const()[name = tensor("op_20894_end_0"), val = tensor([1, 1152, 1, 1500])]; + tensor var_20894_end_mask_0 = const()[name = tensor("op_20894_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_20894_cast_fp16 = slice_by_index(begin = var_20894_begin_0, end = var_20894_end_0, end_mask = var_20894_end_mask_0, x = value_27_cast_fp16)[name = tensor("op_20894_cast_fp16")]; + tensor var_20898_begin_0 = const()[name = tensor("op_20898_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_20898_end_0 = const()[name = tensor("op_20898_end_0"), val = tensor([1, 1216, 1, 1500])]; + tensor var_20898_end_mask_0 = const()[name = tensor("op_20898_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_20898_cast_fp16 = slice_by_index(begin = var_20898_begin_0, end = var_20898_end_0, end_mask = var_20898_end_mask_0, x = value_27_cast_fp16)[name = tensor("op_20898_cast_fp16")]; + tensor var_20902_begin_0 = const()[name = tensor("op_20902_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_20902_end_0 = const()[name = tensor("op_20902_end_0"), val = tensor([1, 1280, 1, 1500])]; + tensor var_20902_end_mask_0 = const()[name = tensor("op_20902_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_20902_cast_fp16 = slice_by_index(begin = var_20902_begin_0, end = var_20902_end_0, end_mask = var_20902_end_mask_0, x = value_27_cast_fp16)[name = tensor("op_20902_cast_fp16")]; + tensor var_20906_equation_0 = const()[name = tensor("op_20906_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_20906_cast_fp16 = einsum(equation = var_20906_equation_0, values = (var_20748_cast_fp16, var_20190_cast_fp16))[name = tensor("op_20906_cast_fp16")]; + tensor var_20907_to_fp16 = const()[name = tensor("op_20907_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2081_cast_fp16 = mul(x = var_20906_cast_fp16, y = var_20907_to_fp16)[name = tensor("aw_chunk_2081_cast_fp16")]; + tensor var_20910_equation_0 = const()[name = tensor("op_20910_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_20910_cast_fp16 = einsum(equation = var_20910_equation_0, values = (var_20748_cast_fp16, var_20197_cast_fp16))[name = tensor("op_20910_cast_fp16")]; + tensor var_20911_to_fp16 = const()[name = tensor("op_20911_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2083_cast_fp16 = mul(x = var_20910_cast_fp16, y = var_20911_to_fp16)[name = tensor("aw_chunk_2083_cast_fp16")]; + tensor var_20914_equation_0 = const()[name = tensor("op_20914_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_20914_cast_fp16 = einsum(equation = var_20914_equation_0, values = (var_20748_cast_fp16, var_20204_cast_fp16))[name = tensor("op_20914_cast_fp16")]; + tensor var_20915_to_fp16 = const()[name = tensor("op_20915_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2085_cast_fp16 = mul(x = var_20914_cast_fp16, y = var_20915_to_fp16)[name = tensor("aw_chunk_2085_cast_fp16")]; + tensor var_20918_equation_0 = const()[name = tensor("op_20918_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_20918_cast_fp16 = einsum(equation = var_20918_equation_0, values = (var_20748_cast_fp16, var_20211_cast_fp16))[name = tensor("op_20918_cast_fp16")]; + tensor var_20919_to_fp16 = const()[name = tensor("op_20919_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2087_cast_fp16 = mul(x = var_20918_cast_fp16, y = var_20919_to_fp16)[name = tensor("aw_chunk_2087_cast_fp16")]; + tensor var_20922_equation_0 = const()[name = tensor("op_20922_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_20922_cast_fp16 = einsum(equation = var_20922_equation_0, values = (var_20752_cast_fp16, var_20218_cast_fp16))[name = tensor("op_20922_cast_fp16")]; + tensor var_20923_to_fp16 = const()[name = tensor("op_20923_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2089_cast_fp16 = mul(x = var_20922_cast_fp16, y = var_20923_to_fp16)[name = tensor("aw_chunk_2089_cast_fp16")]; + tensor var_20926_equation_0 = const()[name = tensor("op_20926_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_20926_cast_fp16 = einsum(equation = var_20926_equation_0, values = (var_20752_cast_fp16, var_20225_cast_fp16))[name = tensor("op_20926_cast_fp16")]; + tensor var_20927_to_fp16 = const()[name = tensor("op_20927_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2091_cast_fp16 = mul(x = var_20926_cast_fp16, y = var_20927_to_fp16)[name = tensor("aw_chunk_2091_cast_fp16")]; + tensor var_20930_equation_0 = const()[name = tensor("op_20930_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_20930_cast_fp16 = einsum(equation = var_20930_equation_0, values = (var_20752_cast_fp16, var_20232_cast_fp16))[name = tensor("op_20930_cast_fp16")]; + tensor var_20931_to_fp16 = const()[name = tensor("op_20931_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2093_cast_fp16 = mul(x = var_20930_cast_fp16, y = var_20931_to_fp16)[name = tensor("aw_chunk_2093_cast_fp16")]; + tensor var_20934_equation_0 = const()[name = tensor("op_20934_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_20934_cast_fp16 = einsum(equation = var_20934_equation_0, values = (var_20752_cast_fp16, var_20239_cast_fp16))[name = tensor("op_20934_cast_fp16")]; + tensor var_20935_to_fp16 = const()[name = tensor("op_20935_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2095_cast_fp16 = mul(x = var_20934_cast_fp16, y = var_20935_to_fp16)[name = tensor("aw_chunk_2095_cast_fp16")]; + tensor var_20938_equation_0 = const()[name = tensor("op_20938_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_20938_cast_fp16 = einsum(equation = var_20938_equation_0, values = (var_20756_cast_fp16, var_20246_cast_fp16))[name = tensor("op_20938_cast_fp16")]; + tensor var_20939_to_fp16 = const()[name = tensor("op_20939_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2097_cast_fp16 = mul(x = var_20938_cast_fp16, y = var_20939_to_fp16)[name = tensor("aw_chunk_2097_cast_fp16")]; + tensor var_20942_equation_0 = const()[name = tensor("op_20942_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_20942_cast_fp16 = einsum(equation = var_20942_equation_0, values = (var_20756_cast_fp16, var_20253_cast_fp16))[name = tensor("op_20942_cast_fp16")]; + tensor var_20943_to_fp16 = const()[name = tensor("op_20943_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2099_cast_fp16 = mul(x = var_20942_cast_fp16, y = var_20943_to_fp16)[name = tensor("aw_chunk_2099_cast_fp16")]; + tensor var_20946_equation_0 = const()[name = tensor("op_20946_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_20946_cast_fp16 = einsum(equation = var_20946_equation_0, values = (var_20756_cast_fp16, var_20260_cast_fp16))[name = tensor("op_20946_cast_fp16")]; + tensor var_20947_to_fp16 = const()[name = tensor("op_20947_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2101_cast_fp16 = mul(x = var_20946_cast_fp16, y = var_20947_to_fp16)[name = tensor("aw_chunk_2101_cast_fp16")]; + tensor var_20950_equation_0 = const()[name = tensor("op_20950_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_20950_cast_fp16 = einsum(equation = var_20950_equation_0, values = (var_20756_cast_fp16, var_20267_cast_fp16))[name = tensor("op_20950_cast_fp16")]; + tensor var_20951_to_fp16 = const()[name = tensor("op_20951_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2103_cast_fp16 = mul(x = var_20950_cast_fp16, y = var_20951_to_fp16)[name = tensor("aw_chunk_2103_cast_fp16")]; + tensor var_20954_equation_0 = const()[name = tensor("op_20954_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_20954_cast_fp16 = einsum(equation = var_20954_equation_0, values = (var_20760_cast_fp16, var_20274_cast_fp16))[name = tensor("op_20954_cast_fp16")]; + tensor var_20955_to_fp16 = const()[name = tensor("op_20955_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2105_cast_fp16 = mul(x = var_20954_cast_fp16, y = var_20955_to_fp16)[name = tensor("aw_chunk_2105_cast_fp16")]; + tensor var_20958_equation_0 = const()[name = tensor("op_20958_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_20958_cast_fp16 = einsum(equation = var_20958_equation_0, values = (var_20760_cast_fp16, var_20281_cast_fp16))[name = tensor("op_20958_cast_fp16")]; + tensor var_20959_to_fp16 = const()[name = tensor("op_20959_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2107_cast_fp16 = mul(x = var_20958_cast_fp16, y = var_20959_to_fp16)[name = tensor("aw_chunk_2107_cast_fp16")]; + tensor var_20962_equation_0 = const()[name = tensor("op_20962_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_20962_cast_fp16 = einsum(equation = var_20962_equation_0, values = (var_20760_cast_fp16, var_20288_cast_fp16))[name = tensor("op_20962_cast_fp16")]; + tensor var_20963_to_fp16 = const()[name = tensor("op_20963_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2109_cast_fp16 = mul(x = var_20962_cast_fp16, y = var_20963_to_fp16)[name = tensor("aw_chunk_2109_cast_fp16")]; + tensor var_20966_equation_0 = const()[name = tensor("op_20966_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_20966_cast_fp16 = einsum(equation = var_20966_equation_0, values = (var_20760_cast_fp16, var_20295_cast_fp16))[name = tensor("op_20966_cast_fp16")]; + tensor var_20967_to_fp16 = const()[name = tensor("op_20967_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2111_cast_fp16 = mul(x = var_20966_cast_fp16, y = var_20967_to_fp16)[name = tensor("aw_chunk_2111_cast_fp16")]; + tensor var_20970_equation_0 = const()[name = tensor("op_20970_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_20970_cast_fp16 = einsum(equation = var_20970_equation_0, values = (var_20764_cast_fp16, var_20302_cast_fp16))[name = tensor("op_20970_cast_fp16")]; + tensor var_20971_to_fp16 = const()[name = tensor("op_20971_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2113_cast_fp16 = mul(x = var_20970_cast_fp16, y = var_20971_to_fp16)[name = tensor("aw_chunk_2113_cast_fp16")]; + tensor var_20974_equation_0 = const()[name = tensor("op_20974_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_20974_cast_fp16 = einsum(equation = var_20974_equation_0, values = (var_20764_cast_fp16, var_20309_cast_fp16))[name = tensor("op_20974_cast_fp16")]; + tensor var_20975_to_fp16 = const()[name = tensor("op_20975_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2115_cast_fp16 = mul(x = var_20974_cast_fp16, y = var_20975_to_fp16)[name = tensor("aw_chunk_2115_cast_fp16")]; + tensor var_20978_equation_0 = const()[name = tensor("op_20978_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_20978_cast_fp16 = einsum(equation = var_20978_equation_0, values = (var_20764_cast_fp16, var_20316_cast_fp16))[name = tensor("op_20978_cast_fp16")]; + tensor var_20979_to_fp16 = const()[name = tensor("op_20979_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2117_cast_fp16 = mul(x = var_20978_cast_fp16, y = var_20979_to_fp16)[name = tensor("aw_chunk_2117_cast_fp16")]; + tensor var_20982_equation_0 = const()[name = tensor("op_20982_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_20982_cast_fp16 = einsum(equation = var_20982_equation_0, values = (var_20764_cast_fp16, var_20323_cast_fp16))[name = tensor("op_20982_cast_fp16")]; + tensor var_20983_to_fp16 = const()[name = tensor("op_20983_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2119_cast_fp16 = mul(x = var_20982_cast_fp16, y = var_20983_to_fp16)[name = tensor("aw_chunk_2119_cast_fp16")]; + tensor var_20986_equation_0 = const()[name = tensor("op_20986_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_20986_cast_fp16 = einsum(equation = var_20986_equation_0, values = (var_20768_cast_fp16, var_20330_cast_fp16))[name = tensor("op_20986_cast_fp16")]; + tensor var_20987_to_fp16 = const()[name = tensor("op_20987_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2121_cast_fp16 = mul(x = var_20986_cast_fp16, y = var_20987_to_fp16)[name = tensor("aw_chunk_2121_cast_fp16")]; + tensor var_20990_equation_0 = const()[name = tensor("op_20990_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_20990_cast_fp16 = einsum(equation = var_20990_equation_0, values = (var_20768_cast_fp16, var_20337_cast_fp16))[name = tensor("op_20990_cast_fp16")]; + tensor var_20991_to_fp16 = const()[name = tensor("op_20991_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2123_cast_fp16 = mul(x = var_20990_cast_fp16, y = var_20991_to_fp16)[name = tensor("aw_chunk_2123_cast_fp16")]; + tensor var_20994_equation_0 = const()[name = tensor("op_20994_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_20994_cast_fp16 = einsum(equation = var_20994_equation_0, values = (var_20768_cast_fp16, var_20344_cast_fp16))[name = tensor("op_20994_cast_fp16")]; + tensor var_20995_to_fp16 = const()[name = tensor("op_20995_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2125_cast_fp16 = mul(x = var_20994_cast_fp16, y = var_20995_to_fp16)[name = tensor("aw_chunk_2125_cast_fp16")]; + tensor var_20998_equation_0 = const()[name = tensor("op_20998_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_20998_cast_fp16 = einsum(equation = var_20998_equation_0, values = (var_20768_cast_fp16, var_20351_cast_fp16))[name = tensor("op_20998_cast_fp16")]; + tensor var_20999_to_fp16 = const()[name = tensor("op_20999_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2127_cast_fp16 = mul(x = var_20998_cast_fp16, y = var_20999_to_fp16)[name = tensor("aw_chunk_2127_cast_fp16")]; + tensor var_21002_equation_0 = const()[name = tensor("op_21002_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_21002_cast_fp16 = einsum(equation = var_21002_equation_0, values = (var_20772_cast_fp16, var_20358_cast_fp16))[name = tensor("op_21002_cast_fp16")]; + tensor var_21003_to_fp16 = const()[name = tensor("op_21003_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2129_cast_fp16 = mul(x = var_21002_cast_fp16, y = var_21003_to_fp16)[name = tensor("aw_chunk_2129_cast_fp16")]; + tensor var_21006_equation_0 = const()[name = tensor("op_21006_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_21006_cast_fp16 = einsum(equation = var_21006_equation_0, values = (var_20772_cast_fp16, var_20365_cast_fp16))[name = tensor("op_21006_cast_fp16")]; + tensor var_21007_to_fp16 = const()[name = tensor("op_21007_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2131_cast_fp16 = mul(x = var_21006_cast_fp16, y = var_21007_to_fp16)[name = tensor("aw_chunk_2131_cast_fp16")]; + tensor var_21010_equation_0 = const()[name = tensor("op_21010_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_21010_cast_fp16 = einsum(equation = var_21010_equation_0, values = (var_20772_cast_fp16, var_20372_cast_fp16))[name = tensor("op_21010_cast_fp16")]; + tensor var_21011_to_fp16 = const()[name = tensor("op_21011_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2133_cast_fp16 = mul(x = var_21010_cast_fp16, y = var_21011_to_fp16)[name = tensor("aw_chunk_2133_cast_fp16")]; + tensor var_21014_equation_0 = const()[name = tensor("op_21014_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_21014_cast_fp16 = einsum(equation = var_21014_equation_0, values = (var_20772_cast_fp16, var_20379_cast_fp16))[name = tensor("op_21014_cast_fp16")]; + tensor var_21015_to_fp16 = const()[name = tensor("op_21015_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2135_cast_fp16 = mul(x = var_21014_cast_fp16, y = var_21015_to_fp16)[name = tensor("aw_chunk_2135_cast_fp16")]; + tensor var_21018_equation_0 = const()[name = tensor("op_21018_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_21018_cast_fp16 = einsum(equation = var_21018_equation_0, values = (var_20776_cast_fp16, var_20386_cast_fp16))[name = tensor("op_21018_cast_fp16")]; + tensor var_21019_to_fp16 = const()[name = tensor("op_21019_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2137_cast_fp16 = mul(x = var_21018_cast_fp16, y = var_21019_to_fp16)[name = tensor("aw_chunk_2137_cast_fp16")]; + tensor var_21022_equation_0 = const()[name = tensor("op_21022_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_21022_cast_fp16 = einsum(equation = var_21022_equation_0, values = (var_20776_cast_fp16, var_20393_cast_fp16))[name = tensor("op_21022_cast_fp16")]; + tensor var_21023_to_fp16 = const()[name = tensor("op_21023_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2139_cast_fp16 = mul(x = var_21022_cast_fp16, y = var_21023_to_fp16)[name = tensor("aw_chunk_2139_cast_fp16")]; + tensor var_21026_equation_0 = const()[name = tensor("op_21026_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_21026_cast_fp16 = einsum(equation = var_21026_equation_0, values = (var_20776_cast_fp16, var_20400_cast_fp16))[name = tensor("op_21026_cast_fp16")]; + tensor var_21027_to_fp16 = const()[name = tensor("op_21027_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2141_cast_fp16 = mul(x = var_21026_cast_fp16, y = var_21027_to_fp16)[name = tensor("aw_chunk_2141_cast_fp16")]; + tensor var_21030_equation_0 = const()[name = tensor("op_21030_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_21030_cast_fp16 = einsum(equation = var_21030_equation_0, values = (var_20776_cast_fp16, var_20407_cast_fp16))[name = tensor("op_21030_cast_fp16")]; + tensor var_21031_to_fp16 = const()[name = tensor("op_21031_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2143_cast_fp16 = mul(x = var_21030_cast_fp16, y = var_21031_to_fp16)[name = tensor("aw_chunk_2143_cast_fp16")]; + tensor var_21034_equation_0 = const()[name = tensor("op_21034_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_21034_cast_fp16 = einsum(equation = var_21034_equation_0, values = (var_20780_cast_fp16, var_20414_cast_fp16))[name = tensor("op_21034_cast_fp16")]; + tensor var_21035_to_fp16 = const()[name = tensor("op_21035_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2145_cast_fp16 = mul(x = var_21034_cast_fp16, y = var_21035_to_fp16)[name = tensor("aw_chunk_2145_cast_fp16")]; + tensor var_21038_equation_0 = const()[name = tensor("op_21038_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_21038_cast_fp16 = einsum(equation = var_21038_equation_0, values = (var_20780_cast_fp16, var_20421_cast_fp16))[name = tensor("op_21038_cast_fp16")]; + tensor var_21039_to_fp16 = const()[name = tensor("op_21039_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2147_cast_fp16 = mul(x = var_21038_cast_fp16, y = var_21039_to_fp16)[name = tensor("aw_chunk_2147_cast_fp16")]; + tensor var_21042_equation_0 = const()[name = tensor("op_21042_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_21042_cast_fp16 = einsum(equation = var_21042_equation_0, values = (var_20780_cast_fp16, var_20428_cast_fp16))[name = tensor("op_21042_cast_fp16")]; + tensor var_21043_to_fp16 = const()[name = tensor("op_21043_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2149_cast_fp16 = mul(x = var_21042_cast_fp16, y = var_21043_to_fp16)[name = tensor("aw_chunk_2149_cast_fp16")]; + tensor var_21046_equation_0 = const()[name = tensor("op_21046_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_21046_cast_fp16 = einsum(equation = var_21046_equation_0, values = (var_20780_cast_fp16, var_20435_cast_fp16))[name = tensor("op_21046_cast_fp16")]; + tensor var_21047_to_fp16 = const()[name = tensor("op_21047_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2151_cast_fp16 = mul(x = var_21046_cast_fp16, y = var_21047_to_fp16)[name = tensor("aw_chunk_2151_cast_fp16")]; + tensor var_21050_equation_0 = const()[name = tensor("op_21050_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_21050_cast_fp16 = einsum(equation = var_21050_equation_0, values = (var_20784_cast_fp16, var_20442_cast_fp16))[name = tensor("op_21050_cast_fp16")]; + tensor var_21051_to_fp16 = const()[name = tensor("op_21051_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2153_cast_fp16 = mul(x = var_21050_cast_fp16, y = var_21051_to_fp16)[name = tensor("aw_chunk_2153_cast_fp16")]; + tensor var_21054_equation_0 = const()[name = tensor("op_21054_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_21054_cast_fp16 = einsum(equation = var_21054_equation_0, values = (var_20784_cast_fp16, var_20449_cast_fp16))[name = tensor("op_21054_cast_fp16")]; + tensor var_21055_to_fp16 = const()[name = tensor("op_21055_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2155_cast_fp16 = mul(x = var_21054_cast_fp16, y = var_21055_to_fp16)[name = tensor("aw_chunk_2155_cast_fp16")]; + tensor var_21058_equation_0 = const()[name = tensor("op_21058_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_21058_cast_fp16 = einsum(equation = var_21058_equation_0, values = (var_20784_cast_fp16, var_20456_cast_fp16))[name = tensor("op_21058_cast_fp16")]; + tensor var_21059_to_fp16 = const()[name = tensor("op_21059_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2157_cast_fp16 = mul(x = var_21058_cast_fp16, y = var_21059_to_fp16)[name = tensor("aw_chunk_2157_cast_fp16")]; + tensor var_21062_equation_0 = const()[name = tensor("op_21062_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_21062_cast_fp16 = einsum(equation = var_21062_equation_0, values = (var_20784_cast_fp16, var_20463_cast_fp16))[name = tensor("op_21062_cast_fp16")]; + tensor var_21063_to_fp16 = const()[name = tensor("op_21063_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2159_cast_fp16 = mul(x = var_21062_cast_fp16, y = var_21063_to_fp16)[name = tensor("aw_chunk_2159_cast_fp16")]; + tensor var_21066_equation_0 = const()[name = tensor("op_21066_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_21066_cast_fp16 = einsum(equation = var_21066_equation_0, values = (var_20788_cast_fp16, var_20470_cast_fp16))[name = tensor("op_21066_cast_fp16")]; + tensor var_21067_to_fp16 = const()[name = tensor("op_21067_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2161_cast_fp16 = mul(x = var_21066_cast_fp16, y = var_21067_to_fp16)[name = tensor("aw_chunk_2161_cast_fp16")]; + tensor var_21070_equation_0 = const()[name = tensor("op_21070_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_21070_cast_fp16 = einsum(equation = var_21070_equation_0, values = (var_20788_cast_fp16, var_20477_cast_fp16))[name = tensor("op_21070_cast_fp16")]; + tensor var_21071_to_fp16 = const()[name = tensor("op_21071_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2163_cast_fp16 = mul(x = var_21070_cast_fp16, y = var_21071_to_fp16)[name = tensor("aw_chunk_2163_cast_fp16")]; + tensor var_21074_equation_0 = const()[name = tensor("op_21074_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_21074_cast_fp16 = einsum(equation = var_21074_equation_0, values = (var_20788_cast_fp16, var_20484_cast_fp16))[name = tensor("op_21074_cast_fp16")]; + tensor var_21075_to_fp16 = const()[name = tensor("op_21075_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2165_cast_fp16 = mul(x = var_21074_cast_fp16, y = var_21075_to_fp16)[name = tensor("aw_chunk_2165_cast_fp16")]; + tensor var_21078_equation_0 = const()[name = tensor("op_21078_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_21078_cast_fp16 = einsum(equation = var_21078_equation_0, values = (var_20788_cast_fp16, var_20491_cast_fp16))[name = tensor("op_21078_cast_fp16")]; + tensor var_21079_to_fp16 = const()[name = tensor("op_21079_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2167_cast_fp16 = mul(x = var_21078_cast_fp16, y = var_21079_to_fp16)[name = tensor("aw_chunk_2167_cast_fp16")]; + tensor var_21082_equation_0 = const()[name = tensor("op_21082_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_21082_cast_fp16 = einsum(equation = var_21082_equation_0, values = (var_20792_cast_fp16, var_20498_cast_fp16))[name = tensor("op_21082_cast_fp16")]; + tensor var_21083_to_fp16 = const()[name = tensor("op_21083_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2169_cast_fp16 = mul(x = var_21082_cast_fp16, y = var_21083_to_fp16)[name = tensor("aw_chunk_2169_cast_fp16")]; + tensor var_21086_equation_0 = const()[name = tensor("op_21086_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_21086_cast_fp16 = einsum(equation = var_21086_equation_0, values = (var_20792_cast_fp16, var_20505_cast_fp16))[name = tensor("op_21086_cast_fp16")]; + tensor var_21087_to_fp16 = const()[name = tensor("op_21087_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2171_cast_fp16 = mul(x = var_21086_cast_fp16, y = var_21087_to_fp16)[name = tensor("aw_chunk_2171_cast_fp16")]; + tensor var_21090_equation_0 = const()[name = tensor("op_21090_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_21090_cast_fp16 = einsum(equation = var_21090_equation_0, values = (var_20792_cast_fp16, var_20512_cast_fp16))[name = tensor("op_21090_cast_fp16")]; + tensor var_21091_to_fp16 = const()[name = tensor("op_21091_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2173_cast_fp16 = mul(x = var_21090_cast_fp16, y = var_21091_to_fp16)[name = tensor("aw_chunk_2173_cast_fp16")]; + tensor var_21094_equation_0 = const()[name = tensor("op_21094_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_21094_cast_fp16 = einsum(equation = var_21094_equation_0, values = (var_20792_cast_fp16, var_20519_cast_fp16))[name = tensor("op_21094_cast_fp16")]; + tensor var_21095_to_fp16 = const()[name = tensor("op_21095_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2175_cast_fp16 = mul(x = var_21094_cast_fp16, y = var_21095_to_fp16)[name = tensor("aw_chunk_2175_cast_fp16")]; + tensor var_21098_equation_0 = const()[name = tensor("op_21098_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_21098_cast_fp16 = einsum(equation = var_21098_equation_0, values = (var_20796_cast_fp16, var_20526_cast_fp16))[name = tensor("op_21098_cast_fp16")]; + tensor var_21099_to_fp16 = const()[name = tensor("op_21099_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2177_cast_fp16 = mul(x = var_21098_cast_fp16, y = var_21099_to_fp16)[name = tensor("aw_chunk_2177_cast_fp16")]; + tensor var_21102_equation_0 = const()[name = tensor("op_21102_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_21102_cast_fp16 = einsum(equation = var_21102_equation_0, values = (var_20796_cast_fp16, var_20533_cast_fp16))[name = tensor("op_21102_cast_fp16")]; + tensor var_21103_to_fp16 = const()[name = tensor("op_21103_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2179_cast_fp16 = mul(x = var_21102_cast_fp16, y = var_21103_to_fp16)[name = tensor("aw_chunk_2179_cast_fp16")]; + tensor var_21106_equation_0 = const()[name = tensor("op_21106_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_21106_cast_fp16 = einsum(equation = var_21106_equation_0, values = (var_20796_cast_fp16, var_20540_cast_fp16))[name = tensor("op_21106_cast_fp16")]; + tensor var_21107_to_fp16 = const()[name = tensor("op_21107_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2181_cast_fp16 = mul(x = var_21106_cast_fp16, y = var_21107_to_fp16)[name = tensor("aw_chunk_2181_cast_fp16")]; + tensor var_21110_equation_0 = const()[name = tensor("op_21110_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_21110_cast_fp16 = einsum(equation = var_21110_equation_0, values = (var_20796_cast_fp16, var_20547_cast_fp16))[name = tensor("op_21110_cast_fp16")]; + tensor var_21111_to_fp16 = const()[name = tensor("op_21111_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2183_cast_fp16 = mul(x = var_21110_cast_fp16, y = var_21111_to_fp16)[name = tensor("aw_chunk_2183_cast_fp16")]; + tensor var_21114_equation_0 = const()[name = tensor("op_21114_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_21114_cast_fp16 = einsum(equation = var_21114_equation_0, values = (var_20800_cast_fp16, var_20554_cast_fp16))[name = tensor("op_21114_cast_fp16")]; + tensor var_21115_to_fp16 = const()[name = tensor("op_21115_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2185_cast_fp16 = mul(x = var_21114_cast_fp16, y = var_21115_to_fp16)[name = tensor("aw_chunk_2185_cast_fp16")]; + tensor var_21118_equation_0 = const()[name = tensor("op_21118_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_21118_cast_fp16 = einsum(equation = var_21118_equation_0, values = (var_20800_cast_fp16, var_20561_cast_fp16))[name = tensor("op_21118_cast_fp16")]; + tensor var_21119_to_fp16 = const()[name = tensor("op_21119_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2187_cast_fp16 = mul(x = var_21118_cast_fp16, y = var_21119_to_fp16)[name = tensor("aw_chunk_2187_cast_fp16")]; + tensor var_21122_equation_0 = const()[name = tensor("op_21122_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_21122_cast_fp16 = einsum(equation = var_21122_equation_0, values = (var_20800_cast_fp16, var_20568_cast_fp16))[name = tensor("op_21122_cast_fp16")]; + tensor var_21123_to_fp16 = const()[name = tensor("op_21123_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2189_cast_fp16 = mul(x = var_21122_cast_fp16, y = var_21123_to_fp16)[name = tensor("aw_chunk_2189_cast_fp16")]; + tensor var_21126_equation_0 = const()[name = tensor("op_21126_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_21126_cast_fp16 = einsum(equation = var_21126_equation_0, values = (var_20800_cast_fp16, var_20575_cast_fp16))[name = tensor("op_21126_cast_fp16")]; + tensor var_21127_to_fp16 = const()[name = tensor("op_21127_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2191_cast_fp16 = mul(x = var_21126_cast_fp16, y = var_21127_to_fp16)[name = tensor("aw_chunk_2191_cast_fp16")]; + tensor var_21130_equation_0 = const()[name = tensor("op_21130_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_21130_cast_fp16 = einsum(equation = var_21130_equation_0, values = (var_20804_cast_fp16, var_20582_cast_fp16))[name = tensor("op_21130_cast_fp16")]; + tensor var_21131_to_fp16 = const()[name = tensor("op_21131_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2193_cast_fp16 = mul(x = var_21130_cast_fp16, y = var_21131_to_fp16)[name = tensor("aw_chunk_2193_cast_fp16")]; + tensor var_21134_equation_0 = const()[name = tensor("op_21134_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_21134_cast_fp16 = einsum(equation = var_21134_equation_0, values = (var_20804_cast_fp16, var_20589_cast_fp16))[name = tensor("op_21134_cast_fp16")]; + tensor var_21135_to_fp16 = const()[name = tensor("op_21135_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2195_cast_fp16 = mul(x = var_21134_cast_fp16, y = var_21135_to_fp16)[name = tensor("aw_chunk_2195_cast_fp16")]; + tensor var_21138_equation_0 = const()[name = tensor("op_21138_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_21138_cast_fp16 = einsum(equation = var_21138_equation_0, values = (var_20804_cast_fp16, var_20596_cast_fp16))[name = tensor("op_21138_cast_fp16")]; + tensor var_21139_to_fp16 = const()[name = tensor("op_21139_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2197_cast_fp16 = mul(x = var_21138_cast_fp16, y = var_21139_to_fp16)[name = tensor("aw_chunk_2197_cast_fp16")]; + tensor var_21142_equation_0 = const()[name = tensor("op_21142_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_21142_cast_fp16 = einsum(equation = var_21142_equation_0, values = (var_20804_cast_fp16, var_20603_cast_fp16))[name = tensor("op_21142_cast_fp16")]; + tensor var_21143_to_fp16 = const()[name = tensor("op_21143_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2199_cast_fp16 = mul(x = var_21142_cast_fp16, y = var_21143_to_fp16)[name = tensor("aw_chunk_2199_cast_fp16")]; + tensor var_21146_equation_0 = const()[name = tensor("op_21146_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_21146_cast_fp16 = einsum(equation = var_21146_equation_0, values = (var_20808_cast_fp16, var_20610_cast_fp16))[name = tensor("op_21146_cast_fp16")]; + tensor var_21147_to_fp16 = const()[name = tensor("op_21147_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2201_cast_fp16 = mul(x = var_21146_cast_fp16, y = var_21147_to_fp16)[name = tensor("aw_chunk_2201_cast_fp16")]; + tensor var_21150_equation_0 = const()[name = tensor("op_21150_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_21150_cast_fp16 = einsum(equation = var_21150_equation_0, values = (var_20808_cast_fp16, var_20617_cast_fp16))[name = tensor("op_21150_cast_fp16")]; + tensor var_21151_to_fp16 = const()[name = tensor("op_21151_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2203_cast_fp16 = mul(x = var_21150_cast_fp16, y = var_21151_to_fp16)[name = tensor("aw_chunk_2203_cast_fp16")]; + tensor var_21154_equation_0 = const()[name = tensor("op_21154_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_21154_cast_fp16 = einsum(equation = var_21154_equation_0, values = (var_20808_cast_fp16, var_20624_cast_fp16))[name = tensor("op_21154_cast_fp16")]; + tensor var_21155_to_fp16 = const()[name = tensor("op_21155_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2205_cast_fp16 = mul(x = var_21154_cast_fp16, y = var_21155_to_fp16)[name = tensor("aw_chunk_2205_cast_fp16")]; + tensor var_21158_equation_0 = const()[name = tensor("op_21158_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_21158_cast_fp16 = einsum(equation = var_21158_equation_0, values = (var_20808_cast_fp16, var_20631_cast_fp16))[name = tensor("op_21158_cast_fp16")]; + tensor var_21159_to_fp16 = const()[name = tensor("op_21159_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2207_cast_fp16 = mul(x = var_21158_cast_fp16, y = var_21159_to_fp16)[name = tensor("aw_chunk_2207_cast_fp16")]; + tensor var_21162_equation_0 = const()[name = tensor("op_21162_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_21162_cast_fp16 = einsum(equation = var_21162_equation_0, values = (var_20812_cast_fp16, var_20638_cast_fp16))[name = tensor("op_21162_cast_fp16")]; + tensor var_21163_to_fp16 = const()[name = tensor("op_21163_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2209_cast_fp16 = mul(x = var_21162_cast_fp16, y = var_21163_to_fp16)[name = tensor("aw_chunk_2209_cast_fp16")]; + tensor var_21166_equation_0 = const()[name = tensor("op_21166_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_21166_cast_fp16 = einsum(equation = var_21166_equation_0, values = (var_20812_cast_fp16, var_20645_cast_fp16))[name = tensor("op_21166_cast_fp16")]; + tensor var_21167_to_fp16 = const()[name = tensor("op_21167_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2211_cast_fp16 = mul(x = var_21166_cast_fp16, y = var_21167_to_fp16)[name = tensor("aw_chunk_2211_cast_fp16")]; + tensor var_21170_equation_0 = const()[name = tensor("op_21170_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_21170_cast_fp16 = einsum(equation = var_21170_equation_0, values = (var_20812_cast_fp16, var_20652_cast_fp16))[name = tensor("op_21170_cast_fp16")]; + tensor var_21171_to_fp16 = const()[name = tensor("op_21171_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2213_cast_fp16 = mul(x = var_21170_cast_fp16, y = var_21171_to_fp16)[name = tensor("aw_chunk_2213_cast_fp16")]; + tensor var_21174_equation_0 = const()[name = tensor("op_21174_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_21174_cast_fp16 = einsum(equation = var_21174_equation_0, values = (var_20812_cast_fp16, var_20659_cast_fp16))[name = tensor("op_21174_cast_fp16")]; + tensor var_21175_to_fp16 = const()[name = tensor("op_21175_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2215_cast_fp16 = mul(x = var_21174_cast_fp16, y = var_21175_to_fp16)[name = tensor("aw_chunk_2215_cast_fp16")]; + tensor var_21178_equation_0 = const()[name = tensor("op_21178_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_21178_cast_fp16 = einsum(equation = var_21178_equation_0, values = (var_20816_cast_fp16, var_20666_cast_fp16))[name = tensor("op_21178_cast_fp16")]; + tensor var_21179_to_fp16 = const()[name = tensor("op_21179_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2217_cast_fp16 = mul(x = var_21178_cast_fp16, y = var_21179_to_fp16)[name = tensor("aw_chunk_2217_cast_fp16")]; + tensor var_21182_equation_0 = const()[name = tensor("op_21182_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_21182_cast_fp16 = einsum(equation = var_21182_equation_0, values = (var_20816_cast_fp16, var_20673_cast_fp16))[name = tensor("op_21182_cast_fp16")]; + tensor var_21183_to_fp16 = const()[name = tensor("op_21183_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2219_cast_fp16 = mul(x = var_21182_cast_fp16, y = var_21183_to_fp16)[name = tensor("aw_chunk_2219_cast_fp16")]; + tensor var_21186_equation_0 = const()[name = tensor("op_21186_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_21186_cast_fp16 = einsum(equation = var_21186_equation_0, values = (var_20816_cast_fp16, var_20680_cast_fp16))[name = tensor("op_21186_cast_fp16")]; + tensor var_21187_to_fp16 = const()[name = tensor("op_21187_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2221_cast_fp16 = mul(x = var_21186_cast_fp16, y = var_21187_to_fp16)[name = tensor("aw_chunk_2221_cast_fp16")]; + tensor var_21190_equation_0 = const()[name = tensor("op_21190_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_21190_cast_fp16 = einsum(equation = var_21190_equation_0, values = (var_20816_cast_fp16, var_20687_cast_fp16))[name = tensor("op_21190_cast_fp16")]; + tensor var_21191_to_fp16 = const()[name = tensor("op_21191_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2223_cast_fp16 = mul(x = var_21190_cast_fp16, y = var_21191_to_fp16)[name = tensor("aw_chunk_2223_cast_fp16")]; + tensor var_21194_equation_0 = const()[name = tensor("op_21194_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_21194_cast_fp16 = einsum(equation = var_21194_equation_0, values = (var_20820_cast_fp16, var_20694_cast_fp16))[name = tensor("op_21194_cast_fp16")]; + tensor var_21195_to_fp16 = const()[name = tensor("op_21195_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2225_cast_fp16 = mul(x = var_21194_cast_fp16, y = var_21195_to_fp16)[name = tensor("aw_chunk_2225_cast_fp16")]; + tensor var_21198_equation_0 = const()[name = tensor("op_21198_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_21198_cast_fp16 = einsum(equation = var_21198_equation_0, values = (var_20820_cast_fp16, var_20701_cast_fp16))[name = tensor("op_21198_cast_fp16")]; + tensor var_21199_to_fp16 = const()[name = tensor("op_21199_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2227_cast_fp16 = mul(x = var_21198_cast_fp16, y = var_21199_to_fp16)[name = tensor("aw_chunk_2227_cast_fp16")]; + tensor var_21202_equation_0 = const()[name = tensor("op_21202_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_21202_cast_fp16 = einsum(equation = var_21202_equation_0, values = (var_20820_cast_fp16, var_20708_cast_fp16))[name = tensor("op_21202_cast_fp16")]; + tensor var_21203_to_fp16 = const()[name = tensor("op_21203_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2229_cast_fp16 = mul(x = var_21202_cast_fp16, y = var_21203_to_fp16)[name = tensor("aw_chunk_2229_cast_fp16")]; + tensor var_21206_equation_0 = const()[name = tensor("op_21206_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_21206_cast_fp16 = einsum(equation = var_21206_equation_0, values = (var_20820_cast_fp16, var_20715_cast_fp16))[name = tensor("op_21206_cast_fp16")]; + tensor var_21207_to_fp16 = const()[name = tensor("op_21207_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2231_cast_fp16 = mul(x = var_21206_cast_fp16, y = var_21207_to_fp16)[name = tensor("aw_chunk_2231_cast_fp16")]; + tensor var_21210_equation_0 = const()[name = tensor("op_21210_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_21210_cast_fp16 = einsum(equation = var_21210_equation_0, values = (var_20824_cast_fp16, var_20722_cast_fp16))[name = tensor("op_21210_cast_fp16")]; + tensor var_21211_to_fp16 = const()[name = tensor("op_21211_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2233_cast_fp16 = mul(x = var_21210_cast_fp16, y = var_21211_to_fp16)[name = tensor("aw_chunk_2233_cast_fp16")]; + tensor var_21214_equation_0 = const()[name = tensor("op_21214_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_21214_cast_fp16 = einsum(equation = var_21214_equation_0, values = (var_20824_cast_fp16, var_20729_cast_fp16))[name = tensor("op_21214_cast_fp16")]; + tensor var_21215_to_fp16 = const()[name = tensor("op_21215_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2235_cast_fp16 = mul(x = var_21214_cast_fp16, y = var_21215_to_fp16)[name = tensor("aw_chunk_2235_cast_fp16")]; + tensor var_21218_equation_0 = const()[name = tensor("op_21218_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_21218_cast_fp16 = einsum(equation = var_21218_equation_0, values = (var_20824_cast_fp16, var_20736_cast_fp16))[name = tensor("op_21218_cast_fp16")]; + tensor var_21219_to_fp16 = const()[name = tensor("op_21219_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2237_cast_fp16 = mul(x = var_21218_cast_fp16, y = var_21219_to_fp16)[name = tensor("aw_chunk_2237_cast_fp16")]; + tensor var_21222_equation_0 = const()[name = tensor("op_21222_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_21222_cast_fp16 = einsum(equation = var_21222_equation_0, values = (var_20824_cast_fp16, var_20743_cast_fp16))[name = tensor("op_21222_cast_fp16")]; + tensor var_21223_to_fp16 = const()[name = tensor("op_21223_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2239_cast_fp16 = mul(x = var_21222_cast_fp16, y = var_21223_to_fp16)[name = tensor("aw_chunk_2239_cast_fp16")]; + tensor var_21225_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2081_cast_fp16)[name = tensor("op_21225_cast_fp16")]; + tensor var_21226_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2083_cast_fp16)[name = tensor("op_21226_cast_fp16")]; + tensor var_21227_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2085_cast_fp16)[name = tensor("op_21227_cast_fp16")]; + tensor var_21228_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2087_cast_fp16)[name = tensor("op_21228_cast_fp16")]; + tensor var_21229_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2089_cast_fp16)[name = tensor("op_21229_cast_fp16")]; + tensor var_21230_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2091_cast_fp16)[name = tensor("op_21230_cast_fp16")]; + tensor var_21231_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2093_cast_fp16)[name = tensor("op_21231_cast_fp16")]; + tensor var_21232_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2095_cast_fp16)[name = tensor("op_21232_cast_fp16")]; + tensor var_21233_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2097_cast_fp16)[name = tensor("op_21233_cast_fp16")]; + tensor var_21234_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2099_cast_fp16)[name = tensor("op_21234_cast_fp16")]; + tensor var_21235_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2101_cast_fp16)[name = tensor("op_21235_cast_fp16")]; + tensor var_21236_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2103_cast_fp16)[name = tensor("op_21236_cast_fp16")]; + tensor var_21237_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2105_cast_fp16)[name = tensor("op_21237_cast_fp16")]; + tensor var_21238_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2107_cast_fp16)[name = tensor("op_21238_cast_fp16")]; + tensor var_21239_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2109_cast_fp16)[name = tensor("op_21239_cast_fp16")]; + tensor var_21240_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2111_cast_fp16)[name = tensor("op_21240_cast_fp16")]; + tensor var_21241_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2113_cast_fp16)[name = tensor("op_21241_cast_fp16")]; + tensor var_21242_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2115_cast_fp16)[name = tensor("op_21242_cast_fp16")]; + tensor var_21243_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2117_cast_fp16)[name = tensor("op_21243_cast_fp16")]; + tensor var_21244_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2119_cast_fp16)[name = tensor("op_21244_cast_fp16")]; + tensor var_21245_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2121_cast_fp16)[name = tensor("op_21245_cast_fp16")]; + tensor var_21246_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2123_cast_fp16)[name = tensor("op_21246_cast_fp16")]; + tensor var_21247_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2125_cast_fp16)[name = tensor("op_21247_cast_fp16")]; + tensor var_21248_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2127_cast_fp16)[name = tensor("op_21248_cast_fp16")]; + tensor var_21249_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2129_cast_fp16)[name = tensor("op_21249_cast_fp16")]; + tensor var_21250_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2131_cast_fp16)[name = tensor("op_21250_cast_fp16")]; + tensor var_21251_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2133_cast_fp16)[name = tensor("op_21251_cast_fp16")]; + tensor var_21252_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2135_cast_fp16)[name = tensor("op_21252_cast_fp16")]; + tensor var_21253_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2137_cast_fp16)[name = tensor("op_21253_cast_fp16")]; + tensor var_21254_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2139_cast_fp16)[name = tensor("op_21254_cast_fp16")]; + tensor var_21255_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2141_cast_fp16)[name = tensor("op_21255_cast_fp16")]; + tensor var_21256_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2143_cast_fp16)[name = tensor("op_21256_cast_fp16")]; + tensor var_21257_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2145_cast_fp16)[name = tensor("op_21257_cast_fp16")]; + tensor var_21258_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2147_cast_fp16)[name = tensor("op_21258_cast_fp16")]; + tensor var_21259_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2149_cast_fp16)[name = tensor("op_21259_cast_fp16")]; + tensor var_21260_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2151_cast_fp16)[name = tensor("op_21260_cast_fp16")]; + tensor var_21261_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2153_cast_fp16)[name = tensor("op_21261_cast_fp16")]; + tensor var_21262_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2155_cast_fp16)[name = tensor("op_21262_cast_fp16")]; + tensor var_21263_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2157_cast_fp16)[name = tensor("op_21263_cast_fp16")]; + tensor var_21264_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2159_cast_fp16)[name = tensor("op_21264_cast_fp16")]; + tensor var_21265_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2161_cast_fp16)[name = tensor("op_21265_cast_fp16")]; + tensor var_21266_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2163_cast_fp16)[name = tensor("op_21266_cast_fp16")]; + tensor var_21267_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2165_cast_fp16)[name = tensor("op_21267_cast_fp16")]; + tensor var_21268_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2167_cast_fp16)[name = tensor("op_21268_cast_fp16")]; + tensor var_21269_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2169_cast_fp16)[name = tensor("op_21269_cast_fp16")]; + tensor var_21270_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2171_cast_fp16)[name = tensor("op_21270_cast_fp16")]; + tensor var_21271_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2173_cast_fp16)[name = tensor("op_21271_cast_fp16")]; + tensor var_21272_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2175_cast_fp16)[name = tensor("op_21272_cast_fp16")]; + tensor var_21273_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2177_cast_fp16)[name = tensor("op_21273_cast_fp16")]; + tensor var_21274_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2179_cast_fp16)[name = tensor("op_21274_cast_fp16")]; + tensor var_21275_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2181_cast_fp16)[name = tensor("op_21275_cast_fp16")]; + tensor var_21276_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2183_cast_fp16)[name = tensor("op_21276_cast_fp16")]; + tensor var_21277_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2185_cast_fp16)[name = tensor("op_21277_cast_fp16")]; + tensor var_21278_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2187_cast_fp16)[name = tensor("op_21278_cast_fp16")]; + tensor var_21279_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2189_cast_fp16)[name = tensor("op_21279_cast_fp16")]; + tensor var_21280_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2191_cast_fp16)[name = tensor("op_21280_cast_fp16")]; + tensor var_21281_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2193_cast_fp16)[name = tensor("op_21281_cast_fp16")]; + tensor var_21282_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2195_cast_fp16)[name = tensor("op_21282_cast_fp16")]; + tensor var_21283_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2197_cast_fp16)[name = tensor("op_21283_cast_fp16")]; + tensor var_21284_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2199_cast_fp16)[name = tensor("op_21284_cast_fp16")]; + tensor var_21285_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2201_cast_fp16)[name = tensor("op_21285_cast_fp16")]; + tensor var_21286_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2203_cast_fp16)[name = tensor("op_21286_cast_fp16")]; + tensor var_21287_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2205_cast_fp16)[name = tensor("op_21287_cast_fp16")]; + tensor var_21288_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2207_cast_fp16)[name = tensor("op_21288_cast_fp16")]; + tensor var_21289_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2209_cast_fp16)[name = tensor("op_21289_cast_fp16")]; + tensor var_21290_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2211_cast_fp16)[name = tensor("op_21290_cast_fp16")]; + tensor var_21291_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2213_cast_fp16)[name = tensor("op_21291_cast_fp16")]; + tensor var_21292_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2215_cast_fp16)[name = tensor("op_21292_cast_fp16")]; + tensor var_21293_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2217_cast_fp16)[name = tensor("op_21293_cast_fp16")]; + tensor var_21294_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2219_cast_fp16)[name = tensor("op_21294_cast_fp16")]; + tensor var_21295_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2221_cast_fp16)[name = tensor("op_21295_cast_fp16")]; + tensor var_21296_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2223_cast_fp16)[name = tensor("op_21296_cast_fp16")]; + tensor var_21297_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2225_cast_fp16)[name = tensor("op_21297_cast_fp16")]; + tensor var_21298_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2227_cast_fp16)[name = tensor("op_21298_cast_fp16")]; + tensor var_21299_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2229_cast_fp16)[name = tensor("op_21299_cast_fp16")]; + tensor var_21300_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2231_cast_fp16)[name = tensor("op_21300_cast_fp16")]; + tensor var_21301_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2233_cast_fp16)[name = tensor("op_21301_cast_fp16")]; + tensor var_21302_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2235_cast_fp16)[name = tensor("op_21302_cast_fp16")]; + tensor var_21303_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2237_cast_fp16)[name = tensor("op_21303_cast_fp16")]; + tensor var_21304_cast_fp16 = softmax(axis = var_20050, x = aw_chunk_2239_cast_fp16)[name = tensor("op_21304_cast_fp16")]; + tensor var_21306_equation_0 = const()[name = tensor("op_21306_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21306_cast_fp16 = einsum(equation = var_21306_equation_0, values = (var_20826_cast_fp16, var_21225_cast_fp16))[name = tensor("op_21306_cast_fp16")]; + tensor var_21308_equation_0 = const()[name = tensor("op_21308_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21308_cast_fp16 = einsum(equation = var_21308_equation_0, values = (var_20826_cast_fp16, var_21226_cast_fp16))[name = tensor("op_21308_cast_fp16")]; + tensor var_21310_equation_0 = const()[name = tensor("op_21310_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21310_cast_fp16 = einsum(equation = var_21310_equation_0, values = (var_20826_cast_fp16, var_21227_cast_fp16))[name = tensor("op_21310_cast_fp16")]; + tensor var_21312_equation_0 = const()[name = tensor("op_21312_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21312_cast_fp16 = einsum(equation = var_21312_equation_0, values = (var_20826_cast_fp16, var_21228_cast_fp16))[name = tensor("op_21312_cast_fp16")]; + tensor var_21314_equation_0 = const()[name = tensor("op_21314_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21314_cast_fp16 = einsum(equation = var_21314_equation_0, values = (var_20830_cast_fp16, var_21229_cast_fp16))[name = tensor("op_21314_cast_fp16")]; + tensor var_21316_equation_0 = const()[name = tensor("op_21316_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21316_cast_fp16 = einsum(equation = var_21316_equation_0, values = (var_20830_cast_fp16, var_21230_cast_fp16))[name = tensor("op_21316_cast_fp16")]; + tensor var_21318_equation_0 = const()[name = tensor("op_21318_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21318_cast_fp16 = einsum(equation = var_21318_equation_0, values = (var_20830_cast_fp16, var_21231_cast_fp16))[name = tensor("op_21318_cast_fp16")]; + tensor var_21320_equation_0 = const()[name = tensor("op_21320_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21320_cast_fp16 = einsum(equation = var_21320_equation_0, values = (var_20830_cast_fp16, var_21232_cast_fp16))[name = tensor("op_21320_cast_fp16")]; + tensor var_21322_equation_0 = const()[name = tensor("op_21322_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21322_cast_fp16 = einsum(equation = var_21322_equation_0, values = (var_20834_cast_fp16, var_21233_cast_fp16))[name = tensor("op_21322_cast_fp16")]; + tensor var_21324_equation_0 = const()[name = tensor("op_21324_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21324_cast_fp16 = einsum(equation = var_21324_equation_0, values = (var_20834_cast_fp16, var_21234_cast_fp16))[name = tensor("op_21324_cast_fp16")]; + tensor var_21326_equation_0 = const()[name = tensor("op_21326_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21326_cast_fp16 = einsum(equation = var_21326_equation_0, values = (var_20834_cast_fp16, var_21235_cast_fp16))[name = tensor("op_21326_cast_fp16")]; + tensor var_21328_equation_0 = const()[name = tensor("op_21328_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21328_cast_fp16 = einsum(equation = var_21328_equation_0, values = (var_20834_cast_fp16, var_21236_cast_fp16))[name = tensor("op_21328_cast_fp16")]; + tensor var_21330_equation_0 = const()[name = tensor("op_21330_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21330_cast_fp16 = einsum(equation = var_21330_equation_0, values = (var_20838_cast_fp16, var_21237_cast_fp16))[name = tensor("op_21330_cast_fp16")]; + tensor var_21332_equation_0 = const()[name = tensor("op_21332_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21332_cast_fp16 = einsum(equation = var_21332_equation_0, values = (var_20838_cast_fp16, var_21238_cast_fp16))[name = tensor("op_21332_cast_fp16")]; + tensor var_21334_equation_0 = const()[name = tensor("op_21334_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21334_cast_fp16 = einsum(equation = var_21334_equation_0, values = (var_20838_cast_fp16, var_21239_cast_fp16))[name = tensor("op_21334_cast_fp16")]; + tensor var_21336_equation_0 = const()[name = tensor("op_21336_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21336_cast_fp16 = einsum(equation = var_21336_equation_0, values = (var_20838_cast_fp16, var_21240_cast_fp16))[name = tensor("op_21336_cast_fp16")]; + tensor var_21338_equation_0 = const()[name = tensor("op_21338_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21338_cast_fp16 = einsum(equation = var_21338_equation_0, values = (var_20842_cast_fp16, var_21241_cast_fp16))[name = tensor("op_21338_cast_fp16")]; + tensor var_21340_equation_0 = const()[name = tensor("op_21340_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21340_cast_fp16 = einsum(equation = var_21340_equation_0, values = (var_20842_cast_fp16, var_21242_cast_fp16))[name = tensor("op_21340_cast_fp16")]; + tensor var_21342_equation_0 = const()[name = tensor("op_21342_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21342_cast_fp16 = einsum(equation = var_21342_equation_0, values = (var_20842_cast_fp16, var_21243_cast_fp16))[name = tensor("op_21342_cast_fp16")]; + tensor var_21344_equation_0 = const()[name = tensor("op_21344_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21344_cast_fp16 = einsum(equation = var_21344_equation_0, values = (var_20842_cast_fp16, var_21244_cast_fp16))[name = tensor("op_21344_cast_fp16")]; + tensor var_21346_equation_0 = const()[name = tensor("op_21346_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21346_cast_fp16 = einsum(equation = var_21346_equation_0, values = (var_20846_cast_fp16, var_21245_cast_fp16))[name = tensor("op_21346_cast_fp16")]; + tensor var_21348_equation_0 = const()[name = tensor("op_21348_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21348_cast_fp16 = einsum(equation = var_21348_equation_0, values = (var_20846_cast_fp16, var_21246_cast_fp16))[name = tensor("op_21348_cast_fp16")]; + tensor var_21350_equation_0 = const()[name = tensor("op_21350_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21350_cast_fp16 = einsum(equation = var_21350_equation_0, values = (var_20846_cast_fp16, var_21247_cast_fp16))[name = tensor("op_21350_cast_fp16")]; + tensor var_21352_equation_0 = const()[name = tensor("op_21352_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21352_cast_fp16 = einsum(equation = var_21352_equation_0, values = (var_20846_cast_fp16, var_21248_cast_fp16))[name = tensor("op_21352_cast_fp16")]; + tensor var_21354_equation_0 = const()[name = tensor("op_21354_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21354_cast_fp16 = einsum(equation = var_21354_equation_0, values = (var_20850_cast_fp16, var_21249_cast_fp16))[name = tensor("op_21354_cast_fp16")]; + tensor var_21356_equation_0 = const()[name = tensor("op_21356_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21356_cast_fp16 = einsum(equation = var_21356_equation_0, values = (var_20850_cast_fp16, var_21250_cast_fp16))[name = tensor("op_21356_cast_fp16")]; + tensor var_21358_equation_0 = const()[name = tensor("op_21358_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21358_cast_fp16 = einsum(equation = var_21358_equation_0, values = (var_20850_cast_fp16, var_21251_cast_fp16))[name = tensor("op_21358_cast_fp16")]; + tensor var_21360_equation_0 = const()[name = tensor("op_21360_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21360_cast_fp16 = einsum(equation = var_21360_equation_0, values = (var_20850_cast_fp16, var_21252_cast_fp16))[name = tensor("op_21360_cast_fp16")]; + tensor var_21362_equation_0 = const()[name = tensor("op_21362_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21362_cast_fp16 = einsum(equation = var_21362_equation_0, values = (var_20854_cast_fp16, var_21253_cast_fp16))[name = tensor("op_21362_cast_fp16")]; + tensor var_21364_equation_0 = const()[name = tensor("op_21364_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21364_cast_fp16 = einsum(equation = var_21364_equation_0, values = (var_20854_cast_fp16, var_21254_cast_fp16))[name = tensor("op_21364_cast_fp16")]; + tensor var_21366_equation_0 = const()[name = tensor("op_21366_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21366_cast_fp16 = einsum(equation = var_21366_equation_0, values = (var_20854_cast_fp16, var_21255_cast_fp16))[name = tensor("op_21366_cast_fp16")]; + tensor var_21368_equation_0 = const()[name = tensor("op_21368_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21368_cast_fp16 = einsum(equation = var_21368_equation_0, values = (var_20854_cast_fp16, var_21256_cast_fp16))[name = tensor("op_21368_cast_fp16")]; + tensor var_21370_equation_0 = const()[name = tensor("op_21370_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21370_cast_fp16 = einsum(equation = var_21370_equation_0, values = (var_20858_cast_fp16, var_21257_cast_fp16))[name = tensor("op_21370_cast_fp16")]; + tensor var_21372_equation_0 = const()[name = tensor("op_21372_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21372_cast_fp16 = einsum(equation = var_21372_equation_0, values = (var_20858_cast_fp16, var_21258_cast_fp16))[name = tensor("op_21372_cast_fp16")]; + tensor var_21374_equation_0 = const()[name = tensor("op_21374_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21374_cast_fp16 = einsum(equation = var_21374_equation_0, values = (var_20858_cast_fp16, var_21259_cast_fp16))[name = tensor("op_21374_cast_fp16")]; + tensor var_21376_equation_0 = const()[name = tensor("op_21376_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21376_cast_fp16 = einsum(equation = var_21376_equation_0, values = (var_20858_cast_fp16, var_21260_cast_fp16))[name = tensor("op_21376_cast_fp16")]; + tensor var_21378_equation_0 = const()[name = tensor("op_21378_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21378_cast_fp16 = einsum(equation = var_21378_equation_0, values = (var_20862_cast_fp16, var_21261_cast_fp16))[name = tensor("op_21378_cast_fp16")]; + tensor var_21380_equation_0 = const()[name = tensor("op_21380_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21380_cast_fp16 = einsum(equation = var_21380_equation_0, values = (var_20862_cast_fp16, var_21262_cast_fp16))[name = tensor("op_21380_cast_fp16")]; + tensor var_21382_equation_0 = const()[name = tensor("op_21382_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21382_cast_fp16 = einsum(equation = var_21382_equation_0, values = (var_20862_cast_fp16, var_21263_cast_fp16))[name = tensor("op_21382_cast_fp16")]; + tensor var_21384_equation_0 = const()[name = tensor("op_21384_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21384_cast_fp16 = einsum(equation = var_21384_equation_0, values = (var_20862_cast_fp16, var_21264_cast_fp16))[name = tensor("op_21384_cast_fp16")]; + tensor var_21386_equation_0 = const()[name = tensor("op_21386_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21386_cast_fp16 = einsum(equation = var_21386_equation_0, values = (var_20866_cast_fp16, var_21265_cast_fp16))[name = tensor("op_21386_cast_fp16")]; + tensor var_21388_equation_0 = const()[name = tensor("op_21388_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21388_cast_fp16 = einsum(equation = var_21388_equation_0, values = (var_20866_cast_fp16, var_21266_cast_fp16))[name = tensor("op_21388_cast_fp16")]; + tensor var_21390_equation_0 = const()[name = tensor("op_21390_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21390_cast_fp16 = einsum(equation = var_21390_equation_0, values = (var_20866_cast_fp16, var_21267_cast_fp16))[name = tensor("op_21390_cast_fp16")]; + tensor var_21392_equation_0 = const()[name = tensor("op_21392_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21392_cast_fp16 = einsum(equation = var_21392_equation_0, values = (var_20866_cast_fp16, var_21268_cast_fp16))[name = tensor("op_21392_cast_fp16")]; + tensor var_21394_equation_0 = const()[name = tensor("op_21394_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21394_cast_fp16 = einsum(equation = var_21394_equation_0, values = (var_20870_cast_fp16, var_21269_cast_fp16))[name = tensor("op_21394_cast_fp16")]; + tensor var_21396_equation_0 = const()[name = tensor("op_21396_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21396_cast_fp16 = einsum(equation = var_21396_equation_0, values = (var_20870_cast_fp16, var_21270_cast_fp16))[name = tensor("op_21396_cast_fp16")]; + tensor var_21398_equation_0 = const()[name = tensor("op_21398_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21398_cast_fp16 = einsum(equation = var_21398_equation_0, values = (var_20870_cast_fp16, var_21271_cast_fp16))[name = tensor("op_21398_cast_fp16")]; + tensor var_21400_equation_0 = const()[name = tensor("op_21400_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21400_cast_fp16 = einsum(equation = var_21400_equation_0, values = (var_20870_cast_fp16, var_21272_cast_fp16))[name = tensor("op_21400_cast_fp16")]; + tensor var_21402_equation_0 = const()[name = tensor("op_21402_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21402_cast_fp16 = einsum(equation = var_21402_equation_0, values = (var_20874_cast_fp16, var_21273_cast_fp16))[name = tensor("op_21402_cast_fp16")]; + tensor var_21404_equation_0 = const()[name = tensor("op_21404_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21404_cast_fp16 = einsum(equation = var_21404_equation_0, values = (var_20874_cast_fp16, var_21274_cast_fp16))[name = tensor("op_21404_cast_fp16")]; + tensor var_21406_equation_0 = const()[name = tensor("op_21406_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21406_cast_fp16 = einsum(equation = var_21406_equation_0, values = (var_20874_cast_fp16, var_21275_cast_fp16))[name = tensor("op_21406_cast_fp16")]; + tensor var_21408_equation_0 = const()[name = tensor("op_21408_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21408_cast_fp16 = einsum(equation = var_21408_equation_0, values = (var_20874_cast_fp16, var_21276_cast_fp16))[name = tensor("op_21408_cast_fp16")]; + tensor var_21410_equation_0 = const()[name = tensor("op_21410_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21410_cast_fp16 = einsum(equation = var_21410_equation_0, values = (var_20878_cast_fp16, var_21277_cast_fp16))[name = tensor("op_21410_cast_fp16")]; + tensor var_21412_equation_0 = const()[name = tensor("op_21412_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21412_cast_fp16 = einsum(equation = var_21412_equation_0, values = (var_20878_cast_fp16, var_21278_cast_fp16))[name = tensor("op_21412_cast_fp16")]; + tensor var_21414_equation_0 = const()[name = tensor("op_21414_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21414_cast_fp16 = einsum(equation = var_21414_equation_0, values = (var_20878_cast_fp16, var_21279_cast_fp16))[name = tensor("op_21414_cast_fp16")]; + tensor var_21416_equation_0 = const()[name = tensor("op_21416_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21416_cast_fp16 = einsum(equation = var_21416_equation_0, values = (var_20878_cast_fp16, var_21280_cast_fp16))[name = tensor("op_21416_cast_fp16")]; + tensor var_21418_equation_0 = const()[name = tensor("op_21418_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21418_cast_fp16 = einsum(equation = var_21418_equation_0, values = (var_20882_cast_fp16, var_21281_cast_fp16))[name = tensor("op_21418_cast_fp16")]; + tensor var_21420_equation_0 = const()[name = tensor("op_21420_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21420_cast_fp16 = einsum(equation = var_21420_equation_0, values = (var_20882_cast_fp16, var_21282_cast_fp16))[name = tensor("op_21420_cast_fp16")]; + tensor var_21422_equation_0 = const()[name = tensor("op_21422_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21422_cast_fp16 = einsum(equation = var_21422_equation_0, values = (var_20882_cast_fp16, var_21283_cast_fp16))[name = tensor("op_21422_cast_fp16")]; + tensor var_21424_equation_0 = const()[name = tensor("op_21424_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21424_cast_fp16 = einsum(equation = var_21424_equation_0, values = (var_20882_cast_fp16, var_21284_cast_fp16))[name = tensor("op_21424_cast_fp16")]; + tensor var_21426_equation_0 = const()[name = tensor("op_21426_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21426_cast_fp16 = einsum(equation = var_21426_equation_0, values = (var_20886_cast_fp16, var_21285_cast_fp16))[name = tensor("op_21426_cast_fp16")]; + tensor var_21428_equation_0 = const()[name = tensor("op_21428_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21428_cast_fp16 = einsum(equation = var_21428_equation_0, values = (var_20886_cast_fp16, var_21286_cast_fp16))[name = tensor("op_21428_cast_fp16")]; + tensor var_21430_equation_0 = const()[name = tensor("op_21430_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21430_cast_fp16 = einsum(equation = var_21430_equation_0, values = (var_20886_cast_fp16, var_21287_cast_fp16))[name = tensor("op_21430_cast_fp16")]; + tensor var_21432_equation_0 = const()[name = tensor("op_21432_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21432_cast_fp16 = einsum(equation = var_21432_equation_0, values = (var_20886_cast_fp16, var_21288_cast_fp16))[name = tensor("op_21432_cast_fp16")]; + tensor var_21434_equation_0 = const()[name = tensor("op_21434_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21434_cast_fp16 = einsum(equation = var_21434_equation_0, values = (var_20890_cast_fp16, var_21289_cast_fp16))[name = tensor("op_21434_cast_fp16")]; + tensor var_21436_equation_0 = const()[name = tensor("op_21436_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21436_cast_fp16 = einsum(equation = var_21436_equation_0, values = (var_20890_cast_fp16, var_21290_cast_fp16))[name = tensor("op_21436_cast_fp16")]; + tensor var_21438_equation_0 = const()[name = tensor("op_21438_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21438_cast_fp16 = einsum(equation = var_21438_equation_0, values = (var_20890_cast_fp16, var_21291_cast_fp16))[name = tensor("op_21438_cast_fp16")]; + tensor var_21440_equation_0 = const()[name = tensor("op_21440_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21440_cast_fp16 = einsum(equation = var_21440_equation_0, values = (var_20890_cast_fp16, var_21292_cast_fp16))[name = tensor("op_21440_cast_fp16")]; + tensor var_21442_equation_0 = const()[name = tensor("op_21442_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21442_cast_fp16 = einsum(equation = var_21442_equation_0, values = (var_20894_cast_fp16, var_21293_cast_fp16))[name = tensor("op_21442_cast_fp16")]; + tensor var_21444_equation_0 = const()[name = tensor("op_21444_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21444_cast_fp16 = einsum(equation = var_21444_equation_0, values = (var_20894_cast_fp16, var_21294_cast_fp16))[name = tensor("op_21444_cast_fp16")]; + tensor var_21446_equation_0 = const()[name = tensor("op_21446_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21446_cast_fp16 = einsum(equation = var_21446_equation_0, values = (var_20894_cast_fp16, var_21295_cast_fp16))[name = tensor("op_21446_cast_fp16")]; + tensor var_21448_equation_0 = const()[name = tensor("op_21448_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21448_cast_fp16 = einsum(equation = var_21448_equation_0, values = (var_20894_cast_fp16, var_21296_cast_fp16))[name = tensor("op_21448_cast_fp16")]; + tensor var_21450_equation_0 = const()[name = tensor("op_21450_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21450_cast_fp16 = einsum(equation = var_21450_equation_0, values = (var_20898_cast_fp16, var_21297_cast_fp16))[name = tensor("op_21450_cast_fp16")]; + tensor var_21452_equation_0 = const()[name = tensor("op_21452_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21452_cast_fp16 = einsum(equation = var_21452_equation_0, values = (var_20898_cast_fp16, var_21298_cast_fp16))[name = tensor("op_21452_cast_fp16")]; + tensor var_21454_equation_0 = const()[name = tensor("op_21454_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21454_cast_fp16 = einsum(equation = var_21454_equation_0, values = (var_20898_cast_fp16, var_21299_cast_fp16))[name = tensor("op_21454_cast_fp16")]; + tensor var_21456_equation_0 = const()[name = tensor("op_21456_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21456_cast_fp16 = einsum(equation = var_21456_equation_0, values = (var_20898_cast_fp16, var_21300_cast_fp16))[name = tensor("op_21456_cast_fp16")]; + tensor var_21458_equation_0 = const()[name = tensor("op_21458_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21458_cast_fp16 = einsum(equation = var_21458_equation_0, values = (var_20902_cast_fp16, var_21301_cast_fp16))[name = tensor("op_21458_cast_fp16")]; + tensor var_21460_equation_0 = const()[name = tensor("op_21460_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21460_cast_fp16 = einsum(equation = var_21460_equation_0, values = (var_20902_cast_fp16, var_21302_cast_fp16))[name = tensor("op_21460_cast_fp16")]; + tensor var_21462_equation_0 = const()[name = tensor("op_21462_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21462_cast_fp16 = einsum(equation = var_21462_equation_0, values = (var_20902_cast_fp16, var_21303_cast_fp16))[name = tensor("op_21462_cast_fp16")]; + tensor var_21464_equation_0 = const()[name = tensor("op_21464_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_21464_cast_fp16 = einsum(equation = var_21464_equation_0, values = (var_20902_cast_fp16, var_21304_cast_fp16))[name = tensor("op_21464_cast_fp16")]; + tensor var_21466_interleave_0 = const()[name = tensor("op_21466_interleave_0"), val = tensor(false)]; + tensor var_21466_cast_fp16 = concat(axis = var_20025, interleave = var_21466_interleave_0, values = (var_21306_cast_fp16, var_21308_cast_fp16, var_21310_cast_fp16, var_21312_cast_fp16))[name = tensor("op_21466_cast_fp16")]; + tensor var_21468_interleave_0 = const()[name = tensor("op_21468_interleave_0"), val = tensor(false)]; + tensor var_21468_cast_fp16 = concat(axis = var_20025, interleave = var_21468_interleave_0, values = (var_21314_cast_fp16, var_21316_cast_fp16, var_21318_cast_fp16, var_21320_cast_fp16))[name = tensor("op_21468_cast_fp16")]; + tensor var_21470_interleave_0 = const()[name = tensor("op_21470_interleave_0"), val = tensor(false)]; + tensor var_21470_cast_fp16 = concat(axis = var_20025, interleave = var_21470_interleave_0, values = (var_21322_cast_fp16, var_21324_cast_fp16, var_21326_cast_fp16, var_21328_cast_fp16))[name = tensor("op_21470_cast_fp16")]; + tensor var_21472_interleave_0 = const()[name = tensor("op_21472_interleave_0"), val = tensor(false)]; + tensor var_21472_cast_fp16 = concat(axis = var_20025, interleave = var_21472_interleave_0, values = (var_21330_cast_fp16, var_21332_cast_fp16, var_21334_cast_fp16, var_21336_cast_fp16))[name = tensor("op_21472_cast_fp16")]; + tensor var_21474_interleave_0 = const()[name = tensor("op_21474_interleave_0"), val = tensor(false)]; + tensor var_21474_cast_fp16 = concat(axis = var_20025, interleave = var_21474_interleave_0, values = (var_21338_cast_fp16, var_21340_cast_fp16, var_21342_cast_fp16, var_21344_cast_fp16))[name = tensor("op_21474_cast_fp16")]; + tensor var_21476_interleave_0 = const()[name = tensor("op_21476_interleave_0"), val = tensor(false)]; + tensor var_21476_cast_fp16 = concat(axis = var_20025, interleave = var_21476_interleave_0, values = (var_21346_cast_fp16, var_21348_cast_fp16, var_21350_cast_fp16, var_21352_cast_fp16))[name = tensor("op_21476_cast_fp16")]; + tensor var_21478_interleave_0 = const()[name = tensor("op_21478_interleave_0"), val = tensor(false)]; + tensor var_21478_cast_fp16 = concat(axis = var_20025, interleave = var_21478_interleave_0, values = (var_21354_cast_fp16, var_21356_cast_fp16, var_21358_cast_fp16, var_21360_cast_fp16))[name = tensor("op_21478_cast_fp16")]; + tensor var_21480_interleave_0 = const()[name = tensor("op_21480_interleave_0"), val = tensor(false)]; + tensor var_21480_cast_fp16 = concat(axis = var_20025, interleave = var_21480_interleave_0, values = (var_21362_cast_fp16, var_21364_cast_fp16, var_21366_cast_fp16, var_21368_cast_fp16))[name = tensor("op_21480_cast_fp16")]; + tensor var_21482_interleave_0 = const()[name = tensor("op_21482_interleave_0"), val = tensor(false)]; + tensor var_21482_cast_fp16 = concat(axis = var_20025, interleave = var_21482_interleave_0, values = (var_21370_cast_fp16, var_21372_cast_fp16, var_21374_cast_fp16, var_21376_cast_fp16))[name = tensor("op_21482_cast_fp16")]; + tensor var_21484_interleave_0 = const()[name = tensor("op_21484_interleave_0"), val = tensor(false)]; + tensor var_21484_cast_fp16 = concat(axis = var_20025, interleave = var_21484_interleave_0, values = (var_21378_cast_fp16, var_21380_cast_fp16, var_21382_cast_fp16, var_21384_cast_fp16))[name = tensor("op_21484_cast_fp16")]; + tensor var_21486_interleave_0 = const()[name = tensor("op_21486_interleave_0"), val = tensor(false)]; + tensor var_21486_cast_fp16 = concat(axis = var_20025, interleave = var_21486_interleave_0, values = (var_21386_cast_fp16, var_21388_cast_fp16, var_21390_cast_fp16, var_21392_cast_fp16))[name = tensor("op_21486_cast_fp16")]; + tensor var_21488_interleave_0 = const()[name = tensor("op_21488_interleave_0"), val = tensor(false)]; + tensor var_21488_cast_fp16 = concat(axis = var_20025, interleave = var_21488_interleave_0, values = (var_21394_cast_fp16, var_21396_cast_fp16, var_21398_cast_fp16, var_21400_cast_fp16))[name = tensor("op_21488_cast_fp16")]; + tensor var_21490_interleave_0 = const()[name = tensor("op_21490_interleave_0"), val = tensor(false)]; + tensor var_21490_cast_fp16 = concat(axis = var_20025, interleave = var_21490_interleave_0, values = (var_21402_cast_fp16, var_21404_cast_fp16, var_21406_cast_fp16, var_21408_cast_fp16))[name = tensor("op_21490_cast_fp16")]; + tensor var_21492_interleave_0 = const()[name = tensor("op_21492_interleave_0"), val = tensor(false)]; + tensor var_21492_cast_fp16 = concat(axis = var_20025, interleave = var_21492_interleave_0, values = (var_21410_cast_fp16, var_21412_cast_fp16, var_21414_cast_fp16, var_21416_cast_fp16))[name = tensor("op_21492_cast_fp16")]; + tensor var_21494_interleave_0 = const()[name = tensor("op_21494_interleave_0"), val = tensor(false)]; + tensor var_21494_cast_fp16 = concat(axis = var_20025, interleave = var_21494_interleave_0, values = (var_21418_cast_fp16, var_21420_cast_fp16, var_21422_cast_fp16, var_21424_cast_fp16))[name = tensor("op_21494_cast_fp16")]; + tensor var_21496_interleave_0 = const()[name = tensor("op_21496_interleave_0"), val = tensor(false)]; + tensor var_21496_cast_fp16 = concat(axis = var_20025, interleave = var_21496_interleave_0, values = (var_21426_cast_fp16, var_21428_cast_fp16, var_21430_cast_fp16, var_21432_cast_fp16))[name = tensor("op_21496_cast_fp16")]; + tensor var_21498_interleave_0 = const()[name = tensor("op_21498_interleave_0"), val = tensor(false)]; + tensor var_21498_cast_fp16 = concat(axis = var_20025, interleave = var_21498_interleave_0, values = (var_21434_cast_fp16, var_21436_cast_fp16, var_21438_cast_fp16, var_21440_cast_fp16))[name = tensor("op_21498_cast_fp16")]; + tensor var_21500_interleave_0 = const()[name = tensor("op_21500_interleave_0"), val = tensor(false)]; + tensor var_21500_cast_fp16 = concat(axis = var_20025, interleave = var_21500_interleave_0, values = (var_21442_cast_fp16, var_21444_cast_fp16, var_21446_cast_fp16, var_21448_cast_fp16))[name = tensor("op_21500_cast_fp16")]; + tensor var_21502_interleave_0 = const()[name = tensor("op_21502_interleave_0"), val = tensor(false)]; + tensor var_21502_cast_fp16 = concat(axis = var_20025, interleave = var_21502_interleave_0, values = (var_21450_cast_fp16, var_21452_cast_fp16, var_21454_cast_fp16, var_21456_cast_fp16))[name = tensor("op_21502_cast_fp16")]; + tensor var_21504_interleave_0 = const()[name = tensor("op_21504_interleave_0"), val = tensor(false)]; + tensor var_21504_cast_fp16 = concat(axis = var_20025, interleave = var_21504_interleave_0, values = (var_21458_cast_fp16, var_21460_cast_fp16, var_21462_cast_fp16, var_21464_cast_fp16))[name = tensor("op_21504_cast_fp16")]; + tensor input_105_interleave_0 = const()[name = tensor("input_105_interleave_0"), val = tensor(false)]; + tensor input_105_cast_fp16 = concat(axis = var_20050, interleave = input_105_interleave_0, values = (var_21466_cast_fp16, var_21468_cast_fp16, var_21470_cast_fp16, var_21472_cast_fp16, var_21474_cast_fp16, var_21476_cast_fp16, var_21478_cast_fp16, var_21480_cast_fp16, var_21482_cast_fp16, var_21484_cast_fp16, var_21486_cast_fp16, var_21488_cast_fp16, var_21490_cast_fp16, var_21492_cast_fp16, var_21494_cast_fp16, var_21496_cast_fp16, var_21498_cast_fp16, var_21500_cast_fp16, var_21502_cast_fp16, var_21504_cast_fp16))[name = tensor("input_105_cast_fp16")]; + tensor var_21509 = const()[name = tensor("op_21509"), val = tensor([1, 1])]; + tensor var_21511 = const()[name = tensor("op_21511"), val = tensor([1, 1])]; + tensor obj_55_pad_type_0 = const()[name = tensor("obj_55_pad_type_0"), val = tensor("custom")]; + tensor obj_55_pad_0 = const()[name = tensor("obj_55_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_13_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_13_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(535729280)))]; + tensor layers_13_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_13_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(539006144)))]; + tensor obj_55_cast_fp16 = conv(bias = layers_13_self_attn_o_proj_bias_to_fp16, dilations = var_21511, groups = var_20050, pad = obj_55_pad_0, pad_type = obj_55_pad_type_0, strides = var_21509, weight = layers_13_self_attn_o_proj_weight_to_fp16, x = input_105_cast_fp16)[name = tensor("obj_55_cast_fp16")]; + tensor inputs_55_cast_fp16 = add(x = inputs_53_cast_fp16, y = obj_55_cast_fp16)[name = tensor("inputs_55_cast_fp16")]; + tensor var_21517 = const()[name = tensor("op_21517"), val = tensor([1])]; + tensor channels_mean_55_cast_fp16 = reduce_mean(axes = var_21517, keep_dims = var_20051, x = inputs_55_cast_fp16)[name = tensor("channels_mean_55_cast_fp16")]; + tensor zero_mean_55_cast_fp16 = sub(x = inputs_55_cast_fp16, y = channels_mean_55_cast_fp16)[name = tensor("zero_mean_55_cast_fp16")]; + tensor zero_mean_sq_55_cast_fp16 = mul(x = zero_mean_55_cast_fp16, y = zero_mean_55_cast_fp16)[name = tensor("zero_mean_sq_55_cast_fp16")]; + tensor var_21521 = const()[name = tensor("op_21521"), val = tensor([1])]; + tensor var_21522_cast_fp16 = reduce_mean(axes = var_21521, keep_dims = var_20051, x = zero_mean_sq_55_cast_fp16)[name = tensor("op_21522_cast_fp16")]; + tensor var_21523_to_fp16 = const()[name = tensor("op_21523_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_21524_cast_fp16 = add(x = var_21522_cast_fp16, y = var_21523_to_fp16)[name = tensor("op_21524_cast_fp16")]; + tensor denom_55_epsilon_0_to_fp16 = const()[name = tensor("denom_55_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_55_cast_fp16 = rsqrt(epsilon = denom_55_epsilon_0_to_fp16, x = var_21524_cast_fp16)[name = tensor("denom_55_cast_fp16")]; + tensor out_55_cast_fp16 = mul(x = zero_mean_55_cast_fp16, y = denom_55_cast_fp16)[name = tensor("out_55_cast_fp16")]; + tensor input_107_gamma_0_to_fp16 = const()[name = tensor("input_107_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(539008768)))]; + tensor input_107_beta_0_to_fp16 = const()[name = tensor("input_107_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(539011392)))]; + tensor input_107_epsilon_0_to_fp16 = const()[name = tensor("input_107_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_107_cast_fp16 = batch_norm(beta = input_107_beta_0_to_fp16, epsilon = input_107_epsilon_0_to_fp16, gamma = input_107_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_55_cast_fp16)[name = tensor("input_107_cast_fp16")]; + tensor var_21535 = const()[name = tensor("op_21535"), val = tensor([1, 1])]; + tensor var_21537 = const()[name = tensor("op_21537"), val = tensor([1, 1])]; + tensor input_109_pad_type_0 = const()[name = tensor("input_109_pad_type_0"), val = tensor("custom")]; + tensor input_109_pad_0 = const()[name = tensor("input_109_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_13_fc1_weight_to_fp16 = const()[name = tensor("layers_13_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(539014016)))]; + tensor layers_13_fc1_bias_to_fp16 = const()[name = tensor("layers_13_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(552121280)))]; + tensor input_109_cast_fp16 = conv(bias = layers_13_fc1_bias_to_fp16, dilations = var_21537, groups = var_20050, pad = input_109_pad_0, pad_type = input_109_pad_type_0, strides = var_21535, weight = layers_13_fc1_weight_to_fp16, x = input_107_cast_fp16)[name = tensor("input_109_cast_fp16")]; + tensor input_111_mode_0 = const()[name = tensor("input_111_mode_0"), val = tensor("EXACT")]; + tensor input_111_cast_fp16 = gelu(mode = input_111_mode_0, x = input_109_cast_fp16)[name = tensor("input_111_cast_fp16")]; + tensor var_21543 = const()[name = tensor("op_21543"), val = tensor([1, 1])]; + tensor var_21545 = const()[name = tensor("op_21545"), val = tensor([1, 1])]; + tensor hidden_states_31_pad_type_0 = const()[name = tensor("hidden_states_31_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_31_pad_0 = const()[name = tensor("hidden_states_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_13_fc2_weight_to_fp16 = const()[name = tensor("layers_13_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(552131584)))]; + tensor layers_13_fc2_bias_to_fp16 = const()[name = tensor("layers_13_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(565238848)))]; + tensor hidden_states_31_cast_fp16 = conv(bias = layers_13_fc2_bias_to_fp16, dilations = var_21545, groups = var_20050, pad = hidden_states_31_pad_0, pad_type = hidden_states_31_pad_type_0, strides = var_21543, weight = layers_13_fc2_weight_to_fp16, x = input_111_cast_fp16)[name = tensor("hidden_states_31_cast_fp16")]; + tensor inputs_57_cast_fp16 = add(x = inputs_55_cast_fp16, y = hidden_states_31_cast_fp16)[name = tensor("inputs_57_cast_fp16")]; + tensor var_21552 = const()[name = tensor("op_21552"), val = tensor(3)]; + tensor var_21577 = const()[name = tensor("op_21577"), val = tensor(1)]; + tensor var_21578 = const()[name = tensor("op_21578"), val = tensor(true)]; + tensor var_21588 = const()[name = tensor("op_21588"), val = tensor([1])]; + tensor channels_mean_57_cast_fp16 = reduce_mean(axes = var_21588, keep_dims = var_21578, x = inputs_57_cast_fp16)[name = tensor("channels_mean_57_cast_fp16")]; + tensor zero_mean_57_cast_fp16 = sub(x = inputs_57_cast_fp16, y = channels_mean_57_cast_fp16)[name = tensor("zero_mean_57_cast_fp16")]; + tensor zero_mean_sq_57_cast_fp16 = mul(x = zero_mean_57_cast_fp16, y = zero_mean_57_cast_fp16)[name = tensor("zero_mean_sq_57_cast_fp16")]; + tensor var_21592 = const()[name = tensor("op_21592"), val = tensor([1])]; + tensor var_21593_cast_fp16 = reduce_mean(axes = var_21592, keep_dims = var_21578, x = zero_mean_sq_57_cast_fp16)[name = tensor("op_21593_cast_fp16")]; + tensor var_21594_to_fp16 = const()[name = tensor("op_21594_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_21595_cast_fp16 = add(x = var_21593_cast_fp16, y = var_21594_to_fp16)[name = tensor("op_21595_cast_fp16")]; + tensor denom_57_epsilon_0_to_fp16 = const()[name = tensor("denom_57_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_57_cast_fp16 = rsqrt(epsilon = denom_57_epsilon_0_to_fp16, x = var_21595_cast_fp16)[name = tensor("denom_57_cast_fp16")]; + tensor out_57_cast_fp16 = mul(x = zero_mean_57_cast_fp16, y = denom_57_cast_fp16)[name = tensor("out_57_cast_fp16")]; + tensor obj_57_gamma_0_to_fp16 = const()[name = tensor("obj_57_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(565241472)))]; + tensor obj_57_beta_0_to_fp16 = const()[name = tensor("obj_57_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(565244096)))]; + tensor obj_57_epsilon_0_to_fp16 = const()[name = tensor("obj_57_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_57_cast_fp16 = batch_norm(beta = obj_57_beta_0_to_fp16, epsilon = obj_57_epsilon_0_to_fp16, gamma = obj_57_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_57_cast_fp16)[name = tensor("obj_57_cast_fp16")]; + tensor var_21610 = const()[name = tensor("op_21610"), val = tensor([1, 1])]; + tensor var_21612 = const()[name = tensor("op_21612"), val = tensor([1, 1])]; + tensor query_29_pad_type_0 = const()[name = tensor("query_29_pad_type_0"), val = tensor("custom")]; + tensor query_29_pad_0 = const()[name = tensor("query_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_14_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_14_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(565246720)))]; + tensor layers_14_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_14_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(568523584)))]; + tensor query_29_cast_fp16 = conv(bias = layers_14_self_attn_q_proj_bias_to_fp16, dilations = var_21612, groups = var_21577, pad = query_29_pad_0, pad_type = query_29_pad_type_0, strides = var_21610, weight = layers_14_self_attn_q_proj_weight_to_fp16, x = obj_57_cast_fp16)[name = tensor("query_29_cast_fp16")]; + tensor var_21616 = const()[name = tensor("op_21616"), val = tensor([1, 1])]; + tensor var_21618 = const()[name = tensor("op_21618"), val = tensor([1, 1])]; + tensor key_29_pad_type_0 = const()[name = tensor("key_29_pad_type_0"), val = tensor("custom")]; + tensor key_29_pad_0 = const()[name = tensor("key_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_14_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_14_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(568526208)))]; + tensor key_29_cast_fp16 = conv(dilations = var_21618, groups = var_21577, pad = key_29_pad_0, pad_type = key_29_pad_type_0, strides = var_21616, weight = layers_14_self_attn_k_proj_weight_to_fp16, x = obj_57_cast_fp16)[name = tensor("key_29_cast_fp16")]; + tensor var_21623 = const()[name = tensor("op_21623"), val = tensor([1, 1])]; + tensor var_21625 = const()[name = tensor("op_21625"), val = tensor([1, 1])]; + tensor value_29_pad_type_0 = const()[name = tensor("value_29_pad_type_0"), val = tensor("custom")]; + tensor value_29_pad_0 = const()[name = tensor("value_29_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_14_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_14_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(571803072)))]; + tensor layers_14_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_14_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(575079936)))]; + tensor value_29_cast_fp16 = conv(bias = layers_14_self_attn_v_proj_bias_to_fp16, dilations = var_21625, groups = var_21577, pad = value_29_pad_0, pad_type = value_29_pad_type_0, strides = var_21623, weight = layers_14_self_attn_v_proj_weight_to_fp16, x = obj_57_cast_fp16)[name = tensor("value_29_cast_fp16")]; + tensor var_21632_begin_0 = const()[name = tensor("op_21632_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_21632_end_0 = const()[name = tensor("op_21632_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_21632_end_mask_0 = const()[name = tensor("op_21632_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_21632_cast_fp16 = slice_by_index(begin = var_21632_begin_0, end = var_21632_end_0, end_mask = var_21632_end_mask_0, x = query_29_cast_fp16)[name = tensor("op_21632_cast_fp16")]; + tensor var_21636_begin_0 = const()[name = tensor("op_21636_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_21636_end_0 = const()[name = tensor("op_21636_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_21636_end_mask_0 = const()[name = tensor("op_21636_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_21636_cast_fp16 = slice_by_index(begin = var_21636_begin_0, end = var_21636_end_0, end_mask = var_21636_end_mask_0, x = query_29_cast_fp16)[name = tensor("op_21636_cast_fp16")]; + tensor var_21640_begin_0 = const()[name = tensor("op_21640_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_21640_end_0 = const()[name = tensor("op_21640_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_21640_end_mask_0 = const()[name = tensor("op_21640_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_21640_cast_fp16 = slice_by_index(begin = var_21640_begin_0, end = var_21640_end_0, end_mask = var_21640_end_mask_0, x = query_29_cast_fp16)[name = tensor("op_21640_cast_fp16")]; + tensor var_21644_begin_0 = const()[name = tensor("op_21644_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_21644_end_0 = const()[name = tensor("op_21644_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_21644_end_mask_0 = const()[name = tensor("op_21644_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_21644_cast_fp16 = slice_by_index(begin = var_21644_begin_0, end = var_21644_end_0, end_mask = var_21644_end_mask_0, x = query_29_cast_fp16)[name = tensor("op_21644_cast_fp16")]; + tensor var_21648_begin_0 = const()[name = tensor("op_21648_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_21648_end_0 = const()[name = tensor("op_21648_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_21648_end_mask_0 = const()[name = tensor("op_21648_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_21648_cast_fp16 = slice_by_index(begin = var_21648_begin_0, end = var_21648_end_0, end_mask = var_21648_end_mask_0, x = query_29_cast_fp16)[name = tensor("op_21648_cast_fp16")]; + tensor var_21652_begin_0 = const()[name = tensor("op_21652_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_21652_end_0 = const()[name = tensor("op_21652_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_21652_end_mask_0 = const()[name = tensor("op_21652_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_21652_cast_fp16 = slice_by_index(begin = var_21652_begin_0, end = var_21652_end_0, end_mask = var_21652_end_mask_0, x = query_29_cast_fp16)[name = tensor("op_21652_cast_fp16")]; + tensor var_21656_begin_0 = const()[name = tensor("op_21656_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_21656_end_0 = const()[name = tensor("op_21656_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_21656_end_mask_0 = const()[name = tensor("op_21656_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_21656_cast_fp16 = slice_by_index(begin = var_21656_begin_0, end = var_21656_end_0, end_mask = var_21656_end_mask_0, x = query_29_cast_fp16)[name = tensor("op_21656_cast_fp16")]; + tensor var_21660_begin_0 = const()[name = tensor("op_21660_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_21660_end_0 = const()[name = tensor("op_21660_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_21660_end_mask_0 = const()[name = tensor("op_21660_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_21660_cast_fp16 = slice_by_index(begin = var_21660_begin_0, end = var_21660_end_0, end_mask = var_21660_end_mask_0, x = query_29_cast_fp16)[name = tensor("op_21660_cast_fp16")]; + tensor var_21664_begin_0 = const()[name = tensor("op_21664_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_21664_end_0 = const()[name = tensor("op_21664_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_21664_end_mask_0 = const()[name = tensor("op_21664_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_21664_cast_fp16 = slice_by_index(begin = var_21664_begin_0, end = var_21664_end_0, end_mask = var_21664_end_mask_0, x = query_29_cast_fp16)[name = tensor("op_21664_cast_fp16")]; + tensor var_21668_begin_0 = const()[name = tensor("op_21668_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_21668_end_0 = const()[name = tensor("op_21668_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_21668_end_mask_0 = const()[name = tensor("op_21668_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_21668_cast_fp16 = slice_by_index(begin = var_21668_begin_0, end = var_21668_end_0, end_mask = var_21668_end_mask_0, x = query_29_cast_fp16)[name = tensor("op_21668_cast_fp16")]; + tensor var_21672_begin_0 = const()[name = tensor("op_21672_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_21672_end_0 = const()[name = tensor("op_21672_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_21672_end_mask_0 = const()[name = tensor("op_21672_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_21672_cast_fp16 = slice_by_index(begin = var_21672_begin_0, end = var_21672_end_0, end_mask = var_21672_end_mask_0, x = query_29_cast_fp16)[name = tensor("op_21672_cast_fp16")]; + tensor var_21676_begin_0 = const()[name = tensor("op_21676_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_21676_end_0 = const()[name = tensor("op_21676_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_21676_end_mask_0 = const()[name = tensor("op_21676_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_21676_cast_fp16 = slice_by_index(begin = var_21676_begin_0, end = var_21676_end_0, end_mask = var_21676_end_mask_0, x = query_29_cast_fp16)[name = tensor("op_21676_cast_fp16")]; + tensor var_21680_begin_0 = const()[name = tensor("op_21680_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_21680_end_0 = const()[name = tensor("op_21680_end_0"), val = tensor([1, 832, 1, 1500])]; + tensor var_21680_end_mask_0 = const()[name = tensor("op_21680_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_21680_cast_fp16 = slice_by_index(begin = var_21680_begin_0, end = var_21680_end_0, end_mask = var_21680_end_mask_0, x = query_29_cast_fp16)[name = tensor("op_21680_cast_fp16")]; + tensor var_21684_begin_0 = const()[name = tensor("op_21684_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_21684_end_0 = const()[name = tensor("op_21684_end_0"), val = tensor([1, 896, 1, 1500])]; + tensor var_21684_end_mask_0 = const()[name = tensor("op_21684_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_21684_cast_fp16 = slice_by_index(begin = var_21684_begin_0, end = var_21684_end_0, end_mask = var_21684_end_mask_0, x = query_29_cast_fp16)[name = tensor("op_21684_cast_fp16")]; + tensor var_21688_begin_0 = const()[name = tensor("op_21688_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_21688_end_0 = const()[name = tensor("op_21688_end_0"), val = tensor([1, 960, 1, 1500])]; + tensor var_21688_end_mask_0 = const()[name = tensor("op_21688_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_21688_cast_fp16 = slice_by_index(begin = var_21688_begin_0, end = var_21688_end_0, end_mask = var_21688_end_mask_0, x = query_29_cast_fp16)[name = tensor("op_21688_cast_fp16")]; + tensor var_21692_begin_0 = const()[name = tensor("op_21692_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_21692_end_0 = const()[name = tensor("op_21692_end_0"), val = tensor([1, 1024, 1, 1500])]; + tensor var_21692_end_mask_0 = const()[name = tensor("op_21692_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_21692_cast_fp16 = slice_by_index(begin = var_21692_begin_0, end = var_21692_end_0, end_mask = var_21692_end_mask_0, x = query_29_cast_fp16)[name = tensor("op_21692_cast_fp16")]; + tensor var_21696_begin_0 = const()[name = tensor("op_21696_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_21696_end_0 = const()[name = tensor("op_21696_end_0"), val = tensor([1, 1088, 1, 1500])]; + tensor var_21696_end_mask_0 = const()[name = tensor("op_21696_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_21696_cast_fp16 = slice_by_index(begin = var_21696_begin_0, end = var_21696_end_0, end_mask = var_21696_end_mask_0, x = query_29_cast_fp16)[name = tensor("op_21696_cast_fp16")]; + tensor var_21700_begin_0 = const()[name = tensor("op_21700_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_21700_end_0 = const()[name = tensor("op_21700_end_0"), val = tensor([1, 1152, 1, 1500])]; + tensor var_21700_end_mask_0 = const()[name = tensor("op_21700_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_21700_cast_fp16 = slice_by_index(begin = var_21700_begin_0, end = var_21700_end_0, end_mask = var_21700_end_mask_0, x = query_29_cast_fp16)[name = tensor("op_21700_cast_fp16")]; + tensor var_21704_begin_0 = const()[name = tensor("op_21704_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_21704_end_0 = const()[name = tensor("op_21704_end_0"), val = tensor([1, 1216, 1, 1500])]; + tensor var_21704_end_mask_0 = const()[name = tensor("op_21704_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_21704_cast_fp16 = slice_by_index(begin = var_21704_begin_0, end = var_21704_end_0, end_mask = var_21704_end_mask_0, x = query_29_cast_fp16)[name = tensor("op_21704_cast_fp16")]; + tensor var_21708_begin_0 = const()[name = tensor("op_21708_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_21708_end_0 = const()[name = tensor("op_21708_end_0"), val = tensor([1, 1280, 1, 1500])]; + tensor var_21708_end_mask_0 = const()[name = tensor("op_21708_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_21708_cast_fp16 = slice_by_index(begin = var_21708_begin_0, end = var_21708_end_0, end_mask = var_21708_end_mask_0, x = query_29_cast_fp16)[name = tensor("op_21708_cast_fp16")]; + tensor var_21717_begin_0 = const()[name = tensor("op_21717_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_21717_end_0 = const()[name = tensor("op_21717_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_21717_end_mask_0 = const()[name = tensor("op_21717_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_21717_cast_fp16 = slice_by_index(begin = var_21717_begin_0, end = var_21717_end_0, end_mask = var_21717_end_mask_0, x = var_21632_cast_fp16)[name = tensor("op_21717_cast_fp16")]; + tensor var_21724_begin_0 = const()[name = tensor("op_21724_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_21724_end_0 = const()[name = tensor("op_21724_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_21724_end_mask_0 = const()[name = tensor("op_21724_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_21724_cast_fp16 = slice_by_index(begin = var_21724_begin_0, end = var_21724_end_0, end_mask = var_21724_end_mask_0, x = var_21632_cast_fp16)[name = tensor("op_21724_cast_fp16")]; + tensor var_21731_begin_0 = const()[name = tensor("op_21731_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_21731_end_0 = const()[name = tensor("op_21731_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_21731_end_mask_0 = const()[name = tensor("op_21731_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_21731_cast_fp16 = slice_by_index(begin = var_21731_begin_0, end = var_21731_end_0, end_mask = var_21731_end_mask_0, x = var_21632_cast_fp16)[name = tensor("op_21731_cast_fp16")]; + tensor var_21738_begin_0 = const()[name = tensor("op_21738_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_21738_end_0 = const()[name = tensor("op_21738_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_21738_end_mask_0 = const()[name = tensor("op_21738_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_21738_cast_fp16 = slice_by_index(begin = var_21738_begin_0, end = var_21738_end_0, end_mask = var_21738_end_mask_0, x = var_21632_cast_fp16)[name = tensor("op_21738_cast_fp16")]; + tensor var_21745_begin_0 = const()[name = tensor("op_21745_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_21745_end_0 = const()[name = tensor("op_21745_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_21745_end_mask_0 = const()[name = tensor("op_21745_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_21745_cast_fp16 = slice_by_index(begin = var_21745_begin_0, end = var_21745_end_0, end_mask = var_21745_end_mask_0, x = var_21636_cast_fp16)[name = tensor("op_21745_cast_fp16")]; + tensor var_21752_begin_0 = const()[name = tensor("op_21752_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_21752_end_0 = const()[name = tensor("op_21752_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_21752_end_mask_0 = const()[name = tensor("op_21752_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_21752_cast_fp16 = slice_by_index(begin = var_21752_begin_0, end = var_21752_end_0, end_mask = var_21752_end_mask_0, x = var_21636_cast_fp16)[name = tensor("op_21752_cast_fp16")]; + tensor var_21759_begin_0 = const()[name = tensor("op_21759_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_21759_end_0 = const()[name = tensor("op_21759_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_21759_end_mask_0 = const()[name = tensor("op_21759_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_21759_cast_fp16 = slice_by_index(begin = var_21759_begin_0, end = var_21759_end_0, end_mask = var_21759_end_mask_0, x = var_21636_cast_fp16)[name = tensor("op_21759_cast_fp16")]; + tensor var_21766_begin_0 = const()[name = tensor("op_21766_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_21766_end_0 = const()[name = tensor("op_21766_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_21766_end_mask_0 = const()[name = tensor("op_21766_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_21766_cast_fp16 = slice_by_index(begin = var_21766_begin_0, end = var_21766_end_0, end_mask = var_21766_end_mask_0, x = var_21636_cast_fp16)[name = tensor("op_21766_cast_fp16")]; + tensor var_21773_begin_0 = const()[name = tensor("op_21773_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_21773_end_0 = const()[name = tensor("op_21773_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_21773_end_mask_0 = const()[name = tensor("op_21773_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_21773_cast_fp16 = slice_by_index(begin = var_21773_begin_0, end = var_21773_end_0, end_mask = var_21773_end_mask_0, x = var_21640_cast_fp16)[name = tensor("op_21773_cast_fp16")]; + tensor var_21780_begin_0 = const()[name = tensor("op_21780_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_21780_end_0 = const()[name = tensor("op_21780_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_21780_end_mask_0 = const()[name = tensor("op_21780_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_21780_cast_fp16 = slice_by_index(begin = var_21780_begin_0, end = var_21780_end_0, end_mask = var_21780_end_mask_0, x = var_21640_cast_fp16)[name = tensor("op_21780_cast_fp16")]; + tensor var_21787_begin_0 = const()[name = tensor("op_21787_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_21787_end_0 = const()[name = tensor("op_21787_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_21787_end_mask_0 = const()[name = tensor("op_21787_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_21787_cast_fp16 = slice_by_index(begin = var_21787_begin_0, end = var_21787_end_0, end_mask = var_21787_end_mask_0, x = var_21640_cast_fp16)[name = tensor("op_21787_cast_fp16")]; + tensor var_21794_begin_0 = const()[name = tensor("op_21794_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_21794_end_0 = const()[name = tensor("op_21794_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_21794_end_mask_0 = const()[name = tensor("op_21794_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_21794_cast_fp16 = slice_by_index(begin = var_21794_begin_0, end = var_21794_end_0, end_mask = var_21794_end_mask_0, x = var_21640_cast_fp16)[name = tensor("op_21794_cast_fp16")]; + tensor var_21801_begin_0 = const()[name = tensor("op_21801_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_21801_end_0 = const()[name = tensor("op_21801_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_21801_end_mask_0 = const()[name = tensor("op_21801_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_21801_cast_fp16 = slice_by_index(begin = var_21801_begin_0, end = var_21801_end_0, end_mask = var_21801_end_mask_0, x = var_21644_cast_fp16)[name = tensor("op_21801_cast_fp16")]; + tensor var_21808_begin_0 = const()[name = tensor("op_21808_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_21808_end_0 = const()[name = tensor("op_21808_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_21808_end_mask_0 = const()[name = tensor("op_21808_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_21808_cast_fp16 = slice_by_index(begin = var_21808_begin_0, end = var_21808_end_0, end_mask = var_21808_end_mask_0, x = var_21644_cast_fp16)[name = tensor("op_21808_cast_fp16")]; + tensor var_21815_begin_0 = const()[name = tensor("op_21815_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_21815_end_0 = const()[name = tensor("op_21815_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_21815_end_mask_0 = const()[name = tensor("op_21815_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_21815_cast_fp16 = slice_by_index(begin = var_21815_begin_0, end = var_21815_end_0, end_mask = var_21815_end_mask_0, x = var_21644_cast_fp16)[name = tensor("op_21815_cast_fp16")]; + tensor var_21822_begin_0 = const()[name = tensor("op_21822_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_21822_end_0 = const()[name = tensor("op_21822_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_21822_end_mask_0 = const()[name = tensor("op_21822_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_21822_cast_fp16 = slice_by_index(begin = var_21822_begin_0, end = var_21822_end_0, end_mask = var_21822_end_mask_0, x = var_21644_cast_fp16)[name = tensor("op_21822_cast_fp16")]; + tensor var_21829_begin_0 = const()[name = tensor("op_21829_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_21829_end_0 = const()[name = tensor("op_21829_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_21829_end_mask_0 = const()[name = tensor("op_21829_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_21829_cast_fp16 = slice_by_index(begin = var_21829_begin_0, end = var_21829_end_0, end_mask = var_21829_end_mask_0, x = var_21648_cast_fp16)[name = tensor("op_21829_cast_fp16")]; + tensor var_21836_begin_0 = const()[name = tensor("op_21836_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_21836_end_0 = const()[name = tensor("op_21836_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_21836_end_mask_0 = const()[name = tensor("op_21836_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_21836_cast_fp16 = slice_by_index(begin = var_21836_begin_0, end = var_21836_end_0, end_mask = var_21836_end_mask_0, x = var_21648_cast_fp16)[name = tensor("op_21836_cast_fp16")]; + tensor var_21843_begin_0 = const()[name = tensor("op_21843_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_21843_end_0 = const()[name = tensor("op_21843_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_21843_end_mask_0 = const()[name = tensor("op_21843_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_21843_cast_fp16 = slice_by_index(begin = var_21843_begin_0, end = var_21843_end_0, end_mask = var_21843_end_mask_0, x = var_21648_cast_fp16)[name = tensor("op_21843_cast_fp16")]; + tensor var_21850_begin_0 = const()[name = tensor("op_21850_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_21850_end_0 = const()[name = tensor("op_21850_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_21850_end_mask_0 = const()[name = tensor("op_21850_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_21850_cast_fp16 = slice_by_index(begin = var_21850_begin_0, end = var_21850_end_0, end_mask = var_21850_end_mask_0, x = var_21648_cast_fp16)[name = tensor("op_21850_cast_fp16")]; + tensor var_21857_begin_0 = const()[name = tensor("op_21857_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_21857_end_0 = const()[name = tensor("op_21857_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_21857_end_mask_0 = const()[name = tensor("op_21857_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_21857_cast_fp16 = slice_by_index(begin = var_21857_begin_0, end = var_21857_end_0, end_mask = var_21857_end_mask_0, x = var_21652_cast_fp16)[name = tensor("op_21857_cast_fp16")]; + tensor var_21864_begin_0 = const()[name = tensor("op_21864_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_21864_end_0 = const()[name = tensor("op_21864_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_21864_end_mask_0 = const()[name = tensor("op_21864_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_21864_cast_fp16 = slice_by_index(begin = var_21864_begin_0, end = var_21864_end_0, end_mask = var_21864_end_mask_0, x = var_21652_cast_fp16)[name = tensor("op_21864_cast_fp16")]; + tensor var_21871_begin_0 = const()[name = tensor("op_21871_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_21871_end_0 = const()[name = tensor("op_21871_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_21871_end_mask_0 = const()[name = tensor("op_21871_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_21871_cast_fp16 = slice_by_index(begin = var_21871_begin_0, end = var_21871_end_0, end_mask = var_21871_end_mask_0, x = var_21652_cast_fp16)[name = tensor("op_21871_cast_fp16")]; + tensor var_21878_begin_0 = const()[name = tensor("op_21878_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_21878_end_0 = const()[name = tensor("op_21878_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_21878_end_mask_0 = const()[name = tensor("op_21878_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_21878_cast_fp16 = slice_by_index(begin = var_21878_begin_0, end = var_21878_end_0, end_mask = var_21878_end_mask_0, x = var_21652_cast_fp16)[name = tensor("op_21878_cast_fp16")]; + tensor var_21885_begin_0 = const()[name = tensor("op_21885_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_21885_end_0 = const()[name = tensor("op_21885_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_21885_end_mask_0 = const()[name = tensor("op_21885_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_21885_cast_fp16 = slice_by_index(begin = var_21885_begin_0, end = var_21885_end_0, end_mask = var_21885_end_mask_0, x = var_21656_cast_fp16)[name = tensor("op_21885_cast_fp16")]; + tensor var_21892_begin_0 = const()[name = tensor("op_21892_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_21892_end_0 = const()[name = tensor("op_21892_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_21892_end_mask_0 = const()[name = tensor("op_21892_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_21892_cast_fp16 = slice_by_index(begin = var_21892_begin_0, end = var_21892_end_0, end_mask = var_21892_end_mask_0, x = var_21656_cast_fp16)[name = tensor("op_21892_cast_fp16")]; + tensor var_21899_begin_0 = const()[name = tensor("op_21899_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_21899_end_0 = const()[name = tensor("op_21899_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_21899_end_mask_0 = const()[name = tensor("op_21899_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_21899_cast_fp16 = slice_by_index(begin = var_21899_begin_0, end = var_21899_end_0, end_mask = var_21899_end_mask_0, x = var_21656_cast_fp16)[name = tensor("op_21899_cast_fp16")]; + tensor var_21906_begin_0 = const()[name = tensor("op_21906_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_21906_end_0 = const()[name = tensor("op_21906_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_21906_end_mask_0 = const()[name = tensor("op_21906_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_21906_cast_fp16 = slice_by_index(begin = var_21906_begin_0, end = var_21906_end_0, end_mask = var_21906_end_mask_0, x = var_21656_cast_fp16)[name = tensor("op_21906_cast_fp16")]; + tensor var_21913_begin_0 = const()[name = tensor("op_21913_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_21913_end_0 = const()[name = tensor("op_21913_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_21913_end_mask_0 = const()[name = tensor("op_21913_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_21913_cast_fp16 = slice_by_index(begin = var_21913_begin_0, end = var_21913_end_0, end_mask = var_21913_end_mask_0, x = var_21660_cast_fp16)[name = tensor("op_21913_cast_fp16")]; + tensor var_21920_begin_0 = const()[name = tensor("op_21920_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_21920_end_0 = const()[name = tensor("op_21920_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_21920_end_mask_0 = const()[name = tensor("op_21920_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_21920_cast_fp16 = slice_by_index(begin = var_21920_begin_0, end = var_21920_end_0, end_mask = var_21920_end_mask_0, x = var_21660_cast_fp16)[name = tensor("op_21920_cast_fp16")]; + tensor var_21927_begin_0 = const()[name = tensor("op_21927_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_21927_end_0 = const()[name = tensor("op_21927_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_21927_end_mask_0 = const()[name = tensor("op_21927_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_21927_cast_fp16 = slice_by_index(begin = var_21927_begin_0, end = var_21927_end_0, end_mask = var_21927_end_mask_0, x = var_21660_cast_fp16)[name = tensor("op_21927_cast_fp16")]; + tensor var_21934_begin_0 = const()[name = tensor("op_21934_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_21934_end_0 = const()[name = tensor("op_21934_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_21934_end_mask_0 = const()[name = tensor("op_21934_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_21934_cast_fp16 = slice_by_index(begin = var_21934_begin_0, end = var_21934_end_0, end_mask = var_21934_end_mask_0, x = var_21660_cast_fp16)[name = tensor("op_21934_cast_fp16")]; + tensor var_21941_begin_0 = const()[name = tensor("op_21941_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_21941_end_0 = const()[name = tensor("op_21941_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_21941_end_mask_0 = const()[name = tensor("op_21941_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_21941_cast_fp16 = slice_by_index(begin = var_21941_begin_0, end = var_21941_end_0, end_mask = var_21941_end_mask_0, x = var_21664_cast_fp16)[name = tensor("op_21941_cast_fp16")]; + tensor var_21948_begin_0 = const()[name = tensor("op_21948_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_21948_end_0 = const()[name = tensor("op_21948_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_21948_end_mask_0 = const()[name = tensor("op_21948_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_21948_cast_fp16 = slice_by_index(begin = var_21948_begin_0, end = var_21948_end_0, end_mask = var_21948_end_mask_0, x = var_21664_cast_fp16)[name = tensor("op_21948_cast_fp16")]; + tensor var_21955_begin_0 = const()[name = tensor("op_21955_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_21955_end_0 = const()[name = tensor("op_21955_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_21955_end_mask_0 = const()[name = tensor("op_21955_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_21955_cast_fp16 = slice_by_index(begin = var_21955_begin_0, end = var_21955_end_0, end_mask = var_21955_end_mask_0, x = var_21664_cast_fp16)[name = tensor("op_21955_cast_fp16")]; + tensor var_21962_begin_0 = const()[name = tensor("op_21962_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_21962_end_0 = const()[name = tensor("op_21962_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_21962_end_mask_0 = const()[name = tensor("op_21962_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_21962_cast_fp16 = slice_by_index(begin = var_21962_begin_0, end = var_21962_end_0, end_mask = var_21962_end_mask_0, x = var_21664_cast_fp16)[name = tensor("op_21962_cast_fp16")]; + tensor var_21969_begin_0 = const()[name = tensor("op_21969_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_21969_end_0 = const()[name = tensor("op_21969_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_21969_end_mask_0 = const()[name = tensor("op_21969_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_21969_cast_fp16 = slice_by_index(begin = var_21969_begin_0, end = var_21969_end_0, end_mask = var_21969_end_mask_0, x = var_21668_cast_fp16)[name = tensor("op_21969_cast_fp16")]; + tensor var_21976_begin_0 = const()[name = tensor("op_21976_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_21976_end_0 = const()[name = tensor("op_21976_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_21976_end_mask_0 = const()[name = tensor("op_21976_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_21976_cast_fp16 = slice_by_index(begin = var_21976_begin_0, end = var_21976_end_0, end_mask = var_21976_end_mask_0, x = var_21668_cast_fp16)[name = tensor("op_21976_cast_fp16")]; + tensor var_21983_begin_0 = const()[name = tensor("op_21983_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_21983_end_0 = const()[name = tensor("op_21983_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_21983_end_mask_0 = const()[name = tensor("op_21983_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_21983_cast_fp16 = slice_by_index(begin = var_21983_begin_0, end = var_21983_end_0, end_mask = var_21983_end_mask_0, x = var_21668_cast_fp16)[name = tensor("op_21983_cast_fp16")]; + tensor var_21990_begin_0 = const()[name = tensor("op_21990_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_21990_end_0 = const()[name = tensor("op_21990_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_21990_end_mask_0 = const()[name = tensor("op_21990_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_21990_cast_fp16 = slice_by_index(begin = var_21990_begin_0, end = var_21990_end_0, end_mask = var_21990_end_mask_0, x = var_21668_cast_fp16)[name = tensor("op_21990_cast_fp16")]; + tensor var_21997_begin_0 = const()[name = tensor("op_21997_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_21997_end_0 = const()[name = tensor("op_21997_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_21997_end_mask_0 = const()[name = tensor("op_21997_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_21997_cast_fp16 = slice_by_index(begin = var_21997_begin_0, end = var_21997_end_0, end_mask = var_21997_end_mask_0, x = var_21672_cast_fp16)[name = tensor("op_21997_cast_fp16")]; + tensor var_22004_begin_0 = const()[name = tensor("op_22004_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_22004_end_0 = const()[name = tensor("op_22004_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_22004_end_mask_0 = const()[name = tensor("op_22004_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_22004_cast_fp16 = slice_by_index(begin = var_22004_begin_0, end = var_22004_end_0, end_mask = var_22004_end_mask_0, x = var_21672_cast_fp16)[name = tensor("op_22004_cast_fp16")]; + tensor var_22011_begin_0 = const()[name = tensor("op_22011_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_22011_end_0 = const()[name = tensor("op_22011_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_22011_end_mask_0 = const()[name = tensor("op_22011_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_22011_cast_fp16 = slice_by_index(begin = var_22011_begin_0, end = var_22011_end_0, end_mask = var_22011_end_mask_0, x = var_21672_cast_fp16)[name = tensor("op_22011_cast_fp16")]; + tensor var_22018_begin_0 = const()[name = tensor("op_22018_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_22018_end_0 = const()[name = tensor("op_22018_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_22018_end_mask_0 = const()[name = tensor("op_22018_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_22018_cast_fp16 = slice_by_index(begin = var_22018_begin_0, end = var_22018_end_0, end_mask = var_22018_end_mask_0, x = var_21672_cast_fp16)[name = tensor("op_22018_cast_fp16")]; + tensor var_22025_begin_0 = const()[name = tensor("op_22025_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_22025_end_0 = const()[name = tensor("op_22025_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_22025_end_mask_0 = const()[name = tensor("op_22025_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_22025_cast_fp16 = slice_by_index(begin = var_22025_begin_0, end = var_22025_end_0, end_mask = var_22025_end_mask_0, x = var_21676_cast_fp16)[name = tensor("op_22025_cast_fp16")]; + tensor var_22032_begin_0 = const()[name = tensor("op_22032_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_22032_end_0 = const()[name = tensor("op_22032_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_22032_end_mask_0 = const()[name = tensor("op_22032_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_22032_cast_fp16 = slice_by_index(begin = var_22032_begin_0, end = var_22032_end_0, end_mask = var_22032_end_mask_0, x = var_21676_cast_fp16)[name = tensor("op_22032_cast_fp16")]; + tensor var_22039_begin_0 = const()[name = tensor("op_22039_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_22039_end_0 = const()[name = tensor("op_22039_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_22039_end_mask_0 = const()[name = tensor("op_22039_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_22039_cast_fp16 = slice_by_index(begin = var_22039_begin_0, end = var_22039_end_0, end_mask = var_22039_end_mask_0, x = var_21676_cast_fp16)[name = tensor("op_22039_cast_fp16")]; + tensor var_22046_begin_0 = const()[name = tensor("op_22046_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_22046_end_0 = const()[name = tensor("op_22046_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_22046_end_mask_0 = const()[name = tensor("op_22046_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_22046_cast_fp16 = slice_by_index(begin = var_22046_begin_0, end = var_22046_end_0, end_mask = var_22046_end_mask_0, x = var_21676_cast_fp16)[name = tensor("op_22046_cast_fp16")]; + tensor var_22053_begin_0 = const()[name = tensor("op_22053_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_22053_end_0 = const()[name = tensor("op_22053_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_22053_end_mask_0 = const()[name = tensor("op_22053_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_22053_cast_fp16 = slice_by_index(begin = var_22053_begin_0, end = var_22053_end_0, end_mask = var_22053_end_mask_0, x = var_21680_cast_fp16)[name = tensor("op_22053_cast_fp16")]; + tensor var_22060_begin_0 = const()[name = tensor("op_22060_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_22060_end_0 = const()[name = tensor("op_22060_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_22060_end_mask_0 = const()[name = tensor("op_22060_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_22060_cast_fp16 = slice_by_index(begin = var_22060_begin_0, end = var_22060_end_0, end_mask = var_22060_end_mask_0, x = var_21680_cast_fp16)[name = tensor("op_22060_cast_fp16")]; + tensor var_22067_begin_0 = const()[name = tensor("op_22067_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_22067_end_0 = const()[name = tensor("op_22067_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_22067_end_mask_0 = const()[name = tensor("op_22067_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_22067_cast_fp16 = slice_by_index(begin = var_22067_begin_0, end = var_22067_end_0, end_mask = var_22067_end_mask_0, x = var_21680_cast_fp16)[name = tensor("op_22067_cast_fp16")]; + tensor var_22074_begin_0 = const()[name = tensor("op_22074_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_22074_end_0 = const()[name = tensor("op_22074_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_22074_end_mask_0 = const()[name = tensor("op_22074_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_22074_cast_fp16 = slice_by_index(begin = var_22074_begin_0, end = var_22074_end_0, end_mask = var_22074_end_mask_0, x = var_21680_cast_fp16)[name = tensor("op_22074_cast_fp16")]; + tensor var_22081_begin_0 = const()[name = tensor("op_22081_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_22081_end_0 = const()[name = tensor("op_22081_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_22081_end_mask_0 = const()[name = tensor("op_22081_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_22081_cast_fp16 = slice_by_index(begin = var_22081_begin_0, end = var_22081_end_0, end_mask = var_22081_end_mask_0, x = var_21684_cast_fp16)[name = tensor("op_22081_cast_fp16")]; + tensor var_22088_begin_0 = const()[name = tensor("op_22088_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_22088_end_0 = const()[name = tensor("op_22088_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_22088_end_mask_0 = const()[name = tensor("op_22088_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_22088_cast_fp16 = slice_by_index(begin = var_22088_begin_0, end = var_22088_end_0, end_mask = var_22088_end_mask_0, x = var_21684_cast_fp16)[name = tensor("op_22088_cast_fp16")]; + tensor var_22095_begin_0 = const()[name = tensor("op_22095_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_22095_end_0 = const()[name = tensor("op_22095_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_22095_end_mask_0 = const()[name = tensor("op_22095_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_22095_cast_fp16 = slice_by_index(begin = var_22095_begin_0, end = var_22095_end_0, end_mask = var_22095_end_mask_0, x = var_21684_cast_fp16)[name = tensor("op_22095_cast_fp16")]; + tensor var_22102_begin_0 = const()[name = tensor("op_22102_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_22102_end_0 = const()[name = tensor("op_22102_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_22102_end_mask_0 = const()[name = tensor("op_22102_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_22102_cast_fp16 = slice_by_index(begin = var_22102_begin_0, end = var_22102_end_0, end_mask = var_22102_end_mask_0, x = var_21684_cast_fp16)[name = tensor("op_22102_cast_fp16")]; + tensor var_22109_begin_0 = const()[name = tensor("op_22109_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_22109_end_0 = const()[name = tensor("op_22109_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_22109_end_mask_0 = const()[name = tensor("op_22109_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_22109_cast_fp16 = slice_by_index(begin = var_22109_begin_0, end = var_22109_end_0, end_mask = var_22109_end_mask_0, x = var_21688_cast_fp16)[name = tensor("op_22109_cast_fp16")]; + tensor var_22116_begin_0 = const()[name = tensor("op_22116_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_22116_end_0 = const()[name = tensor("op_22116_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_22116_end_mask_0 = const()[name = tensor("op_22116_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_22116_cast_fp16 = slice_by_index(begin = var_22116_begin_0, end = var_22116_end_0, end_mask = var_22116_end_mask_0, x = var_21688_cast_fp16)[name = tensor("op_22116_cast_fp16")]; + tensor var_22123_begin_0 = const()[name = tensor("op_22123_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_22123_end_0 = const()[name = tensor("op_22123_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_22123_end_mask_0 = const()[name = tensor("op_22123_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_22123_cast_fp16 = slice_by_index(begin = var_22123_begin_0, end = var_22123_end_0, end_mask = var_22123_end_mask_0, x = var_21688_cast_fp16)[name = tensor("op_22123_cast_fp16")]; + tensor var_22130_begin_0 = const()[name = tensor("op_22130_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_22130_end_0 = const()[name = tensor("op_22130_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_22130_end_mask_0 = const()[name = tensor("op_22130_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_22130_cast_fp16 = slice_by_index(begin = var_22130_begin_0, end = var_22130_end_0, end_mask = var_22130_end_mask_0, x = var_21688_cast_fp16)[name = tensor("op_22130_cast_fp16")]; + tensor var_22137_begin_0 = const()[name = tensor("op_22137_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_22137_end_0 = const()[name = tensor("op_22137_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_22137_end_mask_0 = const()[name = tensor("op_22137_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_22137_cast_fp16 = slice_by_index(begin = var_22137_begin_0, end = var_22137_end_0, end_mask = var_22137_end_mask_0, x = var_21692_cast_fp16)[name = tensor("op_22137_cast_fp16")]; + tensor var_22144_begin_0 = const()[name = tensor("op_22144_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_22144_end_0 = const()[name = tensor("op_22144_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_22144_end_mask_0 = const()[name = tensor("op_22144_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_22144_cast_fp16 = slice_by_index(begin = var_22144_begin_0, end = var_22144_end_0, end_mask = var_22144_end_mask_0, x = var_21692_cast_fp16)[name = tensor("op_22144_cast_fp16")]; + tensor var_22151_begin_0 = const()[name = tensor("op_22151_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_22151_end_0 = const()[name = tensor("op_22151_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_22151_end_mask_0 = const()[name = tensor("op_22151_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_22151_cast_fp16 = slice_by_index(begin = var_22151_begin_0, end = var_22151_end_0, end_mask = var_22151_end_mask_0, x = var_21692_cast_fp16)[name = tensor("op_22151_cast_fp16")]; + tensor var_22158_begin_0 = const()[name = tensor("op_22158_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_22158_end_0 = const()[name = tensor("op_22158_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_22158_end_mask_0 = const()[name = tensor("op_22158_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_22158_cast_fp16 = slice_by_index(begin = var_22158_begin_0, end = var_22158_end_0, end_mask = var_22158_end_mask_0, x = var_21692_cast_fp16)[name = tensor("op_22158_cast_fp16")]; + tensor var_22165_begin_0 = const()[name = tensor("op_22165_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_22165_end_0 = const()[name = tensor("op_22165_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_22165_end_mask_0 = const()[name = tensor("op_22165_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_22165_cast_fp16 = slice_by_index(begin = var_22165_begin_0, end = var_22165_end_0, end_mask = var_22165_end_mask_0, x = var_21696_cast_fp16)[name = tensor("op_22165_cast_fp16")]; + tensor var_22172_begin_0 = const()[name = tensor("op_22172_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_22172_end_0 = const()[name = tensor("op_22172_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_22172_end_mask_0 = const()[name = tensor("op_22172_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_22172_cast_fp16 = slice_by_index(begin = var_22172_begin_0, end = var_22172_end_0, end_mask = var_22172_end_mask_0, x = var_21696_cast_fp16)[name = tensor("op_22172_cast_fp16")]; + tensor var_22179_begin_0 = const()[name = tensor("op_22179_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_22179_end_0 = const()[name = tensor("op_22179_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_22179_end_mask_0 = const()[name = tensor("op_22179_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_22179_cast_fp16 = slice_by_index(begin = var_22179_begin_0, end = var_22179_end_0, end_mask = var_22179_end_mask_0, x = var_21696_cast_fp16)[name = tensor("op_22179_cast_fp16")]; + tensor var_22186_begin_0 = const()[name = tensor("op_22186_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_22186_end_0 = const()[name = tensor("op_22186_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_22186_end_mask_0 = const()[name = tensor("op_22186_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_22186_cast_fp16 = slice_by_index(begin = var_22186_begin_0, end = var_22186_end_0, end_mask = var_22186_end_mask_0, x = var_21696_cast_fp16)[name = tensor("op_22186_cast_fp16")]; + tensor var_22193_begin_0 = const()[name = tensor("op_22193_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_22193_end_0 = const()[name = tensor("op_22193_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_22193_end_mask_0 = const()[name = tensor("op_22193_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_22193_cast_fp16 = slice_by_index(begin = var_22193_begin_0, end = var_22193_end_0, end_mask = var_22193_end_mask_0, x = var_21700_cast_fp16)[name = tensor("op_22193_cast_fp16")]; + tensor var_22200_begin_0 = const()[name = tensor("op_22200_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_22200_end_0 = const()[name = tensor("op_22200_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_22200_end_mask_0 = const()[name = tensor("op_22200_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_22200_cast_fp16 = slice_by_index(begin = var_22200_begin_0, end = var_22200_end_0, end_mask = var_22200_end_mask_0, x = var_21700_cast_fp16)[name = tensor("op_22200_cast_fp16")]; + tensor var_22207_begin_0 = const()[name = tensor("op_22207_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_22207_end_0 = const()[name = tensor("op_22207_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_22207_end_mask_0 = const()[name = tensor("op_22207_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_22207_cast_fp16 = slice_by_index(begin = var_22207_begin_0, end = var_22207_end_0, end_mask = var_22207_end_mask_0, x = var_21700_cast_fp16)[name = tensor("op_22207_cast_fp16")]; + tensor var_22214_begin_0 = const()[name = tensor("op_22214_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_22214_end_0 = const()[name = tensor("op_22214_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_22214_end_mask_0 = const()[name = tensor("op_22214_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_22214_cast_fp16 = slice_by_index(begin = var_22214_begin_0, end = var_22214_end_0, end_mask = var_22214_end_mask_0, x = var_21700_cast_fp16)[name = tensor("op_22214_cast_fp16")]; + tensor var_22221_begin_0 = const()[name = tensor("op_22221_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_22221_end_0 = const()[name = tensor("op_22221_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_22221_end_mask_0 = const()[name = tensor("op_22221_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_22221_cast_fp16 = slice_by_index(begin = var_22221_begin_0, end = var_22221_end_0, end_mask = var_22221_end_mask_0, x = var_21704_cast_fp16)[name = tensor("op_22221_cast_fp16")]; + tensor var_22228_begin_0 = const()[name = tensor("op_22228_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_22228_end_0 = const()[name = tensor("op_22228_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_22228_end_mask_0 = const()[name = tensor("op_22228_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_22228_cast_fp16 = slice_by_index(begin = var_22228_begin_0, end = var_22228_end_0, end_mask = var_22228_end_mask_0, x = var_21704_cast_fp16)[name = tensor("op_22228_cast_fp16")]; + tensor var_22235_begin_0 = const()[name = tensor("op_22235_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_22235_end_0 = const()[name = tensor("op_22235_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_22235_end_mask_0 = const()[name = tensor("op_22235_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_22235_cast_fp16 = slice_by_index(begin = var_22235_begin_0, end = var_22235_end_0, end_mask = var_22235_end_mask_0, x = var_21704_cast_fp16)[name = tensor("op_22235_cast_fp16")]; + tensor var_22242_begin_0 = const()[name = tensor("op_22242_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_22242_end_0 = const()[name = tensor("op_22242_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_22242_end_mask_0 = const()[name = tensor("op_22242_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_22242_cast_fp16 = slice_by_index(begin = var_22242_begin_0, end = var_22242_end_0, end_mask = var_22242_end_mask_0, x = var_21704_cast_fp16)[name = tensor("op_22242_cast_fp16")]; + tensor var_22249_begin_0 = const()[name = tensor("op_22249_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_22249_end_0 = const()[name = tensor("op_22249_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_22249_end_mask_0 = const()[name = tensor("op_22249_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_22249_cast_fp16 = slice_by_index(begin = var_22249_begin_0, end = var_22249_end_0, end_mask = var_22249_end_mask_0, x = var_21708_cast_fp16)[name = tensor("op_22249_cast_fp16")]; + tensor var_22256_begin_0 = const()[name = tensor("op_22256_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_22256_end_0 = const()[name = tensor("op_22256_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_22256_end_mask_0 = const()[name = tensor("op_22256_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_22256_cast_fp16 = slice_by_index(begin = var_22256_begin_0, end = var_22256_end_0, end_mask = var_22256_end_mask_0, x = var_21708_cast_fp16)[name = tensor("op_22256_cast_fp16")]; + tensor var_22263_begin_0 = const()[name = tensor("op_22263_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_22263_end_0 = const()[name = tensor("op_22263_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_22263_end_mask_0 = const()[name = tensor("op_22263_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_22263_cast_fp16 = slice_by_index(begin = var_22263_begin_0, end = var_22263_end_0, end_mask = var_22263_end_mask_0, x = var_21708_cast_fp16)[name = tensor("op_22263_cast_fp16")]; + tensor var_22270_begin_0 = const()[name = tensor("op_22270_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_22270_end_0 = const()[name = tensor("op_22270_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_22270_end_mask_0 = const()[name = tensor("op_22270_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_22270_cast_fp16 = slice_by_index(begin = var_22270_begin_0, end = var_22270_end_0, end_mask = var_22270_end_mask_0, x = var_21708_cast_fp16)[name = tensor("op_22270_cast_fp16")]; + tensor k_29_perm_0 = const()[name = tensor("k_29_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_22275_begin_0 = const()[name = tensor("op_22275_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_22275_end_0 = const()[name = tensor("op_22275_end_0"), val = tensor([1, 1500, 1, 64])]; + tensor var_22275_end_mask_0 = const()[name = tensor("op_22275_end_mask_0"), val = tensor([true, true, true, false])]; + tensor transpose_17 = transpose(perm = k_29_perm_0, x = key_29_cast_fp16)[name = tensor("transpose_17")]; + tensor var_22275_cast_fp16 = slice_by_index(begin = var_22275_begin_0, end = var_22275_end_0, end_mask = var_22275_end_mask_0, x = transpose_17)[name = tensor("op_22275_cast_fp16")]; + tensor var_22279_begin_0 = const()[name = tensor("op_22279_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor var_22279_end_0 = const()[name = tensor("op_22279_end_0"), val = tensor([1, 1500, 1, 128])]; + tensor var_22279_end_mask_0 = const()[name = tensor("op_22279_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_22279_cast_fp16 = slice_by_index(begin = var_22279_begin_0, end = var_22279_end_0, end_mask = var_22279_end_mask_0, x = transpose_17)[name = tensor("op_22279_cast_fp16")]; + tensor var_22283_begin_0 = const()[name = tensor("op_22283_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_22283_end_0 = const()[name = tensor("op_22283_end_0"), val = tensor([1, 1500, 1, 192])]; + tensor var_22283_end_mask_0 = const()[name = tensor("op_22283_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_22283_cast_fp16 = slice_by_index(begin = var_22283_begin_0, end = var_22283_end_0, end_mask = var_22283_end_mask_0, x = transpose_17)[name = tensor("op_22283_cast_fp16")]; + tensor var_22287_begin_0 = const()[name = tensor("op_22287_begin_0"), val = tensor([0, 0, 0, 192])]; + tensor var_22287_end_0 = const()[name = tensor("op_22287_end_0"), val = tensor([1, 1500, 1, 256])]; + tensor var_22287_end_mask_0 = const()[name = tensor("op_22287_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_22287_cast_fp16 = slice_by_index(begin = var_22287_begin_0, end = var_22287_end_0, end_mask = var_22287_end_mask_0, x = transpose_17)[name = tensor("op_22287_cast_fp16")]; + tensor var_22291_begin_0 = const()[name = tensor("op_22291_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_22291_end_0 = const()[name = tensor("op_22291_end_0"), val = tensor([1, 1500, 1, 320])]; + tensor var_22291_end_mask_0 = const()[name = tensor("op_22291_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_22291_cast_fp16 = slice_by_index(begin = var_22291_begin_0, end = var_22291_end_0, end_mask = var_22291_end_mask_0, x = transpose_17)[name = tensor("op_22291_cast_fp16")]; + tensor var_22295_begin_0 = const()[name = tensor("op_22295_begin_0"), val = tensor([0, 0, 0, 320])]; + tensor var_22295_end_0 = const()[name = tensor("op_22295_end_0"), val = tensor([1, 1500, 1, 384])]; + tensor var_22295_end_mask_0 = const()[name = tensor("op_22295_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_22295_cast_fp16 = slice_by_index(begin = var_22295_begin_0, end = var_22295_end_0, end_mask = var_22295_end_mask_0, x = transpose_17)[name = tensor("op_22295_cast_fp16")]; + tensor var_22299_begin_0 = const()[name = tensor("op_22299_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_22299_end_0 = const()[name = tensor("op_22299_end_0"), val = tensor([1, 1500, 1, 448])]; + tensor var_22299_end_mask_0 = const()[name = tensor("op_22299_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_22299_cast_fp16 = slice_by_index(begin = var_22299_begin_0, end = var_22299_end_0, end_mask = var_22299_end_mask_0, x = transpose_17)[name = tensor("op_22299_cast_fp16")]; + tensor var_22303_begin_0 = const()[name = tensor("op_22303_begin_0"), val = tensor([0, 0, 0, 448])]; + tensor var_22303_end_0 = const()[name = tensor("op_22303_end_0"), val = tensor([1, 1500, 1, 512])]; + tensor var_22303_end_mask_0 = const()[name = tensor("op_22303_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_22303_cast_fp16 = slice_by_index(begin = var_22303_begin_0, end = var_22303_end_0, end_mask = var_22303_end_mask_0, x = transpose_17)[name = tensor("op_22303_cast_fp16")]; + tensor var_22307_begin_0 = const()[name = tensor("op_22307_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_22307_end_0 = const()[name = tensor("op_22307_end_0"), val = tensor([1, 1500, 1, 576])]; + tensor var_22307_end_mask_0 = const()[name = tensor("op_22307_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_22307_cast_fp16 = slice_by_index(begin = var_22307_begin_0, end = var_22307_end_0, end_mask = var_22307_end_mask_0, x = transpose_17)[name = tensor("op_22307_cast_fp16")]; + tensor var_22311_begin_0 = const()[name = tensor("op_22311_begin_0"), val = tensor([0, 0, 0, 576])]; + tensor var_22311_end_0 = const()[name = tensor("op_22311_end_0"), val = tensor([1, 1500, 1, 640])]; + tensor var_22311_end_mask_0 = const()[name = tensor("op_22311_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_22311_cast_fp16 = slice_by_index(begin = var_22311_begin_0, end = var_22311_end_0, end_mask = var_22311_end_mask_0, x = transpose_17)[name = tensor("op_22311_cast_fp16")]; + tensor var_22315_begin_0 = const()[name = tensor("op_22315_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_22315_end_0 = const()[name = tensor("op_22315_end_0"), val = tensor([1, 1500, 1, 704])]; + tensor var_22315_end_mask_0 = const()[name = tensor("op_22315_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_22315_cast_fp16 = slice_by_index(begin = var_22315_begin_0, end = var_22315_end_0, end_mask = var_22315_end_mask_0, x = transpose_17)[name = tensor("op_22315_cast_fp16")]; + tensor var_22319_begin_0 = const()[name = tensor("op_22319_begin_0"), val = tensor([0, 0, 0, 704])]; + tensor var_22319_end_0 = const()[name = tensor("op_22319_end_0"), val = tensor([1, 1500, 1, 768])]; + tensor var_22319_end_mask_0 = const()[name = tensor("op_22319_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_22319_cast_fp16 = slice_by_index(begin = var_22319_begin_0, end = var_22319_end_0, end_mask = var_22319_end_mask_0, x = transpose_17)[name = tensor("op_22319_cast_fp16")]; + tensor var_22323_begin_0 = const()[name = tensor("op_22323_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_22323_end_0 = const()[name = tensor("op_22323_end_0"), val = tensor([1, 1500, 1, 832])]; + tensor var_22323_end_mask_0 = const()[name = tensor("op_22323_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_22323_cast_fp16 = slice_by_index(begin = var_22323_begin_0, end = var_22323_end_0, end_mask = var_22323_end_mask_0, x = transpose_17)[name = tensor("op_22323_cast_fp16")]; + tensor var_22327_begin_0 = const()[name = tensor("op_22327_begin_0"), val = tensor([0, 0, 0, 832])]; + tensor var_22327_end_0 = const()[name = tensor("op_22327_end_0"), val = tensor([1, 1500, 1, 896])]; + tensor var_22327_end_mask_0 = const()[name = tensor("op_22327_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_22327_cast_fp16 = slice_by_index(begin = var_22327_begin_0, end = var_22327_end_0, end_mask = var_22327_end_mask_0, x = transpose_17)[name = tensor("op_22327_cast_fp16")]; + tensor var_22331_begin_0 = const()[name = tensor("op_22331_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_22331_end_0 = const()[name = tensor("op_22331_end_0"), val = tensor([1, 1500, 1, 960])]; + tensor var_22331_end_mask_0 = const()[name = tensor("op_22331_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_22331_cast_fp16 = slice_by_index(begin = var_22331_begin_0, end = var_22331_end_0, end_mask = var_22331_end_mask_0, x = transpose_17)[name = tensor("op_22331_cast_fp16")]; + tensor var_22335_begin_0 = const()[name = tensor("op_22335_begin_0"), val = tensor([0, 0, 0, 960])]; + tensor var_22335_end_0 = const()[name = tensor("op_22335_end_0"), val = tensor([1, 1500, 1, 1024])]; + tensor var_22335_end_mask_0 = const()[name = tensor("op_22335_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_22335_cast_fp16 = slice_by_index(begin = var_22335_begin_0, end = var_22335_end_0, end_mask = var_22335_end_mask_0, x = transpose_17)[name = tensor("op_22335_cast_fp16")]; + tensor var_22339_begin_0 = const()[name = tensor("op_22339_begin_0"), val = tensor([0, 0, 0, 1024])]; + tensor var_22339_end_0 = const()[name = tensor("op_22339_end_0"), val = tensor([1, 1500, 1, 1088])]; + tensor var_22339_end_mask_0 = const()[name = tensor("op_22339_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_22339_cast_fp16 = slice_by_index(begin = var_22339_begin_0, end = var_22339_end_0, end_mask = var_22339_end_mask_0, x = transpose_17)[name = tensor("op_22339_cast_fp16")]; + tensor var_22343_begin_0 = const()[name = tensor("op_22343_begin_0"), val = tensor([0, 0, 0, 1088])]; + tensor var_22343_end_0 = const()[name = tensor("op_22343_end_0"), val = tensor([1, 1500, 1, 1152])]; + tensor var_22343_end_mask_0 = const()[name = tensor("op_22343_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_22343_cast_fp16 = slice_by_index(begin = var_22343_begin_0, end = var_22343_end_0, end_mask = var_22343_end_mask_0, x = transpose_17)[name = tensor("op_22343_cast_fp16")]; + tensor var_22347_begin_0 = const()[name = tensor("op_22347_begin_0"), val = tensor([0, 0, 0, 1152])]; + tensor var_22347_end_0 = const()[name = tensor("op_22347_end_0"), val = tensor([1, 1500, 1, 1216])]; + tensor var_22347_end_mask_0 = const()[name = tensor("op_22347_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_22347_cast_fp16 = slice_by_index(begin = var_22347_begin_0, end = var_22347_end_0, end_mask = var_22347_end_mask_0, x = transpose_17)[name = tensor("op_22347_cast_fp16")]; + tensor var_22351_begin_0 = const()[name = tensor("op_22351_begin_0"), val = tensor([0, 0, 0, 1216])]; + tensor var_22351_end_0 = const()[name = tensor("op_22351_end_0"), val = tensor([1, 1500, 1, 1280])]; + tensor var_22351_end_mask_0 = const()[name = tensor("op_22351_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_22351_cast_fp16 = slice_by_index(begin = var_22351_begin_0, end = var_22351_end_0, end_mask = var_22351_end_mask_0, x = transpose_17)[name = tensor("op_22351_cast_fp16")]; + tensor var_22353_begin_0 = const()[name = tensor("op_22353_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_22353_end_0 = const()[name = tensor("op_22353_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_22353_end_mask_0 = const()[name = tensor("op_22353_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_22353_cast_fp16 = slice_by_index(begin = var_22353_begin_0, end = var_22353_end_0, end_mask = var_22353_end_mask_0, x = value_29_cast_fp16)[name = tensor("op_22353_cast_fp16")]; + tensor var_22357_begin_0 = const()[name = tensor("op_22357_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_22357_end_0 = const()[name = tensor("op_22357_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_22357_end_mask_0 = const()[name = tensor("op_22357_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_22357_cast_fp16 = slice_by_index(begin = var_22357_begin_0, end = var_22357_end_0, end_mask = var_22357_end_mask_0, x = value_29_cast_fp16)[name = tensor("op_22357_cast_fp16")]; + tensor var_22361_begin_0 = const()[name = tensor("op_22361_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_22361_end_0 = const()[name = tensor("op_22361_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_22361_end_mask_0 = const()[name = tensor("op_22361_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_22361_cast_fp16 = slice_by_index(begin = var_22361_begin_0, end = var_22361_end_0, end_mask = var_22361_end_mask_0, x = value_29_cast_fp16)[name = tensor("op_22361_cast_fp16")]; + tensor var_22365_begin_0 = const()[name = tensor("op_22365_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_22365_end_0 = const()[name = tensor("op_22365_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_22365_end_mask_0 = const()[name = tensor("op_22365_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_22365_cast_fp16 = slice_by_index(begin = var_22365_begin_0, end = var_22365_end_0, end_mask = var_22365_end_mask_0, x = value_29_cast_fp16)[name = tensor("op_22365_cast_fp16")]; + tensor var_22369_begin_0 = const()[name = tensor("op_22369_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_22369_end_0 = const()[name = tensor("op_22369_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_22369_end_mask_0 = const()[name = tensor("op_22369_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_22369_cast_fp16 = slice_by_index(begin = var_22369_begin_0, end = var_22369_end_0, end_mask = var_22369_end_mask_0, x = value_29_cast_fp16)[name = tensor("op_22369_cast_fp16")]; + tensor var_22373_begin_0 = const()[name = tensor("op_22373_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_22373_end_0 = const()[name = tensor("op_22373_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_22373_end_mask_0 = const()[name = tensor("op_22373_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_22373_cast_fp16 = slice_by_index(begin = var_22373_begin_0, end = var_22373_end_0, end_mask = var_22373_end_mask_0, x = value_29_cast_fp16)[name = tensor("op_22373_cast_fp16")]; + tensor var_22377_begin_0 = const()[name = tensor("op_22377_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_22377_end_0 = const()[name = tensor("op_22377_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_22377_end_mask_0 = const()[name = tensor("op_22377_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_22377_cast_fp16 = slice_by_index(begin = var_22377_begin_0, end = var_22377_end_0, end_mask = var_22377_end_mask_0, x = value_29_cast_fp16)[name = tensor("op_22377_cast_fp16")]; + tensor var_22381_begin_0 = const()[name = tensor("op_22381_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_22381_end_0 = const()[name = tensor("op_22381_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_22381_end_mask_0 = const()[name = tensor("op_22381_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_22381_cast_fp16 = slice_by_index(begin = var_22381_begin_0, end = var_22381_end_0, end_mask = var_22381_end_mask_0, x = value_29_cast_fp16)[name = tensor("op_22381_cast_fp16")]; + tensor var_22385_begin_0 = const()[name = tensor("op_22385_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_22385_end_0 = const()[name = tensor("op_22385_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_22385_end_mask_0 = const()[name = tensor("op_22385_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_22385_cast_fp16 = slice_by_index(begin = var_22385_begin_0, end = var_22385_end_0, end_mask = var_22385_end_mask_0, x = value_29_cast_fp16)[name = tensor("op_22385_cast_fp16")]; + tensor var_22389_begin_0 = const()[name = tensor("op_22389_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_22389_end_0 = const()[name = tensor("op_22389_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_22389_end_mask_0 = const()[name = tensor("op_22389_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_22389_cast_fp16 = slice_by_index(begin = var_22389_begin_0, end = var_22389_end_0, end_mask = var_22389_end_mask_0, x = value_29_cast_fp16)[name = tensor("op_22389_cast_fp16")]; + tensor var_22393_begin_0 = const()[name = tensor("op_22393_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_22393_end_0 = const()[name = tensor("op_22393_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_22393_end_mask_0 = const()[name = tensor("op_22393_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_22393_cast_fp16 = slice_by_index(begin = var_22393_begin_0, end = var_22393_end_0, end_mask = var_22393_end_mask_0, x = value_29_cast_fp16)[name = tensor("op_22393_cast_fp16")]; + tensor var_22397_begin_0 = const()[name = tensor("op_22397_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_22397_end_0 = const()[name = tensor("op_22397_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_22397_end_mask_0 = const()[name = tensor("op_22397_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_22397_cast_fp16 = slice_by_index(begin = var_22397_begin_0, end = var_22397_end_0, end_mask = var_22397_end_mask_0, x = value_29_cast_fp16)[name = tensor("op_22397_cast_fp16")]; + tensor var_22401_begin_0 = const()[name = tensor("op_22401_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_22401_end_0 = const()[name = tensor("op_22401_end_0"), val = tensor([1, 832, 1, 1500])]; + tensor var_22401_end_mask_0 = const()[name = tensor("op_22401_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_22401_cast_fp16 = slice_by_index(begin = var_22401_begin_0, end = var_22401_end_0, end_mask = var_22401_end_mask_0, x = value_29_cast_fp16)[name = tensor("op_22401_cast_fp16")]; + tensor var_22405_begin_0 = const()[name = tensor("op_22405_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_22405_end_0 = const()[name = tensor("op_22405_end_0"), val = tensor([1, 896, 1, 1500])]; + tensor var_22405_end_mask_0 = const()[name = tensor("op_22405_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_22405_cast_fp16 = slice_by_index(begin = var_22405_begin_0, end = var_22405_end_0, end_mask = var_22405_end_mask_0, x = value_29_cast_fp16)[name = tensor("op_22405_cast_fp16")]; + tensor var_22409_begin_0 = const()[name = tensor("op_22409_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_22409_end_0 = const()[name = tensor("op_22409_end_0"), val = tensor([1, 960, 1, 1500])]; + tensor var_22409_end_mask_0 = const()[name = tensor("op_22409_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_22409_cast_fp16 = slice_by_index(begin = var_22409_begin_0, end = var_22409_end_0, end_mask = var_22409_end_mask_0, x = value_29_cast_fp16)[name = tensor("op_22409_cast_fp16")]; + tensor var_22413_begin_0 = const()[name = tensor("op_22413_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_22413_end_0 = const()[name = tensor("op_22413_end_0"), val = tensor([1, 1024, 1, 1500])]; + tensor var_22413_end_mask_0 = const()[name = tensor("op_22413_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_22413_cast_fp16 = slice_by_index(begin = var_22413_begin_0, end = var_22413_end_0, end_mask = var_22413_end_mask_0, x = value_29_cast_fp16)[name = tensor("op_22413_cast_fp16")]; + tensor var_22417_begin_0 = const()[name = tensor("op_22417_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_22417_end_0 = const()[name = tensor("op_22417_end_0"), val = tensor([1, 1088, 1, 1500])]; + tensor var_22417_end_mask_0 = const()[name = tensor("op_22417_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_22417_cast_fp16 = slice_by_index(begin = var_22417_begin_0, end = var_22417_end_0, end_mask = var_22417_end_mask_0, x = value_29_cast_fp16)[name = tensor("op_22417_cast_fp16")]; + tensor var_22421_begin_0 = const()[name = tensor("op_22421_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_22421_end_0 = const()[name = tensor("op_22421_end_0"), val = tensor([1, 1152, 1, 1500])]; + tensor var_22421_end_mask_0 = const()[name = tensor("op_22421_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_22421_cast_fp16 = slice_by_index(begin = var_22421_begin_0, end = var_22421_end_0, end_mask = var_22421_end_mask_0, x = value_29_cast_fp16)[name = tensor("op_22421_cast_fp16")]; + tensor var_22425_begin_0 = const()[name = tensor("op_22425_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_22425_end_0 = const()[name = tensor("op_22425_end_0"), val = tensor([1, 1216, 1, 1500])]; + tensor var_22425_end_mask_0 = const()[name = tensor("op_22425_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_22425_cast_fp16 = slice_by_index(begin = var_22425_begin_0, end = var_22425_end_0, end_mask = var_22425_end_mask_0, x = value_29_cast_fp16)[name = tensor("op_22425_cast_fp16")]; + tensor var_22429_begin_0 = const()[name = tensor("op_22429_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_22429_end_0 = const()[name = tensor("op_22429_end_0"), val = tensor([1, 1280, 1, 1500])]; + tensor var_22429_end_mask_0 = const()[name = tensor("op_22429_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_22429_cast_fp16 = slice_by_index(begin = var_22429_begin_0, end = var_22429_end_0, end_mask = var_22429_end_mask_0, x = value_29_cast_fp16)[name = tensor("op_22429_cast_fp16")]; + tensor var_22433_equation_0 = const()[name = tensor("op_22433_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22433_cast_fp16 = einsum(equation = var_22433_equation_0, values = (var_22275_cast_fp16, var_21717_cast_fp16))[name = tensor("op_22433_cast_fp16")]; + tensor var_22434_to_fp16 = const()[name = tensor("op_22434_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2241_cast_fp16 = mul(x = var_22433_cast_fp16, y = var_22434_to_fp16)[name = tensor("aw_chunk_2241_cast_fp16")]; + tensor var_22437_equation_0 = const()[name = tensor("op_22437_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22437_cast_fp16 = einsum(equation = var_22437_equation_0, values = (var_22275_cast_fp16, var_21724_cast_fp16))[name = tensor("op_22437_cast_fp16")]; + tensor var_22438_to_fp16 = const()[name = tensor("op_22438_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2243_cast_fp16 = mul(x = var_22437_cast_fp16, y = var_22438_to_fp16)[name = tensor("aw_chunk_2243_cast_fp16")]; + tensor var_22441_equation_0 = const()[name = tensor("op_22441_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22441_cast_fp16 = einsum(equation = var_22441_equation_0, values = (var_22275_cast_fp16, var_21731_cast_fp16))[name = tensor("op_22441_cast_fp16")]; + tensor var_22442_to_fp16 = const()[name = tensor("op_22442_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2245_cast_fp16 = mul(x = var_22441_cast_fp16, y = var_22442_to_fp16)[name = tensor("aw_chunk_2245_cast_fp16")]; + tensor var_22445_equation_0 = const()[name = tensor("op_22445_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22445_cast_fp16 = einsum(equation = var_22445_equation_0, values = (var_22275_cast_fp16, var_21738_cast_fp16))[name = tensor("op_22445_cast_fp16")]; + tensor var_22446_to_fp16 = const()[name = tensor("op_22446_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2247_cast_fp16 = mul(x = var_22445_cast_fp16, y = var_22446_to_fp16)[name = tensor("aw_chunk_2247_cast_fp16")]; + tensor var_22449_equation_0 = const()[name = tensor("op_22449_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22449_cast_fp16 = einsum(equation = var_22449_equation_0, values = (var_22279_cast_fp16, var_21745_cast_fp16))[name = tensor("op_22449_cast_fp16")]; + tensor var_22450_to_fp16 = const()[name = tensor("op_22450_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2249_cast_fp16 = mul(x = var_22449_cast_fp16, y = var_22450_to_fp16)[name = tensor("aw_chunk_2249_cast_fp16")]; + tensor var_22453_equation_0 = const()[name = tensor("op_22453_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22453_cast_fp16 = einsum(equation = var_22453_equation_0, values = (var_22279_cast_fp16, var_21752_cast_fp16))[name = tensor("op_22453_cast_fp16")]; + tensor var_22454_to_fp16 = const()[name = tensor("op_22454_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2251_cast_fp16 = mul(x = var_22453_cast_fp16, y = var_22454_to_fp16)[name = tensor("aw_chunk_2251_cast_fp16")]; + tensor var_22457_equation_0 = const()[name = tensor("op_22457_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22457_cast_fp16 = einsum(equation = var_22457_equation_0, values = (var_22279_cast_fp16, var_21759_cast_fp16))[name = tensor("op_22457_cast_fp16")]; + tensor var_22458_to_fp16 = const()[name = tensor("op_22458_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2253_cast_fp16 = mul(x = var_22457_cast_fp16, y = var_22458_to_fp16)[name = tensor("aw_chunk_2253_cast_fp16")]; + tensor var_22461_equation_0 = const()[name = tensor("op_22461_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22461_cast_fp16 = einsum(equation = var_22461_equation_0, values = (var_22279_cast_fp16, var_21766_cast_fp16))[name = tensor("op_22461_cast_fp16")]; + tensor var_22462_to_fp16 = const()[name = tensor("op_22462_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2255_cast_fp16 = mul(x = var_22461_cast_fp16, y = var_22462_to_fp16)[name = tensor("aw_chunk_2255_cast_fp16")]; + tensor var_22465_equation_0 = const()[name = tensor("op_22465_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22465_cast_fp16 = einsum(equation = var_22465_equation_0, values = (var_22283_cast_fp16, var_21773_cast_fp16))[name = tensor("op_22465_cast_fp16")]; + tensor var_22466_to_fp16 = const()[name = tensor("op_22466_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2257_cast_fp16 = mul(x = var_22465_cast_fp16, y = var_22466_to_fp16)[name = tensor("aw_chunk_2257_cast_fp16")]; + tensor var_22469_equation_0 = const()[name = tensor("op_22469_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22469_cast_fp16 = einsum(equation = var_22469_equation_0, values = (var_22283_cast_fp16, var_21780_cast_fp16))[name = tensor("op_22469_cast_fp16")]; + tensor var_22470_to_fp16 = const()[name = tensor("op_22470_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2259_cast_fp16 = mul(x = var_22469_cast_fp16, y = var_22470_to_fp16)[name = tensor("aw_chunk_2259_cast_fp16")]; + tensor var_22473_equation_0 = const()[name = tensor("op_22473_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22473_cast_fp16 = einsum(equation = var_22473_equation_0, values = (var_22283_cast_fp16, var_21787_cast_fp16))[name = tensor("op_22473_cast_fp16")]; + tensor var_22474_to_fp16 = const()[name = tensor("op_22474_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2261_cast_fp16 = mul(x = var_22473_cast_fp16, y = var_22474_to_fp16)[name = tensor("aw_chunk_2261_cast_fp16")]; + tensor var_22477_equation_0 = const()[name = tensor("op_22477_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22477_cast_fp16 = einsum(equation = var_22477_equation_0, values = (var_22283_cast_fp16, var_21794_cast_fp16))[name = tensor("op_22477_cast_fp16")]; + tensor var_22478_to_fp16 = const()[name = tensor("op_22478_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2263_cast_fp16 = mul(x = var_22477_cast_fp16, y = var_22478_to_fp16)[name = tensor("aw_chunk_2263_cast_fp16")]; + tensor var_22481_equation_0 = const()[name = tensor("op_22481_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22481_cast_fp16 = einsum(equation = var_22481_equation_0, values = (var_22287_cast_fp16, var_21801_cast_fp16))[name = tensor("op_22481_cast_fp16")]; + tensor var_22482_to_fp16 = const()[name = tensor("op_22482_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2265_cast_fp16 = mul(x = var_22481_cast_fp16, y = var_22482_to_fp16)[name = tensor("aw_chunk_2265_cast_fp16")]; + tensor var_22485_equation_0 = const()[name = tensor("op_22485_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22485_cast_fp16 = einsum(equation = var_22485_equation_0, values = (var_22287_cast_fp16, var_21808_cast_fp16))[name = tensor("op_22485_cast_fp16")]; + tensor var_22486_to_fp16 = const()[name = tensor("op_22486_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2267_cast_fp16 = mul(x = var_22485_cast_fp16, y = var_22486_to_fp16)[name = tensor("aw_chunk_2267_cast_fp16")]; + tensor var_22489_equation_0 = const()[name = tensor("op_22489_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22489_cast_fp16 = einsum(equation = var_22489_equation_0, values = (var_22287_cast_fp16, var_21815_cast_fp16))[name = tensor("op_22489_cast_fp16")]; + tensor var_22490_to_fp16 = const()[name = tensor("op_22490_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2269_cast_fp16 = mul(x = var_22489_cast_fp16, y = var_22490_to_fp16)[name = tensor("aw_chunk_2269_cast_fp16")]; + tensor var_22493_equation_0 = const()[name = tensor("op_22493_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22493_cast_fp16 = einsum(equation = var_22493_equation_0, values = (var_22287_cast_fp16, var_21822_cast_fp16))[name = tensor("op_22493_cast_fp16")]; + tensor var_22494_to_fp16 = const()[name = tensor("op_22494_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2271_cast_fp16 = mul(x = var_22493_cast_fp16, y = var_22494_to_fp16)[name = tensor("aw_chunk_2271_cast_fp16")]; + tensor var_22497_equation_0 = const()[name = tensor("op_22497_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22497_cast_fp16 = einsum(equation = var_22497_equation_0, values = (var_22291_cast_fp16, var_21829_cast_fp16))[name = tensor("op_22497_cast_fp16")]; + tensor var_22498_to_fp16 = const()[name = tensor("op_22498_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2273_cast_fp16 = mul(x = var_22497_cast_fp16, y = var_22498_to_fp16)[name = tensor("aw_chunk_2273_cast_fp16")]; + tensor var_22501_equation_0 = const()[name = tensor("op_22501_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22501_cast_fp16 = einsum(equation = var_22501_equation_0, values = (var_22291_cast_fp16, var_21836_cast_fp16))[name = tensor("op_22501_cast_fp16")]; + tensor var_22502_to_fp16 = const()[name = tensor("op_22502_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2275_cast_fp16 = mul(x = var_22501_cast_fp16, y = var_22502_to_fp16)[name = tensor("aw_chunk_2275_cast_fp16")]; + tensor var_22505_equation_0 = const()[name = tensor("op_22505_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22505_cast_fp16 = einsum(equation = var_22505_equation_0, values = (var_22291_cast_fp16, var_21843_cast_fp16))[name = tensor("op_22505_cast_fp16")]; + tensor var_22506_to_fp16 = const()[name = tensor("op_22506_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2277_cast_fp16 = mul(x = var_22505_cast_fp16, y = var_22506_to_fp16)[name = tensor("aw_chunk_2277_cast_fp16")]; + tensor var_22509_equation_0 = const()[name = tensor("op_22509_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22509_cast_fp16 = einsum(equation = var_22509_equation_0, values = (var_22291_cast_fp16, var_21850_cast_fp16))[name = tensor("op_22509_cast_fp16")]; + tensor var_22510_to_fp16 = const()[name = tensor("op_22510_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2279_cast_fp16 = mul(x = var_22509_cast_fp16, y = var_22510_to_fp16)[name = tensor("aw_chunk_2279_cast_fp16")]; + tensor var_22513_equation_0 = const()[name = tensor("op_22513_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22513_cast_fp16 = einsum(equation = var_22513_equation_0, values = (var_22295_cast_fp16, var_21857_cast_fp16))[name = tensor("op_22513_cast_fp16")]; + tensor var_22514_to_fp16 = const()[name = tensor("op_22514_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2281_cast_fp16 = mul(x = var_22513_cast_fp16, y = var_22514_to_fp16)[name = tensor("aw_chunk_2281_cast_fp16")]; + tensor var_22517_equation_0 = const()[name = tensor("op_22517_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22517_cast_fp16 = einsum(equation = var_22517_equation_0, values = (var_22295_cast_fp16, var_21864_cast_fp16))[name = tensor("op_22517_cast_fp16")]; + tensor var_22518_to_fp16 = const()[name = tensor("op_22518_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2283_cast_fp16 = mul(x = var_22517_cast_fp16, y = var_22518_to_fp16)[name = tensor("aw_chunk_2283_cast_fp16")]; + tensor var_22521_equation_0 = const()[name = tensor("op_22521_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22521_cast_fp16 = einsum(equation = var_22521_equation_0, values = (var_22295_cast_fp16, var_21871_cast_fp16))[name = tensor("op_22521_cast_fp16")]; + tensor var_22522_to_fp16 = const()[name = tensor("op_22522_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2285_cast_fp16 = mul(x = var_22521_cast_fp16, y = var_22522_to_fp16)[name = tensor("aw_chunk_2285_cast_fp16")]; + tensor var_22525_equation_0 = const()[name = tensor("op_22525_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22525_cast_fp16 = einsum(equation = var_22525_equation_0, values = (var_22295_cast_fp16, var_21878_cast_fp16))[name = tensor("op_22525_cast_fp16")]; + tensor var_22526_to_fp16 = const()[name = tensor("op_22526_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2287_cast_fp16 = mul(x = var_22525_cast_fp16, y = var_22526_to_fp16)[name = tensor("aw_chunk_2287_cast_fp16")]; + tensor var_22529_equation_0 = const()[name = tensor("op_22529_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22529_cast_fp16 = einsum(equation = var_22529_equation_0, values = (var_22299_cast_fp16, var_21885_cast_fp16))[name = tensor("op_22529_cast_fp16")]; + tensor var_22530_to_fp16 = const()[name = tensor("op_22530_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2289_cast_fp16 = mul(x = var_22529_cast_fp16, y = var_22530_to_fp16)[name = tensor("aw_chunk_2289_cast_fp16")]; + tensor var_22533_equation_0 = const()[name = tensor("op_22533_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22533_cast_fp16 = einsum(equation = var_22533_equation_0, values = (var_22299_cast_fp16, var_21892_cast_fp16))[name = tensor("op_22533_cast_fp16")]; + tensor var_22534_to_fp16 = const()[name = tensor("op_22534_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2291_cast_fp16 = mul(x = var_22533_cast_fp16, y = var_22534_to_fp16)[name = tensor("aw_chunk_2291_cast_fp16")]; + tensor var_22537_equation_0 = const()[name = tensor("op_22537_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22537_cast_fp16 = einsum(equation = var_22537_equation_0, values = (var_22299_cast_fp16, var_21899_cast_fp16))[name = tensor("op_22537_cast_fp16")]; + tensor var_22538_to_fp16 = const()[name = tensor("op_22538_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2293_cast_fp16 = mul(x = var_22537_cast_fp16, y = var_22538_to_fp16)[name = tensor("aw_chunk_2293_cast_fp16")]; + tensor var_22541_equation_0 = const()[name = tensor("op_22541_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22541_cast_fp16 = einsum(equation = var_22541_equation_0, values = (var_22299_cast_fp16, var_21906_cast_fp16))[name = tensor("op_22541_cast_fp16")]; + tensor var_22542_to_fp16 = const()[name = tensor("op_22542_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2295_cast_fp16 = mul(x = var_22541_cast_fp16, y = var_22542_to_fp16)[name = tensor("aw_chunk_2295_cast_fp16")]; + tensor var_22545_equation_0 = const()[name = tensor("op_22545_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22545_cast_fp16 = einsum(equation = var_22545_equation_0, values = (var_22303_cast_fp16, var_21913_cast_fp16))[name = tensor("op_22545_cast_fp16")]; + tensor var_22546_to_fp16 = const()[name = tensor("op_22546_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2297_cast_fp16 = mul(x = var_22545_cast_fp16, y = var_22546_to_fp16)[name = tensor("aw_chunk_2297_cast_fp16")]; + tensor var_22549_equation_0 = const()[name = tensor("op_22549_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22549_cast_fp16 = einsum(equation = var_22549_equation_0, values = (var_22303_cast_fp16, var_21920_cast_fp16))[name = tensor("op_22549_cast_fp16")]; + tensor var_22550_to_fp16 = const()[name = tensor("op_22550_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2299_cast_fp16 = mul(x = var_22549_cast_fp16, y = var_22550_to_fp16)[name = tensor("aw_chunk_2299_cast_fp16")]; + tensor var_22553_equation_0 = const()[name = tensor("op_22553_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22553_cast_fp16 = einsum(equation = var_22553_equation_0, values = (var_22303_cast_fp16, var_21927_cast_fp16))[name = tensor("op_22553_cast_fp16")]; + tensor var_22554_to_fp16 = const()[name = tensor("op_22554_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2301_cast_fp16 = mul(x = var_22553_cast_fp16, y = var_22554_to_fp16)[name = tensor("aw_chunk_2301_cast_fp16")]; + tensor var_22557_equation_0 = const()[name = tensor("op_22557_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22557_cast_fp16 = einsum(equation = var_22557_equation_0, values = (var_22303_cast_fp16, var_21934_cast_fp16))[name = tensor("op_22557_cast_fp16")]; + tensor var_22558_to_fp16 = const()[name = tensor("op_22558_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2303_cast_fp16 = mul(x = var_22557_cast_fp16, y = var_22558_to_fp16)[name = tensor("aw_chunk_2303_cast_fp16")]; + tensor var_22561_equation_0 = const()[name = tensor("op_22561_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22561_cast_fp16 = einsum(equation = var_22561_equation_0, values = (var_22307_cast_fp16, var_21941_cast_fp16))[name = tensor("op_22561_cast_fp16")]; + tensor var_22562_to_fp16 = const()[name = tensor("op_22562_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2305_cast_fp16 = mul(x = var_22561_cast_fp16, y = var_22562_to_fp16)[name = tensor("aw_chunk_2305_cast_fp16")]; + tensor var_22565_equation_0 = const()[name = tensor("op_22565_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22565_cast_fp16 = einsum(equation = var_22565_equation_0, values = (var_22307_cast_fp16, var_21948_cast_fp16))[name = tensor("op_22565_cast_fp16")]; + tensor var_22566_to_fp16 = const()[name = tensor("op_22566_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2307_cast_fp16 = mul(x = var_22565_cast_fp16, y = var_22566_to_fp16)[name = tensor("aw_chunk_2307_cast_fp16")]; + tensor var_22569_equation_0 = const()[name = tensor("op_22569_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22569_cast_fp16 = einsum(equation = var_22569_equation_0, values = (var_22307_cast_fp16, var_21955_cast_fp16))[name = tensor("op_22569_cast_fp16")]; + tensor var_22570_to_fp16 = const()[name = tensor("op_22570_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2309_cast_fp16 = mul(x = var_22569_cast_fp16, y = var_22570_to_fp16)[name = tensor("aw_chunk_2309_cast_fp16")]; + tensor var_22573_equation_0 = const()[name = tensor("op_22573_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22573_cast_fp16 = einsum(equation = var_22573_equation_0, values = (var_22307_cast_fp16, var_21962_cast_fp16))[name = tensor("op_22573_cast_fp16")]; + tensor var_22574_to_fp16 = const()[name = tensor("op_22574_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2311_cast_fp16 = mul(x = var_22573_cast_fp16, y = var_22574_to_fp16)[name = tensor("aw_chunk_2311_cast_fp16")]; + tensor var_22577_equation_0 = const()[name = tensor("op_22577_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22577_cast_fp16 = einsum(equation = var_22577_equation_0, values = (var_22311_cast_fp16, var_21969_cast_fp16))[name = tensor("op_22577_cast_fp16")]; + tensor var_22578_to_fp16 = const()[name = tensor("op_22578_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2313_cast_fp16 = mul(x = var_22577_cast_fp16, y = var_22578_to_fp16)[name = tensor("aw_chunk_2313_cast_fp16")]; + tensor var_22581_equation_0 = const()[name = tensor("op_22581_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22581_cast_fp16 = einsum(equation = var_22581_equation_0, values = (var_22311_cast_fp16, var_21976_cast_fp16))[name = tensor("op_22581_cast_fp16")]; + tensor var_22582_to_fp16 = const()[name = tensor("op_22582_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2315_cast_fp16 = mul(x = var_22581_cast_fp16, y = var_22582_to_fp16)[name = tensor("aw_chunk_2315_cast_fp16")]; + tensor var_22585_equation_0 = const()[name = tensor("op_22585_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22585_cast_fp16 = einsum(equation = var_22585_equation_0, values = (var_22311_cast_fp16, var_21983_cast_fp16))[name = tensor("op_22585_cast_fp16")]; + tensor var_22586_to_fp16 = const()[name = tensor("op_22586_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2317_cast_fp16 = mul(x = var_22585_cast_fp16, y = var_22586_to_fp16)[name = tensor("aw_chunk_2317_cast_fp16")]; + tensor var_22589_equation_0 = const()[name = tensor("op_22589_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22589_cast_fp16 = einsum(equation = var_22589_equation_0, values = (var_22311_cast_fp16, var_21990_cast_fp16))[name = tensor("op_22589_cast_fp16")]; + tensor var_22590_to_fp16 = const()[name = tensor("op_22590_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2319_cast_fp16 = mul(x = var_22589_cast_fp16, y = var_22590_to_fp16)[name = tensor("aw_chunk_2319_cast_fp16")]; + tensor var_22593_equation_0 = const()[name = tensor("op_22593_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22593_cast_fp16 = einsum(equation = var_22593_equation_0, values = (var_22315_cast_fp16, var_21997_cast_fp16))[name = tensor("op_22593_cast_fp16")]; + tensor var_22594_to_fp16 = const()[name = tensor("op_22594_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2321_cast_fp16 = mul(x = var_22593_cast_fp16, y = var_22594_to_fp16)[name = tensor("aw_chunk_2321_cast_fp16")]; + tensor var_22597_equation_0 = const()[name = tensor("op_22597_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22597_cast_fp16 = einsum(equation = var_22597_equation_0, values = (var_22315_cast_fp16, var_22004_cast_fp16))[name = tensor("op_22597_cast_fp16")]; + tensor var_22598_to_fp16 = const()[name = tensor("op_22598_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2323_cast_fp16 = mul(x = var_22597_cast_fp16, y = var_22598_to_fp16)[name = tensor("aw_chunk_2323_cast_fp16")]; + tensor var_22601_equation_0 = const()[name = tensor("op_22601_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22601_cast_fp16 = einsum(equation = var_22601_equation_0, values = (var_22315_cast_fp16, var_22011_cast_fp16))[name = tensor("op_22601_cast_fp16")]; + tensor var_22602_to_fp16 = const()[name = tensor("op_22602_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2325_cast_fp16 = mul(x = var_22601_cast_fp16, y = var_22602_to_fp16)[name = tensor("aw_chunk_2325_cast_fp16")]; + tensor var_22605_equation_0 = const()[name = tensor("op_22605_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22605_cast_fp16 = einsum(equation = var_22605_equation_0, values = (var_22315_cast_fp16, var_22018_cast_fp16))[name = tensor("op_22605_cast_fp16")]; + tensor var_22606_to_fp16 = const()[name = tensor("op_22606_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2327_cast_fp16 = mul(x = var_22605_cast_fp16, y = var_22606_to_fp16)[name = tensor("aw_chunk_2327_cast_fp16")]; + tensor var_22609_equation_0 = const()[name = tensor("op_22609_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22609_cast_fp16 = einsum(equation = var_22609_equation_0, values = (var_22319_cast_fp16, var_22025_cast_fp16))[name = tensor("op_22609_cast_fp16")]; + tensor var_22610_to_fp16 = const()[name = tensor("op_22610_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2329_cast_fp16 = mul(x = var_22609_cast_fp16, y = var_22610_to_fp16)[name = tensor("aw_chunk_2329_cast_fp16")]; + tensor var_22613_equation_0 = const()[name = tensor("op_22613_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22613_cast_fp16 = einsum(equation = var_22613_equation_0, values = (var_22319_cast_fp16, var_22032_cast_fp16))[name = tensor("op_22613_cast_fp16")]; + tensor var_22614_to_fp16 = const()[name = tensor("op_22614_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2331_cast_fp16 = mul(x = var_22613_cast_fp16, y = var_22614_to_fp16)[name = tensor("aw_chunk_2331_cast_fp16")]; + tensor var_22617_equation_0 = const()[name = tensor("op_22617_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22617_cast_fp16 = einsum(equation = var_22617_equation_0, values = (var_22319_cast_fp16, var_22039_cast_fp16))[name = tensor("op_22617_cast_fp16")]; + tensor var_22618_to_fp16 = const()[name = tensor("op_22618_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2333_cast_fp16 = mul(x = var_22617_cast_fp16, y = var_22618_to_fp16)[name = tensor("aw_chunk_2333_cast_fp16")]; + tensor var_22621_equation_0 = const()[name = tensor("op_22621_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22621_cast_fp16 = einsum(equation = var_22621_equation_0, values = (var_22319_cast_fp16, var_22046_cast_fp16))[name = tensor("op_22621_cast_fp16")]; + tensor var_22622_to_fp16 = const()[name = tensor("op_22622_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2335_cast_fp16 = mul(x = var_22621_cast_fp16, y = var_22622_to_fp16)[name = tensor("aw_chunk_2335_cast_fp16")]; + tensor var_22625_equation_0 = const()[name = tensor("op_22625_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22625_cast_fp16 = einsum(equation = var_22625_equation_0, values = (var_22323_cast_fp16, var_22053_cast_fp16))[name = tensor("op_22625_cast_fp16")]; + tensor var_22626_to_fp16 = const()[name = tensor("op_22626_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2337_cast_fp16 = mul(x = var_22625_cast_fp16, y = var_22626_to_fp16)[name = tensor("aw_chunk_2337_cast_fp16")]; + tensor var_22629_equation_0 = const()[name = tensor("op_22629_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22629_cast_fp16 = einsum(equation = var_22629_equation_0, values = (var_22323_cast_fp16, var_22060_cast_fp16))[name = tensor("op_22629_cast_fp16")]; + tensor var_22630_to_fp16 = const()[name = tensor("op_22630_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2339_cast_fp16 = mul(x = var_22629_cast_fp16, y = var_22630_to_fp16)[name = tensor("aw_chunk_2339_cast_fp16")]; + tensor var_22633_equation_0 = const()[name = tensor("op_22633_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22633_cast_fp16 = einsum(equation = var_22633_equation_0, values = (var_22323_cast_fp16, var_22067_cast_fp16))[name = tensor("op_22633_cast_fp16")]; + tensor var_22634_to_fp16 = const()[name = tensor("op_22634_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2341_cast_fp16 = mul(x = var_22633_cast_fp16, y = var_22634_to_fp16)[name = tensor("aw_chunk_2341_cast_fp16")]; + tensor var_22637_equation_0 = const()[name = tensor("op_22637_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22637_cast_fp16 = einsum(equation = var_22637_equation_0, values = (var_22323_cast_fp16, var_22074_cast_fp16))[name = tensor("op_22637_cast_fp16")]; + tensor var_22638_to_fp16 = const()[name = tensor("op_22638_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2343_cast_fp16 = mul(x = var_22637_cast_fp16, y = var_22638_to_fp16)[name = tensor("aw_chunk_2343_cast_fp16")]; + tensor var_22641_equation_0 = const()[name = tensor("op_22641_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22641_cast_fp16 = einsum(equation = var_22641_equation_0, values = (var_22327_cast_fp16, var_22081_cast_fp16))[name = tensor("op_22641_cast_fp16")]; + tensor var_22642_to_fp16 = const()[name = tensor("op_22642_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2345_cast_fp16 = mul(x = var_22641_cast_fp16, y = var_22642_to_fp16)[name = tensor("aw_chunk_2345_cast_fp16")]; + tensor var_22645_equation_0 = const()[name = tensor("op_22645_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22645_cast_fp16 = einsum(equation = var_22645_equation_0, values = (var_22327_cast_fp16, var_22088_cast_fp16))[name = tensor("op_22645_cast_fp16")]; + tensor var_22646_to_fp16 = const()[name = tensor("op_22646_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2347_cast_fp16 = mul(x = var_22645_cast_fp16, y = var_22646_to_fp16)[name = tensor("aw_chunk_2347_cast_fp16")]; + tensor var_22649_equation_0 = const()[name = tensor("op_22649_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22649_cast_fp16 = einsum(equation = var_22649_equation_0, values = (var_22327_cast_fp16, var_22095_cast_fp16))[name = tensor("op_22649_cast_fp16")]; + tensor var_22650_to_fp16 = const()[name = tensor("op_22650_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2349_cast_fp16 = mul(x = var_22649_cast_fp16, y = var_22650_to_fp16)[name = tensor("aw_chunk_2349_cast_fp16")]; + tensor var_22653_equation_0 = const()[name = tensor("op_22653_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22653_cast_fp16 = einsum(equation = var_22653_equation_0, values = (var_22327_cast_fp16, var_22102_cast_fp16))[name = tensor("op_22653_cast_fp16")]; + tensor var_22654_to_fp16 = const()[name = tensor("op_22654_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2351_cast_fp16 = mul(x = var_22653_cast_fp16, y = var_22654_to_fp16)[name = tensor("aw_chunk_2351_cast_fp16")]; + tensor var_22657_equation_0 = const()[name = tensor("op_22657_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22657_cast_fp16 = einsum(equation = var_22657_equation_0, values = (var_22331_cast_fp16, var_22109_cast_fp16))[name = tensor("op_22657_cast_fp16")]; + tensor var_22658_to_fp16 = const()[name = tensor("op_22658_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2353_cast_fp16 = mul(x = var_22657_cast_fp16, y = var_22658_to_fp16)[name = tensor("aw_chunk_2353_cast_fp16")]; + tensor var_22661_equation_0 = const()[name = tensor("op_22661_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22661_cast_fp16 = einsum(equation = var_22661_equation_0, values = (var_22331_cast_fp16, var_22116_cast_fp16))[name = tensor("op_22661_cast_fp16")]; + tensor var_22662_to_fp16 = const()[name = tensor("op_22662_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2355_cast_fp16 = mul(x = var_22661_cast_fp16, y = var_22662_to_fp16)[name = tensor("aw_chunk_2355_cast_fp16")]; + tensor var_22665_equation_0 = const()[name = tensor("op_22665_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22665_cast_fp16 = einsum(equation = var_22665_equation_0, values = (var_22331_cast_fp16, var_22123_cast_fp16))[name = tensor("op_22665_cast_fp16")]; + tensor var_22666_to_fp16 = const()[name = tensor("op_22666_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2357_cast_fp16 = mul(x = var_22665_cast_fp16, y = var_22666_to_fp16)[name = tensor("aw_chunk_2357_cast_fp16")]; + tensor var_22669_equation_0 = const()[name = tensor("op_22669_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22669_cast_fp16 = einsum(equation = var_22669_equation_0, values = (var_22331_cast_fp16, var_22130_cast_fp16))[name = tensor("op_22669_cast_fp16")]; + tensor var_22670_to_fp16 = const()[name = tensor("op_22670_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2359_cast_fp16 = mul(x = var_22669_cast_fp16, y = var_22670_to_fp16)[name = tensor("aw_chunk_2359_cast_fp16")]; + tensor var_22673_equation_0 = const()[name = tensor("op_22673_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22673_cast_fp16 = einsum(equation = var_22673_equation_0, values = (var_22335_cast_fp16, var_22137_cast_fp16))[name = tensor("op_22673_cast_fp16")]; + tensor var_22674_to_fp16 = const()[name = tensor("op_22674_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2361_cast_fp16 = mul(x = var_22673_cast_fp16, y = var_22674_to_fp16)[name = tensor("aw_chunk_2361_cast_fp16")]; + tensor var_22677_equation_0 = const()[name = tensor("op_22677_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22677_cast_fp16 = einsum(equation = var_22677_equation_0, values = (var_22335_cast_fp16, var_22144_cast_fp16))[name = tensor("op_22677_cast_fp16")]; + tensor var_22678_to_fp16 = const()[name = tensor("op_22678_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2363_cast_fp16 = mul(x = var_22677_cast_fp16, y = var_22678_to_fp16)[name = tensor("aw_chunk_2363_cast_fp16")]; + tensor var_22681_equation_0 = const()[name = tensor("op_22681_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22681_cast_fp16 = einsum(equation = var_22681_equation_0, values = (var_22335_cast_fp16, var_22151_cast_fp16))[name = tensor("op_22681_cast_fp16")]; + tensor var_22682_to_fp16 = const()[name = tensor("op_22682_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2365_cast_fp16 = mul(x = var_22681_cast_fp16, y = var_22682_to_fp16)[name = tensor("aw_chunk_2365_cast_fp16")]; + tensor var_22685_equation_0 = const()[name = tensor("op_22685_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22685_cast_fp16 = einsum(equation = var_22685_equation_0, values = (var_22335_cast_fp16, var_22158_cast_fp16))[name = tensor("op_22685_cast_fp16")]; + tensor var_22686_to_fp16 = const()[name = tensor("op_22686_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2367_cast_fp16 = mul(x = var_22685_cast_fp16, y = var_22686_to_fp16)[name = tensor("aw_chunk_2367_cast_fp16")]; + tensor var_22689_equation_0 = const()[name = tensor("op_22689_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22689_cast_fp16 = einsum(equation = var_22689_equation_0, values = (var_22339_cast_fp16, var_22165_cast_fp16))[name = tensor("op_22689_cast_fp16")]; + tensor var_22690_to_fp16 = const()[name = tensor("op_22690_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2369_cast_fp16 = mul(x = var_22689_cast_fp16, y = var_22690_to_fp16)[name = tensor("aw_chunk_2369_cast_fp16")]; + tensor var_22693_equation_0 = const()[name = tensor("op_22693_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22693_cast_fp16 = einsum(equation = var_22693_equation_0, values = (var_22339_cast_fp16, var_22172_cast_fp16))[name = tensor("op_22693_cast_fp16")]; + tensor var_22694_to_fp16 = const()[name = tensor("op_22694_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2371_cast_fp16 = mul(x = var_22693_cast_fp16, y = var_22694_to_fp16)[name = tensor("aw_chunk_2371_cast_fp16")]; + tensor var_22697_equation_0 = const()[name = tensor("op_22697_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22697_cast_fp16 = einsum(equation = var_22697_equation_0, values = (var_22339_cast_fp16, var_22179_cast_fp16))[name = tensor("op_22697_cast_fp16")]; + tensor var_22698_to_fp16 = const()[name = tensor("op_22698_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2373_cast_fp16 = mul(x = var_22697_cast_fp16, y = var_22698_to_fp16)[name = tensor("aw_chunk_2373_cast_fp16")]; + tensor var_22701_equation_0 = const()[name = tensor("op_22701_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22701_cast_fp16 = einsum(equation = var_22701_equation_0, values = (var_22339_cast_fp16, var_22186_cast_fp16))[name = tensor("op_22701_cast_fp16")]; + tensor var_22702_to_fp16 = const()[name = tensor("op_22702_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2375_cast_fp16 = mul(x = var_22701_cast_fp16, y = var_22702_to_fp16)[name = tensor("aw_chunk_2375_cast_fp16")]; + tensor var_22705_equation_0 = const()[name = tensor("op_22705_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22705_cast_fp16 = einsum(equation = var_22705_equation_0, values = (var_22343_cast_fp16, var_22193_cast_fp16))[name = tensor("op_22705_cast_fp16")]; + tensor var_22706_to_fp16 = const()[name = tensor("op_22706_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2377_cast_fp16 = mul(x = var_22705_cast_fp16, y = var_22706_to_fp16)[name = tensor("aw_chunk_2377_cast_fp16")]; + tensor var_22709_equation_0 = const()[name = tensor("op_22709_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22709_cast_fp16 = einsum(equation = var_22709_equation_0, values = (var_22343_cast_fp16, var_22200_cast_fp16))[name = tensor("op_22709_cast_fp16")]; + tensor var_22710_to_fp16 = const()[name = tensor("op_22710_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2379_cast_fp16 = mul(x = var_22709_cast_fp16, y = var_22710_to_fp16)[name = tensor("aw_chunk_2379_cast_fp16")]; + tensor var_22713_equation_0 = const()[name = tensor("op_22713_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22713_cast_fp16 = einsum(equation = var_22713_equation_0, values = (var_22343_cast_fp16, var_22207_cast_fp16))[name = tensor("op_22713_cast_fp16")]; + tensor var_22714_to_fp16 = const()[name = tensor("op_22714_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2381_cast_fp16 = mul(x = var_22713_cast_fp16, y = var_22714_to_fp16)[name = tensor("aw_chunk_2381_cast_fp16")]; + tensor var_22717_equation_0 = const()[name = tensor("op_22717_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22717_cast_fp16 = einsum(equation = var_22717_equation_0, values = (var_22343_cast_fp16, var_22214_cast_fp16))[name = tensor("op_22717_cast_fp16")]; + tensor var_22718_to_fp16 = const()[name = tensor("op_22718_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2383_cast_fp16 = mul(x = var_22717_cast_fp16, y = var_22718_to_fp16)[name = tensor("aw_chunk_2383_cast_fp16")]; + tensor var_22721_equation_0 = const()[name = tensor("op_22721_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22721_cast_fp16 = einsum(equation = var_22721_equation_0, values = (var_22347_cast_fp16, var_22221_cast_fp16))[name = tensor("op_22721_cast_fp16")]; + tensor var_22722_to_fp16 = const()[name = tensor("op_22722_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2385_cast_fp16 = mul(x = var_22721_cast_fp16, y = var_22722_to_fp16)[name = tensor("aw_chunk_2385_cast_fp16")]; + tensor var_22725_equation_0 = const()[name = tensor("op_22725_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22725_cast_fp16 = einsum(equation = var_22725_equation_0, values = (var_22347_cast_fp16, var_22228_cast_fp16))[name = tensor("op_22725_cast_fp16")]; + tensor var_22726_to_fp16 = const()[name = tensor("op_22726_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2387_cast_fp16 = mul(x = var_22725_cast_fp16, y = var_22726_to_fp16)[name = tensor("aw_chunk_2387_cast_fp16")]; + tensor var_22729_equation_0 = const()[name = tensor("op_22729_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22729_cast_fp16 = einsum(equation = var_22729_equation_0, values = (var_22347_cast_fp16, var_22235_cast_fp16))[name = tensor("op_22729_cast_fp16")]; + tensor var_22730_to_fp16 = const()[name = tensor("op_22730_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2389_cast_fp16 = mul(x = var_22729_cast_fp16, y = var_22730_to_fp16)[name = tensor("aw_chunk_2389_cast_fp16")]; + tensor var_22733_equation_0 = const()[name = tensor("op_22733_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22733_cast_fp16 = einsum(equation = var_22733_equation_0, values = (var_22347_cast_fp16, var_22242_cast_fp16))[name = tensor("op_22733_cast_fp16")]; + tensor var_22734_to_fp16 = const()[name = tensor("op_22734_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2391_cast_fp16 = mul(x = var_22733_cast_fp16, y = var_22734_to_fp16)[name = tensor("aw_chunk_2391_cast_fp16")]; + tensor var_22737_equation_0 = const()[name = tensor("op_22737_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22737_cast_fp16 = einsum(equation = var_22737_equation_0, values = (var_22351_cast_fp16, var_22249_cast_fp16))[name = tensor("op_22737_cast_fp16")]; + tensor var_22738_to_fp16 = const()[name = tensor("op_22738_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2393_cast_fp16 = mul(x = var_22737_cast_fp16, y = var_22738_to_fp16)[name = tensor("aw_chunk_2393_cast_fp16")]; + tensor var_22741_equation_0 = const()[name = tensor("op_22741_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22741_cast_fp16 = einsum(equation = var_22741_equation_0, values = (var_22351_cast_fp16, var_22256_cast_fp16))[name = tensor("op_22741_cast_fp16")]; + tensor var_22742_to_fp16 = const()[name = tensor("op_22742_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2395_cast_fp16 = mul(x = var_22741_cast_fp16, y = var_22742_to_fp16)[name = tensor("aw_chunk_2395_cast_fp16")]; + tensor var_22745_equation_0 = const()[name = tensor("op_22745_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22745_cast_fp16 = einsum(equation = var_22745_equation_0, values = (var_22351_cast_fp16, var_22263_cast_fp16))[name = tensor("op_22745_cast_fp16")]; + tensor var_22746_to_fp16 = const()[name = tensor("op_22746_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2397_cast_fp16 = mul(x = var_22745_cast_fp16, y = var_22746_to_fp16)[name = tensor("aw_chunk_2397_cast_fp16")]; + tensor var_22749_equation_0 = const()[name = tensor("op_22749_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_22749_cast_fp16 = einsum(equation = var_22749_equation_0, values = (var_22351_cast_fp16, var_22270_cast_fp16))[name = tensor("op_22749_cast_fp16")]; + tensor var_22750_to_fp16 = const()[name = tensor("op_22750_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2399_cast_fp16 = mul(x = var_22749_cast_fp16, y = var_22750_to_fp16)[name = tensor("aw_chunk_2399_cast_fp16")]; + tensor var_22752_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2241_cast_fp16)[name = tensor("op_22752_cast_fp16")]; + tensor var_22753_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2243_cast_fp16)[name = tensor("op_22753_cast_fp16")]; + tensor var_22754_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2245_cast_fp16)[name = tensor("op_22754_cast_fp16")]; + tensor var_22755_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2247_cast_fp16)[name = tensor("op_22755_cast_fp16")]; + tensor var_22756_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2249_cast_fp16)[name = tensor("op_22756_cast_fp16")]; + tensor var_22757_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2251_cast_fp16)[name = tensor("op_22757_cast_fp16")]; + tensor var_22758_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2253_cast_fp16)[name = tensor("op_22758_cast_fp16")]; + tensor var_22759_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2255_cast_fp16)[name = tensor("op_22759_cast_fp16")]; + tensor var_22760_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2257_cast_fp16)[name = tensor("op_22760_cast_fp16")]; + tensor var_22761_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2259_cast_fp16)[name = tensor("op_22761_cast_fp16")]; + tensor var_22762_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2261_cast_fp16)[name = tensor("op_22762_cast_fp16")]; + tensor var_22763_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2263_cast_fp16)[name = tensor("op_22763_cast_fp16")]; + tensor var_22764_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2265_cast_fp16)[name = tensor("op_22764_cast_fp16")]; + tensor var_22765_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2267_cast_fp16)[name = tensor("op_22765_cast_fp16")]; + tensor var_22766_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2269_cast_fp16)[name = tensor("op_22766_cast_fp16")]; + tensor var_22767_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2271_cast_fp16)[name = tensor("op_22767_cast_fp16")]; + tensor var_22768_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2273_cast_fp16)[name = tensor("op_22768_cast_fp16")]; + tensor var_22769_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2275_cast_fp16)[name = tensor("op_22769_cast_fp16")]; + tensor var_22770_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2277_cast_fp16)[name = tensor("op_22770_cast_fp16")]; + tensor var_22771_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2279_cast_fp16)[name = tensor("op_22771_cast_fp16")]; + tensor var_22772_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2281_cast_fp16)[name = tensor("op_22772_cast_fp16")]; + tensor var_22773_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2283_cast_fp16)[name = tensor("op_22773_cast_fp16")]; + tensor var_22774_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2285_cast_fp16)[name = tensor("op_22774_cast_fp16")]; + tensor var_22775_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2287_cast_fp16)[name = tensor("op_22775_cast_fp16")]; + tensor var_22776_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2289_cast_fp16)[name = tensor("op_22776_cast_fp16")]; + tensor var_22777_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2291_cast_fp16)[name = tensor("op_22777_cast_fp16")]; + tensor var_22778_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2293_cast_fp16)[name = tensor("op_22778_cast_fp16")]; + tensor var_22779_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2295_cast_fp16)[name = tensor("op_22779_cast_fp16")]; + tensor var_22780_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2297_cast_fp16)[name = tensor("op_22780_cast_fp16")]; + tensor var_22781_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2299_cast_fp16)[name = tensor("op_22781_cast_fp16")]; + tensor var_22782_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2301_cast_fp16)[name = tensor("op_22782_cast_fp16")]; + tensor var_22783_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2303_cast_fp16)[name = tensor("op_22783_cast_fp16")]; + tensor var_22784_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2305_cast_fp16)[name = tensor("op_22784_cast_fp16")]; + tensor var_22785_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2307_cast_fp16)[name = tensor("op_22785_cast_fp16")]; + tensor var_22786_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2309_cast_fp16)[name = tensor("op_22786_cast_fp16")]; + tensor var_22787_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2311_cast_fp16)[name = tensor("op_22787_cast_fp16")]; + tensor var_22788_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2313_cast_fp16)[name = tensor("op_22788_cast_fp16")]; + tensor var_22789_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2315_cast_fp16)[name = tensor("op_22789_cast_fp16")]; + tensor var_22790_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2317_cast_fp16)[name = tensor("op_22790_cast_fp16")]; + tensor var_22791_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2319_cast_fp16)[name = tensor("op_22791_cast_fp16")]; + tensor var_22792_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2321_cast_fp16)[name = tensor("op_22792_cast_fp16")]; + tensor var_22793_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2323_cast_fp16)[name = tensor("op_22793_cast_fp16")]; + tensor var_22794_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2325_cast_fp16)[name = tensor("op_22794_cast_fp16")]; + tensor var_22795_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2327_cast_fp16)[name = tensor("op_22795_cast_fp16")]; + tensor var_22796_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2329_cast_fp16)[name = tensor("op_22796_cast_fp16")]; + tensor var_22797_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2331_cast_fp16)[name = tensor("op_22797_cast_fp16")]; + tensor var_22798_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2333_cast_fp16)[name = tensor("op_22798_cast_fp16")]; + tensor var_22799_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2335_cast_fp16)[name = tensor("op_22799_cast_fp16")]; + tensor var_22800_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2337_cast_fp16)[name = tensor("op_22800_cast_fp16")]; + tensor var_22801_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2339_cast_fp16)[name = tensor("op_22801_cast_fp16")]; + tensor var_22802_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2341_cast_fp16)[name = tensor("op_22802_cast_fp16")]; + tensor var_22803_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2343_cast_fp16)[name = tensor("op_22803_cast_fp16")]; + tensor var_22804_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2345_cast_fp16)[name = tensor("op_22804_cast_fp16")]; + tensor var_22805_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2347_cast_fp16)[name = tensor("op_22805_cast_fp16")]; + tensor var_22806_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2349_cast_fp16)[name = tensor("op_22806_cast_fp16")]; + tensor var_22807_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2351_cast_fp16)[name = tensor("op_22807_cast_fp16")]; + tensor var_22808_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2353_cast_fp16)[name = tensor("op_22808_cast_fp16")]; + tensor var_22809_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2355_cast_fp16)[name = tensor("op_22809_cast_fp16")]; + tensor var_22810_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2357_cast_fp16)[name = tensor("op_22810_cast_fp16")]; + tensor var_22811_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2359_cast_fp16)[name = tensor("op_22811_cast_fp16")]; + tensor var_22812_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2361_cast_fp16)[name = tensor("op_22812_cast_fp16")]; + tensor var_22813_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2363_cast_fp16)[name = tensor("op_22813_cast_fp16")]; + tensor var_22814_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2365_cast_fp16)[name = tensor("op_22814_cast_fp16")]; + tensor var_22815_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2367_cast_fp16)[name = tensor("op_22815_cast_fp16")]; + tensor var_22816_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2369_cast_fp16)[name = tensor("op_22816_cast_fp16")]; + tensor var_22817_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2371_cast_fp16)[name = tensor("op_22817_cast_fp16")]; + tensor var_22818_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2373_cast_fp16)[name = tensor("op_22818_cast_fp16")]; + tensor var_22819_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2375_cast_fp16)[name = tensor("op_22819_cast_fp16")]; + tensor var_22820_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2377_cast_fp16)[name = tensor("op_22820_cast_fp16")]; + tensor var_22821_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2379_cast_fp16)[name = tensor("op_22821_cast_fp16")]; + tensor var_22822_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2381_cast_fp16)[name = tensor("op_22822_cast_fp16")]; + tensor var_22823_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2383_cast_fp16)[name = tensor("op_22823_cast_fp16")]; + tensor var_22824_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2385_cast_fp16)[name = tensor("op_22824_cast_fp16")]; + tensor var_22825_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2387_cast_fp16)[name = tensor("op_22825_cast_fp16")]; + tensor var_22826_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2389_cast_fp16)[name = tensor("op_22826_cast_fp16")]; + tensor var_22827_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2391_cast_fp16)[name = tensor("op_22827_cast_fp16")]; + tensor var_22828_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2393_cast_fp16)[name = tensor("op_22828_cast_fp16")]; + tensor var_22829_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2395_cast_fp16)[name = tensor("op_22829_cast_fp16")]; + tensor var_22830_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2397_cast_fp16)[name = tensor("op_22830_cast_fp16")]; + tensor var_22831_cast_fp16 = softmax(axis = var_21577, x = aw_chunk_2399_cast_fp16)[name = tensor("op_22831_cast_fp16")]; + tensor var_22833_equation_0 = const()[name = tensor("op_22833_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22833_cast_fp16 = einsum(equation = var_22833_equation_0, values = (var_22353_cast_fp16, var_22752_cast_fp16))[name = tensor("op_22833_cast_fp16")]; + tensor var_22835_equation_0 = const()[name = tensor("op_22835_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22835_cast_fp16 = einsum(equation = var_22835_equation_0, values = (var_22353_cast_fp16, var_22753_cast_fp16))[name = tensor("op_22835_cast_fp16")]; + tensor var_22837_equation_0 = const()[name = tensor("op_22837_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22837_cast_fp16 = einsum(equation = var_22837_equation_0, values = (var_22353_cast_fp16, var_22754_cast_fp16))[name = tensor("op_22837_cast_fp16")]; + tensor var_22839_equation_0 = const()[name = tensor("op_22839_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22839_cast_fp16 = einsum(equation = var_22839_equation_0, values = (var_22353_cast_fp16, var_22755_cast_fp16))[name = tensor("op_22839_cast_fp16")]; + tensor var_22841_equation_0 = const()[name = tensor("op_22841_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22841_cast_fp16 = einsum(equation = var_22841_equation_0, values = (var_22357_cast_fp16, var_22756_cast_fp16))[name = tensor("op_22841_cast_fp16")]; + tensor var_22843_equation_0 = const()[name = tensor("op_22843_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22843_cast_fp16 = einsum(equation = var_22843_equation_0, values = (var_22357_cast_fp16, var_22757_cast_fp16))[name = tensor("op_22843_cast_fp16")]; + tensor var_22845_equation_0 = const()[name = tensor("op_22845_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22845_cast_fp16 = einsum(equation = var_22845_equation_0, values = (var_22357_cast_fp16, var_22758_cast_fp16))[name = tensor("op_22845_cast_fp16")]; + tensor var_22847_equation_0 = const()[name = tensor("op_22847_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22847_cast_fp16 = einsum(equation = var_22847_equation_0, values = (var_22357_cast_fp16, var_22759_cast_fp16))[name = tensor("op_22847_cast_fp16")]; + tensor var_22849_equation_0 = const()[name = tensor("op_22849_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22849_cast_fp16 = einsum(equation = var_22849_equation_0, values = (var_22361_cast_fp16, var_22760_cast_fp16))[name = tensor("op_22849_cast_fp16")]; + tensor var_22851_equation_0 = const()[name = tensor("op_22851_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22851_cast_fp16 = einsum(equation = var_22851_equation_0, values = (var_22361_cast_fp16, var_22761_cast_fp16))[name = tensor("op_22851_cast_fp16")]; + tensor var_22853_equation_0 = const()[name = tensor("op_22853_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22853_cast_fp16 = einsum(equation = var_22853_equation_0, values = (var_22361_cast_fp16, var_22762_cast_fp16))[name = tensor("op_22853_cast_fp16")]; + tensor var_22855_equation_0 = const()[name = tensor("op_22855_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22855_cast_fp16 = einsum(equation = var_22855_equation_0, values = (var_22361_cast_fp16, var_22763_cast_fp16))[name = tensor("op_22855_cast_fp16")]; + tensor var_22857_equation_0 = const()[name = tensor("op_22857_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22857_cast_fp16 = einsum(equation = var_22857_equation_0, values = (var_22365_cast_fp16, var_22764_cast_fp16))[name = tensor("op_22857_cast_fp16")]; + tensor var_22859_equation_0 = const()[name = tensor("op_22859_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22859_cast_fp16 = einsum(equation = var_22859_equation_0, values = (var_22365_cast_fp16, var_22765_cast_fp16))[name = tensor("op_22859_cast_fp16")]; + tensor var_22861_equation_0 = const()[name = tensor("op_22861_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22861_cast_fp16 = einsum(equation = var_22861_equation_0, values = (var_22365_cast_fp16, var_22766_cast_fp16))[name = tensor("op_22861_cast_fp16")]; + tensor var_22863_equation_0 = const()[name = tensor("op_22863_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22863_cast_fp16 = einsum(equation = var_22863_equation_0, values = (var_22365_cast_fp16, var_22767_cast_fp16))[name = tensor("op_22863_cast_fp16")]; + tensor var_22865_equation_0 = const()[name = tensor("op_22865_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22865_cast_fp16 = einsum(equation = var_22865_equation_0, values = (var_22369_cast_fp16, var_22768_cast_fp16))[name = tensor("op_22865_cast_fp16")]; + tensor var_22867_equation_0 = const()[name = tensor("op_22867_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22867_cast_fp16 = einsum(equation = var_22867_equation_0, values = (var_22369_cast_fp16, var_22769_cast_fp16))[name = tensor("op_22867_cast_fp16")]; + tensor var_22869_equation_0 = const()[name = tensor("op_22869_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22869_cast_fp16 = einsum(equation = var_22869_equation_0, values = (var_22369_cast_fp16, var_22770_cast_fp16))[name = tensor("op_22869_cast_fp16")]; + tensor var_22871_equation_0 = const()[name = tensor("op_22871_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22871_cast_fp16 = einsum(equation = var_22871_equation_0, values = (var_22369_cast_fp16, var_22771_cast_fp16))[name = tensor("op_22871_cast_fp16")]; + tensor var_22873_equation_0 = const()[name = tensor("op_22873_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22873_cast_fp16 = einsum(equation = var_22873_equation_0, values = (var_22373_cast_fp16, var_22772_cast_fp16))[name = tensor("op_22873_cast_fp16")]; + tensor var_22875_equation_0 = const()[name = tensor("op_22875_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22875_cast_fp16 = einsum(equation = var_22875_equation_0, values = (var_22373_cast_fp16, var_22773_cast_fp16))[name = tensor("op_22875_cast_fp16")]; + tensor var_22877_equation_0 = const()[name = tensor("op_22877_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22877_cast_fp16 = einsum(equation = var_22877_equation_0, values = (var_22373_cast_fp16, var_22774_cast_fp16))[name = tensor("op_22877_cast_fp16")]; + tensor var_22879_equation_0 = const()[name = tensor("op_22879_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22879_cast_fp16 = einsum(equation = var_22879_equation_0, values = (var_22373_cast_fp16, var_22775_cast_fp16))[name = tensor("op_22879_cast_fp16")]; + tensor var_22881_equation_0 = const()[name = tensor("op_22881_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22881_cast_fp16 = einsum(equation = var_22881_equation_0, values = (var_22377_cast_fp16, var_22776_cast_fp16))[name = tensor("op_22881_cast_fp16")]; + tensor var_22883_equation_0 = const()[name = tensor("op_22883_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22883_cast_fp16 = einsum(equation = var_22883_equation_0, values = (var_22377_cast_fp16, var_22777_cast_fp16))[name = tensor("op_22883_cast_fp16")]; + tensor var_22885_equation_0 = const()[name = tensor("op_22885_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22885_cast_fp16 = einsum(equation = var_22885_equation_0, values = (var_22377_cast_fp16, var_22778_cast_fp16))[name = tensor("op_22885_cast_fp16")]; + tensor var_22887_equation_0 = const()[name = tensor("op_22887_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22887_cast_fp16 = einsum(equation = var_22887_equation_0, values = (var_22377_cast_fp16, var_22779_cast_fp16))[name = tensor("op_22887_cast_fp16")]; + tensor var_22889_equation_0 = const()[name = tensor("op_22889_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22889_cast_fp16 = einsum(equation = var_22889_equation_0, values = (var_22381_cast_fp16, var_22780_cast_fp16))[name = tensor("op_22889_cast_fp16")]; + tensor var_22891_equation_0 = const()[name = tensor("op_22891_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22891_cast_fp16 = einsum(equation = var_22891_equation_0, values = (var_22381_cast_fp16, var_22781_cast_fp16))[name = tensor("op_22891_cast_fp16")]; + tensor var_22893_equation_0 = const()[name = tensor("op_22893_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22893_cast_fp16 = einsum(equation = var_22893_equation_0, values = (var_22381_cast_fp16, var_22782_cast_fp16))[name = tensor("op_22893_cast_fp16")]; + tensor var_22895_equation_0 = const()[name = tensor("op_22895_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22895_cast_fp16 = einsum(equation = var_22895_equation_0, values = (var_22381_cast_fp16, var_22783_cast_fp16))[name = tensor("op_22895_cast_fp16")]; + tensor var_22897_equation_0 = const()[name = tensor("op_22897_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22897_cast_fp16 = einsum(equation = var_22897_equation_0, values = (var_22385_cast_fp16, var_22784_cast_fp16))[name = tensor("op_22897_cast_fp16")]; + tensor var_22899_equation_0 = const()[name = tensor("op_22899_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22899_cast_fp16 = einsum(equation = var_22899_equation_0, values = (var_22385_cast_fp16, var_22785_cast_fp16))[name = tensor("op_22899_cast_fp16")]; + tensor var_22901_equation_0 = const()[name = tensor("op_22901_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22901_cast_fp16 = einsum(equation = var_22901_equation_0, values = (var_22385_cast_fp16, var_22786_cast_fp16))[name = tensor("op_22901_cast_fp16")]; + tensor var_22903_equation_0 = const()[name = tensor("op_22903_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22903_cast_fp16 = einsum(equation = var_22903_equation_0, values = (var_22385_cast_fp16, var_22787_cast_fp16))[name = tensor("op_22903_cast_fp16")]; + tensor var_22905_equation_0 = const()[name = tensor("op_22905_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22905_cast_fp16 = einsum(equation = var_22905_equation_0, values = (var_22389_cast_fp16, var_22788_cast_fp16))[name = tensor("op_22905_cast_fp16")]; + tensor var_22907_equation_0 = const()[name = tensor("op_22907_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22907_cast_fp16 = einsum(equation = var_22907_equation_0, values = (var_22389_cast_fp16, var_22789_cast_fp16))[name = tensor("op_22907_cast_fp16")]; + tensor var_22909_equation_0 = const()[name = tensor("op_22909_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22909_cast_fp16 = einsum(equation = var_22909_equation_0, values = (var_22389_cast_fp16, var_22790_cast_fp16))[name = tensor("op_22909_cast_fp16")]; + tensor var_22911_equation_0 = const()[name = tensor("op_22911_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22911_cast_fp16 = einsum(equation = var_22911_equation_0, values = (var_22389_cast_fp16, var_22791_cast_fp16))[name = tensor("op_22911_cast_fp16")]; + tensor var_22913_equation_0 = const()[name = tensor("op_22913_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22913_cast_fp16 = einsum(equation = var_22913_equation_0, values = (var_22393_cast_fp16, var_22792_cast_fp16))[name = tensor("op_22913_cast_fp16")]; + tensor var_22915_equation_0 = const()[name = tensor("op_22915_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22915_cast_fp16 = einsum(equation = var_22915_equation_0, values = (var_22393_cast_fp16, var_22793_cast_fp16))[name = tensor("op_22915_cast_fp16")]; + tensor var_22917_equation_0 = const()[name = tensor("op_22917_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22917_cast_fp16 = einsum(equation = var_22917_equation_0, values = (var_22393_cast_fp16, var_22794_cast_fp16))[name = tensor("op_22917_cast_fp16")]; + tensor var_22919_equation_0 = const()[name = tensor("op_22919_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22919_cast_fp16 = einsum(equation = var_22919_equation_0, values = (var_22393_cast_fp16, var_22795_cast_fp16))[name = tensor("op_22919_cast_fp16")]; + tensor var_22921_equation_0 = const()[name = tensor("op_22921_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22921_cast_fp16 = einsum(equation = var_22921_equation_0, values = (var_22397_cast_fp16, var_22796_cast_fp16))[name = tensor("op_22921_cast_fp16")]; + tensor var_22923_equation_0 = const()[name = tensor("op_22923_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22923_cast_fp16 = einsum(equation = var_22923_equation_0, values = (var_22397_cast_fp16, var_22797_cast_fp16))[name = tensor("op_22923_cast_fp16")]; + tensor var_22925_equation_0 = const()[name = tensor("op_22925_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22925_cast_fp16 = einsum(equation = var_22925_equation_0, values = (var_22397_cast_fp16, var_22798_cast_fp16))[name = tensor("op_22925_cast_fp16")]; + tensor var_22927_equation_0 = const()[name = tensor("op_22927_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22927_cast_fp16 = einsum(equation = var_22927_equation_0, values = (var_22397_cast_fp16, var_22799_cast_fp16))[name = tensor("op_22927_cast_fp16")]; + tensor var_22929_equation_0 = const()[name = tensor("op_22929_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22929_cast_fp16 = einsum(equation = var_22929_equation_0, values = (var_22401_cast_fp16, var_22800_cast_fp16))[name = tensor("op_22929_cast_fp16")]; + tensor var_22931_equation_0 = const()[name = tensor("op_22931_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22931_cast_fp16 = einsum(equation = var_22931_equation_0, values = (var_22401_cast_fp16, var_22801_cast_fp16))[name = tensor("op_22931_cast_fp16")]; + tensor var_22933_equation_0 = const()[name = tensor("op_22933_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22933_cast_fp16 = einsum(equation = var_22933_equation_0, values = (var_22401_cast_fp16, var_22802_cast_fp16))[name = tensor("op_22933_cast_fp16")]; + tensor var_22935_equation_0 = const()[name = tensor("op_22935_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22935_cast_fp16 = einsum(equation = var_22935_equation_0, values = (var_22401_cast_fp16, var_22803_cast_fp16))[name = tensor("op_22935_cast_fp16")]; + tensor var_22937_equation_0 = const()[name = tensor("op_22937_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22937_cast_fp16 = einsum(equation = var_22937_equation_0, values = (var_22405_cast_fp16, var_22804_cast_fp16))[name = tensor("op_22937_cast_fp16")]; + tensor var_22939_equation_0 = const()[name = tensor("op_22939_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22939_cast_fp16 = einsum(equation = var_22939_equation_0, values = (var_22405_cast_fp16, var_22805_cast_fp16))[name = tensor("op_22939_cast_fp16")]; + tensor var_22941_equation_0 = const()[name = tensor("op_22941_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22941_cast_fp16 = einsum(equation = var_22941_equation_0, values = (var_22405_cast_fp16, var_22806_cast_fp16))[name = tensor("op_22941_cast_fp16")]; + tensor var_22943_equation_0 = const()[name = tensor("op_22943_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22943_cast_fp16 = einsum(equation = var_22943_equation_0, values = (var_22405_cast_fp16, var_22807_cast_fp16))[name = tensor("op_22943_cast_fp16")]; + tensor var_22945_equation_0 = const()[name = tensor("op_22945_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22945_cast_fp16 = einsum(equation = var_22945_equation_0, values = (var_22409_cast_fp16, var_22808_cast_fp16))[name = tensor("op_22945_cast_fp16")]; + tensor var_22947_equation_0 = const()[name = tensor("op_22947_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22947_cast_fp16 = einsum(equation = var_22947_equation_0, values = (var_22409_cast_fp16, var_22809_cast_fp16))[name = tensor("op_22947_cast_fp16")]; + tensor var_22949_equation_0 = const()[name = tensor("op_22949_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22949_cast_fp16 = einsum(equation = var_22949_equation_0, values = (var_22409_cast_fp16, var_22810_cast_fp16))[name = tensor("op_22949_cast_fp16")]; + tensor var_22951_equation_0 = const()[name = tensor("op_22951_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22951_cast_fp16 = einsum(equation = var_22951_equation_0, values = (var_22409_cast_fp16, var_22811_cast_fp16))[name = tensor("op_22951_cast_fp16")]; + tensor var_22953_equation_0 = const()[name = tensor("op_22953_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22953_cast_fp16 = einsum(equation = var_22953_equation_0, values = (var_22413_cast_fp16, var_22812_cast_fp16))[name = tensor("op_22953_cast_fp16")]; + tensor var_22955_equation_0 = const()[name = tensor("op_22955_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22955_cast_fp16 = einsum(equation = var_22955_equation_0, values = (var_22413_cast_fp16, var_22813_cast_fp16))[name = tensor("op_22955_cast_fp16")]; + tensor var_22957_equation_0 = const()[name = tensor("op_22957_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22957_cast_fp16 = einsum(equation = var_22957_equation_0, values = (var_22413_cast_fp16, var_22814_cast_fp16))[name = tensor("op_22957_cast_fp16")]; + tensor var_22959_equation_0 = const()[name = tensor("op_22959_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22959_cast_fp16 = einsum(equation = var_22959_equation_0, values = (var_22413_cast_fp16, var_22815_cast_fp16))[name = tensor("op_22959_cast_fp16")]; + tensor var_22961_equation_0 = const()[name = tensor("op_22961_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22961_cast_fp16 = einsum(equation = var_22961_equation_0, values = (var_22417_cast_fp16, var_22816_cast_fp16))[name = tensor("op_22961_cast_fp16")]; + tensor var_22963_equation_0 = const()[name = tensor("op_22963_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22963_cast_fp16 = einsum(equation = var_22963_equation_0, values = (var_22417_cast_fp16, var_22817_cast_fp16))[name = tensor("op_22963_cast_fp16")]; + tensor var_22965_equation_0 = const()[name = tensor("op_22965_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22965_cast_fp16 = einsum(equation = var_22965_equation_0, values = (var_22417_cast_fp16, var_22818_cast_fp16))[name = tensor("op_22965_cast_fp16")]; + tensor var_22967_equation_0 = const()[name = tensor("op_22967_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22967_cast_fp16 = einsum(equation = var_22967_equation_0, values = (var_22417_cast_fp16, var_22819_cast_fp16))[name = tensor("op_22967_cast_fp16")]; + tensor var_22969_equation_0 = const()[name = tensor("op_22969_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22969_cast_fp16 = einsum(equation = var_22969_equation_0, values = (var_22421_cast_fp16, var_22820_cast_fp16))[name = tensor("op_22969_cast_fp16")]; + tensor var_22971_equation_0 = const()[name = tensor("op_22971_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22971_cast_fp16 = einsum(equation = var_22971_equation_0, values = (var_22421_cast_fp16, var_22821_cast_fp16))[name = tensor("op_22971_cast_fp16")]; + tensor var_22973_equation_0 = const()[name = tensor("op_22973_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22973_cast_fp16 = einsum(equation = var_22973_equation_0, values = (var_22421_cast_fp16, var_22822_cast_fp16))[name = tensor("op_22973_cast_fp16")]; + tensor var_22975_equation_0 = const()[name = tensor("op_22975_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22975_cast_fp16 = einsum(equation = var_22975_equation_0, values = (var_22421_cast_fp16, var_22823_cast_fp16))[name = tensor("op_22975_cast_fp16")]; + tensor var_22977_equation_0 = const()[name = tensor("op_22977_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22977_cast_fp16 = einsum(equation = var_22977_equation_0, values = (var_22425_cast_fp16, var_22824_cast_fp16))[name = tensor("op_22977_cast_fp16")]; + tensor var_22979_equation_0 = const()[name = tensor("op_22979_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22979_cast_fp16 = einsum(equation = var_22979_equation_0, values = (var_22425_cast_fp16, var_22825_cast_fp16))[name = tensor("op_22979_cast_fp16")]; + tensor var_22981_equation_0 = const()[name = tensor("op_22981_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22981_cast_fp16 = einsum(equation = var_22981_equation_0, values = (var_22425_cast_fp16, var_22826_cast_fp16))[name = tensor("op_22981_cast_fp16")]; + tensor var_22983_equation_0 = const()[name = tensor("op_22983_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22983_cast_fp16 = einsum(equation = var_22983_equation_0, values = (var_22425_cast_fp16, var_22827_cast_fp16))[name = tensor("op_22983_cast_fp16")]; + tensor var_22985_equation_0 = const()[name = tensor("op_22985_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22985_cast_fp16 = einsum(equation = var_22985_equation_0, values = (var_22429_cast_fp16, var_22828_cast_fp16))[name = tensor("op_22985_cast_fp16")]; + tensor var_22987_equation_0 = const()[name = tensor("op_22987_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22987_cast_fp16 = einsum(equation = var_22987_equation_0, values = (var_22429_cast_fp16, var_22829_cast_fp16))[name = tensor("op_22987_cast_fp16")]; + tensor var_22989_equation_0 = const()[name = tensor("op_22989_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22989_cast_fp16 = einsum(equation = var_22989_equation_0, values = (var_22429_cast_fp16, var_22830_cast_fp16))[name = tensor("op_22989_cast_fp16")]; + tensor var_22991_equation_0 = const()[name = tensor("op_22991_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_22991_cast_fp16 = einsum(equation = var_22991_equation_0, values = (var_22429_cast_fp16, var_22831_cast_fp16))[name = tensor("op_22991_cast_fp16")]; + tensor var_22993_interleave_0 = const()[name = tensor("op_22993_interleave_0"), val = tensor(false)]; + tensor var_22993_cast_fp16 = concat(axis = var_21552, interleave = var_22993_interleave_0, values = (var_22833_cast_fp16, var_22835_cast_fp16, var_22837_cast_fp16, var_22839_cast_fp16))[name = tensor("op_22993_cast_fp16")]; + tensor var_22995_interleave_0 = const()[name = tensor("op_22995_interleave_0"), val = tensor(false)]; + tensor var_22995_cast_fp16 = concat(axis = var_21552, interleave = var_22995_interleave_0, values = (var_22841_cast_fp16, var_22843_cast_fp16, var_22845_cast_fp16, var_22847_cast_fp16))[name = tensor("op_22995_cast_fp16")]; + tensor var_22997_interleave_0 = const()[name = tensor("op_22997_interleave_0"), val = tensor(false)]; + tensor var_22997_cast_fp16 = concat(axis = var_21552, interleave = var_22997_interleave_0, values = (var_22849_cast_fp16, var_22851_cast_fp16, var_22853_cast_fp16, var_22855_cast_fp16))[name = tensor("op_22997_cast_fp16")]; + tensor var_22999_interleave_0 = const()[name = tensor("op_22999_interleave_0"), val = tensor(false)]; + tensor var_22999_cast_fp16 = concat(axis = var_21552, interleave = var_22999_interleave_0, values = (var_22857_cast_fp16, var_22859_cast_fp16, var_22861_cast_fp16, var_22863_cast_fp16))[name = tensor("op_22999_cast_fp16")]; + tensor var_23001_interleave_0 = const()[name = tensor("op_23001_interleave_0"), val = tensor(false)]; + tensor var_23001_cast_fp16 = concat(axis = var_21552, interleave = var_23001_interleave_0, values = (var_22865_cast_fp16, var_22867_cast_fp16, var_22869_cast_fp16, var_22871_cast_fp16))[name = tensor("op_23001_cast_fp16")]; + tensor var_23003_interleave_0 = const()[name = tensor("op_23003_interleave_0"), val = tensor(false)]; + tensor var_23003_cast_fp16 = concat(axis = var_21552, interleave = var_23003_interleave_0, values = (var_22873_cast_fp16, var_22875_cast_fp16, var_22877_cast_fp16, var_22879_cast_fp16))[name = tensor("op_23003_cast_fp16")]; + tensor var_23005_interleave_0 = const()[name = tensor("op_23005_interleave_0"), val = tensor(false)]; + tensor var_23005_cast_fp16 = concat(axis = var_21552, interleave = var_23005_interleave_0, values = (var_22881_cast_fp16, var_22883_cast_fp16, var_22885_cast_fp16, var_22887_cast_fp16))[name = tensor("op_23005_cast_fp16")]; + tensor var_23007_interleave_0 = const()[name = tensor("op_23007_interleave_0"), val = tensor(false)]; + tensor var_23007_cast_fp16 = concat(axis = var_21552, interleave = var_23007_interleave_0, values = (var_22889_cast_fp16, var_22891_cast_fp16, var_22893_cast_fp16, var_22895_cast_fp16))[name = tensor("op_23007_cast_fp16")]; + tensor var_23009_interleave_0 = const()[name = tensor("op_23009_interleave_0"), val = tensor(false)]; + tensor var_23009_cast_fp16 = concat(axis = var_21552, interleave = var_23009_interleave_0, values = (var_22897_cast_fp16, var_22899_cast_fp16, var_22901_cast_fp16, var_22903_cast_fp16))[name = tensor("op_23009_cast_fp16")]; + tensor var_23011_interleave_0 = const()[name = tensor("op_23011_interleave_0"), val = tensor(false)]; + tensor var_23011_cast_fp16 = concat(axis = var_21552, interleave = var_23011_interleave_0, values = (var_22905_cast_fp16, var_22907_cast_fp16, var_22909_cast_fp16, var_22911_cast_fp16))[name = tensor("op_23011_cast_fp16")]; + tensor var_23013_interleave_0 = const()[name = tensor("op_23013_interleave_0"), val = tensor(false)]; + tensor var_23013_cast_fp16 = concat(axis = var_21552, interleave = var_23013_interleave_0, values = (var_22913_cast_fp16, var_22915_cast_fp16, var_22917_cast_fp16, var_22919_cast_fp16))[name = tensor("op_23013_cast_fp16")]; + tensor var_23015_interleave_0 = const()[name = tensor("op_23015_interleave_0"), val = tensor(false)]; + tensor var_23015_cast_fp16 = concat(axis = var_21552, interleave = var_23015_interleave_0, values = (var_22921_cast_fp16, var_22923_cast_fp16, var_22925_cast_fp16, var_22927_cast_fp16))[name = tensor("op_23015_cast_fp16")]; + tensor var_23017_interleave_0 = const()[name = tensor("op_23017_interleave_0"), val = tensor(false)]; + tensor var_23017_cast_fp16 = concat(axis = var_21552, interleave = var_23017_interleave_0, values = (var_22929_cast_fp16, var_22931_cast_fp16, var_22933_cast_fp16, var_22935_cast_fp16))[name = tensor("op_23017_cast_fp16")]; + tensor var_23019_interleave_0 = const()[name = tensor("op_23019_interleave_0"), val = tensor(false)]; + tensor var_23019_cast_fp16 = concat(axis = var_21552, interleave = var_23019_interleave_0, values = (var_22937_cast_fp16, var_22939_cast_fp16, var_22941_cast_fp16, var_22943_cast_fp16))[name = tensor("op_23019_cast_fp16")]; + tensor var_23021_interleave_0 = const()[name = tensor("op_23021_interleave_0"), val = tensor(false)]; + tensor var_23021_cast_fp16 = concat(axis = var_21552, interleave = var_23021_interleave_0, values = (var_22945_cast_fp16, var_22947_cast_fp16, var_22949_cast_fp16, var_22951_cast_fp16))[name = tensor("op_23021_cast_fp16")]; + tensor var_23023_interleave_0 = const()[name = tensor("op_23023_interleave_0"), val = tensor(false)]; + tensor var_23023_cast_fp16 = concat(axis = var_21552, interleave = var_23023_interleave_0, values = (var_22953_cast_fp16, var_22955_cast_fp16, var_22957_cast_fp16, var_22959_cast_fp16))[name = tensor("op_23023_cast_fp16")]; + tensor var_23025_interleave_0 = const()[name = tensor("op_23025_interleave_0"), val = tensor(false)]; + tensor var_23025_cast_fp16 = concat(axis = var_21552, interleave = var_23025_interleave_0, values = (var_22961_cast_fp16, var_22963_cast_fp16, var_22965_cast_fp16, var_22967_cast_fp16))[name = tensor("op_23025_cast_fp16")]; + tensor var_23027_interleave_0 = const()[name = tensor("op_23027_interleave_0"), val = tensor(false)]; + tensor var_23027_cast_fp16 = concat(axis = var_21552, interleave = var_23027_interleave_0, values = (var_22969_cast_fp16, var_22971_cast_fp16, var_22973_cast_fp16, var_22975_cast_fp16))[name = tensor("op_23027_cast_fp16")]; + tensor var_23029_interleave_0 = const()[name = tensor("op_23029_interleave_0"), val = tensor(false)]; + tensor var_23029_cast_fp16 = concat(axis = var_21552, interleave = var_23029_interleave_0, values = (var_22977_cast_fp16, var_22979_cast_fp16, var_22981_cast_fp16, var_22983_cast_fp16))[name = tensor("op_23029_cast_fp16")]; + tensor var_23031_interleave_0 = const()[name = tensor("op_23031_interleave_0"), val = tensor(false)]; + tensor var_23031_cast_fp16 = concat(axis = var_21552, interleave = var_23031_interleave_0, values = (var_22985_cast_fp16, var_22987_cast_fp16, var_22989_cast_fp16, var_22991_cast_fp16))[name = tensor("op_23031_cast_fp16")]; + tensor input_113_interleave_0 = const()[name = tensor("input_113_interleave_0"), val = tensor(false)]; + tensor input_113_cast_fp16 = concat(axis = var_21577, interleave = input_113_interleave_0, values = (var_22993_cast_fp16, var_22995_cast_fp16, var_22997_cast_fp16, var_22999_cast_fp16, var_23001_cast_fp16, var_23003_cast_fp16, var_23005_cast_fp16, var_23007_cast_fp16, var_23009_cast_fp16, var_23011_cast_fp16, var_23013_cast_fp16, var_23015_cast_fp16, var_23017_cast_fp16, var_23019_cast_fp16, var_23021_cast_fp16, var_23023_cast_fp16, var_23025_cast_fp16, var_23027_cast_fp16, var_23029_cast_fp16, var_23031_cast_fp16))[name = tensor("input_113_cast_fp16")]; + tensor var_23036 = const()[name = tensor("op_23036"), val = tensor([1, 1])]; + tensor var_23038 = const()[name = tensor("op_23038"), val = tensor([1, 1])]; + tensor obj_59_pad_type_0 = const()[name = tensor("obj_59_pad_type_0"), val = tensor("custom")]; + tensor obj_59_pad_0 = const()[name = tensor("obj_59_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_14_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_14_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(575082560)))]; + tensor layers_14_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_14_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(578359424)))]; + tensor obj_59_cast_fp16 = conv(bias = layers_14_self_attn_o_proj_bias_to_fp16, dilations = var_23038, groups = var_21577, pad = obj_59_pad_0, pad_type = obj_59_pad_type_0, strides = var_23036, weight = layers_14_self_attn_o_proj_weight_to_fp16, x = input_113_cast_fp16)[name = tensor("obj_59_cast_fp16")]; + tensor inputs_59_cast_fp16 = add(x = inputs_57_cast_fp16, y = obj_59_cast_fp16)[name = tensor("inputs_59_cast_fp16")]; + tensor var_23044 = const()[name = tensor("op_23044"), val = tensor([1])]; + tensor channels_mean_59_cast_fp16 = reduce_mean(axes = var_23044, keep_dims = var_21578, x = inputs_59_cast_fp16)[name = tensor("channels_mean_59_cast_fp16")]; + tensor zero_mean_59_cast_fp16 = sub(x = inputs_59_cast_fp16, y = channels_mean_59_cast_fp16)[name = tensor("zero_mean_59_cast_fp16")]; + tensor zero_mean_sq_59_cast_fp16 = mul(x = zero_mean_59_cast_fp16, y = zero_mean_59_cast_fp16)[name = tensor("zero_mean_sq_59_cast_fp16")]; + tensor var_23048 = const()[name = tensor("op_23048"), val = tensor([1])]; + tensor var_23049_cast_fp16 = reduce_mean(axes = var_23048, keep_dims = var_21578, x = zero_mean_sq_59_cast_fp16)[name = tensor("op_23049_cast_fp16")]; + tensor var_23050_to_fp16 = const()[name = tensor("op_23050_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_23051_cast_fp16 = add(x = var_23049_cast_fp16, y = var_23050_to_fp16)[name = tensor("op_23051_cast_fp16")]; + tensor denom_59_epsilon_0_to_fp16 = const()[name = tensor("denom_59_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_59_cast_fp16 = rsqrt(epsilon = denom_59_epsilon_0_to_fp16, x = var_23051_cast_fp16)[name = tensor("denom_59_cast_fp16")]; + tensor out_59_cast_fp16 = mul(x = zero_mean_59_cast_fp16, y = denom_59_cast_fp16)[name = tensor("out_59_cast_fp16")]; + tensor input_115_gamma_0_to_fp16 = const()[name = tensor("input_115_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(578362048)))]; + tensor input_115_beta_0_to_fp16 = const()[name = tensor("input_115_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(578364672)))]; + tensor input_115_epsilon_0_to_fp16 = const()[name = tensor("input_115_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_115_cast_fp16 = batch_norm(beta = input_115_beta_0_to_fp16, epsilon = input_115_epsilon_0_to_fp16, gamma = input_115_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_59_cast_fp16)[name = tensor("input_115_cast_fp16")]; + tensor var_23062 = const()[name = tensor("op_23062"), val = tensor([1, 1])]; + tensor var_23064 = const()[name = tensor("op_23064"), val = tensor([1, 1])]; + tensor input_117_pad_type_0 = const()[name = tensor("input_117_pad_type_0"), val = tensor("custom")]; + tensor input_117_pad_0 = const()[name = tensor("input_117_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_14_fc1_weight_to_fp16 = const()[name = tensor("layers_14_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(578367296)))]; + tensor layers_14_fc1_bias_to_fp16 = const()[name = tensor("layers_14_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(591474560)))]; + tensor input_117_cast_fp16 = conv(bias = layers_14_fc1_bias_to_fp16, dilations = var_23064, groups = var_21577, pad = input_117_pad_0, pad_type = input_117_pad_type_0, strides = var_23062, weight = layers_14_fc1_weight_to_fp16, x = input_115_cast_fp16)[name = tensor("input_117_cast_fp16")]; + tensor input_119_mode_0 = const()[name = tensor("input_119_mode_0"), val = tensor("EXACT")]; + tensor input_119_cast_fp16 = gelu(mode = input_119_mode_0, x = input_117_cast_fp16)[name = tensor("input_119_cast_fp16")]; + tensor var_23070 = const()[name = tensor("op_23070"), val = tensor([1, 1])]; + tensor var_23072 = const()[name = tensor("op_23072"), val = tensor([1, 1])]; + tensor hidden_states_33_pad_type_0 = const()[name = tensor("hidden_states_33_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_33_pad_0 = const()[name = tensor("hidden_states_33_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_14_fc2_weight_to_fp16 = const()[name = tensor("layers_14_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(591484864)))]; + tensor layers_14_fc2_bias_to_fp16 = const()[name = tensor("layers_14_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(604592128)))]; + tensor hidden_states_33_cast_fp16 = conv(bias = layers_14_fc2_bias_to_fp16, dilations = var_23072, groups = var_21577, pad = hidden_states_33_pad_0, pad_type = hidden_states_33_pad_type_0, strides = var_23070, weight = layers_14_fc2_weight_to_fp16, x = input_119_cast_fp16)[name = tensor("hidden_states_33_cast_fp16")]; + tensor inputs_61_cast_fp16 = add(x = inputs_59_cast_fp16, y = hidden_states_33_cast_fp16)[name = tensor("inputs_61_cast_fp16")]; + tensor var_23079 = const()[name = tensor("op_23079"), val = tensor(3)]; + tensor var_23104 = const()[name = tensor("op_23104"), val = tensor(1)]; + tensor var_23105 = const()[name = tensor("op_23105"), val = tensor(true)]; + tensor var_23115 = const()[name = tensor("op_23115"), val = tensor([1])]; + tensor channels_mean_61_cast_fp16 = reduce_mean(axes = var_23115, keep_dims = var_23105, x = inputs_61_cast_fp16)[name = tensor("channels_mean_61_cast_fp16")]; + tensor zero_mean_61_cast_fp16 = sub(x = inputs_61_cast_fp16, y = channels_mean_61_cast_fp16)[name = tensor("zero_mean_61_cast_fp16")]; + tensor zero_mean_sq_61_cast_fp16 = mul(x = zero_mean_61_cast_fp16, y = zero_mean_61_cast_fp16)[name = tensor("zero_mean_sq_61_cast_fp16")]; + tensor var_23119 = const()[name = tensor("op_23119"), val = tensor([1])]; + tensor var_23120_cast_fp16 = reduce_mean(axes = var_23119, keep_dims = var_23105, x = zero_mean_sq_61_cast_fp16)[name = tensor("op_23120_cast_fp16")]; + tensor var_23121_to_fp16 = const()[name = tensor("op_23121_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_23122_cast_fp16 = add(x = var_23120_cast_fp16, y = var_23121_to_fp16)[name = tensor("op_23122_cast_fp16")]; + tensor denom_61_epsilon_0_to_fp16 = const()[name = tensor("denom_61_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_61_cast_fp16 = rsqrt(epsilon = denom_61_epsilon_0_to_fp16, x = var_23122_cast_fp16)[name = tensor("denom_61_cast_fp16")]; + tensor out_61_cast_fp16 = mul(x = zero_mean_61_cast_fp16, y = denom_61_cast_fp16)[name = tensor("out_61_cast_fp16")]; + tensor obj_61_gamma_0_to_fp16 = const()[name = tensor("obj_61_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(604594752)))]; + tensor obj_61_beta_0_to_fp16 = const()[name = tensor("obj_61_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(604597376)))]; + tensor obj_61_epsilon_0_to_fp16 = const()[name = tensor("obj_61_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_61_cast_fp16 = batch_norm(beta = obj_61_beta_0_to_fp16, epsilon = obj_61_epsilon_0_to_fp16, gamma = obj_61_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_61_cast_fp16)[name = tensor("obj_61_cast_fp16")]; + tensor var_23137 = const()[name = tensor("op_23137"), val = tensor([1, 1])]; + tensor var_23139 = const()[name = tensor("op_23139"), val = tensor([1, 1])]; + tensor query_31_pad_type_0 = const()[name = tensor("query_31_pad_type_0"), val = tensor("custom")]; + tensor query_31_pad_0 = const()[name = tensor("query_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_15_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_15_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(604600000)))]; + tensor layers_15_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_15_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(607876864)))]; + tensor query_31_cast_fp16 = conv(bias = layers_15_self_attn_q_proj_bias_to_fp16, dilations = var_23139, groups = var_23104, pad = query_31_pad_0, pad_type = query_31_pad_type_0, strides = var_23137, weight = layers_15_self_attn_q_proj_weight_to_fp16, x = obj_61_cast_fp16)[name = tensor("query_31_cast_fp16")]; + tensor var_23143 = const()[name = tensor("op_23143"), val = tensor([1, 1])]; + tensor var_23145 = const()[name = tensor("op_23145"), val = tensor([1, 1])]; + tensor key_31_pad_type_0 = const()[name = tensor("key_31_pad_type_0"), val = tensor("custom")]; + tensor key_31_pad_0 = const()[name = tensor("key_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_15_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_15_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(607879488)))]; + tensor key_31_cast_fp16 = conv(dilations = var_23145, groups = var_23104, pad = key_31_pad_0, pad_type = key_31_pad_type_0, strides = var_23143, weight = layers_15_self_attn_k_proj_weight_to_fp16, x = obj_61_cast_fp16)[name = tensor("key_31_cast_fp16")]; + tensor var_23150 = const()[name = tensor("op_23150"), val = tensor([1, 1])]; + tensor var_23152 = const()[name = tensor("op_23152"), val = tensor([1, 1])]; + tensor value_31_pad_type_0 = const()[name = tensor("value_31_pad_type_0"), val = tensor("custom")]; + tensor value_31_pad_0 = const()[name = tensor("value_31_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_15_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_15_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(611156352)))]; + tensor layers_15_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_15_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(614433216)))]; + tensor value_31_cast_fp16 = conv(bias = layers_15_self_attn_v_proj_bias_to_fp16, dilations = var_23152, groups = var_23104, pad = value_31_pad_0, pad_type = value_31_pad_type_0, strides = var_23150, weight = layers_15_self_attn_v_proj_weight_to_fp16, x = obj_61_cast_fp16)[name = tensor("value_31_cast_fp16")]; + tensor var_23159_begin_0 = const()[name = tensor("op_23159_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_23159_end_0 = const()[name = tensor("op_23159_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_23159_end_mask_0 = const()[name = tensor("op_23159_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_23159_cast_fp16 = slice_by_index(begin = var_23159_begin_0, end = var_23159_end_0, end_mask = var_23159_end_mask_0, x = query_31_cast_fp16)[name = tensor("op_23159_cast_fp16")]; + tensor var_23163_begin_0 = const()[name = tensor("op_23163_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_23163_end_0 = const()[name = tensor("op_23163_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_23163_end_mask_0 = const()[name = tensor("op_23163_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_23163_cast_fp16 = slice_by_index(begin = var_23163_begin_0, end = var_23163_end_0, end_mask = var_23163_end_mask_0, x = query_31_cast_fp16)[name = tensor("op_23163_cast_fp16")]; + tensor var_23167_begin_0 = const()[name = tensor("op_23167_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_23167_end_0 = const()[name = tensor("op_23167_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_23167_end_mask_0 = const()[name = tensor("op_23167_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_23167_cast_fp16 = slice_by_index(begin = var_23167_begin_0, end = var_23167_end_0, end_mask = var_23167_end_mask_0, x = query_31_cast_fp16)[name = tensor("op_23167_cast_fp16")]; + tensor var_23171_begin_0 = const()[name = tensor("op_23171_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_23171_end_0 = const()[name = tensor("op_23171_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_23171_end_mask_0 = const()[name = tensor("op_23171_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_23171_cast_fp16 = slice_by_index(begin = var_23171_begin_0, end = var_23171_end_0, end_mask = var_23171_end_mask_0, x = query_31_cast_fp16)[name = tensor("op_23171_cast_fp16")]; + tensor var_23175_begin_0 = const()[name = tensor("op_23175_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_23175_end_0 = const()[name = tensor("op_23175_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_23175_end_mask_0 = const()[name = tensor("op_23175_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_23175_cast_fp16 = slice_by_index(begin = var_23175_begin_0, end = var_23175_end_0, end_mask = var_23175_end_mask_0, x = query_31_cast_fp16)[name = tensor("op_23175_cast_fp16")]; + tensor var_23179_begin_0 = const()[name = tensor("op_23179_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_23179_end_0 = const()[name = tensor("op_23179_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_23179_end_mask_0 = const()[name = tensor("op_23179_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_23179_cast_fp16 = slice_by_index(begin = var_23179_begin_0, end = var_23179_end_0, end_mask = var_23179_end_mask_0, x = query_31_cast_fp16)[name = tensor("op_23179_cast_fp16")]; + tensor var_23183_begin_0 = const()[name = tensor("op_23183_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_23183_end_0 = const()[name = tensor("op_23183_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_23183_end_mask_0 = const()[name = tensor("op_23183_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_23183_cast_fp16 = slice_by_index(begin = var_23183_begin_0, end = var_23183_end_0, end_mask = var_23183_end_mask_0, x = query_31_cast_fp16)[name = tensor("op_23183_cast_fp16")]; + tensor var_23187_begin_0 = const()[name = tensor("op_23187_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_23187_end_0 = const()[name = tensor("op_23187_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_23187_end_mask_0 = const()[name = tensor("op_23187_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_23187_cast_fp16 = slice_by_index(begin = var_23187_begin_0, end = var_23187_end_0, end_mask = var_23187_end_mask_0, x = query_31_cast_fp16)[name = tensor("op_23187_cast_fp16")]; + tensor var_23191_begin_0 = const()[name = tensor("op_23191_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_23191_end_0 = const()[name = tensor("op_23191_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_23191_end_mask_0 = const()[name = tensor("op_23191_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_23191_cast_fp16 = slice_by_index(begin = var_23191_begin_0, end = var_23191_end_0, end_mask = var_23191_end_mask_0, x = query_31_cast_fp16)[name = tensor("op_23191_cast_fp16")]; + tensor var_23195_begin_0 = const()[name = tensor("op_23195_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_23195_end_0 = const()[name = tensor("op_23195_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_23195_end_mask_0 = const()[name = tensor("op_23195_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_23195_cast_fp16 = slice_by_index(begin = var_23195_begin_0, end = var_23195_end_0, end_mask = var_23195_end_mask_0, x = query_31_cast_fp16)[name = tensor("op_23195_cast_fp16")]; + tensor var_23199_begin_0 = const()[name = tensor("op_23199_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_23199_end_0 = const()[name = tensor("op_23199_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_23199_end_mask_0 = const()[name = tensor("op_23199_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_23199_cast_fp16 = slice_by_index(begin = var_23199_begin_0, end = var_23199_end_0, end_mask = var_23199_end_mask_0, x = query_31_cast_fp16)[name = tensor("op_23199_cast_fp16")]; + tensor var_23203_begin_0 = const()[name = tensor("op_23203_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_23203_end_0 = const()[name = tensor("op_23203_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_23203_end_mask_0 = const()[name = tensor("op_23203_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_23203_cast_fp16 = slice_by_index(begin = var_23203_begin_0, end = var_23203_end_0, end_mask = var_23203_end_mask_0, x = query_31_cast_fp16)[name = tensor("op_23203_cast_fp16")]; + tensor var_23207_begin_0 = const()[name = tensor("op_23207_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_23207_end_0 = const()[name = tensor("op_23207_end_0"), val = tensor([1, 832, 1, 1500])]; + tensor var_23207_end_mask_0 = const()[name = tensor("op_23207_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_23207_cast_fp16 = slice_by_index(begin = var_23207_begin_0, end = var_23207_end_0, end_mask = var_23207_end_mask_0, x = query_31_cast_fp16)[name = tensor("op_23207_cast_fp16")]; + tensor var_23211_begin_0 = const()[name = tensor("op_23211_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_23211_end_0 = const()[name = tensor("op_23211_end_0"), val = tensor([1, 896, 1, 1500])]; + tensor var_23211_end_mask_0 = const()[name = tensor("op_23211_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_23211_cast_fp16 = slice_by_index(begin = var_23211_begin_0, end = var_23211_end_0, end_mask = var_23211_end_mask_0, x = query_31_cast_fp16)[name = tensor("op_23211_cast_fp16")]; + tensor var_23215_begin_0 = const()[name = tensor("op_23215_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_23215_end_0 = const()[name = tensor("op_23215_end_0"), val = tensor([1, 960, 1, 1500])]; + tensor var_23215_end_mask_0 = const()[name = tensor("op_23215_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_23215_cast_fp16 = slice_by_index(begin = var_23215_begin_0, end = var_23215_end_0, end_mask = var_23215_end_mask_0, x = query_31_cast_fp16)[name = tensor("op_23215_cast_fp16")]; + tensor var_23219_begin_0 = const()[name = tensor("op_23219_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_23219_end_0 = const()[name = tensor("op_23219_end_0"), val = tensor([1, 1024, 1, 1500])]; + tensor var_23219_end_mask_0 = const()[name = tensor("op_23219_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_23219_cast_fp16 = slice_by_index(begin = var_23219_begin_0, end = var_23219_end_0, end_mask = var_23219_end_mask_0, x = query_31_cast_fp16)[name = tensor("op_23219_cast_fp16")]; + tensor var_23223_begin_0 = const()[name = tensor("op_23223_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_23223_end_0 = const()[name = tensor("op_23223_end_0"), val = tensor([1, 1088, 1, 1500])]; + tensor var_23223_end_mask_0 = const()[name = tensor("op_23223_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_23223_cast_fp16 = slice_by_index(begin = var_23223_begin_0, end = var_23223_end_0, end_mask = var_23223_end_mask_0, x = query_31_cast_fp16)[name = tensor("op_23223_cast_fp16")]; + tensor var_23227_begin_0 = const()[name = tensor("op_23227_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_23227_end_0 = const()[name = tensor("op_23227_end_0"), val = tensor([1, 1152, 1, 1500])]; + tensor var_23227_end_mask_0 = const()[name = tensor("op_23227_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_23227_cast_fp16 = slice_by_index(begin = var_23227_begin_0, end = var_23227_end_0, end_mask = var_23227_end_mask_0, x = query_31_cast_fp16)[name = tensor("op_23227_cast_fp16")]; + tensor var_23231_begin_0 = const()[name = tensor("op_23231_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_23231_end_0 = const()[name = tensor("op_23231_end_0"), val = tensor([1, 1216, 1, 1500])]; + tensor var_23231_end_mask_0 = const()[name = tensor("op_23231_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_23231_cast_fp16 = slice_by_index(begin = var_23231_begin_0, end = var_23231_end_0, end_mask = var_23231_end_mask_0, x = query_31_cast_fp16)[name = tensor("op_23231_cast_fp16")]; + tensor var_23235_begin_0 = const()[name = tensor("op_23235_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_23235_end_0 = const()[name = tensor("op_23235_end_0"), val = tensor([1, 1280, 1, 1500])]; + tensor var_23235_end_mask_0 = const()[name = tensor("op_23235_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_23235_cast_fp16 = slice_by_index(begin = var_23235_begin_0, end = var_23235_end_0, end_mask = var_23235_end_mask_0, x = query_31_cast_fp16)[name = tensor("op_23235_cast_fp16")]; + tensor var_23244_begin_0 = const()[name = tensor("op_23244_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_23244_end_0 = const()[name = tensor("op_23244_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_23244_end_mask_0 = const()[name = tensor("op_23244_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23244_cast_fp16 = slice_by_index(begin = var_23244_begin_0, end = var_23244_end_0, end_mask = var_23244_end_mask_0, x = var_23159_cast_fp16)[name = tensor("op_23244_cast_fp16")]; + tensor var_23251_begin_0 = const()[name = tensor("op_23251_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_23251_end_0 = const()[name = tensor("op_23251_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_23251_end_mask_0 = const()[name = tensor("op_23251_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23251_cast_fp16 = slice_by_index(begin = var_23251_begin_0, end = var_23251_end_0, end_mask = var_23251_end_mask_0, x = var_23159_cast_fp16)[name = tensor("op_23251_cast_fp16")]; + tensor var_23258_begin_0 = const()[name = tensor("op_23258_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_23258_end_0 = const()[name = tensor("op_23258_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_23258_end_mask_0 = const()[name = tensor("op_23258_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23258_cast_fp16 = slice_by_index(begin = var_23258_begin_0, end = var_23258_end_0, end_mask = var_23258_end_mask_0, x = var_23159_cast_fp16)[name = tensor("op_23258_cast_fp16")]; + tensor var_23265_begin_0 = const()[name = tensor("op_23265_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_23265_end_0 = const()[name = tensor("op_23265_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_23265_end_mask_0 = const()[name = tensor("op_23265_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23265_cast_fp16 = slice_by_index(begin = var_23265_begin_0, end = var_23265_end_0, end_mask = var_23265_end_mask_0, x = var_23159_cast_fp16)[name = tensor("op_23265_cast_fp16")]; + tensor var_23272_begin_0 = const()[name = tensor("op_23272_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_23272_end_0 = const()[name = tensor("op_23272_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_23272_end_mask_0 = const()[name = tensor("op_23272_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23272_cast_fp16 = slice_by_index(begin = var_23272_begin_0, end = var_23272_end_0, end_mask = var_23272_end_mask_0, x = var_23163_cast_fp16)[name = tensor("op_23272_cast_fp16")]; + tensor var_23279_begin_0 = const()[name = tensor("op_23279_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_23279_end_0 = const()[name = tensor("op_23279_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_23279_end_mask_0 = const()[name = tensor("op_23279_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23279_cast_fp16 = slice_by_index(begin = var_23279_begin_0, end = var_23279_end_0, end_mask = var_23279_end_mask_0, x = var_23163_cast_fp16)[name = tensor("op_23279_cast_fp16")]; + tensor var_23286_begin_0 = const()[name = tensor("op_23286_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_23286_end_0 = const()[name = tensor("op_23286_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_23286_end_mask_0 = const()[name = tensor("op_23286_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23286_cast_fp16 = slice_by_index(begin = var_23286_begin_0, end = var_23286_end_0, end_mask = var_23286_end_mask_0, x = var_23163_cast_fp16)[name = tensor("op_23286_cast_fp16")]; + tensor var_23293_begin_0 = const()[name = tensor("op_23293_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_23293_end_0 = const()[name = tensor("op_23293_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_23293_end_mask_0 = const()[name = tensor("op_23293_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23293_cast_fp16 = slice_by_index(begin = var_23293_begin_0, end = var_23293_end_0, end_mask = var_23293_end_mask_0, x = var_23163_cast_fp16)[name = tensor("op_23293_cast_fp16")]; + tensor var_23300_begin_0 = const()[name = tensor("op_23300_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_23300_end_0 = const()[name = tensor("op_23300_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_23300_end_mask_0 = const()[name = tensor("op_23300_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23300_cast_fp16 = slice_by_index(begin = var_23300_begin_0, end = var_23300_end_0, end_mask = var_23300_end_mask_0, x = var_23167_cast_fp16)[name = tensor("op_23300_cast_fp16")]; + tensor var_23307_begin_0 = const()[name = tensor("op_23307_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_23307_end_0 = const()[name = tensor("op_23307_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_23307_end_mask_0 = const()[name = tensor("op_23307_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23307_cast_fp16 = slice_by_index(begin = var_23307_begin_0, end = var_23307_end_0, end_mask = var_23307_end_mask_0, x = var_23167_cast_fp16)[name = tensor("op_23307_cast_fp16")]; + tensor var_23314_begin_0 = const()[name = tensor("op_23314_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_23314_end_0 = const()[name = tensor("op_23314_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_23314_end_mask_0 = const()[name = tensor("op_23314_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23314_cast_fp16 = slice_by_index(begin = var_23314_begin_0, end = var_23314_end_0, end_mask = var_23314_end_mask_0, x = var_23167_cast_fp16)[name = tensor("op_23314_cast_fp16")]; + tensor var_23321_begin_0 = const()[name = tensor("op_23321_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_23321_end_0 = const()[name = tensor("op_23321_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_23321_end_mask_0 = const()[name = tensor("op_23321_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23321_cast_fp16 = slice_by_index(begin = var_23321_begin_0, end = var_23321_end_0, end_mask = var_23321_end_mask_0, x = var_23167_cast_fp16)[name = tensor("op_23321_cast_fp16")]; + tensor var_23328_begin_0 = const()[name = tensor("op_23328_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_23328_end_0 = const()[name = tensor("op_23328_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_23328_end_mask_0 = const()[name = tensor("op_23328_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23328_cast_fp16 = slice_by_index(begin = var_23328_begin_0, end = var_23328_end_0, end_mask = var_23328_end_mask_0, x = var_23171_cast_fp16)[name = tensor("op_23328_cast_fp16")]; + tensor var_23335_begin_0 = const()[name = tensor("op_23335_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_23335_end_0 = const()[name = tensor("op_23335_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_23335_end_mask_0 = const()[name = tensor("op_23335_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23335_cast_fp16 = slice_by_index(begin = var_23335_begin_0, end = var_23335_end_0, end_mask = var_23335_end_mask_0, x = var_23171_cast_fp16)[name = tensor("op_23335_cast_fp16")]; + tensor var_23342_begin_0 = const()[name = tensor("op_23342_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_23342_end_0 = const()[name = tensor("op_23342_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_23342_end_mask_0 = const()[name = tensor("op_23342_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23342_cast_fp16 = slice_by_index(begin = var_23342_begin_0, end = var_23342_end_0, end_mask = var_23342_end_mask_0, x = var_23171_cast_fp16)[name = tensor("op_23342_cast_fp16")]; + tensor var_23349_begin_0 = const()[name = tensor("op_23349_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_23349_end_0 = const()[name = tensor("op_23349_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_23349_end_mask_0 = const()[name = tensor("op_23349_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23349_cast_fp16 = slice_by_index(begin = var_23349_begin_0, end = var_23349_end_0, end_mask = var_23349_end_mask_0, x = var_23171_cast_fp16)[name = tensor("op_23349_cast_fp16")]; + tensor var_23356_begin_0 = const()[name = tensor("op_23356_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_23356_end_0 = const()[name = tensor("op_23356_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_23356_end_mask_0 = const()[name = tensor("op_23356_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23356_cast_fp16 = slice_by_index(begin = var_23356_begin_0, end = var_23356_end_0, end_mask = var_23356_end_mask_0, x = var_23175_cast_fp16)[name = tensor("op_23356_cast_fp16")]; + tensor var_23363_begin_0 = const()[name = tensor("op_23363_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_23363_end_0 = const()[name = tensor("op_23363_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_23363_end_mask_0 = const()[name = tensor("op_23363_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23363_cast_fp16 = slice_by_index(begin = var_23363_begin_0, end = var_23363_end_0, end_mask = var_23363_end_mask_0, x = var_23175_cast_fp16)[name = tensor("op_23363_cast_fp16")]; + tensor var_23370_begin_0 = const()[name = tensor("op_23370_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_23370_end_0 = const()[name = tensor("op_23370_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_23370_end_mask_0 = const()[name = tensor("op_23370_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23370_cast_fp16 = slice_by_index(begin = var_23370_begin_0, end = var_23370_end_0, end_mask = var_23370_end_mask_0, x = var_23175_cast_fp16)[name = tensor("op_23370_cast_fp16")]; + tensor var_23377_begin_0 = const()[name = tensor("op_23377_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_23377_end_0 = const()[name = tensor("op_23377_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_23377_end_mask_0 = const()[name = tensor("op_23377_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23377_cast_fp16 = slice_by_index(begin = var_23377_begin_0, end = var_23377_end_0, end_mask = var_23377_end_mask_0, x = var_23175_cast_fp16)[name = tensor("op_23377_cast_fp16")]; + tensor var_23384_begin_0 = const()[name = tensor("op_23384_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_23384_end_0 = const()[name = tensor("op_23384_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_23384_end_mask_0 = const()[name = tensor("op_23384_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23384_cast_fp16 = slice_by_index(begin = var_23384_begin_0, end = var_23384_end_0, end_mask = var_23384_end_mask_0, x = var_23179_cast_fp16)[name = tensor("op_23384_cast_fp16")]; + tensor var_23391_begin_0 = const()[name = tensor("op_23391_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_23391_end_0 = const()[name = tensor("op_23391_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_23391_end_mask_0 = const()[name = tensor("op_23391_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23391_cast_fp16 = slice_by_index(begin = var_23391_begin_0, end = var_23391_end_0, end_mask = var_23391_end_mask_0, x = var_23179_cast_fp16)[name = tensor("op_23391_cast_fp16")]; + tensor var_23398_begin_0 = const()[name = tensor("op_23398_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_23398_end_0 = const()[name = tensor("op_23398_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_23398_end_mask_0 = const()[name = tensor("op_23398_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23398_cast_fp16 = slice_by_index(begin = var_23398_begin_0, end = var_23398_end_0, end_mask = var_23398_end_mask_0, x = var_23179_cast_fp16)[name = tensor("op_23398_cast_fp16")]; + tensor var_23405_begin_0 = const()[name = tensor("op_23405_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_23405_end_0 = const()[name = tensor("op_23405_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_23405_end_mask_0 = const()[name = tensor("op_23405_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23405_cast_fp16 = slice_by_index(begin = var_23405_begin_0, end = var_23405_end_0, end_mask = var_23405_end_mask_0, x = var_23179_cast_fp16)[name = tensor("op_23405_cast_fp16")]; + tensor var_23412_begin_0 = const()[name = tensor("op_23412_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_23412_end_0 = const()[name = tensor("op_23412_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_23412_end_mask_0 = const()[name = tensor("op_23412_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23412_cast_fp16 = slice_by_index(begin = var_23412_begin_0, end = var_23412_end_0, end_mask = var_23412_end_mask_0, x = var_23183_cast_fp16)[name = tensor("op_23412_cast_fp16")]; + tensor var_23419_begin_0 = const()[name = tensor("op_23419_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_23419_end_0 = const()[name = tensor("op_23419_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_23419_end_mask_0 = const()[name = tensor("op_23419_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23419_cast_fp16 = slice_by_index(begin = var_23419_begin_0, end = var_23419_end_0, end_mask = var_23419_end_mask_0, x = var_23183_cast_fp16)[name = tensor("op_23419_cast_fp16")]; + tensor var_23426_begin_0 = const()[name = tensor("op_23426_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_23426_end_0 = const()[name = tensor("op_23426_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_23426_end_mask_0 = const()[name = tensor("op_23426_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23426_cast_fp16 = slice_by_index(begin = var_23426_begin_0, end = var_23426_end_0, end_mask = var_23426_end_mask_0, x = var_23183_cast_fp16)[name = tensor("op_23426_cast_fp16")]; + tensor var_23433_begin_0 = const()[name = tensor("op_23433_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_23433_end_0 = const()[name = tensor("op_23433_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_23433_end_mask_0 = const()[name = tensor("op_23433_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23433_cast_fp16 = slice_by_index(begin = var_23433_begin_0, end = var_23433_end_0, end_mask = var_23433_end_mask_0, x = var_23183_cast_fp16)[name = tensor("op_23433_cast_fp16")]; + tensor var_23440_begin_0 = const()[name = tensor("op_23440_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_23440_end_0 = const()[name = tensor("op_23440_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_23440_end_mask_0 = const()[name = tensor("op_23440_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23440_cast_fp16 = slice_by_index(begin = var_23440_begin_0, end = var_23440_end_0, end_mask = var_23440_end_mask_0, x = var_23187_cast_fp16)[name = tensor("op_23440_cast_fp16")]; + tensor var_23447_begin_0 = const()[name = tensor("op_23447_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_23447_end_0 = const()[name = tensor("op_23447_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_23447_end_mask_0 = const()[name = tensor("op_23447_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23447_cast_fp16 = slice_by_index(begin = var_23447_begin_0, end = var_23447_end_0, end_mask = var_23447_end_mask_0, x = var_23187_cast_fp16)[name = tensor("op_23447_cast_fp16")]; + tensor var_23454_begin_0 = const()[name = tensor("op_23454_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_23454_end_0 = const()[name = tensor("op_23454_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_23454_end_mask_0 = const()[name = tensor("op_23454_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23454_cast_fp16 = slice_by_index(begin = var_23454_begin_0, end = var_23454_end_0, end_mask = var_23454_end_mask_0, x = var_23187_cast_fp16)[name = tensor("op_23454_cast_fp16")]; + tensor var_23461_begin_0 = const()[name = tensor("op_23461_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_23461_end_0 = const()[name = tensor("op_23461_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_23461_end_mask_0 = const()[name = tensor("op_23461_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23461_cast_fp16 = slice_by_index(begin = var_23461_begin_0, end = var_23461_end_0, end_mask = var_23461_end_mask_0, x = var_23187_cast_fp16)[name = tensor("op_23461_cast_fp16")]; + tensor var_23468_begin_0 = const()[name = tensor("op_23468_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_23468_end_0 = const()[name = tensor("op_23468_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_23468_end_mask_0 = const()[name = tensor("op_23468_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23468_cast_fp16 = slice_by_index(begin = var_23468_begin_0, end = var_23468_end_0, end_mask = var_23468_end_mask_0, x = var_23191_cast_fp16)[name = tensor("op_23468_cast_fp16")]; + tensor var_23475_begin_0 = const()[name = tensor("op_23475_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_23475_end_0 = const()[name = tensor("op_23475_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_23475_end_mask_0 = const()[name = tensor("op_23475_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23475_cast_fp16 = slice_by_index(begin = var_23475_begin_0, end = var_23475_end_0, end_mask = var_23475_end_mask_0, x = var_23191_cast_fp16)[name = tensor("op_23475_cast_fp16")]; + tensor var_23482_begin_0 = const()[name = tensor("op_23482_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_23482_end_0 = const()[name = tensor("op_23482_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_23482_end_mask_0 = const()[name = tensor("op_23482_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23482_cast_fp16 = slice_by_index(begin = var_23482_begin_0, end = var_23482_end_0, end_mask = var_23482_end_mask_0, x = var_23191_cast_fp16)[name = tensor("op_23482_cast_fp16")]; + tensor var_23489_begin_0 = const()[name = tensor("op_23489_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_23489_end_0 = const()[name = tensor("op_23489_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_23489_end_mask_0 = const()[name = tensor("op_23489_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23489_cast_fp16 = slice_by_index(begin = var_23489_begin_0, end = var_23489_end_0, end_mask = var_23489_end_mask_0, x = var_23191_cast_fp16)[name = tensor("op_23489_cast_fp16")]; + tensor var_23496_begin_0 = const()[name = tensor("op_23496_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_23496_end_0 = const()[name = tensor("op_23496_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_23496_end_mask_0 = const()[name = tensor("op_23496_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23496_cast_fp16 = slice_by_index(begin = var_23496_begin_0, end = var_23496_end_0, end_mask = var_23496_end_mask_0, x = var_23195_cast_fp16)[name = tensor("op_23496_cast_fp16")]; + tensor var_23503_begin_0 = const()[name = tensor("op_23503_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_23503_end_0 = const()[name = tensor("op_23503_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_23503_end_mask_0 = const()[name = tensor("op_23503_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23503_cast_fp16 = slice_by_index(begin = var_23503_begin_0, end = var_23503_end_0, end_mask = var_23503_end_mask_0, x = var_23195_cast_fp16)[name = tensor("op_23503_cast_fp16")]; + tensor var_23510_begin_0 = const()[name = tensor("op_23510_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_23510_end_0 = const()[name = tensor("op_23510_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_23510_end_mask_0 = const()[name = tensor("op_23510_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23510_cast_fp16 = slice_by_index(begin = var_23510_begin_0, end = var_23510_end_0, end_mask = var_23510_end_mask_0, x = var_23195_cast_fp16)[name = tensor("op_23510_cast_fp16")]; + tensor var_23517_begin_0 = const()[name = tensor("op_23517_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_23517_end_0 = const()[name = tensor("op_23517_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_23517_end_mask_0 = const()[name = tensor("op_23517_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23517_cast_fp16 = slice_by_index(begin = var_23517_begin_0, end = var_23517_end_0, end_mask = var_23517_end_mask_0, x = var_23195_cast_fp16)[name = tensor("op_23517_cast_fp16")]; + tensor var_23524_begin_0 = const()[name = tensor("op_23524_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_23524_end_0 = const()[name = tensor("op_23524_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_23524_end_mask_0 = const()[name = tensor("op_23524_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23524_cast_fp16 = slice_by_index(begin = var_23524_begin_0, end = var_23524_end_0, end_mask = var_23524_end_mask_0, x = var_23199_cast_fp16)[name = tensor("op_23524_cast_fp16")]; + tensor var_23531_begin_0 = const()[name = tensor("op_23531_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_23531_end_0 = const()[name = tensor("op_23531_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_23531_end_mask_0 = const()[name = tensor("op_23531_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23531_cast_fp16 = slice_by_index(begin = var_23531_begin_0, end = var_23531_end_0, end_mask = var_23531_end_mask_0, x = var_23199_cast_fp16)[name = tensor("op_23531_cast_fp16")]; + tensor var_23538_begin_0 = const()[name = tensor("op_23538_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_23538_end_0 = const()[name = tensor("op_23538_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_23538_end_mask_0 = const()[name = tensor("op_23538_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23538_cast_fp16 = slice_by_index(begin = var_23538_begin_0, end = var_23538_end_0, end_mask = var_23538_end_mask_0, x = var_23199_cast_fp16)[name = tensor("op_23538_cast_fp16")]; + tensor var_23545_begin_0 = const()[name = tensor("op_23545_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_23545_end_0 = const()[name = tensor("op_23545_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_23545_end_mask_0 = const()[name = tensor("op_23545_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23545_cast_fp16 = slice_by_index(begin = var_23545_begin_0, end = var_23545_end_0, end_mask = var_23545_end_mask_0, x = var_23199_cast_fp16)[name = tensor("op_23545_cast_fp16")]; + tensor var_23552_begin_0 = const()[name = tensor("op_23552_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_23552_end_0 = const()[name = tensor("op_23552_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_23552_end_mask_0 = const()[name = tensor("op_23552_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23552_cast_fp16 = slice_by_index(begin = var_23552_begin_0, end = var_23552_end_0, end_mask = var_23552_end_mask_0, x = var_23203_cast_fp16)[name = tensor("op_23552_cast_fp16")]; + tensor var_23559_begin_0 = const()[name = tensor("op_23559_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_23559_end_0 = const()[name = tensor("op_23559_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_23559_end_mask_0 = const()[name = tensor("op_23559_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23559_cast_fp16 = slice_by_index(begin = var_23559_begin_0, end = var_23559_end_0, end_mask = var_23559_end_mask_0, x = var_23203_cast_fp16)[name = tensor("op_23559_cast_fp16")]; + tensor var_23566_begin_0 = const()[name = tensor("op_23566_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_23566_end_0 = const()[name = tensor("op_23566_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_23566_end_mask_0 = const()[name = tensor("op_23566_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23566_cast_fp16 = slice_by_index(begin = var_23566_begin_0, end = var_23566_end_0, end_mask = var_23566_end_mask_0, x = var_23203_cast_fp16)[name = tensor("op_23566_cast_fp16")]; + tensor var_23573_begin_0 = const()[name = tensor("op_23573_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_23573_end_0 = const()[name = tensor("op_23573_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_23573_end_mask_0 = const()[name = tensor("op_23573_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23573_cast_fp16 = slice_by_index(begin = var_23573_begin_0, end = var_23573_end_0, end_mask = var_23573_end_mask_0, x = var_23203_cast_fp16)[name = tensor("op_23573_cast_fp16")]; + tensor var_23580_begin_0 = const()[name = tensor("op_23580_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_23580_end_0 = const()[name = tensor("op_23580_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_23580_end_mask_0 = const()[name = tensor("op_23580_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23580_cast_fp16 = slice_by_index(begin = var_23580_begin_0, end = var_23580_end_0, end_mask = var_23580_end_mask_0, x = var_23207_cast_fp16)[name = tensor("op_23580_cast_fp16")]; + tensor var_23587_begin_0 = const()[name = tensor("op_23587_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_23587_end_0 = const()[name = tensor("op_23587_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_23587_end_mask_0 = const()[name = tensor("op_23587_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23587_cast_fp16 = slice_by_index(begin = var_23587_begin_0, end = var_23587_end_0, end_mask = var_23587_end_mask_0, x = var_23207_cast_fp16)[name = tensor("op_23587_cast_fp16")]; + tensor var_23594_begin_0 = const()[name = tensor("op_23594_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_23594_end_0 = const()[name = tensor("op_23594_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_23594_end_mask_0 = const()[name = tensor("op_23594_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23594_cast_fp16 = slice_by_index(begin = var_23594_begin_0, end = var_23594_end_0, end_mask = var_23594_end_mask_0, x = var_23207_cast_fp16)[name = tensor("op_23594_cast_fp16")]; + tensor var_23601_begin_0 = const()[name = tensor("op_23601_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_23601_end_0 = const()[name = tensor("op_23601_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_23601_end_mask_0 = const()[name = tensor("op_23601_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23601_cast_fp16 = slice_by_index(begin = var_23601_begin_0, end = var_23601_end_0, end_mask = var_23601_end_mask_0, x = var_23207_cast_fp16)[name = tensor("op_23601_cast_fp16")]; + tensor var_23608_begin_0 = const()[name = tensor("op_23608_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_23608_end_0 = const()[name = tensor("op_23608_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_23608_end_mask_0 = const()[name = tensor("op_23608_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23608_cast_fp16 = slice_by_index(begin = var_23608_begin_0, end = var_23608_end_0, end_mask = var_23608_end_mask_0, x = var_23211_cast_fp16)[name = tensor("op_23608_cast_fp16")]; + tensor var_23615_begin_0 = const()[name = tensor("op_23615_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_23615_end_0 = const()[name = tensor("op_23615_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_23615_end_mask_0 = const()[name = tensor("op_23615_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23615_cast_fp16 = slice_by_index(begin = var_23615_begin_0, end = var_23615_end_0, end_mask = var_23615_end_mask_0, x = var_23211_cast_fp16)[name = tensor("op_23615_cast_fp16")]; + tensor var_23622_begin_0 = const()[name = tensor("op_23622_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_23622_end_0 = const()[name = tensor("op_23622_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_23622_end_mask_0 = const()[name = tensor("op_23622_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23622_cast_fp16 = slice_by_index(begin = var_23622_begin_0, end = var_23622_end_0, end_mask = var_23622_end_mask_0, x = var_23211_cast_fp16)[name = tensor("op_23622_cast_fp16")]; + tensor var_23629_begin_0 = const()[name = tensor("op_23629_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_23629_end_0 = const()[name = tensor("op_23629_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_23629_end_mask_0 = const()[name = tensor("op_23629_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23629_cast_fp16 = slice_by_index(begin = var_23629_begin_0, end = var_23629_end_0, end_mask = var_23629_end_mask_0, x = var_23211_cast_fp16)[name = tensor("op_23629_cast_fp16")]; + tensor var_23636_begin_0 = const()[name = tensor("op_23636_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_23636_end_0 = const()[name = tensor("op_23636_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_23636_end_mask_0 = const()[name = tensor("op_23636_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23636_cast_fp16 = slice_by_index(begin = var_23636_begin_0, end = var_23636_end_0, end_mask = var_23636_end_mask_0, x = var_23215_cast_fp16)[name = tensor("op_23636_cast_fp16")]; + tensor var_23643_begin_0 = const()[name = tensor("op_23643_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_23643_end_0 = const()[name = tensor("op_23643_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_23643_end_mask_0 = const()[name = tensor("op_23643_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23643_cast_fp16 = slice_by_index(begin = var_23643_begin_0, end = var_23643_end_0, end_mask = var_23643_end_mask_0, x = var_23215_cast_fp16)[name = tensor("op_23643_cast_fp16")]; + tensor var_23650_begin_0 = const()[name = tensor("op_23650_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_23650_end_0 = const()[name = tensor("op_23650_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_23650_end_mask_0 = const()[name = tensor("op_23650_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23650_cast_fp16 = slice_by_index(begin = var_23650_begin_0, end = var_23650_end_0, end_mask = var_23650_end_mask_0, x = var_23215_cast_fp16)[name = tensor("op_23650_cast_fp16")]; + tensor var_23657_begin_0 = const()[name = tensor("op_23657_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_23657_end_0 = const()[name = tensor("op_23657_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_23657_end_mask_0 = const()[name = tensor("op_23657_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23657_cast_fp16 = slice_by_index(begin = var_23657_begin_0, end = var_23657_end_0, end_mask = var_23657_end_mask_0, x = var_23215_cast_fp16)[name = tensor("op_23657_cast_fp16")]; + tensor var_23664_begin_0 = const()[name = tensor("op_23664_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_23664_end_0 = const()[name = tensor("op_23664_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_23664_end_mask_0 = const()[name = tensor("op_23664_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23664_cast_fp16 = slice_by_index(begin = var_23664_begin_0, end = var_23664_end_0, end_mask = var_23664_end_mask_0, x = var_23219_cast_fp16)[name = tensor("op_23664_cast_fp16")]; + tensor var_23671_begin_0 = const()[name = tensor("op_23671_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_23671_end_0 = const()[name = tensor("op_23671_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_23671_end_mask_0 = const()[name = tensor("op_23671_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23671_cast_fp16 = slice_by_index(begin = var_23671_begin_0, end = var_23671_end_0, end_mask = var_23671_end_mask_0, x = var_23219_cast_fp16)[name = tensor("op_23671_cast_fp16")]; + tensor var_23678_begin_0 = const()[name = tensor("op_23678_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_23678_end_0 = const()[name = tensor("op_23678_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_23678_end_mask_0 = const()[name = tensor("op_23678_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23678_cast_fp16 = slice_by_index(begin = var_23678_begin_0, end = var_23678_end_0, end_mask = var_23678_end_mask_0, x = var_23219_cast_fp16)[name = tensor("op_23678_cast_fp16")]; + tensor var_23685_begin_0 = const()[name = tensor("op_23685_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_23685_end_0 = const()[name = tensor("op_23685_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_23685_end_mask_0 = const()[name = tensor("op_23685_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23685_cast_fp16 = slice_by_index(begin = var_23685_begin_0, end = var_23685_end_0, end_mask = var_23685_end_mask_0, x = var_23219_cast_fp16)[name = tensor("op_23685_cast_fp16")]; + tensor var_23692_begin_0 = const()[name = tensor("op_23692_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_23692_end_0 = const()[name = tensor("op_23692_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_23692_end_mask_0 = const()[name = tensor("op_23692_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23692_cast_fp16 = slice_by_index(begin = var_23692_begin_0, end = var_23692_end_0, end_mask = var_23692_end_mask_0, x = var_23223_cast_fp16)[name = tensor("op_23692_cast_fp16")]; + tensor var_23699_begin_0 = const()[name = tensor("op_23699_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_23699_end_0 = const()[name = tensor("op_23699_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_23699_end_mask_0 = const()[name = tensor("op_23699_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23699_cast_fp16 = slice_by_index(begin = var_23699_begin_0, end = var_23699_end_0, end_mask = var_23699_end_mask_0, x = var_23223_cast_fp16)[name = tensor("op_23699_cast_fp16")]; + tensor var_23706_begin_0 = const()[name = tensor("op_23706_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_23706_end_0 = const()[name = tensor("op_23706_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_23706_end_mask_0 = const()[name = tensor("op_23706_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23706_cast_fp16 = slice_by_index(begin = var_23706_begin_0, end = var_23706_end_0, end_mask = var_23706_end_mask_0, x = var_23223_cast_fp16)[name = tensor("op_23706_cast_fp16")]; + tensor var_23713_begin_0 = const()[name = tensor("op_23713_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_23713_end_0 = const()[name = tensor("op_23713_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_23713_end_mask_0 = const()[name = tensor("op_23713_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23713_cast_fp16 = slice_by_index(begin = var_23713_begin_0, end = var_23713_end_0, end_mask = var_23713_end_mask_0, x = var_23223_cast_fp16)[name = tensor("op_23713_cast_fp16")]; + tensor var_23720_begin_0 = const()[name = tensor("op_23720_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_23720_end_0 = const()[name = tensor("op_23720_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_23720_end_mask_0 = const()[name = tensor("op_23720_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23720_cast_fp16 = slice_by_index(begin = var_23720_begin_0, end = var_23720_end_0, end_mask = var_23720_end_mask_0, x = var_23227_cast_fp16)[name = tensor("op_23720_cast_fp16")]; + tensor var_23727_begin_0 = const()[name = tensor("op_23727_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_23727_end_0 = const()[name = tensor("op_23727_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_23727_end_mask_0 = const()[name = tensor("op_23727_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23727_cast_fp16 = slice_by_index(begin = var_23727_begin_0, end = var_23727_end_0, end_mask = var_23727_end_mask_0, x = var_23227_cast_fp16)[name = tensor("op_23727_cast_fp16")]; + tensor var_23734_begin_0 = const()[name = tensor("op_23734_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_23734_end_0 = const()[name = tensor("op_23734_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_23734_end_mask_0 = const()[name = tensor("op_23734_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23734_cast_fp16 = slice_by_index(begin = var_23734_begin_0, end = var_23734_end_0, end_mask = var_23734_end_mask_0, x = var_23227_cast_fp16)[name = tensor("op_23734_cast_fp16")]; + tensor var_23741_begin_0 = const()[name = tensor("op_23741_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_23741_end_0 = const()[name = tensor("op_23741_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_23741_end_mask_0 = const()[name = tensor("op_23741_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23741_cast_fp16 = slice_by_index(begin = var_23741_begin_0, end = var_23741_end_0, end_mask = var_23741_end_mask_0, x = var_23227_cast_fp16)[name = tensor("op_23741_cast_fp16")]; + tensor var_23748_begin_0 = const()[name = tensor("op_23748_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_23748_end_0 = const()[name = tensor("op_23748_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_23748_end_mask_0 = const()[name = tensor("op_23748_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23748_cast_fp16 = slice_by_index(begin = var_23748_begin_0, end = var_23748_end_0, end_mask = var_23748_end_mask_0, x = var_23231_cast_fp16)[name = tensor("op_23748_cast_fp16")]; + tensor var_23755_begin_0 = const()[name = tensor("op_23755_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_23755_end_0 = const()[name = tensor("op_23755_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_23755_end_mask_0 = const()[name = tensor("op_23755_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23755_cast_fp16 = slice_by_index(begin = var_23755_begin_0, end = var_23755_end_0, end_mask = var_23755_end_mask_0, x = var_23231_cast_fp16)[name = tensor("op_23755_cast_fp16")]; + tensor var_23762_begin_0 = const()[name = tensor("op_23762_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_23762_end_0 = const()[name = tensor("op_23762_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_23762_end_mask_0 = const()[name = tensor("op_23762_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23762_cast_fp16 = slice_by_index(begin = var_23762_begin_0, end = var_23762_end_0, end_mask = var_23762_end_mask_0, x = var_23231_cast_fp16)[name = tensor("op_23762_cast_fp16")]; + tensor var_23769_begin_0 = const()[name = tensor("op_23769_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_23769_end_0 = const()[name = tensor("op_23769_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_23769_end_mask_0 = const()[name = tensor("op_23769_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23769_cast_fp16 = slice_by_index(begin = var_23769_begin_0, end = var_23769_end_0, end_mask = var_23769_end_mask_0, x = var_23231_cast_fp16)[name = tensor("op_23769_cast_fp16")]; + tensor var_23776_begin_0 = const()[name = tensor("op_23776_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_23776_end_0 = const()[name = tensor("op_23776_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_23776_end_mask_0 = const()[name = tensor("op_23776_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23776_cast_fp16 = slice_by_index(begin = var_23776_begin_0, end = var_23776_end_0, end_mask = var_23776_end_mask_0, x = var_23235_cast_fp16)[name = tensor("op_23776_cast_fp16")]; + tensor var_23783_begin_0 = const()[name = tensor("op_23783_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_23783_end_0 = const()[name = tensor("op_23783_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_23783_end_mask_0 = const()[name = tensor("op_23783_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23783_cast_fp16 = slice_by_index(begin = var_23783_begin_0, end = var_23783_end_0, end_mask = var_23783_end_mask_0, x = var_23235_cast_fp16)[name = tensor("op_23783_cast_fp16")]; + tensor var_23790_begin_0 = const()[name = tensor("op_23790_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_23790_end_0 = const()[name = tensor("op_23790_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_23790_end_mask_0 = const()[name = tensor("op_23790_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23790_cast_fp16 = slice_by_index(begin = var_23790_begin_0, end = var_23790_end_0, end_mask = var_23790_end_mask_0, x = var_23235_cast_fp16)[name = tensor("op_23790_cast_fp16")]; + tensor var_23797_begin_0 = const()[name = tensor("op_23797_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_23797_end_0 = const()[name = tensor("op_23797_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_23797_end_mask_0 = const()[name = tensor("op_23797_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23797_cast_fp16 = slice_by_index(begin = var_23797_begin_0, end = var_23797_end_0, end_mask = var_23797_end_mask_0, x = var_23235_cast_fp16)[name = tensor("op_23797_cast_fp16")]; + tensor k_31_perm_0 = const()[name = tensor("k_31_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_23802_begin_0 = const()[name = tensor("op_23802_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_23802_end_0 = const()[name = tensor("op_23802_end_0"), val = tensor([1, 1500, 1, 64])]; + tensor var_23802_end_mask_0 = const()[name = tensor("op_23802_end_mask_0"), val = tensor([true, true, true, false])]; + tensor transpose_16 = transpose(perm = k_31_perm_0, x = key_31_cast_fp16)[name = tensor("transpose_16")]; + tensor var_23802_cast_fp16 = slice_by_index(begin = var_23802_begin_0, end = var_23802_end_0, end_mask = var_23802_end_mask_0, x = transpose_16)[name = tensor("op_23802_cast_fp16")]; + tensor var_23806_begin_0 = const()[name = tensor("op_23806_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor var_23806_end_0 = const()[name = tensor("op_23806_end_0"), val = tensor([1, 1500, 1, 128])]; + tensor var_23806_end_mask_0 = const()[name = tensor("op_23806_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23806_cast_fp16 = slice_by_index(begin = var_23806_begin_0, end = var_23806_end_0, end_mask = var_23806_end_mask_0, x = transpose_16)[name = tensor("op_23806_cast_fp16")]; + tensor var_23810_begin_0 = const()[name = tensor("op_23810_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_23810_end_0 = const()[name = tensor("op_23810_end_0"), val = tensor([1, 1500, 1, 192])]; + tensor var_23810_end_mask_0 = const()[name = tensor("op_23810_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23810_cast_fp16 = slice_by_index(begin = var_23810_begin_0, end = var_23810_end_0, end_mask = var_23810_end_mask_0, x = transpose_16)[name = tensor("op_23810_cast_fp16")]; + tensor var_23814_begin_0 = const()[name = tensor("op_23814_begin_0"), val = tensor([0, 0, 0, 192])]; + tensor var_23814_end_0 = const()[name = tensor("op_23814_end_0"), val = tensor([1, 1500, 1, 256])]; + tensor var_23814_end_mask_0 = const()[name = tensor("op_23814_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23814_cast_fp16 = slice_by_index(begin = var_23814_begin_0, end = var_23814_end_0, end_mask = var_23814_end_mask_0, x = transpose_16)[name = tensor("op_23814_cast_fp16")]; + tensor var_23818_begin_0 = const()[name = tensor("op_23818_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_23818_end_0 = const()[name = tensor("op_23818_end_0"), val = tensor([1, 1500, 1, 320])]; + tensor var_23818_end_mask_0 = const()[name = tensor("op_23818_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23818_cast_fp16 = slice_by_index(begin = var_23818_begin_0, end = var_23818_end_0, end_mask = var_23818_end_mask_0, x = transpose_16)[name = tensor("op_23818_cast_fp16")]; + tensor var_23822_begin_0 = const()[name = tensor("op_23822_begin_0"), val = tensor([0, 0, 0, 320])]; + tensor var_23822_end_0 = const()[name = tensor("op_23822_end_0"), val = tensor([1, 1500, 1, 384])]; + tensor var_23822_end_mask_0 = const()[name = tensor("op_23822_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23822_cast_fp16 = slice_by_index(begin = var_23822_begin_0, end = var_23822_end_0, end_mask = var_23822_end_mask_0, x = transpose_16)[name = tensor("op_23822_cast_fp16")]; + tensor var_23826_begin_0 = const()[name = tensor("op_23826_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_23826_end_0 = const()[name = tensor("op_23826_end_0"), val = tensor([1, 1500, 1, 448])]; + tensor var_23826_end_mask_0 = const()[name = tensor("op_23826_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23826_cast_fp16 = slice_by_index(begin = var_23826_begin_0, end = var_23826_end_0, end_mask = var_23826_end_mask_0, x = transpose_16)[name = tensor("op_23826_cast_fp16")]; + tensor var_23830_begin_0 = const()[name = tensor("op_23830_begin_0"), val = tensor([0, 0, 0, 448])]; + tensor var_23830_end_0 = const()[name = tensor("op_23830_end_0"), val = tensor([1, 1500, 1, 512])]; + tensor var_23830_end_mask_0 = const()[name = tensor("op_23830_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23830_cast_fp16 = slice_by_index(begin = var_23830_begin_0, end = var_23830_end_0, end_mask = var_23830_end_mask_0, x = transpose_16)[name = tensor("op_23830_cast_fp16")]; + tensor var_23834_begin_0 = const()[name = tensor("op_23834_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_23834_end_0 = const()[name = tensor("op_23834_end_0"), val = tensor([1, 1500, 1, 576])]; + tensor var_23834_end_mask_0 = const()[name = tensor("op_23834_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23834_cast_fp16 = slice_by_index(begin = var_23834_begin_0, end = var_23834_end_0, end_mask = var_23834_end_mask_0, x = transpose_16)[name = tensor("op_23834_cast_fp16")]; + tensor var_23838_begin_0 = const()[name = tensor("op_23838_begin_0"), val = tensor([0, 0, 0, 576])]; + tensor var_23838_end_0 = const()[name = tensor("op_23838_end_0"), val = tensor([1, 1500, 1, 640])]; + tensor var_23838_end_mask_0 = const()[name = tensor("op_23838_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23838_cast_fp16 = slice_by_index(begin = var_23838_begin_0, end = var_23838_end_0, end_mask = var_23838_end_mask_0, x = transpose_16)[name = tensor("op_23838_cast_fp16")]; + tensor var_23842_begin_0 = const()[name = tensor("op_23842_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_23842_end_0 = const()[name = tensor("op_23842_end_0"), val = tensor([1, 1500, 1, 704])]; + tensor var_23842_end_mask_0 = const()[name = tensor("op_23842_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23842_cast_fp16 = slice_by_index(begin = var_23842_begin_0, end = var_23842_end_0, end_mask = var_23842_end_mask_0, x = transpose_16)[name = tensor("op_23842_cast_fp16")]; + tensor var_23846_begin_0 = const()[name = tensor("op_23846_begin_0"), val = tensor([0, 0, 0, 704])]; + tensor var_23846_end_0 = const()[name = tensor("op_23846_end_0"), val = tensor([1, 1500, 1, 768])]; + tensor var_23846_end_mask_0 = const()[name = tensor("op_23846_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23846_cast_fp16 = slice_by_index(begin = var_23846_begin_0, end = var_23846_end_0, end_mask = var_23846_end_mask_0, x = transpose_16)[name = tensor("op_23846_cast_fp16")]; + tensor var_23850_begin_0 = const()[name = tensor("op_23850_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_23850_end_0 = const()[name = tensor("op_23850_end_0"), val = tensor([1, 1500, 1, 832])]; + tensor var_23850_end_mask_0 = const()[name = tensor("op_23850_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23850_cast_fp16 = slice_by_index(begin = var_23850_begin_0, end = var_23850_end_0, end_mask = var_23850_end_mask_0, x = transpose_16)[name = tensor("op_23850_cast_fp16")]; + tensor var_23854_begin_0 = const()[name = tensor("op_23854_begin_0"), val = tensor([0, 0, 0, 832])]; + tensor var_23854_end_0 = const()[name = tensor("op_23854_end_0"), val = tensor([1, 1500, 1, 896])]; + tensor var_23854_end_mask_0 = const()[name = tensor("op_23854_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23854_cast_fp16 = slice_by_index(begin = var_23854_begin_0, end = var_23854_end_0, end_mask = var_23854_end_mask_0, x = transpose_16)[name = tensor("op_23854_cast_fp16")]; + tensor var_23858_begin_0 = const()[name = tensor("op_23858_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_23858_end_0 = const()[name = tensor("op_23858_end_0"), val = tensor([1, 1500, 1, 960])]; + tensor var_23858_end_mask_0 = const()[name = tensor("op_23858_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23858_cast_fp16 = slice_by_index(begin = var_23858_begin_0, end = var_23858_end_0, end_mask = var_23858_end_mask_0, x = transpose_16)[name = tensor("op_23858_cast_fp16")]; + tensor var_23862_begin_0 = const()[name = tensor("op_23862_begin_0"), val = tensor([0, 0, 0, 960])]; + tensor var_23862_end_0 = const()[name = tensor("op_23862_end_0"), val = tensor([1, 1500, 1, 1024])]; + tensor var_23862_end_mask_0 = const()[name = tensor("op_23862_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23862_cast_fp16 = slice_by_index(begin = var_23862_begin_0, end = var_23862_end_0, end_mask = var_23862_end_mask_0, x = transpose_16)[name = tensor("op_23862_cast_fp16")]; + tensor var_23866_begin_0 = const()[name = tensor("op_23866_begin_0"), val = tensor([0, 0, 0, 1024])]; + tensor var_23866_end_0 = const()[name = tensor("op_23866_end_0"), val = tensor([1, 1500, 1, 1088])]; + tensor var_23866_end_mask_0 = const()[name = tensor("op_23866_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23866_cast_fp16 = slice_by_index(begin = var_23866_begin_0, end = var_23866_end_0, end_mask = var_23866_end_mask_0, x = transpose_16)[name = tensor("op_23866_cast_fp16")]; + tensor var_23870_begin_0 = const()[name = tensor("op_23870_begin_0"), val = tensor([0, 0, 0, 1088])]; + tensor var_23870_end_0 = const()[name = tensor("op_23870_end_0"), val = tensor([1, 1500, 1, 1152])]; + tensor var_23870_end_mask_0 = const()[name = tensor("op_23870_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23870_cast_fp16 = slice_by_index(begin = var_23870_begin_0, end = var_23870_end_0, end_mask = var_23870_end_mask_0, x = transpose_16)[name = tensor("op_23870_cast_fp16")]; + tensor var_23874_begin_0 = const()[name = tensor("op_23874_begin_0"), val = tensor([0, 0, 0, 1152])]; + tensor var_23874_end_0 = const()[name = tensor("op_23874_end_0"), val = tensor([1, 1500, 1, 1216])]; + tensor var_23874_end_mask_0 = const()[name = tensor("op_23874_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23874_cast_fp16 = slice_by_index(begin = var_23874_begin_0, end = var_23874_end_0, end_mask = var_23874_end_mask_0, x = transpose_16)[name = tensor("op_23874_cast_fp16")]; + tensor var_23878_begin_0 = const()[name = tensor("op_23878_begin_0"), val = tensor([0, 0, 0, 1216])]; + tensor var_23878_end_0 = const()[name = tensor("op_23878_end_0"), val = tensor([1, 1500, 1, 1280])]; + tensor var_23878_end_mask_0 = const()[name = tensor("op_23878_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_23878_cast_fp16 = slice_by_index(begin = var_23878_begin_0, end = var_23878_end_0, end_mask = var_23878_end_mask_0, x = transpose_16)[name = tensor("op_23878_cast_fp16")]; + tensor var_23880_begin_0 = const()[name = tensor("op_23880_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_23880_end_0 = const()[name = tensor("op_23880_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_23880_end_mask_0 = const()[name = tensor("op_23880_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_23880_cast_fp16 = slice_by_index(begin = var_23880_begin_0, end = var_23880_end_0, end_mask = var_23880_end_mask_0, x = value_31_cast_fp16)[name = tensor("op_23880_cast_fp16")]; + tensor var_23884_begin_0 = const()[name = tensor("op_23884_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_23884_end_0 = const()[name = tensor("op_23884_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_23884_end_mask_0 = const()[name = tensor("op_23884_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_23884_cast_fp16 = slice_by_index(begin = var_23884_begin_0, end = var_23884_end_0, end_mask = var_23884_end_mask_0, x = value_31_cast_fp16)[name = tensor("op_23884_cast_fp16")]; + tensor var_23888_begin_0 = const()[name = tensor("op_23888_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_23888_end_0 = const()[name = tensor("op_23888_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_23888_end_mask_0 = const()[name = tensor("op_23888_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_23888_cast_fp16 = slice_by_index(begin = var_23888_begin_0, end = var_23888_end_0, end_mask = var_23888_end_mask_0, x = value_31_cast_fp16)[name = tensor("op_23888_cast_fp16")]; + tensor var_23892_begin_0 = const()[name = tensor("op_23892_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_23892_end_0 = const()[name = tensor("op_23892_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_23892_end_mask_0 = const()[name = tensor("op_23892_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_23892_cast_fp16 = slice_by_index(begin = var_23892_begin_0, end = var_23892_end_0, end_mask = var_23892_end_mask_0, x = value_31_cast_fp16)[name = tensor("op_23892_cast_fp16")]; + tensor var_23896_begin_0 = const()[name = tensor("op_23896_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_23896_end_0 = const()[name = tensor("op_23896_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_23896_end_mask_0 = const()[name = tensor("op_23896_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_23896_cast_fp16 = slice_by_index(begin = var_23896_begin_0, end = var_23896_end_0, end_mask = var_23896_end_mask_0, x = value_31_cast_fp16)[name = tensor("op_23896_cast_fp16")]; + tensor var_23900_begin_0 = const()[name = tensor("op_23900_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_23900_end_0 = const()[name = tensor("op_23900_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_23900_end_mask_0 = const()[name = tensor("op_23900_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_23900_cast_fp16 = slice_by_index(begin = var_23900_begin_0, end = var_23900_end_0, end_mask = var_23900_end_mask_0, x = value_31_cast_fp16)[name = tensor("op_23900_cast_fp16")]; + tensor var_23904_begin_0 = const()[name = tensor("op_23904_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_23904_end_0 = const()[name = tensor("op_23904_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_23904_end_mask_0 = const()[name = tensor("op_23904_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_23904_cast_fp16 = slice_by_index(begin = var_23904_begin_0, end = var_23904_end_0, end_mask = var_23904_end_mask_0, x = value_31_cast_fp16)[name = tensor("op_23904_cast_fp16")]; + tensor var_23908_begin_0 = const()[name = tensor("op_23908_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_23908_end_0 = const()[name = tensor("op_23908_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_23908_end_mask_0 = const()[name = tensor("op_23908_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_23908_cast_fp16 = slice_by_index(begin = var_23908_begin_0, end = var_23908_end_0, end_mask = var_23908_end_mask_0, x = value_31_cast_fp16)[name = tensor("op_23908_cast_fp16")]; + tensor var_23912_begin_0 = const()[name = tensor("op_23912_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_23912_end_0 = const()[name = tensor("op_23912_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_23912_end_mask_0 = const()[name = tensor("op_23912_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_23912_cast_fp16 = slice_by_index(begin = var_23912_begin_0, end = var_23912_end_0, end_mask = var_23912_end_mask_0, x = value_31_cast_fp16)[name = tensor("op_23912_cast_fp16")]; + tensor var_23916_begin_0 = const()[name = tensor("op_23916_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_23916_end_0 = const()[name = tensor("op_23916_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_23916_end_mask_0 = const()[name = tensor("op_23916_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_23916_cast_fp16 = slice_by_index(begin = var_23916_begin_0, end = var_23916_end_0, end_mask = var_23916_end_mask_0, x = value_31_cast_fp16)[name = tensor("op_23916_cast_fp16")]; + tensor var_23920_begin_0 = const()[name = tensor("op_23920_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_23920_end_0 = const()[name = tensor("op_23920_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_23920_end_mask_0 = const()[name = tensor("op_23920_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_23920_cast_fp16 = slice_by_index(begin = var_23920_begin_0, end = var_23920_end_0, end_mask = var_23920_end_mask_0, x = value_31_cast_fp16)[name = tensor("op_23920_cast_fp16")]; + tensor var_23924_begin_0 = const()[name = tensor("op_23924_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_23924_end_0 = const()[name = tensor("op_23924_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_23924_end_mask_0 = const()[name = tensor("op_23924_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_23924_cast_fp16 = slice_by_index(begin = var_23924_begin_0, end = var_23924_end_0, end_mask = var_23924_end_mask_0, x = value_31_cast_fp16)[name = tensor("op_23924_cast_fp16")]; + tensor var_23928_begin_0 = const()[name = tensor("op_23928_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_23928_end_0 = const()[name = tensor("op_23928_end_0"), val = tensor([1, 832, 1, 1500])]; + tensor var_23928_end_mask_0 = const()[name = tensor("op_23928_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_23928_cast_fp16 = slice_by_index(begin = var_23928_begin_0, end = var_23928_end_0, end_mask = var_23928_end_mask_0, x = value_31_cast_fp16)[name = tensor("op_23928_cast_fp16")]; + tensor var_23932_begin_0 = const()[name = tensor("op_23932_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_23932_end_0 = const()[name = tensor("op_23932_end_0"), val = tensor([1, 896, 1, 1500])]; + tensor var_23932_end_mask_0 = const()[name = tensor("op_23932_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_23932_cast_fp16 = slice_by_index(begin = var_23932_begin_0, end = var_23932_end_0, end_mask = var_23932_end_mask_0, x = value_31_cast_fp16)[name = tensor("op_23932_cast_fp16")]; + tensor var_23936_begin_0 = const()[name = tensor("op_23936_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_23936_end_0 = const()[name = tensor("op_23936_end_0"), val = tensor([1, 960, 1, 1500])]; + tensor var_23936_end_mask_0 = const()[name = tensor("op_23936_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_23936_cast_fp16 = slice_by_index(begin = var_23936_begin_0, end = var_23936_end_0, end_mask = var_23936_end_mask_0, x = value_31_cast_fp16)[name = tensor("op_23936_cast_fp16")]; + tensor var_23940_begin_0 = const()[name = tensor("op_23940_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_23940_end_0 = const()[name = tensor("op_23940_end_0"), val = tensor([1, 1024, 1, 1500])]; + tensor var_23940_end_mask_0 = const()[name = tensor("op_23940_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_23940_cast_fp16 = slice_by_index(begin = var_23940_begin_0, end = var_23940_end_0, end_mask = var_23940_end_mask_0, x = value_31_cast_fp16)[name = tensor("op_23940_cast_fp16")]; + tensor var_23944_begin_0 = const()[name = tensor("op_23944_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_23944_end_0 = const()[name = tensor("op_23944_end_0"), val = tensor([1, 1088, 1, 1500])]; + tensor var_23944_end_mask_0 = const()[name = tensor("op_23944_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_23944_cast_fp16 = slice_by_index(begin = var_23944_begin_0, end = var_23944_end_0, end_mask = var_23944_end_mask_0, x = value_31_cast_fp16)[name = tensor("op_23944_cast_fp16")]; + tensor var_23948_begin_0 = const()[name = tensor("op_23948_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_23948_end_0 = const()[name = tensor("op_23948_end_0"), val = tensor([1, 1152, 1, 1500])]; + tensor var_23948_end_mask_0 = const()[name = tensor("op_23948_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_23948_cast_fp16 = slice_by_index(begin = var_23948_begin_0, end = var_23948_end_0, end_mask = var_23948_end_mask_0, x = value_31_cast_fp16)[name = tensor("op_23948_cast_fp16")]; + tensor var_23952_begin_0 = const()[name = tensor("op_23952_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_23952_end_0 = const()[name = tensor("op_23952_end_0"), val = tensor([1, 1216, 1, 1500])]; + tensor var_23952_end_mask_0 = const()[name = tensor("op_23952_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_23952_cast_fp16 = slice_by_index(begin = var_23952_begin_0, end = var_23952_end_0, end_mask = var_23952_end_mask_0, x = value_31_cast_fp16)[name = tensor("op_23952_cast_fp16")]; + tensor var_23956_begin_0 = const()[name = tensor("op_23956_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_23956_end_0 = const()[name = tensor("op_23956_end_0"), val = tensor([1, 1280, 1, 1500])]; + tensor var_23956_end_mask_0 = const()[name = tensor("op_23956_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_23956_cast_fp16 = slice_by_index(begin = var_23956_begin_0, end = var_23956_end_0, end_mask = var_23956_end_mask_0, x = value_31_cast_fp16)[name = tensor("op_23956_cast_fp16")]; + tensor var_23960_equation_0 = const()[name = tensor("op_23960_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_23960_cast_fp16 = einsum(equation = var_23960_equation_0, values = (var_23802_cast_fp16, var_23244_cast_fp16))[name = tensor("op_23960_cast_fp16")]; + tensor var_23961_to_fp16 = const()[name = tensor("op_23961_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2401_cast_fp16 = mul(x = var_23960_cast_fp16, y = var_23961_to_fp16)[name = tensor("aw_chunk_2401_cast_fp16")]; + tensor var_23964_equation_0 = const()[name = tensor("op_23964_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_23964_cast_fp16 = einsum(equation = var_23964_equation_0, values = (var_23802_cast_fp16, var_23251_cast_fp16))[name = tensor("op_23964_cast_fp16")]; + tensor var_23965_to_fp16 = const()[name = tensor("op_23965_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2403_cast_fp16 = mul(x = var_23964_cast_fp16, y = var_23965_to_fp16)[name = tensor("aw_chunk_2403_cast_fp16")]; + tensor var_23968_equation_0 = const()[name = tensor("op_23968_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_23968_cast_fp16 = einsum(equation = var_23968_equation_0, values = (var_23802_cast_fp16, var_23258_cast_fp16))[name = tensor("op_23968_cast_fp16")]; + tensor var_23969_to_fp16 = const()[name = tensor("op_23969_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2405_cast_fp16 = mul(x = var_23968_cast_fp16, y = var_23969_to_fp16)[name = tensor("aw_chunk_2405_cast_fp16")]; + tensor var_23972_equation_0 = const()[name = tensor("op_23972_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_23972_cast_fp16 = einsum(equation = var_23972_equation_0, values = (var_23802_cast_fp16, var_23265_cast_fp16))[name = tensor("op_23972_cast_fp16")]; + tensor var_23973_to_fp16 = const()[name = tensor("op_23973_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2407_cast_fp16 = mul(x = var_23972_cast_fp16, y = var_23973_to_fp16)[name = tensor("aw_chunk_2407_cast_fp16")]; + tensor var_23976_equation_0 = const()[name = tensor("op_23976_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_23976_cast_fp16 = einsum(equation = var_23976_equation_0, values = (var_23806_cast_fp16, var_23272_cast_fp16))[name = tensor("op_23976_cast_fp16")]; + tensor var_23977_to_fp16 = const()[name = tensor("op_23977_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2409_cast_fp16 = mul(x = var_23976_cast_fp16, y = var_23977_to_fp16)[name = tensor("aw_chunk_2409_cast_fp16")]; + tensor var_23980_equation_0 = const()[name = tensor("op_23980_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_23980_cast_fp16 = einsum(equation = var_23980_equation_0, values = (var_23806_cast_fp16, var_23279_cast_fp16))[name = tensor("op_23980_cast_fp16")]; + tensor var_23981_to_fp16 = const()[name = tensor("op_23981_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2411_cast_fp16 = mul(x = var_23980_cast_fp16, y = var_23981_to_fp16)[name = tensor("aw_chunk_2411_cast_fp16")]; + tensor var_23984_equation_0 = const()[name = tensor("op_23984_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_23984_cast_fp16 = einsum(equation = var_23984_equation_0, values = (var_23806_cast_fp16, var_23286_cast_fp16))[name = tensor("op_23984_cast_fp16")]; + tensor var_23985_to_fp16 = const()[name = tensor("op_23985_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2413_cast_fp16 = mul(x = var_23984_cast_fp16, y = var_23985_to_fp16)[name = tensor("aw_chunk_2413_cast_fp16")]; + tensor var_23988_equation_0 = const()[name = tensor("op_23988_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_23988_cast_fp16 = einsum(equation = var_23988_equation_0, values = (var_23806_cast_fp16, var_23293_cast_fp16))[name = tensor("op_23988_cast_fp16")]; + tensor var_23989_to_fp16 = const()[name = tensor("op_23989_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2415_cast_fp16 = mul(x = var_23988_cast_fp16, y = var_23989_to_fp16)[name = tensor("aw_chunk_2415_cast_fp16")]; + tensor var_23992_equation_0 = const()[name = tensor("op_23992_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_23992_cast_fp16 = einsum(equation = var_23992_equation_0, values = (var_23810_cast_fp16, var_23300_cast_fp16))[name = tensor("op_23992_cast_fp16")]; + tensor var_23993_to_fp16 = const()[name = tensor("op_23993_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2417_cast_fp16 = mul(x = var_23992_cast_fp16, y = var_23993_to_fp16)[name = tensor("aw_chunk_2417_cast_fp16")]; + tensor var_23996_equation_0 = const()[name = tensor("op_23996_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_23996_cast_fp16 = einsum(equation = var_23996_equation_0, values = (var_23810_cast_fp16, var_23307_cast_fp16))[name = tensor("op_23996_cast_fp16")]; + tensor var_23997_to_fp16 = const()[name = tensor("op_23997_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2419_cast_fp16 = mul(x = var_23996_cast_fp16, y = var_23997_to_fp16)[name = tensor("aw_chunk_2419_cast_fp16")]; + tensor var_24000_equation_0 = const()[name = tensor("op_24000_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24000_cast_fp16 = einsum(equation = var_24000_equation_0, values = (var_23810_cast_fp16, var_23314_cast_fp16))[name = tensor("op_24000_cast_fp16")]; + tensor var_24001_to_fp16 = const()[name = tensor("op_24001_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2421_cast_fp16 = mul(x = var_24000_cast_fp16, y = var_24001_to_fp16)[name = tensor("aw_chunk_2421_cast_fp16")]; + tensor var_24004_equation_0 = const()[name = tensor("op_24004_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24004_cast_fp16 = einsum(equation = var_24004_equation_0, values = (var_23810_cast_fp16, var_23321_cast_fp16))[name = tensor("op_24004_cast_fp16")]; + tensor var_24005_to_fp16 = const()[name = tensor("op_24005_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2423_cast_fp16 = mul(x = var_24004_cast_fp16, y = var_24005_to_fp16)[name = tensor("aw_chunk_2423_cast_fp16")]; + tensor var_24008_equation_0 = const()[name = tensor("op_24008_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24008_cast_fp16 = einsum(equation = var_24008_equation_0, values = (var_23814_cast_fp16, var_23328_cast_fp16))[name = tensor("op_24008_cast_fp16")]; + tensor var_24009_to_fp16 = const()[name = tensor("op_24009_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2425_cast_fp16 = mul(x = var_24008_cast_fp16, y = var_24009_to_fp16)[name = tensor("aw_chunk_2425_cast_fp16")]; + tensor var_24012_equation_0 = const()[name = tensor("op_24012_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24012_cast_fp16 = einsum(equation = var_24012_equation_0, values = (var_23814_cast_fp16, var_23335_cast_fp16))[name = tensor("op_24012_cast_fp16")]; + tensor var_24013_to_fp16 = const()[name = tensor("op_24013_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2427_cast_fp16 = mul(x = var_24012_cast_fp16, y = var_24013_to_fp16)[name = tensor("aw_chunk_2427_cast_fp16")]; + tensor var_24016_equation_0 = const()[name = tensor("op_24016_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24016_cast_fp16 = einsum(equation = var_24016_equation_0, values = (var_23814_cast_fp16, var_23342_cast_fp16))[name = tensor("op_24016_cast_fp16")]; + tensor var_24017_to_fp16 = const()[name = tensor("op_24017_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2429_cast_fp16 = mul(x = var_24016_cast_fp16, y = var_24017_to_fp16)[name = tensor("aw_chunk_2429_cast_fp16")]; + tensor var_24020_equation_0 = const()[name = tensor("op_24020_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24020_cast_fp16 = einsum(equation = var_24020_equation_0, values = (var_23814_cast_fp16, var_23349_cast_fp16))[name = tensor("op_24020_cast_fp16")]; + tensor var_24021_to_fp16 = const()[name = tensor("op_24021_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2431_cast_fp16 = mul(x = var_24020_cast_fp16, y = var_24021_to_fp16)[name = tensor("aw_chunk_2431_cast_fp16")]; + tensor var_24024_equation_0 = const()[name = tensor("op_24024_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24024_cast_fp16 = einsum(equation = var_24024_equation_0, values = (var_23818_cast_fp16, var_23356_cast_fp16))[name = tensor("op_24024_cast_fp16")]; + tensor var_24025_to_fp16 = const()[name = tensor("op_24025_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2433_cast_fp16 = mul(x = var_24024_cast_fp16, y = var_24025_to_fp16)[name = tensor("aw_chunk_2433_cast_fp16")]; + tensor var_24028_equation_0 = const()[name = tensor("op_24028_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24028_cast_fp16 = einsum(equation = var_24028_equation_0, values = (var_23818_cast_fp16, var_23363_cast_fp16))[name = tensor("op_24028_cast_fp16")]; + tensor var_24029_to_fp16 = const()[name = tensor("op_24029_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2435_cast_fp16 = mul(x = var_24028_cast_fp16, y = var_24029_to_fp16)[name = tensor("aw_chunk_2435_cast_fp16")]; + tensor var_24032_equation_0 = const()[name = tensor("op_24032_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24032_cast_fp16 = einsum(equation = var_24032_equation_0, values = (var_23818_cast_fp16, var_23370_cast_fp16))[name = tensor("op_24032_cast_fp16")]; + tensor var_24033_to_fp16 = const()[name = tensor("op_24033_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2437_cast_fp16 = mul(x = var_24032_cast_fp16, y = var_24033_to_fp16)[name = tensor("aw_chunk_2437_cast_fp16")]; + tensor var_24036_equation_0 = const()[name = tensor("op_24036_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24036_cast_fp16 = einsum(equation = var_24036_equation_0, values = (var_23818_cast_fp16, var_23377_cast_fp16))[name = tensor("op_24036_cast_fp16")]; + tensor var_24037_to_fp16 = const()[name = tensor("op_24037_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2439_cast_fp16 = mul(x = var_24036_cast_fp16, y = var_24037_to_fp16)[name = tensor("aw_chunk_2439_cast_fp16")]; + tensor var_24040_equation_0 = const()[name = tensor("op_24040_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24040_cast_fp16 = einsum(equation = var_24040_equation_0, values = (var_23822_cast_fp16, var_23384_cast_fp16))[name = tensor("op_24040_cast_fp16")]; + tensor var_24041_to_fp16 = const()[name = tensor("op_24041_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2441_cast_fp16 = mul(x = var_24040_cast_fp16, y = var_24041_to_fp16)[name = tensor("aw_chunk_2441_cast_fp16")]; + tensor var_24044_equation_0 = const()[name = tensor("op_24044_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24044_cast_fp16 = einsum(equation = var_24044_equation_0, values = (var_23822_cast_fp16, var_23391_cast_fp16))[name = tensor("op_24044_cast_fp16")]; + tensor var_24045_to_fp16 = const()[name = tensor("op_24045_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2443_cast_fp16 = mul(x = var_24044_cast_fp16, y = var_24045_to_fp16)[name = tensor("aw_chunk_2443_cast_fp16")]; + tensor var_24048_equation_0 = const()[name = tensor("op_24048_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24048_cast_fp16 = einsum(equation = var_24048_equation_0, values = (var_23822_cast_fp16, var_23398_cast_fp16))[name = tensor("op_24048_cast_fp16")]; + tensor var_24049_to_fp16 = const()[name = tensor("op_24049_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2445_cast_fp16 = mul(x = var_24048_cast_fp16, y = var_24049_to_fp16)[name = tensor("aw_chunk_2445_cast_fp16")]; + tensor var_24052_equation_0 = const()[name = tensor("op_24052_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24052_cast_fp16 = einsum(equation = var_24052_equation_0, values = (var_23822_cast_fp16, var_23405_cast_fp16))[name = tensor("op_24052_cast_fp16")]; + tensor var_24053_to_fp16 = const()[name = tensor("op_24053_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2447_cast_fp16 = mul(x = var_24052_cast_fp16, y = var_24053_to_fp16)[name = tensor("aw_chunk_2447_cast_fp16")]; + tensor var_24056_equation_0 = const()[name = tensor("op_24056_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24056_cast_fp16 = einsum(equation = var_24056_equation_0, values = (var_23826_cast_fp16, var_23412_cast_fp16))[name = tensor("op_24056_cast_fp16")]; + tensor var_24057_to_fp16 = const()[name = tensor("op_24057_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2449_cast_fp16 = mul(x = var_24056_cast_fp16, y = var_24057_to_fp16)[name = tensor("aw_chunk_2449_cast_fp16")]; + tensor var_24060_equation_0 = const()[name = tensor("op_24060_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24060_cast_fp16 = einsum(equation = var_24060_equation_0, values = (var_23826_cast_fp16, var_23419_cast_fp16))[name = tensor("op_24060_cast_fp16")]; + tensor var_24061_to_fp16 = const()[name = tensor("op_24061_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2451_cast_fp16 = mul(x = var_24060_cast_fp16, y = var_24061_to_fp16)[name = tensor("aw_chunk_2451_cast_fp16")]; + tensor var_24064_equation_0 = const()[name = tensor("op_24064_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24064_cast_fp16 = einsum(equation = var_24064_equation_0, values = (var_23826_cast_fp16, var_23426_cast_fp16))[name = tensor("op_24064_cast_fp16")]; + tensor var_24065_to_fp16 = const()[name = tensor("op_24065_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2453_cast_fp16 = mul(x = var_24064_cast_fp16, y = var_24065_to_fp16)[name = tensor("aw_chunk_2453_cast_fp16")]; + tensor var_24068_equation_0 = const()[name = tensor("op_24068_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24068_cast_fp16 = einsum(equation = var_24068_equation_0, values = (var_23826_cast_fp16, var_23433_cast_fp16))[name = tensor("op_24068_cast_fp16")]; + tensor var_24069_to_fp16 = const()[name = tensor("op_24069_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2455_cast_fp16 = mul(x = var_24068_cast_fp16, y = var_24069_to_fp16)[name = tensor("aw_chunk_2455_cast_fp16")]; + tensor var_24072_equation_0 = const()[name = tensor("op_24072_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24072_cast_fp16 = einsum(equation = var_24072_equation_0, values = (var_23830_cast_fp16, var_23440_cast_fp16))[name = tensor("op_24072_cast_fp16")]; + tensor var_24073_to_fp16 = const()[name = tensor("op_24073_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2457_cast_fp16 = mul(x = var_24072_cast_fp16, y = var_24073_to_fp16)[name = tensor("aw_chunk_2457_cast_fp16")]; + tensor var_24076_equation_0 = const()[name = tensor("op_24076_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24076_cast_fp16 = einsum(equation = var_24076_equation_0, values = (var_23830_cast_fp16, var_23447_cast_fp16))[name = tensor("op_24076_cast_fp16")]; + tensor var_24077_to_fp16 = const()[name = tensor("op_24077_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2459_cast_fp16 = mul(x = var_24076_cast_fp16, y = var_24077_to_fp16)[name = tensor("aw_chunk_2459_cast_fp16")]; + tensor var_24080_equation_0 = const()[name = tensor("op_24080_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24080_cast_fp16 = einsum(equation = var_24080_equation_0, values = (var_23830_cast_fp16, var_23454_cast_fp16))[name = tensor("op_24080_cast_fp16")]; + tensor var_24081_to_fp16 = const()[name = tensor("op_24081_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2461_cast_fp16 = mul(x = var_24080_cast_fp16, y = var_24081_to_fp16)[name = tensor("aw_chunk_2461_cast_fp16")]; + tensor var_24084_equation_0 = const()[name = tensor("op_24084_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24084_cast_fp16 = einsum(equation = var_24084_equation_0, values = (var_23830_cast_fp16, var_23461_cast_fp16))[name = tensor("op_24084_cast_fp16")]; + tensor var_24085_to_fp16 = const()[name = tensor("op_24085_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2463_cast_fp16 = mul(x = var_24084_cast_fp16, y = var_24085_to_fp16)[name = tensor("aw_chunk_2463_cast_fp16")]; + tensor var_24088_equation_0 = const()[name = tensor("op_24088_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24088_cast_fp16 = einsum(equation = var_24088_equation_0, values = (var_23834_cast_fp16, var_23468_cast_fp16))[name = tensor("op_24088_cast_fp16")]; + tensor var_24089_to_fp16 = const()[name = tensor("op_24089_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2465_cast_fp16 = mul(x = var_24088_cast_fp16, y = var_24089_to_fp16)[name = tensor("aw_chunk_2465_cast_fp16")]; + tensor var_24092_equation_0 = const()[name = tensor("op_24092_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24092_cast_fp16 = einsum(equation = var_24092_equation_0, values = (var_23834_cast_fp16, var_23475_cast_fp16))[name = tensor("op_24092_cast_fp16")]; + tensor var_24093_to_fp16 = const()[name = tensor("op_24093_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2467_cast_fp16 = mul(x = var_24092_cast_fp16, y = var_24093_to_fp16)[name = tensor("aw_chunk_2467_cast_fp16")]; + tensor var_24096_equation_0 = const()[name = tensor("op_24096_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24096_cast_fp16 = einsum(equation = var_24096_equation_0, values = (var_23834_cast_fp16, var_23482_cast_fp16))[name = tensor("op_24096_cast_fp16")]; + tensor var_24097_to_fp16 = const()[name = tensor("op_24097_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2469_cast_fp16 = mul(x = var_24096_cast_fp16, y = var_24097_to_fp16)[name = tensor("aw_chunk_2469_cast_fp16")]; + tensor var_24100_equation_0 = const()[name = tensor("op_24100_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24100_cast_fp16 = einsum(equation = var_24100_equation_0, values = (var_23834_cast_fp16, var_23489_cast_fp16))[name = tensor("op_24100_cast_fp16")]; + tensor var_24101_to_fp16 = const()[name = tensor("op_24101_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2471_cast_fp16 = mul(x = var_24100_cast_fp16, y = var_24101_to_fp16)[name = tensor("aw_chunk_2471_cast_fp16")]; + tensor var_24104_equation_0 = const()[name = tensor("op_24104_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24104_cast_fp16 = einsum(equation = var_24104_equation_0, values = (var_23838_cast_fp16, var_23496_cast_fp16))[name = tensor("op_24104_cast_fp16")]; + tensor var_24105_to_fp16 = const()[name = tensor("op_24105_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2473_cast_fp16 = mul(x = var_24104_cast_fp16, y = var_24105_to_fp16)[name = tensor("aw_chunk_2473_cast_fp16")]; + tensor var_24108_equation_0 = const()[name = tensor("op_24108_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24108_cast_fp16 = einsum(equation = var_24108_equation_0, values = (var_23838_cast_fp16, var_23503_cast_fp16))[name = tensor("op_24108_cast_fp16")]; + tensor var_24109_to_fp16 = const()[name = tensor("op_24109_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2475_cast_fp16 = mul(x = var_24108_cast_fp16, y = var_24109_to_fp16)[name = tensor("aw_chunk_2475_cast_fp16")]; + tensor var_24112_equation_0 = const()[name = tensor("op_24112_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24112_cast_fp16 = einsum(equation = var_24112_equation_0, values = (var_23838_cast_fp16, var_23510_cast_fp16))[name = tensor("op_24112_cast_fp16")]; + tensor var_24113_to_fp16 = const()[name = tensor("op_24113_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2477_cast_fp16 = mul(x = var_24112_cast_fp16, y = var_24113_to_fp16)[name = tensor("aw_chunk_2477_cast_fp16")]; + tensor var_24116_equation_0 = const()[name = tensor("op_24116_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24116_cast_fp16 = einsum(equation = var_24116_equation_0, values = (var_23838_cast_fp16, var_23517_cast_fp16))[name = tensor("op_24116_cast_fp16")]; + tensor var_24117_to_fp16 = const()[name = tensor("op_24117_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2479_cast_fp16 = mul(x = var_24116_cast_fp16, y = var_24117_to_fp16)[name = tensor("aw_chunk_2479_cast_fp16")]; + tensor var_24120_equation_0 = const()[name = tensor("op_24120_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24120_cast_fp16 = einsum(equation = var_24120_equation_0, values = (var_23842_cast_fp16, var_23524_cast_fp16))[name = tensor("op_24120_cast_fp16")]; + tensor var_24121_to_fp16 = const()[name = tensor("op_24121_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2481_cast_fp16 = mul(x = var_24120_cast_fp16, y = var_24121_to_fp16)[name = tensor("aw_chunk_2481_cast_fp16")]; + tensor var_24124_equation_0 = const()[name = tensor("op_24124_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24124_cast_fp16 = einsum(equation = var_24124_equation_0, values = (var_23842_cast_fp16, var_23531_cast_fp16))[name = tensor("op_24124_cast_fp16")]; + tensor var_24125_to_fp16 = const()[name = tensor("op_24125_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2483_cast_fp16 = mul(x = var_24124_cast_fp16, y = var_24125_to_fp16)[name = tensor("aw_chunk_2483_cast_fp16")]; + tensor var_24128_equation_0 = const()[name = tensor("op_24128_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24128_cast_fp16 = einsum(equation = var_24128_equation_0, values = (var_23842_cast_fp16, var_23538_cast_fp16))[name = tensor("op_24128_cast_fp16")]; + tensor var_24129_to_fp16 = const()[name = tensor("op_24129_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2485_cast_fp16 = mul(x = var_24128_cast_fp16, y = var_24129_to_fp16)[name = tensor("aw_chunk_2485_cast_fp16")]; + tensor var_24132_equation_0 = const()[name = tensor("op_24132_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24132_cast_fp16 = einsum(equation = var_24132_equation_0, values = (var_23842_cast_fp16, var_23545_cast_fp16))[name = tensor("op_24132_cast_fp16")]; + tensor var_24133_to_fp16 = const()[name = tensor("op_24133_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2487_cast_fp16 = mul(x = var_24132_cast_fp16, y = var_24133_to_fp16)[name = tensor("aw_chunk_2487_cast_fp16")]; + tensor var_24136_equation_0 = const()[name = tensor("op_24136_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24136_cast_fp16 = einsum(equation = var_24136_equation_0, values = (var_23846_cast_fp16, var_23552_cast_fp16))[name = tensor("op_24136_cast_fp16")]; + tensor var_24137_to_fp16 = const()[name = tensor("op_24137_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2489_cast_fp16 = mul(x = var_24136_cast_fp16, y = var_24137_to_fp16)[name = tensor("aw_chunk_2489_cast_fp16")]; + tensor var_24140_equation_0 = const()[name = tensor("op_24140_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24140_cast_fp16 = einsum(equation = var_24140_equation_0, values = (var_23846_cast_fp16, var_23559_cast_fp16))[name = tensor("op_24140_cast_fp16")]; + tensor var_24141_to_fp16 = const()[name = tensor("op_24141_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2491_cast_fp16 = mul(x = var_24140_cast_fp16, y = var_24141_to_fp16)[name = tensor("aw_chunk_2491_cast_fp16")]; + tensor var_24144_equation_0 = const()[name = tensor("op_24144_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24144_cast_fp16 = einsum(equation = var_24144_equation_0, values = (var_23846_cast_fp16, var_23566_cast_fp16))[name = tensor("op_24144_cast_fp16")]; + tensor var_24145_to_fp16 = const()[name = tensor("op_24145_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2493_cast_fp16 = mul(x = var_24144_cast_fp16, y = var_24145_to_fp16)[name = tensor("aw_chunk_2493_cast_fp16")]; + tensor var_24148_equation_0 = const()[name = tensor("op_24148_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24148_cast_fp16 = einsum(equation = var_24148_equation_0, values = (var_23846_cast_fp16, var_23573_cast_fp16))[name = tensor("op_24148_cast_fp16")]; + tensor var_24149_to_fp16 = const()[name = tensor("op_24149_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2495_cast_fp16 = mul(x = var_24148_cast_fp16, y = var_24149_to_fp16)[name = tensor("aw_chunk_2495_cast_fp16")]; + tensor var_24152_equation_0 = const()[name = tensor("op_24152_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24152_cast_fp16 = einsum(equation = var_24152_equation_0, values = (var_23850_cast_fp16, var_23580_cast_fp16))[name = tensor("op_24152_cast_fp16")]; + tensor var_24153_to_fp16 = const()[name = tensor("op_24153_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2497_cast_fp16 = mul(x = var_24152_cast_fp16, y = var_24153_to_fp16)[name = tensor("aw_chunk_2497_cast_fp16")]; + tensor var_24156_equation_0 = const()[name = tensor("op_24156_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24156_cast_fp16 = einsum(equation = var_24156_equation_0, values = (var_23850_cast_fp16, var_23587_cast_fp16))[name = tensor("op_24156_cast_fp16")]; + tensor var_24157_to_fp16 = const()[name = tensor("op_24157_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2499_cast_fp16 = mul(x = var_24156_cast_fp16, y = var_24157_to_fp16)[name = tensor("aw_chunk_2499_cast_fp16")]; + tensor var_24160_equation_0 = const()[name = tensor("op_24160_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24160_cast_fp16 = einsum(equation = var_24160_equation_0, values = (var_23850_cast_fp16, var_23594_cast_fp16))[name = tensor("op_24160_cast_fp16")]; + tensor var_24161_to_fp16 = const()[name = tensor("op_24161_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2501_cast_fp16 = mul(x = var_24160_cast_fp16, y = var_24161_to_fp16)[name = tensor("aw_chunk_2501_cast_fp16")]; + tensor var_24164_equation_0 = const()[name = tensor("op_24164_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24164_cast_fp16 = einsum(equation = var_24164_equation_0, values = (var_23850_cast_fp16, var_23601_cast_fp16))[name = tensor("op_24164_cast_fp16")]; + tensor var_24165_to_fp16 = const()[name = tensor("op_24165_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2503_cast_fp16 = mul(x = var_24164_cast_fp16, y = var_24165_to_fp16)[name = tensor("aw_chunk_2503_cast_fp16")]; + tensor var_24168_equation_0 = const()[name = tensor("op_24168_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24168_cast_fp16 = einsum(equation = var_24168_equation_0, values = (var_23854_cast_fp16, var_23608_cast_fp16))[name = tensor("op_24168_cast_fp16")]; + tensor var_24169_to_fp16 = const()[name = tensor("op_24169_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2505_cast_fp16 = mul(x = var_24168_cast_fp16, y = var_24169_to_fp16)[name = tensor("aw_chunk_2505_cast_fp16")]; + tensor var_24172_equation_0 = const()[name = tensor("op_24172_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24172_cast_fp16 = einsum(equation = var_24172_equation_0, values = (var_23854_cast_fp16, var_23615_cast_fp16))[name = tensor("op_24172_cast_fp16")]; + tensor var_24173_to_fp16 = const()[name = tensor("op_24173_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2507_cast_fp16 = mul(x = var_24172_cast_fp16, y = var_24173_to_fp16)[name = tensor("aw_chunk_2507_cast_fp16")]; + tensor var_24176_equation_0 = const()[name = tensor("op_24176_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24176_cast_fp16 = einsum(equation = var_24176_equation_0, values = (var_23854_cast_fp16, var_23622_cast_fp16))[name = tensor("op_24176_cast_fp16")]; + tensor var_24177_to_fp16 = const()[name = tensor("op_24177_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2509_cast_fp16 = mul(x = var_24176_cast_fp16, y = var_24177_to_fp16)[name = tensor("aw_chunk_2509_cast_fp16")]; + tensor var_24180_equation_0 = const()[name = tensor("op_24180_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24180_cast_fp16 = einsum(equation = var_24180_equation_0, values = (var_23854_cast_fp16, var_23629_cast_fp16))[name = tensor("op_24180_cast_fp16")]; + tensor var_24181_to_fp16 = const()[name = tensor("op_24181_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2511_cast_fp16 = mul(x = var_24180_cast_fp16, y = var_24181_to_fp16)[name = tensor("aw_chunk_2511_cast_fp16")]; + tensor var_24184_equation_0 = const()[name = tensor("op_24184_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24184_cast_fp16 = einsum(equation = var_24184_equation_0, values = (var_23858_cast_fp16, var_23636_cast_fp16))[name = tensor("op_24184_cast_fp16")]; + tensor var_24185_to_fp16 = const()[name = tensor("op_24185_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2513_cast_fp16 = mul(x = var_24184_cast_fp16, y = var_24185_to_fp16)[name = tensor("aw_chunk_2513_cast_fp16")]; + tensor var_24188_equation_0 = const()[name = tensor("op_24188_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24188_cast_fp16 = einsum(equation = var_24188_equation_0, values = (var_23858_cast_fp16, var_23643_cast_fp16))[name = tensor("op_24188_cast_fp16")]; + tensor var_24189_to_fp16 = const()[name = tensor("op_24189_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2515_cast_fp16 = mul(x = var_24188_cast_fp16, y = var_24189_to_fp16)[name = tensor("aw_chunk_2515_cast_fp16")]; + tensor var_24192_equation_0 = const()[name = tensor("op_24192_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24192_cast_fp16 = einsum(equation = var_24192_equation_0, values = (var_23858_cast_fp16, var_23650_cast_fp16))[name = tensor("op_24192_cast_fp16")]; + tensor var_24193_to_fp16 = const()[name = tensor("op_24193_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2517_cast_fp16 = mul(x = var_24192_cast_fp16, y = var_24193_to_fp16)[name = tensor("aw_chunk_2517_cast_fp16")]; + tensor var_24196_equation_0 = const()[name = tensor("op_24196_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24196_cast_fp16 = einsum(equation = var_24196_equation_0, values = (var_23858_cast_fp16, var_23657_cast_fp16))[name = tensor("op_24196_cast_fp16")]; + tensor var_24197_to_fp16 = const()[name = tensor("op_24197_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2519_cast_fp16 = mul(x = var_24196_cast_fp16, y = var_24197_to_fp16)[name = tensor("aw_chunk_2519_cast_fp16")]; + tensor var_24200_equation_0 = const()[name = tensor("op_24200_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24200_cast_fp16 = einsum(equation = var_24200_equation_0, values = (var_23862_cast_fp16, var_23664_cast_fp16))[name = tensor("op_24200_cast_fp16")]; + tensor var_24201_to_fp16 = const()[name = tensor("op_24201_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2521_cast_fp16 = mul(x = var_24200_cast_fp16, y = var_24201_to_fp16)[name = tensor("aw_chunk_2521_cast_fp16")]; + tensor var_24204_equation_0 = const()[name = tensor("op_24204_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24204_cast_fp16 = einsum(equation = var_24204_equation_0, values = (var_23862_cast_fp16, var_23671_cast_fp16))[name = tensor("op_24204_cast_fp16")]; + tensor var_24205_to_fp16 = const()[name = tensor("op_24205_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2523_cast_fp16 = mul(x = var_24204_cast_fp16, y = var_24205_to_fp16)[name = tensor("aw_chunk_2523_cast_fp16")]; + tensor var_24208_equation_0 = const()[name = tensor("op_24208_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24208_cast_fp16 = einsum(equation = var_24208_equation_0, values = (var_23862_cast_fp16, var_23678_cast_fp16))[name = tensor("op_24208_cast_fp16")]; + tensor var_24209_to_fp16 = const()[name = tensor("op_24209_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2525_cast_fp16 = mul(x = var_24208_cast_fp16, y = var_24209_to_fp16)[name = tensor("aw_chunk_2525_cast_fp16")]; + tensor var_24212_equation_0 = const()[name = tensor("op_24212_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24212_cast_fp16 = einsum(equation = var_24212_equation_0, values = (var_23862_cast_fp16, var_23685_cast_fp16))[name = tensor("op_24212_cast_fp16")]; + tensor var_24213_to_fp16 = const()[name = tensor("op_24213_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2527_cast_fp16 = mul(x = var_24212_cast_fp16, y = var_24213_to_fp16)[name = tensor("aw_chunk_2527_cast_fp16")]; + tensor var_24216_equation_0 = const()[name = tensor("op_24216_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24216_cast_fp16 = einsum(equation = var_24216_equation_0, values = (var_23866_cast_fp16, var_23692_cast_fp16))[name = tensor("op_24216_cast_fp16")]; + tensor var_24217_to_fp16 = const()[name = tensor("op_24217_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2529_cast_fp16 = mul(x = var_24216_cast_fp16, y = var_24217_to_fp16)[name = tensor("aw_chunk_2529_cast_fp16")]; + tensor var_24220_equation_0 = const()[name = tensor("op_24220_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24220_cast_fp16 = einsum(equation = var_24220_equation_0, values = (var_23866_cast_fp16, var_23699_cast_fp16))[name = tensor("op_24220_cast_fp16")]; + tensor var_24221_to_fp16 = const()[name = tensor("op_24221_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2531_cast_fp16 = mul(x = var_24220_cast_fp16, y = var_24221_to_fp16)[name = tensor("aw_chunk_2531_cast_fp16")]; + tensor var_24224_equation_0 = const()[name = tensor("op_24224_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24224_cast_fp16 = einsum(equation = var_24224_equation_0, values = (var_23866_cast_fp16, var_23706_cast_fp16))[name = tensor("op_24224_cast_fp16")]; + tensor var_24225_to_fp16 = const()[name = tensor("op_24225_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2533_cast_fp16 = mul(x = var_24224_cast_fp16, y = var_24225_to_fp16)[name = tensor("aw_chunk_2533_cast_fp16")]; + tensor var_24228_equation_0 = const()[name = tensor("op_24228_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24228_cast_fp16 = einsum(equation = var_24228_equation_0, values = (var_23866_cast_fp16, var_23713_cast_fp16))[name = tensor("op_24228_cast_fp16")]; + tensor var_24229_to_fp16 = const()[name = tensor("op_24229_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2535_cast_fp16 = mul(x = var_24228_cast_fp16, y = var_24229_to_fp16)[name = tensor("aw_chunk_2535_cast_fp16")]; + tensor var_24232_equation_0 = const()[name = tensor("op_24232_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24232_cast_fp16 = einsum(equation = var_24232_equation_0, values = (var_23870_cast_fp16, var_23720_cast_fp16))[name = tensor("op_24232_cast_fp16")]; + tensor var_24233_to_fp16 = const()[name = tensor("op_24233_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2537_cast_fp16 = mul(x = var_24232_cast_fp16, y = var_24233_to_fp16)[name = tensor("aw_chunk_2537_cast_fp16")]; + tensor var_24236_equation_0 = const()[name = tensor("op_24236_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24236_cast_fp16 = einsum(equation = var_24236_equation_0, values = (var_23870_cast_fp16, var_23727_cast_fp16))[name = tensor("op_24236_cast_fp16")]; + tensor var_24237_to_fp16 = const()[name = tensor("op_24237_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2539_cast_fp16 = mul(x = var_24236_cast_fp16, y = var_24237_to_fp16)[name = tensor("aw_chunk_2539_cast_fp16")]; + tensor var_24240_equation_0 = const()[name = tensor("op_24240_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24240_cast_fp16 = einsum(equation = var_24240_equation_0, values = (var_23870_cast_fp16, var_23734_cast_fp16))[name = tensor("op_24240_cast_fp16")]; + tensor var_24241_to_fp16 = const()[name = tensor("op_24241_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2541_cast_fp16 = mul(x = var_24240_cast_fp16, y = var_24241_to_fp16)[name = tensor("aw_chunk_2541_cast_fp16")]; + tensor var_24244_equation_0 = const()[name = tensor("op_24244_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24244_cast_fp16 = einsum(equation = var_24244_equation_0, values = (var_23870_cast_fp16, var_23741_cast_fp16))[name = tensor("op_24244_cast_fp16")]; + tensor var_24245_to_fp16 = const()[name = tensor("op_24245_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2543_cast_fp16 = mul(x = var_24244_cast_fp16, y = var_24245_to_fp16)[name = tensor("aw_chunk_2543_cast_fp16")]; + tensor var_24248_equation_0 = const()[name = tensor("op_24248_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24248_cast_fp16 = einsum(equation = var_24248_equation_0, values = (var_23874_cast_fp16, var_23748_cast_fp16))[name = tensor("op_24248_cast_fp16")]; + tensor var_24249_to_fp16 = const()[name = tensor("op_24249_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2545_cast_fp16 = mul(x = var_24248_cast_fp16, y = var_24249_to_fp16)[name = tensor("aw_chunk_2545_cast_fp16")]; + tensor var_24252_equation_0 = const()[name = tensor("op_24252_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24252_cast_fp16 = einsum(equation = var_24252_equation_0, values = (var_23874_cast_fp16, var_23755_cast_fp16))[name = tensor("op_24252_cast_fp16")]; + tensor var_24253_to_fp16 = const()[name = tensor("op_24253_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2547_cast_fp16 = mul(x = var_24252_cast_fp16, y = var_24253_to_fp16)[name = tensor("aw_chunk_2547_cast_fp16")]; + tensor var_24256_equation_0 = const()[name = tensor("op_24256_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24256_cast_fp16 = einsum(equation = var_24256_equation_0, values = (var_23874_cast_fp16, var_23762_cast_fp16))[name = tensor("op_24256_cast_fp16")]; + tensor var_24257_to_fp16 = const()[name = tensor("op_24257_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2549_cast_fp16 = mul(x = var_24256_cast_fp16, y = var_24257_to_fp16)[name = tensor("aw_chunk_2549_cast_fp16")]; + tensor var_24260_equation_0 = const()[name = tensor("op_24260_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24260_cast_fp16 = einsum(equation = var_24260_equation_0, values = (var_23874_cast_fp16, var_23769_cast_fp16))[name = tensor("op_24260_cast_fp16")]; + tensor var_24261_to_fp16 = const()[name = tensor("op_24261_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2551_cast_fp16 = mul(x = var_24260_cast_fp16, y = var_24261_to_fp16)[name = tensor("aw_chunk_2551_cast_fp16")]; + tensor var_24264_equation_0 = const()[name = tensor("op_24264_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24264_cast_fp16 = einsum(equation = var_24264_equation_0, values = (var_23878_cast_fp16, var_23776_cast_fp16))[name = tensor("op_24264_cast_fp16")]; + tensor var_24265_to_fp16 = const()[name = tensor("op_24265_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2553_cast_fp16 = mul(x = var_24264_cast_fp16, y = var_24265_to_fp16)[name = tensor("aw_chunk_2553_cast_fp16")]; + tensor var_24268_equation_0 = const()[name = tensor("op_24268_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24268_cast_fp16 = einsum(equation = var_24268_equation_0, values = (var_23878_cast_fp16, var_23783_cast_fp16))[name = tensor("op_24268_cast_fp16")]; + tensor var_24269_to_fp16 = const()[name = tensor("op_24269_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2555_cast_fp16 = mul(x = var_24268_cast_fp16, y = var_24269_to_fp16)[name = tensor("aw_chunk_2555_cast_fp16")]; + tensor var_24272_equation_0 = const()[name = tensor("op_24272_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24272_cast_fp16 = einsum(equation = var_24272_equation_0, values = (var_23878_cast_fp16, var_23790_cast_fp16))[name = tensor("op_24272_cast_fp16")]; + tensor var_24273_to_fp16 = const()[name = tensor("op_24273_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2557_cast_fp16 = mul(x = var_24272_cast_fp16, y = var_24273_to_fp16)[name = tensor("aw_chunk_2557_cast_fp16")]; + tensor var_24276_equation_0 = const()[name = tensor("op_24276_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_24276_cast_fp16 = einsum(equation = var_24276_equation_0, values = (var_23878_cast_fp16, var_23797_cast_fp16))[name = tensor("op_24276_cast_fp16")]; + tensor var_24277_to_fp16 = const()[name = tensor("op_24277_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2559_cast_fp16 = mul(x = var_24276_cast_fp16, y = var_24277_to_fp16)[name = tensor("aw_chunk_2559_cast_fp16")]; + tensor var_24279_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2401_cast_fp16)[name = tensor("op_24279_cast_fp16")]; + tensor var_24280_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2403_cast_fp16)[name = tensor("op_24280_cast_fp16")]; + tensor var_24281_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2405_cast_fp16)[name = tensor("op_24281_cast_fp16")]; + tensor var_24282_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2407_cast_fp16)[name = tensor("op_24282_cast_fp16")]; + tensor var_24283_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2409_cast_fp16)[name = tensor("op_24283_cast_fp16")]; + tensor var_24284_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2411_cast_fp16)[name = tensor("op_24284_cast_fp16")]; + tensor var_24285_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2413_cast_fp16)[name = tensor("op_24285_cast_fp16")]; + tensor var_24286_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2415_cast_fp16)[name = tensor("op_24286_cast_fp16")]; + tensor var_24287_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2417_cast_fp16)[name = tensor("op_24287_cast_fp16")]; + tensor var_24288_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2419_cast_fp16)[name = tensor("op_24288_cast_fp16")]; + tensor var_24289_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2421_cast_fp16)[name = tensor("op_24289_cast_fp16")]; + tensor var_24290_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2423_cast_fp16)[name = tensor("op_24290_cast_fp16")]; + tensor var_24291_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2425_cast_fp16)[name = tensor("op_24291_cast_fp16")]; + tensor var_24292_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2427_cast_fp16)[name = tensor("op_24292_cast_fp16")]; + tensor var_24293_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2429_cast_fp16)[name = tensor("op_24293_cast_fp16")]; + tensor var_24294_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2431_cast_fp16)[name = tensor("op_24294_cast_fp16")]; + tensor var_24295_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2433_cast_fp16)[name = tensor("op_24295_cast_fp16")]; + tensor var_24296_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2435_cast_fp16)[name = tensor("op_24296_cast_fp16")]; + tensor var_24297_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2437_cast_fp16)[name = tensor("op_24297_cast_fp16")]; + tensor var_24298_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2439_cast_fp16)[name = tensor("op_24298_cast_fp16")]; + tensor var_24299_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2441_cast_fp16)[name = tensor("op_24299_cast_fp16")]; + tensor var_24300_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2443_cast_fp16)[name = tensor("op_24300_cast_fp16")]; + tensor var_24301_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2445_cast_fp16)[name = tensor("op_24301_cast_fp16")]; + tensor var_24302_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2447_cast_fp16)[name = tensor("op_24302_cast_fp16")]; + tensor var_24303_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2449_cast_fp16)[name = tensor("op_24303_cast_fp16")]; + tensor var_24304_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2451_cast_fp16)[name = tensor("op_24304_cast_fp16")]; + tensor var_24305_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2453_cast_fp16)[name = tensor("op_24305_cast_fp16")]; + tensor var_24306_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2455_cast_fp16)[name = tensor("op_24306_cast_fp16")]; + tensor var_24307_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2457_cast_fp16)[name = tensor("op_24307_cast_fp16")]; + tensor var_24308_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2459_cast_fp16)[name = tensor("op_24308_cast_fp16")]; + tensor var_24309_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2461_cast_fp16)[name = tensor("op_24309_cast_fp16")]; + tensor var_24310_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2463_cast_fp16)[name = tensor("op_24310_cast_fp16")]; + tensor var_24311_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2465_cast_fp16)[name = tensor("op_24311_cast_fp16")]; + tensor var_24312_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2467_cast_fp16)[name = tensor("op_24312_cast_fp16")]; + tensor var_24313_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2469_cast_fp16)[name = tensor("op_24313_cast_fp16")]; + tensor var_24314_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2471_cast_fp16)[name = tensor("op_24314_cast_fp16")]; + tensor var_24315_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2473_cast_fp16)[name = tensor("op_24315_cast_fp16")]; + tensor var_24316_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2475_cast_fp16)[name = tensor("op_24316_cast_fp16")]; + tensor var_24317_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2477_cast_fp16)[name = tensor("op_24317_cast_fp16")]; + tensor var_24318_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2479_cast_fp16)[name = tensor("op_24318_cast_fp16")]; + tensor var_24319_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2481_cast_fp16)[name = tensor("op_24319_cast_fp16")]; + tensor var_24320_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2483_cast_fp16)[name = tensor("op_24320_cast_fp16")]; + tensor var_24321_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2485_cast_fp16)[name = tensor("op_24321_cast_fp16")]; + tensor var_24322_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2487_cast_fp16)[name = tensor("op_24322_cast_fp16")]; + tensor var_24323_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2489_cast_fp16)[name = tensor("op_24323_cast_fp16")]; + tensor var_24324_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2491_cast_fp16)[name = tensor("op_24324_cast_fp16")]; + tensor var_24325_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2493_cast_fp16)[name = tensor("op_24325_cast_fp16")]; + tensor var_24326_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2495_cast_fp16)[name = tensor("op_24326_cast_fp16")]; + tensor var_24327_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2497_cast_fp16)[name = tensor("op_24327_cast_fp16")]; + tensor var_24328_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2499_cast_fp16)[name = tensor("op_24328_cast_fp16")]; + tensor var_24329_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2501_cast_fp16)[name = tensor("op_24329_cast_fp16")]; + tensor var_24330_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2503_cast_fp16)[name = tensor("op_24330_cast_fp16")]; + tensor var_24331_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2505_cast_fp16)[name = tensor("op_24331_cast_fp16")]; + tensor var_24332_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2507_cast_fp16)[name = tensor("op_24332_cast_fp16")]; + tensor var_24333_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2509_cast_fp16)[name = tensor("op_24333_cast_fp16")]; + tensor var_24334_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2511_cast_fp16)[name = tensor("op_24334_cast_fp16")]; + tensor var_24335_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2513_cast_fp16)[name = tensor("op_24335_cast_fp16")]; + tensor var_24336_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2515_cast_fp16)[name = tensor("op_24336_cast_fp16")]; + tensor var_24337_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2517_cast_fp16)[name = tensor("op_24337_cast_fp16")]; + tensor var_24338_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2519_cast_fp16)[name = tensor("op_24338_cast_fp16")]; + tensor var_24339_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2521_cast_fp16)[name = tensor("op_24339_cast_fp16")]; + tensor var_24340_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2523_cast_fp16)[name = tensor("op_24340_cast_fp16")]; + tensor var_24341_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2525_cast_fp16)[name = tensor("op_24341_cast_fp16")]; + tensor var_24342_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2527_cast_fp16)[name = tensor("op_24342_cast_fp16")]; + tensor var_24343_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2529_cast_fp16)[name = tensor("op_24343_cast_fp16")]; + tensor var_24344_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2531_cast_fp16)[name = tensor("op_24344_cast_fp16")]; + tensor var_24345_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2533_cast_fp16)[name = tensor("op_24345_cast_fp16")]; + tensor var_24346_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2535_cast_fp16)[name = tensor("op_24346_cast_fp16")]; + tensor var_24347_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2537_cast_fp16)[name = tensor("op_24347_cast_fp16")]; + tensor var_24348_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2539_cast_fp16)[name = tensor("op_24348_cast_fp16")]; + tensor var_24349_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2541_cast_fp16)[name = tensor("op_24349_cast_fp16")]; + tensor var_24350_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2543_cast_fp16)[name = tensor("op_24350_cast_fp16")]; + tensor var_24351_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2545_cast_fp16)[name = tensor("op_24351_cast_fp16")]; + tensor var_24352_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2547_cast_fp16)[name = tensor("op_24352_cast_fp16")]; + tensor var_24353_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2549_cast_fp16)[name = tensor("op_24353_cast_fp16")]; + tensor var_24354_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2551_cast_fp16)[name = tensor("op_24354_cast_fp16")]; + tensor var_24355_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2553_cast_fp16)[name = tensor("op_24355_cast_fp16")]; + tensor var_24356_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2555_cast_fp16)[name = tensor("op_24356_cast_fp16")]; + tensor var_24357_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2557_cast_fp16)[name = tensor("op_24357_cast_fp16")]; + tensor var_24358_cast_fp16 = softmax(axis = var_23104, x = aw_chunk_2559_cast_fp16)[name = tensor("op_24358_cast_fp16")]; + tensor var_24360_equation_0 = const()[name = tensor("op_24360_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24360_cast_fp16 = einsum(equation = var_24360_equation_0, values = (var_23880_cast_fp16, var_24279_cast_fp16))[name = tensor("op_24360_cast_fp16")]; + tensor var_24362_equation_0 = const()[name = tensor("op_24362_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24362_cast_fp16 = einsum(equation = var_24362_equation_0, values = (var_23880_cast_fp16, var_24280_cast_fp16))[name = tensor("op_24362_cast_fp16")]; + tensor var_24364_equation_0 = const()[name = tensor("op_24364_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24364_cast_fp16 = einsum(equation = var_24364_equation_0, values = (var_23880_cast_fp16, var_24281_cast_fp16))[name = tensor("op_24364_cast_fp16")]; + tensor var_24366_equation_0 = const()[name = tensor("op_24366_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24366_cast_fp16 = einsum(equation = var_24366_equation_0, values = (var_23880_cast_fp16, var_24282_cast_fp16))[name = tensor("op_24366_cast_fp16")]; + tensor var_24368_equation_0 = const()[name = tensor("op_24368_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24368_cast_fp16 = einsum(equation = var_24368_equation_0, values = (var_23884_cast_fp16, var_24283_cast_fp16))[name = tensor("op_24368_cast_fp16")]; + tensor var_24370_equation_0 = const()[name = tensor("op_24370_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24370_cast_fp16 = einsum(equation = var_24370_equation_0, values = (var_23884_cast_fp16, var_24284_cast_fp16))[name = tensor("op_24370_cast_fp16")]; + tensor var_24372_equation_0 = const()[name = tensor("op_24372_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24372_cast_fp16 = einsum(equation = var_24372_equation_0, values = (var_23884_cast_fp16, var_24285_cast_fp16))[name = tensor("op_24372_cast_fp16")]; + tensor var_24374_equation_0 = const()[name = tensor("op_24374_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24374_cast_fp16 = einsum(equation = var_24374_equation_0, values = (var_23884_cast_fp16, var_24286_cast_fp16))[name = tensor("op_24374_cast_fp16")]; + tensor var_24376_equation_0 = const()[name = tensor("op_24376_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24376_cast_fp16 = einsum(equation = var_24376_equation_0, values = (var_23888_cast_fp16, var_24287_cast_fp16))[name = tensor("op_24376_cast_fp16")]; + tensor var_24378_equation_0 = const()[name = tensor("op_24378_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24378_cast_fp16 = einsum(equation = var_24378_equation_0, values = (var_23888_cast_fp16, var_24288_cast_fp16))[name = tensor("op_24378_cast_fp16")]; + tensor var_24380_equation_0 = const()[name = tensor("op_24380_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24380_cast_fp16 = einsum(equation = var_24380_equation_0, values = (var_23888_cast_fp16, var_24289_cast_fp16))[name = tensor("op_24380_cast_fp16")]; + tensor var_24382_equation_0 = const()[name = tensor("op_24382_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24382_cast_fp16 = einsum(equation = var_24382_equation_0, values = (var_23888_cast_fp16, var_24290_cast_fp16))[name = tensor("op_24382_cast_fp16")]; + tensor var_24384_equation_0 = const()[name = tensor("op_24384_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24384_cast_fp16 = einsum(equation = var_24384_equation_0, values = (var_23892_cast_fp16, var_24291_cast_fp16))[name = tensor("op_24384_cast_fp16")]; + tensor var_24386_equation_0 = const()[name = tensor("op_24386_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24386_cast_fp16 = einsum(equation = var_24386_equation_0, values = (var_23892_cast_fp16, var_24292_cast_fp16))[name = tensor("op_24386_cast_fp16")]; + tensor var_24388_equation_0 = const()[name = tensor("op_24388_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24388_cast_fp16 = einsum(equation = var_24388_equation_0, values = (var_23892_cast_fp16, var_24293_cast_fp16))[name = tensor("op_24388_cast_fp16")]; + tensor var_24390_equation_0 = const()[name = tensor("op_24390_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24390_cast_fp16 = einsum(equation = var_24390_equation_0, values = (var_23892_cast_fp16, var_24294_cast_fp16))[name = tensor("op_24390_cast_fp16")]; + tensor var_24392_equation_0 = const()[name = tensor("op_24392_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24392_cast_fp16 = einsum(equation = var_24392_equation_0, values = (var_23896_cast_fp16, var_24295_cast_fp16))[name = tensor("op_24392_cast_fp16")]; + tensor var_24394_equation_0 = const()[name = tensor("op_24394_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24394_cast_fp16 = einsum(equation = var_24394_equation_0, values = (var_23896_cast_fp16, var_24296_cast_fp16))[name = tensor("op_24394_cast_fp16")]; + tensor var_24396_equation_0 = const()[name = tensor("op_24396_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24396_cast_fp16 = einsum(equation = var_24396_equation_0, values = (var_23896_cast_fp16, var_24297_cast_fp16))[name = tensor("op_24396_cast_fp16")]; + tensor var_24398_equation_0 = const()[name = tensor("op_24398_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24398_cast_fp16 = einsum(equation = var_24398_equation_0, values = (var_23896_cast_fp16, var_24298_cast_fp16))[name = tensor("op_24398_cast_fp16")]; + tensor var_24400_equation_0 = const()[name = tensor("op_24400_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24400_cast_fp16 = einsum(equation = var_24400_equation_0, values = (var_23900_cast_fp16, var_24299_cast_fp16))[name = tensor("op_24400_cast_fp16")]; + tensor var_24402_equation_0 = const()[name = tensor("op_24402_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24402_cast_fp16 = einsum(equation = var_24402_equation_0, values = (var_23900_cast_fp16, var_24300_cast_fp16))[name = tensor("op_24402_cast_fp16")]; + tensor var_24404_equation_0 = const()[name = tensor("op_24404_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24404_cast_fp16 = einsum(equation = var_24404_equation_0, values = (var_23900_cast_fp16, var_24301_cast_fp16))[name = tensor("op_24404_cast_fp16")]; + tensor var_24406_equation_0 = const()[name = tensor("op_24406_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24406_cast_fp16 = einsum(equation = var_24406_equation_0, values = (var_23900_cast_fp16, var_24302_cast_fp16))[name = tensor("op_24406_cast_fp16")]; + tensor var_24408_equation_0 = const()[name = tensor("op_24408_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24408_cast_fp16 = einsum(equation = var_24408_equation_0, values = (var_23904_cast_fp16, var_24303_cast_fp16))[name = tensor("op_24408_cast_fp16")]; + tensor var_24410_equation_0 = const()[name = tensor("op_24410_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24410_cast_fp16 = einsum(equation = var_24410_equation_0, values = (var_23904_cast_fp16, var_24304_cast_fp16))[name = tensor("op_24410_cast_fp16")]; + tensor var_24412_equation_0 = const()[name = tensor("op_24412_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24412_cast_fp16 = einsum(equation = var_24412_equation_0, values = (var_23904_cast_fp16, var_24305_cast_fp16))[name = tensor("op_24412_cast_fp16")]; + tensor var_24414_equation_0 = const()[name = tensor("op_24414_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24414_cast_fp16 = einsum(equation = var_24414_equation_0, values = (var_23904_cast_fp16, var_24306_cast_fp16))[name = tensor("op_24414_cast_fp16")]; + tensor var_24416_equation_0 = const()[name = tensor("op_24416_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24416_cast_fp16 = einsum(equation = var_24416_equation_0, values = (var_23908_cast_fp16, var_24307_cast_fp16))[name = tensor("op_24416_cast_fp16")]; + tensor var_24418_equation_0 = const()[name = tensor("op_24418_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24418_cast_fp16 = einsum(equation = var_24418_equation_0, values = (var_23908_cast_fp16, var_24308_cast_fp16))[name = tensor("op_24418_cast_fp16")]; + tensor var_24420_equation_0 = const()[name = tensor("op_24420_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24420_cast_fp16 = einsum(equation = var_24420_equation_0, values = (var_23908_cast_fp16, var_24309_cast_fp16))[name = tensor("op_24420_cast_fp16")]; + tensor var_24422_equation_0 = const()[name = tensor("op_24422_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24422_cast_fp16 = einsum(equation = var_24422_equation_0, values = (var_23908_cast_fp16, var_24310_cast_fp16))[name = tensor("op_24422_cast_fp16")]; + tensor var_24424_equation_0 = const()[name = tensor("op_24424_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24424_cast_fp16 = einsum(equation = var_24424_equation_0, values = (var_23912_cast_fp16, var_24311_cast_fp16))[name = tensor("op_24424_cast_fp16")]; + tensor var_24426_equation_0 = const()[name = tensor("op_24426_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24426_cast_fp16 = einsum(equation = var_24426_equation_0, values = (var_23912_cast_fp16, var_24312_cast_fp16))[name = tensor("op_24426_cast_fp16")]; + tensor var_24428_equation_0 = const()[name = tensor("op_24428_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24428_cast_fp16 = einsum(equation = var_24428_equation_0, values = (var_23912_cast_fp16, var_24313_cast_fp16))[name = tensor("op_24428_cast_fp16")]; + tensor var_24430_equation_0 = const()[name = tensor("op_24430_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24430_cast_fp16 = einsum(equation = var_24430_equation_0, values = (var_23912_cast_fp16, var_24314_cast_fp16))[name = tensor("op_24430_cast_fp16")]; + tensor var_24432_equation_0 = const()[name = tensor("op_24432_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24432_cast_fp16 = einsum(equation = var_24432_equation_0, values = (var_23916_cast_fp16, var_24315_cast_fp16))[name = tensor("op_24432_cast_fp16")]; + tensor var_24434_equation_0 = const()[name = tensor("op_24434_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24434_cast_fp16 = einsum(equation = var_24434_equation_0, values = (var_23916_cast_fp16, var_24316_cast_fp16))[name = tensor("op_24434_cast_fp16")]; + tensor var_24436_equation_0 = const()[name = tensor("op_24436_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24436_cast_fp16 = einsum(equation = var_24436_equation_0, values = (var_23916_cast_fp16, var_24317_cast_fp16))[name = tensor("op_24436_cast_fp16")]; + tensor var_24438_equation_0 = const()[name = tensor("op_24438_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24438_cast_fp16 = einsum(equation = var_24438_equation_0, values = (var_23916_cast_fp16, var_24318_cast_fp16))[name = tensor("op_24438_cast_fp16")]; + tensor var_24440_equation_0 = const()[name = tensor("op_24440_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24440_cast_fp16 = einsum(equation = var_24440_equation_0, values = (var_23920_cast_fp16, var_24319_cast_fp16))[name = tensor("op_24440_cast_fp16")]; + tensor var_24442_equation_0 = const()[name = tensor("op_24442_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24442_cast_fp16 = einsum(equation = var_24442_equation_0, values = (var_23920_cast_fp16, var_24320_cast_fp16))[name = tensor("op_24442_cast_fp16")]; + tensor var_24444_equation_0 = const()[name = tensor("op_24444_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24444_cast_fp16 = einsum(equation = var_24444_equation_0, values = (var_23920_cast_fp16, var_24321_cast_fp16))[name = tensor("op_24444_cast_fp16")]; + tensor var_24446_equation_0 = const()[name = tensor("op_24446_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24446_cast_fp16 = einsum(equation = var_24446_equation_0, values = (var_23920_cast_fp16, var_24322_cast_fp16))[name = tensor("op_24446_cast_fp16")]; + tensor var_24448_equation_0 = const()[name = tensor("op_24448_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24448_cast_fp16 = einsum(equation = var_24448_equation_0, values = (var_23924_cast_fp16, var_24323_cast_fp16))[name = tensor("op_24448_cast_fp16")]; + tensor var_24450_equation_0 = const()[name = tensor("op_24450_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24450_cast_fp16 = einsum(equation = var_24450_equation_0, values = (var_23924_cast_fp16, var_24324_cast_fp16))[name = tensor("op_24450_cast_fp16")]; + tensor var_24452_equation_0 = const()[name = tensor("op_24452_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24452_cast_fp16 = einsum(equation = var_24452_equation_0, values = (var_23924_cast_fp16, var_24325_cast_fp16))[name = tensor("op_24452_cast_fp16")]; + tensor var_24454_equation_0 = const()[name = tensor("op_24454_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24454_cast_fp16 = einsum(equation = var_24454_equation_0, values = (var_23924_cast_fp16, var_24326_cast_fp16))[name = tensor("op_24454_cast_fp16")]; + tensor var_24456_equation_0 = const()[name = tensor("op_24456_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24456_cast_fp16 = einsum(equation = var_24456_equation_0, values = (var_23928_cast_fp16, var_24327_cast_fp16))[name = tensor("op_24456_cast_fp16")]; + tensor var_24458_equation_0 = const()[name = tensor("op_24458_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24458_cast_fp16 = einsum(equation = var_24458_equation_0, values = (var_23928_cast_fp16, var_24328_cast_fp16))[name = tensor("op_24458_cast_fp16")]; + tensor var_24460_equation_0 = const()[name = tensor("op_24460_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24460_cast_fp16 = einsum(equation = var_24460_equation_0, values = (var_23928_cast_fp16, var_24329_cast_fp16))[name = tensor("op_24460_cast_fp16")]; + tensor var_24462_equation_0 = const()[name = tensor("op_24462_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24462_cast_fp16 = einsum(equation = var_24462_equation_0, values = (var_23928_cast_fp16, var_24330_cast_fp16))[name = tensor("op_24462_cast_fp16")]; + tensor var_24464_equation_0 = const()[name = tensor("op_24464_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24464_cast_fp16 = einsum(equation = var_24464_equation_0, values = (var_23932_cast_fp16, var_24331_cast_fp16))[name = tensor("op_24464_cast_fp16")]; + tensor var_24466_equation_0 = const()[name = tensor("op_24466_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24466_cast_fp16 = einsum(equation = var_24466_equation_0, values = (var_23932_cast_fp16, var_24332_cast_fp16))[name = tensor("op_24466_cast_fp16")]; + tensor var_24468_equation_0 = const()[name = tensor("op_24468_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24468_cast_fp16 = einsum(equation = var_24468_equation_0, values = (var_23932_cast_fp16, var_24333_cast_fp16))[name = tensor("op_24468_cast_fp16")]; + tensor var_24470_equation_0 = const()[name = tensor("op_24470_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24470_cast_fp16 = einsum(equation = var_24470_equation_0, values = (var_23932_cast_fp16, var_24334_cast_fp16))[name = tensor("op_24470_cast_fp16")]; + tensor var_24472_equation_0 = const()[name = tensor("op_24472_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24472_cast_fp16 = einsum(equation = var_24472_equation_0, values = (var_23936_cast_fp16, var_24335_cast_fp16))[name = tensor("op_24472_cast_fp16")]; + tensor var_24474_equation_0 = const()[name = tensor("op_24474_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24474_cast_fp16 = einsum(equation = var_24474_equation_0, values = (var_23936_cast_fp16, var_24336_cast_fp16))[name = tensor("op_24474_cast_fp16")]; + tensor var_24476_equation_0 = const()[name = tensor("op_24476_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24476_cast_fp16 = einsum(equation = var_24476_equation_0, values = (var_23936_cast_fp16, var_24337_cast_fp16))[name = tensor("op_24476_cast_fp16")]; + tensor var_24478_equation_0 = const()[name = tensor("op_24478_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24478_cast_fp16 = einsum(equation = var_24478_equation_0, values = (var_23936_cast_fp16, var_24338_cast_fp16))[name = tensor("op_24478_cast_fp16")]; + tensor var_24480_equation_0 = const()[name = tensor("op_24480_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24480_cast_fp16 = einsum(equation = var_24480_equation_0, values = (var_23940_cast_fp16, var_24339_cast_fp16))[name = tensor("op_24480_cast_fp16")]; + tensor var_24482_equation_0 = const()[name = tensor("op_24482_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24482_cast_fp16 = einsum(equation = var_24482_equation_0, values = (var_23940_cast_fp16, var_24340_cast_fp16))[name = tensor("op_24482_cast_fp16")]; + tensor var_24484_equation_0 = const()[name = tensor("op_24484_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24484_cast_fp16 = einsum(equation = var_24484_equation_0, values = (var_23940_cast_fp16, var_24341_cast_fp16))[name = tensor("op_24484_cast_fp16")]; + tensor var_24486_equation_0 = const()[name = tensor("op_24486_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24486_cast_fp16 = einsum(equation = var_24486_equation_0, values = (var_23940_cast_fp16, var_24342_cast_fp16))[name = tensor("op_24486_cast_fp16")]; + tensor var_24488_equation_0 = const()[name = tensor("op_24488_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24488_cast_fp16 = einsum(equation = var_24488_equation_0, values = (var_23944_cast_fp16, var_24343_cast_fp16))[name = tensor("op_24488_cast_fp16")]; + tensor var_24490_equation_0 = const()[name = tensor("op_24490_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24490_cast_fp16 = einsum(equation = var_24490_equation_0, values = (var_23944_cast_fp16, var_24344_cast_fp16))[name = tensor("op_24490_cast_fp16")]; + tensor var_24492_equation_0 = const()[name = tensor("op_24492_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24492_cast_fp16 = einsum(equation = var_24492_equation_0, values = (var_23944_cast_fp16, var_24345_cast_fp16))[name = tensor("op_24492_cast_fp16")]; + tensor var_24494_equation_0 = const()[name = tensor("op_24494_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24494_cast_fp16 = einsum(equation = var_24494_equation_0, values = (var_23944_cast_fp16, var_24346_cast_fp16))[name = tensor("op_24494_cast_fp16")]; + tensor var_24496_equation_0 = const()[name = tensor("op_24496_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24496_cast_fp16 = einsum(equation = var_24496_equation_0, values = (var_23948_cast_fp16, var_24347_cast_fp16))[name = tensor("op_24496_cast_fp16")]; + tensor var_24498_equation_0 = const()[name = tensor("op_24498_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24498_cast_fp16 = einsum(equation = var_24498_equation_0, values = (var_23948_cast_fp16, var_24348_cast_fp16))[name = tensor("op_24498_cast_fp16")]; + tensor var_24500_equation_0 = const()[name = tensor("op_24500_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24500_cast_fp16 = einsum(equation = var_24500_equation_0, values = (var_23948_cast_fp16, var_24349_cast_fp16))[name = tensor("op_24500_cast_fp16")]; + tensor var_24502_equation_0 = const()[name = tensor("op_24502_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24502_cast_fp16 = einsum(equation = var_24502_equation_0, values = (var_23948_cast_fp16, var_24350_cast_fp16))[name = tensor("op_24502_cast_fp16")]; + tensor var_24504_equation_0 = const()[name = tensor("op_24504_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24504_cast_fp16 = einsum(equation = var_24504_equation_0, values = (var_23952_cast_fp16, var_24351_cast_fp16))[name = tensor("op_24504_cast_fp16")]; + tensor var_24506_equation_0 = const()[name = tensor("op_24506_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24506_cast_fp16 = einsum(equation = var_24506_equation_0, values = (var_23952_cast_fp16, var_24352_cast_fp16))[name = tensor("op_24506_cast_fp16")]; + tensor var_24508_equation_0 = const()[name = tensor("op_24508_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24508_cast_fp16 = einsum(equation = var_24508_equation_0, values = (var_23952_cast_fp16, var_24353_cast_fp16))[name = tensor("op_24508_cast_fp16")]; + tensor var_24510_equation_0 = const()[name = tensor("op_24510_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24510_cast_fp16 = einsum(equation = var_24510_equation_0, values = (var_23952_cast_fp16, var_24354_cast_fp16))[name = tensor("op_24510_cast_fp16")]; + tensor var_24512_equation_0 = const()[name = tensor("op_24512_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24512_cast_fp16 = einsum(equation = var_24512_equation_0, values = (var_23956_cast_fp16, var_24355_cast_fp16))[name = tensor("op_24512_cast_fp16")]; + tensor var_24514_equation_0 = const()[name = tensor("op_24514_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24514_cast_fp16 = einsum(equation = var_24514_equation_0, values = (var_23956_cast_fp16, var_24356_cast_fp16))[name = tensor("op_24514_cast_fp16")]; + tensor var_24516_equation_0 = const()[name = tensor("op_24516_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24516_cast_fp16 = einsum(equation = var_24516_equation_0, values = (var_23956_cast_fp16, var_24357_cast_fp16))[name = tensor("op_24516_cast_fp16")]; + tensor var_24518_equation_0 = const()[name = tensor("op_24518_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_24518_cast_fp16 = einsum(equation = var_24518_equation_0, values = (var_23956_cast_fp16, var_24358_cast_fp16))[name = tensor("op_24518_cast_fp16")]; + tensor var_24520_interleave_0 = const()[name = tensor("op_24520_interleave_0"), val = tensor(false)]; + tensor var_24520_cast_fp16 = concat(axis = var_23079, interleave = var_24520_interleave_0, values = (var_24360_cast_fp16, var_24362_cast_fp16, var_24364_cast_fp16, var_24366_cast_fp16))[name = tensor("op_24520_cast_fp16")]; + tensor var_24522_interleave_0 = const()[name = tensor("op_24522_interleave_0"), val = tensor(false)]; + tensor var_24522_cast_fp16 = concat(axis = var_23079, interleave = var_24522_interleave_0, values = (var_24368_cast_fp16, var_24370_cast_fp16, var_24372_cast_fp16, var_24374_cast_fp16))[name = tensor("op_24522_cast_fp16")]; + tensor var_24524_interleave_0 = const()[name = tensor("op_24524_interleave_0"), val = tensor(false)]; + tensor var_24524_cast_fp16 = concat(axis = var_23079, interleave = var_24524_interleave_0, values = (var_24376_cast_fp16, var_24378_cast_fp16, var_24380_cast_fp16, var_24382_cast_fp16))[name = tensor("op_24524_cast_fp16")]; + tensor var_24526_interleave_0 = const()[name = tensor("op_24526_interleave_0"), val = tensor(false)]; + tensor var_24526_cast_fp16 = concat(axis = var_23079, interleave = var_24526_interleave_0, values = (var_24384_cast_fp16, var_24386_cast_fp16, var_24388_cast_fp16, var_24390_cast_fp16))[name = tensor("op_24526_cast_fp16")]; + tensor var_24528_interleave_0 = const()[name = tensor("op_24528_interleave_0"), val = tensor(false)]; + tensor var_24528_cast_fp16 = concat(axis = var_23079, interleave = var_24528_interleave_0, values = (var_24392_cast_fp16, var_24394_cast_fp16, var_24396_cast_fp16, var_24398_cast_fp16))[name = tensor("op_24528_cast_fp16")]; + tensor var_24530_interleave_0 = const()[name = tensor("op_24530_interleave_0"), val = tensor(false)]; + tensor var_24530_cast_fp16 = concat(axis = var_23079, interleave = var_24530_interleave_0, values = (var_24400_cast_fp16, var_24402_cast_fp16, var_24404_cast_fp16, var_24406_cast_fp16))[name = tensor("op_24530_cast_fp16")]; + tensor var_24532_interleave_0 = const()[name = tensor("op_24532_interleave_0"), val = tensor(false)]; + tensor var_24532_cast_fp16 = concat(axis = var_23079, interleave = var_24532_interleave_0, values = (var_24408_cast_fp16, var_24410_cast_fp16, var_24412_cast_fp16, var_24414_cast_fp16))[name = tensor("op_24532_cast_fp16")]; + tensor var_24534_interleave_0 = const()[name = tensor("op_24534_interleave_0"), val = tensor(false)]; + tensor var_24534_cast_fp16 = concat(axis = var_23079, interleave = var_24534_interleave_0, values = (var_24416_cast_fp16, var_24418_cast_fp16, var_24420_cast_fp16, var_24422_cast_fp16))[name = tensor("op_24534_cast_fp16")]; + tensor var_24536_interleave_0 = const()[name = tensor("op_24536_interleave_0"), val = tensor(false)]; + tensor var_24536_cast_fp16 = concat(axis = var_23079, interleave = var_24536_interleave_0, values = (var_24424_cast_fp16, var_24426_cast_fp16, var_24428_cast_fp16, var_24430_cast_fp16))[name = tensor("op_24536_cast_fp16")]; + tensor var_24538_interleave_0 = const()[name = tensor("op_24538_interleave_0"), val = tensor(false)]; + tensor var_24538_cast_fp16 = concat(axis = var_23079, interleave = var_24538_interleave_0, values = (var_24432_cast_fp16, var_24434_cast_fp16, var_24436_cast_fp16, var_24438_cast_fp16))[name = tensor("op_24538_cast_fp16")]; + tensor var_24540_interleave_0 = const()[name = tensor("op_24540_interleave_0"), val = tensor(false)]; + tensor var_24540_cast_fp16 = concat(axis = var_23079, interleave = var_24540_interleave_0, values = (var_24440_cast_fp16, var_24442_cast_fp16, var_24444_cast_fp16, var_24446_cast_fp16))[name = tensor("op_24540_cast_fp16")]; + tensor var_24542_interleave_0 = const()[name = tensor("op_24542_interleave_0"), val = tensor(false)]; + tensor var_24542_cast_fp16 = concat(axis = var_23079, interleave = var_24542_interleave_0, values = (var_24448_cast_fp16, var_24450_cast_fp16, var_24452_cast_fp16, var_24454_cast_fp16))[name = tensor("op_24542_cast_fp16")]; + tensor var_24544_interleave_0 = const()[name = tensor("op_24544_interleave_0"), val = tensor(false)]; + tensor var_24544_cast_fp16 = concat(axis = var_23079, interleave = var_24544_interleave_0, values = (var_24456_cast_fp16, var_24458_cast_fp16, var_24460_cast_fp16, var_24462_cast_fp16))[name = tensor("op_24544_cast_fp16")]; + tensor var_24546_interleave_0 = const()[name = tensor("op_24546_interleave_0"), val = tensor(false)]; + tensor var_24546_cast_fp16 = concat(axis = var_23079, interleave = var_24546_interleave_0, values = (var_24464_cast_fp16, var_24466_cast_fp16, var_24468_cast_fp16, var_24470_cast_fp16))[name = tensor("op_24546_cast_fp16")]; + tensor var_24548_interleave_0 = const()[name = tensor("op_24548_interleave_0"), val = tensor(false)]; + tensor var_24548_cast_fp16 = concat(axis = var_23079, interleave = var_24548_interleave_0, values = (var_24472_cast_fp16, var_24474_cast_fp16, var_24476_cast_fp16, var_24478_cast_fp16))[name = tensor("op_24548_cast_fp16")]; + tensor var_24550_interleave_0 = const()[name = tensor("op_24550_interleave_0"), val = tensor(false)]; + tensor var_24550_cast_fp16 = concat(axis = var_23079, interleave = var_24550_interleave_0, values = (var_24480_cast_fp16, var_24482_cast_fp16, var_24484_cast_fp16, var_24486_cast_fp16))[name = tensor("op_24550_cast_fp16")]; + tensor var_24552_interleave_0 = const()[name = tensor("op_24552_interleave_0"), val = tensor(false)]; + tensor var_24552_cast_fp16 = concat(axis = var_23079, interleave = var_24552_interleave_0, values = (var_24488_cast_fp16, var_24490_cast_fp16, var_24492_cast_fp16, var_24494_cast_fp16))[name = tensor("op_24552_cast_fp16")]; + tensor var_24554_interleave_0 = const()[name = tensor("op_24554_interleave_0"), val = tensor(false)]; + tensor var_24554_cast_fp16 = concat(axis = var_23079, interleave = var_24554_interleave_0, values = (var_24496_cast_fp16, var_24498_cast_fp16, var_24500_cast_fp16, var_24502_cast_fp16))[name = tensor("op_24554_cast_fp16")]; + tensor var_24556_interleave_0 = const()[name = tensor("op_24556_interleave_0"), val = tensor(false)]; + tensor var_24556_cast_fp16 = concat(axis = var_23079, interleave = var_24556_interleave_0, values = (var_24504_cast_fp16, var_24506_cast_fp16, var_24508_cast_fp16, var_24510_cast_fp16))[name = tensor("op_24556_cast_fp16")]; + tensor var_24558_interleave_0 = const()[name = tensor("op_24558_interleave_0"), val = tensor(false)]; + tensor var_24558_cast_fp16 = concat(axis = var_23079, interleave = var_24558_interleave_0, values = (var_24512_cast_fp16, var_24514_cast_fp16, var_24516_cast_fp16, var_24518_cast_fp16))[name = tensor("op_24558_cast_fp16")]; + tensor input_121_interleave_0 = const()[name = tensor("input_121_interleave_0"), val = tensor(false)]; + tensor input_121_cast_fp16 = concat(axis = var_23104, interleave = input_121_interleave_0, values = (var_24520_cast_fp16, var_24522_cast_fp16, var_24524_cast_fp16, var_24526_cast_fp16, var_24528_cast_fp16, var_24530_cast_fp16, var_24532_cast_fp16, var_24534_cast_fp16, var_24536_cast_fp16, var_24538_cast_fp16, var_24540_cast_fp16, var_24542_cast_fp16, var_24544_cast_fp16, var_24546_cast_fp16, var_24548_cast_fp16, var_24550_cast_fp16, var_24552_cast_fp16, var_24554_cast_fp16, var_24556_cast_fp16, var_24558_cast_fp16))[name = tensor("input_121_cast_fp16")]; + tensor var_24563 = const()[name = tensor("op_24563"), val = tensor([1, 1])]; + tensor var_24565 = const()[name = tensor("op_24565"), val = tensor([1, 1])]; + tensor obj_63_pad_type_0 = const()[name = tensor("obj_63_pad_type_0"), val = tensor("custom")]; + tensor obj_63_pad_0 = const()[name = tensor("obj_63_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_15_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_15_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(614435840)))]; + tensor layers_15_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_15_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(617712704)))]; + tensor obj_63_cast_fp16 = conv(bias = layers_15_self_attn_o_proj_bias_to_fp16, dilations = var_24565, groups = var_23104, pad = obj_63_pad_0, pad_type = obj_63_pad_type_0, strides = var_24563, weight = layers_15_self_attn_o_proj_weight_to_fp16, x = input_121_cast_fp16)[name = tensor("obj_63_cast_fp16")]; + tensor inputs_63_cast_fp16 = add(x = inputs_61_cast_fp16, y = obj_63_cast_fp16)[name = tensor("inputs_63_cast_fp16")]; + tensor var_24571 = const()[name = tensor("op_24571"), val = tensor([1])]; + tensor channels_mean_63_cast_fp16 = reduce_mean(axes = var_24571, keep_dims = var_23105, x = inputs_63_cast_fp16)[name = tensor("channels_mean_63_cast_fp16")]; + tensor zero_mean_63_cast_fp16 = sub(x = inputs_63_cast_fp16, y = channels_mean_63_cast_fp16)[name = tensor("zero_mean_63_cast_fp16")]; + tensor zero_mean_sq_63_cast_fp16 = mul(x = zero_mean_63_cast_fp16, y = zero_mean_63_cast_fp16)[name = tensor("zero_mean_sq_63_cast_fp16")]; + tensor var_24575 = const()[name = tensor("op_24575"), val = tensor([1])]; + tensor var_24576_cast_fp16 = reduce_mean(axes = var_24575, keep_dims = var_23105, x = zero_mean_sq_63_cast_fp16)[name = tensor("op_24576_cast_fp16")]; + tensor var_24577_to_fp16 = const()[name = tensor("op_24577_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_24578_cast_fp16 = add(x = var_24576_cast_fp16, y = var_24577_to_fp16)[name = tensor("op_24578_cast_fp16")]; + tensor denom_63_epsilon_0_to_fp16 = const()[name = tensor("denom_63_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_63_cast_fp16 = rsqrt(epsilon = denom_63_epsilon_0_to_fp16, x = var_24578_cast_fp16)[name = tensor("denom_63_cast_fp16")]; + tensor out_63_cast_fp16 = mul(x = zero_mean_63_cast_fp16, y = denom_63_cast_fp16)[name = tensor("out_63_cast_fp16")]; + tensor input_123_gamma_0_to_fp16 = const()[name = tensor("input_123_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(617715328)))]; + tensor input_123_beta_0_to_fp16 = const()[name = tensor("input_123_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(617717952)))]; + tensor input_123_epsilon_0_to_fp16 = const()[name = tensor("input_123_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_123_cast_fp16 = batch_norm(beta = input_123_beta_0_to_fp16, epsilon = input_123_epsilon_0_to_fp16, gamma = input_123_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_63_cast_fp16)[name = tensor("input_123_cast_fp16")]; + tensor var_24589 = const()[name = tensor("op_24589"), val = tensor([1, 1])]; + tensor var_24591 = const()[name = tensor("op_24591"), val = tensor([1, 1])]; + tensor input_125_pad_type_0 = const()[name = tensor("input_125_pad_type_0"), val = tensor("custom")]; + tensor input_125_pad_0 = const()[name = tensor("input_125_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_15_fc1_weight_to_fp16 = const()[name = tensor("layers_15_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(617720576)))]; + tensor layers_15_fc1_bias_to_fp16 = const()[name = tensor("layers_15_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(630827840)))]; + tensor input_125_cast_fp16 = conv(bias = layers_15_fc1_bias_to_fp16, dilations = var_24591, groups = var_23104, pad = input_125_pad_0, pad_type = input_125_pad_type_0, strides = var_24589, weight = layers_15_fc1_weight_to_fp16, x = input_123_cast_fp16)[name = tensor("input_125_cast_fp16")]; + tensor input_127_mode_0 = const()[name = tensor("input_127_mode_0"), val = tensor("EXACT")]; + tensor input_127_cast_fp16 = gelu(mode = input_127_mode_0, x = input_125_cast_fp16)[name = tensor("input_127_cast_fp16")]; + tensor var_24597 = const()[name = tensor("op_24597"), val = tensor([1, 1])]; + tensor var_24599 = const()[name = tensor("op_24599"), val = tensor([1, 1])]; + tensor hidden_states_35_pad_type_0 = const()[name = tensor("hidden_states_35_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_35_pad_0 = const()[name = tensor("hidden_states_35_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_15_fc2_weight_to_fp16 = const()[name = tensor("layers_15_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(630838144)))]; + tensor layers_15_fc2_bias_to_fp16 = const()[name = tensor("layers_15_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(643945408)))]; + tensor hidden_states_35_cast_fp16 = conv(bias = layers_15_fc2_bias_to_fp16, dilations = var_24599, groups = var_23104, pad = hidden_states_35_pad_0, pad_type = hidden_states_35_pad_type_0, strides = var_24597, weight = layers_15_fc2_weight_to_fp16, x = input_127_cast_fp16)[name = tensor("hidden_states_35_cast_fp16")]; + tensor inputs_65_cast_fp16 = add(x = inputs_63_cast_fp16, y = hidden_states_35_cast_fp16)[name = tensor("inputs_65_cast_fp16")]; + tensor var_24606 = const()[name = tensor("op_24606"), val = tensor(3)]; + tensor var_24631 = const()[name = tensor("op_24631"), val = tensor(1)]; + tensor var_24632 = const()[name = tensor("op_24632"), val = tensor(true)]; + tensor var_24642 = const()[name = tensor("op_24642"), val = tensor([1])]; + tensor channels_mean_65_cast_fp16 = reduce_mean(axes = var_24642, keep_dims = var_24632, x = inputs_65_cast_fp16)[name = tensor("channels_mean_65_cast_fp16")]; + tensor zero_mean_65_cast_fp16 = sub(x = inputs_65_cast_fp16, y = channels_mean_65_cast_fp16)[name = tensor("zero_mean_65_cast_fp16")]; + tensor zero_mean_sq_65_cast_fp16 = mul(x = zero_mean_65_cast_fp16, y = zero_mean_65_cast_fp16)[name = tensor("zero_mean_sq_65_cast_fp16")]; + tensor var_24646 = const()[name = tensor("op_24646"), val = tensor([1])]; + tensor var_24647_cast_fp16 = reduce_mean(axes = var_24646, keep_dims = var_24632, x = zero_mean_sq_65_cast_fp16)[name = tensor("op_24647_cast_fp16")]; + tensor var_24648_to_fp16 = const()[name = tensor("op_24648_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_24649_cast_fp16 = add(x = var_24647_cast_fp16, y = var_24648_to_fp16)[name = tensor("op_24649_cast_fp16")]; + tensor denom_65_epsilon_0_to_fp16 = const()[name = tensor("denom_65_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_65_cast_fp16 = rsqrt(epsilon = denom_65_epsilon_0_to_fp16, x = var_24649_cast_fp16)[name = tensor("denom_65_cast_fp16")]; + tensor out_65_cast_fp16 = mul(x = zero_mean_65_cast_fp16, y = denom_65_cast_fp16)[name = tensor("out_65_cast_fp16")]; + tensor obj_65_gamma_0_to_fp16 = const()[name = tensor("obj_65_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(643948032)))]; + tensor obj_65_beta_0_to_fp16 = const()[name = tensor("obj_65_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(643950656)))]; + tensor obj_65_epsilon_0_to_fp16 = const()[name = tensor("obj_65_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_65_cast_fp16 = batch_norm(beta = obj_65_beta_0_to_fp16, epsilon = obj_65_epsilon_0_to_fp16, gamma = obj_65_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_65_cast_fp16)[name = tensor("obj_65_cast_fp16")]; + tensor var_24664 = const()[name = tensor("op_24664"), val = tensor([1, 1])]; + tensor var_24666 = const()[name = tensor("op_24666"), val = tensor([1, 1])]; + tensor query_33_pad_type_0 = const()[name = tensor("query_33_pad_type_0"), val = tensor("custom")]; + tensor query_33_pad_0 = const()[name = tensor("query_33_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_16_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_16_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(643953280)))]; + tensor layers_16_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_16_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(647230144)))]; + tensor query_33_cast_fp16 = conv(bias = layers_16_self_attn_q_proj_bias_to_fp16, dilations = var_24666, groups = var_24631, pad = query_33_pad_0, pad_type = query_33_pad_type_0, strides = var_24664, weight = layers_16_self_attn_q_proj_weight_to_fp16, x = obj_65_cast_fp16)[name = tensor("query_33_cast_fp16")]; + tensor var_24670 = const()[name = tensor("op_24670"), val = tensor([1, 1])]; + tensor var_24672 = const()[name = tensor("op_24672"), val = tensor([1, 1])]; + tensor key_33_pad_type_0 = const()[name = tensor("key_33_pad_type_0"), val = tensor("custom")]; + tensor key_33_pad_0 = const()[name = tensor("key_33_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_16_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_16_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(647232768)))]; + tensor key_33_cast_fp16 = conv(dilations = var_24672, groups = var_24631, pad = key_33_pad_0, pad_type = key_33_pad_type_0, strides = var_24670, weight = layers_16_self_attn_k_proj_weight_to_fp16, x = obj_65_cast_fp16)[name = tensor("key_33_cast_fp16")]; + tensor var_24677 = const()[name = tensor("op_24677"), val = tensor([1, 1])]; + tensor var_24679 = const()[name = tensor("op_24679"), val = tensor([1, 1])]; + tensor value_33_pad_type_0 = const()[name = tensor("value_33_pad_type_0"), val = tensor("custom")]; + tensor value_33_pad_0 = const()[name = tensor("value_33_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_16_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_16_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(650509632)))]; + tensor layers_16_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_16_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(653786496)))]; + tensor value_33_cast_fp16 = conv(bias = layers_16_self_attn_v_proj_bias_to_fp16, dilations = var_24679, groups = var_24631, pad = value_33_pad_0, pad_type = value_33_pad_type_0, strides = var_24677, weight = layers_16_self_attn_v_proj_weight_to_fp16, x = obj_65_cast_fp16)[name = tensor("value_33_cast_fp16")]; + tensor var_24686_begin_0 = const()[name = tensor("op_24686_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_24686_end_0 = const()[name = tensor("op_24686_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_24686_end_mask_0 = const()[name = tensor("op_24686_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_24686_cast_fp16 = slice_by_index(begin = var_24686_begin_0, end = var_24686_end_0, end_mask = var_24686_end_mask_0, x = query_33_cast_fp16)[name = tensor("op_24686_cast_fp16")]; + tensor var_24690_begin_0 = const()[name = tensor("op_24690_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_24690_end_0 = const()[name = tensor("op_24690_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_24690_end_mask_0 = const()[name = tensor("op_24690_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_24690_cast_fp16 = slice_by_index(begin = var_24690_begin_0, end = var_24690_end_0, end_mask = var_24690_end_mask_0, x = query_33_cast_fp16)[name = tensor("op_24690_cast_fp16")]; + tensor var_24694_begin_0 = const()[name = tensor("op_24694_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_24694_end_0 = const()[name = tensor("op_24694_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_24694_end_mask_0 = const()[name = tensor("op_24694_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_24694_cast_fp16 = slice_by_index(begin = var_24694_begin_0, end = var_24694_end_0, end_mask = var_24694_end_mask_0, x = query_33_cast_fp16)[name = tensor("op_24694_cast_fp16")]; + tensor var_24698_begin_0 = const()[name = tensor("op_24698_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_24698_end_0 = const()[name = tensor("op_24698_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_24698_end_mask_0 = const()[name = tensor("op_24698_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_24698_cast_fp16 = slice_by_index(begin = var_24698_begin_0, end = var_24698_end_0, end_mask = var_24698_end_mask_0, x = query_33_cast_fp16)[name = tensor("op_24698_cast_fp16")]; + tensor var_24702_begin_0 = const()[name = tensor("op_24702_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_24702_end_0 = const()[name = tensor("op_24702_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_24702_end_mask_0 = const()[name = tensor("op_24702_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_24702_cast_fp16 = slice_by_index(begin = var_24702_begin_0, end = var_24702_end_0, end_mask = var_24702_end_mask_0, x = query_33_cast_fp16)[name = tensor("op_24702_cast_fp16")]; + tensor var_24706_begin_0 = const()[name = tensor("op_24706_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_24706_end_0 = const()[name = tensor("op_24706_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_24706_end_mask_0 = const()[name = tensor("op_24706_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_24706_cast_fp16 = slice_by_index(begin = var_24706_begin_0, end = var_24706_end_0, end_mask = var_24706_end_mask_0, x = query_33_cast_fp16)[name = tensor("op_24706_cast_fp16")]; + tensor var_24710_begin_0 = const()[name = tensor("op_24710_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_24710_end_0 = const()[name = tensor("op_24710_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_24710_end_mask_0 = const()[name = tensor("op_24710_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_24710_cast_fp16 = slice_by_index(begin = var_24710_begin_0, end = var_24710_end_0, end_mask = var_24710_end_mask_0, x = query_33_cast_fp16)[name = tensor("op_24710_cast_fp16")]; + tensor var_24714_begin_0 = const()[name = tensor("op_24714_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_24714_end_0 = const()[name = tensor("op_24714_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_24714_end_mask_0 = const()[name = tensor("op_24714_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_24714_cast_fp16 = slice_by_index(begin = var_24714_begin_0, end = var_24714_end_0, end_mask = var_24714_end_mask_0, x = query_33_cast_fp16)[name = tensor("op_24714_cast_fp16")]; + tensor var_24718_begin_0 = const()[name = tensor("op_24718_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_24718_end_0 = const()[name = tensor("op_24718_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_24718_end_mask_0 = const()[name = tensor("op_24718_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_24718_cast_fp16 = slice_by_index(begin = var_24718_begin_0, end = var_24718_end_0, end_mask = var_24718_end_mask_0, x = query_33_cast_fp16)[name = tensor("op_24718_cast_fp16")]; + tensor var_24722_begin_0 = const()[name = tensor("op_24722_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_24722_end_0 = const()[name = tensor("op_24722_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_24722_end_mask_0 = const()[name = tensor("op_24722_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_24722_cast_fp16 = slice_by_index(begin = var_24722_begin_0, end = var_24722_end_0, end_mask = var_24722_end_mask_0, x = query_33_cast_fp16)[name = tensor("op_24722_cast_fp16")]; + tensor var_24726_begin_0 = const()[name = tensor("op_24726_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_24726_end_0 = const()[name = tensor("op_24726_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_24726_end_mask_0 = const()[name = tensor("op_24726_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_24726_cast_fp16 = slice_by_index(begin = var_24726_begin_0, end = var_24726_end_0, end_mask = var_24726_end_mask_0, x = query_33_cast_fp16)[name = tensor("op_24726_cast_fp16")]; + tensor var_24730_begin_0 = const()[name = tensor("op_24730_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_24730_end_0 = const()[name = tensor("op_24730_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_24730_end_mask_0 = const()[name = tensor("op_24730_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_24730_cast_fp16 = slice_by_index(begin = var_24730_begin_0, end = var_24730_end_0, end_mask = var_24730_end_mask_0, x = query_33_cast_fp16)[name = tensor("op_24730_cast_fp16")]; + tensor var_24734_begin_0 = const()[name = tensor("op_24734_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_24734_end_0 = const()[name = tensor("op_24734_end_0"), val = tensor([1, 832, 1, 1500])]; + tensor var_24734_end_mask_0 = const()[name = tensor("op_24734_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_24734_cast_fp16 = slice_by_index(begin = var_24734_begin_0, end = var_24734_end_0, end_mask = var_24734_end_mask_0, x = query_33_cast_fp16)[name = tensor("op_24734_cast_fp16")]; + tensor var_24738_begin_0 = const()[name = tensor("op_24738_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_24738_end_0 = const()[name = tensor("op_24738_end_0"), val = tensor([1, 896, 1, 1500])]; + tensor var_24738_end_mask_0 = const()[name = tensor("op_24738_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_24738_cast_fp16 = slice_by_index(begin = var_24738_begin_0, end = var_24738_end_0, end_mask = var_24738_end_mask_0, x = query_33_cast_fp16)[name = tensor("op_24738_cast_fp16")]; + tensor var_24742_begin_0 = const()[name = tensor("op_24742_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_24742_end_0 = const()[name = tensor("op_24742_end_0"), val = tensor([1, 960, 1, 1500])]; + tensor var_24742_end_mask_0 = const()[name = tensor("op_24742_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_24742_cast_fp16 = slice_by_index(begin = var_24742_begin_0, end = var_24742_end_0, end_mask = var_24742_end_mask_0, x = query_33_cast_fp16)[name = tensor("op_24742_cast_fp16")]; + tensor var_24746_begin_0 = const()[name = tensor("op_24746_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_24746_end_0 = const()[name = tensor("op_24746_end_0"), val = tensor([1, 1024, 1, 1500])]; + tensor var_24746_end_mask_0 = const()[name = tensor("op_24746_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_24746_cast_fp16 = slice_by_index(begin = var_24746_begin_0, end = var_24746_end_0, end_mask = var_24746_end_mask_0, x = query_33_cast_fp16)[name = tensor("op_24746_cast_fp16")]; + tensor var_24750_begin_0 = const()[name = tensor("op_24750_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_24750_end_0 = const()[name = tensor("op_24750_end_0"), val = tensor([1, 1088, 1, 1500])]; + tensor var_24750_end_mask_0 = const()[name = tensor("op_24750_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_24750_cast_fp16 = slice_by_index(begin = var_24750_begin_0, end = var_24750_end_0, end_mask = var_24750_end_mask_0, x = query_33_cast_fp16)[name = tensor("op_24750_cast_fp16")]; + tensor var_24754_begin_0 = const()[name = tensor("op_24754_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_24754_end_0 = const()[name = tensor("op_24754_end_0"), val = tensor([1, 1152, 1, 1500])]; + tensor var_24754_end_mask_0 = const()[name = tensor("op_24754_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_24754_cast_fp16 = slice_by_index(begin = var_24754_begin_0, end = var_24754_end_0, end_mask = var_24754_end_mask_0, x = query_33_cast_fp16)[name = tensor("op_24754_cast_fp16")]; + tensor var_24758_begin_0 = const()[name = tensor("op_24758_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_24758_end_0 = const()[name = tensor("op_24758_end_0"), val = tensor([1, 1216, 1, 1500])]; + tensor var_24758_end_mask_0 = const()[name = tensor("op_24758_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_24758_cast_fp16 = slice_by_index(begin = var_24758_begin_0, end = var_24758_end_0, end_mask = var_24758_end_mask_0, x = query_33_cast_fp16)[name = tensor("op_24758_cast_fp16")]; + tensor var_24762_begin_0 = const()[name = tensor("op_24762_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_24762_end_0 = const()[name = tensor("op_24762_end_0"), val = tensor([1, 1280, 1, 1500])]; + tensor var_24762_end_mask_0 = const()[name = tensor("op_24762_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_24762_cast_fp16 = slice_by_index(begin = var_24762_begin_0, end = var_24762_end_0, end_mask = var_24762_end_mask_0, x = query_33_cast_fp16)[name = tensor("op_24762_cast_fp16")]; + tensor var_24771_begin_0 = const()[name = tensor("op_24771_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_24771_end_0 = const()[name = tensor("op_24771_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_24771_end_mask_0 = const()[name = tensor("op_24771_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_24771_cast_fp16 = slice_by_index(begin = var_24771_begin_0, end = var_24771_end_0, end_mask = var_24771_end_mask_0, x = var_24686_cast_fp16)[name = tensor("op_24771_cast_fp16")]; + tensor var_24778_begin_0 = const()[name = tensor("op_24778_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_24778_end_0 = const()[name = tensor("op_24778_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_24778_end_mask_0 = const()[name = tensor("op_24778_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_24778_cast_fp16 = slice_by_index(begin = var_24778_begin_0, end = var_24778_end_0, end_mask = var_24778_end_mask_0, x = var_24686_cast_fp16)[name = tensor("op_24778_cast_fp16")]; + tensor var_24785_begin_0 = const()[name = tensor("op_24785_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_24785_end_0 = const()[name = tensor("op_24785_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_24785_end_mask_0 = const()[name = tensor("op_24785_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_24785_cast_fp16 = slice_by_index(begin = var_24785_begin_0, end = var_24785_end_0, end_mask = var_24785_end_mask_0, x = var_24686_cast_fp16)[name = tensor("op_24785_cast_fp16")]; + tensor var_24792_begin_0 = const()[name = tensor("op_24792_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_24792_end_0 = const()[name = tensor("op_24792_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_24792_end_mask_0 = const()[name = tensor("op_24792_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_24792_cast_fp16 = slice_by_index(begin = var_24792_begin_0, end = var_24792_end_0, end_mask = var_24792_end_mask_0, x = var_24686_cast_fp16)[name = tensor("op_24792_cast_fp16")]; + tensor var_24799_begin_0 = const()[name = tensor("op_24799_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_24799_end_0 = const()[name = tensor("op_24799_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_24799_end_mask_0 = const()[name = tensor("op_24799_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_24799_cast_fp16 = slice_by_index(begin = var_24799_begin_0, end = var_24799_end_0, end_mask = var_24799_end_mask_0, x = var_24690_cast_fp16)[name = tensor("op_24799_cast_fp16")]; + tensor var_24806_begin_0 = const()[name = tensor("op_24806_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_24806_end_0 = const()[name = tensor("op_24806_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_24806_end_mask_0 = const()[name = tensor("op_24806_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_24806_cast_fp16 = slice_by_index(begin = var_24806_begin_0, end = var_24806_end_0, end_mask = var_24806_end_mask_0, x = var_24690_cast_fp16)[name = tensor("op_24806_cast_fp16")]; + tensor var_24813_begin_0 = const()[name = tensor("op_24813_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_24813_end_0 = const()[name = tensor("op_24813_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_24813_end_mask_0 = const()[name = tensor("op_24813_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_24813_cast_fp16 = slice_by_index(begin = var_24813_begin_0, end = var_24813_end_0, end_mask = var_24813_end_mask_0, x = var_24690_cast_fp16)[name = tensor("op_24813_cast_fp16")]; + tensor var_24820_begin_0 = const()[name = tensor("op_24820_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_24820_end_0 = const()[name = tensor("op_24820_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_24820_end_mask_0 = const()[name = tensor("op_24820_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_24820_cast_fp16 = slice_by_index(begin = var_24820_begin_0, end = var_24820_end_0, end_mask = var_24820_end_mask_0, x = var_24690_cast_fp16)[name = tensor("op_24820_cast_fp16")]; + tensor var_24827_begin_0 = const()[name = tensor("op_24827_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_24827_end_0 = const()[name = tensor("op_24827_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_24827_end_mask_0 = const()[name = tensor("op_24827_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_24827_cast_fp16 = slice_by_index(begin = var_24827_begin_0, end = var_24827_end_0, end_mask = var_24827_end_mask_0, x = var_24694_cast_fp16)[name = tensor("op_24827_cast_fp16")]; + tensor var_24834_begin_0 = const()[name = tensor("op_24834_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_24834_end_0 = const()[name = tensor("op_24834_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_24834_end_mask_0 = const()[name = tensor("op_24834_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_24834_cast_fp16 = slice_by_index(begin = var_24834_begin_0, end = var_24834_end_0, end_mask = var_24834_end_mask_0, x = var_24694_cast_fp16)[name = tensor("op_24834_cast_fp16")]; + tensor var_24841_begin_0 = const()[name = tensor("op_24841_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_24841_end_0 = const()[name = tensor("op_24841_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_24841_end_mask_0 = const()[name = tensor("op_24841_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_24841_cast_fp16 = slice_by_index(begin = var_24841_begin_0, end = var_24841_end_0, end_mask = var_24841_end_mask_0, x = var_24694_cast_fp16)[name = tensor("op_24841_cast_fp16")]; + tensor var_24848_begin_0 = const()[name = tensor("op_24848_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_24848_end_0 = const()[name = tensor("op_24848_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_24848_end_mask_0 = const()[name = tensor("op_24848_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_24848_cast_fp16 = slice_by_index(begin = var_24848_begin_0, end = var_24848_end_0, end_mask = var_24848_end_mask_0, x = var_24694_cast_fp16)[name = tensor("op_24848_cast_fp16")]; + tensor var_24855_begin_0 = const()[name = tensor("op_24855_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_24855_end_0 = const()[name = tensor("op_24855_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_24855_end_mask_0 = const()[name = tensor("op_24855_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_24855_cast_fp16 = slice_by_index(begin = var_24855_begin_0, end = var_24855_end_0, end_mask = var_24855_end_mask_0, x = var_24698_cast_fp16)[name = tensor("op_24855_cast_fp16")]; + tensor var_24862_begin_0 = const()[name = tensor("op_24862_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_24862_end_0 = const()[name = tensor("op_24862_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_24862_end_mask_0 = const()[name = tensor("op_24862_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_24862_cast_fp16 = slice_by_index(begin = var_24862_begin_0, end = var_24862_end_0, end_mask = var_24862_end_mask_0, x = var_24698_cast_fp16)[name = tensor("op_24862_cast_fp16")]; + tensor var_24869_begin_0 = const()[name = tensor("op_24869_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_24869_end_0 = const()[name = tensor("op_24869_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_24869_end_mask_0 = const()[name = tensor("op_24869_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_24869_cast_fp16 = slice_by_index(begin = var_24869_begin_0, end = var_24869_end_0, end_mask = var_24869_end_mask_0, x = var_24698_cast_fp16)[name = tensor("op_24869_cast_fp16")]; + tensor var_24876_begin_0 = const()[name = tensor("op_24876_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_24876_end_0 = const()[name = tensor("op_24876_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_24876_end_mask_0 = const()[name = tensor("op_24876_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_24876_cast_fp16 = slice_by_index(begin = var_24876_begin_0, end = var_24876_end_0, end_mask = var_24876_end_mask_0, x = var_24698_cast_fp16)[name = tensor("op_24876_cast_fp16")]; + tensor var_24883_begin_0 = const()[name = tensor("op_24883_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_24883_end_0 = const()[name = tensor("op_24883_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_24883_end_mask_0 = const()[name = tensor("op_24883_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_24883_cast_fp16 = slice_by_index(begin = var_24883_begin_0, end = var_24883_end_0, end_mask = var_24883_end_mask_0, x = var_24702_cast_fp16)[name = tensor("op_24883_cast_fp16")]; + tensor var_24890_begin_0 = const()[name = tensor("op_24890_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_24890_end_0 = const()[name = tensor("op_24890_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_24890_end_mask_0 = const()[name = tensor("op_24890_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_24890_cast_fp16 = slice_by_index(begin = var_24890_begin_0, end = var_24890_end_0, end_mask = var_24890_end_mask_0, x = var_24702_cast_fp16)[name = tensor("op_24890_cast_fp16")]; + tensor var_24897_begin_0 = const()[name = tensor("op_24897_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_24897_end_0 = const()[name = tensor("op_24897_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_24897_end_mask_0 = const()[name = tensor("op_24897_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_24897_cast_fp16 = slice_by_index(begin = var_24897_begin_0, end = var_24897_end_0, end_mask = var_24897_end_mask_0, x = var_24702_cast_fp16)[name = tensor("op_24897_cast_fp16")]; + tensor var_24904_begin_0 = const()[name = tensor("op_24904_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_24904_end_0 = const()[name = tensor("op_24904_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_24904_end_mask_0 = const()[name = tensor("op_24904_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_24904_cast_fp16 = slice_by_index(begin = var_24904_begin_0, end = var_24904_end_0, end_mask = var_24904_end_mask_0, x = var_24702_cast_fp16)[name = tensor("op_24904_cast_fp16")]; + tensor var_24911_begin_0 = const()[name = tensor("op_24911_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_24911_end_0 = const()[name = tensor("op_24911_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_24911_end_mask_0 = const()[name = tensor("op_24911_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_24911_cast_fp16 = slice_by_index(begin = var_24911_begin_0, end = var_24911_end_0, end_mask = var_24911_end_mask_0, x = var_24706_cast_fp16)[name = tensor("op_24911_cast_fp16")]; + tensor var_24918_begin_0 = const()[name = tensor("op_24918_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_24918_end_0 = const()[name = tensor("op_24918_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_24918_end_mask_0 = const()[name = tensor("op_24918_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_24918_cast_fp16 = slice_by_index(begin = var_24918_begin_0, end = var_24918_end_0, end_mask = var_24918_end_mask_0, x = var_24706_cast_fp16)[name = tensor("op_24918_cast_fp16")]; + tensor var_24925_begin_0 = const()[name = tensor("op_24925_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_24925_end_0 = const()[name = tensor("op_24925_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_24925_end_mask_0 = const()[name = tensor("op_24925_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_24925_cast_fp16 = slice_by_index(begin = var_24925_begin_0, end = var_24925_end_0, end_mask = var_24925_end_mask_0, x = var_24706_cast_fp16)[name = tensor("op_24925_cast_fp16")]; + tensor var_24932_begin_0 = const()[name = tensor("op_24932_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_24932_end_0 = const()[name = tensor("op_24932_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_24932_end_mask_0 = const()[name = tensor("op_24932_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_24932_cast_fp16 = slice_by_index(begin = var_24932_begin_0, end = var_24932_end_0, end_mask = var_24932_end_mask_0, x = var_24706_cast_fp16)[name = tensor("op_24932_cast_fp16")]; + tensor var_24939_begin_0 = const()[name = tensor("op_24939_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_24939_end_0 = const()[name = tensor("op_24939_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_24939_end_mask_0 = const()[name = tensor("op_24939_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_24939_cast_fp16 = slice_by_index(begin = var_24939_begin_0, end = var_24939_end_0, end_mask = var_24939_end_mask_0, x = var_24710_cast_fp16)[name = tensor("op_24939_cast_fp16")]; + tensor var_24946_begin_0 = const()[name = tensor("op_24946_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_24946_end_0 = const()[name = tensor("op_24946_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_24946_end_mask_0 = const()[name = tensor("op_24946_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_24946_cast_fp16 = slice_by_index(begin = var_24946_begin_0, end = var_24946_end_0, end_mask = var_24946_end_mask_0, x = var_24710_cast_fp16)[name = tensor("op_24946_cast_fp16")]; + tensor var_24953_begin_0 = const()[name = tensor("op_24953_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_24953_end_0 = const()[name = tensor("op_24953_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_24953_end_mask_0 = const()[name = tensor("op_24953_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_24953_cast_fp16 = slice_by_index(begin = var_24953_begin_0, end = var_24953_end_0, end_mask = var_24953_end_mask_0, x = var_24710_cast_fp16)[name = tensor("op_24953_cast_fp16")]; + tensor var_24960_begin_0 = const()[name = tensor("op_24960_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_24960_end_0 = const()[name = tensor("op_24960_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_24960_end_mask_0 = const()[name = tensor("op_24960_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_24960_cast_fp16 = slice_by_index(begin = var_24960_begin_0, end = var_24960_end_0, end_mask = var_24960_end_mask_0, x = var_24710_cast_fp16)[name = tensor("op_24960_cast_fp16")]; + tensor var_24967_begin_0 = const()[name = tensor("op_24967_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_24967_end_0 = const()[name = tensor("op_24967_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_24967_end_mask_0 = const()[name = tensor("op_24967_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_24967_cast_fp16 = slice_by_index(begin = var_24967_begin_0, end = var_24967_end_0, end_mask = var_24967_end_mask_0, x = var_24714_cast_fp16)[name = tensor("op_24967_cast_fp16")]; + tensor var_24974_begin_0 = const()[name = tensor("op_24974_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_24974_end_0 = const()[name = tensor("op_24974_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_24974_end_mask_0 = const()[name = tensor("op_24974_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_24974_cast_fp16 = slice_by_index(begin = var_24974_begin_0, end = var_24974_end_0, end_mask = var_24974_end_mask_0, x = var_24714_cast_fp16)[name = tensor("op_24974_cast_fp16")]; + tensor var_24981_begin_0 = const()[name = tensor("op_24981_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_24981_end_0 = const()[name = tensor("op_24981_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_24981_end_mask_0 = const()[name = tensor("op_24981_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_24981_cast_fp16 = slice_by_index(begin = var_24981_begin_0, end = var_24981_end_0, end_mask = var_24981_end_mask_0, x = var_24714_cast_fp16)[name = tensor("op_24981_cast_fp16")]; + tensor var_24988_begin_0 = const()[name = tensor("op_24988_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_24988_end_0 = const()[name = tensor("op_24988_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_24988_end_mask_0 = const()[name = tensor("op_24988_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_24988_cast_fp16 = slice_by_index(begin = var_24988_begin_0, end = var_24988_end_0, end_mask = var_24988_end_mask_0, x = var_24714_cast_fp16)[name = tensor("op_24988_cast_fp16")]; + tensor var_24995_begin_0 = const()[name = tensor("op_24995_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_24995_end_0 = const()[name = tensor("op_24995_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_24995_end_mask_0 = const()[name = tensor("op_24995_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_24995_cast_fp16 = slice_by_index(begin = var_24995_begin_0, end = var_24995_end_0, end_mask = var_24995_end_mask_0, x = var_24718_cast_fp16)[name = tensor("op_24995_cast_fp16")]; + tensor var_25002_begin_0 = const()[name = tensor("op_25002_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_25002_end_0 = const()[name = tensor("op_25002_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_25002_end_mask_0 = const()[name = tensor("op_25002_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_25002_cast_fp16 = slice_by_index(begin = var_25002_begin_0, end = var_25002_end_0, end_mask = var_25002_end_mask_0, x = var_24718_cast_fp16)[name = tensor("op_25002_cast_fp16")]; + tensor var_25009_begin_0 = const()[name = tensor("op_25009_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_25009_end_0 = const()[name = tensor("op_25009_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_25009_end_mask_0 = const()[name = tensor("op_25009_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_25009_cast_fp16 = slice_by_index(begin = var_25009_begin_0, end = var_25009_end_0, end_mask = var_25009_end_mask_0, x = var_24718_cast_fp16)[name = tensor("op_25009_cast_fp16")]; + tensor var_25016_begin_0 = const()[name = tensor("op_25016_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_25016_end_0 = const()[name = tensor("op_25016_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_25016_end_mask_0 = const()[name = tensor("op_25016_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_25016_cast_fp16 = slice_by_index(begin = var_25016_begin_0, end = var_25016_end_0, end_mask = var_25016_end_mask_0, x = var_24718_cast_fp16)[name = tensor("op_25016_cast_fp16")]; + tensor var_25023_begin_0 = const()[name = tensor("op_25023_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_25023_end_0 = const()[name = tensor("op_25023_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_25023_end_mask_0 = const()[name = tensor("op_25023_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_25023_cast_fp16 = slice_by_index(begin = var_25023_begin_0, end = var_25023_end_0, end_mask = var_25023_end_mask_0, x = var_24722_cast_fp16)[name = tensor("op_25023_cast_fp16")]; + tensor var_25030_begin_0 = const()[name = tensor("op_25030_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_25030_end_0 = const()[name = tensor("op_25030_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_25030_end_mask_0 = const()[name = tensor("op_25030_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_25030_cast_fp16 = slice_by_index(begin = var_25030_begin_0, end = var_25030_end_0, end_mask = var_25030_end_mask_0, x = var_24722_cast_fp16)[name = tensor("op_25030_cast_fp16")]; + tensor var_25037_begin_0 = const()[name = tensor("op_25037_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_25037_end_0 = const()[name = tensor("op_25037_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_25037_end_mask_0 = const()[name = tensor("op_25037_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_25037_cast_fp16 = slice_by_index(begin = var_25037_begin_0, end = var_25037_end_0, end_mask = var_25037_end_mask_0, x = var_24722_cast_fp16)[name = tensor("op_25037_cast_fp16")]; + tensor var_25044_begin_0 = const()[name = tensor("op_25044_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_25044_end_0 = const()[name = tensor("op_25044_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_25044_end_mask_0 = const()[name = tensor("op_25044_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_25044_cast_fp16 = slice_by_index(begin = var_25044_begin_0, end = var_25044_end_0, end_mask = var_25044_end_mask_0, x = var_24722_cast_fp16)[name = tensor("op_25044_cast_fp16")]; + tensor var_25051_begin_0 = const()[name = tensor("op_25051_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_25051_end_0 = const()[name = tensor("op_25051_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_25051_end_mask_0 = const()[name = tensor("op_25051_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_25051_cast_fp16 = slice_by_index(begin = var_25051_begin_0, end = var_25051_end_0, end_mask = var_25051_end_mask_0, x = var_24726_cast_fp16)[name = tensor("op_25051_cast_fp16")]; + tensor var_25058_begin_0 = const()[name = tensor("op_25058_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_25058_end_0 = const()[name = tensor("op_25058_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_25058_end_mask_0 = const()[name = tensor("op_25058_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_25058_cast_fp16 = slice_by_index(begin = var_25058_begin_0, end = var_25058_end_0, end_mask = var_25058_end_mask_0, x = var_24726_cast_fp16)[name = tensor("op_25058_cast_fp16")]; + tensor var_25065_begin_0 = const()[name = tensor("op_25065_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_25065_end_0 = const()[name = tensor("op_25065_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_25065_end_mask_0 = const()[name = tensor("op_25065_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_25065_cast_fp16 = slice_by_index(begin = var_25065_begin_0, end = var_25065_end_0, end_mask = var_25065_end_mask_0, x = var_24726_cast_fp16)[name = tensor("op_25065_cast_fp16")]; + tensor var_25072_begin_0 = const()[name = tensor("op_25072_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_25072_end_0 = const()[name = tensor("op_25072_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_25072_end_mask_0 = const()[name = tensor("op_25072_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_25072_cast_fp16 = slice_by_index(begin = var_25072_begin_0, end = var_25072_end_0, end_mask = var_25072_end_mask_0, x = var_24726_cast_fp16)[name = tensor("op_25072_cast_fp16")]; + tensor var_25079_begin_0 = const()[name = tensor("op_25079_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_25079_end_0 = const()[name = tensor("op_25079_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_25079_end_mask_0 = const()[name = tensor("op_25079_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_25079_cast_fp16 = slice_by_index(begin = var_25079_begin_0, end = var_25079_end_0, end_mask = var_25079_end_mask_0, x = var_24730_cast_fp16)[name = tensor("op_25079_cast_fp16")]; + tensor var_25086_begin_0 = const()[name = tensor("op_25086_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_25086_end_0 = const()[name = tensor("op_25086_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_25086_end_mask_0 = const()[name = tensor("op_25086_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_25086_cast_fp16 = slice_by_index(begin = var_25086_begin_0, end = var_25086_end_0, end_mask = var_25086_end_mask_0, x = var_24730_cast_fp16)[name = tensor("op_25086_cast_fp16")]; + tensor var_25093_begin_0 = const()[name = tensor("op_25093_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_25093_end_0 = const()[name = tensor("op_25093_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_25093_end_mask_0 = const()[name = tensor("op_25093_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_25093_cast_fp16 = slice_by_index(begin = var_25093_begin_0, end = var_25093_end_0, end_mask = var_25093_end_mask_0, x = var_24730_cast_fp16)[name = tensor("op_25093_cast_fp16")]; + tensor var_25100_begin_0 = const()[name = tensor("op_25100_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_25100_end_0 = const()[name = tensor("op_25100_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_25100_end_mask_0 = const()[name = tensor("op_25100_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_25100_cast_fp16 = slice_by_index(begin = var_25100_begin_0, end = var_25100_end_0, end_mask = var_25100_end_mask_0, x = var_24730_cast_fp16)[name = tensor("op_25100_cast_fp16")]; + tensor var_25107_begin_0 = const()[name = tensor("op_25107_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_25107_end_0 = const()[name = tensor("op_25107_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_25107_end_mask_0 = const()[name = tensor("op_25107_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_25107_cast_fp16 = slice_by_index(begin = var_25107_begin_0, end = var_25107_end_0, end_mask = var_25107_end_mask_0, x = var_24734_cast_fp16)[name = tensor("op_25107_cast_fp16")]; + tensor var_25114_begin_0 = const()[name = tensor("op_25114_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_25114_end_0 = const()[name = tensor("op_25114_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_25114_end_mask_0 = const()[name = tensor("op_25114_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_25114_cast_fp16 = slice_by_index(begin = var_25114_begin_0, end = var_25114_end_0, end_mask = var_25114_end_mask_0, x = var_24734_cast_fp16)[name = tensor("op_25114_cast_fp16")]; + tensor var_25121_begin_0 = const()[name = tensor("op_25121_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_25121_end_0 = const()[name = tensor("op_25121_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_25121_end_mask_0 = const()[name = tensor("op_25121_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_25121_cast_fp16 = slice_by_index(begin = var_25121_begin_0, end = var_25121_end_0, end_mask = var_25121_end_mask_0, x = var_24734_cast_fp16)[name = tensor("op_25121_cast_fp16")]; + tensor var_25128_begin_0 = const()[name = tensor("op_25128_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_25128_end_0 = const()[name = tensor("op_25128_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_25128_end_mask_0 = const()[name = tensor("op_25128_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_25128_cast_fp16 = slice_by_index(begin = var_25128_begin_0, end = var_25128_end_0, end_mask = var_25128_end_mask_0, x = var_24734_cast_fp16)[name = tensor("op_25128_cast_fp16")]; + tensor var_25135_begin_0 = const()[name = tensor("op_25135_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_25135_end_0 = const()[name = tensor("op_25135_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_25135_end_mask_0 = const()[name = tensor("op_25135_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_25135_cast_fp16 = slice_by_index(begin = var_25135_begin_0, end = var_25135_end_0, end_mask = var_25135_end_mask_0, x = var_24738_cast_fp16)[name = tensor("op_25135_cast_fp16")]; + tensor var_25142_begin_0 = const()[name = tensor("op_25142_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_25142_end_0 = const()[name = tensor("op_25142_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_25142_end_mask_0 = const()[name = tensor("op_25142_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_25142_cast_fp16 = slice_by_index(begin = var_25142_begin_0, end = var_25142_end_0, end_mask = var_25142_end_mask_0, x = var_24738_cast_fp16)[name = tensor("op_25142_cast_fp16")]; + tensor var_25149_begin_0 = const()[name = tensor("op_25149_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_25149_end_0 = const()[name = tensor("op_25149_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_25149_end_mask_0 = const()[name = tensor("op_25149_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_25149_cast_fp16 = slice_by_index(begin = var_25149_begin_0, end = var_25149_end_0, end_mask = var_25149_end_mask_0, x = var_24738_cast_fp16)[name = tensor("op_25149_cast_fp16")]; + tensor var_25156_begin_0 = const()[name = tensor("op_25156_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_25156_end_0 = const()[name = tensor("op_25156_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_25156_end_mask_0 = const()[name = tensor("op_25156_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_25156_cast_fp16 = slice_by_index(begin = var_25156_begin_0, end = var_25156_end_0, end_mask = var_25156_end_mask_0, x = var_24738_cast_fp16)[name = tensor("op_25156_cast_fp16")]; + tensor var_25163_begin_0 = const()[name = tensor("op_25163_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_25163_end_0 = const()[name = tensor("op_25163_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_25163_end_mask_0 = const()[name = tensor("op_25163_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_25163_cast_fp16 = slice_by_index(begin = var_25163_begin_0, end = var_25163_end_0, end_mask = var_25163_end_mask_0, x = var_24742_cast_fp16)[name = tensor("op_25163_cast_fp16")]; + tensor var_25170_begin_0 = const()[name = tensor("op_25170_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_25170_end_0 = const()[name = tensor("op_25170_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_25170_end_mask_0 = const()[name = tensor("op_25170_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_25170_cast_fp16 = slice_by_index(begin = var_25170_begin_0, end = var_25170_end_0, end_mask = var_25170_end_mask_0, x = var_24742_cast_fp16)[name = tensor("op_25170_cast_fp16")]; + tensor var_25177_begin_0 = const()[name = tensor("op_25177_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_25177_end_0 = const()[name = tensor("op_25177_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_25177_end_mask_0 = const()[name = tensor("op_25177_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_25177_cast_fp16 = slice_by_index(begin = var_25177_begin_0, end = var_25177_end_0, end_mask = var_25177_end_mask_0, x = var_24742_cast_fp16)[name = tensor("op_25177_cast_fp16")]; + tensor var_25184_begin_0 = const()[name = tensor("op_25184_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_25184_end_0 = const()[name = tensor("op_25184_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_25184_end_mask_0 = const()[name = tensor("op_25184_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_25184_cast_fp16 = slice_by_index(begin = var_25184_begin_0, end = var_25184_end_0, end_mask = var_25184_end_mask_0, x = var_24742_cast_fp16)[name = tensor("op_25184_cast_fp16")]; + tensor var_25191_begin_0 = const()[name = tensor("op_25191_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_25191_end_0 = const()[name = tensor("op_25191_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_25191_end_mask_0 = const()[name = tensor("op_25191_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_25191_cast_fp16 = slice_by_index(begin = var_25191_begin_0, end = var_25191_end_0, end_mask = var_25191_end_mask_0, x = var_24746_cast_fp16)[name = tensor("op_25191_cast_fp16")]; + tensor var_25198_begin_0 = const()[name = tensor("op_25198_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_25198_end_0 = const()[name = tensor("op_25198_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_25198_end_mask_0 = const()[name = tensor("op_25198_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_25198_cast_fp16 = slice_by_index(begin = var_25198_begin_0, end = var_25198_end_0, end_mask = var_25198_end_mask_0, x = var_24746_cast_fp16)[name = tensor("op_25198_cast_fp16")]; + tensor var_25205_begin_0 = const()[name = tensor("op_25205_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_25205_end_0 = const()[name = tensor("op_25205_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_25205_end_mask_0 = const()[name = tensor("op_25205_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_25205_cast_fp16 = slice_by_index(begin = var_25205_begin_0, end = var_25205_end_0, end_mask = var_25205_end_mask_0, x = var_24746_cast_fp16)[name = tensor("op_25205_cast_fp16")]; + tensor var_25212_begin_0 = const()[name = tensor("op_25212_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_25212_end_0 = const()[name = tensor("op_25212_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_25212_end_mask_0 = const()[name = tensor("op_25212_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_25212_cast_fp16 = slice_by_index(begin = var_25212_begin_0, end = var_25212_end_0, end_mask = var_25212_end_mask_0, x = var_24746_cast_fp16)[name = tensor("op_25212_cast_fp16")]; + tensor var_25219_begin_0 = const()[name = tensor("op_25219_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_25219_end_0 = const()[name = tensor("op_25219_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_25219_end_mask_0 = const()[name = tensor("op_25219_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_25219_cast_fp16 = slice_by_index(begin = var_25219_begin_0, end = var_25219_end_0, end_mask = var_25219_end_mask_0, x = var_24750_cast_fp16)[name = tensor("op_25219_cast_fp16")]; + tensor var_25226_begin_0 = const()[name = tensor("op_25226_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_25226_end_0 = const()[name = tensor("op_25226_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_25226_end_mask_0 = const()[name = tensor("op_25226_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_25226_cast_fp16 = slice_by_index(begin = var_25226_begin_0, end = var_25226_end_0, end_mask = var_25226_end_mask_0, x = var_24750_cast_fp16)[name = tensor("op_25226_cast_fp16")]; + tensor var_25233_begin_0 = const()[name = tensor("op_25233_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_25233_end_0 = const()[name = tensor("op_25233_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_25233_end_mask_0 = const()[name = tensor("op_25233_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_25233_cast_fp16 = slice_by_index(begin = var_25233_begin_0, end = var_25233_end_0, end_mask = var_25233_end_mask_0, x = var_24750_cast_fp16)[name = tensor("op_25233_cast_fp16")]; + tensor var_25240_begin_0 = const()[name = tensor("op_25240_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_25240_end_0 = const()[name = tensor("op_25240_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_25240_end_mask_0 = const()[name = tensor("op_25240_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_25240_cast_fp16 = slice_by_index(begin = var_25240_begin_0, end = var_25240_end_0, end_mask = var_25240_end_mask_0, x = var_24750_cast_fp16)[name = tensor("op_25240_cast_fp16")]; + tensor var_25247_begin_0 = const()[name = tensor("op_25247_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_25247_end_0 = const()[name = tensor("op_25247_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_25247_end_mask_0 = const()[name = tensor("op_25247_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_25247_cast_fp16 = slice_by_index(begin = var_25247_begin_0, end = var_25247_end_0, end_mask = var_25247_end_mask_0, x = var_24754_cast_fp16)[name = tensor("op_25247_cast_fp16")]; + tensor var_25254_begin_0 = const()[name = tensor("op_25254_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_25254_end_0 = const()[name = tensor("op_25254_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_25254_end_mask_0 = const()[name = tensor("op_25254_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_25254_cast_fp16 = slice_by_index(begin = var_25254_begin_0, end = var_25254_end_0, end_mask = var_25254_end_mask_0, x = var_24754_cast_fp16)[name = tensor("op_25254_cast_fp16")]; + tensor var_25261_begin_0 = const()[name = tensor("op_25261_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_25261_end_0 = const()[name = tensor("op_25261_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_25261_end_mask_0 = const()[name = tensor("op_25261_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_25261_cast_fp16 = slice_by_index(begin = var_25261_begin_0, end = var_25261_end_0, end_mask = var_25261_end_mask_0, x = var_24754_cast_fp16)[name = tensor("op_25261_cast_fp16")]; + tensor var_25268_begin_0 = const()[name = tensor("op_25268_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_25268_end_0 = const()[name = tensor("op_25268_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_25268_end_mask_0 = const()[name = tensor("op_25268_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_25268_cast_fp16 = slice_by_index(begin = var_25268_begin_0, end = var_25268_end_0, end_mask = var_25268_end_mask_0, x = var_24754_cast_fp16)[name = tensor("op_25268_cast_fp16")]; + tensor var_25275_begin_0 = const()[name = tensor("op_25275_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_25275_end_0 = const()[name = tensor("op_25275_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_25275_end_mask_0 = const()[name = tensor("op_25275_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_25275_cast_fp16 = slice_by_index(begin = var_25275_begin_0, end = var_25275_end_0, end_mask = var_25275_end_mask_0, x = var_24758_cast_fp16)[name = tensor("op_25275_cast_fp16")]; + tensor var_25282_begin_0 = const()[name = tensor("op_25282_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_25282_end_0 = const()[name = tensor("op_25282_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_25282_end_mask_0 = const()[name = tensor("op_25282_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_25282_cast_fp16 = slice_by_index(begin = var_25282_begin_0, end = var_25282_end_0, end_mask = var_25282_end_mask_0, x = var_24758_cast_fp16)[name = tensor("op_25282_cast_fp16")]; + tensor var_25289_begin_0 = const()[name = tensor("op_25289_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_25289_end_0 = const()[name = tensor("op_25289_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_25289_end_mask_0 = const()[name = tensor("op_25289_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_25289_cast_fp16 = slice_by_index(begin = var_25289_begin_0, end = var_25289_end_0, end_mask = var_25289_end_mask_0, x = var_24758_cast_fp16)[name = tensor("op_25289_cast_fp16")]; + tensor var_25296_begin_0 = const()[name = tensor("op_25296_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_25296_end_0 = const()[name = tensor("op_25296_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_25296_end_mask_0 = const()[name = tensor("op_25296_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_25296_cast_fp16 = slice_by_index(begin = var_25296_begin_0, end = var_25296_end_0, end_mask = var_25296_end_mask_0, x = var_24758_cast_fp16)[name = tensor("op_25296_cast_fp16")]; + tensor var_25303_begin_0 = const()[name = tensor("op_25303_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_25303_end_0 = const()[name = tensor("op_25303_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_25303_end_mask_0 = const()[name = tensor("op_25303_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_25303_cast_fp16 = slice_by_index(begin = var_25303_begin_0, end = var_25303_end_0, end_mask = var_25303_end_mask_0, x = var_24762_cast_fp16)[name = tensor("op_25303_cast_fp16")]; + tensor var_25310_begin_0 = const()[name = tensor("op_25310_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_25310_end_0 = const()[name = tensor("op_25310_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_25310_end_mask_0 = const()[name = tensor("op_25310_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_25310_cast_fp16 = slice_by_index(begin = var_25310_begin_0, end = var_25310_end_0, end_mask = var_25310_end_mask_0, x = var_24762_cast_fp16)[name = tensor("op_25310_cast_fp16")]; + tensor var_25317_begin_0 = const()[name = tensor("op_25317_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_25317_end_0 = const()[name = tensor("op_25317_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_25317_end_mask_0 = const()[name = tensor("op_25317_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_25317_cast_fp16 = slice_by_index(begin = var_25317_begin_0, end = var_25317_end_0, end_mask = var_25317_end_mask_0, x = var_24762_cast_fp16)[name = tensor("op_25317_cast_fp16")]; + tensor var_25324_begin_0 = const()[name = tensor("op_25324_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_25324_end_0 = const()[name = tensor("op_25324_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_25324_end_mask_0 = const()[name = tensor("op_25324_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_25324_cast_fp16 = slice_by_index(begin = var_25324_begin_0, end = var_25324_end_0, end_mask = var_25324_end_mask_0, x = var_24762_cast_fp16)[name = tensor("op_25324_cast_fp16")]; + tensor k_33_perm_0 = const()[name = tensor("k_33_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_25329_begin_0 = const()[name = tensor("op_25329_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_25329_end_0 = const()[name = tensor("op_25329_end_0"), val = tensor([1, 1500, 1, 64])]; + tensor var_25329_end_mask_0 = const()[name = tensor("op_25329_end_mask_0"), val = tensor([true, true, true, false])]; + tensor transpose_15 = transpose(perm = k_33_perm_0, x = key_33_cast_fp16)[name = tensor("transpose_15")]; + tensor var_25329_cast_fp16 = slice_by_index(begin = var_25329_begin_0, end = var_25329_end_0, end_mask = var_25329_end_mask_0, x = transpose_15)[name = tensor("op_25329_cast_fp16")]; + tensor var_25333_begin_0 = const()[name = tensor("op_25333_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor var_25333_end_0 = const()[name = tensor("op_25333_end_0"), val = tensor([1, 1500, 1, 128])]; + tensor var_25333_end_mask_0 = const()[name = tensor("op_25333_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_25333_cast_fp16 = slice_by_index(begin = var_25333_begin_0, end = var_25333_end_0, end_mask = var_25333_end_mask_0, x = transpose_15)[name = tensor("op_25333_cast_fp16")]; + tensor var_25337_begin_0 = const()[name = tensor("op_25337_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_25337_end_0 = const()[name = tensor("op_25337_end_0"), val = tensor([1, 1500, 1, 192])]; + tensor var_25337_end_mask_0 = const()[name = tensor("op_25337_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_25337_cast_fp16 = slice_by_index(begin = var_25337_begin_0, end = var_25337_end_0, end_mask = var_25337_end_mask_0, x = transpose_15)[name = tensor("op_25337_cast_fp16")]; + tensor var_25341_begin_0 = const()[name = tensor("op_25341_begin_0"), val = tensor([0, 0, 0, 192])]; + tensor var_25341_end_0 = const()[name = tensor("op_25341_end_0"), val = tensor([1, 1500, 1, 256])]; + tensor var_25341_end_mask_0 = const()[name = tensor("op_25341_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_25341_cast_fp16 = slice_by_index(begin = var_25341_begin_0, end = var_25341_end_0, end_mask = var_25341_end_mask_0, x = transpose_15)[name = tensor("op_25341_cast_fp16")]; + tensor var_25345_begin_0 = const()[name = tensor("op_25345_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_25345_end_0 = const()[name = tensor("op_25345_end_0"), val = tensor([1, 1500, 1, 320])]; + tensor var_25345_end_mask_0 = const()[name = tensor("op_25345_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_25345_cast_fp16 = slice_by_index(begin = var_25345_begin_0, end = var_25345_end_0, end_mask = var_25345_end_mask_0, x = transpose_15)[name = tensor("op_25345_cast_fp16")]; + tensor var_25349_begin_0 = const()[name = tensor("op_25349_begin_0"), val = tensor([0, 0, 0, 320])]; + tensor var_25349_end_0 = const()[name = tensor("op_25349_end_0"), val = tensor([1, 1500, 1, 384])]; + tensor var_25349_end_mask_0 = const()[name = tensor("op_25349_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_25349_cast_fp16 = slice_by_index(begin = var_25349_begin_0, end = var_25349_end_0, end_mask = var_25349_end_mask_0, x = transpose_15)[name = tensor("op_25349_cast_fp16")]; + tensor var_25353_begin_0 = const()[name = tensor("op_25353_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_25353_end_0 = const()[name = tensor("op_25353_end_0"), val = tensor([1, 1500, 1, 448])]; + tensor var_25353_end_mask_0 = const()[name = tensor("op_25353_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_25353_cast_fp16 = slice_by_index(begin = var_25353_begin_0, end = var_25353_end_0, end_mask = var_25353_end_mask_0, x = transpose_15)[name = tensor("op_25353_cast_fp16")]; + tensor var_25357_begin_0 = const()[name = tensor("op_25357_begin_0"), val = tensor([0, 0, 0, 448])]; + tensor var_25357_end_0 = const()[name = tensor("op_25357_end_0"), val = tensor([1, 1500, 1, 512])]; + tensor var_25357_end_mask_0 = const()[name = tensor("op_25357_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_25357_cast_fp16 = slice_by_index(begin = var_25357_begin_0, end = var_25357_end_0, end_mask = var_25357_end_mask_0, x = transpose_15)[name = tensor("op_25357_cast_fp16")]; + tensor var_25361_begin_0 = const()[name = tensor("op_25361_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_25361_end_0 = const()[name = tensor("op_25361_end_0"), val = tensor([1, 1500, 1, 576])]; + tensor var_25361_end_mask_0 = const()[name = tensor("op_25361_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_25361_cast_fp16 = slice_by_index(begin = var_25361_begin_0, end = var_25361_end_0, end_mask = var_25361_end_mask_0, x = transpose_15)[name = tensor("op_25361_cast_fp16")]; + tensor var_25365_begin_0 = const()[name = tensor("op_25365_begin_0"), val = tensor([0, 0, 0, 576])]; + tensor var_25365_end_0 = const()[name = tensor("op_25365_end_0"), val = tensor([1, 1500, 1, 640])]; + tensor var_25365_end_mask_0 = const()[name = tensor("op_25365_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_25365_cast_fp16 = slice_by_index(begin = var_25365_begin_0, end = var_25365_end_0, end_mask = var_25365_end_mask_0, x = transpose_15)[name = tensor("op_25365_cast_fp16")]; + tensor var_25369_begin_0 = const()[name = tensor("op_25369_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_25369_end_0 = const()[name = tensor("op_25369_end_0"), val = tensor([1, 1500, 1, 704])]; + tensor var_25369_end_mask_0 = const()[name = tensor("op_25369_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_25369_cast_fp16 = slice_by_index(begin = var_25369_begin_0, end = var_25369_end_0, end_mask = var_25369_end_mask_0, x = transpose_15)[name = tensor("op_25369_cast_fp16")]; + tensor var_25373_begin_0 = const()[name = tensor("op_25373_begin_0"), val = tensor([0, 0, 0, 704])]; + tensor var_25373_end_0 = const()[name = tensor("op_25373_end_0"), val = tensor([1, 1500, 1, 768])]; + tensor var_25373_end_mask_0 = const()[name = tensor("op_25373_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_25373_cast_fp16 = slice_by_index(begin = var_25373_begin_0, end = var_25373_end_0, end_mask = var_25373_end_mask_0, x = transpose_15)[name = tensor("op_25373_cast_fp16")]; + tensor var_25377_begin_0 = const()[name = tensor("op_25377_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_25377_end_0 = const()[name = tensor("op_25377_end_0"), val = tensor([1, 1500, 1, 832])]; + tensor var_25377_end_mask_0 = const()[name = tensor("op_25377_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_25377_cast_fp16 = slice_by_index(begin = var_25377_begin_0, end = var_25377_end_0, end_mask = var_25377_end_mask_0, x = transpose_15)[name = tensor("op_25377_cast_fp16")]; + tensor var_25381_begin_0 = const()[name = tensor("op_25381_begin_0"), val = tensor([0, 0, 0, 832])]; + tensor var_25381_end_0 = const()[name = tensor("op_25381_end_0"), val = tensor([1, 1500, 1, 896])]; + tensor var_25381_end_mask_0 = const()[name = tensor("op_25381_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_25381_cast_fp16 = slice_by_index(begin = var_25381_begin_0, end = var_25381_end_0, end_mask = var_25381_end_mask_0, x = transpose_15)[name = tensor("op_25381_cast_fp16")]; + tensor var_25385_begin_0 = const()[name = tensor("op_25385_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_25385_end_0 = const()[name = tensor("op_25385_end_0"), val = tensor([1, 1500, 1, 960])]; + tensor var_25385_end_mask_0 = const()[name = tensor("op_25385_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_25385_cast_fp16 = slice_by_index(begin = var_25385_begin_0, end = var_25385_end_0, end_mask = var_25385_end_mask_0, x = transpose_15)[name = tensor("op_25385_cast_fp16")]; + tensor var_25389_begin_0 = const()[name = tensor("op_25389_begin_0"), val = tensor([0, 0, 0, 960])]; + tensor var_25389_end_0 = const()[name = tensor("op_25389_end_0"), val = tensor([1, 1500, 1, 1024])]; + tensor var_25389_end_mask_0 = const()[name = tensor("op_25389_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_25389_cast_fp16 = slice_by_index(begin = var_25389_begin_0, end = var_25389_end_0, end_mask = var_25389_end_mask_0, x = transpose_15)[name = tensor("op_25389_cast_fp16")]; + tensor var_25393_begin_0 = const()[name = tensor("op_25393_begin_0"), val = tensor([0, 0, 0, 1024])]; + tensor var_25393_end_0 = const()[name = tensor("op_25393_end_0"), val = tensor([1, 1500, 1, 1088])]; + tensor var_25393_end_mask_0 = const()[name = tensor("op_25393_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_25393_cast_fp16 = slice_by_index(begin = var_25393_begin_0, end = var_25393_end_0, end_mask = var_25393_end_mask_0, x = transpose_15)[name = tensor("op_25393_cast_fp16")]; + tensor var_25397_begin_0 = const()[name = tensor("op_25397_begin_0"), val = tensor([0, 0, 0, 1088])]; + tensor var_25397_end_0 = const()[name = tensor("op_25397_end_0"), val = tensor([1, 1500, 1, 1152])]; + tensor var_25397_end_mask_0 = const()[name = tensor("op_25397_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_25397_cast_fp16 = slice_by_index(begin = var_25397_begin_0, end = var_25397_end_0, end_mask = var_25397_end_mask_0, x = transpose_15)[name = tensor("op_25397_cast_fp16")]; + tensor var_25401_begin_0 = const()[name = tensor("op_25401_begin_0"), val = tensor([0, 0, 0, 1152])]; + tensor var_25401_end_0 = const()[name = tensor("op_25401_end_0"), val = tensor([1, 1500, 1, 1216])]; + tensor var_25401_end_mask_0 = const()[name = tensor("op_25401_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_25401_cast_fp16 = slice_by_index(begin = var_25401_begin_0, end = var_25401_end_0, end_mask = var_25401_end_mask_0, x = transpose_15)[name = tensor("op_25401_cast_fp16")]; + tensor var_25405_begin_0 = const()[name = tensor("op_25405_begin_0"), val = tensor([0, 0, 0, 1216])]; + tensor var_25405_end_0 = const()[name = tensor("op_25405_end_0"), val = tensor([1, 1500, 1, 1280])]; + tensor var_25405_end_mask_0 = const()[name = tensor("op_25405_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_25405_cast_fp16 = slice_by_index(begin = var_25405_begin_0, end = var_25405_end_0, end_mask = var_25405_end_mask_0, x = transpose_15)[name = tensor("op_25405_cast_fp16")]; + tensor var_25407_begin_0 = const()[name = tensor("op_25407_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_25407_end_0 = const()[name = tensor("op_25407_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_25407_end_mask_0 = const()[name = tensor("op_25407_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_25407_cast_fp16 = slice_by_index(begin = var_25407_begin_0, end = var_25407_end_0, end_mask = var_25407_end_mask_0, x = value_33_cast_fp16)[name = tensor("op_25407_cast_fp16")]; + tensor var_25411_begin_0 = const()[name = tensor("op_25411_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_25411_end_0 = const()[name = tensor("op_25411_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_25411_end_mask_0 = const()[name = tensor("op_25411_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_25411_cast_fp16 = slice_by_index(begin = var_25411_begin_0, end = var_25411_end_0, end_mask = var_25411_end_mask_0, x = value_33_cast_fp16)[name = tensor("op_25411_cast_fp16")]; + tensor var_25415_begin_0 = const()[name = tensor("op_25415_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_25415_end_0 = const()[name = tensor("op_25415_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_25415_end_mask_0 = const()[name = tensor("op_25415_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_25415_cast_fp16 = slice_by_index(begin = var_25415_begin_0, end = var_25415_end_0, end_mask = var_25415_end_mask_0, x = value_33_cast_fp16)[name = tensor("op_25415_cast_fp16")]; + tensor var_25419_begin_0 = const()[name = tensor("op_25419_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_25419_end_0 = const()[name = tensor("op_25419_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_25419_end_mask_0 = const()[name = tensor("op_25419_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_25419_cast_fp16 = slice_by_index(begin = var_25419_begin_0, end = var_25419_end_0, end_mask = var_25419_end_mask_0, x = value_33_cast_fp16)[name = tensor("op_25419_cast_fp16")]; + tensor var_25423_begin_0 = const()[name = tensor("op_25423_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_25423_end_0 = const()[name = tensor("op_25423_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_25423_end_mask_0 = const()[name = tensor("op_25423_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_25423_cast_fp16 = slice_by_index(begin = var_25423_begin_0, end = var_25423_end_0, end_mask = var_25423_end_mask_0, x = value_33_cast_fp16)[name = tensor("op_25423_cast_fp16")]; + tensor var_25427_begin_0 = const()[name = tensor("op_25427_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_25427_end_0 = const()[name = tensor("op_25427_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_25427_end_mask_0 = const()[name = tensor("op_25427_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_25427_cast_fp16 = slice_by_index(begin = var_25427_begin_0, end = var_25427_end_0, end_mask = var_25427_end_mask_0, x = value_33_cast_fp16)[name = tensor("op_25427_cast_fp16")]; + tensor var_25431_begin_0 = const()[name = tensor("op_25431_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_25431_end_0 = const()[name = tensor("op_25431_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_25431_end_mask_0 = const()[name = tensor("op_25431_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_25431_cast_fp16 = slice_by_index(begin = var_25431_begin_0, end = var_25431_end_0, end_mask = var_25431_end_mask_0, x = value_33_cast_fp16)[name = tensor("op_25431_cast_fp16")]; + tensor var_25435_begin_0 = const()[name = tensor("op_25435_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_25435_end_0 = const()[name = tensor("op_25435_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_25435_end_mask_0 = const()[name = tensor("op_25435_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_25435_cast_fp16 = slice_by_index(begin = var_25435_begin_0, end = var_25435_end_0, end_mask = var_25435_end_mask_0, x = value_33_cast_fp16)[name = tensor("op_25435_cast_fp16")]; + tensor var_25439_begin_0 = const()[name = tensor("op_25439_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_25439_end_0 = const()[name = tensor("op_25439_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_25439_end_mask_0 = const()[name = tensor("op_25439_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_25439_cast_fp16 = slice_by_index(begin = var_25439_begin_0, end = var_25439_end_0, end_mask = var_25439_end_mask_0, x = value_33_cast_fp16)[name = tensor("op_25439_cast_fp16")]; + tensor var_25443_begin_0 = const()[name = tensor("op_25443_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_25443_end_0 = const()[name = tensor("op_25443_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_25443_end_mask_0 = const()[name = tensor("op_25443_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_25443_cast_fp16 = slice_by_index(begin = var_25443_begin_0, end = var_25443_end_0, end_mask = var_25443_end_mask_0, x = value_33_cast_fp16)[name = tensor("op_25443_cast_fp16")]; + tensor var_25447_begin_0 = const()[name = tensor("op_25447_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_25447_end_0 = const()[name = tensor("op_25447_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_25447_end_mask_0 = const()[name = tensor("op_25447_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_25447_cast_fp16 = slice_by_index(begin = var_25447_begin_0, end = var_25447_end_0, end_mask = var_25447_end_mask_0, x = value_33_cast_fp16)[name = tensor("op_25447_cast_fp16")]; + tensor var_25451_begin_0 = const()[name = tensor("op_25451_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_25451_end_0 = const()[name = tensor("op_25451_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_25451_end_mask_0 = const()[name = tensor("op_25451_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_25451_cast_fp16 = slice_by_index(begin = var_25451_begin_0, end = var_25451_end_0, end_mask = var_25451_end_mask_0, x = value_33_cast_fp16)[name = tensor("op_25451_cast_fp16")]; + tensor var_25455_begin_0 = const()[name = tensor("op_25455_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_25455_end_0 = const()[name = tensor("op_25455_end_0"), val = tensor([1, 832, 1, 1500])]; + tensor var_25455_end_mask_0 = const()[name = tensor("op_25455_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_25455_cast_fp16 = slice_by_index(begin = var_25455_begin_0, end = var_25455_end_0, end_mask = var_25455_end_mask_0, x = value_33_cast_fp16)[name = tensor("op_25455_cast_fp16")]; + tensor var_25459_begin_0 = const()[name = tensor("op_25459_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_25459_end_0 = const()[name = tensor("op_25459_end_0"), val = tensor([1, 896, 1, 1500])]; + tensor var_25459_end_mask_0 = const()[name = tensor("op_25459_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_25459_cast_fp16 = slice_by_index(begin = var_25459_begin_0, end = var_25459_end_0, end_mask = var_25459_end_mask_0, x = value_33_cast_fp16)[name = tensor("op_25459_cast_fp16")]; + tensor var_25463_begin_0 = const()[name = tensor("op_25463_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_25463_end_0 = const()[name = tensor("op_25463_end_0"), val = tensor([1, 960, 1, 1500])]; + tensor var_25463_end_mask_0 = const()[name = tensor("op_25463_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_25463_cast_fp16 = slice_by_index(begin = var_25463_begin_0, end = var_25463_end_0, end_mask = var_25463_end_mask_0, x = value_33_cast_fp16)[name = tensor("op_25463_cast_fp16")]; + tensor var_25467_begin_0 = const()[name = tensor("op_25467_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_25467_end_0 = const()[name = tensor("op_25467_end_0"), val = tensor([1, 1024, 1, 1500])]; + tensor var_25467_end_mask_0 = const()[name = tensor("op_25467_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_25467_cast_fp16 = slice_by_index(begin = var_25467_begin_0, end = var_25467_end_0, end_mask = var_25467_end_mask_0, x = value_33_cast_fp16)[name = tensor("op_25467_cast_fp16")]; + tensor var_25471_begin_0 = const()[name = tensor("op_25471_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_25471_end_0 = const()[name = tensor("op_25471_end_0"), val = tensor([1, 1088, 1, 1500])]; + tensor var_25471_end_mask_0 = const()[name = tensor("op_25471_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_25471_cast_fp16 = slice_by_index(begin = var_25471_begin_0, end = var_25471_end_0, end_mask = var_25471_end_mask_0, x = value_33_cast_fp16)[name = tensor("op_25471_cast_fp16")]; + tensor var_25475_begin_0 = const()[name = tensor("op_25475_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_25475_end_0 = const()[name = tensor("op_25475_end_0"), val = tensor([1, 1152, 1, 1500])]; + tensor var_25475_end_mask_0 = const()[name = tensor("op_25475_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_25475_cast_fp16 = slice_by_index(begin = var_25475_begin_0, end = var_25475_end_0, end_mask = var_25475_end_mask_0, x = value_33_cast_fp16)[name = tensor("op_25475_cast_fp16")]; + tensor var_25479_begin_0 = const()[name = tensor("op_25479_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_25479_end_0 = const()[name = tensor("op_25479_end_0"), val = tensor([1, 1216, 1, 1500])]; + tensor var_25479_end_mask_0 = const()[name = tensor("op_25479_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_25479_cast_fp16 = slice_by_index(begin = var_25479_begin_0, end = var_25479_end_0, end_mask = var_25479_end_mask_0, x = value_33_cast_fp16)[name = tensor("op_25479_cast_fp16")]; + tensor var_25483_begin_0 = const()[name = tensor("op_25483_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_25483_end_0 = const()[name = tensor("op_25483_end_0"), val = tensor([1, 1280, 1, 1500])]; + tensor var_25483_end_mask_0 = const()[name = tensor("op_25483_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_25483_cast_fp16 = slice_by_index(begin = var_25483_begin_0, end = var_25483_end_0, end_mask = var_25483_end_mask_0, x = value_33_cast_fp16)[name = tensor("op_25483_cast_fp16")]; + tensor var_25487_equation_0 = const()[name = tensor("op_25487_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25487_cast_fp16 = einsum(equation = var_25487_equation_0, values = (var_25329_cast_fp16, var_24771_cast_fp16))[name = tensor("op_25487_cast_fp16")]; + tensor var_25488_to_fp16 = const()[name = tensor("op_25488_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2561_cast_fp16 = mul(x = var_25487_cast_fp16, y = var_25488_to_fp16)[name = tensor("aw_chunk_2561_cast_fp16")]; + tensor var_25491_equation_0 = const()[name = tensor("op_25491_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25491_cast_fp16 = einsum(equation = var_25491_equation_0, values = (var_25329_cast_fp16, var_24778_cast_fp16))[name = tensor("op_25491_cast_fp16")]; + tensor var_25492_to_fp16 = const()[name = tensor("op_25492_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2563_cast_fp16 = mul(x = var_25491_cast_fp16, y = var_25492_to_fp16)[name = tensor("aw_chunk_2563_cast_fp16")]; + tensor var_25495_equation_0 = const()[name = tensor("op_25495_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25495_cast_fp16 = einsum(equation = var_25495_equation_0, values = (var_25329_cast_fp16, var_24785_cast_fp16))[name = tensor("op_25495_cast_fp16")]; + tensor var_25496_to_fp16 = const()[name = tensor("op_25496_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2565_cast_fp16 = mul(x = var_25495_cast_fp16, y = var_25496_to_fp16)[name = tensor("aw_chunk_2565_cast_fp16")]; + tensor var_25499_equation_0 = const()[name = tensor("op_25499_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25499_cast_fp16 = einsum(equation = var_25499_equation_0, values = (var_25329_cast_fp16, var_24792_cast_fp16))[name = tensor("op_25499_cast_fp16")]; + tensor var_25500_to_fp16 = const()[name = tensor("op_25500_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2567_cast_fp16 = mul(x = var_25499_cast_fp16, y = var_25500_to_fp16)[name = tensor("aw_chunk_2567_cast_fp16")]; + tensor var_25503_equation_0 = const()[name = tensor("op_25503_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25503_cast_fp16 = einsum(equation = var_25503_equation_0, values = (var_25333_cast_fp16, var_24799_cast_fp16))[name = tensor("op_25503_cast_fp16")]; + tensor var_25504_to_fp16 = const()[name = tensor("op_25504_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2569_cast_fp16 = mul(x = var_25503_cast_fp16, y = var_25504_to_fp16)[name = tensor("aw_chunk_2569_cast_fp16")]; + tensor var_25507_equation_0 = const()[name = tensor("op_25507_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25507_cast_fp16 = einsum(equation = var_25507_equation_0, values = (var_25333_cast_fp16, var_24806_cast_fp16))[name = tensor("op_25507_cast_fp16")]; + tensor var_25508_to_fp16 = const()[name = tensor("op_25508_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2571_cast_fp16 = mul(x = var_25507_cast_fp16, y = var_25508_to_fp16)[name = tensor("aw_chunk_2571_cast_fp16")]; + tensor var_25511_equation_0 = const()[name = tensor("op_25511_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25511_cast_fp16 = einsum(equation = var_25511_equation_0, values = (var_25333_cast_fp16, var_24813_cast_fp16))[name = tensor("op_25511_cast_fp16")]; + tensor var_25512_to_fp16 = const()[name = tensor("op_25512_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2573_cast_fp16 = mul(x = var_25511_cast_fp16, y = var_25512_to_fp16)[name = tensor("aw_chunk_2573_cast_fp16")]; + tensor var_25515_equation_0 = const()[name = tensor("op_25515_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25515_cast_fp16 = einsum(equation = var_25515_equation_0, values = (var_25333_cast_fp16, var_24820_cast_fp16))[name = tensor("op_25515_cast_fp16")]; + tensor var_25516_to_fp16 = const()[name = tensor("op_25516_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2575_cast_fp16 = mul(x = var_25515_cast_fp16, y = var_25516_to_fp16)[name = tensor("aw_chunk_2575_cast_fp16")]; + tensor var_25519_equation_0 = const()[name = tensor("op_25519_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25519_cast_fp16 = einsum(equation = var_25519_equation_0, values = (var_25337_cast_fp16, var_24827_cast_fp16))[name = tensor("op_25519_cast_fp16")]; + tensor var_25520_to_fp16 = const()[name = tensor("op_25520_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2577_cast_fp16 = mul(x = var_25519_cast_fp16, y = var_25520_to_fp16)[name = tensor("aw_chunk_2577_cast_fp16")]; + tensor var_25523_equation_0 = const()[name = tensor("op_25523_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25523_cast_fp16 = einsum(equation = var_25523_equation_0, values = (var_25337_cast_fp16, var_24834_cast_fp16))[name = tensor("op_25523_cast_fp16")]; + tensor var_25524_to_fp16 = const()[name = tensor("op_25524_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2579_cast_fp16 = mul(x = var_25523_cast_fp16, y = var_25524_to_fp16)[name = tensor("aw_chunk_2579_cast_fp16")]; + tensor var_25527_equation_0 = const()[name = tensor("op_25527_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25527_cast_fp16 = einsum(equation = var_25527_equation_0, values = (var_25337_cast_fp16, var_24841_cast_fp16))[name = tensor("op_25527_cast_fp16")]; + tensor var_25528_to_fp16 = const()[name = tensor("op_25528_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2581_cast_fp16 = mul(x = var_25527_cast_fp16, y = var_25528_to_fp16)[name = tensor("aw_chunk_2581_cast_fp16")]; + tensor var_25531_equation_0 = const()[name = tensor("op_25531_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25531_cast_fp16 = einsum(equation = var_25531_equation_0, values = (var_25337_cast_fp16, var_24848_cast_fp16))[name = tensor("op_25531_cast_fp16")]; + tensor var_25532_to_fp16 = const()[name = tensor("op_25532_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2583_cast_fp16 = mul(x = var_25531_cast_fp16, y = var_25532_to_fp16)[name = tensor("aw_chunk_2583_cast_fp16")]; + tensor var_25535_equation_0 = const()[name = tensor("op_25535_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25535_cast_fp16 = einsum(equation = var_25535_equation_0, values = (var_25341_cast_fp16, var_24855_cast_fp16))[name = tensor("op_25535_cast_fp16")]; + tensor var_25536_to_fp16 = const()[name = tensor("op_25536_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2585_cast_fp16 = mul(x = var_25535_cast_fp16, y = var_25536_to_fp16)[name = tensor("aw_chunk_2585_cast_fp16")]; + tensor var_25539_equation_0 = const()[name = tensor("op_25539_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25539_cast_fp16 = einsum(equation = var_25539_equation_0, values = (var_25341_cast_fp16, var_24862_cast_fp16))[name = tensor("op_25539_cast_fp16")]; + tensor var_25540_to_fp16 = const()[name = tensor("op_25540_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2587_cast_fp16 = mul(x = var_25539_cast_fp16, y = var_25540_to_fp16)[name = tensor("aw_chunk_2587_cast_fp16")]; + tensor var_25543_equation_0 = const()[name = tensor("op_25543_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25543_cast_fp16 = einsum(equation = var_25543_equation_0, values = (var_25341_cast_fp16, var_24869_cast_fp16))[name = tensor("op_25543_cast_fp16")]; + tensor var_25544_to_fp16 = const()[name = tensor("op_25544_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2589_cast_fp16 = mul(x = var_25543_cast_fp16, y = var_25544_to_fp16)[name = tensor("aw_chunk_2589_cast_fp16")]; + tensor var_25547_equation_0 = const()[name = tensor("op_25547_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25547_cast_fp16 = einsum(equation = var_25547_equation_0, values = (var_25341_cast_fp16, var_24876_cast_fp16))[name = tensor("op_25547_cast_fp16")]; + tensor var_25548_to_fp16 = const()[name = tensor("op_25548_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2591_cast_fp16 = mul(x = var_25547_cast_fp16, y = var_25548_to_fp16)[name = tensor("aw_chunk_2591_cast_fp16")]; + tensor var_25551_equation_0 = const()[name = tensor("op_25551_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25551_cast_fp16 = einsum(equation = var_25551_equation_0, values = (var_25345_cast_fp16, var_24883_cast_fp16))[name = tensor("op_25551_cast_fp16")]; + tensor var_25552_to_fp16 = const()[name = tensor("op_25552_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2593_cast_fp16 = mul(x = var_25551_cast_fp16, y = var_25552_to_fp16)[name = tensor("aw_chunk_2593_cast_fp16")]; + tensor var_25555_equation_0 = const()[name = tensor("op_25555_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25555_cast_fp16 = einsum(equation = var_25555_equation_0, values = (var_25345_cast_fp16, var_24890_cast_fp16))[name = tensor("op_25555_cast_fp16")]; + tensor var_25556_to_fp16 = const()[name = tensor("op_25556_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2595_cast_fp16 = mul(x = var_25555_cast_fp16, y = var_25556_to_fp16)[name = tensor("aw_chunk_2595_cast_fp16")]; + tensor var_25559_equation_0 = const()[name = tensor("op_25559_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25559_cast_fp16 = einsum(equation = var_25559_equation_0, values = (var_25345_cast_fp16, var_24897_cast_fp16))[name = tensor("op_25559_cast_fp16")]; + tensor var_25560_to_fp16 = const()[name = tensor("op_25560_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2597_cast_fp16 = mul(x = var_25559_cast_fp16, y = var_25560_to_fp16)[name = tensor("aw_chunk_2597_cast_fp16")]; + tensor var_25563_equation_0 = const()[name = tensor("op_25563_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25563_cast_fp16 = einsum(equation = var_25563_equation_0, values = (var_25345_cast_fp16, var_24904_cast_fp16))[name = tensor("op_25563_cast_fp16")]; + tensor var_25564_to_fp16 = const()[name = tensor("op_25564_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2599_cast_fp16 = mul(x = var_25563_cast_fp16, y = var_25564_to_fp16)[name = tensor("aw_chunk_2599_cast_fp16")]; + tensor var_25567_equation_0 = const()[name = tensor("op_25567_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25567_cast_fp16 = einsum(equation = var_25567_equation_0, values = (var_25349_cast_fp16, var_24911_cast_fp16))[name = tensor("op_25567_cast_fp16")]; + tensor var_25568_to_fp16 = const()[name = tensor("op_25568_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2601_cast_fp16 = mul(x = var_25567_cast_fp16, y = var_25568_to_fp16)[name = tensor("aw_chunk_2601_cast_fp16")]; + tensor var_25571_equation_0 = const()[name = tensor("op_25571_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25571_cast_fp16 = einsum(equation = var_25571_equation_0, values = (var_25349_cast_fp16, var_24918_cast_fp16))[name = tensor("op_25571_cast_fp16")]; + tensor var_25572_to_fp16 = const()[name = tensor("op_25572_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2603_cast_fp16 = mul(x = var_25571_cast_fp16, y = var_25572_to_fp16)[name = tensor("aw_chunk_2603_cast_fp16")]; + tensor var_25575_equation_0 = const()[name = tensor("op_25575_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25575_cast_fp16 = einsum(equation = var_25575_equation_0, values = (var_25349_cast_fp16, var_24925_cast_fp16))[name = tensor("op_25575_cast_fp16")]; + tensor var_25576_to_fp16 = const()[name = tensor("op_25576_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2605_cast_fp16 = mul(x = var_25575_cast_fp16, y = var_25576_to_fp16)[name = tensor("aw_chunk_2605_cast_fp16")]; + tensor var_25579_equation_0 = const()[name = tensor("op_25579_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25579_cast_fp16 = einsum(equation = var_25579_equation_0, values = (var_25349_cast_fp16, var_24932_cast_fp16))[name = tensor("op_25579_cast_fp16")]; + tensor var_25580_to_fp16 = const()[name = tensor("op_25580_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2607_cast_fp16 = mul(x = var_25579_cast_fp16, y = var_25580_to_fp16)[name = tensor("aw_chunk_2607_cast_fp16")]; + tensor var_25583_equation_0 = const()[name = tensor("op_25583_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25583_cast_fp16 = einsum(equation = var_25583_equation_0, values = (var_25353_cast_fp16, var_24939_cast_fp16))[name = tensor("op_25583_cast_fp16")]; + tensor var_25584_to_fp16 = const()[name = tensor("op_25584_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2609_cast_fp16 = mul(x = var_25583_cast_fp16, y = var_25584_to_fp16)[name = tensor("aw_chunk_2609_cast_fp16")]; + tensor var_25587_equation_0 = const()[name = tensor("op_25587_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25587_cast_fp16 = einsum(equation = var_25587_equation_0, values = (var_25353_cast_fp16, var_24946_cast_fp16))[name = tensor("op_25587_cast_fp16")]; + tensor var_25588_to_fp16 = const()[name = tensor("op_25588_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2611_cast_fp16 = mul(x = var_25587_cast_fp16, y = var_25588_to_fp16)[name = tensor("aw_chunk_2611_cast_fp16")]; + tensor var_25591_equation_0 = const()[name = tensor("op_25591_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25591_cast_fp16 = einsum(equation = var_25591_equation_0, values = (var_25353_cast_fp16, var_24953_cast_fp16))[name = tensor("op_25591_cast_fp16")]; + tensor var_25592_to_fp16 = const()[name = tensor("op_25592_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2613_cast_fp16 = mul(x = var_25591_cast_fp16, y = var_25592_to_fp16)[name = tensor("aw_chunk_2613_cast_fp16")]; + tensor var_25595_equation_0 = const()[name = tensor("op_25595_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25595_cast_fp16 = einsum(equation = var_25595_equation_0, values = (var_25353_cast_fp16, var_24960_cast_fp16))[name = tensor("op_25595_cast_fp16")]; + tensor var_25596_to_fp16 = const()[name = tensor("op_25596_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2615_cast_fp16 = mul(x = var_25595_cast_fp16, y = var_25596_to_fp16)[name = tensor("aw_chunk_2615_cast_fp16")]; + tensor var_25599_equation_0 = const()[name = tensor("op_25599_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25599_cast_fp16 = einsum(equation = var_25599_equation_0, values = (var_25357_cast_fp16, var_24967_cast_fp16))[name = tensor("op_25599_cast_fp16")]; + tensor var_25600_to_fp16 = const()[name = tensor("op_25600_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2617_cast_fp16 = mul(x = var_25599_cast_fp16, y = var_25600_to_fp16)[name = tensor("aw_chunk_2617_cast_fp16")]; + tensor var_25603_equation_0 = const()[name = tensor("op_25603_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25603_cast_fp16 = einsum(equation = var_25603_equation_0, values = (var_25357_cast_fp16, var_24974_cast_fp16))[name = tensor("op_25603_cast_fp16")]; + tensor var_25604_to_fp16 = const()[name = tensor("op_25604_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2619_cast_fp16 = mul(x = var_25603_cast_fp16, y = var_25604_to_fp16)[name = tensor("aw_chunk_2619_cast_fp16")]; + tensor var_25607_equation_0 = const()[name = tensor("op_25607_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25607_cast_fp16 = einsum(equation = var_25607_equation_0, values = (var_25357_cast_fp16, var_24981_cast_fp16))[name = tensor("op_25607_cast_fp16")]; + tensor var_25608_to_fp16 = const()[name = tensor("op_25608_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2621_cast_fp16 = mul(x = var_25607_cast_fp16, y = var_25608_to_fp16)[name = tensor("aw_chunk_2621_cast_fp16")]; + tensor var_25611_equation_0 = const()[name = tensor("op_25611_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25611_cast_fp16 = einsum(equation = var_25611_equation_0, values = (var_25357_cast_fp16, var_24988_cast_fp16))[name = tensor("op_25611_cast_fp16")]; + tensor var_25612_to_fp16 = const()[name = tensor("op_25612_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2623_cast_fp16 = mul(x = var_25611_cast_fp16, y = var_25612_to_fp16)[name = tensor("aw_chunk_2623_cast_fp16")]; + tensor var_25615_equation_0 = const()[name = tensor("op_25615_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25615_cast_fp16 = einsum(equation = var_25615_equation_0, values = (var_25361_cast_fp16, var_24995_cast_fp16))[name = tensor("op_25615_cast_fp16")]; + tensor var_25616_to_fp16 = const()[name = tensor("op_25616_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2625_cast_fp16 = mul(x = var_25615_cast_fp16, y = var_25616_to_fp16)[name = tensor("aw_chunk_2625_cast_fp16")]; + tensor var_25619_equation_0 = const()[name = tensor("op_25619_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25619_cast_fp16 = einsum(equation = var_25619_equation_0, values = (var_25361_cast_fp16, var_25002_cast_fp16))[name = tensor("op_25619_cast_fp16")]; + tensor var_25620_to_fp16 = const()[name = tensor("op_25620_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2627_cast_fp16 = mul(x = var_25619_cast_fp16, y = var_25620_to_fp16)[name = tensor("aw_chunk_2627_cast_fp16")]; + tensor var_25623_equation_0 = const()[name = tensor("op_25623_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25623_cast_fp16 = einsum(equation = var_25623_equation_0, values = (var_25361_cast_fp16, var_25009_cast_fp16))[name = tensor("op_25623_cast_fp16")]; + tensor var_25624_to_fp16 = const()[name = tensor("op_25624_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2629_cast_fp16 = mul(x = var_25623_cast_fp16, y = var_25624_to_fp16)[name = tensor("aw_chunk_2629_cast_fp16")]; + tensor var_25627_equation_0 = const()[name = tensor("op_25627_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25627_cast_fp16 = einsum(equation = var_25627_equation_0, values = (var_25361_cast_fp16, var_25016_cast_fp16))[name = tensor("op_25627_cast_fp16")]; + tensor var_25628_to_fp16 = const()[name = tensor("op_25628_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2631_cast_fp16 = mul(x = var_25627_cast_fp16, y = var_25628_to_fp16)[name = tensor("aw_chunk_2631_cast_fp16")]; + tensor var_25631_equation_0 = const()[name = tensor("op_25631_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25631_cast_fp16 = einsum(equation = var_25631_equation_0, values = (var_25365_cast_fp16, var_25023_cast_fp16))[name = tensor("op_25631_cast_fp16")]; + tensor var_25632_to_fp16 = const()[name = tensor("op_25632_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2633_cast_fp16 = mul(x = var_25631_cast_fp16, y = var_25632_to_fp16)[name = tensor("aw_chunk_2633_cast_fp16")]; + tensor var_25635_equation_0 = const()[name = tensor("op_25635_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25635_cast_fp16 = einsum(equation = var_25635_equation_0, values = (var_25365_cast_fp16, var_25030_cast_fp16))[name = tensor("op_25635_cast_fp16")]; + tensor var_25636_to_fp16 = const()[name = tensor("op_25636_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2635_cast_fp16 = mul(x = var_25635_cast_fp16, y = var_25636_to_fp16)[name = tensor("aw_chunk_2635_cast_fp16")]; + tensor var_25639_equation_0 = const()[name = tensor("op_25639_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25639_cast_fp16 = einsum(equation = var_25639_equation_0, values = (var_25365_cast_fp16, var_25037_cast_fp16))[name = tensor("op_25639_cast_fp16")]; + tensor var_25640_to_fp16 = const()[name = tensor("op_25640_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2637_cast_fp16 = mul(x = var_25639_cast_fp16, y = var_25640_to_fp16)[name = tensor("aw_chunk_2637_cast_fp16")]; + tensor var_25643_equation_0 = const()[name = tensor("op_25643_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25643_cast_fp16 = einsum(equation = var_25643_equation_0, values = (var_25365_cast_fp16, var_25044_cast_fp16))[name = tensor("op_25643_cast_fp16")]; + tensor var_25644_to_fp16 = const()[name = tensor("op_25644_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2639_cast_fp16 = mul(x = var_25643_cast_fp16, y = var_25644_to_fp16)[name = tensor("aw_chunk_2639_cast_fp16")]; + tensor var_25647_equation_0 = const()[name = tensor("op_25647_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25647_cast_fp16 = einsum(equation = var_25647_equation_0, values = (var_25369_cast_fp16, var_25051_cast_fp16))[name = tensor("op_25647_cast_fp16")]; + tensor var_25648_to_fp16 = const()[name = tensor("op_25648_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2641_cast_fp16 = mul(x = var_25647_cast_fp16, y = var_25648_to_fp16)[name = tensor("aw_chunk_2641_cast_fp16")]; + tensor var_25651_equation_0 = const()[name = tensor("op_25651_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25651_cast_fp16 = einsum(equation = var_25651_equation_0, values = (var_25369_cast_fp16, var_25058_cast_fp16))[name = tensor("op_25651_cast_fp16")]; + tensor var_25652_to_fp16 = const()[name = tensor("op_25652_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2643_cast_fp16 = mul(x = var_25651_cast_fp16, y = var_25652_to_fp16)[name = tensor("aw_chunk_2643_cast_fp16")]; + tensor var_25655_equation_0 = const()[name = tensor("op_25655_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25655_cast_fp16 = einsum(equation = var_25655_equation_0, values = (var_25369_cast_fp16, var_25065_cast_fp16))[name = tensor("op_25655_cast_fp16")]; + tensor var_25656_to_fp16 = const()[name = tensor("op_25656_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2645_cast_fp16 = mul(x = var_25655_cast_fp16, y = var_25656_to_fp16)[name = tensor("aw_chunk_2645_cast_fp16")]; + tensor var_25659_equation_0 = const()[name = tensor("op_25659_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25659_cast_fp16 = einsum(equation = var_25659_equation_0, values = (var_25369_cast_fp16, var_25072_cast_fp16))[name = tensor("op_25659_cast_fp16")]; + tensor var_25660_to_fp16 = const()[name = tensor("op_25660_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2647_cast_fp16 = mul(x = var_25659_cast_fp16, y = var_25660_to_fp16)[name = tensor("aw_chunk_2647_cast_fp16")]; + tensor var_25663_equation_0 = const()[name = tensor("op_25663_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25663_cast_fp16 = einsum(equation = var_25663_equation_0, values = (var_25373_cast_fp16, var_25079_cast_fp16))[name = tensor("op_25663_cast_fp16")]; + tensor var_25664_to_fp16 = const()[name = tensor("op_25664_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2649_cast_fp16 = mul(x = var_25663_cast_fp16, y = var_25664_to_fp16)[name = tensor("aw_chunk_2649_cast_fp16")]; + tensor var_25667_equation_0 = const()[name = tensor("op_25667_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25667_cast_fp16 = einsum(equation = var_25667_equation_0, values = (var_25373_cast_fp16, var_25086_cast_fp16))[name = tensor("op_25667_cast_fp16")]; + tensor var_25668_to_fp16 = const()[name = tensor("op_25668_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2651_cast_fp16 = mul(x = var_25667_cast_fp16, y = var_25668_to_fp16)[name = tensor("aw_chunk_2651_cast_fp16")]; + tensor var_25671_equation_0 = const()[name = tensor("op_25671_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25671_cast_fp16 = einsum(equation = var_25671_equation_0, values = (var_25373_cast_fp16, var_25093_cast_fp16))[name = tensor("op_25671_cast_fp16")]; + tensor var_25672_to_fp16 = const()[name = tensor("op_25672_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2653_cast_fp16 = mul(x = var_25671_cast_fp16, y = var_25672_to_fp16)[name = tensor("aw_chunk_2653_cast_fp16")]; + tensor var_25675_equation_0 = const()[name = tensor("op_25675_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25675_cast_fp16 = einsum(equation = var_25675_equation_0, values = (var_25373_cast_fp16, var_25100_cast_fp16))[name = tensor("op_25675_cast_fp16")]; + tensor var_25676_to_fp16 = const()[name = tensor("op_25676_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2655_cast_fp16 = mul(x = var_25675_cast_fp16, y = var_25676_to_fp16)[name = tensor("aw_chunk_2655_cast_fp16")]; + tensor var_25679_equation_0 = const()[name = tensor("op_25679_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25679_cast_fp16 = einsum(equation = var_25679_equation_0, values = (var_25377_cast_fp16, var_25107_cast_fp16))[name = tensor("op_25679_cast_fp16")]; + tensor var_25680_to_fp16 = const()[name = tensor("op_25680_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2657_cast_fp16 = mul(x = var_25679_cast_fp16, y = var_25680_to_fp16)[name = tensor("aw_chunk_2657_cast_fp16")]; + tensor var_25683_equation_0 = const()[name = tensor("op_25683_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25683_cast_fp16 = einsum(equation = var_25683_equation_0, values = (var_25377_cast_fp16, var_25114_cast_fp16))[name = tensor("op_25683_cast_fp16")]; + tensor var_25684_to_fp16 = const()[name = tensor("op_25684_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2659_cast_fp16 = mul(x = var_25683_cast_fp16, y = var_25684_to_fp16)[name = tensor("aw_chunk_2659_cast_fp16")]; + tensor var_25687_equation_0 = const()[name = tensor("op_25687_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25687_cast_fp16 = einsum(equation = var_25687_equation_0, values = (var_25377_cast_fp16, var_25121_cast_fp16))[name = tensor("op_25687_cast_fp16")]; + tensor var_25688_to_fp16 = const()[name = tensor("op_25688_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2661_cast_fp16 = mul(x = var_25687_cast_fp16, y = var_25688_to_fp16)[name = tensor("aw_chunk_2661_cast_fp16")]; + tensor var_25691_equation_0 = const()[name = tensor("op_25691_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25691_cast_fp16 = einsum(equation = var_25691_equation_0, values = (var_25377_cast_fp16, var_25128_cast_fp16))[name = tensor("op_25691_cast_fp16")]; + tensor var_25692_to_fp16 = const()[name = tensor("op_25692_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2663_cast_fp16 = mul(x = var_25691_cast_fp16, y = var_25692_to_fp16)[name = tensor("aw_chunk_2663_cast_fp16")]; + tensor var_25695_equation_0 = const()[name = tensor("op_25695_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25695_cast_fp16 = einsum(equation = var_25695_equation_0, values = (var_25381_cast_fp16, var_25135_cast_fp16))[name = tensor("op_25695_cast_fp16")]; + tensor var_25696_to_fp16 = const()[name = tensor("op_25696_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2665_cast_fp16 = mul(x = var_25695_cast_fp16, y = var_25696_to_fp16)[name = tensor("aw_chunk_2665_cast_fp16")]; + tensor var_25699_equation_0 = const()[name = tensor("op_25699_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25699_cast_fp16 = einsum(equation = var_25699_equation_0, values = (var_25381_cast_fp16, var_25142_cast_fp16))[name = tensor("op_25699_cast_fp16")]; + tensor var_25700_to_fp16 = const()[name = tensor("op_25700_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2667_cast_fp16 = mul(x = var_25699_cast_fp16, y = var_25700_to_fp16)[name = tensor("aw_chunk_2667_cast_fp16")]; + tensor var_25703_equation_0 = const()[name = tensor("op_25703_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25703_cast_fp16 = einsum(equation = var_25703_equation_0, values = (var_25381_cast_fp16, var_25149_cast_fp16))[name = tensor("op_25703_cast_fp16")]; + tensor var_25704_to_fp16 = const()[name = tensor("op_25704_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2669_cast_fp16 = mul(x = var_25703_cast_fp16, y = var_25704_to_fp16)[name = tensor("aw_chunk_2669_cast_fp16")]; + tensor var_25707_equation_0 = const()[name = tensor("op_25707_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25707_cast_fp16 = einsum(equation = var_25707_equation_0, values = (var_25381_cast_fp16, var_25156_cast_fp16))[name = tensor("op_25707_cast_fp16")]; + tensor var_25708_to_fp16 = const()[name = tensor("op_25708_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2671_cast_fp16 = mul(x = var_25707_cast_fp16, y = var_25708_to_fp16)[name = tensor("aw_chunk_2671_cast_fp16")]; + tensor var_25711_equation_0 = const()[name = tensor("op_25711_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25711_cast_fp16 = einsum(equation = var_25711_equation_0, values = (var_25385_cast_fp16, var_25163_cast_fp16))[name = tensor("op_25711_cast_fp16")]; + tensor var_25712_to_fp16 = const()[name = tensor("op_25712_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2673_cast_fp16 = mul(x = var_25711_cast_fp16, y = var_25712_to_fp16)[name = tensor("aw_chunk_2673_cast_fp16")]; + tensor var_25715_equation_0 = const()[name = tensor("op_25715_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25715_cast_fp16 = einsum(equation = var_25715_equation_0, values = (var_25385_cast_fp16, var_25170_cast_fp16))[name = tensor("op_25715_cast_fp16")]; + tensor var_25716_to_fp16 = const()[name = tensor("op_25716_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2675_cast_fp16 = mul(x = var_25715_cast_fp16, y = var_25716_to_fp16)[name = tensor("aw_chunk_2675_cast_fp16")]; + tensor var_25719_equation_0 = const()[name = tensor("op_25719_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25719_cast_fp16 = einsum(equation = var_25719_equation_0, values = (var_25385_cast_fp16, var_25177_cast_fp16))[name = tensor("op_25719_cast_fp16")]; + tensor var_25720_to_fp16 = const()[name = tensor("op_25720_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2677_cast_fp16 = mul(x = var_25719_cast_fp16, y = var_25720_to_fp16)[name = tensor("aw_chunk_2677_cast_fp16")]; + tensor var_25723_equation_0 = const()[name = tensor("op_25723_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25723_cast_fp16 = einsum(equation = var_25723_equation_0, values = (var_25385_cast_fp16, var_25184_cast_fp16))[name = tensor("op_25723_cast_fp16")]; + tensor var_25724_to_fp16 = const()[name = tensor("op_25724_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2679_cast_fp16 = mul(x = var_25723_cast_fp16, y = var_25724_to_fp16)[name = tensor("aw_chunk_2679_cast_fp16")]; + tensor var_25727_equation_0 = const()[name = tensor("op_25727_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25727_cast_fp16 = einsum(equation = var_25727_equation_0, values = (var_25389_cast_fp16, var_25191_cast_fp16))[name = tensor("op_25727_cast_fp16")]; + tensor var_25728_to_fp16 = const()[name = tensor("op_25728_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2681_cast_fp16 = mul(x = var_25727_cast_fp16, y = var_25728_to_fp16)[name = tensor("aw_chunk_2681_cast_fp16")]; + tensor var_25731_equation_0 = const()[name = tensor("op_25731_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25731_cast_fp16 = einsum(equation = var_25731_equation_0, values = (var_25389_cast_fp16, var_25198_cast_fp16))[name = tensor("op_25731_cast_fp16")]; + tensor var_25732_to_fp16 = const()[name = tensor("op_25732_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2683_cast_fp16 = mul(x = var_25731_cast_fp16, y = var_25732_to_fp16)[name = tensor("aw_chunk_2683_cast_fp16")]; + tensor var_25735_equation_0 = const()[name = tensor("op_25735_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25735_cast_fp16 = einsum(equation = var_25735_equation_0, values = (var_25389_cast_fp16, var_25205_cast_fp16))[name = tensor("op_25735_cast_fp16")]; + tensor var_25736_to_fp16 = const()[name = tensor("op_25736_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2685_cast_fp16 = mul(x = var_25735_cast_fp16, y = var_25736_to_fp16)[name = tensor("aw_chunk_2685_cast_fp16")]; + tensor var_25739_equation_0 = const()[name = tensor("op_25739_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25739_cast_fp16 = einsum(equation = var_25739_equation_0, values = (var_25389_cast_fp16, var_25212_cast_fp16))[name = tensor("op_25739_cast_fp16")]; + tensor var_25740_to_fp16 = const()[name = tensor("op_25740_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2687_cast_fp16 = mul(x = var_25739_cast_fp16, y = var_25740_to_fp16)[name = tensor("aw_chunk_2687_cast_fp16")]; + tensor var_25743_equation_0 = const()[name = tensor("op_25743_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25743_cast_fp16 = einsum(equation = var_25743_equation_0, values = (var_25393_cast_fp16, var_25219_cast_fp16))[name = tensor("op_25743_cast_fp16")]; + tensor var_25744_to_fp16 = const()[name = tensor("op_25744_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2689_cast_fp16 = mul(x = var_25743_cast_fp16, y = var_25744_to_fp16)[name = tensor("aw_chunk_2689_cast_fp16")]; + tensor var_25747_equation_0 = const()[name = tensor("op_25747_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25747_cast_fp16 = einsum(equation = var_25747_equation_0, values = (var_25393_cast_fp16, var_25226_cast_fp16))[name = tensor("op_25747_cast_fp16")]; + tensor var_25748_to_fp16 = const()[name = tensor("op_25748_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2691_cast_fp16 = mul(x = var_25747_cast_fp16, y = var_25748_to_fp16)[name = tensor("aw_chunk_2691_cast_fp16")]; + tensor var_25751_equation_0 = const()[name = tensor("op_25751_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25751_cast_fp16 = einsum(equation = var_25751_equation_0, values = (var_25393_cast_fp16, var_25233_cast_fp16))[name = tensor("op_25751_cast_fp16")]; + tensor var_25752_to_fp16 = const()[name = tensor("op_25752_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2693_cast_fp16 = mul(x = var_25751_cast_fp16, y = var_25752_to_fp16)[name = tensor("aw_chunk_2693_cast_fp16")]; + tensor var_25755_equation_0 = const()[name = tensor("op_25755_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25755_cast_fp16 = einsum(equation = var_25755_equation_0, values = (var_25393_cast_fp16, var_25240_cast_fp16))[name = tensor("op_25755_cast_fp16")]; + tensor var_25756_to_fp16 = const()[name = tensor("op_25756_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2695_cast_fp16 = mul(x = var_25755_cast_fp16, y = var_25756_to_fp16)[name = tensor("aw_chunk_2695_cast_fp16")]; + tensor var_25759_equation_0 = const()[name = tensor("op_25759_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25759_cast_fp16 = einsum(equation = var_25759_equation_0, values = (var_25397_cast_fp16, var_25247_cast_fp16))[name = tensor("op_25759_cast_fp16")]; + tensor var_25760_to_fp16 = const()[name = tensor("op_25760_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2697_cast_fp16 = mul(x = var_25759_cast_fp16, y = var_25760_to_fp16)[name = tensor("aw_chunk_2697_cast_fp16")]; + tensor var_25763_equation_0 = const()[name = tensor("op_25763_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25763_cast_fp16 = einsum(equation = var_25763_equation_0, values = (var_25397_cast_fp16, var_25254_cast_fp16))[name = tensor("op_25763_cast_fp16")]; + tensor var_25764_to_fp16 = const()[name = tensor("op_25764_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2699_cast_fp16 = mul(x = var_25763_cast_fp16, y = var_25764_to_fp16)[name = tensor("aw_chunk_2699_cast_fp16")]; + tensor var_25767_equation_0 = const()[name = tensor("op_25767_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25767_cast_fp16 = einsum(equation = var_25767_equation_0, values = (var_25397_cast_fp16, var_25261_cast_fp16))[name = tensor("op_25767_cast_fp16")]; + tensor var_25768_to_fp16 = const()[name = tensor("op_25768_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2701_cast_fp16 = mul(x = var_25767_cast_fp16, y = var_25768_to_fp16)[name = tensor("aw_chunk_2701_cast_fp16")]; + tensor var_25771_equation_0 = const()[name = tensor("op_25771_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25771_cast_fp16 = einsum(equation = var_25771_equation_0, values = (var_25397_cast_fp16, var_25268_cast_fp16))[name = tensor("op_25771_cast_fp16")]; + tensor var_25772_to_fp16 = const()[name = tensor("op_25772_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2703_cast_fp16 = mul(x = var_25771_cast_fp16, y = var_25772_to_fp16)[name = tensor("aw_chunk_2703_cast_fp16")]; + tensor var_25775_equation_0 = const()[name = tensor("op_25775_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25775_cast_fp16 = einsum(equation = var_25775_equation_0, values = (var_25401_cast_fp16, var_25275_cast_fp16))[name = tensor("op_25775_cast_fp16")]; + tensor var_25776_to_fp16 = const()[name = tensor("op_25776_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2705_cast_fp16 = mul(x = var_25775_cast_fp16, y = var_25776_to_fp16)[name = tensor("aw_chunk_2705_cast_fp16")]; + tensor var_25779_equation_0 = const()[name = tensor("op_25779_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25779_cast_fp16 = einsum(equation = var_25779_equation_0, values = (var_25401_cast_fp16, var_25282_cast_fp16))[name = tensor("op_25779_cast_fp16")]; + tensor var_25780_to_fp16 = const()[name = tensor("op_25780_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2707_cast_fp16 = mul(x = var_25779_cast_fp16, y = var_25780_to_fp16)[name = tensor("aw_chunk_2707_cast_fp16")]; + tensor var_25783_equation_0 = const()[name = tensor("op_25783_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25783_cast_fp16 = einsum(equation = var_25783_equation_0, values = (var_25401_cast_fp16, var_25289_cast_fp16))[name = tensor("op_25783_cast_fp16")]; + tensor var_25784_to_fp16 = const()[name = tensor("op_25784_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2709_cast_fp16 = mul(x = var_25783_cast_fp16, y = var_25784_to_fp16)[name = tensor("aw_chunk_2709_cast_fp16")]; + tensor var_25787_equation_0 = const()[name = tensor("op_25787_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25787_cast_fp16 = einsum(equation = var_25787_equation_0, values = (var_25401_cast_fp16, var_25296_cast_fp16))[name = tensor("op_25787_cast_fp16")]; + tensor var_25788_to_fp16 = const()[name = tensor("op_25788_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2711_cast_fp16 = mul(x = var_25787_cast_fp16, y = var_25788_to_fp16)[name = tensor("aw_chunk_2711_cast_fp16")]; + tensor var_25791_equation_0 = const()[name = tensor("op_25791_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25791_cast_fp16 = einsum(equation = var_25791_equation_0, values = (var_25405_cast_fp16, var_25303_cast_fp16))[name = tensor("op_25791_cast_fp16")]; + tensor var_25792_to_fp16 = const()[name = tensor("op_25792_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2713_cast_fp16 = mul(x = var_25791_cast_fp16, y = var_25792_to_fp16)[name = tensor("aw_chunk_2713_cast_fp16")]; + tensor var_25795_equation_0 = const()[name = tensor("op_25795_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25795_cast_fp16 = einsum(equation = var_25795_equation_0, values = (var_25405_cast_fp16, var_25310_cast_fp16))[name = tensor("op_25795_cast_fp16")]; + tensor var_25796_to_fp16 = const()[name = tensor("op_25796_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2715_cast_fp16 = mul(x = var_25795_cast_fp16, y = var_25796_to_fp16)[name = tensor("aw_chunk_2715_cast_fp16")]; + tensor var_25799_equation_0 = const()[name = tensor("op_25799_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25799_cast_fp16 = einsum(equation = var_25799_equation_0, values = (var_25405_cast_fp16, var_25317_cast_fp16))[name = tensor("op_25799_cast_fp16")]; + tensor var_25800_to_fp16 = const()[name = tensor("op_25800_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2717_cast_fp16 = mul(x = var_25799_cast_fp16, y = var_25800_to_fp16)[name = tensor("aw_chunk_2717_cast_fp16")]; + tensor var_25803_equation_0 = const()[name = tensor("op_25803_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_25803_cast_fp16 = einsum(equation = var_25803_equation_0, values = (var_25405_cast_fp16, var_25324_cast_fp16))[name = tensor("op_25803_cast_fp16")]; + tensor var_25804_to_fp16 = const()[name = tensor("op_25804_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2719_cast_fp16 = mul(x = var_25803_cast_fp16, y = var_25804_to_fp16)[name = tensor("aw_chunk_2719_cast_fp16")]; + tensor var_25806_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2561_cast_fp16)[name = tensor("op_25806_cast_fp16")]; + tensor var_25807_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2563_cast_fp16)[name = tensor("op_25807_cast_fp16")]; + tensor var_25808_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2565_cast_fp16)[name = tensor("op_25808_cast_fp16")]; + tensor var_25809_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2567_cast_fp16)[name = tensor("op_25809_cast_fp16")]; + tensor var_25810_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2569_cast_fp16)[name = tensor("op_25810_cast_fp16")]; + tensor var_25811_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2571_cast_fp16)[name = tensor("op_25811_cast_fp16")]; + tensor var_25812_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2573_cast_fp16)[name = tensor("op_25812_cast_fp16")]; + tensor var_25813_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2575_cast_fp16)[name = tensor("op_25813_cast_fp16")]; + tensor var_25814_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2577_cast_fp16)[name = tensor("op_25814_cast_fp16")]; + tensor var_25815_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2579_cast_fp16)[name = tensor("op_25815_cast_fp16")]; + tensor var_25816_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2581_cast_fp16)[name = tensor("op_25816_cast_fp16")]; + tensor var_25817_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2583_cast_fp16)[name = tensor("op_25817_cast_fp16")]; + tensor var_25818_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2585_cast_fp16)[name = tensor("op_25818_cast_fp16")]; + tensor var_25819_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2587_cast_fp16)[name = tensor("op_25819_cast_fp16")]; + tensor var_25820_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2589_cast_fp16)[name = tensor("op_25820_cast_fp16")]; + tensor var_25821_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2591_cast_fp16)[name = tensor("op_25821_cast_fp16")]; + tensor var_25822_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2593_cast_fp16)[name = tensor("op_25822_cast_fp16")]; + tensor var_25823_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2595_cast_fp16)[name = tensor("op_25823_cast_fp16")]; + tensor var_25824_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2597_cast_fp16)[name = tensor("op_25824_cast_fp16")]; + tensor var_25825_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2599_cast_fp16)[name = tensor("op_25825_cast_fp16")]; + tensor var_25826_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2601_cast_fp16)[name = tensor("op_25826_cast_fp16")]; + tensor var_25827_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2603_cast_fp16)[name = tensor("op_25827_cast_fp16")]; + tensor var_25828_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2605_cast_fp16)[name = tensor("op_25828_cast_fp16")]; + tensor var_25829_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2607_cast_fp16)[name = tensor("op_25829_cast_fp16")]; + tensor var_25830_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2609_cast_fp16)[name = tensor("op_25830_cast_fp16")]; + tensor var_25831_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2611_cast_fp16)[name = tensor("op_25831_cast_fp16")]; + tensor var_25832_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2613_cast_fp16)[name = tensor("op_25832_cast_fp16")]; + tensor var_25833_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2615_cast_fp16)[name = tensor("op_25833_cast_fp16")]; + tensor var_25834_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2617_cast_fp16)[name = tensor("op_25834_cast_fp16")]; + tensor var_25835_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2619_cast_fp16)[name = tensor("op_25835_cast_fp16")]; + tensor var_25836_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2621_cast_fp16)[name = tensor("op_25836_cast_fp16")]; + tensor var_25837_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2623_cast_fp16)[name = tensor("op_25837_cast_fp16")]; + tensor var_25838_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2625_cast_fp16)[name = tensor("op_25838_cast_fp16")]; + tensor var_25839_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2627_cast_fp16)[name = tensor("op_25839_cast_fp16")]; + tensor var_25840_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2629_cast_fp16)[name = tensor("op_25840_cast_fp16")]; + tensor var_25841_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2631_cast_fp16)[name = tensor("op_25841_cast_fp16")]; + tensor var_25842_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2633_cast_fp16)[name = tensor("op_25842_cast_fp16")]; + tensor var_25843_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2635_cast_fp16)[name = tensor("op_25843_cast_fp16")]; + tensor var_25844_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2637_cast_fp16)[name = tensor("op_25844_cast_fp16")]; + tensor var_25845_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2639_cast_fp16)[name = tensor("op_25845_cast_fp16")]; + tensor var_25846_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2641_cast_fp16)[name = tensor("op_25846_cast_fp16")]; + tensor var_25847_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2643_cast_fp16)[name = tensor("op_25847_cast_fp16")]; + tensor var_25848_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2645_cast_fp16)[name = tensor("op_25848_cast_fp16")]; + tensor var_25849_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2647_cast_fp16)[name = tensor("op_25849_cast_fp16")]; + tensor var_25850_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2649_cast_fp16)[name = tensor("op_25850_cast_fp16")]; + tensor var_25851_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2651_cast_fp16)[name = tensor("op_25851_cast_fp16")]; + tensor var_25852_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2653_cast_fp16)[name = tensor("op_25852_cast_fp16")]; + tensor var_25853_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2655_cast_fp16)[name = tensor("op_25853_cast_fp16")]; + tensor var_25854_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2657_cast_fp16)[name = tensor("op_25854_cast_fp16")]; + tensor var_25855_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2659_cast_fp16)[name = tensor("op_25855_cast_fp16")]; + tensor var_25856_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2661_cast_fp16)[name = tensor("op_25856_cast_fp16")]; + tensor var_25857_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2663_cast_fp16)[name = tensor("op_25857_cast_fp16")]; + tensor var_25858_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2665_cast_fp16)[name = tensor("op_25858_cast_fp16")]; + tensor var_25859_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2667_cast_fp16)[name = tensor("op_25859_cast_fp16")]; + tensor var_25860_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2669_cast_fp16)[name = tensor("op_25860_cast_fp16")]; + tensor var_25861_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2671_cast_fp16)[name = tensor("op_25861_cast_fp16")]; + tensor var_25862_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2673_cast_fp16)[name = tensor("op_25862_cast_fp16")]; + tensor var_25863_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2675_cast_fp16)[name = tensor("op_25863_cast_fp16")]; + tensor var_25864_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2677_cast_fp16)[name = tensor("op_25864_cast_fp16")]; + tensor var_25865_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2679_cast_fp16)[name = tensor("op_25865_cast_fp16")]; + tensor var_25866_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2681_cast_fp16)[name = tensor("op_25866_cast_fp16")]; + tensor var_25867_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2683_cast_fp16)[name = tensor("op_25867_cast_fp16")]; + tensor var_25868_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2685_cast_fp16)[name = tensor("op_25868_cast_fp16")]; + tensor var_25869_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2687_cast_fp16)[name = tensor("op_25869_cast_fp16")]; + tensor var_25870_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2689_cast_fp16)[name = tensor("op_25870_cast_fp16")]; + tensor var_25871_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2691_cast_fp16)[name = tensor("op_25871_cast_fp16")]; + tensor var_25872_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2693_cast_fp16)[name = tensor("op_25872_cast_fp16")]; + tensor var_25873_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2695_cast_fp16)[name = tensor("op_25873_cast_fp16")]; + tensor var_25874_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2697_cast_fp16)[name = tensor("op_25874_cast_fp16")]; + tensor var_25875_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2699_cast_fp16)[name = tensor("op_25875_cast_fp16")]; + tensor var_25876_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2701_cast_fp16)[name = tensor("op_25876_cast_fp16")]; + tensor var_25877_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2703_cast_fp16)[name = tensor("op_25877_cast_fp16")]; + tensor var_25878_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2705_cast_fp16)[name = tensor("op_25878_cast_fp16")]; + tensor var_25879_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2707_cast_fp16)[name = tensor("op_25879_cast_fp16")]; + tensor var_25880_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2709_cast_fp16)[name = tensor("op_25880_cast_fp16")]; + tensor var_25881_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2711_cast_fp16)[name = tensor("op_25881_cast_fp16")]; + tensor var_25882_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2713_cast_fp16)[name = tensor("op_25882_cast_fp16")]; + tensor var_25883_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2715_cast_fp16)[name = tensor("op_25883_cast_fp16")]; + tensor var_25884_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2717_cast_fp16)[name = tensor("op_25884_cast_fp16")]; + tensor var_25885_cast_fp16 = softmax(axis = var_24631, x = aw_chunk_2719_cast_fp16)[name = tensor("op_25885_cast_fp16")]; + tensor var_25887_equation_0 = const()[name = tensor("op_25887_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_25887_cast_fp16 = einsum(equation = var_25887_equation_0, values = (var_25407_cast_fp16, var_25806_cast_fp16))[name = tensor("op_25887_cast_fp16")]; + tensor var_25889_equation_0 = const()[name = tensor("op_25889_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_25889_cast_fp16 = einsum(equation = var_25889_equation_0, values = (var_25407_cast_fp16, var_25807_cast_fp16))[name = tensor("op_25889_cast_fp16")]; + tensor var_25891_equation_0 = const()[name = tensor("op_25891_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_25891_cast_fp16 = einsum(equation = var_25891_equation_0, values = (var_25407_cast_fp16, var_25808_cast_fp16))[name = tensor("op_25891_cast_fp16")]; + tensor var_25893_equation_0 = const()[name = tensor("op_25893_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_25893_cast_fp16 = einsum(equation = var_25893_equation_0, values = (var_25407_cast_fp16, var_25809_cast_fp16))[name = tensor("op_25893_cast_fp16")]; + tensor var_25895_equation_0 = const()[name = tensor("op_25895_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_25895_cast_fp16 = einsum(equation = var_25895_equation_0, values = (var_25411_cast_fp16, var_25810_cast_fp16))[name = tensor("op_25895_cast_fp16")]; + tensor var_25897_equation_0 = const()[name = tensor("op_25897_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_25897_cast_fp16 = einsum(equation = var_25897_equation_0, values = (var_25411_cast_fp16, var_25811_cast_fp16))[name = tensor("op_25897_cast_fp16")]; + tensor var_25899_equation_0 = const()[name = tensor("op_25899_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_25899_cast_fp16 = einsum(equation = var_25899_equation_0, values = (var_25411_cast_fp16, var_25812_cast_fp16))[name = tensor("op_25899_cast_fp16")]; + tensor var_25901_equation_0 = const()[name = tensor("op_25901_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_25901_cast_fp16 = einsum(equation = var_25901_equation_0, values = (var_25411_cast_fp16, var_25813_cast_fp16))[name = tensor("op_25901_cast_fp16")]; + tensor var_25903_equation_0 = const()[name = tensor("op_25903_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_25903_cast_fp16 = einsum(equation = var_25903_equation_0, values = (var_25415_cast_fp16, var_25814_cast_fp16))[name = tensor("op_25903_cast_fp16")]; + tensor var_25905_equation_0 = const()[name = tensor("op_25905_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_25905_cast_fp16 = einsum(equation = var_25905_equation_0, values = (var_25415_cast_fp16, var_25815_cast_fp16))[name = tensor("op_25905_cast_fp16")]; + tensor var_25907_equation_0 = const()[name = tensor("op_25907_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_25907_cast_fp16 = einsum(equation = var_25907_equation_0, values = (var_25415_cast_fp16, var_25816_cast_fp16))[name = tensor("op_25907_cast_fp16")]; + tensor var_25909_equation_0 = const()[name = tensor("op_25909_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_25909_cast_fp16 = einsum(equation = var_25909_equation_0, values = (var_25415_cast_fp16, var_25817_cast_fp16))[name = tensor("op_25909_cast_fp16")]; + tensor var_25911_equation_0 = const()[name = tensor("op_25911_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_25911_cast_fp16 = einsum(equation = var_25911_equation_0, values = (var_25419_cast_fp16, var_25818_cast_fp16))[name = tensor("op_25911_cast_fp16")]; + tensor var_25913_equation_0 = const()[name = tensor("op_25913_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_25913_cast_fp16 = einsum(equation = var_25913_equation_0, values = (var_25419_cast_fp16, var_25819_cast_fp16))[name = tensor("op_25913_cast_fp16")]; + tensor var_25915_equation_0 = const()[name = tensor("op_25915_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_25915_cast_fp16 = einsum(equation = var_25915_equation_0, values = (var_25419_cast_fp16, var_25820_cast_fp16))[name = tensor("op_25915_cast_fp16")]; + tensor var_25917_equation_0 = const()[name = tensor("op_25917_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_25917_cast_fp16 = einsum(equation = var_25917_equation_0, values = (var_25419_cast_fp16, var_25821_cast_fp16))[name = tensor("op_25917_cast_fp16")]; + tensor var_25919_equation_0 = const()[name = tensor("op_25919_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_25919_cast_fp16 = einsum(equation = var_25919_equation_0, values = (var_25423_cast_fp16, var_25822_cast_fp16))[name = tensor("op_25919_cast_fp16")]; + tensor var_25921_equation_0 = const()[name = tensor("op_25921_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_25921_cast_fp16 = einsum(equation = var_25921_equation_0, values = (var_25423_cast_fp16, var_25823_cast_fp16))[name = tensor("op_25921_cast_fp16")]; + tensor var_25923_equation_0 = const()[name = tensor("op_25923_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_25923_cast_fp16 = einsum(equation = var_25923_equation_0, values = (var_25423_cast_fp16, var_25824_cast_fp16))[name = tensor("op_25923_cast_fp16")]; + tensor var_25925_equation_0 = const()[name = tensor("op_25925_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_25925_cast_fp16 = einsum(equation = var_25925_equation_0, values = (var_25423_cast_fp16, var_25825_cast_fp16))[name = tensor("op_25925_cast_fp16")]; + tensor var_25927_equation_0 = const()[name = tensor("op_25927_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_25927_cast_fp16 = einsum(equation = var_25927_equation_0, values = (var_25427_cast_fp16, var_25826_cast_fp16))[name = tensor("op_25927_cast_fp16")]; + tensor var_25929_equation_0 = const()[name = tensor("op_25929_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_25929_cast_fp16 = einsum(equation = var_25929_equation_0, values = (var_25427_cast_fp16, var_25827_cast_fp16))[name = tensor("op_25929_cast_fp16")]; + tensor var_25931_equation_0 = const()[name = tensor("op_25931_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_25931_cast_fp16 = einsum(equation = var_25931_equation_0, values = (var_25427_cast_fp16, var_25828_cast_fp16))[name = tensor("op_25931_cast_fp16")]; + tensor var_25933_equation_0 = const()[name = tensor("op_25933_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_25933_cast_fp16 = einsum(equation = var_25933_equation_0, values = (var_25427_cast_fp16, var_25829_cast_fp16))[name = tensor("op_25933_cast_fp16")]; + tensor var_25935_equation_0 = const()[name = tensor("op_25935_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_25935_cast_fp16 = einsum(equation = var_25935_equation_0, values = (var_25431_cast_fp16, var_25830_cast_fp16))[name = tensor("op_25935_cast_fp16")]; + tensor var_25937_equation_0 = const()[name = tensor("op_25937_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_25937_cast_fp16 = einsum(equation = var_25937_equation_0, values = (var_25431_cast_fp16, var_25831_cast_fp16))[name = tensor("op_25937_cast_fp16")]; + tensor var_25939_equation_0 = const()[name = tensor("op_25939_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_25939_cast_fp16 = einsum(equation = var_25939_equation_0, values = (var_25431_cast_fp16, var_25832_cast_fp16))[name = tensor("op_25939_cast_fp16")]; + tensor var_25941_equation_0 = const()[name = tensor("op_25941_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_25941_cast_fp16 = einsum(equation = var_25941_equation_0, values = (var_25431_cast_fp16, var_25833_cast_fp16))[name = tensor("op_25941_cast_fp16")]; + tensor var_25943_equation_0 = const()[name = tensor("op_25943_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_25943_cast_fp16 = einsum(equation = var_25943_equation_0, values = (var_25435_cast_fp16, var_25834_cast_fp16))[name = tensor("op_25943_cast_fp16")]; + tensor var_25945_equation_0 = const()[name = tensor("op_25945_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_25945_cast_fp16 = einsum(equation = var_25945_equation_0, values = (var_25435_cast_fp16, var_25835_cast_fp16))[name = tensor("op_25945_cast_fp16")]; + tensor var_25947_equation_0 = const()[name = tensor("op_25947_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_25947_cast_fp16 = einsum(equation = var_25947_equation_0, values = (var_25435_cast_fp16, var_25836_cast_fp16))[name = tensor("op_25947_cast_fp16")]; + tensor var_25949_equation_0 = const()[name = tensor("op_25949_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_25949_cast_fp16 = einsum(equation = var_25949_equation_0, values = (var_25435_cast_fp16, var_25837_cast_fp16))[name = tensor("op_25949_cast_fp16")]; + tensor var_25951_equation_0 = const()[name = tensor("op_25951_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_25951_cast_fp16 = einsum(equation = var_25951_equation_0, values = (var_25439_cast_fp16, var_25838_cast_fp16))[name = tensor("op_25951_cast_fp16")]; + tensor var_25953_equation_0 = const()[name = tensor("op_25953_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_25953_cast_fp16 = einsum(equation = var_25953_equation_0, values = (var_25439_cast_fp16, var_25839_cast_fp16))[name = tensor("op_25953_cast_fp16")]; + tensor var_25955_equation_0 = const()[name = tensor("op_25955_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_25955_cast_fp16 = einsum(equation = var_25955_equation_0, values = (var_25439_cast_fp16, var_25840_cast_fp16))[name = tensor("op_25955_cast_fp16")]; + tensor var_25957_equation_0 = const()[name = tensor("op_25957_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_25957_cast_fp16 = einsum(equation = var_25957_equation_0, values = (var_25439_cast_fp16, var_25841_cast_fp16))[name = tensor("op_25957_cast_fp16")]; + tensor var_25959_equation_0 = const()[name = tensor("op_25959_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_25959_cast_fp16 = einsum(equation = var_25959_equation_0, values = (var_25443_cast_fp16, var_25842_cast_fp16))[name = tensor("op_25959_cast_fp16")]; + tensor var_25961_equation_0 = const()[name = tensor("op_25961_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_25961_cast_fp16 = einsum(equation = var_25961_equation_0, values = (var_25443_cast_fp16, var_25843_cast_fp16))[name = tensor("op_25961_cast_fp16")]; + tensor var_25963_equation_0 = const()[name = tensor("op_25963_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_25963_cast_fp16 = einsum(equation = var_25963_equation_0, values = (var_25443_cast_fp16, var_25844_cast_fp16))[name = tensor("op_25963_cast_fp16")]; + tensor var_25965_equation_0 = const()[name = tensor("op_25965_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_25965_cast_fp16 = einsum(equation = var_25965_equation_0, values = (var_25443_cast_fp16, var_25845_cast_fp16))[name = tensor("op_25965_cast_fp16")]; + tensor var_25967_equation_0 = const()[name = tensor("op_25967_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_25967_cast_fp16 = einsum(equation = var_25967_equation_0, values = (var_25447_cast_fp16, var_25846_cast_fp16))[name = tensor("op_25967_cast_fp16")]; + tensor var_25969_equation_0 = const()[name = tensor("op_25969_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_25969_cast_fp16 = einsum(equation = var_25969_equation_0, values = (var_25447_cast_fp16, var_25847_cast_fp16))[name = tensor("op_25969_cast_fp16")]; + tensor var_25971_equation_0 = const()[name = tensor("op_25971_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_25971_cast_fp16 = einsum(equation = var_25971_equation_0, values = (var_25447_cast_fp16, var_25848_cast_fp16))[name = tensor("op_25971_cast_fp16")]; + tensor var_25973_equation_0 = const()[name = tensor("op_25973_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_25973_cast_fp16 = einsum(equation = var_25973_equation_0, values = (var_25447_cast_fp16, var_25849_cast_fp16))[name = tensor("op_25973_cast_fp16")]; + tensor var_25975_equation_0 = const()[name = tensor("op_25975_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_25975_cast_fp16 = einsum(equation = var_25975_equation_0, values = (var_25451_cast_fp16, var_25850_cast_fp16))[name = tensor("op_25975_cast_fp16")]; + tensor var_25977_equation_0 = const()[name = tensor("op_25977_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_25977_cast_fp16 = einsum(equation = var_25977_equation_0, values = (var_25451_cast_fp16, var_25851_cast_fp16))[name = tensor("op_25977_cast_fp16")]; + tensor var_25979_equation_0 = const()[name = tensor("op_25979_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_25979_cast_fp16 = einsum(equation = var_25979_equation_0, values = (var_25451_cast_fp16, var_25852_cast_fp16))[name = tensor("op_25979_cast_fp16")]; + tensor var_25981_equation_0 = const()[name = tensor("op_25981_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_25981_cast_fp16 = einsum(equation = var_25981_equation_0, values = (var_25451_cast_fp16, var_25853_cast_fp16))[name = tensor("op_25981_cast_fp16")]; + tensor var_25983_equation_0 = const()[name = tensor("op_25983_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_25983_cast_fp16 = einsum(equation = var_25983_equation_0, values = (var_25455_cast_fp16, var_25854_cast_fp16))[name = tensor("op_25983_cast_fp16")]; + tensor var_25985_equation_0 = const()[name = tensor("op_25985_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_25985_cast_fp16 = einsum(equation = var_25985_equation_0, values = (var_25455_cast_fp16, var_25855_cast_fp16))[name = tensor("op_25985_cast_fp16")]; + tensor var_25987_equation_0 = const()[name = tensor("op_25987_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_25987_cast_fp16 = einsum(equation = var_25987_equation_0, values = (var_25455_cast_fp16, var_25856_cast_fp16))[name = tensor("op_25987_cast_fp16")]; + tensor var_25989_equation_0 = const()[name = tensor("op_25989_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_25989_cast_fp16 = einsum(equation = var_25989_equation_0, values = (var_25455_cast_fp16, var_25857_cast_fp16))[name = tensor("op_25989_cast_fp16")]; + tensor var_25991_equation_0 = const()[name = tensor("op_25991_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_25991_cast_fp16 = einsum(equation = var_25991_equation_0, values = (var_25459_cast_fp16, var_25858_cast_fp16))[name = tensor("op_25991_cast_fp16")]; + tensor var_25993_equation_0 = const()[name = tensor("op_25993_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_25993_cast_fp16 = einsum(equation = var_25993_equation_0, values = (var_25459_cast_fp16, var_25859_cast_fp16))[name = tensor("op_25993_cast_fp16")]; + tensor var_25995_equation_0 = const()[name = tensor("op_25995_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_25995_cast_fp16 = einsum(equation = var_25995_equation_0, values = (var_25459_cast_fp16, var_25860_cast_fp16))[name = tensor("op_25995_cast_fp16")]; + tensor var_25997_equation_0 = const()[name = tensor("op_25997_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_25997_cast_fp16 = einsum(equation = var_25997_equation_0, values = (var_25459_cast_fp16, var_25861_cast_fp16))[name = tensor("op_25997_cast_fp16")]; + tensor var_25999_equation_0 = const()[name = tensor("op_25999_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_25999_cast_fp16 = einsum(equation = var_25999_equation_0, values = (var_25463_cast_fp16, var_25862_cast_fp16))[name = tensor("op_25999_cast_fp16")]; + tensor var_26001_equation_0 = const()[name = tensor("op_26001_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_26001_cast_fp16 = einsum(equation = var_26001_equation_0, values = (var_25463_cast_fp16, var_25863_cast_fp16))[name = tensor("op_26001_cast_fp16")]; + tensor var_26003_equation_0 = const()[name = tensor("op_26003_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_26003_cast_fp16 = einsum(equation = var_26003_equation_0, values = (var_25463_cast_fp16, var_25864_cast_fp16))[name = tensor("op_26003_cast_fp16")]; + tensor var_26005_equation_0 = const()[name = tensor("op_26005_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_26005_cast_fp16 = einsum(equation = var_26005_equation_0, values = (var_25463_cast_fp16, var_25865_cast_fp16))[name = tensor("op_26005_cast_fp16")]; + tensor var_26007_equation_0 = const()[name = tensor("op_26007_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_26007_cast_fp16 = einsum(equation = var_26007_equation_0, values = (var_25467_cast_fp16, var_25866_cast_fp16))[name = tensor("op_26007_cast_fp16")]; + tensor var_26009_equation_0 = const()[name = tensor("op_26009_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_26009_cast_fp16 = einsum(equation = var_26009_equation_0, values = (var_25467_cast_fp16, var_25867_cast_fp16))[name = tensor("op_26009_cast_fp16")]; + tensor var_26011_equation_0 = const()[name = tensor("op_26011_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_26011_cast_fp16 = einsum(equation = var_26011_equation_0, values = (var_25467_cast_fp16, var_25868_cast_fp16))[name = tensor("op_26011_cast_fp16")]; + tensor var_26013_equation_0 = const()[name = tensor("op_26013_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_26013_cast_fp16 = einsum(equation = var_26013_equation_0, values = (var_25467_cast_fp16, var_25869_cast_fp16))[name = tensor("op_26013_cast_fp16")]; + tensor var_26015_equation_0 = const()[name = tensor("op_26015_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_26015_cast_fp16 = einsum(equation = var_26015_equation_0, values = (var_25471_cast_fp16, var_25870_cast_fp16))[name = tensor("op_26015_cast_fp16")]; + tensor var_26017_equation_0 = const()[name = tensor("op_26017_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_26017_cast_fp16 = einsum(equation = var_26017_equation_0, values = (var_25471_cast_fp16, var_25871_cast_fp16))[name = tensor("op_26017_cast_fp16")]; + tensor var_26019_equation_0 = const()[name = tensor("op_26019_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_26019_cast_fp16 = einsum(equation = var_26019_equation_0, values = (var_25471_cast_fp16, var_25872_cast_fp16))[name = tensor("op_26019_cast_fp16")]; + tensor var_26021_equation_0 = const()[name = tensor("op_26021_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_26021_cast_fp16 = einsum(equation = var_26021_equation_0, values = (var_25471_cast_fp16, var_25873_cast_fp16))[name = tensor("op_26021_cast_fp16")]; + tensor var_26023_equation_0 = const()[name = tensor("op_26023_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_26023_cast_fp16 = einsum(equation = var_26023_equation_0, values = (var_25475_cast_fp16, var_25874_cast_fp16))[name = tensor("op_26023_cast_fp16")]; + tensor var_26025_equation_0 = const()[name = tensor("op_26025_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_26025_cast_fp16 = einsum(equation = var_26025_equation_0, values = (var_25475_cast_fp16, var_25875_cast_fp16))[name = tensor("op_26025_cast_fp16")]; + tensor var_26027_equation_0 = const()[name = tensor("op_26027_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_26027_cast_fp16 = einsum(equation = var_26027_equation_0, values = (var_25475_cast_fp16, var_25876_cast_fp16))[name = tensor("op_26027_cast_fp16")]; + tensor var_26029_equation_0 = const()[name = tensor("op_26029_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_26029_cast_fp16 = einsum(equation = var_26029_equation_0, values = (var_25475_cast_fp16, var_25877_cast_fp16))[name = tensor("op_26029_cast_fp16")]; + tensor var_26031_equation_0 = const()[name = tensor("op_26031_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_26031_cast_fp16 = einsum(equation = var_26031_equation_0, values = (var_25479_cast_fp16, var_25878_cast_fp16))[name = tensor("op_26031_cast_fp16")]; + tensor var_26033_equation_0 = const()[name = tensor("op_26033_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_26033_cast_fp16 = einsum(equation = var_26033_equation_0, values = (var_25479_cast_fp16, var_25879_cast_fp16))[name = tensor("op_26033_cast_fp16")]; + tensor var_26035_equation_0 = const()[name = tensor("op_26035_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_26035_cast_fp16 = einsum(equation = var_26035_equation_0, values = (var_25479_cast_fp16, var_25880_cast_fp16))[name = tensor("op_26035_cast_fp16")]; + tensor var_26037_equation_0 = const()[name = tensor("op_26037_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_26037_cast_fp16 = einsum(equation = var_26037_equation_0, values = (var_25479_cast_fp16, var_25881_cast_fp16))[name = tensor("op_26037_cast_fp16")]; + tensor var_26039_equation_0 = const()[name = tensor("op_26039_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_26039_cast_fp16 = einsum(equation = var_26039_equation_0, values = (var_25483_cast_fp16, var_25882_cast_fp16))[name = tensor("op_26039_cast_fp16")]; + tensor var_26041_equation_0 = const()[name = tensor("op_26041_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_26041_cast_fp16 = einsum(equation = var_26041_equation_0, values = (var_25483_cast_fp16, var_25883_cast_fp16))[name = tensor("op_26041_cast_fp16")]; + tensor var_26043_equation_0 = const()[name = tensor("op_26043_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_26043_cast_fp16 = einsum(equation = var_26043_equation_0, values = (var_25483_cast_fp16, var_25884_cast_fp16))[name = tensor("op_26043_cast_fp16")]; + tensor var_26045_equation_0 = const()[name = tensor("op_26045_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_26045_cast_fp16 = einsum(equation = var_26045_equation_0, values = (var_25483_cast_fp16, var_25885_cast_fp16))[name = tensor("op_26045_cast_fp16")]; + tensor var_26047_interleave_0 = const()[name = tensor("op_26047_interleave_0"), val = tensor(false)]; + tensor var_26047_cast_fp16 = concat(axis = var_24606, interleave = var_26047_interleave_0, values = (var_25887_cast_fp16, var_25889_cast_fp16, var_25891_cast_fp16, var_25893_cast_fp16))[name = tensor("op_26047_cast_fp16")]; + tensor var_26049_interleave_0 = const()[name = tensor("op_26049_interleave_0"), val = tensor(false)]; + tensor var_26049_cast_fp16 = concat(axis = var_24606, interleave = var_26049_interleave_0, values = (var_25895_cast_fp16, var_25897_cast_fp16, var_25899_cast_fp16, var_25901_cast_fp16))[name = tensor("op_26049_cast_fp16")]; + tensor var_26051_interleave_0 = const()[name = tensor("op_26051_interleave_0"), val = tensor(false)]; + tensor var_26051_cast_fp16 = concat(axis = var_24606, interleave = var_26051_interleave_0, values = (var_25903_cast_fp16, var_25905_cast_fp16, var_25907_cast_fp16, var_25909_cast_fp16))[name = tensor("op_26051_cast_fp16")]; + tensor var_26053_interleave_0 = const()[name = tensor("op_26053_interleave_0"), val = tensor(false)]; + tensor var_26053_cast_fp16 = concat(axis = var_24606, interleave = var_26053_interleave_0, values = (var_25911_cast_fp16, var_25913_cast_fp16, var_25915_cast_fp16, var_25917_cast_fp16))[name = tensor("op_26053_cast_fp16")]; + tensor var_26055_interleave_0 = const()[name = tensor("op_26055_interleave_0"), val = tensor(false)]; + tensor var_26055_cast_fp16 = concat(axis = var_24606, interleave = var_26055_interleave_0, values = (var_25919_cast_fp16, var_25921_cast_fp16, var_25923_cast_fp16, var_25925_cast_fp16))[name = tensor("op_26055_cast_fp16")]; + tensor var_26057_interleave_0 = const()[name = tensor("op_26057_interleave_0"), val = tensor(false)]; + tensor var_26057_cast_fp16 = concat(axis = var_24606, interleave = var_26057_interleave_0, values = (var_25927_cast_fp16, var_25929_cast_fp16, var_25931_cast_fp16, var_25933_cast_fp16))[name = tensor("op_26057_cast_fp16")]; + tensor var_26059_interleave_0 = const()[name = tensor("op_26059_interleave_0"), val = tensor(false)]; + tensor var_26059_cast_fp16 = concat(axis = var_24606, interleave = var_26059_interleave_0, values = (var_25935_cast_fp16, var_25937_cast_fp16, var_25939_cast_fp16, var_25941_cast_fp16))[name = tensor("op_26059_cast_fp16")]; + tensor var_26061_interleave_0 = const()[name = tensor("op_26061_interleave_0"), val = tensor(false)]; + tensor var_26061_cast_fp16 = concat(axis = var_24606, interleave = var_26061_interleave_0, values = (var_25943_cast_fp16, var_25945_cast_fp16, var_25947_cast_fp16, var_25949_cast_fp16))[name = tensor("op_26061_cast_fp16")]; + tensor var_26063_interleave_0 = const()[name = tensor("op_26063_interleave_0"), val = tensor(false)]; + tensor var_26063_cast_fp16 = concat(axis = var_24606, interleave = var_26063_interleave_0, values = (var_25951_cast_fp16, var_25953_cast_fp16, var_25955_cast_fp16, var_25957_cast_fp16))[name = tensor("op_26063_cast_fp16")]; + tensor var_26065_interleave_0 = const()[name = tensor("op_26065_interleave_0"), val = tensor(false)]; + tensor var_26065_cast_fp16 = concat(axis = var_24606, interleave = var_26065_interleave_0, values = (var_25959_cast_fp16, var_25961_cast_fp16, var_25963_cast_fp16, var_25965_cast_fp16))[name = tensor("op_26065_cast_fp16")]; + tensor var_26067_interleave_0 = const()[name = tensor("op_26067_interleave_0"), val = tensor(false)]; + tensor var_26067_cast_fp16 = concat(axis = var_24606, interleave = var_26067_interleave_0, values = (var_25967_cast_fp16, var_25969_cast_fp16, var_25971_cast_fp16, var_25973_cast_fp16))[name = tensor("op_26067_cast_fp16")]; + tensor var_26069_interleave_0 = const()[name = tensor("op_26069_interleave_0"), val = tensor(false)]; + tensor var_26069_cast_fp16 = concat(axis = var_24606, interleave = var_26069_interleave_0, values = (var_25975_cast_fp16, var_25977_cast_fp16, var_25979_cast_fp16, var_25981_cast_fp16))[name = tensor("op_26069_cast_fp16")]; + tensor var_26071_interleave_0 = const()[name = tensor("op_26071_interleave_0"), val = tensor(false)]; + tensor var_26071_cast_fp16 = concat(axis = var_24606, interleave = var_26071_interleave_0, values = (var_25983_cast_fp16, var_25985_cast_fp16, var_25987_cast_fp16, var_25989_cast_fp16))[name = tensor("op_26071_cast_fp16")]; + tensor var_26073_interleave_0 = const()[name = tensor("op_26073_interleave_0"), val = tensor(false)]; + tensor var_26073_cast_fp16 = concat(axis = var_24606, interleave = var_26073_interleave_0, values = (var_25991_cast_fp16, var_25993_cast_fp16, var_25995_cast_fp16, var_25997_cast_fp16))[name = tensor("op_26073_cast_fp16")]; + tensor var_26075_interleave_0 = const()[name = tensor("op_26075_interleave_0"), val = tensor(false)]; + tensor var_26075_cast_fp16 = concat(axis = var_24606, interleave = var_26075_interleave_0, values = (var_25999_cast_fp16, var_26001_cast_fp16, var_26003_cast_fp16, var_26005_cast_fp16))[name = tensor("op_26075_cast_fp16")]; + tensor var_26077_interleave_0 = const()[name = tensor("op_26077_interleave_0"), val = tensor(false)]; + tensor var_26077_cast_fp16 = concat(axis = var_24606, interleave = var_26077_interleave_0, values = (var_26007_cast_fp16, var_26009_cast_fp16, var_26011_cast_fp16, var_26013_cast_fp16))[name = tensor("op_26077_cast_fp16")]; + tensor var_26079_interleave_0 = const()[name = tensor("op_26079_interleave_0"), val = tensor(false)]; + tensor var_26079_cast_fp16 = concat(axis = var_24606, interleave = var_26079_interleave_0, values = (var_26015_cast_fp16, var_26017_cast_fp16, var_26019_cast_fp16, var_26021_cast_fp16))[name = tensor("op_26079_cast_fp16")]; + tensor var_26081_interleave_0 = const()[name = tensor("op_26081_interleave_0"), val = tensor(false)]; + tensor var_26081_cast_fp16 = concat(axis = var_24606, interleave = var_26081_interleave_0, values = (var_26023_cast_fp16, var_26025_cast_fp16, var_26027_cast_fp16, var_26029_cast_fp16))[name = tensor("op_26081_cast_fp16")]; + tensor var_26083_interleave_0 = const()[name = tensor("op_26083_interleave_0"), val = tensor(false)]; + tensor var_26083_cast_fp16 = concat(axis = var_24606, interleave = var_26083_interleave_0, values = (var_26031_cast_fp16, var_26033_cast_fp16, var_26035_cast_fp16, var_26037_cast_fp16))[name = tensor("op_26083_cast_fp16")]; + tensor var_26085_interleave_0 = const()[name = tensor("op_26085_interleave_0"), val = tensor(false)]; + tensor var_26085_cast_fp16 = concat(axis = var_24606, interleave = var_26085_interleave_0, values = (var_26039_cast_fp16, var_26041_cast_fp16, var_26043_cast_fp16, var_26045_cast_fp16))[name = tensor("op_26085_cast_fp16")]; + tensor input_129_interleave_0 = const()[name = tensor("input_129_interleave_0"), val = tensor(false)]; + tensor input_129_cast_fp16 = concat(axis = var_24631, interleave = input_129_interleave_0, values = (var_26047_cast_fp16, var_26049_cast_fp16, var_26051_cast_fp16, var_26053_cast_fp16, var_26055_cast_fp16, var_26057_cast_fp16, var_26059_cast_fp16, var_26061_cast_fp16, var_26063_cast_fp16, var_26065_cast_fp16, var_26067_cast_fp16, var_26069_cast_fp16, var_26071_cast_fp16, var_26073_cast_fp16, var_26075_cast_fp16, var_26077_cast_fp16, var_26079_cast_fp16, var_26081_cast_fp16, var_26083_cast_fp16, var_26085_cast_fp16))[name = tensor("input_129_cast_fp16")]; + tensor var_26090 = const()[name = tensor("op_26090"), val = tensor([1, 1])]; + tensor var_26092 = const()[name = tensor("op_26092"), val = tensor([1, 1])]; + tensor obj_67_pad_type_0 = const()[name = tensor("obj_67_pad_type_0"), val = tensor("custom")]; + tensor obj_67_pad_0 = const()[name = tensor("obj_67_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_16_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_16_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(653789120)))]; + tensor layers_16_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_16_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(657065984)))]; + tensor obj_67_cast_fp16 = conv(bias = layers_16_self_attn_o_proj_bias_to_fp16, dilations = var_26092, groups = var_24631, pad = obj_67_pad_0, pad_type = obj_67_pad_type_0, strides = var_26090, weight = layers_16_self_attn_o_proj_weight_to_fp16, x = input_129_cast_fp16)[name = tensor("obj_67_cast_fp16")]; + tensor inputs_67_cast_fp16 = add(x = inputs_65_cast_fp16, y = obj_67_cast_fp16)[name = tensor("inputs_67_cast_fp16")]; + tensor var_26098 = const()[name = tensor("op_26098"), val = tensor([1])]; + tensor channels_mean_67_cast_fp16 = reduce_mean(axes = var_26098, keep_dims = var_24632, x = inputs_67_cast_fp16)[name = tensor("channels_mean_67_cast_fp16")]; + tensor zero_mean_67_cast_fp16 = sub(x = inputs_67_cast_fp16, y = channels_mean_67_cast_fp16)[name = tensor("zero_mean_67_cast_fp16")]; + tensor zero_mean_sq_67_cast_fp16 = mul(x = zero_mean_67_cast_fp16, y = zero_mean_67_cast_fp16)[name = tensor("zero_mean_sq_67_cast_fp16")]; + tensor var_26102 = const()[name = tensor("op_26102"), val = tensor([1])]; + tensor var_26103_cast_fp16 = reduce_mean(axes = var_26102, keep_dims = var_24632, x = zero_mean_sq_67_cast_fp16)[name = tensor("op_26103_cast_fp16")]; + tensor var_26104_to_fp16 = const()[name = tensor("op_26104_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_26105_cast_fp16 = add(x = var_26103_cast_fp16, y = var_26104_to_fp16)[name = tensor("op_26105_cast_fp16")]; + tensor denom_67_epsilon_0_to_fp16 = const()[name = tensor("denom_67_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_67_cast_fp16 = rsqrt(epsilon = denom_67_epsilon_0_to_fp16, x = var_26105_cast_fp16)[name = tensor("denom_67_cast_fp16")]; + tensor out_67_cast_fp16 = mul(x = zero_mean_67_cast_fp16, y = denom_67_cast_fp16)[name = tensor("out_67_cast_fp16")]; + tensor input_131_gamma_0_to_fp16 = const()[name = tensor("input_131_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(657068608)))]; + tensor input_131_beta_0_to_fp16 = const()[name = tensor("input_131_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(657071232)))]; + tensor input_131_epsilon_0_to_fp16 = const()[name = tensor("input_131_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_131_cast_fp16 = batch_norm(beta = input_131_beta_0_to_fp16, epsilon = input_131_epsilon_0_to_fp16, gamma = input_131_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_67_cast_fp16)[name = tensor("input_131_cast_fp16")]; + tensor var_26116 = const()[name = tensor("op_26116"), val = tensor([1, 1])]; + tensor var_26118 = const()[name = tensor("op_26118"), val = tensor([1, 1])]; + tensor input_133_pad_type_0 = const()[name = tensor("input_133_pad_type_0"), val = tensor("custom")]; + tensor input_133_pad_0 = const()[name = tensor("input_133_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_16_fc1_weight_to_fp16 = const()[name = tensor("layers_16_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(657073856)))]; + tensor layers_16_fc1_bias_to_fp16 = const()[name = tensor("layers_16_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(670181120)))]; + tensor input_133_cast_fp16 = conv(bias = layers_16_fc1_bias_to_fp16, dilations = var_26118, groups = var_24631, pad = input_133_pad_0, pad_type = input_133_pad_type_0, strides = var_26116, weight = layers_16_fc1_weight_to_fp16, x = input_131_cast_fp16)[name = tensor("input_133_cast_fp16")]; + tensor input_135_mode_0 = const()[name = tensor("input_135_mode_0"), val = tensor("EXACT")]; + tensor input_135_cast_fp16 = gelu(mode = input_135_mode_0, x = input_133_cast_fp16)[name = tensor("input_135_cast_fp16")]; + tensor var_26124 = const()[name = tensor("op_26124"), val = tensor([1, 1])]; + tensor var_26126 = const()[name = tensor("op_26126"), val = tensor([1, 1])]; + tensor hidden_states_37_pad_type_0 = const()[name = tensor("hidden_states_37_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_37_pad_0 = const()[name = tensor("hidden_states_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_16_fc2_weight_to_fp16 = const()[name = tensor("layers_16_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(670191424)))]; + tensor layers_16_fc2_bias_to_fp16 = const()[name = tensor("layers_16_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(683298688)))]; + tensor hidden_states_37_cast_fp16 = conv(bias = layers_16_fc2_bias_to_fp16, dilations = var_26126, groups = var_24631, pad = hidden_states_37_pad_0, pad_type = hidden_states_37_pad_type_0, strides = var_26124, weight = layers_16_fc2_weight_to_fp16, x = input_135_cast_fp16)[name = tensor("hidden_states_37_cast_fp16")]; + tensor inputs_69_cast_fp16 = add(x = inputs_67_cast_fp16, y = hidden_states_37_cast_fp16)[name = tensor("inputs_69_cast_fp16")]; + tensor var_26133 = const()[name = tensor("op_26133"), val = tensor(3)]; + tensor var_26158 = const()[name = tensor("op_26158"), val = tensor(1)]; + tensor var_26159 = const()[name = tensor("op_26159"), val = tensor(true)]; + tensor var_26169 = const()[name = tensor("op_26169"), val = tensor([1])]; + tensor channels_mean_69_cast_fp16 = reduce_mean(axes = var_26169, keep_dims = var_26159, x = inputs_69_cast_fp16)[name = tensor("channels_mean_69_cast_fp16")]; + tensor zero_mean_69_cast_fp16 = sub(x = inputs_69_cast_fp16, y = channels_mean_69_cast_fp16)[name = tensor("zero_mean_69_cast_fp16")]; + tensor zero_mean_sq_69_cast_fp16 = mul(x = zero_mean_69_cast_fp16, y = zero_mean_69_cast_fp16)[name = tensor("zero_mean_sq_69_cast_fp16")]; + tensor var_26173 = const()[name = tensor("op_26173"), val = tensor([1])]; + tensor var_26174_cast_fp16 = reduce_mean(axes = var_26173, keep_dims = var_26159, x = zero_mean_sq_69_cast_fp16)[name = tensor("op_26174_cast_fp16")]; + tensor var_26175_to_fp16 = const()[name = tensor("op_26175_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_26176_cast_fp16 = add(x = var_26174_cast_fp16, y = var_26175_to_fp16)[name = tensor("op_26176_cast_fp16")]; + tensor denom_69_epsilon_0_to_fp16 = const()[name = tensor("denom_69_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_69_cast_fp16 = rsqrt(epsilon = denom_69_epsilon_0_to_fp16, x = var_26176_cast_fp16)[name = tensor("denom_69_cast_fp16")]; + tensor out_69_cast_fp16 = mul(x = zero_mean_69_cast_fp16, y = denom_69_cast_fp16)[name = tensor("out_69_cast_fp16")]; + tensor obj_69_gamma_0_to_fp16 = const()[name = tensor("obj_69_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(683301312)))]; + tensor obj_69_beta_0_to_fp16 = const()[name = tensor("obj_69_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(683303936)))]; + tensor obj_69_epsilon_0_to_fp16 = const()[name = tensor("obj_69_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_69_cast_fp16 = batch_norm(beta = obj_69_beta_0_to_fp16, epsilon = obj_69_epsilon_0_to_fp16, gamma = obj_69_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_69_cast_fp16)[name = tensor("obj_69_cast_fp16")]; + tensor var_26191 = const()[name = tensor("op_26191"), val = tensor([1, 1])]; + tensor var_26193 = const()[name = tensor("op_26193"), val = tensor([1, 1])]; + tensor query_35_pad_type_0 = const()[name = tensor("query_35_pad_type_0"), val = tensor("custom")]; + tensor query_35_pad_0 = const()[name = tensor("query_35_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_17_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_17_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(683306560)))]; + tensor layers_17_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_17_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(686583424)))]; + tensor query_35_cast_fp16 = conv(bias = layers_17_self_attn_q_proj_bias_to_fp16, dilations = var_26193, groups = var_26158, pad = query_35_pad_0, pad_type = query_35_pad_type_0, strides = var_26191, weight = layers_17_self_attn_q_proj_weight_to_fp16, x = obj_69_cast_fp16)[name = tensor("query_35_cast_fp16")]; + tensor var_26197 = const()[name = tensor("op_26197"), val = tensor([1, 1])]; + tensor var_26199 = const()[name = tensor("op_26199"), val = tensor([1, 1])]; + tensor key_35_pad_type_0 = const()[name = tensor("key_35_pad_type_0"), val = tensor("custom")]; + tensor key_35_pad_0 = const()[name = tensor("key_35_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_17_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_17_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(686586048)))]; + tensor key_35_cast_fp16 = conv(dilations = var_26199, groups = var_26158, pad = key_35_pad_0, pad_type = key_35_pad_type_0, strides = var_26197, weight = layers_17_self_attn_k_proj_weight_to_fp16, x = obj_69_cast_fp16)[name = tensor("key_35_cast_fp16")]; + tensor var_26204 = const()[name = tensor("op_26204"), val = tensor([1, 1])]; + tensor var_26206 = const()[name = tensor("op_26206"), val = tensor([1, 1])]; + tensor value_35_pad_type_0 = const()[name = tensor("value_35_pad_type_0"), val = tensor("custom")]; + tensor value_35_pad_0 = const()[name = tensor("value_35_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_17_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_17_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(689862912)))]; + tensor layers_17_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_17_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(693139776)))]; + tensor value_35_cast_fp16 = conv(bias = layers_17_self_attn_v_proj_bias_to_fp16, dilations = var_26206, groups = var_26158, pad = value_35_pad_0, pad_type = value_35_pad_type_0, strides = var_26204, weight = layers_17_self_attn_v_proj_weight_to_fp16, x = obj_69_cast_fp16)[name = tensor("value_35_cast_fp16")]; + tensor var_26213_begin_0 = const()[name = tensor("op_26213_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_26213_end_0 = const()[name = tensor("op_26213_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_26213_end_mask_0 = const()[name = tensor("op_26213_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_26213_cast_fp16 = slice_by_index(begin = var_26213_begin_0, end = var_26213_end_0, end_mask = var_26213_end_mask_0, x = query_35_cast_fp16)[name = tensor("op_26213_cast_fp16")]; + tensor var_26217_begin_0 = const()[name = tensor("op_26217_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_26217_end_0 = const()[name = tensor("op_26217_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_26217_end_mask_0 = const()[name = tensor("op_26217_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_26217_cast_fp16 = slice_by_index(begin = var_26217_begin_0, end = var_26217_end_0, end_mask = var_26217_end_mask_0, x = query_35_cast_fp16)[name = tensor("op_26217_cast_fp16")]; + tensor var_26221_begin_0 = const()[name = tensor("op_26221_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_26221_end_0 = const()[name = tensor("op_26221_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_26221_end_mask_0 = const()[name = tensor("op_26221_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_26221_cast_fp16 = slice_by_index(begin = var_26221_begin_0, end = var_26221_end_0, end_mask = var_26221_end_mask_0, x = query_35_cast_fp16)[name = tensor("op_26221_cast_fp16")]; + tensor var_26225_begin_0 = const()[name = tensor("op_26225_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_26225_end_0 = const()[name = tensor("op_26225_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_26225_end_mask_0 = const()[name = tensor("op_26225_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_26225_cast_fp16 = slice_by_index(begin = var_26225_begin_0, end = var_26225_end_0, end_mask = var_26225_end_mask_0, x = query_35_cast_fp16)[name = tensor("op_26225_cast_fp16")]; + tensor var_26229_begin_0 = const()[name = tensor("op_26229_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_26229_end_0 = const()[name = tensor("op_26229_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_26229_end_mask_0 = const()[name = tensor("op_26229_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_26229_cast_fp16 = slice_by_index(begin = var_26229_begin_0, end = var_26229_end_0, end_mask = var_26229_end_mask_0, x = query_35_cast_fp16)[name = tensor("op_26229_cast_fp16")]; + tensor var_26233_begin_0 = const()[name = tensor("op_26233_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_26233_end_0 = const()[name = tensor("op_26233_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_26233_end_mask_0 = const()[name = tensor("op_26233_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_26233_cast_fp16 = slice_by_index(begin = var_26233_begin_0, end = var_26233_end_0, end_mask = var_26233_end_mask_0, x = query_35_cast_fp16)[name = tensor("op_26233_cast_fp16")]; + tensor var_26237_begin_0 = const()[name = tensor("op_26237_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_26237_end_0 = const()[name = tensor("op_26237_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_26237_end_mask_0 = const()[name = tensor("op_26237_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_26237_cast_fp16 = slice_by_index(begin = var_26237_begin_0, end = var_26237_end_0, end_mask = var_26237_end_mask_0, x = query_35_cast_fp16)[name = tensor("op_26237_cast_fp16")]; + tensor var_26241_begin_0 = const()[name = tensor("op_26241_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_26241_end_0 = const()[name = tensor("op_26241_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_26241_end_mask_0 = const()[name = tensor("op_26241_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_26241_cast_fp16 = slice_by_index(begin = var_26241_begin_0, end = var_26241_end_0, end_mask = var_26241_end_mask_0, x = query_35_cast_fp16)[name = tensor("op_26241_cast_fp16")]; + tensor var_26245_begin_0 = const()[name = tensor("op_26245_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_26245_end_0 = const()[name = tensor("op_26245_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_26245_end_mask_0 = const()[name = tensor("op_26245_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_26245_cast_fp16 = slice_by_index(begin = var_26245_begin_0, end = var_26245_end_0, end_mask = var_26245_end_mask_0, x = query_35_cast_fp16)[name = tensor("op_26245_cast_fp16")]; + tensor var_26249_begin_0 = const()[name = tensor("op_26249_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_26249_end_0 = const()[name = tensor("op_26249_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_26249_end_mask_0 = const()[name = tensor("op_26249_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_26249_cast_fp16 = slice_by_index(begin = var_26249_begin_0, end = var_26249_end_0, end_mask = var_26249_end_mask_0, x = query_35_cast_fp16)[name = tensor("op_26249_cast_fp16")]; + tensor var_26253_begin_0 = const()[name = tensor("op_26253_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_26253_end_0 = const()[name = tensor("op_26253_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_26253_end_mask_0 = const()[name = tensor("op_26253_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_26253_cast_fp16 = slice_by_index(begin = var_26253_begin_0, end = var_26253_end_0, end_mask = var_26253_end_mask_0, x = query_35_cast_fp16)[name = tensor("op_26253_cast_fp16")]; + tensor var_26257_begin_0 = const()[name = tensor("op_26257_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_26257_end_0 = const()[name = tensor("op_26257_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_26257_end_mask_0 = const()[name = tensor("op_26257_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_26257_cast_fp16 = slice_by_index(begin = var_26257_begin_0, end = var_26257_end_0, end_mask = var_26257_end_mask_0, x = query_35_cast_fp16)[name = tensor("op_26257_cast_fp16")]; + tensor var_26261_begin_0 = const()[name = tensor("op_26261_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_26261_end_0 = const()[name = tensor("op_26261_end_0"), val = tensor([1, 832, 1, 1500])]; + tensor var_26261_end_mask_0 = const()[name = tensor("op_26261_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_26261_cast_fp16 = slice_by_index(begin = var_26261_begin_0, end = var_26261_end_0, end_mask = var_26261_end_mask_0, x = query_35_cast_fp16)[name = tensor("op_26261_cast_fp16")]; + tensor var_26265_begin_0 = const()[name = tensor("op_26265_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_26265_end_0 = const()[name = tensor("op_26265_end_0"), val = tensor([1, 896, 1, 1500])]; + tensor var_26265_end_mask_0 = const()[name = tensor("op_26265_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_26265_cast_fp16 = slice_by_index(begin = var_26265_begin_0, end = var_26265_end_0, end_mask = var_26265_end_mask_0, x = query_35_cast_fp16)[name = tensor("op_26265_cast_fp16")]; + tensor var_26269_begin_0 = const()[name = tensor("op_26269_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_26269_end_0 = const()[name = tensor("op_26269_end_0"), val = tensor([1, 960, 1, 1500])]; + tensor var_26269_end_mask_0 = const()[name = tensor("op_26269_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_26269_cast_fp16 = slice_by_index(begin = var_26269_begin_0, end = var_26269_end_0, end_mask = var_26269_end_mask_0, x = query_35_cast_fp16)[name = tensor("op_26269_cast_fp16")]; + tensor var_26273_begin_0 = const()[name = tensor("op_26273_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_26273_end_0 = const()[name = tensor("op_26273_end_0"), val = tensor([1, 1024, 1, 1500])]; + tensor var_26273_end_mask_0 = const()[name = tensor("op_26273_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_26273_cast_fp16 = slice_by_index(begin = var_26273_begin_0, end = var_26273_end_0, end_mask = var_26273_end_mask_0, x = query_35_cast_fp16)[name = tensor("op_26273_cast_fp16")]; + tensor var_26277_begin_0 = const()[name = tensor("op_26277_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_26277_end_0 = const()[name = tensor("op_26277_end_0"), val = tensor([1, 1088, 1, 1500])]; + tensor var_26277_end_mask_0 = const()[name = tensor("op_26277_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_26277_cast_fp16 = slice_by_index(begin = var_26277_begin_0, end = var_26277_end_0, end_mask = var_26277_end_mask_0, x = query_35_cast_fp16)[name = tensor("op_26277_cast_fp16")]; + tensor var_26281_begin_0 = const()[name = tensor("op_26281_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_26281_end_0 = const()[name = tensor("op_26281_end_0"), val = tensor([1, 1152, 1, 1500])]; + tensor var_26281_end_mask_0 = const()[name = tensor("op_26281_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_26281_cast_fp16 = slice_by_index(begin = var_26281_begin_0, end = var_26281_end_0, end_mask = var_26281_end_mask_0, x = query_35_cast_fp16)[name = tensor("op_26281_cast_fp16")]; + tensor var_26285_begin_0 = const()[name = tensor("op_26285_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_26285_end_0 = const()[name = tensor("op_26285_end_0"), val = tensor([1, 1216, 1, 1500])]; + tensor var_26285_end_mask_0 = const()[name = tensor("op_26285_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_26285_cast_fp16 = slice_by_index(begin = var_26285_begin_0, end = var_26285_end_0, end_mask = var_26285_end_mask_0, x = query_35_cast_fp16)[name = tensor("op_26285_cast_fp16")]; + tensor var_26289_begin_0 = const()[name = tensor("op_26289_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_26289_end_0 = const()[name = tensor("op_26289_end_0"), val = tensor([1, 1280, 1, 1500])]; + tensor var_26289_end_mask_0 = const()[name = tensor("op_26289_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_26289_cast_fp16 = slice_by_index(begin = var_26289_begin_0, end = var_26289_end_0, end_mask = var_26289_end_mask_0, x = query_35_cast_fp16)[name = tensor("op_26289_cast_fp16")]; + tensor var_26298_begin_0 = const()[name = tensor("op_26298_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_26298_end_0 = const()[name = tensor("op_26298_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_26298_end_mask_0 = const()[name = tensor("op_26298_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26298_cast_fp16 = slice_by_index(begin = var_26298_begin_0, end = var_26298_end_0, end_mask = var_26298_end_mask_0, x = var_26213_cast_fp16)[name = tensor("op_26298_cast_fp16")]; + tensor var_26305_begin_0 = const()[name = tensor("op_26305_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_26305_end_0 = const()[name = tensor("op_26305_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_26305_end_mask_0 = const()[name = tensor("op_26305_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26305_cast_fp16 = slice_by_index(begin = var_26305_begin_0, end = var_26305_end_0, end_mask = var_26305_end_mask_0, x = var_26213_cast_fp16)[name = tensor("op_26305_cast_fp16")]; + tensor var_26312_begin_0 = const()[name = tensor("op_26312_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_26312_end_0 = const()[name = tensor("op_26312_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_26312_end_mask_0 = const()[name = tensor("op_26312_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26312_cast_fp16 = slice_by_index(begin = var_26312_begin_0, end = var_26312_end_0, end_mask = var_26312_end_mask_0, x = var_26213_cast_fp16)[name = tensor("op_26312_cast_fp16")]; + tensor var_26319_begin_0 = const()[name = tensor("op_26319_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_26319_end_0 = const()[name = tensor("op_26319_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_26319_end_mask_0 = const()[name = tensor("op_26319_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26319_cast_fp16 = slice_by_index(begin = var_26319_begin_0, end = var_26319_end_0, end_mask = var_26319_end_mask_0, x = var_26213_cast_fp16)[name = tensor("op_26319_cast_fp16")]; + tensor var_26326_begin_0 = const()[name = tensor("op_26326_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_26326_end_0 = const()[name = tensor("op_26326_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_26326_end_mask_0 = const()[name = tensor("op_26326_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26326_cast_fp16 = slice_by_index(begin = var_26326_begin_0, end = var_26326_end_0, end_mask = var_26326_end_mask_0, x = var_26217_cast_fp16)[name = tensor("op_26326_cast_fp16")]; + tensor var_26333_begin_0 = const()[name = tensor("op_26333_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_26333_end_0 = const()[name = tensor("op_26333_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_26333_end_mask_0 = const()[name = tensor("op_26333_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26333_cast_fp16 = slice_by_index(begin = var_26333_begin_0, end = var_26333_end_0, end_mask = var_26333_end_mask_0, x = var_26217_cast_fp16)[name = tensor("op_26333_cast_fp16")]; + tensor var_26340_begin_0 = const()[name = tensor("op_26340_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_26340_end_0 = const()[name = tensor("op_26340_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_26340_end_mask_0 = const()[name = tensor("op_26340_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26340_cast_fp16 = slice_by_index(begin = var_26340_begin_0, end = var_26340_end_0, end_mask = var_26340_end_mask_0, x = var_26217_cast_fp16)[name = tensor("op_26340_cast_fp16")]; + tensor var_26347_begin_0 = const()[name = tensor("op_26347_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_26347_end_0 = const()[name = tensor("op_26347_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_26347_end_mask_0 = const()[name = tensor("op_26347_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26347_cast_fp16 = slice_by_index(begin = var_26347_begin_0, end = var_26347_end_0, end_mask = var_26347_end_mask_0, x = var_26217_cast_fp16)[name = tensor("op_26347_cast_fp16")]; + tensor var_26354_begin_0 = const()[name = tensor("op_26354_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_26354_end_0 = const()[name = tensor("op_26354_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_26354_end_mask_0 = const()[name = tensor("op_26354_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26354_cast_fp16 = slice_by_index(begin = var_26354_begin_0, end = var_26354_end_0, end_mask = var_26354_end_mask_0, x = var_26221_cast_fp16)[name = tensor("op_26354_cast_fp16")]; + tensor var_26361_begin_0 = const()[name = tensor("op_26361_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_26361_end_0 = const()[name = tensor("op_26361_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_26361_end_mask_0 = const()[name = tensor("op_26361_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26361_cast_fp16 = slice_by_index(begin = var_26361_begin_0, end = var_26361_end_0, end_mask = var_26361_end_mask_0, x = var_26221_cast_fp16)[name = tensor("op_26361_cast_fp16")]; + tensor var_26368_begin_0 = const()[name = tensor("op_26368_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_26368_end_0 = const()[name = tensor("op_26368_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_26368_end_mask_0 = const()[name = tensor("op_26368_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26368_cast_fp16 = slice_by_index(begin = var_26368_begin_0, end = var_26368_end_0, end_mask = var_26368_end_mask_0, x = var_26221_cast_fp16)[name = tensor("op_26368_cast_fp16")]; + tensor var_26375_begin_0 = const()[name = tensor("op_26375_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_26375_end_0 = const()[name = tensor("op_26375_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_26375_end_mask_0 = const()[name = tensor("op_26375_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26375_cast_fp16 = slice_by_index(begin = var_26375_begin_0, end = var_26375_end_0, end_mask = var_26375_end_mask_0, x = var_26221_cast_fp16)[name = tensor("op_26375_cast_fp16")]; + tensor var_26382_begin_0 = const()[name = tensor("op_26382_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_26382_end_0 = const()[name = tensor("op_26382_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_26382_end_mask_0 = const()[name = tensor("op_26382_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26382_cast_fp16 = slice_by_index(begin = var_26382_begin_0, end = var_26382_end_0, end_mask = var_26382_end_mask_0, x = var_26225_cast_fp16)[name = tensor("op_26382_cast_fp16")]; + tensor var_26389_begin_0 = const()[name = tensor("op_26389_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_26389_end_0 = const()[name = tensor("op_26389_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_26389_end_mask_0 = const()[name = tensor("op_26389_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26389_cast_fp16 = slice_by_index(begin = var_26389_begin_0, end = var_26389_end_0, end_mask = var_26389_end_mask_0, x = var_26225_cast_fp16)[name = tensor("op_26389_cast_fp16")]; + tensor var_26396_begin_0 = const()[name = tensor("op_26396_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_26396_end_0 = const()[name = tensor("op_26396_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_26396_end_mask_0 = const()[name = tensor("op_26396_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26396_cast_fp16 = slice_by_index(begin = var_26396_begin_0, end = var_26396_end_0, end_mask = var_26396_end_mask_0, x = var_26225_cast_fp16)[name = tensor("op_26396_cast_fp16")]; + tensor var_26403_begin_0 = const()[name = tensor("op_26403_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_26403_end_0 = const()[name = tensor("op_26403_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_26403_end_mask_0 = const()[name = tensor("op_26403_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26403_cast_fp16 = slice_by_index(begin = var_26403_begin_0, end = var_26403_end_0, end_mask = var_26403_end_mask_0, x = var_26225_cast_fp16)[name = tensor("op_26403_cast_fp16")]; + tensor var_26410_begin_0 = const()[name = tensor("op_26410_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_26410_end_0 = const()[name = tensor("op_26410_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_26410_end_mask_0 = const()[name = tensor("op_26410_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26410_cast_fp16 = slice_by_index(begin = var_26410_begin_0, end = var_26410_end_0, end_mask = var_26410_end_mask_0, x = var_26229_cast_fp16)[name = tensor("op_26410_cast_fp16")]; + tensor var_26417_begin_0 = const()[name = tensor("op_26417_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_26417_end_0 = const()[name = tensor("op_26417_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_26417_end_mask_0 = const()[name = tensor("op_26417_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26417_cast_fp16 = slice_by_index(begin = var_26417_begin_0, end = var_26417_end_0, end_mask = var_26417_end_mask_0, x = var_26229_cast_fp16)[name = tensor("op_26417_cast_fp16")]; + tensor var_26424_begin_0 = const()[name = tensor("op_26424_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_26424_end_0 = const()[name = tensor("op_26424_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_26424_end_mask_0 = const()[name = tensor("op_26424_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26424_cast_fp16 = slice_by_index(begin = var_26424_begin_0, end = var_26424_end_0, end_mask = var_26424_end_mask_0, x = var_26229_cast_fp16)[name = tensor("op_26424_cast_fp16")]; + tensor var_26431_begin_0 = const()[name = tensor("op_26431_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_26431_end_0 = const()[name = tensor("op_26431_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_26431_end_mask_0 = const()[name = tensor("op_26431_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26431_cast_fp16 = slice_by_index(begin = var_26431_begin_0, end = var_26431_end_0, end_mask = var_26431_end_mask_0, x = var_26229_cast_fp16)[name = tensor("op_26431_cast_fp16")]; + tensor var_26438_begin_0 = const()[name = tensor("op_26438_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_26438_end_0 = const()[name = tensor("op_26438_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_26438_end_mask_0 = const()[name = tensor("op_26438_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26438_cast_fp16 = slice_by_index(begin = var_26438_begin_0, end = var_26438_end_0, end_mask = var_26438_end_mask_0, x = var_26233_cast_fp16)[name = tensor("op_26438_cast_fp16")]; + tensor var_26445_begin_0 = const()[name = tensor("op_26445_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_26445_end_0 = const()[name = tensor("op_26445_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_26445_end_mask_0 = const()[name = tensor("op_26445_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26445_cast_fp16 = slice_by_index(begin = var_26445_begin_0, end = var_26445_end_0, end_mask = var_26445_end_mask_0, x = var_26233_cast_fp16)[name = tensor("op_26445_cast_fp16")]; + tensor var_26452_begin_0 = const()[name = tensor("op_26452_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_26452_end_0 = const()[name = tensor("op_26452_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_26452_end_mask_0 = const()[name = tensor("op_26452_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26452_cast_fp16 = slice_by_index(begin = var_26452_begin_0, end = var_26452_end_0, end_mask = var_26452_end_mask_0, x = var_26233_cast_fp16)[name = tensor("op_26452_cast_fp16")]; + tensor var_26459_begin_0 = const()[name = tensor("op_26459_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_26459_end_0 = const()[name = tensor("op_26459_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_26459_end_mask_0 = const()[name = tensor("op_26459_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26459_cast_fp16 = slice_by_index(begin = var_26459_begin_0, end = var_26459_end_0, end_mask = var_26459_end_mask_0, x = var_26233_cast_fp16)[name = tensor("op_26459_cast_fp16")]; + tensor var_26466_begin_0 = const()[name = tensor("op_26466_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_26466_end_0 = const()[name = tensor("op_26466_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_26466_end_mask_0 = const()[name = tensor("op_26466_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26466_cast_fp16 = slice_by_index(begin = var_26466_begin_0, end = var_26466_end_0, end_mask = var_26466_end_mask_0, x = var_26237_cast_fp16)[name = tensor("op_26466_cast_fp16")]; + tensor var_26473_begin_0 = const()[name = tensor("op_26473_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_26473_end_0 = const()[name = tensor("op_26473_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_26473_end_mask_0 = const()[name = tensor("op_26473_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26473_cast_fp16 = slice_by_index(begin = var_26473_begin_0, end = var_26473_end_0, end_mask = var_26473_end_mask_0, x = var_26237_cast_fp16)[name = tensor("op_26473_cast_fp16")]; + tensor var_26480_begin_0 = const()[name = tensor("op_26480_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_26480_end_0 = const()[name = tensor("op_26480_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_26480_end_mask_0 = const()[name = tensor("op_26480_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26480_cast_fp16 = slice_by_index(begin = var_26480_begin_0, end = var_26480_end_0, end_mask = var_26480_end_mask_0, x = var_26237_cast_fp16)[name = tensor("op_26480_cast_fp16")]; + tensor var_26487_begin_0 = const()[name = tensor("op_26487_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_26487_end_0 = const()[name = tensor("op_26487_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_26487_end_mask_0 = const()[name = tensor("op_26487_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26487_cast_fp16 = slice_by_index(begin = var_26487_begin_0, end = var_26487_end_0, end_mask = var_26487_end_mask_0, x = var_26237_cast_fp16)[name = tensor("op_26487_cast_fp16")]; + tensor var_26494_begin_0 = const()[name = tensor("op_26494_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_26494_end_0 = const()[name = tensor("op_26494_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_26494_end_mask_0 = const()[name = tensor("op_26494_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26494_cast_fp16 = slice_by_index(begin = var_26494_begin_0, end = var_26494_end_0, end_mask = var_26494_end_mask_0, x = var_26241_cast_fp16)[name = tensor("op_26494_cast_fp16")]; + tensor var_26501_begin_0 = const()[name = tensor("op_26501_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_26501_end_0 = const()[name = tensor("op_26501_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_26501_end_mask_0 = const()[name = tensor("op_26501_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26501_cast_fp16 = slice_by_index(begin = var_26501_begin_0, end = var_26501_end_0, end_mask = var_26501_end_mask_0, x = var_26241_cast_fp16)[name = tensor("op_26501_cast_fp16")]; + tensor var_26508_begin_0 = const()[name = tensor("op_26508_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_26508_end_0 = const()[name = tensor("op_26508_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_26508_end_mask_0 = const()[name = tensor("op_26508_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26508_cast_fp16 = slice_by_index(begin = var_26508_begin_0, end = var_26508_end_0, end_mask = var_26508_end_mask_0, x = var_26241_cast_fp16)[name = tensor("op_26508_cast_fp16")]; + tensor var_26515_begin_0 = const()[name = tensor("op_26515_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_26515_end_0 = const()[name = tensor("op_26515_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_26515_end_mask_0 = const()[name = tensor("op_26515_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26515_cast_fp16 = slice_by_index(begin = var_26515_begin_0, end = var_26515_end_0, end_mask = var_26515_end_mask_0, x = var_26241_cast_fp16)[name = tensor("op_26515_cast_fp16")]; + tensor var_26522_begin_0 = const()[name = tensor("op_26522_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_26522_end_0 = const()[name = tensor("op_26522_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_26522_end_mask_0 = const()[name = tensor("op_26522_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26522_cast_fp16 = slice_by_index(begin = var_26522_begin_0, end = var_26522_end_0, end_mask = var_26522_end_mask_0, x = var_26245_cast_fp16)[name = tensor("op_26522_cast_fp16")]; + tensor var_26529_begin_0 = const()[name = tensor("op_26529_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_26529_end_0 = const()[name = tensor("op_26529_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_26529_end_mask_0 = const()[name = tensor("op_26529_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26529_cast_fp16 = slice_by_index(begin = var_26529_begin_0, end = var_26529_end_0, end_mask = var_26529_end_mask_0, x = var_26245_cast_fp16)[name = tensor("op_26529_cast_fp16")]; + tensor var_26536_begin_0 = const()[name = tensor("op_26536_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_26536_end_0 = const()[name = tensor("op_26536_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_26536_end_mask_0 = const()[name = tensor("op_26536_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26536_cast_fp16 = slice_by_index(begin = var_26536_begin_0, end = var_26536_end_0, end_mask = var_26536_end_mask_0, x = var_26245_cast_fp16)[name = tensor("op_26536_cast_fp16")]; + tensor var_26543_begin_0 = const()[name = tensor("op_26543_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_26543_end_0 = const()[name = tensor("op_26543_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_26543_end_mask_0 = const()[name = tensor("op_26543_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26543_cast_fp16 = slice_by_index(begin = var_26543_begin_0, end = var_26543_end_0, end_mask = var_26543_end_mask_0, x = var_26245_cast_fp16)[name = tensor("op_26543_cast_fp16")]; + tensor var_26550_begin_0 = const()[name = tensor("op_26550_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_26550_end_0 = const()[name = tensor("op_26550_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_26550_end_mask_0 = const()[name = tensor("op_26550_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26550_cast_fp16 = slice_by_index(begin = var_26550_begin_0, end = var_26550_end_0, end_mask = var_26550_end_mask_0, x = var_26249_cast_fp16)[name = tensor("op_26550_cast_fp16")]; + tensor var_26557_begin_0 = const()[name = tensor("op_26557_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_26557_end_0 = const()[name = tensor("op_26557_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_26557_end_mask_0 = const()[name = tensor("op_26557_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26557_cast_fp16 = slice_by_index(begin = var_26557_begin_0, end = var_26557_end_0, end_mask = var_26557_end_mask_0, x = var_26249_cast_fp16)[name = tensor("op_26557_cast_fp16")]; + tensor var_26564_begin_0 = const()[name = tensor("op_26564_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_26564_end_0 = const()[name = tensor("op_26564_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_26564_end_mask_0 = const()[name = tensor("op_26564_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26564_cast_fp16 = slice_by_index(begin = var_26564_begin_0, end = var_26564_end_0, end_mask = var_26564_end_mask_0, x = var_26249_cast_fp16)[name = tensor("op_26564_cast_fp16")]; + tensor var_26571_begin_0 = const()[name = tensor("op_26571_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_26571_end_0 = const()[name = tensor("op_26571_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_26571_end_mask_0 = const()[name = tensor("op_26571_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26571_cast_fp16 = slice_by_index(begin = var_26571_begin_0, end = var_26571_end_0, end_mask = var_26571_end_mask_0, x = var_26249_cast_fp16)[name = tensor("op_26571_cast_fp16")]; + tensor var_26578_begin_0 = const()[name = tensor("op_26578_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_26578_end_0 = const()[name = tensor("op_26578_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_26578_end_mask_0 = const()[name = tensor("op_26578_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26578_cast_fp16 = slice_by_index(begin = var_26578_begin_0, end = var_26578_end_0, end_mask = var_26578_end_mask_0, x = var_26253_cast_fp16)[name = tensor("op_26578_cast_fp16")]; + tensor var_26585_begin_0 = const()[name = tensor("op_26585_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_26585_end_0 = const()[name = tensor("op_26585_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_26585_end_mask_0 = const()[name = tensor("op_26585_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26585_cast_fp16 = slice_by_index(begin = var_26585_begin_0, end = var_26585_end_0, end_mask = var_26585_end_mask_0, x = var_26253_cast_fp16)[name = tensor("op_26585_cast_fp16")]; + tensor var_26592_begin_0 = const()[name = tensor("op_26592_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_26592_end_0 = const()[name = tensor("op_26592_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_26592_end_mask_0 = const()[name = tensor("op_26592_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26592_cast_fp16 = slice_by_index(begin = var_26592_begin_0, end = var_26592_end_0, end_mask = var_26592_end_mask_0, x = var_26253_cast_fp16)[name = tensor("op_26592_cast_fp16")]; + tensor var_26599_begin_0 = const()[name = tensor("op_26599_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_26599_end_0 = const()[name = tensor("op_26599_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_26599_end_mask_0 = const()[name = tensor("op_26599_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26599_cast_fp16 = slice_by_index(begin = var_26599_begin_0, end = var_26599_end_0, end_mask = var_26599_end_mask_0, x = var_26253_cast_fp16)[name = tensor("op_26599_cast_fp16")]; + tensor var_26606_begin_0 = const()[name = tensor("op_26606_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_26606_end_0 = const()[name = tensor("op_26606_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_26606_end_mask_0 = const()[name = tensor("op_26606_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26606_cast_fp16 = slice_by_index(begin = var_26606_begin_0, end = var_26606_end_0, end_mask = var_26606_end_mask_0, x = var_26257_cast_fp16)[name = tensor("op_26606_cast_fp16")]; + tensor var_26613_begin_0 = const()[name = tensor("op_26613_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_26613_end_0 = const()[name = tensor("op_26613_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_26613_end_mask_0 = const()[name = tensor("op_26613_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26613_cast_fp16 = slice_by_index(begin = var_26613_begin_0, end = var_26613_end_0, end_mask = var_26613_end_mask_0, x = var_26257_cast_fp16)[name = tensor("op_26613_cast_fp16")]; + tensor var_26620_begin_0 = const()[name = tensor("op_26620_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_26620_end_0 = const()[name = tensor("op_26620_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_26620_end_mask_0 = const()[name = tensor("op_26620_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26620_cast_fp16 = slice_by_index(begin = var_26620_begin_0, end = var_26620_end_0, end_mask = var_26620_end_mask_0, x = var_26257_cast_fp16)[name = tensor("op_26620_cast_fp16")]; + tensor var_26627_begin_0 = const()[name = tensor("op_26627_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_26627_end_0 = const()[name = tensor("op_26627_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_26627_end_mask_0 = const()[name = tensor("op_26627_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26627_cast_fp16 = slice_by_index(begin = var_26627_begin_0, end = var_26627_end_0, end_mask = var_26627_end_mask_0, x = var_26257_cast_fp16)[name = tensor("op_26627_cast_fp16")]; + tensor var_26634_begin_0 = const()[name = tensor("op_26634_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_26634_end_0 = const()[name = tensor("op_26634_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_26634_end_mask_0 = const()[name = tensor("op_26634_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26634_cast_fp16 = slice_by_index(begin = var_26634_begin_0, end = var_26634_end_0, end_mask = var_26634_end_mask_0, x = var_26261_cast_fp16)[name = tensor("op_26634_cast_fp16")]; + tensor var_26641_begin_0 = const()[name = tensor("op_26641_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_26641_end_0 = const()[name = tensor("op_26641_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_26641_end_mask_0 = const()[name = tensor("op_26641_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26641_cast_fp16 = slice_by_index(begin = var_26641_begin_0, end = var_26641_end_0, end_mask = var_26641_end_mask_0, x = var_26261_cast_fp16)[name = tensor("op_26641_cast_fp16")]; + tensor var_26648_begin_0 = const()[name = tensor("op_26648_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_26648_end_0 = const()[name = tensor("op_26648_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_26648_end_mask_0 = const()[name = tensor("op_26648_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26648_cast_fp16 = slice_by_index(begin = var_26648_begin_0, end = var_26648_end_0, end_mask = var_26648_end_mask_0, x = var_26261_cast_fp16)[name = tensor("op_26648_cast_fp16")]; + tensor var_26655_begin_0 = const()[name = tensor("op_26655_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_26655_end_0 = const()[name = tensor("op_26655_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_26655_end_mask_0 = const()[name = tensor("op_26655_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26655_cast_fp16 = slice_by_index(begin = var_26655_begin_0, end = var_26655_end_0, end_mask = var_26655_end_mask_0, x = var_26261_cast_fp16)[name = tensor("op_26655_cast_fp16")]; + tensor var_26662_begin_0 = const()[name = tensor("op_26662_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_26662_end_0 = const()[name = tensor("op_26662_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_26662_end_mask_0 = const()[name = tensor("op_26662_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26662_cast_fp16 = slice_by_index(begin = var_26662_begin_0, end = var_26662_end_0, end_mask = var_26662_end_mask_0, x = var_26265_cast_fp16)[name = tensor("op_26662_cast_fp16")]; + tensor var_26669_begin_0 = const()[name = tensor("op_26669_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_26669_end_0 = const()[name = tensor("op_26669_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_26669_end_mask_0 = const()[name = tensor("op_26669_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26669_cast_fp16 = slice_by_index(begin = var_26669_begin_0, end = var_26669_end_0, end_mask = var_26669_end_mask_0, x = var_26265_cast_fp16)[name = tensor("op_26669_cast_fp16")]; + tensor var_26676_begin_0 = const()[name = tensor("op_26676_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_26676_end_0 = const()[name = tensor("op_26676_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_26676_end_mask_0 = const()[name = tensor("op_26676_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26676_cast_fp16 = slice_by_index(begin = var_26676_begin_0, end = var_26676_end_0, end_mask = var_26676_end_mask_0, x = var_26265_cast_fp16)[name = tensor("op_26676_cast_fp16")]; + tensor var_26683_begin_0 = const()[name = tensor("op_26683_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_26683_end_0 = const()[name = tensor("op_26683_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_26683_end_mask_0 = const()[name = tensor("op_26683_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26683_cast_fp16 = slice_by_index(begin = var_26683_begin_0, end = var_26683_end_0, end_mask = var_26683_end_mask_0, x = var_26265_cast_fp16)[name = tensor("op_26683_cast_fp16")]; + tensor var_26690_begin_0 = const()[name = tensor("op_26690_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_26690_end_0 = const()[name = tensor("op_26690_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_26690_end_mask_0 = const()[name = tensor("op_26690_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26690_cast_fp16 = slice_by_index(begin = var_26690_begin_0, end = var_26690_end_0, end_mask = var_26690_end_mask_0, x = var_26269_cast_fp16)[name = tensor("op_26690_cast_fp16")]; + tensor var_26697_begin_0 = const()[name = tensor("op_26697_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_26697_end_0 = const()[name = tensor("op_26697_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_26697_end_mask_0 = const()[name = tensor("op_26697_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26697_cast_fp16 = slice_by_index(begin = var_26697_begin_0, end = var_26697_end_0, end_mask = var_26697_end_mask_0, x = var_26269_cast_fp16)[name = tensor("op_26697_cast_fp16")]; + tensor var_26704_begin_0 = const()[name = tensor("op_26704_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_26704_end_0 = const()[name = tensor("op_26704_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_26704_end_mask_0 = const()[name = tensor("op_26704_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26704_cast_fp16 = slice_by_index(begin = var_26704_begin_0, end = var_26704_end_0, end_mask = var_26704_end_mask_0, x = var_26269_cast_fp16)[name = tensor("op_26704_cast_fp16")]; + tensor var_26711_begin_0 = const()[name = tensor("op_26711_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_26711_end_0 = const()[name = tensor("op_26711_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_26711_end_mask_0 = const()[name = tensor("op_26711_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26711_cast_fp16 = slice_by_index(begin = var_26711_begin_0, end = var_26711_end_0, end_mask = var_26711_end_mask_0, x = var_26269_cast_fp16)[name = tensor("op_26711_cast_fp16")]; + tensor var_26718_begin_0 = const()[name = tensor("op_26718_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_26718_end_0 = const()[name = tensor("op_26718_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_26718_end_mask_0 = const()[name = tensor("op_26718_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26718_cast_fp16 = slice_by_index(begin = var_26718_begin_0, end = var_26718_end_0, end_mask = var_26718_end_mask_0, x = var_26273_cast_fp16)[name = tensor("op_26718_cast_fp16")]; + tensor var_26725_begin_0 = const()[name = tensor("op_26725_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_26725_end_0 = const()[name = tensor("op_26725_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_26725_end_mask_0 = const()[name = tensor("op_26725_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26725_cast_fp16 = slice_by_index(begin = var_26725_begin_0, end = var_26725_end_0, end_mask = var_26725_end_mask_0, x = var_26273_cast_fp16)[name = tensor("op_26725_cast_fp16")]; + tensor var_26732_begin_0 = const()[name = tensor("op_26732_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_26732_end_0 = const()[name = tensor("op_26732_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_26732_end_mask_0 = const()[name = tensor("op_26732_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26732_cast_fp16 = slice_by_index(begin = var_26732_begin_0, end = var_26732_end_0, end_mask = var_26732_end_mask_0, x = var_26273_cast_fp16)[name = tensor("op_26732_cast_fp16")]; + tensor var_26739_begin_0 = const()[name = tensor("op_26739_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_26739_end_0 = const()[name = tensor("op_26739_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_26739_end_mask_0 = const()[name = tensor("op_26739_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26739_cast_fp16 = slice_by_index(begin = var_26739_begin_0, end = var_26739_end_0, end_mask = var_26739_end_mask_0, x = var_26273_cast_fp16)[name = tensor("op_26739_cast_fp16")]; + tensor var_26746_begin_0 = const()[name = tensor("op_26746_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_26746_end_0 = const()[name = tensor("op_26746_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_26746_end_mask_0 = const()[name = tensor("op_26746_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26746_cast_fp16 = slice_by_index(begin = var_26746_begin_0, end = var_26746_end_0, end_mask = var_26746_end_mask_0, x = var_26277_cast_fp16)[name = tensor("op_26746_cast_fp16")]; + tensor var_26753_begin_0 = const()[name = tensor("op_26753_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_26753_end_0 = const()[name = tensor("op_26753_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_26753_end_mask_0 = const()[name = tensor("op_26753_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26753_cast_fp16 = slice_by_index(begin = var_26753_begin_0, end = var_26753_end_0, end_mask = var_26753_end_mask_0, x = var_26277_cast_fp16)[name = tensor("op_26753_cast_fp16")]; + tensor var_26760_begin_0 = const()[name = tensor("op_26760_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_26760_end_0 = const()[name = tensor("op_26760_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_26760_end_mask_0 = const()[name = tensor("op_26760_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26760_cast_fp16 = slice_by_index(begin = var_26760_begin_0, end = var_26760_end_0, end_mask = var_26760_end_mask_0, x = var_26277_cast_fp16)[name = tensor("op_26760_cast_fp16")]; + tensor var_26767_begin_0 = const()[name = tensor("op_26767_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_26767_end_0 = const()[name = tensor("op_26767_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_26767_end_mask_0 = const()[name = tensor("op_26767_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26767_cast_fp16 = slice_by_index(begin = var_26767_begin_0, end = var_26767_end_0, end_mask = var_26767_end_mask_0, x = var_26277_cast_fp16)[name = tensor("op_26767_cast_fp16")]; + tensor var_26774_begin_0 = const()[name = tensor("op_26774_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_26774_end_0 = const()[name = tensor("op_26774_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_26774_end_mask_0 = const()[name = tensor("op_26774_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26774_cast_fp16 = slice_by_index(begin = var_26774_begin_0, end = var_26774_end_0, end_mask = var_26774_end_mask_0, x = var_26281_cast_fp16)[name = tensor("op_26774_cast_fp16")]; + tensor var_26781_begin_0 = const()[name = tensor("op_26781_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_26781_end_0 = const()[name = tensor("op_26781_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_26781_end_mask_0 = const()[name = tensor("op_26781_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26781_cast_fp16 = slice_by_index(begin = var_26781_begin_0, end = var_26781_end_0, end_mask = var_26781_end_mask_0, x = var_26281_cast_fp16)[name = tensor("op_26781_cast_fp16")]; + tensor var_26788_begin_0 = const()[name = tensor("op_26788_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_26788_end_0 = const()[name = tensor("op_26788_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_26788_end_mask_0 = const()[name = tensor("op_26788_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26788_cast_fp16 = slice_by_index(begin = var_26788_begin_0, end = var_26788_end_0, end_mask = var_26788_end_mask_0, x = var_26281_cast_fp16)[name = tensor("op_26788_cast_fp16")]; + tensor var_26795_begin_0 = const()[name = tensor("op_26795_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_26795_end_0 = const()[name = tensor("op_26795_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_26795_end_mask_0 = const()[name = tensor("op_26795_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26795_cast_fp16 = slice_by_index(begin = var_26795_begin_0, end = var_26795_end_0, end_mask = var_26795_end_mask_0, x = var_26281_cast_fp16)[name = tensor("op_26795_cast_fp16")]; + tensor var_26802_begin_0 = const()[name = tensor("op_26802_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_26802_end_0 = const()[name = tensor("op_26802_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_26802_end_mask_0 = const()[name = tensor("op_26802_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26802_cast_fp16 = slice_by_index(begin = var_26802_begin_0, end = var_26802_end_0, end_mask = var_26802_end_mask_0, x = var_26285_cast_fp16)[name = tensor("op_26802_cast_fp16")]; + tensor var_26809_begin_0 = const()[name = tensor("op_26809_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_26809_end_0 = const()[name = tensor("op_26809_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_26809_end_mask_0 = const()[name = tensor("op_26809_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26809_cast_fp16 = slice_by_index(begin = var_26809_begin_0, end = var_26809_end_0, end_mask = var_26809_end_mask_0, x = var_26285_cast_fp16)[name = tensor("op_26809_cast_fp16")]; + tensor var_26816_begin_0 = const()[name = tensor("op_26816_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_26816_end_0 = const()[name = tensor("op_26816_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_26816_end_mask_0 = const()[name = tensor("op_26816_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26816_cast_fp16 = slice_by_index(begin = var_26816_begin_0, end = var_26816_end_0, end_mask = var_26816_end_mask_0, x = var_26285_cast_fp16)[name = tensor("op_26816_cast_fp16")]; + tensor var_26823_begin_0 = const()[name = tensor("op_26823_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_26823_end_0 = const()[name = tensor("op_26823_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_26823_end_mask_0 = const()[name = tensor("op_26823_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26823_cast_fp16 = slice_by_index(begin = var_26823_begin_0, end = var_26823_end_0, end_mask = var_26823_end_mask_0, x = var_26285_cast_fp16)[name = tensor("op_26823_cast_fp16")]; + tensor var_26830_begin_0 = const()[name = tensor("op_26830_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_26830_end_0 = const()[name = tensor("op_26830_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_26830_end_mask_0 = const()[name = tensor("op_26830_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26830_cast_fp16 = slice_by_index(begin = var_26830_begin_0, end = var_26830_end_0, end_mask = var_26830_end_mask_0, x = var_26289_cast_fp16)[name = tensor("op_26830_cast_fp16")]; + tensor var_26837_begin_0 = const()[name = tensor("op_26837_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_26837_end_0 = const()[name = tensor("op_26837_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_26837_end_mask_0 = const()[name = tensor("op_26837_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26837_cast_fp16 = slice_by_index(begin = var_26837_begin_0, end = var_26837_end_0, end_mask = var_26837_end_mask_0, x = var_26289_cast_fp16)[name = tensor("op_26837_cast_fp16")]; + tensor var_26844_begin_0 = const()[name = tensor("op_26844_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_26844_end_0 = const()[name = tensor("op_26844_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_26844_end_mask_0 = const()[name = tensor("op_26844_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26844_cast_fp16 = slice_by_index(begin = var_26844_begin_0, end = var_26844_end_0, end_mask = var_26844_end_mask_0, x = var_26289_cast_fp16)[name = tensor("op_26844_cast_fp16")]; + tensor var_26851_begin_0 = const()[name = tensor("op_26851_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_26851_end_0 = const()[name = tensor("op_26851_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_26851_end_mask_0 = const()[name = tensor("op_26851_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26851_cast_fp16 = slice_by_index(begin = var_26851_begin_0, end = var_26851_end_0, end_mask = var_26851_end_mask_0, x = var_26289_cast_fp16)[name = tensor("op_26851_cast_fp16")]; + tensor k_35_perm_0 = const()[name = tensor("k_35_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_26856_begin_0 = const()[name = tensor("op_26856_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_26856_end_0 = const()[name = tensor("op_26856_end_0"), val = tensor([1, 1500, 1, 64])]; + tensor var_26856_end_mask_0 = const()[name = tensor("op_26856_end_mask_0"), val = tensor([true, true, true, false])]; + tensor transpose_14 = transpose(perm = k_35_perm_0, x = key_35_cast_fp16)[name = tensor("transpose_14")]; + tensor var_26856_cast_fp16 = slice_by_index(begin = var_26856_begin_0, end = var_26856_end_0, end_mask = var_26856_end_mask_0, x = transpose_14)[name = tensor("op_26856_cast_fp16")]; + tensor var_26860_begin_0 = const()[name = tensor("op_26860_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor var_26860_end_0 = const()[name = tensor("op_26860_end_0"), val = tensor([1, 1500, 1, 128])]; + tensor var_26860_end_mask_0 = const()[name = tensor("op_26860_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26860_cast_fp16 = slice_by_index(begin = var_26860_begin_0, end = var_26860_end_0, end_mask = var_26860_end_mask_0, x = transpose_14)[name = tensor("op_26860_cast_fp16")]; + tensor var_26864_begin_0 = const()[name = tensor("op_26864_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_26864_end_0 = const()[name = tensor("op_26864_end_0"), val = tensor([1, 1500, 1, 192])]; + tensor var_26864_end_mask_0 = const()[name = tensor("op_26864_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26864_cast_fp16 = slice_by_index(begin = var_26864_begin_0, end = var_26864_end_0, end_mask = var_26864_end_mask_0, x = transpose_14)[name = tensor("op_26864_cast_fp16")]; + tensor var_26868_begin_0 = const()[name = tensor("op_26868_begin_0"), val = tensor([0, 0, 0, 192])]; + tensor var_26868_end_0 = const()[name = tensor("op_26868_end_0"), val = tensor([1, 1500, 1, 256])]; + tensor var_26868_end_mask_0 = const()[name = tensor("op_26868_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26868_cast_fp16 = slice_by_index(begin = var_26868_begin_0, end = var_26868_end_0, end_mask = var_26868_end_mask_0, x = transpose_14)[name = tensor("op_26868_cast_fp16")]; + tensor var_26872_begin_0 = const()[name = tensor("op_26872_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_26872_end_0 = const()[name = tensor("op_26872_end_0"), val = tensor([1, 1500, 1, 320])]; + tensor var_26872_end_mask_0 = const()[name = tensor("op_26872_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26872_cast_fp16 = slice_by_index(begin = var_26872_begin_0, end = var_26872_end_0, end_mask = var_26872_end_mask_0, x = transpose_14)[name = tensor("op_26872_cast_fp16")]; + tensor var_26876_begin_0 = const()[name = tensor("op_26876_begin_0"), val = tensor([0, 0, 0, 320])]; + tensor var_26876_end_0 = const()[name = tensor("op_26876_end_0"), val = tensor([1, 1500, 1, 384])]; + tensor var_26876_end_mask_0 = const()[name = tensor("op_26876_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26876_cast_fp16 = slice_by_index(begin = var_26876_begin_0, end = var_26876_end_0, end_mask = var_26876_end_mask_0, x = transpose_14)[name = tensor("op_26876_cast_fp16")]; + tensor var_26880_begin_0 = const()[name = tensor("op_26880_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_26880_end_0 = const()[name = tensor("op_26880_end_0"), val = tensor([1, 1500, 1, 448])]; + tensor var_26880_end_mask_0 = const()[name = tensor("op_26880_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26880_cast_fp16 = slice_by_index(begin = var_26880_begin_0, end = var_26880_end_0, end_mask = var_26880_end_mask_0, x = transpose_14)[name = tensor("op_26880_cast_fp16")]; + tensor var_26884_begin_0 = const()[name = tensor("op_26884_begin_0"), val = tensor([0, 0, 0, 448])]; + tensor var_26884_end_0 = const()[name = tensor("op_26884_end_0"), val = tensor([1, 1500, 1, 512])]; + tensor var_26884_end_mask_0 = const()[name = tensor("op_26884_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26884_cast_fp16 = slice_by_index(begin = var_26884_begin_0, end = var_26884_end_0, end_mask = var_26884_end_mask_0, x = transpose_14)[name = tensor("op_26884_cast_fp16")]; + tensor var_26888_begin_0 = const()[name = tensor("op_26888_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_26888_end_0 = const()[name = tensor("op_26888_end_0"), val = tensor([1, 1500, 1, 576])]; + tensor var_26888_end_mask_0 = const()[name = tensor("op_26888_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26888_cast_fp16 = slice_by_index(begin = var_26888_begin_0, end = var_26888_end_0, end_mask = var_26888_end_mask_0, x = transpose_14)[name = tensor("op_26888_cast_fp16")]; + tensor var_26892_begin_0 = const()[name = tensor("op_26892_begin_0"), val = tensor([0, 0, 0, 576])]; + tensor var_26892_end_0 = const()[name = tensor("op_26892_end_0"), val = tensor([1, 1500, 1, 640])]; + tensor var_26892_end_mask_0 = const()[name = tensor("op_26892_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26892_cast_fp16 = slice_by_index(begin = var_26892_begin_0, end = var_26892_end_0, end_mask = var_26892_end_mask_0, x = transpose_14)[name = tensor("op_26892_cast_fp16")]; + tensor var_26896_begin_0 = const()[name = tensor("op_26896_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_26896_end_0 = const()[name = tensor("op_26896_end_0"), val = tensor([1, 1500, 1, 704])]; + tensor var_26896_end_mask_0 = const()[name = tensor("op_26896_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26896_cast_fp16 = slice_by_index(begin = var_26896_begin_0, end = var_26896_end_0, end_mask = var_26896_end_mask_0, x = transpose_14)[name = tensor("op_26896_cast_fp16")]; + tensor var_26900_begin_0 = const()[name = tensor("op_26900_begin_0"), val = tensor([0, 0, 0, 704])]; + tensor var_26900_end_0 = const()[name = tensor("op_26900_end_0"), val = tensor([1, 1500, 1, 768])]; + tensor var_26900_end_mask_0 = const()[name = tensor("op_26900_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26900_cast_fp16 = slice_by_index(begin = var_26900_begin_0, end = var_26900_end_0, end_mask = var_26900_end_mask_0, x = transpose_14)[name = tensor("op_26900_cast_fp16")]; + tensor var_26904_begin_0 = const()[name = tensor("op_26904_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_26904_end_0 = const()[name = tensor("op_26904_end_0"), val = tensor([1, 1500, 1, 832])]; + tensor var_26904_end_mask_0 = const()[name = tensor("op_26904_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26904_cast_fp16 = slice_by_index(begin = var_26904_begin_0, end = var_26904_end_0, end_mask = var_26904_end_mask_0, x = transpose_14)[name = tensor("op_26904_cast_fp16")]; + tensor var_26908_begin_0 = const()[name = tensor("op_26908_begin_0"), val = tensor([0, 0, 0, 832])]; + tensor var_26908_end_0 = const()[name = tensor("op_26908_end_0"), val = tensor([1, 1500, 1, 896])]; + tensor var_26908_end_mask_0 = const()[name = tensor("op_26908_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26908_cast_fp16 = slice_by_index(begin = var_26908_begin_0, end = var_26908_end_0, end_mask = var_26908_end_mask_0, x = transpose_14)[name = tensor("op_26908_cast_fp16")]; + tensor var_26912_begin_0 = const()[name = tensor("op_26912_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_26912_end_0 = const()[name = tensor("op_26912_end_0"), val = tensor([1, 1500, 1, 960])]; + tensor var_26912_end_mask_0 = const()[name = tensor("op_26912_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26912_cast_fp16 = slice_by_index(begin = var_26912_begin_0, end = var_26912_end_0, end_mask = var_26912_end_mask_0, x = transpose_14)[name = tensor("op_26912_cast_fp16")]; + tensor var_26916_begin_0 = const()[name = tensor("op_26916_begin_0"), val = tensor([0, 0, 0, 960])]; + tensor var_26916_end_0 = const()[name = tensor("op_26916_end_0"), val = tensor([1, 1500, 1, 1024])]; + tensor var_26916_end_mask_0 = const()[name = tensor("op_26916_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26916_cast_fp16 = slice_by_index(begin = var_26916_begin_0, end = var_26916_end_0, end_mask = var_26916_end_mask_0, x = transpose_14)[name = tensor("op_26916_cast_fp16")]; + tensor var_26920_begin_0 = const()[name = tensor("op_26920_begin_0"), val = tensor([0, 0, 0, 1024])]; + tensor var_26920_end_0 = const()[name = tensor("op_26920_end_0"), val = tensor([1, 1500, 1, 1088])]; + tensor var_26920_end_mask_0 = const()[name = tensor("op_26920_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26920_cast_fp16 = slice_by_index(begin = var_26920_begin_0, end = var_26920_end_0, end_mask = var_26920_end_mask_0, x = transpose_14)[name = tensor("op_26920_cast_fp16")]; + tensor var_26924_begin_0 = const()[name = tensor("op_26924_begin_0"), val = tensor([0, 0, 0, 1088])]; + tensor var_26924_end_0 = const()[name = tensor("op_26924_end_0"), val = tensor([1, 1500, 1, 1152])]; + tensor var_26924_end_mask_0 = const()[name = tensor("op_26924_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26924_cast_fp16 = slice_by_index(begin = var_26924_begin_0, end = var_26924_end_0, end_mask = var_26924_end_mask_0, x = transpose_14)[name = tensor("op_26924_cast_fp16")]; + tensor var_26928_begin_0 = const()[name = tensor("op_26928_begin_0"), val = tensor([0, 0, 0, 1152])]; + tensor var_26928_end_0 = const()[name = tensor("op_26928_end_0"), val = tensor([1, 1500, 1, 1216])]; + tensor var_26928_end_mask_0 = const()[name = tensor("op_26928_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26928_cast_fp16 = slice_by_index(begin = var_26928_begin_0, end = var_26928_end_0, end_mask = var_26928_end_mask_0, x = transpose_14)[name = tensor("op_26928_cast_fp16")]; + tensor var_26932_begin_0 = const()[name = tensor("op_26932_begin_0"), val = tensor([0, 0, 0, 1216])]; + tensor var_26932_end_0 = const()[name = tensor("op_26932_end_0"), val = tensor([1, 1500, 1, 1280])]; + tensor var_26932_end_mask_0 = const()[name = tensor("op_26932_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_26932_cast_fp16 = slice_by_index(begin = var_26932_begin_0, end = var_26932_end_0, end_mask = var_26932_end_mask_0, x = transpose_14)[name = tensor("op_26932_cast_fp16")]; + tensor var_26934_begin_0 = const()[name = tensor("op_26934_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_26934_end_0 = const()[name = tensor("op_26934_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_26934_end_mask_0 = const()[name = tensor("op_26934_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_26934_cast_fp16 = slice_by_index(begin = var_26934_begin_0, end = var_26934_end_0, end_mask = var_26934_end_mask_0, x = value_35_cast_fp16)[name = tensor("op_26934_cast_fp16")]; + tensor var_26938_begin_0 = const()[name = tensor("op_26938_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_26938_end_0 = const()[name = tensor("op_26938_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_26938_end_mask_0 = const()[name = tensor("op_26938_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_26938_cast_fp16 = slice_by_index(begin = var_26938_begin_0, end = var_26938_end_0, end_mask = var_26938_end_mask_0, x = value_35_cast_fp16)[name = tensor("op_26938_cast_fp16")]; + tensor var_26942_begin_0 = const()[name = tensor("op_26942_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_26942_end_0 = const()[name = tensor("op_26942_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_26942_end_mask_0 = const()[name = tensor("op_26942_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_26942_cast_fp16 = slice_by_index(begin = var_26942_begin_0, end = var_26942_end_0, end_mask = var_26942_end_mask_0, x = value_35_cast_fp16)[name = tensor("op_26942_cast_fp16")]; + tensor var_26946_begin_0 = const()[name = tensor("op_26946_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_26946_end_0 = const()[name = tensor("op_26946_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_26946_end_mask_0 = const()[name = tensor("op_26946_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_26946_cast_fp16 = slice_by_index(begin = var_26946_begin_0, end = var_26946_end_0, end_mask = var_26946_end_mask_0, x = value_35_cast_fp16)[name = tensor("op_26946_cast_fp16")]; + tensor var_26950_begin_0 = const()[name = tensor("op_26950_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_26950_end_0 = const()[name = tensor("op_26950_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_26950_end_mask_0 = const()[name = tensor("op_26950_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_26950_cast_fp16 = slice_by_index(begin = var_26950_begin_0, end = var_26950_end_0, end_mask = var_26950_end_mask_0, x = value_35_cast_fp16)[name = tensor("op_26950_cast_fp16")]; + tensor var_26954_begin_0 = const()[name = tensor("op_26954_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_26954_end_0 = const()[name = tensor("op_26954_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_26954_end_mask_0 = const()[name = tensor("op_26954_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_26954_cast_fp16 = slice_by_index(begin = var_26954_begin_0, end = var_26954_end_0, end_mask = var_26954_end_mask_0, x = value_35_cast_fp16)[name = tensor("op_26954_cast_fp16")]; + tensor var_26958_begin_0 = const()[name = tensor("op_26958_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_26958_end_0 = const()[name = tensor("op_26958_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_26958_end_mask_0 = const()[name = tensor("op_26958_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_26958_cast_fp16 = slice_by_index(begin = var_26958_begin_0, end = var_26958_end_0, end_mask = var_26958_end_mask_0, x = value_35_cast_fp16)[name = tensor("op_26958_cast_fp16")]; + tensor var_26962_begin_0 = const()[name = tensor("op_26962_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_26962_end_0 = const()[name = tensor("op_26962_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_26962_end_mask_0 = const()[name = tensor("op_26962_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_26962_cast_fp16 = slice_by_index(begin = var_26962_begin_0, end = var_26962_end_0, end_mask = var_26962_end_mask_0, x = value_35_cast_fp16)[name = tensor("op_26962_cast_fp16")]; + tensor var_26966_begin_0 = const()[name = tensor("op_26966_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_26966_end_0 = const()[name = tensor("op_26966_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_26966_end_mask_0 = const()[name = tensor("op_26966_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_26966_cast_fp16 = slice_by_index(begin = var_26966_begin_0, end = var_26966_end_0, end_mask = var_26966_end_mask_0, x = value_35_cast_fp16)[name = tensor("op_26966_cast_fp16")]; + tensor var_26970_begin_0 = const()[name = tensor("op_26970_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_26970_end_0 = const()[name = tensor("op_26970_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_26970_end_mask_0 = const()[name = tensor("op_26970_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_26970_cast_fp16 = slice_by_index(begin = var_26970_begin_0, end = var_26970_end_0, end_mask = var_26970_end_mask_0, x = value_35_cast_fp16)[name = tensor("op_26970_cast_fp16")]; + tensor var_26974_begin_0 = const()[name = tensor("op_26974_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_26974_end_0 = const()[name = tensor("op_26974_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_26974_end_mask_0 = const()[name = tensor("op_26974_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_26974_cast_fp16 = slice_by_index(begin = var_26974_begin_0, end = var_26974_end_0, end_mask = var_26974_end_mask_0, x = value_35_cast_fp16)[name = tensor("op_26974_cast_fp16")]; + tensor var_26978_begin_0 = const()[name = tensor("op_26978_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_26978_end_0 = const()[name = tensor("op_26978_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_26978_end_mask_0 = const()[name = tensor("op_26978_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_26978_cast_fp16 = slice_by_index(begin = var_26978_begin_0, end = var_26978_end_0, end_mask = var_26978_end_mask_0, x = value_35_cast_fp16)[name = tensor("op_26978_cast_fp16")]; + tensor var_26982_begin_0 = const()[name = tensor("op_26982_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_26982_end_0 = const()[name = tensor("op_26982_end_0"), val = tensor([1, 832, 1, 1500])]; + tensor var_26982_end_mask_0 = const()[name = tensor("op_26982_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_26982_cast_fp16 = slice_by_index(begin = var_26982_begin_0, end = var_26982_end_0, end_mask = var_26982_end_mask_0, x = value_35_cast_fp16)[name = tensor("op_26982_cast_fp16")]; + tensor var_26986_begin_0 = const()[name = tensor("op_26986_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_26986_end_0 = const()[name = tensor("op_26986_end_0"), val = tensor([1, 896, 1, 1500])]; + tensor var_26986_end_mask_0 = const()[name = tensor("op_26986_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_26986_cast_fp16 = slice_by_index(begin = var_26986_begin_0, end = var_26986_end_0, end_mask = var_26986_end_mask_0, x = value_35_cast_fp16)[name = tensor("op_26986_cast_fp16")]; + tensor var_26990_begin_0 = const()[name = tensor("op_26990_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_26990_end_0 = const()[name = tensor("op_26990_end_0"), val = tensor([1, 960, 1, 1500])]; + tensor var_26990_end_mask_0 = const()[name = tensor("op_26990_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_26990_cast_fp16 = slice_by_index(begin = var_26990_begin_0, end = var_26990_end_0, end_mask = var_26990_end_mask_0, x = value_35_cast_fp16)[name = tensor("op_26990_cast_fp16")]; + tensor var_26994_begin_0 = const()[name = tensor("op_26994_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_26994_end_0 = const()[name = tensor("op_26994_end_0"), val = tensor([1, 1024, 1, 1500])]; + tensor var_26994_end_mask_0 = const()[name = tensor("op_26994_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_26994_cast_fp16 = slice_by_index(begin = var_26994_begin_0, end = var_26994_end_0, end_mask = var_26994_end_mask_0, x = value_35_cast_fp16)[name = tensor("op_26994_cast_fp16")]; + tensor var_26998_begin_0 = const()[name = tensor("op_26998_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_26998_end_0 = const()[name = tensor("op_26998_end_0"), val = tensor([1, 1088, 1, 1500])]; + tensor var_26998_end_mask_0 = const()[name = tensor("op_26998_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_26998_cast_fp16 = slice_by_index(begin = var_26998_begin_0, end = var_26998_end_0, end_mask = var_26998_end_mask_0, x = value_35_cast_fp16)[name = tensor("op_26998_cast_fp16")]; + tensor var_27002_begin_0 = const()[name = tensor("op_27002_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_27002_end_0 = const()[name = tensor("op_27002_end_0"), val = tensor([1, 1152, 1, 1500])]; + tensor var_27002_end_mask_0 = const()[name = tensor("op_27002_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_27002_cast_fp16 = slice_by_index(begin = var_27002_begin_0, end = var_27002_end_0, end_mask = var_27002_end_mask_0, x = value_35_cast_fp16)[name = tensor("op_27002_cast_fp16")]; + tensor var_27006_begin_0 = const()[name = tensor("op_27006_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_27006_end_0 = const()[name = tensor("op_27006_end_0"), val = tensor([1, 1216, 1, 1500])]; + tensor var_27006_end_mask_0 = const()[name = tensor("op_27006_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_27006_cast_fp16 = slice_by_index(begin = var_27006_begin_0, end = var_27006_end_0, end_mask = var_27006_end_mask_0, x = value_35_cast_fp16)[name = tensor("op_27006_cast_fp16")]; + tensor var_27010_begin_0 = const()[name = tensor("op_27010_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_27010_end_0 = const()[name = tensor("op_27010_end_0"), val = tensor([1, 1280, 1, 1500])]; + tensor var_27010_end_mask_0 = const()[name = tensor("op_27010_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_27010_cast_fp16 = slice_by_index(begin = var_27010_begin_0, end = var_27010_end_0, end_mask = var_27010_end_mask_0, x = value_35_cast_fp16)[name = tensor("op_27010_cast_fp16")]; + tensor var_27014_equation_0 = const()[name = tensor("op_27014_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27014_cast_fp16 = einsum(equation = var_27014_equation_0, values = (var_26856_cast_fp16, var_26298_cast_fp16))[name = tensor("op_27014_cast_fp16")]; + tensor var_27015_to_fp16 = const()[name = tensor("op_27015_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2721_cast_fp16 = mul(x = var_27014_cast_fp16, y = var_27015_to_fp16)[name = tensor("aw_chunk_2721_cast_fp16")]; + tensor var_27018_equation_0 = const()[name = tensor("op_27018_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27018_cast_fp16 = einsum(equation = var_27018_equation_0, values = (var_26856_cast_fp16, var_26305_cast_fp16))[name = tensor("op_27018_cast_fp16")]; + tensor var_27019_to_fp16 = const()[name = tensor("op_27019_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2723_cast_fp16 = mul(x = var_27018_cast_fp16, y = var_27019_to_fp16)[name = tensor("aw_chunk_2723_cast_fp16")]; + tensor var_27022_equation_0 = const()[name = tensor("op_27022_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27022_cast_fp16 = einsum(equation = var_27022_equation_0, values = (var_26856_cast_fp16, var_26312_cast_fp16))[name = tensor("op_27022_cast_fp16")]; + tensor var_27023_to_fp16 = const()[name = tensor("op_27023_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2725_cast_fp16 = mul(x = var_27022_cast_fp16, y = var_27023_to_fp16)[name = tensor("aw_chunk_2725_cast_fp16")]; + tensor var_27026_equation_0 = const()[name = tensor("op_27026_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27026_cast_fp16 = einsum(equation = var_27026_equation_0, values = (var_26856_cast_fp16, var_26319_cast_fp16))[name = tensor("op_27026_cast_fp16")]; + tensor var_27027_to_fp16 = const()[name = tensor("op_27027_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2727_cast_fp16 = mul(x = var_27026_cast_fp16, y = var_27027_to_fp16)[name = tensor("aw_chunk_2727_cast_fp16")]; + tensor var_27030_equation_0 = const()[name = tensor("op_27030_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27030_cast_fp16 = einsum(equation = var_27030_equation_0, values = (var_26860_cast_fp16, var_26326_cast_fp16))[name = tensor("op_27030_cast_fp16")]; + tensor var_27031_to_fp16 = const()[name = tensor("op_27031_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2729_cast_fp16 = mul(x = var_27030_cast_fp16, y = var_27031_to_fp16)[name = tensor("aw_chunk_2729_cast_fp16")]; + tensor var_27034_equation_0 = const()[name = tensor("op_27034_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27034_cast_fp16 = einsum(equation = var_27034_equation_0, values = (var_26860_cast_fp16, var_26333_cast_fp16))[name = tensor("op_27034_cast_fp16")]; + tensor var_27035_to_fp16 = const()[name = tensor("op_27035_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2731_cast_fp16 = mul(x = var_27034_cast_fp16, y = var_27035_to_fp16)[name = tensor("aw_chunk_2731_cast_fp16")]; + tensor var_27038_equation_0 = const()[name = tensor("op_27038_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27038_cast_fp16 = einsum(equation = var_27038_equation_0, values = (var_26860_cast_fp16, var_26340_cast_fp16))[name = tensor("op_27038_cast_fp16")]; + tensor var_27039_to_fp16 = const()[name = tensor("op_27039_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2733_cast_fp16 = mul(x = var_27038_cast_fp16, y = var_27039_to_fp16)[name = tensor("aw_chunk_2733_cast_fp16")]; + tensor var_27042_equation_0 = const()[name = tensor("op_27042_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27042_cast_fp16 = einsum(equation = var_27042_equation_0, values = (var_26860_cast_fp16, var_26347_cast_fp16))[name = tensor("op_27042_cast_fp16")]; + tensor var_27043_to_fp16 = const()[name = tensor("op_27043_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2735_cast_fp16 = mul(x = var_27042_cast_fp16, y = var_27043_to_fp16)[name = tensor("aw_chunk_2735_cast_fp16")]; + tensor var_27046_equation_0 = const()[name = tensor("op_27046_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27046_cast_fp16 = einsum(equation = var_27046_equation_0, values = (var_26864_cast_fp16, var_26354_cast_fp16))[name = tensor("op_27046_cast_fp16")]; + tensor var_27047_to_fp16 = const()[name = tensor("op_27047_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2737_cast_fp16 = mul(x = var_27046_cast_fp16, y = var_27047_to_fp16)[name = tensor("aw_chunk_2737_cast_fp16")]; + tensor var_27050_equation_0 = const()[name = tensor("op_27050_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27050_cast_fp16 = einsum(equation = var_27050_equation_0, values = (var_26864_cast_fp16, var_26361_cast_fp16))[name = tensor("op_27050_cast_fp16")]; + tensor var_27051_to_fp16 = const()[name = tensor("op_27051_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2739_cast_fp16 = mul(x = var_27050_cast_fp16, y = var_27051_to_fp16)[name = tensor("aw_chunk_2739_cast_fp16")]; + tensor var_27054_equation_0 = const()[name = tensor("op_27054_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27054_cast_fp16 = einsum(equation = var_27054_equation_0, values = (var_26864_cast_fp16, var_26368_cast_fp16))[name = tensor("op_27054_cast_fp16")]; + tensor var_27055_to_fp16 = const()[name = tensor("op_27055_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2741_cast_fp16 = mul(x = var_27054_cast_fp16, y = var_27055_to_fp16)[name = tensor("aw_chunk_2741_cast_fp16")]; + tensor var_27058_equation_0 = const()[name = tensor("op_27058_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27058_cast_fp16 = einsum(equation = var_27058_equation_0, values = (var_26864_cast_fp16, var_26375_cast_fp16))[name = tensor("op_27058_cast_fp16")]; + tensor var_27059_to_fp16 = const()[name = tensor("op_27059_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2743_cast_fp16 = mul(x = var_27058_cast_fp16, y = var_27059_to_fp16)[name = tensor("aw_chunk_2743_cast_fp16")]; + tensor var_27062_equation_0 = const()[name = tensor("op_27062_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27062_cast_fp16 = einsum(equation = var_27062_equation_0, values = (var_26868_cast_fp16, var_26382_cast_fp16))[name = tensor("op_27062_cast_fp16")]; + tensor var_27063_to_fp16 = const()[name = tensor("op_27063_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2745_cast_fp16 = mul(x = var_27062_cast_fp16, y = var_27063_to_fp16)[name = tensor("aw_chunk_2745_cast_fp16")]; + tensor var_27066_equation_0 = const()[name = tensor("op_27066_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27066_cast_fp16 = einsum(equation = var_27066_equation_0, values = (var_26868_cast_fp16, var_26389_cast_fp16))[name = tensor("op_27066_cast_fp16")]; + tensor var_27067_to_fp16 = const()[name = tensor("op_27067_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2747_cast_fp16 = mul(x = var_27066_cast_fp16, y = var_27067_to_fp16)[name = tensor("aw_chunk_2747_cast_fp16")]; + tensor var_27070_equation_0 = const()[name = tensor("op_27070_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27070_cast_fp16 = einsum(equation = var_27070_equation_0, values = (var_26868_cast_fp16, var_26396_cast_fp16))[name = tensor("op_27070_cast_fp16")]; + tensor var_27071_to_fp16 = const()[name = tensor("op_27071_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2749_cast_fp16 = mul(x = var_27070_cast_fp16, y = var_27071_to_fp16)[name = tensor("aw_chunk_2749_cast_fp16")]; + tensor var_27074_equation_0 = const()[name = tensor("op_27074_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27074_cast_fp16 = einsum(equation = var_27074_equation_0, values = (var_26868_cast_fp16, var_26403_cast_fp16))[name = tensor("op_27074_cast_fp16")]; + tensor var_27075_to_fp16 = const()[name = tensor("op_27075_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2751_cast_fp16 = mul(x = var_27074_cast_fp16, y = var_27075_to_fp16)[name = tensor("aw_chunk_2751_cast_fp16")]; + tensor var_27078_equation_0 = const()[name = tensor("op_27078_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27078_cast_fp16 = einsum(equation = var_27078_equation_0, values = (var_26872_cast_fp16, var_26410_cast_fp16))[name = tensor("op_27078_cast_fp16")]; + tensor var_27079_to_fp16 = const()[name = tensor("op_27079_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2753_cast_fp16 = mul(x = var_27078_cast_fp16, y = var_27079_to_fp16)[name = tensor("aw_chunk_2753_cast_fp16")]; + tensor var_27082_equation_0 = const()[name = tensor("op_27082_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27082_cast_fp16 = einsum(equation = var_27082_equation_0, values = (var_26872_cast_fp16, var_26417_cast_fp16))[name = tensor("op_27082_cast_fp16")]; + tensor var_27083_to_fp16 = const()[name = tensor("op_27083_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2755_cast_fp16 = mul(x = var_27082_cast_fp16, y = var_27083_to_fp16)[name = tensor("aw_chunk_2755_cast_fp16")]; + tensor var_27086_equation_0 = const()[name = tensor("op_27086_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27086_cast_fp16 = einsum(equation = var_27086_equation_0, values = (var_26872_cast_fp16, var_26424_cast_fp16))[name = tensor("op_27086_cast_fp16")]; + tensor var_27087_to_fp16 = const()[name = tensor("op_27087_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2757_cast_fp16 = mul(x = var_27086_cast_fp16, y = var_27087_to_fp16)[name = tensor("aw_chunk_2757_cast_fp16")]; + tensor var_27090_equation_0 = const()[name = tensor("op_27090_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27090_cast_fp16 = einsum(equation = var_27090_equation_0, values = (var_26872_cast_fp16, var_26431_cast_fp16))[name = tensor("op_27090_cast_fp16")]; + tensor var_27091_to_fp16 = const()[name = tensor("op_27091_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2759_cast_fp16 = mul(x = var_27090_cast_fp16, y = var_27091_to_fp16)[name = tensor("aw_chunk_2759_cast_fp16")]; + tensor var_27094_equation_0 = const()[name = tensor("op_27094_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27094_cast_fp16 = einsum(equation = var_27094_equation_0, values = (var_26876_cast_fp16, var_26438_cast_fp16))[name = tensor("op_27094_cast_fp16")]; + tensor var_27095_to_fp16 = const()[name = tensor("op_27095_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2761_cast_fp16 = mul(x = var_27094_cast_fp16, y = var_27095_to_fp16)[name = tensor("aw_chunk_2761_cast_fp16")]; + tensor var_27098_equation_0 = const()[name = tensor("op_27098_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27098_cast_fp16 = einsum(equation = var_27098_equation_0, values = (var_26876_cast_fp16, var_26445_cast_fp16))[name = tensor("op_27098_cast_fp16")]; + tensor var_27099_to_fp16 = const()[name = tensor("op_27099_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2763_cast_fp16 = mul(x = var_27098_cast_fp16, y = var_27099_to_fp16)[name = tensor("aw_chunk_2763_cast_fp16")]; + tensor var_27102_equation_0 = const()[name = tensor("op_27102_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27102_cast_fp16 = einsum(equation = var_27102_equation_0, values = (var_26876_cast_fp16, var_26452_cast_fp16))[name = tensor("op_27102_cast_fp16")]; + tensor var_27103_to_fp16 = const()[name = tensor("op_27103_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2765_cast_fp16 = mul(x = var_27102_cast_fp16, y = var_27103_to_fp16)[name = tensor("aw_chunk_2765_cast_fp16")]; + tensor var_27106_equation_0 = const()[name = tensor("op_27106_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27106_cast_fp16 = einsum(equation = var_27106_equation_0, values = (var_26876_cast_fp16, var_26459_cast_fp16))[name = tensor("op_27106_cast_fp16")]; + tensor var_27107_to_fp16 = const()[name = tensor("op_27107_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2767_cast_fp16 = mul(x = var_27106_cast_fp16, y = var_27107_to_fp16)[name = tensor("aw_chunk_2767_cast_fp16")]; + tensor var_27110_equation_0 = const()[name = tensor("op_27110_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27110_cast_fp16 = einsum(equation = var_27110_equation_0, values = (var_26880_cast_fp16, var_26466_cast_fp16))[name = tensor("op_27110_cast_fp16")]; + tensor var_27111_to_fp16 = const()[name = tensor("op_27111_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2769_cast_fp16 = mul(x = var_27110_cast_fp16, y = var_27111_to_fp16)[name = tensor("aw_chunk_2769_cast_fp16")]; + tensor var_27114_equation_0 = const()[name = tensor("op_27114_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27114_cast_fp16 = einsum(equation = var_27114_equation_0, values = (var_26880_cast_fp16, var_26473_cast_fp16))[name = tensor("op_27114_cast_fp16")]; + tensor var_27115_to_fp16 = const()[name = tensor("op_27115_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2771_cast_fp16 = mul(x = var_27114_cast_fp16, y = var_27115_to_fp16)[name = tensor("aw_chunk_2771_cast_fp16")]; + tensor var_27118_equation_0 = const()[name = tensor("op_27118_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27118_cast_fp16 = einsum(equation = var_27118_equation_0, values = (var_26880_cast_fp16, var_26480_cast_fp16))[name = tensor("op_27118_cast_fp16")]; + tensor var_27119_to_fp16 = const()[name = tensor("op_27119_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2773_cast_fp16 = mul(x = var_27118_cast_fp16, y = var_27119_to_fp16)[name = tensor("aw_chunk_2773_cast_fp16")]; + tensor var_27122_equation_0 = const()[name = tensor("op_27122_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27122_cast_fp16 = einsum(equation = var_27122_equation_0, values = (var_26880_cast_fp16, var_26487_cast_fp16))[name = tensor("op_27122_cast_fp16")]; + tensor var_27123_to_fp16 = const()[name = tensor("op_27123_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2775_cast_fp16 = mul(x = var_27122_cast_fp16, y = var_27123_to_fp16)[name = tensor("aw_chunk_2775_cast_fp16")]; + tensor var_27126_equation_0 = const()[name = tensor("op_27126_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27126_cast_fp16 = einsum(equation = var_27126_equation_0, values = (var_26884_cast_fp16, var_26494_cast_fp16))[name = tensor("op_27126_cast_fp16")]; + tensor var_27127_to_fp16 = const()[name = tensor("op_27127_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2777_cast_fp16 = mul(x = var_27126_cast_fp16, y = var_27127_to_fp16)[name = tensor("aw_chunk_2777_cast_fp16")]; + tensor var_27130_equation_0 = const()[name = tensor("op_27130_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27130_cast_fp16 = einsum(equation = var_27130_equation_0, values = (var_26884_cast_fp16, var_26501_cast_fp16))[name = tensor("op_27130_cast_fp16")]; + tensor var_27131_to_fp16 = const()[name = tensor("op_27131_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2779_cast_fp16 = mul(x = var_27130_cast_fp16, y = var_27131_to_fp16)[name = tensor("aw_chunk_2779_cast_fp16")]; + tensor var_27134_equation_0 = const()[name = tensor("op_27134_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27134_cast_fp16 = einsum(equation = var_27134_equation_0, values = (var_26884_cast_fp16, var_26508_cast_fp16))[name = tensor("op_27134_cast_fp16")]; + tensor var_27135_to_fp16 = const()[name = tensor("op_27135_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2781_cast_fp16 = mul(x = var_27134_cast_fp16, y = var_27135_to_fp16)[name = tensor("aw_chunk_2781_cast_fp16")]; + tensor var_27138_equation_0 = const()[name = tensor("op_27138_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27138_cast_fp16 = einsum(equation = var_27138_equation_0, values = (var_26884_cast_fp16, var_26515_cast_fp16))[name = tensor("op_27138_cast_fp16")]; + tensor var_27139_to_fp16 = const()[name = tensor("op_27139_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2783_cast_fp16 = mul(x = var_27138_cast_fp16, y = var_27139_to_fp16)[name = tensor("aw_chunk_2783_cast_fp16")]; + tensor var_27142_equation_0 = const()[name = tensor("op_27142_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27142_cast_fp16 = einsum(equation = var_27142_equation_0, values = (var_26888_cast_fp16, var_26522_cast_fp16))[name = tensor("op_27142_cast_fp16")]; + tensor var_27143_to_fp16 = const()[name = tensor("op_27143_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2785_cast_fp16 = mul(x = var_27142_cast_fp16, y = var_27143_to_fp16)[name = tensor("aw_chunk_2785_cast_fp16")]; + tensor var_27146_equation_0 = const()[name = tensor("op_27146_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27146_cast_fp16 = einsum(equation = var_27146_equation_0, values = (var_26888_cast_fp16, var_26529_cast_fp16))[name = tensor("op_27146_cast_fp16")]; + tensor var_27147_to_fp16 = const()[name = tensor("op_27147_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2787_cast_fp16 = mul(x = var_27146_cast_fp16, y = var_27147_to_fp16)[name = tensor("aw_chunk_2787_cast_fp16")]; + tensor var_27150_equation_0 = const()[name = tensor("op_27150_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27150_cast_fp16 = einsum(equation = var_27150_equation_0, values = (var_26888_cast_fp16, var_26536_cast_fp16))[name = tensor("op_27150_cast_fp16")]; + tensor var_27151_to_fp16 = const()[name = tensor("op_27151_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2789_cast_fp16 = mul(x = var_27150_cast_fp16, y = var_27151_to_fp16)[name = tensor("aw_chunk_2789_cast_fp16")]; + tensor var_27154_equation_0 = const()[name = tensor("op_27154_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27154_cast_fp16 = einsum(equation = var_27154_equation_0, values = (var_26888_cast_fp16, var_26543_cast_fp16))[name = tensor("op_27154_cast_fp16")]; + tensor var_27155_to_fp16 = const()[name = tensor("op_27155_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2791_cast_fp16 = mul(x = var_27154_cast_fp16, y = var_27155_to_fp16)[name = tensor("aw_chunk_2791_cast_fp16")]; + tensor var_27158_equation_0 = const()[name = tensor("op_27158_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27158_cast_fp16 = einsum(equation = var_27158_equation_0, values = (var_26892_cast_fp16, var_26550_cast_fp16))[name = tensor("op_27158_cast_fp16")]; + tensor var_27159_to_fp16 = const()[name = tensor("op_27159_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2793_cast_fp16 = mul(x = var_27158_cast_fp16, y = var_27159_to_fp16)[name = tensor("aw_chunk_2793_cast_fp16")]; + tensor var_27162_equation_0 = const()[name = tensor("op_27162_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27162_cast_fp16 = einsum(equation = var_27162_equation_0, values = (var_26892_cast_fp16, var_26557_cast_fp16))[name = tensor("op_27162_cast_fp16")]; + tensor var_27163_to_fp16 = const()[name = tensor("op_27163_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2795_cast_fp16 = mul(x = var_27162_cast_fp16, y = var_27163_to_fp16)[name = tensor("aw_chunk_2795_cast_fp16")]; + tensor var_27166_equation_0 = const()[name = tensor("op_27166_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27166_cast_fp16 = einsum(equation = var_27166_equation_0, values = (var_26892_cast_fp16, var_26564_cast_fp16))[name = tensor("op_27166_cast_fp16")]; + tensor var_27167_to_fp16 = const()[name = tensor("op_27167_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2797_cast_fp16 = mul(x = var_27166_cast_fp16, y = var_27167_to_fp16)[name = tensor("aw_chunk_2797_cast_fp16")]; + tensor var_27170_equation_0 = const()[name = tensor("op_27170_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27170_cast_fp16 = einsum(equation = var_27170_equation_0, values = (var_26892_cast_fp16, var_26571_cast_fp16))[name = tensor("op_27170_cast_fp16")]; + tensor var_27171_to_fp16 = const()[name = tensor("op_27171_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2799_cast_fp16 = mul(x = var_27170_cast_fp16, y = var_27171_to_fp16)[name = tensor("aw_chunk_2799_cast_fp16")]; + tensor var_27174_equation_0 = const()[name = tensor("op_27174_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27174_cast_fp16 = einsum(equation = var_27174_equation_0, values = (var_26896_cast_fp16, var_26578_cast_fp16))[name = tensor("op_27174_cast_fp16")]; + tensor var_27175_to_fp16 = const()[name = tensor("op_27175_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2801_cast_fp16 = mul(x = var_27174_cast_fp16, y = var_27175_to_fp16)[name = tensor("aw_chunk_2801_cast_fp16")]; + tensor var_27178_equation_0 = const()[name = tensor("op_27178_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27178_cast_fp16 = einsum(equation = var_27178_equation_0, values = (var_26896_cast_fp16, var_26585_cast_fp16))[name = tensor("op_27178_cast_fp16")]; + tensor var_27179_to_fp16 = const()[name = tensor("op_27179_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2803_cast_fp16 = mul(x = var_27178_cast_fp16, y = var_27179_to_fp16)[name = tensor("aw_chunk_2803_cast_fp16")]; + tensor var_27182_equation_0 = const()[name = tensor("op_27182_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27182_cast_fp16 = einsum(equation = var_27182_equation_0, values = (var_26896_cast_fp16, var_26592_cast_fp16))[name = tensor("op_27182_cast_fp16")]; + tensor var_27183_to_fp16 = const()[name = tensor("op_27183_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2805_cast_fp16 = mul(x = var_27182_cast_fp16, y = var_27183_to_fp16)[name = tensor("aw_chunk_2805_cast_fp16")]; + tensor var_27186_equation_0 = const()[name = tensor("op_27186_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27186_cast_fp16 = einsum(equation = var_27186_equation_0, values = (var_26896_cast_fp16, var_26599_cast_fp16))[name = tensor("op_27186_cast_fp16")]; + tensor var_27187_to_fp16 = const()[name = tensor("op_27187_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2807_cast_fp16 = mul(x = var_27186_cast_fp16, y = var_27187_to_fp16)[name = tensor("aw_chunk_2807_cast_fp16")]; + tensor var_27190_equation_0 = const()[name = tensor("op_27190_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27190_cast_fp16 = einsum(equation = var_27190_equation_0, values = (var_26900_cast_fp16, var_26606_cast_fp16))[name = tensor("op_27190_cast_fp16")]; + tensor var_27191_to_fp16 = const()[name = tensor("op_27191_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2809_cast_fp16 = mul(x = var_27190_cast_fp16, y = var_27191_to_fp16)[name = tensor("aw_chunk_2809_cast_fp16")]; + tensor var_27194_equation_0 = const()[name = tensor("op_27194_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27194_cast_fp16 = einsum(equation = var_27194_equation_0, values = (var_26900_cast_fp16, var_26613_cast_fp16))[name = tensor("op_27194_cast_fp16")]; + tensor var_27195_to_fp16 = const()[name = tensor("op_27195_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2811_cast_fp16 = mul(x = var_27194_cast_fp16, y = var_27195_to_fp16)[name = tensor("aw_chunk_2811_cast_fp16")]; + tensor var_27198_equation_0 = const()[name = tensor("op_27198_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27198_cast_fp16 = einsum(equation = var_27198_equation_0, values = (var_26900_cast_fp16, var_26620_cast_fp16))[name = tensor("op_27198_cast_fp16")]; + tensor var_27199_to_fp16 = const()[name = tensor("op_27199_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2813_cast_fp16 = mul(x = var_27198_cast_fp16, y = var_27199_to_fp16)[name = tensor("aw_chunk_2813_cast_fp16")]; + tensor var_27202_equation_0 = const()[name = tensor("op_27202_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27202_cast_fp16 = einsum(equation = var_27202_equation_0, values = (var_26900_cast_fp16, var_26627_cast_fp16))[name = tensor("op_27202_cast_fp16")]; + tensor var_27203_to_fp16 = const()[name = tensor("op_27203_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2815_cast_fp16 = mul(x = var_27202_cast_fp16, y = var_27203_to_fp16)[name = tensor("aw_chunk_2815_cast_fp16")]; + tensor var_27206_equation_0 = const()[name = tensor("op_27206_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27206_cast_fp16 = einsum(equation = var_27206_equation_0, values = (var_26904_cast_fp16, var_26634_cast_fp16))[name = tensor("op_27206_cast_fp16")]; + tensor var_27207_to_fp16 = const()[name = tensor("op_27207_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2817_cast_fp16 = mul(x = var_27206_cast_fp16, y = var_27207_to_fp16)[name = tensor("aw_chunk_2817_cast_fp16")]; + tensor var_27210_equation_0 = const()[name = tensor("op_27210_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27210_cast_fp16 = einsum(equation = var_27210_equation_0, values = (var_26904_cast_fp16, var_26641_cast_fp16))[name = tensor("op_27210_cast_fp16")]; + tensor var_27211_to_fp16 = const()[name = tensor("op_27211_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2819_cast_fp16 = mul(x = var_27210_cast_fp16, y = var_27211_to_fp16)[name = tensor("aw_chunk_2819_cast_fp16")]; + tensor var_27214_equation_0 = const()[name = tensor("op_27214_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27214_cast_fp16 = einsum(equation = var_27214_equation_0, values = (var_26904_cast_fp16, var_26648_cast_fp16))[name = tensor("op_27214_cast_fp16")]; + tensor var_27215_to_fp16 = const()[name = tensor("op_27215_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2821_cast_fp16 = mul(x = var_27214_cast_fp16, y = var_27215_to_fp16)[name = tensor("aw_chunk_2821_cast_fp16")]; + tensor var_27218_equation_0 = const()[name = tensor("op_27218_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27218_cast_fp16 = einsum(equation = var_27218_equation_0, values = (var_26904_cast_fp16, var_26655_cast_fp16))[name = tensor("op_27218_cast_fp16")]; + tensor var_27219_to_fp16 = const()[name = tensor("op_27219_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2823_cast_fp16 = mul(x = var_27218_cast_fp16, y = var_27219_to_fp16)[name = tensor("aw_chunk_2823_cast_fp16")]; + tensor var_27222_equation_0 = const()[name = tensor("op_27222_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27222_cast_fp16 = einsum(equation = var_27222_equation_0, values = (var_26908_cast_fp16, var_26662_cast_fp16))[name = tensor("op_27222_cast_fp16")]; + tensor var_27223_to_fp16 = const()[name = tensor("op_27223_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2825_cast_fp16 = mul(x = var_27222_cast_fp16, y = var_27223_to_fp16)[name = tensor("aw_chunk_2825_cast_fp16")]; + tensor var_27226_equation_0 = const()[name = tensor("op_27226_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27226_cast_fp16 = einsum(equation = var_27226_equation_0, values = (var_26908_cast_fp16, var_26669_cast_fp16))[name = tensor("op_27226_cast_fp16")]; + tensor var_27227_to_fp16 = const()[name = tensor("op_27227_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2827_cast_fp16 = mul(x = var_27226_cast_fp16, y = var_27227_to_fp16)[name = tensor("aw_chunk_2827_cast_fp16")]; + tensor var_27230_equation_0 = const()[name = tensor("op_27230_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27230_cast_fp16 = einsum(equation = var_27230_equation_0, values = (var_26908_cast_fp16, var_26676_cast_fp16))[name = tensor("op_27230_cast_fp16")]; + tensor var_27231_to_fp16 = const()[name = tensor("op_27231_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2829_cast_fp16 = mul(x = var_27230_cast_fp16, y = var_27231_to_fp16)[name = tensor("aw_chunk_2829_cast_fp16")]; + tensor var_27234_equation_0 = const()[name = tensor("op_27234_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27234_cast_fp16 = einsum(equation = var_27234_equation_0, values = (var_26908_cast_fp16, var_26683_cast_fp16))[name = tensor("op_27234_cast_fp16")]; + tensor var_27235_to_fp16 = const()[name = tensor("op_27235_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2831_cast_fp16 = mul(x = var_27234_cast_fp16, y = var_27235_to_fp16)[name = tensor("aw_chunk_2831_cast_fp16")]; + tensor var_27238_equation_0 = const()[name = tensor("op_27238_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27238_cast_fp16 = einsum(equation = var_27238_equation_0, values = (var_26912_cast_fp16, var_26690_cast_fp16))[name = tensor("op_27238_cast_fp16")]; + tensor var_27239_to_fp16 = const()[name = tensor("op_27239_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2833_cast_fp16 = mul(x = var_27238_cast_fp16, y = var_27239_to_fp16)[name = tensor("aw_chunk_2833_cast_fp16")]; + tensor var_27242_equation_0 = const()[name = tensor("op_27242_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27242_cast_fp16 = einsum(equation = var_27242_equation_0, values = (var_26912_cast_fp16, var_26697_cast_fp16))[name = tensor("op_27242_cast_fp16")]; + tensor var_27243_to_fp16 = const()[name = tensor("op_27243_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2835_cast_fp16 = mul(x = var_27242_cast_fp16, y = var_27243_to_fp16)[name = tensor("aw_chunk_2835_cast_fp16")]; + tensor var_27246_equation_0 = const()[name = tensor("op_27246_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27246_cast_fp16 = einsum(equation = var_27246_equation_0, values = (var_26912_cast_fp16, var_26704_cast_fp16))[name = tensor("op_27246_cast_fp16")]; + tensor var_27247_to_fp16 = const()[name = tensor("op_27247_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2837_cast_fp16 = mul(x = var_27246_cast_fp16, y = var_27247_to_fp16)[name = tensor("aw_chunk_2837_cast_fp16")]; + tensor var_27250_equation_0 = const()[name = tensor("op_27250_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27250_cast_fp16 = einsum(equation = var_27250_equation_0, values = (var_26912_cast_fp16, var_26711_cast_fp16))[name = tensor("op_27250_cast_fp16")]; + tensor var_27251_to_fp16 = const()[name = tensor("op_27251_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2839_cast_fp16 = mul(x = var_27250_cast_fp16, y = var_27251_to_fp16)[name = tensor("aw_chunk_2839_cast_fp16")]; + tensor var_27254_equation_0 = const()[name = tensor("op_27254_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27254_cast_fp16 = einsum(equation = var_27254_equation_0, values = (var_26916_cast_fp16, var_26718_cast_fp16))[name = tensor("op_27254_cast_fp16")]; + tensor var_27255_to_fp16 = const()[name = tensor("op_27255_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2841_cast_fp16 = mul(x = var_27254_cast_fp16, y = var_27255_to_fp16)[name = tensor("aw_chunk_2841_cast_fp16")]; + tensor var_27258_equation_0 = const()[name = tensor("op_27258_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27258_cast_fp16 = einsum(equation = var_27258_equation_0, values = (var_26916_cast_fp16, var_26725_cast_fp16))[name = tensor("op_27258_cast_fp16")]; + tensor var_27259_to_fp16 = const()[name = tensor("op_27259_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2843_cast_fp16 = mul(x = var_27258_cast_fp16, y = var_27259_to_fp16)[name = tensor("aw_chunk_2843_cast_fp16")]; + tensor var_27262_equation_0 = const()[name = tensor("op_27262_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27262_cast_fp16 = einsum(equation = var_27262_equation_0, values = (var_26916_cast_fp16, var_26732_cast_fp16))[name = tensor("op_27262_cast_fp16")]; + tensor var_27263_to_fp16 = const()[name = tensor("op_27263_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2845_cast_fp16 = mul(x = var_27262_cast_fp16, y = var_27263_to_fp16)[name = tensor("aw_chunk_2845_cast_fp16")]; + tensor var_27266_equation_0 = const()[name = tensor("op_27266_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27266_cast_fp16 = einsum(equation = var_27266_equation_0, values = (var_26916_cast_fp16, var_26739_cast_fp16))[name = tensor("op_27266_cast_fp16")]; + tensor var_27267_to_fp16 = const()[name = tensor("op_27267_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2847_cast_fp16 = mul(x = var_27266_cast_fp16, y = var_27267_to_fp16)[name = tensor("aw_chunk_2847_cast_fp16")]; + tensor var_27270_equation_0 = const()[name = tensor("op_27270_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27270_cast_fp16 = einsum(equation = var_27270_equation_0, values = (var_26920_cast_fp16, var_26746_cast_fp16))[name = tensor("op_27270_cast_fp16")]; + tensor var_27271_to_fp16 = const()[name = tensor("op_27271_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2849_cast_fp16 = mul(x = var_27270_cast_fp16, y = var_27271_to_fp16)[name = tensor("aw_chunk_2849_cast_fp16")]; + tensor var_27274_equation_0 = const()[name = tensor("op_27274_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27274_cast_fp16 = einsum(equation = var_27274_equation_0, values = (var_26920_cast_fp16, var_26753_cast_fp16))[name = tensor("op_27274_cast_fp16")]; + tensor var_27275_to_fp16 = const()[name = tensor("op_27275_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2851_cast_fp16 = mul(x = var_27274_cast_fp16, y = var_27275_to_fp16)[name = tensor("aw_chunk_2851_cast_fp16")]; + tensor var_27278_equation_0 = const()[name = tensor("op_27278_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27278_cast_fp16 = einsum(equation = var_27278_equation_0, values = (var_26920_cast_fp16, var_26760_cast_fp16))[name = tensor("op_27278_cast_fp16")]; + tensor var_27279_to_fp16 = const()[name = tensor("op_27279_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2853_cast_fp16 = mul(x = var_27278_cast_fp16, y = var_27279_to_fp16)[name = tensor("aw_chunk_2853_cast_fp16")]; + tensor var_27282_equation_0 = const()[name = tensor("op_27282_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27282_cast_fp16 = einsum(equation = var_27282_equation_0, values = (var_26920_cast_fp16, var_26767_cast_fp16))[name = tensor("op_27282_cast_fp16")]; + tensor var_27283_to_fp16 = const()[name = tensor("op_27283_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2855_cast_fp16 = mul(x = var_27282_cast_fp16, y = var_27283_to_fp16)[name = tensor("aw_chunk_2855_cast_fp16")]; + tensor var_27286_equation_0 = const()[name = tensor("op_27286_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27286_cast_fp16 = einsum(equation = var_27286_equation_0, values = (var_26924_cast_fp16, var_26774_cast_fp16))[name = tensor("op_27286_cast_fp16")]; + tensor var_27287_to_fp16 = const()[name = tensor("op_27287_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2857_cast_fp16 = mul(x = var_27286_cast_fp16, y = var_27287_to_fp16)[name = tensor("aw_chunk_2857_cast_fp16")]; + tensor var_27290_equation_0 = const()[name = tensor("op_27290_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27290_cast_fp16 = einsum(equation = var_27290_equation_0, values = (var_26924_cast_fp16, var_26781_cast_fp16))[name = tensor("op_27290_cast_fp16")]; + tensor var_27291_to_fp16 = const()[name = tensor("op_27291_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2859_cast_fp16 = mul(x = var_27290_cast_fp16, y = var_27291_to_fp16)[name = tensor("aw_chunk_2859_cast_fp16")]; + tensor var_27294_equation_0 = const()[name = tensor("op_27294_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27294_cast_fp16 = einsum(equation = var_27294_equation_0, values = (var_26924_cast_fp16, var_26788_cast_fp16))[name = tensor("op_27294_cast_fp16")]; + tensor var_27295_to_fp16 = const()[name = tensor("op_27295_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2861_cast_fp16 = mul(x = var_27294_cast_fp16, y = var_27295_to_fp16)[name = tensor("aw_chunk_2861_cast_fp16")]; + tensor var_27298_equation_0 = const()[name = tensor("op_27298_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27298_cast_fp16 = einsum(equation = var_27298_equation_0, values = (var_26924_cast_fp16, var_26795_cast_fp16))[name = tensor("op_27298_cast_fp16")]; + tensor var_27299_to_fp16 = const()[name = tensor("op_27299_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2863_cast_fp16 = mul(x = var_27298_cast_fp16, y = var_27299_to_fp16)[name = tensor("aw_chunk_2863_cast_fp16")]; + tensor var_27302_equation_0 = const()[name = tensor("op_27302_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27302_cast_fp16 = einsum(equation = var_27302_equation_0, values = (var_26928_cast_fp16, var_26802_cast_fp16))[name = tensor("op_27302_cast_fp16")]; + tensor var_27303_to_fp16 = const()[name = tensor("op_27303_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2865_cast_fp16 = mul(x = var_27302_cast_fp16, y = var_27303_to_fp16)[name = tensor("aw_chunk_2865_cast_fp16")]; + tensor var_27306_equation_0 = const()[name = tensor("op_27306_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27306_cast_fp16 = einsum(equation = var_27306_equation_0, values = (var_26928_cast_fp16, var_26809_cast_fp16))[name = tensor("op_27306_cast_fp16")]; + tensor var_27307_to_fp16 = const()[name = tensor("op_27307_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2867_cast_fp16 = mul(x = var_27306_cast_fp16, y = var_27307_to_fp16)[name = tensor("aw_chunk_2867_cast_fp16")]; + tensor var_27310_equation_0 = const()[name = tensor("op_27310_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27310_cast_fp16 = einsum(equation = var_27310_equation_0, values = (var_26928_cast_fp16, var_26816_cast_fp16))[name = tensor("op_27310_cast_fp16")]; + tensor var_27311_to_fp16 = const()[name = tensor("op_27311_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2869_cast_fp16 = mul(x = var_27310_cast_fp16, y = var_27311_to_fp16)[name = tensor("aw_chunk_2869_cast_fp16")]; + tensor var_27314_equation_0 = const()[name = tensor("op_27314_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27314_cast_fp16 = einsum(equation = var_27314_equation_0, values = (var_26928_cast_fp16, var_26823_cast_fp16))[name = tensor("op_27314_cast_fp16")]; + tensor var_27315_to_fp16 = const()[name = tensor("op_27315_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2871_cast_fp16 = mul(x = var_27314_cast_fp16, y = var_27315_to_fp16)[name = tensor("aw_chunk_2871_cast_fp16")]; + tensor var_27318_equation_0 = const()[name = tensor("op_27318_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27318_cast_fp16 = einsum(equation = var_27318_equation_0, values = (var_26932_cast_fp16, var_26830_cast_fp16))[name = tensor("op_27318_cast_fp16")]; + tensor var_27319_to_fp16 = const()[name = tensor("op_27319_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2873_cast_fp16 = mul(x = var_27318_cast_fp16, y = var_27319_to_fp16)[name = tensor("aw_chunk_2873_cast_fp16")]; + tensor var_27322_equation_0 = const()[name = tensor("op_27322_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27322_cast_fp16 = einsum(equation = var_27322_equation_0, values = (var_26932_cast_fp16, var_26837_cast_fp16))[name = tensor("op_27322_cast_fp16")]; + tensor var_27323_to_fp16 = const()[name = tensor("op_27323_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2875_cast_fp16 = mul(x = var_27322_cast_fp16, y = var_27323_to_fp16)[name = tensor("aw_chunk_2875_cast_fp16")]; + tensor var_27326_equation_0 = const()[name = tensor("op_27326_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27326_cast_fp16 = einsum(equation = var_27326_equation_0, values = (var_26932_cast_fp16, var_26844_cast_fp16))[name = tensor("op_27326_cast_fp16")]; + tensor var_27327_to_fp16 = const()[name = tensor("op_27327_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2877_cast_fp16 = mul(x = var_27326_cast_fp16, y = var_27327_to_fp16)[name = tensor("aw_chunk_2877_cast_fp16")]; + tensor var_27330_equation_0 = const()[name = tensor("op_27330_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_27330_cast_fp16 = einsum(equation = var_27330_equation_0, values = (var_26932_cast_fp16, var_26851_cast_fp16))[name = tensor("op_27330_cast_fp16")]; + tensor var_27331_to_fp16 = const()[name = tensor("op_27331_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2879_cast_fp16 = mul(x = var_27330_cast_fp16, y = var_27331_to_fp16)[name = tensor("aw_chunk_2879_cast_fp16")]; + tensor var_27333_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2721_cast_fp16)[name = tensor("op_27333_cast_fp16")]; + tensor var_27334_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2723_cast_fp16)[name = tensor("op_27334_cast_fp16")]; + tensor var_27335_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2725_cast_fp16)[name = tensor("op_27335_cast_fp16")]; + tensor var_27336_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2727_cast_fp16)[name = tensor("op_27336_cast_fp16")]; + tensor var_27337_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2729_cast_fp16)[name = tensor("op_27337_cast_fp16")]; + tensor var_27338_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2731_cast_fp16)[name = tensor("op_27338_cast_fp16")]; + tensor var_27339_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2733_cast_fp16)[name = tensor("op_27339_cast_fp16")]; + tensor var_27340_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2735_cast_fp16)[name = tensor("op_27340_cast_fp16")]; + tensor var_27341_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2737_cast_fp16)[name = tensor("op_27341_cast_fp16")]; + tensor var_27342_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2739_cast_fp16)[name = tensor("op_27342_cast_fp16")]; + tensor var_27343_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2741_cast_fp16)[name = tensor("op_27343_cast_fp16")]; + tensor var_27344_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2743_cast_fp16)[name = tensor("op_27344_cast_fp16")]; + tensor var_27345_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2745_cast_fp16)[name = tensor("op_27345_cast_fp16")]; + tensor var_27346_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2747_cast_fp16)[name = tensor("op_27346_cast_fp16")]; + tensor var_27347_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2749_cast_fp16)[name = tensor("op_27347_cast_fp16")]; + tensor var_27348_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2751_cast_fp16)[name = tensor("op_27348_cast_fp16")]; + tensor var_27349_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2753_cast_fp16)[name = tensor("op_27349_cast_fp16")]; + tensor var_27350_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2755_cast_fp16)[name = tensor("op_27350_cast_fp16")]; + tensor var_27351_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2757_cast_fp16)[name = tensor("op_27351_cast_fp16")]; + tensor var_27352_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2759_cast_fp16)[name = tensor("op_27352_cast_fp16")]; + tensor var_27353_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2761_cast_fp16)[name = tensor("op_27353_cast_fp16")]; + tensor var_27354_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2763_cast_fp16)[name = tensor("op_27354_cast_fp16")]; + tensor var_27355_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2765_cast_fp16)[name = tensor("op_27355_cast_fp16")]; + tensor var_27356_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2767_cast_fp16)[name = tensor("op_27356_cast_fp16")]; + tensor var_27357_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2769_cast_fp16)[name = tensor("op_27357_cast_fp16")]; + tensor var_27358_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2771_cast_fp16)[name = tensor("op_27358_cast_fp16")]; + tensor var_27359_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2773_cast_fp16)[name = tensor("op_27359_cast_fp16")]; + tensor var_27360_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2775_cast_fp16)[name = tensor("op_27360_cast_fp16")]; + tensor var_27361_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2777_cast_fp16)[name = tensor("op_27361_cast_fp16")]; + tensor var_27362_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2779_cast_fp16)[name = tensor("op_27362_cast_fp16")]; + tensor var_27363_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2781_cast_fp16)[name = tensor("op_27363_cast_fp16")]; + tensor var_27364_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2783_cast_fp16)[name = tensor("op_27364_cast_fp16")]; + tensor var_27365_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2785_cast_fp16)[name = tensor("op_27365_cast_fp16")]; + tensor var_27366_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2787_cast_fp16)[name = tensor("op_27366_cast_fp16")]; + tensor var_27367_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2789_cast_fp16)[name = tensor("op_27367_cast_fp16")]; + tensor var_27368_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2791_cast_fp16)[name = tensor("op_27368_cast_fp16")]; + tensor var_27369_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2793_cast_fp16)[name = tensor("op_27369_cast_fp16")]; + tensor var_27370_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2795_cast_fp16)[name = tensor("op_27370_cast_fp16")]; + tensor var_27371_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2797_cast_fp16)[name = tensor("op_27371_cast_fp16")]; + tensor var_27372_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2799_cast_fp16)[name = tensor("op_27372_cast_fp16")]; + tensor var_27373_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2801_cast_fp16)[name = tensor("op_27373_cast_fp16")]; + tensor var_27374_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2803_cast_fp16)[name = tensor("op_27374_cast_fp16")]; + tensor var_27375_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2805_cast_fp16)[name = tensor("op_27375_cast_fp16")]; + tensor var_27376_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2807_cast_fp16)[name = tensor("op_27376_cast_fp16")]; + tensor var_27377_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2809_cast_fp16)[name = tensor("op_27377_cast_fp16")]; + tensor var_27378_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2811_cast_fp16)[name = tensor("op_27378_cast_fp16")]; + tensor var_27379_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2813_cast_fp16)[name = tensor("op_27379_cast_fp16")]; + tensor var_27380_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2815_cast_fp16)[name = tensor("op_27380_cast_fp16")]; + tensor var_27381_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2817_cast_fp16)[name = tensor("op_27381_cast_fp16")]; + tensor var_27382_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2819_cast_fp16)[name = tensor("op_27382_cast_fp16")]; + tensor var_27383_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2821_cast_fp16)[name = tensor("op_27383_cast_fp16")]; + tensor var_27384_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2823_cast_fp16)[name = tensor("op_27384_cast_fp16")]; + tensor var_27385_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2825_cast_fp16)[name = tensor("op_27385_cast_fp16")]; + tensor var_27386_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2827_cast_fp16)[name = tensor("op_27386_cast_fp16")]; + tensor var_27387_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2829_cast_fp16)[name = tensor("op_27387_cast_fp16")]; + tensor var_27388_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2831_cast_fp16)[name = tensor("op_27388_cast_fp16")]; + tensor var_27389_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2833_cast_fp16)[name = tensor("op_27389_cast_fp16")]; + tensor var_27390_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2835_cast_fp16)[name = tensor("op_27390_cast_fp16")]; + tensor var_27391_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2837_cast_fp16)[name = tensor("op_27391_cast_fp16")]; + tensor var_27392_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2839_cast_fp16)[name = tensor("op_27392_cast_fp16")]; + tensor var_27393_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2841_cast_fp16)[name = tensor("op_27393_cast_fp16")]; + tensor var_27394_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2843_cast_fp16)[name = tensor("op_27394_cast_fp16")]; + tensor var_27395_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2845_cast_fp16)[name = tensor("op_27395_cast_fp16")]; + tensor var_27396_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2847_cast_fp16)[name = tensor("op_27396_cast_fp16")]; + tensor var_27397_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2849_cast_fp16)[name = tensor("op_27397_cast_fp16")]; + tensor var_27398_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2851_cast_fp16)[name = tensor("op_27398_cast_fp16")]; + tensor var_27399_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2853_cast_fp16)[name = tensor("op_27399_cast_fp16")]; + tensor var_27400_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2855_cast_fp16)[name = tensor("op_27400_cast_fp16")]; + tensor var_27401_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2857_cast_fp16)[name = tensor("op_27401_cast_fp16")]; + tensor var_27402_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2859_cast_fp16)[name = tensor("op_27402_cast_fp16")]; + tensor var_27403_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2861_cast_fp16)[name = tensor("op_27403_cast_fp16")]; + tensor var_27404_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2863_cast_fp16)[name = tensor("op_27404_cast_fp16")]; + tensor var_27405_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2865_cast_fp16)[name = tensor("op_27405_cast_fp16")]; + tensor var_27406_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2867_cast_fp16)[name = tensor("op_27406_cast_fp16")]; + tensor var_27407_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2869_cast_fp16)[name = tensor("op_27407_cast_fp16")]; + tensor var_27408_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2871_cast_fp16)[name = tensor("op_27408_cast_fp16")]; + tensor var_27409_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2873_cast_fp16)[name = tensor("op_27409_cast_fp16")]; + tensor var_27410_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2875_cast_fp16)[name = tensor("op_27410_cast_fp16")]; + tensor var_27411_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2877_cast_fp16)[name = tensor("op_27411_cast_fp16")]; + tensor var_27412_cast_fp16 = softmax(axis = var_26158, x = aw_chunk_2879_cast_fp16)[name = tensor("op_27412_cast_fp16")]; + tensor var_27414_equation_0 = const()[name = tensor("op_27414_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27414_cast_fp16 = einsum(equation = var_27414_equation_0, values = (var_26934_cast_fp16, var_27333_cast_fp16))[name = tensor("op_27414_cast_fp16")]; + tensor var_27416_equation_0 = const()[name = tensor("op_27416_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27416_cast_fp16 = einsum(equation = var_27416_equation_0, values = (var_26934_cast_fp16, var_27334_cast_fp16))[name = tensor("op_27416_cast_fp16")]; + tensor var_27418_equation_0 = const()[name = tensor("op_27418_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27418_cast_fp16 = einsum(equation = var_27418_equation_0, values = (var_26934_cast_fp16, var_27335_cast_fp16))[name = tensor("op_27418_cast_fp16")]; + tensor var_27420_equation_0 = const()[name = tensor("op_27420_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27420_cast_fp16 = einsum(equation = var_27420_equation_0, values = (var_26934_cast_fp16, var_27336_cast_fp16))[name = tensor("op_27420_cast_fp16")]; + tensor var_27422_equation_0 = const()[name = tensor("op_27422_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27422_cast_fp16 = einsum(equation = var_27422_equation_0, values = (var_26938_cast_fp16, var_27337_cast_fp16))[name = tensor("op_27422_cast_fp16")]; + tensor var_27424_equation_0 = const()[name = tensor("op_27424_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27424_cast_fp16 = einsum(equation = var_27424_equation_0, values = (var_26938_cast_fp16, var_27338_cast_fp16))[name = tensor("op_27424_cast_fp16")]; + tensor var_27426_equation_0 = const()[name = tensor("op_27426_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27426_cast_fp16 = einsum(equation = var_27426_equation_0, values = (var_26938_cast_fp16, var_27339_cast_fp16))[name = tensor("op_27426_cast_fp16")]; + tensor var_27428_equation_0 = const()[name = tensor("op_27428_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27428_cast_fp16 = einsum(equation = var_27428_equation_0, values = (var_26938_cast_fp16, var_27340_cast_fp16))[name = tensor("op_27428_cast_fp16")]; + tensor var_27430_equation_0 = const()[name = tensor("op_27430_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27430_cast_fp16 = einsum(equation = var_27430_equation_0, values = (var_26942_cast_fp16, var_27341_cast_fp16))[name = tensor("op_27430_cast_fp16")]; + tensor var_27432_equation_0 = const()[name = tensor("op_27432_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27432_cast_fp16 = einsum(equation = var_27432_equation_0, values = (var_26942_cast_fp16, var_27342_cast_fp16))[name = tensor("op_27432_cast_fp16")]; + tensor var_27434_equation_0 = const()[name = tensor("op_27434_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27434_cast_fp16 = einsum(equation = var_27434_equation_0, values = (var_26942_cast_fp16, var_27343_cast_fp16))[name = tensor("op_27434_cast_fp16")]; + tensor var_27436_equation_0 = const()[name = tensor("op_27436_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27436_cast_fp16 = einsum(equation = var_27436_equation_0, values = (var_26942_cast_fp16, var_27344_cast_fp16))[name = tensor("op_27436_cast_fp16")]; + tensor var_27438_equation_0 = const()[name = tensor("op_27438_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27438_cast_fp16 = einsum(equation = var_27438_equation_0, values = (var_26946_cast_fp16, var_27345_cast_fp16))[name = tensor("op_27438_cast_fp16")]; + tensor var_27440_equation_0 = const()[name = tensor("op_27440_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27440_cast_fp16 = einsum(equation = var_27440_equation_0, values = (var_26946_cast_fp16, var_27346_cast_fp16))[name = tensor("op_27440_cast_fp16")]; + tensor var_27442_equation_0 = const()[name = tensor("op_27442_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27442_cast_fp16 = einsum(equation = var_27442_equation_0, values = (var_26946_cast_fp16, var_27347_cast_fp16))[name = tensor("op_27442_cast_fp16")]; + tensor var_27444_equation_0 = const()[name = tensor("op_27444_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27444_cast_fp16 = einsum(equation = var_27444_equation_0, values = (var_26946_cast_fp16, var_27348_cast_fp16))[name = tensor("op_27444_cast_fp16")]; + tensor var_27446_equation_0 = const()[name = tensor("op_27446_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27446_cast_fp16 = einsum(equation = var_27446_equation_0, values = (var_26950_cast_fp16, var_27349_cast_fp16))[name = tensor("op_27446_cast_fp16")]; + tensor var_27448_equation_0 = const()[name = tensor("op_27448_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27448_cast_fp16 = einsum(equation = var_27448_equation_0, values = (var_26950_cast_fp16, var_27350_cast_fp16))[name = tensor("op_27448_cast_fp16")]; + tensor var_27450_equation_0 = const()[name = tensor("op_27450_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27450_cast_fp16 = einsum(equation = var_27450_equation_0, values = (var_26950_cast_fp16, var_27351_cast_fp16))[name = tensor("op_27450_cast_fp16")]; + tensor var_27452_equation_0 = const()[name = tensor("op_27452_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27452_cast_fp16 = einsum(equation = var_27452_equation_0, values = (var_26950_cast_fp16, var_27352_cast_fp16))[name = tensor("op_27452_cast_fp16")]; + tensor var_27454_equation_0 = const()[name = tensor("op_27454_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27454_cast_fp16 = einsum(equation = var_27454_equation_0, values = (var_26954_cast_fp16, var_27353_cast_fp16))[name = tensor("op_27454_cast_fp16")]; + tensor var_27456_equation_0 = const()[name = tensor("op_27456_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27456_cast_fp16 = einsum(equation = var_27456_equation_0, values = (var_26954_cast_fp16, var_27354_cast_fp16))[name = tensor("op_27456_cast_fp16")]; + tensor var_27458_equation_0 = const()[name = tensor("op_27458_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27458_cast_fp16 = einsum(equation = var_27458_equation_0, values = (var_26954_cast_fp16, var_27355_cast_fp16))[name = tensor("op_27458_cast_fp16")]; + tensor var_27460_equation_0 = const()[name = tensor("op_27460_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27460_cast_fp16 = einsum(equation = var_27460_equation_0, values = (var_26954_cast_fp16, var_27356_cast_fp16))[name = tensor("op_27460_cast_fp16")]; + tensor var_27462_equation_0 = const()[name = tensor("op_27462_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27462_cast_fp16 = einsum(equation = var_27462_equation_0, values = (var_26958_cast_fp16, var_27357_cast_fp16))[name = tensor("op_27462_cast_fp16")]; + tensor var_27464_equation_0 = const()[name = tensor("op_27464_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27464_cast_fp16 = einsum(equation = var_27464_equation_0, values = (var_26958_cast_fp16, var_27358_cast_fp16))[name = tensor("op_27464_cast_fp16")]; + tensor var_27466_equation_0 = const()[name = tensor("op_27466_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27466_cast_fp16 = einsum(equation = var_27466_equation_0, values = (var_26958_cast_fp16, var_27359_cast_fp16))[name = tensor("op_27466_cast_fp16")]; + tensor var_27468_equation_0 = const()[name = tensor("op_27468_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27468_cast_fp16 = einsum(equation = var_27468_equation_0, values = (var_26958_cast_fp16, var_27360_cast_fp16))[name = tensor("op_27468_cast_fp16")]; + tensor var_27470_equation_0 = const()[name = tensor("op_27470_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27470_cast_fp16 = einsum(equation = var_27470_equation_0, values = (var_26962_cast_fp16, var_27361_cast_fp16))[name = tensor("op_27470_cast_fp16")]; + tensor var_27472_equation_0 = const()[name = tensor("op_27472_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27472_cast_fp16 = einsum(equation = var_27472_equation_0, values = (var_26962_cast_fp16, var_27362_cast_fp16))[name = tensor("op_27472_cast_fp16")]; + tensor var_27474_equation_0 = const()[name = tensor("op_27474_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27474_cast_fp16 = einsum(equation = var_27474_equation_0, values = (var_26962_cast_fp16, var_27363_cast_fp16))[name = tensor("op_27474_cast_fp16")]; + tensor var_27476_equation_0 = const()[name = tensor("op_27476_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27476_cast_fp16 = einsum(equation = var_27476_equation_0, values = (var_26962_cast_fp16, var_27364_cast_fp16))[name = tensor("op_27476_cast_fp16")]; + tensor var_27478_equation_0 = const()[name = tensor("op_27478_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27478_cast_fp16 = einsum(equation = var_27478_equation_0, values = (var_26966_cast_fp16, var_27365_cast_fp16))[name = tensor("op_27478_cast_fp16")]; + tensor var_27480_equation_0 = const()[name = tensor("op_27480_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27480_cast_fp16 = einsum(equation = var_27480_equation_0, values = (var_26966_cast_fp16, var_27366_cast_fp16))[name = tensor("op_27480_cast_fp16")]; + tensor var_27482_equation_0 = const()[name = tensor("op_27482_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27482_cast_fp16 = einsum(equation = var_27482_equation_0, values = (var_26966_cast_fp16, var_27367_cast_fp16))[name = tensor("op_27482_cast_fp16")]; + tensor var_27484_equation_0 = const()[name = tensor("op_27484_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27484_cast_fp16 = einsum(equation = var_27484_equation_0, values = (var_26966_cast_fp16, var_27368_cast_fp16))[name = tensor("op_27484_cast_fp16")]; + tensor var_27486_equation_0 = const()[name = tensor("op_27486_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27486_cast_fp16 = einsum(equation = var_27486_equation_0, values = (var_26970_cast_fp16, var_27369_cast_fp16))[name = tensor("op_27486_cast_fp16")]; + tensor var_27488_equation_0 = const()[name = tensor("op_27488_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27488_cast_fp16 = einsum(equation = var_27488_equation_0, values = (var_26970_cast_fp16, var_27370_cast_fp16))[name = tensor("op_27488_cast_fp16")]; + tensor var_27490_equation_0 = const()[name = tensor("op_27490_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27490_cast_fp16 = einsum(equation = var_27490_equation_0, values = (var_26970_cast_fp16, var_27371_cast_fp16))[name = tensor("op_27490_cast_fp16")]; + tensor var_27492_equation_0 = const()[name = tensor("op_27492_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27492_cast_fp16 = einsum(equation = var_27492_equation_0, values = (var_26970_cast_fp16, var_27372_cast_fp16))[name = tensor("op_27492_cast_fp16")]; + tensor var_27494_equation_0 = const()[name = tensor("op_27494_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27494_cast_fp16 = einsum(equation = var_27494_equation_0, values = (var_26974_cast_fp16, var_27373_cast_fp16))[name = tensor("op_27494_cast_fp16")]; + tensor var_27496_equation_0 = const()[name = tensor("op_27496_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27496_cast_fp16 = einsum(equation = var_27496_equation_0, values = (var_26974_cast_fp16, var_27374_cast_fp16))[name = tensor("op_27496_cast_fp16")]; + tensor var_27498_equation_0 = const()[name = tensor("op_27498_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27498_cast_fp16 = einsum(equation = var_27498_equation_0, values = (var_26974_cast_fp16, var_27375_cast_fp16))[name = tensor("op_27498_cast_fp16")]; + tensor var_27500_equation_0 = const()[name = tensor("op_27500_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27500_cast_fp16 = einsum(equation = var_27500_equation_0, values = (var_26974_cast_fp16, var_27376_cast_fp16))[name = tensor("op_27500_cast_fp16")]; + tensor var_27502_equation_0 = const()[name = tensor("op_27502_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27502_cast_fp16 = einsum(equation = var_27502_equation_0, values = (var_26978_cast_fp16, var_27377_cast_fp16))[name = tensor("op_27502_cast_fp16")]; + tensor var_27504_equation_0 = const()[name = tensor("op_27504_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27504_cast_fp16 = einsum(equation = var_27504_equation_0, values = (var_26978_cast_fp16, var_27378_cast_fp16))[name = tensor("op_27504_cast_fp16")]; + tensor var_27506_equation_0 = const()[name = tensor("op_27506_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27506_cast_fp16 = einsum(equation = var_27506_equation_0, values = (var_26978_cast_fp16, var_27379_cast_fp16))[name = tensor("op_27506_cast_fp16")]; + tensor var_27508_equation_0 = const()[name = tensor("op_27508_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27508_cast_fp16 = einsum(equation = var_27508_equation_0, values = (var_26978_cast_fp16, var_27380_cast_fp16))[name = tensor("op_27508_cast_fp16")]; + tensor var_27510_equation_0 = const()[name = tensor("op_27510_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27510_cast_fp16 = einsum(equation = var_27510_equation_0, values = (var_26982_cast_fp16, var_27381_cast_fp16))[name = tensor("op_27510_cast_fp16")]; + tensor var_27512_equation_0 = const()[name = tensor("op_27512_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27512_cast_fp16 = einsum(equation = var_27512_equation_0, values = (var_26982_cast_fp16, var_27382_cast_fp16))[name = tensor("op_27512_cast_fp16")]; + tensor var_27514_equation_0 = const()[name = tensor("op_27514_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27514_cast_fp16 = einsum(equation = var_27514_equation_0, values = (var_26982_cast_fp16, var_27383_cast_fp16))[name = tensor("op_27514_cast_fp16")]; + tensor var_27516_equation_0 = const()[name = tensor("op_27516_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27516_cast_fp16 = einsum(equation = var_27516_equation_0, values = (var_26982_cast_fp16, var_27384_cast_fp16))[name = tensor("op_27516_cast_fp16")]; + tensor var_27518_equation_0 = const()[name = tensor("op_27518_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27518_cast_fp16 = einsum(equation = var_27518_equation_0, values = (var_26986_cast_fp16, var_27385_cast_fp16))[name = tensor("op_27518_cast_fp16")]; + tensor var_27520_equation_0 = const()[name = tensor("op_27520_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27520_cast_fp16 = einsum(equation = var_27520_equation_0, values = (var_26986_cast_fp16, var_27386_cast_fp16))[name = tensor("op_27520_cast_fp16")]; + tensor var_27522_equation_0 = const()[name = tensor("op_27522_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27522_cast_fp16 = einsum(equation = var_27522_equation_0, values = (var_26986_cast_fp16, var_27387_cast_fp16))[name = tensor("op_27522_cast_fp16")]; + tensor var_27524_equation_0 = const()[name = tensor("op_27524_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27524_cast_fp16 = einsum(equation = var_27524_equation_0, values = (var_26986_cast_fp16, var_27388_cast_fp16))[name = tensor("op_27524_cast_fp16")]; + tensor var_27526_equation_0 = const()[name = tensor("op_27526_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27526_cast_fp16 = einsum(equation = var_27526_equation_0, values = (var_26990_cast_fp16, var_27389_cast_fp16))[name = tensor("op_27526_cast_fp16")]; + tensor var_27528_equation_0 = const()[name = tensor("op_27528_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27528_cast_fp16 = einsum(equation = var_27528_equation_0, values = (var_26990_cast_fp16, var_27390_cast_fp16))[name = tensor("op_27528_cast_fp16")]; + tensor var_27530_equation_0 = const()[name = tensor("op_27530_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27530_cast_fp16 = einsum(equation = var_27530_equation_0, values = (var_26990_cast_fp16, var_27391_cast_fp16))[name = tensor("op_27530_cast_fp16")]; + tensor var_27532_equation_0 = const()[name = tensor("op_27532_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27532_cast_fp16 = einsum(equation = var_27532_equation_0, values = (var_26990_cast_fp16, var_27392_cast_fp16))[name = tensor("op_27532_cast_fp16")]; + tensor var_27534_equation_0 = const()[name = tensor("op_27534_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27534_cast_fp16 = einsum(equation = var_27534_equation_0, values = (var_26994_cast_fp16, var_27393_cast_fp16))[name = tensor("op_27534_cast_fp16")]; + tensor var_27536_equation_0 = const()[name = tensor("op_27536_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27536_cast_fp16 = einsum(equation = var_27536_equation_0, values = (var_26994_cast_fp16, var_27394_cast_fp16))[name = tensor("op_27536_cast_fp16")]; + tensor var_27538_equation_0 = const()[name = tensor("op_27538_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27538_cast_fp16 = einsum(equation = var_27538_equation_0, values = (var_26994_cast_fp16, var_27395_cast_fp16))[name = tensor("op_27538_cast_fp16")]; + tensor var_27540_equation_0 = const()[name = tensor("op_27540_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27540_cast_fp16 = einsum(equation = var_27540_equation_0, values = (var_26994_cast_fp16, var_27396_cast_fp16))[name = tensor("op_27540_cast_fp16")]; + tensor var_27542_equation_0 = const()[name = tensor("op_27542_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27542_cast_fp16 = einsum(equation = var_27542_equation_0, values = (var_26998_cast_fp16, var_27397_cast_fp16))[name = tensor("op_27542_cast_fp16")]; + tensor var_27544_equation_0 = const()[name = tensor("op_27544_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27544_cast_fp16 = einsum(equation = var_27544_equation_0, values = (var_26998_cast_fp16, var_27398_cast_fp16))[name = tensor("op_27544_cast_fp16")]; + tensor var_27546_equation_0 = const()[name = tensor("op_27546_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27546_cast_fp16 = einsum(equation = var_27546_equation_0, values = (var_26998_cast_fp16, var_27399_cast_fp16))[name = tensor("op_27546_cast_fp16")]; + tensor var_27548_equation_0 = const()[name = tensor("op_27548_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27548_cast_fp16 = einsum(equation = var_27548_equation_0, values = (var_26998_cast_fp16, var_27400_cast_fp16))[name = tensor("op_27548_cast_fp16")]; + tensor var_27550_equation_0 = const()[name = tensor("op_27550_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27550_cast_fp16 = einsum(equation = var_27550_equation_0, values = (var_27002_cast_fp16, var_27401_cast_fp16))[name = tensor("op_27550_cast_fp16")]; + tensor var_27552_equation_0 = const()[name = tensor("op_27552_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27552_cast_fp16 = einsum(equation = var_27552_equation_0, values = (var_27002_cast_fp16, var_27402_cast_fp16))[name = tensor("op_27552_cast_fp16")]; + tensor var_27554_equation_0 = const()[name = tensor("op_27554_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27554_cast_fp16 = einsum(equation = var_27554_equation_0, values = (var_27002_cast_fp16, var_27403_cast_fp16))[name = tensor("op_27554_cast_fp16")]; + tensor var_27556_equation_0 = const()[name = tensor("op_27556_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27556_cast_fp16 = einsum(equation = var_27556_equation_0, values = (var_27002_cast_fp16, var_27404_cast_fp16))[name = tensor("op_27556_cast_fp16")]; + tensor var_27558_equation_0 = const()[name = tensor("op_27558_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27558_cast_fp16 = einsum(equation = var_27558_equation_0, values = (var_27006_cast_fp16, var_27405_cast_fp16))[name = tensor("op_27558_cast_fp16")]; + tensor var_27560_equation_0 = const()[name = tensor("op_27560_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27560_cast_fp16 = einsum(equation = var_27560_equation_0, values = (var_27006_cast_fp16, var_27406_cast_fp16))[name = tensor("op_27560_cast_fp16")]; + tensor var_27562_equation_0 = const()[name = tensor("op_27562_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27562_cast_fp16 = einsum(equation = var_27562_equation_0, values = (var_27006_cast_fp16, var_27407_cast_fp16))[name = tensor("op_27562_cast_fp16")]; + tensor var_27564_equation_0 = const()[name = tensor("op_27564_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27564_cast_fp16 = einsum(equation = var_27564_equation_0, values = (var_27006_cast_fp16, var_27408_cast_fp16))[name = tensor("op_27564_cast_fp16")]; + tensor var_27566_equation_0 = const()[name = tensor("op_27566_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27566_cast_fp16 = einsum(equation = var_27566_equation_0, values = (var_27010_cast_fp16, var_27409_cast_fp16))[name = tensor("op_27566_cast_fp16")]; + tensor var_27568_equation_0 = const()[name = tensor("op_27568_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27568_cast_fp16 = einsum(equation = var_27568_equation_0, values = (var_27010_cast_fp16, var_27410_cast_fp16))[name = tensor("op_27568_cast_fp16")]; + tensor var_27570_equation_0 = const()[name = tensor("op_27570_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27570_cast_fp16 = einsum(equation = var_27570_equation_0, values = (var_27010_cast_fp16, var_27411_cast_fp16))[name = tensor("op_27570_cast_fp16")]; + tensor var_27572_equation_0 = const()[name = tensor("op_27572_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_27572_cast_fp16 = einsum(equation = var_27572_equation_0, values = (var_27010_cast_fp16, var_27412_cast_fp16))[name = tensor("op_27572_cast_fp16")]; + tensor var_27574_interleave_0 = const()[name = tensor("op_27574_interleave_0"), val = tensor(false)]; + tensor var_27574_cast_fp16 = concat(axis = var_26133, interleave = var_27574_interleave_0, values = (var_27414_cast_fp16, var_27416_cast_fp16, var_27418_cast_fp16, var_27420_cast_fp16))[name = tensor("op_27574_cast_fp16")]; + tensor var_27576_interleave_0 = const()[name = tensor("op_27576_interleave_0"), val = tensor(false)]; + tensor var_27576_cast_fp16 = concat(axis = var_26133, interleave = var_27576_interleave_0, values = (var_27422_cast_fp16, var_27424_cast_fp16, var_27426_cast_fp16, var_27428_cast_fp16))[name = tensor("op_27576_cast_fp16")]; + tensor var_27578_interleave_0 = const()[name = tensor("op_27578_interleave_0"), val = tensor(false)]; + tensor var_27578_cast_fp16 = concat(axis = var_26133, interleave = var_27578_interleave_0, values = (var_27430_cast_fp16, var_27432_cast_fp16, var_27434_cast_fp16, var_27436_cast_fp16))[name = tensor("op_27578_cast_fp16")]; + tensor var_27580_interleave_0 = const()[name = tensor("op_27580_interleave_0"), val = tensor(false)]; + tensor var_27580_cast_fp16 = concat(axis = var_26133, interleave = var_27580_interleave_0, values = (var_27438_cast_fp16, var_27440_cast_fp16, var_27442_cast_fp16, var_27444_cast_fp16))[name = tensor("op_27580_cast_fp16")]; + tensor var_27582_interleave_0 = const()[name = tensor("op_27582_interleave_0"), val = tensor(false)]; + tensor var_27582_cast_fp16 = concat(axis = var_26133, interleave = var_27582_interleave_0, values = (var_27446_cast_fp16, var_27448_cast_fp16, var_27450_cast_fp16, var_27452_cast_fp16))[name = tensor("op_27582_cast_fp16")]; + tensor var_27584_interleave_0 = const()[name = tensor("op_27584_interleave_0"), val = tensor(false)]; + tensor var_27584_cast_fp16 = concat(axis = var_26133, interleave = var_27584_interleave_0, values = (var_27454_cast_fp16, var_27456_cast_fp16, var_27458_cast_fp16, var_27460_cast_fp16))[name = tensor("op_27584_cast_fp16")]; + tensor var_27586_interleave_0 = const()[name = tensor("op_27586_interleave_0"), val = tensor(false)]; + tensor var_27586_cast_fp16 = concat(axis = var_26133, interleave = var_27586_interleave_0, values = (var_27462_cast_fp16, var_27464_cast_fp16, var_27466_cast_fp16, var_27468_cast_fp16))[name = tensor("op_27586_cast_fp16")]; + tensor var_27588_interleave_0 = const()[name = tensor("op_27588_interleave_0"), val = tensor(false)]; + tensor var_27588_cast_fp16 = concat(axis = var_26133, interleave = var_27588_interleave_0, values = (var_27470_cast_fp16, var_27472_cast_fp16, var_27474_cast_fp16, var_27476_cast_fp16))[name = tensor("op_27588_cast_fp16")]; + tensor var_27590_interleave_0 = const()[name = tensor("op_27590_interleave_0"), val = tensor(false)]; + tensor var_27590_cast_fp16 = concat(axis = var_26133, interleave = var_27590_interleave_0, values = (var_27478_cast_fp16, var_27480_cast_fp16, var_27482_cast_fp16, var_27484_cast_fp16))[name = tensor("op_27590_cast_fp16")]; + tensor var_27592_interleave_0 = const()[name = tensor("op_27592_interleave_0"), val = tensor(false)]; + tensor var_27592_cast_fp16 = concat(axis = var_26133, interleave = var_27592_interleave_0, values = (var_27486_cast_fp16, var_27488_cast_fp16, var_27490_cast_fp16, var_27492_cast_fp16))[name = tensor("op_27592_cast_fp16")]; + tensor var_27594_interleave_0 = const()[name = tensor("op_27594_interleave_0"), val = tensor(false)]; + tensor var_27594_cast_fp16 = concat(axis = var_26133, interleave = var_27594_interleave_0, values = (var_27494_cast_fp16, var_27496_cast_fp16, var_27498_cast_fp16, var_27500_cast_fp16))[name = tensor("op_27594_cast_fp16")]; + tensor var_27596_interleave_0 = const()[name = tensor("op_27596_interleave_0"), val = tensor(false)]; + tensor var_27596_cast_fp16 = concat(axis = var_26133, interleave = var_27596_interleave_0, values = (var_27502_cast_fp16, var_27504_cast_fp16, var_27506_cast_fp16, var_27508_cast_fp16))[name = tensor("op_27596_cast_fp16")]; + tensor var_27598_interleave_0 = const()[name = tensor("op_27598_interleave_0"), val = tensor(false)]; + tensor var_27598_cast_fp16 = concat(axis = var_26133, interleave = var_27598_interleave_0, values = (var_27510_cast_fp16, var_27512_cast_fp16, var_27514_cast_fp16, var_27516_cast_fp16))[name = tensor("op_27598_cast_fp16")]; + tensor var_27600_interleave_0 = const()[name = tensor("op_27600_interleave_0"), val = tensor(false)]; + tensor var_27600_cast_fp16 = concat(axis = var_26133, interleave = var_27600_interleave_0, values = (var_27518_cast_fp16, var_27520_cast_fp16, var_27522_cast_fp16, var_27524_cast_fp16))[name = tensor("op_27600_cast_fp16")]; + tensor var_27602_interleave_0 = const()[name = tensor("op_27602_interleave_0"), val = tensor(false)]; + tensor var_27602_cast_fp16 = concat(axis = var_26133, interleave = var_27602_interleave_0, values = (var_27526_cast_fp16, var_27528_cast_fp16, var_27530_cast_fp16, var_27532_cast_fp16))[name = tensor("op_27602_cast_fp16")]; + tensor var_27604_interleave_0 = const()[name = tensor("op_27604_interleave_0"), val = tensor(false)]; + tensor var_27604_cast_fp16 = concat(axis = var_26133, interleave = var_27604_interleave_0, values = (var_27534_cast_fp16, var_27536_cast_fp16, var_27538_cast_fp16, var_27540_cast_fp16))[name = tensor("op_27604_cast_fp16")]; + tensor var_27606_interleave_0 = const()[name = tensor("op_27606_interleave_0"), val = tensor(false)]; + tensor var_27606_cast_fp16 = concat(axis = var_26133, interleave = var_27606_interleave_0, values = (var_27542_cast_fp16, var_27544_cast_fp16, var_27546_cast_fp16, var_27548_cast_fp16))[name = tensor("op_27606_cast_fp16")]; + tensor var_27608_interleave_0 = const()[name = tensor("op_27608_interleave_0"), val = tensor(false)]; + tensor var_27608_cast_fp16 = concat(axis = var_26133, interleave = var_27608_interleave_0, values = (var_27550_cast_fp16, var_27552_cast_fp16, var_27554_cast_fp16, var_27556_cast_fp16))[name = tensor("op_27608_cast_fp16")]; + tensor var_27610_interleave_0 = const()[name = tensor("op_27610_interleave_0"), val = tensor(false)]; + tensor var_27610_cast_fp16 = concat(axis = var_26133, interleave = var_27610_interleave_0, values = (var_27558_cast_fp16, var_27560_cast_fp16, var_27562_cast_fp16, var_27564_cast_fp16))[name = tensor("op_27610_cast_fp16")]; + tensor var_27612_interleave_0 = const()[name = tensor("op_27612_interleave_0"), val = tensor(false)]; + tensor var_27612_cast_fp16 = concat(axis = var_26133, interleave = var_27612_interleave_0, values = (var_27566_cast_fp16, var_27568_cast_fp16, var_27570_cast_fp16, var_27572_cast_fp16))[name = tensor("op_27612_cast_fp16")]; + tensor input_137_interleave_0 = const()[name = tensor("input_137_interleave_0"), val = tensor(false)]; + tensor input_137_cast_fp16 = concat(axis = var_26158, interleave = input_137_interleave_0, values = (var_27574_cast_fp16, var_27576_cast_fp16, var_27578_cast_fp16, var_27580_cast_fp16, var_27582_cast_fp16, var_27584_cast_fp16, var_27586_cast_fp16, var_27588_cast_fp16, var_27590_cast_fp16, var_27592_cast_fp16, var_27594_cast_fp16, var_27596_cast_fp16, var_27598_cast_fp16, var_27600_cast_fp16, var_27602_cast_fp16, var_27604_cast_fp16, var_27606_cast_fp16, var_27608_cast_fp16, var_27610_cast_fp16, var_27612_cast_fp16))[name = tensor("input_137_cast_fp16")]; + tensor var_27617 = const()[name = tensor("op_27617"), val = tensor([1, 1])]; + tensor var_27619 = const()[name = tensor("op_27619"), val = tensor([1, 1])]; + tensor obj_71_pad_type_0 = const()[name = tensor("obj_71_pad_type_0"), val = tensor("custom")]; + tensor obj_71_pad_0 = const()[name = tensor("obj_71_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_17_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_17_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(693142400)))]; + tensor layers_17_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_17_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(696419264)))]; + tensor obj_71_cast_fp16 = conv(bias = layers_17_self_attn_o_proj_bias_to_fp16, dilations = var_27619, groups = var_26158, pad = obj_71_pad_0, pad_type = obj_71_pad_type_0, strides = var_27617, weight = layers_17_self_attn_o_proj_weight_to_fp16, x = input_137_cast_fp16)[name = tensor("obj_71_cast_fp16")]; + tensor inputs_71_cast_fp16 = add(x = inputs_69_cast_fp16, y = obj_71_cast_fp16)[name = tensor("inputs_71_cast_fp16")]; + tensor var_27625 = const()[name = tensor("op_27625"), val = tensor([1])]; + tensor channels_mean_71_cast_fp16 = reduce_mean(axes = var_27625, keep_dims = var_26159, x = inputs_71_cast_fp16)[name = tensor("channels_mean_71_cast_fp16")]; + tensor zero_mean_71_cast_fp16 = sub(x = inputs_71_cast_fp16, y = channels_mean_71_cast_fp16)[name = tensor("zero_mean_71_cast_fp16")]; + tensor zero_mean_sq_71_cast_fp16 = mul(x = zero_mean_71_cast_fp16, y = zero_mean_71_cast_fp16)[name = tensor("zero_mean_sq_71_cast_fp16")]; + tensor var_27629 = const()[name = tensor("op_27629"), val = tensor([1])]; + tensor var_27630_cast_fp16 = reduce_mean(axes = var_27629, keep_dims = var_26159, x = zero_mean_sq_71_cast_fp16)[name = tensor("op_27630_cast_fp16")]; + tensor var_27631_to_fp16 = const()[name = tensor("op_27631_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_27632_cast_fp16 = add(x = var_27630_cast_fp16, y = var_27631_to_fp16)[name = tensor("op_27632_cast_fp16")]; + tensor denom_71_epsilon_0_to_fp16 = const()[name = tensor("denom_71_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_71_cast_fp16 = rsqrt(epsilon = denom_71_epsilon_0_to_fp16, x = var_27632_cast_fp16)[name = tensor("denom_71_cast_fp16")]; + tensor out_71_cast_fp16 = mul(x = zero_mean_71_cast_fp16, y = denom_71_cast_fp16)[name = tensor("out_71_cast_fp16")]; + tensor input_139_gamma_0_to_fp16 = const()[name = tensor("input_139_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(696421888)))]; + tensor input_139_beta_0_to_fp16 = const()[name = tensor("input_139_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(696424512)))]; + tensor input_139_epsilon_0_to_fp16 = const()[name = tensor("input_139_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_139_cast_fp16 = batch_norm(beta = input_139_beta_0_to_fp16, epsilon = input_139_epsilon_0_to_fp16, gamma = input_139_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_71_cast_fp16)[name = tensor("input_139_cast_fp16")]; + tensor var_27643 = const()[name = tensor("op_27643"), val = tensor([1, 1])]; + tensor var_27645 = const()[name = tensor("op_27645"), val = tensor([1, 1])]; + tensor input_141_pad_type_0 = const()[name = tensor("input_141_pad_type_0"), val = tensor("custom")]; + tensor input_141_pad_0 = const()[name = tensor("input_141_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_17_fc1_weight_to_fp16 = const()[name = tensor("layers_17_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(696427136)))]; + tensor layers_17_fc1_bias_to_fp16 = const()[name = tensor("layers_17_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(709534400)))]; + tensor input_141_cast_fp16 = conv(bias = layers_17_fc1_bias_to_fp16, dilations = var_27645, groups = var_26158, pad = input_141_pad_0, pad_type = input_141_pad_type_0, strides = var_27643, weight = layers_17_fc1_weight_to_fp16, x = input_139_cast_fp16)[name = tensor("input_141_cast_fp16")]; + tensor input_143_mode_0 = const()[name = tensor("input_143_mode_0"), val = tensor("EXACT")]; + tensor input_143_cast_fp16 = gelu(mode = input_143_mode_0, x = input_141_cast_fp16)[name = tensor("input_143_cast_fp16")]; + tensor var_27651 = const()[name = tensor("op_27651"), val = tensor([1, 1])]; + tensor var_27653 = const()[name = tensor("op_27653"), val = tensor([1, 1])]; + tensor hidden_states_39_pad_type_0 = const()[name = tensor("hidden_states_39_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_39_pad_0 = const()[name = tensor("hidden_states_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_17_fc2_weight_to_fp16 = const()[name = tensor("layers_17_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(709544704)))]; + tensor layers_17_fc2_bias_to_fp16 = const()[name = tensor("layers_17_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(722651968)))]; + tensor hidden_states_39_cast_fp16 = conv(bias = layers_17_fc2_bias_to_fp16, dilations = var_27653, groups = var_26158, pad = hidden_states_39_pad_0, pad_type = hidden_states_39_pad_type_0, strides = var_27651, weight = layers_17_fc2_weight_to_fp16, x = input_143_cast_fp16)[name = tensor("hidden_states_39_cast_fp16")]; + tensor inputs_73_cast_fp16 = add(x = inputs_71_cast_fp16, y = hidden_states_39_cast_fp16)[name = tensor("inputs_73_cast_fp16")]; + tensor var_27660 = const()[name = tensor("op_27660"), val = tensor(3)]; + tensor var_27685 = const()[name = tensor("op_27685"), val = tensor(1)]; + tensor var_27686 = const()[name = tensor("op_27686"), val = tensor(true)]; + tensor var_27696 = const()[name = tensor("op_27696"), val = tensor([1])]; + tensor channels_mean_73_cast_fp16 = reduce_mean(axes = var_27696, keep_dims = var_27686, x = inputs_73_cast_fp16)[name = tensor("channels_mean_73_cast_fp16")]; + tensor zero_mean_73_cast_fp16 = sub(x = inputs_73_cast_fp16, y = channels_mean_73_cast_fp16)[name = tensor("zero_mean_73_cast_fp16")]; + tensor zero_mean_sq_73_cast_fp16 = mul(x = zero_mean_73_cast_fp16, y = zero_mean_73_cast_fp16)[name = tensor("zero_mean_sq_73_cast_fp16")]; + tensor var_27700 = const()[name = tensor("op_27700"), val = tensor([1])]; + tensor var_27701_cast_fp16 = reduce_mean(axes = var_27700, keep_dims = var_27686, x = zero_mean_sq_73_cast_fp16)[name = tensor("op_27701_cast_fp16")]; + tensor var_27702_to_fp16 = const()[name = tensor("op_27702_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_27703_cast_fp16 = add(x = var_27701_cast_fp16, y = var_27702_to_fp16)[name = tensor("op_27703_cast_fp16")]; + tensor denom_73_epsilon_0_to_fp16 = const()[name = tensor("denom_73_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_73_cast_fp16 = rsqrt(epsilon = denom_73_epsilon_0_to_fp16, x = var_27703_cast_fp16)[name = tensor("denom_73_cast_fp16")]; + tensor out_73_cast_fp16 = mul(x = zero_mean_73_cast_fp16, y = denom_73_cast_fp16)[name = tensor("out_73_cast_fp16")]; + tensor obj_73_gamma_0_to_fp16 = const()[name = tensor("obj_73_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(722654592)))]; + tensor obj_73_beta_0_to_fp16 = const()[name = tensor("obj_73_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(722657216)))]; + tensor obj_73_epsilon_0_to_fp16 = const()[name = tensor("obj_73_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_73_cast_fp16 = batch_norm(beta = obj_73_beta_0_to_fp16, epsilon = obj_73_epsilon_0_to_fp16, gamma = obj_73_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_73_cast_fp16)[name = tensor("obj_73_cast_fp16")]; + tensor var_27718 = const()[name = tensor("op_27718"), val = tensor([1, 1])]; + tensor var_27720 = const()[name = tensor("op_27720"), val = tensor([1, 1])]; + tensor query_37_pad_type_0 = const()[name = tensor("query_37_pad_type_0"), val = tensor("custom")]; + tensor query_37_pad_0 = const()[name = tensor("query_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_18_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_18_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(722659840)))]; + tensor layers_18_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_18_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(725936704)))]; + tensor query_37_cast_fp16 = conv(bias = layers_18_self_attn_q_proj_bias_to_fp16, dilations = var_27720, groups = var_27685, pad = query_37_pad_0, pad_type = query_37_pad_type_0, strides = var_27718, weight = layers_18_self_attn_q_proj_weight_to_fp16, x = obj_73_cast_fp16)[name = tensor("query_37_cast_fp16")]; + tensor var_27724 = const()[name = tensor("op_27724"), val = tensor([1, 1])]; + tensor var_27726 = const()[name = tensor("op_27726"), val = tensor([1, 1])]; + tensor key_37_pad_type_0 = const()[name = tensor("key_37_pad_type_0"), val = tensor("custom")]; + tensor key_37_pad_0 = const()[name = tensor("key_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_18_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_18_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(725939328)))]; + tensor key_37_cast_fp16 = conv(dilations = var_27726, groups = var_27685, pad = key_37_pad_0, pad_type = key_37_pad_type_0, strides = var_27724, weight = layers_18_self_attn_k_proj_weight_to_fp16, x = obj_73_cast_fp16)[name = tensor("key_37_cast_fp16")]; + tensor var_27731 = const()[name = tensor("op_27731"), val = tensor([1, 1])]; + tensor var_27733 = const()[name = tensor("op_27733"), val = tensor([1, 1])]; + tensor value_37_pad_type_0 = const()[name = tensor("value_37_pad_type_0"), val = tensor("custom")]; + tensor value_37_pad_0 = const()[name = tensor("value_37_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_18_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_18_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(729216192)))]; + tensor layers_18_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_18_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(732493056)))]; + tensor value_37_cast_fp16 = conv(bias = layers_18_self_attn_v_proj_bias_to_fp16, dilations = var_27733, groups = var_27685, pad = value_37_pad_0, pad_type = value_37_pad_type_0, strides = var_27731, weight = layers_18_self_attn_v_proj_weight_to_fp16, x = obj_73_cast_fp16)[name = tensor("value_37_cast_fp16")]; + tensor var_27740_begin_0 = const()[name = tensor("op_27740_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_27740_end_0 = const()[name = tensor("op_27740_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_27740_end_mask_0 = const()[name = tensor("op_27740_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_27740_cast_fp16 = slice_by_index(begin = var_27740_begin_0, end = var_27740_end_0, end_mask = var_27740_end_mask_0, x = query_37_cast_fp16)[name = tensor("op_27740_cast_fp16")]; + tensor var_27744_begin_0 = const()[name = tensor("op_27744_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_27744_end_0 = const()[name = tensor("op_27744_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_27744_end_mask_0 = const()[name = tensor("op_27744_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_27744_cast_fp16 = slice_by_index(begin = var_27744_begin_0, end = var_27744_end_0, end_mask = var_27744_end_mask_0, x = query_37_cast_fp16)[name = tensor("op_27744_cast_fp16")]; + tensor var_27748_begin_0 = const()[name = tensor("op_27748_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_27748_end_0 = const()[name = tensor("op_27748_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_27748_end_mask_0 = const()[name = tensor("op_27748_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_27748_cast_fp16 = slice_by_index(begin = var_27748_begin_0, end = var_27748_end_0, end_mask = var_27748_end_mask_0, x = query_37_cast_fp16)[name = tensor("op_27748_cast_fp16")]; + tensor var_27752_begin_0 = const()[name = tensor("op_27752_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_27752_end_0 = const()[name = tensor("op_27752_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_27752_end_mask_0 = const()[name = tensor("op_27752_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_27752_cast_fp16 = slice_by_index(begin = var_27752_begin_0, end = var_27752_end_0, end_mask = var_27752_end_mask_0, x = query_37_cast_fp16)[name = tensor("op_27752_cast_fp16")]; + tensor var_27756_begin_0 = const()[name = tensor("op_27756_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_27756_end_0 = const()[name = tensor("op_27756_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_27756_end_mask_0 = const()[name = tensor("op_27756_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_27756_cast_fp16 = slice_by_index(begin = var_27756_begin_0, end = var_27756_end_0, end_mask = var_27756_end_mask_0, x = query_37_cast_fp16)[name = tensor("op_27756_cast_fp16")]; + tensor var_27760_begin_0 = const()[name = tensor("op_27760_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_27760_end_0 = const()[name = tensor("op_27760_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_27760_end_mask_0 = const()[name = tensor("op_27760_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_27760_cast_fp16 = slice_by_index(begin = var_27760_begin_0, end = var_27760_end_0, end_mask = var_27760_end_mask_0, x = query_37_cast_fp16)[name = tensor("op_27760_cast_fp16")]; + tensor var_27764_begin_0 = const()[name = tensor("op_27764_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_27764_end_0 = const()[name = tensor("op_27764_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_27764_end_mask_0 = const()[name = tensor("op_27764_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_27764_cast_fp16 = slice_by_index(begin = var_27764_begin_0, end = var_27764_end_0, end_mask = var_27764_end_mask_0, x = query_37_cast_fp16)[name = tensor("op_27764_cast_fp16")]; + tensor var_27768_begin_0 = const()[name = tensor("op_27768_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_27768_end_0 = const()[name = tensor("op_27768_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_27768_end_mask_0 = const()[name = tensor("op_27768_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_27768_cast_fp16 = slice_by_index(begin = var_27768_begin_0, end = var_27768_end_0, end_mask = var_27768_end_mask_0, x = query_37_cast_fp16)[name = tensor("op_27768_cast_fp16")]; + tensor var_27772_begin_0 = const()[name = tensor("op_27772_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_27772_end_0 = const()[name = tensor("op_27772_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_27772_end_mask_0 = const()[name = tensor("op_27772_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_27772_cast_fp16 = slice_by_index(begin = var_27772_begin_0, end = var_27772_end_0, end_mask = var_27772_end_mask_0, x = query_37_cast_fp16)[name = tensor("op_27772_cast_fp16")]; + tensor var_27776_begin_0 = const()[name = tensor("op_27776_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_27776_end_0 = const()[name = tensor("op_27776_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_27776_end_mask_0 = const()[name = tensor("op_27776_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_27776_cast_fp16 = slice_by_index(begin = var_27776_begin_0, end = var_27776_end_0, end_mask = var_27776_end_mask_0, x = query_37_cast_fp16)[name = tensor("op_27776_cast_fp16")]; + tensor var_27780_begin_0 = const()[name = tensor("op_27780_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_27780_end_0 = const()[name = tensor("op_27780_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_27780_end_mask_0 = const()[name = tensor("op_27780_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_27780_cast_fp16 = slice_by_index(begin = var_27780_begin_0, end = var_27780_end_0, end_mask = var_27780_end_mask_0, x = query_37_cast_fp16)[name = tensor("op_27780_cast_fp16")]; + tensor var_27784_begin_0 = const()[name = tensor("op_27784_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_27784_end_0 = const()[name = tensor("op_27784_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_27784_end_mask_0 = const()[name = tensor("op_27784_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_27784_cast_fp16 = slice_by_index(begin = var_27784_begin_0, end = var_27784_end_0, end_mask = var_27784_end_mask_0, x = query_37_cast_fp16)[name = tensor("op_27784_cast_fp16")]; + tensor var_27788_begin_0 = const()[name = tensor("op_27788_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_27788_end_0 = const()[name = tensor("op_27788_end_0"), val = tensor([1, 832, 1, 1500])]; + tensor var_27788_end_mask_0 = const()[name = tensor("op_27788_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_27788_cast_fp16 = slice_by_index(begin = var_27788_begin_0, end = var_27788_end_0, end_mask = var_27788_end_mask_0, x = query_37_cast_fp16)[name = tensor("op_27788_cast_fp16")]; + tensor var_27792_begin_0 = const()[name = tensor("op_27792_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_27792_end_0 = const()[name = tensor("op_27792_end_0"), val = tensor([1, 896, 1, 1500])]; + tensor var_27792_end_mask_0 = const()[name = tensor("op_27792_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_27792_cast_fp16 = slice_by_index(begin = var_27792_begin_0, end = var_27792_end_0, end_mask = var_27792_end_mask_0, x = query_37_cast_fp16)[name = tensor("op_27792_cast_fp16")]; + tensor var_27796_begin_0 = const()[name = tensor("op_27796_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_27796_end_0 = const()[name = tensor("op_27796_end_0"), val = tensor([1, 960, 1, 1500])]; + tensor var_27796_end_mask_0 = const()[name = tensor("op_27796_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_27796_cast_fp16 = slice_by_index(begin = var_27796_begin_0, end = var_27796_end_0, end_mask = var_27796_end_mask_0, x = query_37_cast_fp16)[name = tensor("op_27796_cast_fp16")]; + tensor var_27800_begin_0 = const()[name = tensor("op_27800_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_27800_end_0 = const()[name = tensor("op_27800_end_0"), val = tensor([1, 1024, 1, 1500])]; + tensor var_27800_end_mask_0 = const()[name = tensor("op_27800_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_27800_cast_fp16 = slice_by_index(begin = var_27800_begin_0, end = var_27800_end_0, end_mask = var_27800_end_mask_0, x = query_37_cast_fp16)[name = tensor("op_27800_cast_fp16")]; + tensor var_27804_begin_0 = const()[name = tensor("op_27804_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_27804_end_0 = const()[name = tensor("op_27804_end_0"), val = tensor([1, 1088, 1, 1500])]; + tensor var_27804_end_mask_0 = const()[name = tensor("op_27804_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_27804_cast_fp16 = slice_by_index(begin = var_27804_begin_0, end = var_27804_end_0, end_mask = var_27804_end_mask_0, x = query_37_cast_fp16)[name = tensor("op_27804_cast_fp16")]; + tensor var_27808_begin_0 = const()[name = tensor("op_27808_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_27808_end_0 = const()[name = tensor("op_27808_end_0"), val = tensor([1, 1152, 1, 1500])]; + tensor var_27808_end_mask_0 = const()[name = tensor("op_27808_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_27808_cast_fp16 = slice_by_index(begin = var_27808_begin_0, end = var_27808_end_0, end_mask = var_27808_end_mask_0, x = query_37_cast_fp16)[name = tensor("op_27808_cast_fp16")]; + tensor var_27812_begin_0 = const()[name = tensor("op_27812_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_27812_end_0 = const()[name = tensor("op_27812_end_0"), val = tensor([1, 1216, 1, 1500])]; + tensor var_27812_end_mask_0 = const()[name = tensor("op_27812_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_27812_cast_fp16 = slice_by_index(begin = var_27812_begin_0, end = var_27812_end_0, end_mask = var_27812_end_mask_0, x = query_37_cast_fp16)[name = tensor("op_27812_cast_fp16")]; + tensor var_27816_begin_0 = const()[name = tensor("op_27816_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_27816_end_0 = const()[name = tensor("op_27816_end_0"), val = tensor([1, 1280, 1, 1500])]; + tensor var_27816_end_mask_0 = const()[name = tensor("op_27816_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_27816_cast_fp16 = slice_by_index(begin = var_27816_begin_0, end = var_27816_end_0, end_mask = var_27816_end_mask_0, x = query_37_cast_fp16)[name = tensor("op_27816_cast_fp16")]; + tensor var_27825_begin_0 = const()[name = tensor("op_27825_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_27825_end_0 = const()[name = tensor("op_27825_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_27825_end_mask_0 = const()[name = tensor("op_27825_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_27825_cast_fp16 = slice_by_index(begin = var_27825_begin_0, end = var_27825_end_0, end_mask = var_27825_end_mask_0, x = var_27740_cast_fp16)[name = tensor("op_27825_cast_fp16")]; + tensor var_27832_begin_0 = const()[name = tensor("op_27832_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_27832_end_0 = const()[name = tensor("op_27832_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_27832_end_mask_0 = const()[name = tensor("op_27832_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_27832_cast_fp16 = slice_by_index(begin = var_27832_begin_0, end = var_27832_end_0, end_mask = var_27832_end_mask_0, x = var_27740_cast_fp16)[name = tensor("op_27832_cast_fp16")]; + tensor var_27839_begin_0 = const()[name = tensor("op_27839_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_27839_end_0 = const()[name = tensor("op_27839_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_27839_end_mask_0 = const()[name = tensor("op_27839_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_27839_cast_fp16 = slice_by_index(begin = var_27839_begin_0, end = var_27839_end_0, end_mask = var_27839_end_mask_0, x = var_27740_cast_fp16)[name = tensor("op_27839_cast_fp16")]; + tensor var_27846_begin_0 = const()[name = tensor("op_27846_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_27846_end_0 = const()[name = tensor("op_27846_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_27846_end_mask_0 = const()[name = tensor("op_27846_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_27846_cast_fp16 = slice_by_index(begin = var_27846_begin_0, end = var_27846_end_0, end_mask = var_27846_end_mask_0, x = var_27740_cast_fp16)[name = tensor("op_27846_cast_fp16")]; + tensor var_27853_begin_0 = const()[name = tensor("op_27853_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_27853_end_0 = const()[name = tensor("op_27853_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_27853_end_mask_0 = const()[name = tensor("op_27853_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_27853_cast_fp16 = slice_by_index(begin = var_27853_begin_0, end = var_27853_end_0, end_mask = var_27853_end_mask_0, x = var_27744_cast_fp16)[name = tensor("op_27853_cast_fp16")]; + tensor var_27860_begin_0 = const()[name = tensor("op_27860_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_27860_end_0 = const()[name = tensor("op_27860_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_27860_end_mask_0 = const()[name = tensor("op_27860_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_27860_cast_fp16 = slice_by_index(begin = var_27860_begin_0, end = var_27860_end_0, end_mask = var_27860_end_mask_0, x = var_27744_cast_fp16)[name = tensor("op_27860_cast_fp16")]; + tensor var_27867_begin_0 = const()[name = tensor("op_27867_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_27867_end_0 = const()[name = tensor("op_27867_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_27867_end_mask_0 = const()[name = tensor("op_27867_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_27867_cast_fp16 = slice_by_index(begin = var_27867_begin_0, end = var_27867_end_0, end_mask = var_27867_end_mask_0, x = var_27744_cast_fp16)[name = tensor("op_27867_cast_fp16")]; + tensor var_27874_begin_0 = const()[name = tensor("op_27874_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_27874_end_0 = const()[name = tensor("op_27874_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_27874_end_mask_0 = const()[name = tensor("op_27874_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_27874_cast_fp16 = slice_by_index(begin = var_27874_begin_0, end = var_27874_end_0, end_mask = var_27874_end_mask_0, x = var_27744_cast_fp16)[name = tensor("op_27874_cast_fp16")]; + tensor var_27881_begin_0 = const()[name = tensor("op_27881_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_27881_end_0 = const()[name = tensor("op_27881_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_27881_end_mask_0 = const()[name = tensor("op_27881_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_27881_cast_fp16 = slice_by_index(begin = var_27881_begin_0, end = var_27881_end_0, end_mask = var_27881_end_mask_0, x = var_27748_cast_fp16)[name = tensor("op_27881_cast_fp16")]; + tensor var_27888_begin_0 = const()[name = tensor("op_27888_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_27888_end_0 = const()[name = tensor("op_27888_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_27888_end_mask_0 = const()[name = tensor("op_27888_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_27888_cast_fp16 = slice_by_index(begin = var_27888_begin_0, end = var_27888_end_0, end_mask = var_27888_end_mask_0, x = var_27748_cast_fp16)[name = tensor("op_27888_cast_fp16")]; + tensor var_27895_begin_0 = const()[name = tensor("op_27895_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_27895_end_0 = const()[name = tensor("op_27895_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_27895_end_mask_0 = const()[name = tensor("op_27895_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_27895_cast_fp16 = slice_by_index(begin = var_27895_begin_0, end = var_27895_end_0, end_mask = var_27895_end_mask_0, x = var_27748_cast_fp16)[name = tensor("op_27895_cast_fp16")]; + tensor var_27902_begin_0 = const()[name = tensor("op_27902_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_27902_end_0 = const()[name = tensor("op_27902_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_27902_end_mask_0 = const()[name = tensor("op_27902_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_27902_cast_fp16 = slice_by_index(begin = var_27902_begin_0, end = var_27902_end_0, end_mask = var_27902_end_mask_0, x = var_27748_cast_fp16)[name = tensor("op_27902_cast_fp16")]; + tensor var_27909_begin_0 = const()[name = tensor("op_27909_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_27909_end_0 = const()[name = tensor("op_27909_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_27909_end_mask_0 = const()[name = tensor("op_27909_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_27909_cast_fp16 = slice_by_index(begin = var_27909_begin_0, end = var_27909_end_0, end_mask = var_27909_end_mask_0, x = var_27752_cast_fp16)[name = tensor("op_27909_cast_fp16")]; + tensor var_27916_begin_0 = const()[name = tensor("op_27916_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_27916_end_0 = const()[name = tensor("op_27916_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_27916_end_mask_0 = const()[name = tensor("op_27916_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_27916_cast_fp16 = slice_by_index(begin = var_27916_begin_0, end = var_27916_end_0, end_mask = var_27916_end_mask_0, x = var_27752_cast_fp16)[name = tensor("op_27916_cast_fp16")]; + tensor var_27923_begin_0 = const()[name = tensor("op_27923_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_27923_end_0 = const()[name = tensor("op_27923_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_27923_end_mask_0 = const()[name = tensor("op_27923_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_27923_cast_fp16 = slice_by_index(begin = var_27923_begin_0, end = var_27923_end_0, end_mask = var_27923_end_mask_0, x = var_27752_cast_fp16)[name = tensor("op_27923_cast_fp16")]; + tensor var_27930_begin_0 = const()[name = tensor("op_27930_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_27930_end_0 = const()[name = tensor("op_27930_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_27930_end_mask_0 = const()[name = tensor("op_27930_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_27930_cast_fp16 = slice_by_index(begin = var_27930_begin_0, end = var_27930_end_0, end_mask = var_27930_end_mask_0, x = var_27752_cast_fp16)[name = tensor("op_27930_cast_fp16")]; + tensor var_27937_begin_0 = const()[name = tensor("op_27937_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_27937_end_0 = const()[name = tensor("op_27937_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_27937_end_mask_0 = const()[name = tensor("op_27937_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_27937_cast_fp16 = slice_by_index(begin = var_27937_begin_0, end = var_27937_end_0, end_mask = var_27937_end_mask_0, x = var_27756_cast_fp16)[name = tensor("op_27937_cast_fp16")]; + tensor var_27944_begin_0 = const()[name = tensor("op_27944_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_27944_end_0 = const()[name = tensor("op_27944_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_27944_end_mask_0 = const()[name = tensor("op_27944_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_27944_cast_fp16 = slice_by_index(begin = var_27944_begin_0, end = var_27944_end_0, end_mask = var_27944_end_mask_0, x = var_27756_cast_fp16)[name = tensor("op_27944_cast_fp16")]; + tensor var_27951_begin_0 = const()[name = tensor("op_27951_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_27951_end_0 = const()[name = tensor("op_27951_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_27951_end_mask_0 = const()[name = tensor("op_27951_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_27951_cast_fp16 = slice_by_index(begin = var_27951_begin_0, end = var_27951_end_0, end_mask = var_27951_end_mask_0, x = var_27756_cast_fp16)[name = tensor("op_27951_cast_fp16")]; + tensor var_27958_begin_0 = const()[name = tensor("op_27958_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_27958_end_0 = const()[name = tensor("op_27958_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_27958_end_mask_0 = const()[name = tensor("op_27958_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_27958_cast_fp16 = slice_by_index(begin = var_27958_begin_0, end = var_27958_end_0, end_mask = var_27958_end_mask_0, x = var_27756_cast_fp16)[name = tensor("op_27958_cast_fp16")]; + tensor var_27965_begin_0 = const()[name = tensor("op_27965_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_27965_end_0 = const()[name = tensor("op_27965_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_27965_end_mask_0 = const()[name = tensor("op_27965_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_27965_cast_fp16 = slice_by_index(begin = var_27965_begin_0, end = var_27965_end_0, end_mask = var_27965_end_mask_0, x = var_27760_cast_fp16)[name = tensor("op_27965_cast_fp16")]; + tensor var_27972_begin_0 = const()[name = tensor("op_27972_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_27972_end_0 = const()[name = tensor("op_27972_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_27972_end_mask_0 = const()[name = tensor("op_27972_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_27972_cast_fp16 = slice_by_index(begin = var_27972_begin_0, end = var_27972_end_0, end_mask = var_27972_end_mask_0, x = var_27760_cast_fp16)[name = tensor("op_27972_cast_fp16")]; + tensor var_27979_begin_0 = const()[name = tensor("op_27979_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_27979_end_0 = const()[name = tensor("op_27979_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_27979_end_mask_0 = const()[name = tensor("op_27979_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_27979_cast_fp16 = slice_by_index(begin = var_27979_begin_0, end = var_27979_end_0, end_mask = var_27979_end_mask_0, x = var_27760_cast_fp16)[name = tensor("op_27979_cast_fp16")]; + tensor var_27986_begin_0 = const()[name = tensor("op_27986_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_27986_end_0 = const()[name = tensor("op_27986_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_27986_end_mask_0 = const()[name = tensor("op_27986_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_27986_cast_fp16 = slice_by_index(begin = var_27986_begin_0, end = var_27986_end_0, end_mask = var_27986_end_mask_0, x = var_27760_cast_fp16)[name = tensor("op_27986_cast_fp16")]; + tensor var_27993_begin_0 = const()[name = tensor("op_27993_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_27993_end_0 = const()[name = tensor("op_27993_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_27993_end_mask_0 = const()[name = tensor("op_27993_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_27993_cast_fp16 = slice_by_index(begin = var_27993_begin_0, end = var_27993_end_0, end_mask = var_27993_end_mask_0, x = var_27764_cast_fp16)[name = tensor("op_27993_cast_fp16")]; + tensor var_28000_begin_0 = const()[name = tensor("op_28000_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_28000_end_0 = const()[name = tensor("op_28000_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_28000_end_mask_0 = const()[name = tensor("op_28000_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28000_cast_fp16 = slice_by_index(begin = var_28000_begin_0, end = var_28000_end_0, end_mask = var_28000_end_mask_0, x = var_27764_cast_fp16)[name = tensor("op_28000_cast_fp16")]; + tensor var_28007_begin_0 = const()[name = tensor("op_28007_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_28007_end_0 = const()[name = tensor("op_28007_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_28007_end_mask_0 = const()[name = tensor("op_28007_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28007_cast_fp16 = slice_by_index(begin = var_28007_begin_0, end = var_28007_end_0, end_mask = var_28007_end_mask_0, x = var_27764_cast_fp16)[name = tensor("op_28007_cast_fp16")]; + tensor var_28014_begin_0 = const()[name = tensor("op_28014_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_28014_end_0 = const()[name = tensor("op_28014_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_28014_end_mask_0 = const()[name = tensor("op_28014_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28014_cast_fp16 = slice_by_index(begin = var_28014_begin_0, end = var_28014_end_0, end_mask = var_28014_end_mask_0, x = var_27764_cast_fp16)[name = tensor("op_28014_cast_fp16")]; + tensor var_28021_begin_0 = const()[name = tensor("op_28021_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_28021_end_0 = const()[name = tensor("op_28021_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_28021_end_mask_0 = const()[name = tensor("op_28021_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28021_cast_fp16 = slice_by_index(begin = var_28021_begin_0, end = var_28021_end_0, end_mask = var_28021_end_mask_0, x = var_27768_cast_fp16)[name = tensor("op_28021_cast_fp16")]; + tensor var_28028_begin_0 = const()[name = tensor("op_28028_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_28028_end_0 = const()[name = tensor("op_28028_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_28028_end_mask_0 = const()[name = tensor("op_28028_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28028_cast_fp16 = slice_by_index(begin = var_28028_begin_0, end = var_28028_end_0, end_mask = var_28028_end_mask_0, x = var_27768_cast_fp16)[name = tensor("op_28028_cast_fp16")]; + tensor var_28035_begin_0 = const()[name = tensor("op_28035_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_28035_end_0 = const()[name = tensor("op_28035_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_28035_end_mask_0 = const()[name = tensor("op_28035_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28035_cast_fp16 = slice_by_index(begin = var_28035_begin_0, end = var_28035_end_0, end_mask = var_28035_end_mask_0, x = var_27768_cast_fp16)[name = tensor("op_28035_cast_fp16")]; + tensor var_28042_begin_0 = const()[name = tensor("op_28042_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_28042_end_0 = const()[name = tensor("op_28042_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_28042_end_mask_0 = const()[name = tensor("op_28042_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28042_cast_fp16 = slice_by_index(begin = var_28042_begin_0, end = var_28042_end_0, end_mask = var_28042_end_mask_0, x = var_27768_cast_fp16)[name = tensor("op_28042_cast_fp16")]; + tensor var_28049_begin_0 = const()[name = tensor("op_28049_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_28049_end_0 = const()[name = tensor("op_28049_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_28049_end_mask_0 = const()[name = tensor("op_28049_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28049_cast_fp16 = slice_by_index(begin = var_28049_begin_0, end = var_28049_end_0, end_mask = var_28049_end_mask_0, x = var_27772_cast_fp16)[name = tensor("op_28049_cast_fp16")]; + tensor var_28056_begin_0 = const()[name = tensor("op_28056_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_28056_end_0 = const()[name = tensor("op_28056_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_28056_end_mask_0 = const()[name = tensor("op_28056_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28056_cast_fp16 = slice_by_index(begin = var_28056_begin_0, end = var_28056_end_0, end_mask = var_28056_end_mask_0, x = var_27772_cast_fp16)[name = tensor("op_28056_cast_fp16")]; + tensor var_28063_begin_0 = const()[name = tensor("op_28063_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_28063_end_0 = const()[name = tensor("op_28063_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_28063_end_mask_0 = const()[name = tensor("op_28063_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28063_cast_fp16 = slice_by_index(begin = var_28063_begin_0, end = var_28063_end_0, end_mask = var_28063_end_mask_0, x = var_27772_cast_fp16)[name = tensor("op_28063_cast_fp16")]; + tensor var_28070_begin_0 = const()[name = tensor("op_28070_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_28070_end_0 = const()[name = tensor("op_28070_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_28070_end_mask_0 = const()[name = tensor("op_28070_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28070_cast_fp16 = slice_by_index(begin = var_28070_begin_0, end = var_28070_end_0, end_mask = var_28070_end_mask_0, x = var_27772_cast_fp16)[name = tensor("op_28070_cast_fp16")]; + tensor var_28077_begin_0 = const()[name = tensor("op_28077_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_28077_end_0 = const()[name = tensor("op_28077_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_28077_end_mask_0 = const()[name = tensor("op_28077_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28077_cast_fp16 = slice_by_index(begin = var_28077_begin_0, end = var_28077_end_0, end_mask = var_28077_end_mask_0, x = var_27776_cast_fp16)[name = tensor("op_28077_cast_fp16")]; + tensor var_28084_begin_0 = const()[name = tensor("op_28084_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_28084_end_0 = const()[name = tensor("op_28084_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_28084_end_mask_0 = const()[name = tensor("op_28084_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28084_cast_fp16 = slice_by_index(begin = var_28084_begin_0, end = var_28084_end_0, end_mask = var_28084_end_mask_0, x = var_27776_cast_fp16)[name = tensor("op_28084_cast_fp16")]; + tensor var_28091_begin_0 = const()[name = tensor("op_28091_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_28091_end_0 = const()[name = tensor("op_28091_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_28091_end_mask_0 = const()[name = tensor("op_28091_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28091_cast_fp16 = slice_by_index(begin = var_28091_begin_0, end = var_28091_end_0, end_mask = var_28091_end_mask_0, x = var_27776_cast_fp16)[name = tensor("op_28091_cast_fp16")]; + tensor var_28098_begin_0 = const()[name = tensor("op_28098_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_28098_end_0 = const()[name = tensor("op_28098_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_28098_end_mask_0 = const()[name = tensor("op_28098_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28098_cast_fp16 = slice_by_index(begin = var_28098_begin_0, end = var_28098_end_0, end_mask = var_28098_end_mask_0, x = var_27776_cast_fp16)[name = tensor("op_28098_cast_fp16")]; + tensor var_28105_begin_0 = const()[name = tensor("op_28105_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_28105_end_0 = const()[name = tensor("op_28105_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_28105_end_mask_0 = const()[name = tensor("op_28105_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28105_cast_fp16 = slice_by_index(begin = var_28105_begin_0, end = var_28105_end_0, end_mask = var_28105_end_mask_0, x = var_27780_cast_fp16)[name = tensor("op_28105_cast_fp16")]; + tensor var_28112_begin_0 = const()[name = tensor("op_28112_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_28112_end_0 = const()[name = tensor("op_28112_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_28112_end_mask_0 = const()[name = tensor("op_28112_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28112_cast_fp16 = slice_by_index(begin = var_28112_begin_0, end = var_28112_end_0, end_mask = var_28112_end_mask_0, x = var_27780_cast_fp16)[name = tensor("op_28112_cast_fp16")]; + tensor var_28119_begin_0 = const()[name = tensor("op_28119_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_28119_end_0 = const()[name = tensor("op_28119_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_28119_end_mask_0 = const()[name = tensor("op_28119_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28119_cast_fp16 = slice_by_index(begin = var_28119_begin_0, end = var_28119_end_0, end_mask = var_28119_end_mask_0, x = var_27780_cast_fp16)[name = tensor("op_28119_cast_fp16")]; + tensor var_28126_begin_0 = const()[name = tensor("op_28126_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_28126_end_0 = const()[name = tensor("op_28126_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_28126_end_mask_0 = const()[name = tensor("op_28126_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28126_cast_fp16 = slice_by_index(begin = var_28126_begin_0, end = var_28126_end_0, end_mask = var_28126_end_mask_0, x = var_27780_cast_fp16)[name = tensor("op_28126_cast_fp16")]; + tensor var_28133_begin_0 = const()[name = tensor("op_28133_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_28133_end_0 = const()[name = tensor("op_28133_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_28133_end_mask_0 = const()[name = tensor("op_28133_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28133_cast_fp16 = slice_by_index(begin = var_28133_begin_0, end = var_28133_end_0, end_mask = var_28133_end_mask_0, x = var_27784_cast_fp16)[name = tensor("op_28133_cast_fp16")]; + tensor var_28140_begin_0 = const()[name = tensor("op_28140_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_28140_end_0 = const()[name = tensor("op_28140_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_28140_end_mask_0 = const()[name = tensor("op_28140_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28140_cast_fp16 = slice_by_index(begin = var_28140_begin_0, end = var_28140_end_0, end_mask = var_28140_end_mask_0, x = var_27784_cast_fp16)[name = tensor("op_28140_cast_fp16")]; + tensor var_28147_begin_0 = const()[name = tensor("op_28147_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_28147_end_0 = const()[name = tensor("op_28147_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_28147_end_mask_0 = const()[name = tensor("op_28147_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28147_cast_fp16 = slice_by_index(begin = var_28147_begin_0, end = var_28147_end_0, end_mask = var_28147_end_mask_0, x = var_27784_cast_fp16)[name = tensor("op_28147_cast_fp16")]; + tensor var_28154_begin_0 = const()[name = tensor("op_28154_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_28154_end_0 = const()[name = tensor("op_28154_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_28154_end_mask_0 = const()[name = tensor("op_28154_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28154_cast_fp16 = slice_by_index(begin = var_28154_begin_0, end = var_28154_end_0, end_mask = var_28154_end_mask_0, x = var_27784_cast_fp16)[name = tensor("op_28154_cast_fp16")]; + tensor var_28161_begin_0 = const()[name = tensor("op_28161_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_28161_end_0 = const()[name = tensor("op_28161_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_28161_end_mask_0 = const()[name = tensor("op_28161_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28161_cast_fp16 = slice_by_index(begin = var_28161_begin_0, end = var_28161_end_0, end_mask = var_28161_end_mask_0, x = var_27788_cast_fp16)[name = tensor("op_28161_cast_fp16")]; + tensor var_28168_begin_0 = const()[name = tensor("op_28168_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_28168_end_0 = const()[name = tensor("op_28168_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_28168_end_mask_0 = const()[name = tensor("op_28168_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28168_cast_fp16 = slice_by_index(begin = var_28168_begin_0, end = var_28168_end_0, end_mask = var_28168_end_mask_0, x = var_27788_cast_fp16)[name = tensor("op_28168_cast_fp16")]; + tensor var_28175_begin_0 = const()[name = tensor("op_28175_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_28175_end_0 = const()[name = tensor("op_28175_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_28175_end_mask_0 = const()[name = tensor("op_28175_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28175_cast_fp16 = slice_by_index(begin = var_28175_begin_0, end = var_28175_end_0, end_mask = var_28175_end_mask_0, x = var_27788_cast_fp16)[name = tensor("op_28175_cast_fp16")]; + tensor var_28182_begin_0 = const()[name = tensor("op_28182_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_28182_end_0 = const()[name = tensor("op_28182_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_28182_end_mask_0 = const()[name = tensor("op_28182_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28182_cast_fp16 = slice_by_index(begin = var_28182_begin_0, end = var_28182_end_0, end_mask = var_28182_end_mask_0, x = var_27788_cast_fp16)[name = tensor("op_28182_cast_fp16")]; + tensor var_28189_begin_0 = const()[name = tensor("op_28189_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_28189_end_0 = const()[name = tensor("op_28189_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_28189_end_mask_0 = const()[name = tensor("op_28189_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28189_cast_fp16 = slice_by_index(begin = var_28189_begin_0, end = var_28189_end_0, end_mask = var_28189_end_mask_0, x = var_27792_cast_fp16)[name = tensor("op_28189_cast_fp16")]; + tensor var_28196_begin_0 = const()[name = tensor("op_28196_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_28196_end_0 = const()[name = tensor("op_28196_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_28196_end_mask_0 = const()[name = tensor("op_28196_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28196_cast_fp16 = slice_by_index(begin = var_28196_begin_0, end = var_28196_end_0, end_mask = var_28196_end_mask_0, x = var_27792_cast_fp16)[name = tensor("op_28196_cast_fp16")]; + tensor var_28203_begin_0 = const()[name = tensor("op_28203_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_28203_end_0 = const()[name = tensor("op_28203_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_28203_end_mask_0 = const()[name = tensor("op_28203_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28203_cast_fp16 = slice_by_index(begin = var_28203_begin_0, end = var_28203_end_0, end_mask = var_28203_end_mask_0, x = var_27792_cast_fp16)[name = tensor("op_28203_cast_fp16")]; + tensor var_28210_begin_0 = const()[name = tensor("op_28210_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_28210_end_0 = const()[name = tensor("op_28210_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_28210_end_mask_0 = const()[name = tensor("op_28210_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28210_cast_fp16 = slice_by_index(begin = var_28210_begin_0, end = var_28210_end_0, end_mask = var_28210_end_mask_0, x = var_27792_cast_fp16)[name = tensor("op_28210_cast_fp16")]; + tensor var_28217_begin_0 = const()[name = tensor("op_28217_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_28217_end_0 = const()[name = tensor("op_28217_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_28217_end_mask_0 = const()[name = tensor("op_28217_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28217_cast_fp16 = slice_by_index(begin = var_28217_begin_0, end = var_28217_end_0, end_mask = var_28217_end_mask_0, x = var_27796_cast_fp16)[name = tensor("op_28217_cast_fp16")]; + tensor var_28224_begin_0 = const()[name = tensor("op_28224_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_28224_end_0 = const()[name = tensor("op_28224_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_28224_end_mask_0 = const()[name = tensor("op_28224_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28224_cast_fp16 = slice_by_index(begin = var_28224_begin_0, end = var_28224_end_0, end_mask = var_28224_end_mask_0, x = var_27796_cast_fp16)[name = tensor("op_28224_cast_fp16")]; + tensor var_28231_begin_0 = const()[name = tensor("op_28231_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_28231_end_0 = const()[name = tensor("op_28231_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_28231_end_mask_0 = const()[name = tensor("op_28231_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28231_cast_fp16 = slice_by_index(begin = var_28231_begin_0, end = var_28231_end_0, end_mask = var_28231_end_mask_0, x = var_27796_cast_fp16)[name = tensor("op_28231_cast_fp16")]; + tensor var_28238_begin_0 = const()[name = tensor("op_28238_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_28238_end_0 = const()[name = tensor("op_28238_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_28238_end_mask_0 = const()[name = tensor("op_28238_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28238_cast_fp16 = slice_by_index(begin = var_28238_begin_0, end = var_28238_end_0, end_mask = var_28238_end_mask_0, x = var_27796_cast_fp16)[name = tensor("op_28238_cast_fp16")]; + tensor var_28245_begin_0 = const()[name = tensor("op_28245_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_28245_end_0 = const()[name = tensor("op_28245_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_28245_end_mask_0 = const()[name = tensor("op_28245_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28245_cast_fp16 = slice_by_index(begin = var_28245_begin_0, end = var_28245_end_0, end_mask = var_28245_end_mask_0, x = var_27800_cast_fp16)[name = tensor("op_28245_cast_fp16")]; + tensor var_28252_begin_0 = const()[name = tensor("op_28252_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_28252_end_0 = const()[name = tensor("op_28252_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_28252_end_mask_0 = const()[name = tensor("op_28252_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28252_cast_fp16 = slice_by_index(begin = var_28252_begin_0, end = var_28252_end_0, end_mask = var_28252_end_mask_0, x = var_27800_cast_fp16)[name = tensor("op_28252_cast_fp16")]; + tensor var_28259_begin_0 = const()[name = tensor("op_28259_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_28259_end_0 = const()[name = tensor("op_28259_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_28259_end_mask_0 = const()[name = tensor("op_28259_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28259_cast_fp16 = slice_by_index(begin = var_28259_begin_0, end = var_28259_end_0, end_mask = var_28259_end_mask_0, x = var_27800_cast_fp16)[name = tensor("op_28259_cast_fp16")]; + tensor var_28266_begin_0 = const()[name = tensor("op_28266_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_28266_end_0 = const()[name = tensor("op_28266_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_28266_end_mask_0 = const()[name = tensor("op_28266_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28266_cast_fp16 = slice_by_index(begin = var_28266_begin_0, end = var_28266_end_0, end_mask = var_28266_end_mask_0, x = var_27800_cast_fp16)[name = tensor("op_28266_cast_fp16")]; + tensor var_28273_begin_0 = const()[name = tensor("op_28273_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_28273_end_0 = const()[name = tensor("op_28273_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_28273_end_mask_0 = const()[name = tensor("op_28273_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28273_cast_fp16 = slice_by_index(begin = var_28273_begin_0, end = var_28273_end_0, end_mask = var_28273_end_mask_0, x = var_27804_cast_fp16)[name = tensor("op_28273_cast_fp16")]; + tensor var_28280_begin_0 = const()[name = tensor("op_28280_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_28280_end_0 = const()[name = tensor("op_28280_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_28280_end_mask_0 = const()[name = tensor("op_28280_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28280_cast_fp16 = slice_by_index(begin = var_28280_begin_0, end = var_28280_end_0, end_mask = var_28280_end_mask_0, x = var_27804_cast_fp16)[name = tensor("op_28280_cast_fp16")]; + tensor var_28287_begin_0 = const()[name = tensor("op_28287_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_28287_end_0 = const()[name = tensor("op_28287_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_28287_end_mask_0 = const()[name = tensor("op_28287_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28287_cast_fp16 = slice_by_index(begin = var_28287_begin_0, end = var_28287_end_0, end_mask = var_28287_end_mask_0, x = var_27804_cast_fp16)[name = tensor("op_28287_cast_fp16")]; + tensor var_28294_begin_0 = const()[name = tensor("op_28294_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_28294_end_0 = const()[name = tensor("op_28294_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_28294_end_mask_0 = const()[name = tensor("op_28294_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28294_cast_fp16 = slice_by_index(begin = var_28294_begin_0, end = var_28294_end_0, end_mask = var_28294_end_mask_0, x = var_27804_cast_fp16)[name = tensor("op_28294_cast_fp16")]; + tensor var_28301_begin_0 = const()[name = tensor("op_28301_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_28301_end_0 = const()[name = tensor("op_28301_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_28301_end_mask_0 = const()[name = tensor("op_28301_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28301_cast_fp16 = slice_by_index(begin = var_28301_begin_0, end = var_28301_end_0, end_mask = var_28301_end_mask_0, x = var_27808_cast_fp16)[name = tensor("op_28301_cast_fp16")]; + tensor var_28308_begin_0 = const()[name = tensor("op_28308_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_28308_end_0 = const()[name = tensor("op_28308_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_28308_end_mask_0 = const()[name = tensor("op_28308_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28308_cast_fp16 = slice_by_index(begin = var_28308_begin_0, end = var_28308_end_0, end_mask = var_28308_end_mask_0, x = var_27808_cast_fp16)[name = tensor("op_28308_cast_fp16")]; + tensor var_28315_begin_0 = const()[name = tensor("op_28315_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_28315_end_0 = const()[name = tensor("op_28315_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_28315_end_mask_0 = const()[name = tensor("op_28315_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28315_cast_fp16 = slice_by_index(begin = var_28315_begin_0, end = var_28315_end_0, end_mask = var_28315_end_mask_0, x = var_27808_cast_fp16)[name = tensor("op_28315_cast_fp16")]; + tensor var_28322_begin_0 = const()[name = tensor("op_28322_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_28322_end_0 = const()[name = tensor("op_28322_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_28322_end_mask_0 = const()[name = tensor("op_28322_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28322_cast_fp16 = slice_by_index(begin = var_28322_begin_0, end = var_28322_end_0, end_mask = var_28322_end_mask_0, x = var_27808_cast_fp16)[name = tensor("op_28322_cast_fp16")]; + tensor var_28329_begin_0 = const()[name = tensor("op_28329_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_28329_end_0 = const()[name = tensor("op_28329_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_28329_end_mask_0 = const()[name = tensor("op_28329_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28329_cast_fp16 = slice_by_index(begin = var_28329_begin_0, end = var_28329_end_0, end_mask = var_28329_end_mask_0, x = var_27812_cast_fp16)[name = tensor("op_28329_cast_fp16")]; + tensor var_28336_begin_0 = const()[name = tensor("op_28336_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_28336_end_0 = const()[name = tensor("op_28336_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_28336_end_mask_0 = const()[name = tensor("op_28336_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28336_cast_fp16 = slice_by_index(begin = var_28336_begin_0, end = var_28336_end_0, end_mask = var_28336_end_mask_0, x = var_27812_cast_fp16)[name = tensor("op_28336_cast_fp16")]; + tensor var_28343_begin_0 = const()[name = tensor("op_28343_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_28343_end_0 = const()[name = tensor("op_28343_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_28343_end_mask_0 = const()[name = tensor("op_28343_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28343_cast_fp16 = slice_by_index(begin = var_28343_begin_0, end = var_28343_end_0, end_mask = var_28343_end_mask_0, x = var_27812_cast_fp16)[name = tensor("op_28343_cast_fp16")]; + tensor var_28350_begin_0 = const()[name = tensor("op_28350_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_28350_end_0 = const()[name = tensor("op_28350_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_28350_end_mask_0 = const()[name = tensor("op_28350_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28350_cast_fp16 = slice_by_index(begin = var_28350_begin_0, end = var_28350_end_0, end_mask = var_28350_end_mask_0, x = var_27812_cast_fp16)[name = tensor("op_28350_cast_fp16")]; + tensor var_28357_begin_0 = const()[name = tensor("op_28357_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_28357_end_0 = const()[name = tensor("op_28357_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_28357_end_mask_0 = const()[name = tensor("op_28357_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28357_cast_fp16 = slice_by_index(begin = var_28357_begin_0, end = var_28357_end_0, end_mask = var_28357_end_mask_0, x = var_27816_cast_fp16)[name = tensor("op_28357_cast_fp16")]; + tensor var_28364_begin_0 = const()[name = tensor("op_28364_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_28364_end_0 = const()[name = tensor("op_28364_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_28364_end_mask_0 = const()[name = tensor("op_28364_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28364_cast_fp16 = slice_by_index(begin = var_28364_begin_0, end = var_28364_end_0, end_mask = var_28364_end_mask_0, x = var_27816_cast_fp16)[name = tensor("op_28364_cast_fp16")]; + tensor var_28371_begin_0 = const()[name = tensor("op_28371_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_28371_end_0 = const()[name = tensor("op_28371_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_28371_end_mask_0 = const()[name = tensor("op_28371_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28371_cast_fp16 = slice_by_index(begin = var_28371_begin_0, end = var_28371_end_0, end_mask = var_28371_end_mask_0, x = var_27816_cast_fp16)[name = tensor("op_28371_cast_fp16")]; + tensor var_28378_begin_0 = const()[name = tensor("op_28378_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_28378_end_0 = const()[name = tensor("op_28378_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_28378_end_mask_0 = const()[name = tensor("op_28378_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28378_cast_fp16 = slice_by_index(begin = var_28378_begin_0, end = var_28378_end_0, end_mask = var_28378_end_mask_0, x = var_27816_cast_fp16)[name = tensor("op_28378_cast_fp16")]; + tensor k_37_perm_0 = const()[name = tensor("k_37_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_28383_begin_0 = const()[name = tensor("op_28383_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_28383_end_0 = const()[name = tensor("op_28383_end_0"), val = tensor([1, 1500, 1, 64])]; + tensor var_28383_end_mask_0 = const()[name = tensor("op_28383_end_mask_0"), val = tensor([true, true, true, false])]; + tensor transpose_13 = transpose(perm = k_37_perm_0, x = key_37_cast_fp16)[name = tensor("transpose_13")]; + tensor var_28383_cast_fp16 = slice_by_index(begin = var_28383_begin_0, end = var_28383_end_0, end_mask = var_28383_end_mask_0, x = transpose_13)[name = tensor("op_28383_cast_fp16")]; + tensor var_28387_begin_0 = const()[name = tensor("op_28387_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor var_28387_end_0 = const()[name = tensor("op_28387_end_0"), val = tensor([1, 1500, 1, 128])]; + tensor var_28387_end_mask_0 = const()[name = tensor("op_28387_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28387_cast_fp16 = slice_by_index(begin = var_28387_begin_0, end = var_28387_end_0, end_mask = var_28387_end_mask_0, x = transpose_13)[name = tensor("op_28387_cast_fp16")]; + tensor var_28391_begin_0 = const()[name = tensor("op_28391_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_28391_end_0 = const()[name = tensor("op_28391_end_0"), val = tensor([1, 1500, 1, 192])]; + tensor var_28391_end_mask_0 = const()[name = tensor("op_28391_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28391_cast_fp16 = slice_by_index(begin = var_28391_begin_0, end = var_28391_end_0, end_mask = var_28391_end_mask_0, x = transpose_13)[name = tensor("op_28391_cast_fp16")]; + tensor var_28395_begin_0 = const()[name = tensor("op_28395_begin_0"), val = tensor([0, 0, 0, 192])]; + tensor var_28395_end_0 = const()[name = tensor("op_28395_end_0"), val = tensor([1, 1500, 1, 256])]; + tensor var_28395_end_mask_0 = const()[name = tensor("op_28395_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28395_cast_fp16 = slice_by_index(begin = var_28395_begin_0, end = var_28395_end_0, end_mask = var_28395_end_mask_0, x = transpose_13)[name = tensor("op_28395_cast_fp16")]; + tensor var_28399_begin_0 = const()[name = tensor("op_28399_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_28399_end_0 = const()[name = tensor("op_28399_end_0"), val = tensor([1, 1500, 1, 320])]; + tensor var_28399_end_mask_0 = const()[name = tensor("op_28399_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28399_cast_fp16 = slice_by_index(begin = var_28399_begin_0, end = var_28399_end_0, end_mask = var_28399_end_mask_0, x = transpose_13)[name = tensor("op_28399_cast_fp16")]; + tensor var_28403_begin_0 = const()[name = tensor("op_28403_begin_0"), val = tensor([0, 0, 0, 320])]; + tensor var_28403_end_0 = const()[name = tensor("op_28403_end_0"), val = tensor([1, 1500, 1, 384])]; + tensor var_28403_end_mask_0 = const()[name = tensor("op_28403_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28403_cast_fp16 = slice_by_index(begin = var_28403_begin_0, end = var_28403_end_0, end_mask = var_28403_end_mask_0, x = transpose_13)[name = tensor("op_28403_cast_fp16")]; + tensor var_28407_begin_0 = const()[name = tensor("op_28407_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_28407_end_0 = const()[name = tensor("op_28407_end_0"), val = tensor([1, 1500, 1, 448])]; + tensor var_28407_end_mask_0 = const()[name = tensor("op_28407_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28407_cast_fp16 = slice_by_index(begin = var_28407_begin_0, end = var_28407_end_0, end_mask = var_28407_end_mask_0, x = transpose_13)[name = tensor("op_28407_cast_fp16")]; + tensor var_28411_begin_0 = const()[name = tensor("op_28411_begin_0"), val = tensor([0, 0, 0, 448])]; + tensor var_28411_end_0 = const()[name = tensor("op_28411_end_0"), val = tensor([1, 1500, 1, 512])]; + tensor var_28411_end_mask_0 = const()[name = tensor("op_28411_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28411_cast_fp16 = slice_by_index(begin = var_28411_begin_0, end = var_28411_end_0, end_mask = var_28411_end_mask_0, x = transpose_13)[name = tensor("op_28411_cast_fp16")]; + tensor var_28415_begin_0 = const()[name = tensor("op_28415_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_28415_end_0 = const()[name = tensor("op_28415_end_0"), val = tensor([1, 1500, 1, 576])]; + tensor var_28415_end_mask_0 = const()[name = tensor("op_28415_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28415_cast_fp16 = slice_by_index(begin = var_28415_begin_0, end = var_28415_end_0, end_mask = var_28415_end_mask_0, x = transpose_13)[name = tensor("op_28415_cast_fp16")]; + tensor var_28419_begin_0 = const()[name = tensor("op_28419_begin_0"), val = tensor([0, 0, 0, 576])]; + tensor var_28419_end_0 = const()[name = tensor("op_28419_end_0"), val = tensor([1, 1500, 1, 640])]; + tensor var_28419_end_mask_0 = const()[name = tensor("op_28419_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28419_cast_fp16 = slice_by_index(begin = var_28419_begin_0, end = var_28419_end_0, end_mask = var_28419_end_mask_0, x = transpose_13)[name = tensor("op_28419_cast_fp16")]; + tensor var_28423_begin_0 = const()[name = tensor("op_28423_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_28423_end_0 = const()[name = tensor("op_28423_end_0"), val = tensor([1, 1500, 1, 704])]; + tensor var_28423_end_mask_0 = const()[name = tensor("op_28423_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28423_cast_fp16 = slice_by_index(begin = var_28423_begin_0, end = var_28423_end_0, end_mask = var_28423_end_mask_0, x = transpose_13)[name = tensor("op_28423_cast_fp16")]; + tensor var_28427_begin_0 = const()[name = tensor("op_28427_begin_0"), val = tensor([0, 0, 0, 704])]; + tensor var_28427_end_0 = const()[name = tensor("op_28427_end_0"), val = tensor([1, 1500, 1, 768])]; + tensor var_28427_end_mask_0 = const()[name = tensor("op_28427_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28427_cast_fp16 = slice_by_index(begin = var_28427_begin_0, end = var_28427_end_0, end_mask = var_28427_end_mask_0, x = transpose_13)[name = tensor("op_28427_cast_fp16")]; + tensor var_28431_begin_0 = const()[name = tensor("op_28431_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_28431_end_0 = const()[name = tensor("op_28431_end_0"), val = tensor([1, 1500, 1, 832])]; + tensor var_28431_end_mask_0 = const()[name = tensor("op_28431_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28431_cast_fp16 = slice_by_index(begin = var_28431_begin_0, end = var_28431_end_0, end_mask = var_28431_end_mask_0, x = transpose_13)[name = tensor("op_28431_cast_fp16")]; + tensor var_28435_begin_0 = const()[name = tensor("op_28435_begin_0"), val = tensor([0, 0, 0, 832])]; + tensor var_28435_end_0 = const()[name = tensor("op_28435_end_0"), val = tensor([1, 1500, 1, 896])]; + tensor var_28435_end_mask_0 = const()[name = tensor("op_28435_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28435_cast_fp16 = slice_by_index(begin = var_28435_begin_0, end = var_28435_end_0, end_mask = var_28435_end_mask_0, x = transpose_13)[name = tensor("op_28435_cast_fp16")]; + tensor var_28439_begin_0 = const()[name = tensor("op_28439_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_28439_end_0 = const()[name = tensor("op_28439_end_0"), val = tensor([1, 1500, 1, 960])]; + tensor var_28439_end_mask_0 = const()[name = tensor("op_28439_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28439_cast_fp16 = slice_by_index(begin = var_28439_begin_0, end = var_28439_end_0, end_mask = var_28439_end_mask_0, x = transpose_13)[name = tensor("op_28439_cast_fp16")]; + tensor var_28443_begin_0 = const()[name = tensor("op_28443_begin_0"), val = tensor([0, 0, 0, 960])]; + tensor var_28443_end_0 = const()[name = tensor("op_28443_end_0"), val = tensor([1, 1500, 1, 1024])]; + tensor var_28443_end_mask_0 = const()[name = tensor("op_28443_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28443_cast_fp16 = slice_by_index(begin = var_28443_begin_0, end = var_28443_end_0, end_mask = var_28443_end_mask_0, x = transpose_13)[name = tensor("op_28443_cast_fp16")]; + tensor var_28447_begin_0 = const()[name = tensor("op_28447_begin_0"), val = tensor([0, 0, 0, 1024])]; + tensor var_28447_end_0 = const()[name = tensor("op_28447_end_0"), val = tensor([1, 1500, 1, 1088])]; + tensor var_28447_end_mask_0 = const()[name = tensor("op_28447_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28447_cast_fp16 = slice_by_index(begin = var_28447_begin_0, end = var_28447_end_0, end_mask = var_28447_end_mask_0, x = transpose_13)[name = tensor("op_28447_cast_fp16")]; + tensor var_28451_begin_0 = const()[name = tensor("op_28451_begin_0"), val = tensor([0, 0, 0, 1088])]; + tensor var_28451_end_0 = const()[name = tensor("op_28451_end_0"), val = tensor([1, 1500, 1, 1152])]; + tensor var_28451_end_mask_0 = const()[name = tensor("op_28451_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28451_cast_fp16 = slice_by_index(begin = var_28451_begin_0, end = var_28451_end_0, end_mask = var_28451_end_mask_0, x = transpose_13)[name = tensor("op_28451_cast_fp16")]; + tensor var_28455_begin_0 = const()[name = tensor("op_28455_begin_0"), val = tensor([0, 0, 0, 1152])]; + tensor var_28455_end_0 = const()[name = tensor("op_28455_end_0"), val = tensor([1, 1500, 1, 1216])]; + tensor var_28455_end_mask_0 = const()[name = tensor("op_28455_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28455_cast_fp16 = slice_by_index(begin = var_28455_begin_0, end = var_28455_end_0, end_mask = var_28455_end_mask_0, x = transpose_13)[name = tensor("op_28455_cast_fp16")]; + tensor var_28459_begin_0 = const()[name = tensor("op_28459_begin_0"), val = tensor([0, 0, 0, 1216])]; + tensor var_28459_end_0 = const()[name = tensor("op_28459_end_0"), val = tensor([1, 1500, 1, 1280])]; + tensor var_28459_end_mask_0 = const()[name = tensor("op_28459_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_28459_cast_fp16 = slice_by_index(begin = var_28459_begin_0, end = var_28459_end_0, end_mask = var_28459_end_mask_0, x = transpose_13)[name = tensor("op_28459_cast_fp16")]; + tensor var_28461_begin_0 = const()[name = tensor("op_28461_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_28461_end_0 = const()[name = tensor("op_28461_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_28461_end_mask_0 = const()[name = tensor("op_28461_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_28461_cast_fp16 = slice_by_index(begin = var_28461_begin_0, end = var_28461_end_0, end_mask = var_28461_end_mask_0, x = value_37_cast_fp16)[name = tensor("op_28461_cast_fp16")]; + tensor var_28465_begin_0 = const()[name = tensor("op_28465_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_28465_end_0 = const()[name = tensor("op_28465_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_28465_end_mask_0 = const()[name = tensor("op_28465_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_28465_cast_fp16 = slice_by_index(begin = var_28465_begin_0, end = var_28465_end_0, end_mask = var_28465_end_mask_0, x = value_37_cast_fp16)[name = tensor("op_28465_cast_fp16")]; + tensor var_28469_begin_0 = const()[name = tensor("op_28469_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_28469_end_0 = const()[name = tensor("op_28469_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_28469_end_mask_0 = const()[name = tensor("op_28469_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_28469_cast_fp16 = slice_by_index(begin = var_28469_begin_0, end = var_28469_end_0, end_mask = var_28469_end_mask_0, x = value_37_cast_fp16)[name = tensor("op_28469_cast_fp16")]; + tensor var_28473_begin_0 = const()[name = tensor("op_28473_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_28473_end_0 = const()[name = tensor("op_28473_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_28473_end_mask_0 = const()[name = tensor("op_28473_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_28473_cast_fp16 = slice_by_index(begin = var_28473_begin_0, end = var_28473_end_0, end_mask = var_28473_end_mask_0, x = value_37_cast_fp16)[name = tensor("op_28473_cast_fp16")]; + tensor var_28477_begin_0 = const()[name = tensor("op_28477_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_28477_end_0 = const()[name = tensor("op_28477_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_28477_end_mask_0 = const()[name = tensor("op_28477_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_28477_cast_fp16 = slice_by_index(begin = var_28477_begin_0, end = var_28477_end_0, end_mask = var_28477_end_mask_0, x = value_37_cast_fp16)[name = tensor("op_28477_cast_fp16")]; + tensor var_28481_begin_0 = const()[name = tensor("op_28481_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_28481_end_0 = const()[name = tensor("op_28481_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_28481_end_mask_0 = const()[name = tensor("op_28481_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_28481_cast_fp16 = slice_by_index(begin = var_28481_begin_0, end = var_28481_end_0, end_mask = var_28481_end_mask_0, x = value_37_cast_fp16)[name = tensor("op_28481_cast_fp16")]; + tensor var_28485_begin_0 = const()[name = tensor("op_28485_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_28485_end_0 = const()[name = tensor("op_28485_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_28485_end_mask_0 = const()[name = tensor("op_28485_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_28485_cast_fp16 = slice_by_index(begin = var_28485_begin_0, end = var_28485_end_0, end_mask = var_28485_end_mask_0, x = value_37_cast_fp16)[name = tensor("op_28485_cast_fp16")]; + tensor var_28489_begin_0 = const()[name = tensor("op_28489_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_28489_end_0 = const()[name = tensor("op_28489_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_28489_end_mask_0 = const()[name = tensor("op_28489_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_28489_cast_fp16 = slice_by_index(begin = var_28489_begin_0, end = var_28489_end_0, end_mask = var_28489_end_mask_0, x = value_37_cast_fp16)[name = tensor("op_28489_cast_fp16")]; + tensor var_28493_begin_0 = const()[name = tensor("op_28493_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_28493_end_0 = const()[name = tensor("op_28493_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_28493_end_mask_0 = const()[name = tensor("op_28493_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_28493_cast_fp16 = slice_by_index(begin = var_28493_begin_0, end = var_28493_end_0, end_mask = var_28493_end_mask_0, x = value_37_cast_fp16)[name = tensor("op_28493_cast_fp16")]; + tensor var_28497_begin_0 = const()[name = tensor("op_28497_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_28497_end_0 = const()[name = tensor("op_28497_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_28497_end_mask_0 = const()[name = tensor("op_28497_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_28497_cast_fp16 = slice_by_index(begin = var_28497_begin_0, end = var_28497_end_0, end_mask = var_28497_end_mask_0, x = value_37_cast_fp16)[name = tensor("op_28497_cast_fp16")]; + tensor var_28501_begin_0 = const()[name = tensor("op_28501_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_28501_end_0 = const()[name = tensor("op_28501_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_28501_end_mask_0 = const()[name = tensor("op_28501_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_28501_cast_fp16 = slice_by_index(begin = var_28501_begin_0, end = var_28501_end_0, end_mask = var_28501_end_mask_0, x = value_37_cast_fp16)[name = tensor("op_28501_cast_fp16")]; + tensor var_28505_begin_0 = const()[name = tensor("op_28505_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_28505_end_0 = const()[name = tensor("op_28505_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_28505_end_mask_0 = const()[name = tensor("op_28505_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_28505_cast_fp16 = slice_by_index(begin = var_28505_begin_0, end = var_28505_end_0, end_mask = var_28505_end_mask_0, x = value_37_cast_fp16)[name = tensor("op_28505_cast_fp16")]; + tensor var_28509_begin_0 = const()[name = tensor("op_28509_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_28509_end_0 = const()[name = tensor("op_28509_end_0"), val = tensor([1, 832, 1, 1500])]; + tensor var_28509_end_mask_0 = const()[name = tensor("op_28509_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_28509_cast_fp16 = slice_by_index(begin = var_28509_begin_0, end = var_28509_end_0, end_mask = var_28509_end_mask_0, x = value_37_cast_fp16)[name = tensor("op_28509_cast_fp16")]; + tensor var_28513_begin_0 = const()[name = tensor("op_28513_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_28513_end_0 = const()[name = tensor("op_28513_end_0"), val = tensor([1, 896, 1, 1500])]; + tensor var_28513_end_mask_0 = const()[name = tensor("op_28513_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_28513_cast_fp16 = slice_by_index(begin = var_28513_begin_0, end = var_28513_end_0, end_mask = var_28513_end_mask_0, x = value_37_cast_fp16)[name = tensor("op_28513_cast_fp16")]; + tensor var_28517_begin_0 = const()[name = tensor("op_28517_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_28517_end_0 = const()[name = tensor("op_28517_end_0"), val = tensor([1, 960, 1, 1500])]; + tensor var_28517_end_mask_0 = const()[name = tensor("op_28517_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_28517_cast_fp16 = slice_by_index(begin = var_28517_begin_0, end = var_28517_end_0, end_mask = var_28517_end_mask_0, x = value_37_cast_fp16)[name = tensor("op_28517_cast_fp16")]; + tensor var_28521_begin_0 = const()[name = tensor("op_28521_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_28521_end_0 = const()[name = tensor("op_28521_end_0"), val = tensor([1, 1024, 1, 1500])]; + tensor var_28521_end_mask_0 = const()[name = tensor("op_28521_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_28521_cast_fp16 = slice_by_index(begin = var_28521_begin_0, end = var_28521_end_0, end_mask = var_28521_end_mask_0, x = value_37_cast_fp16)[name = tensor("op_28521_cast_fp16")]; + tensor var_28525_begin_0 = const()[name = tensor("op_28525_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_28525_end_0 = const()[name = tensor("op_28525_end_0"), val = tensor([1, 1088, 1, 1500])]; + tensor var_28525_end_mask_0 = const()[name = tensor("op_28525_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_28525_cast_fp16 = slice_by_index(begin = var_28525_begin_0, end = var_28525_end_0, end_mask = var_28525_end_mask_0, x = value_37_cast_fp16)[name = tensor("op_28525_cast_fp16")]; + tensor var_28529_begin_0 = const()[name = tensor("op_28529_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_28529_end_0 = const()[name = tensor("op_28529_end_0"), val = tensor([1, 1152, 1, 1500])]; + tensor var_28529_end_mask_0 = const()[name = tensor("op_28529_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_28529_cast_fp16 = slice_by_index(begin = var_28529_begin_0, end = var_28529_end_0, end_mask = var_28529_end_mask_0, x = value_37_cast_fp16)[name = tensor("op_28529_cast_fp16")]; + tensor var_28533_begin_0 = const()[name = tensor("op_28533_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_28533_end_0 = const()[name = tensor("op_28533_end_0"), val = tensor([1, 1216, 1, 1500])]; + tensor var_28533_end_mask_0 = const()[name = tensor("op_28533_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_28533_cast_fp16 = slice_by_index(begin = var_28533_begin_0, end = var_28533_end_0, end_mask = var_28533_end_mask_0, x = value_37_cast_fp16)[name = tensor("op_28533_cast_fp16")]; + tensor var_28537_begin_0 = const()[name = tensor("op_28537_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_28537_end_0 = const()[name = tensor("op_28537_end_0"), val = tensor([1, 1280, 1, 1500])]; + tensor var_28537_end_mask_0 = const()[name = tensor("op_28537_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_28537_cast_fp16 = slice_by_index(begin = var_28537_begin_0, end = var_28537_end_0, end_mask = var_28537_end_mask_0, x = value_37_cast_fp16)[name = tensor("op_28537_cast_fp16")]; + tensor var_28541_equation_0 = const()[name = tensor("op_28541_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28541_cast_fp16 = einsum(equation = var_28541_equation_0, values = (var_28383_cast_fp16, var_27825_cast_fp16))[name = tensor("op_28541_cast_fp16")]; + tensor var_28542_to_fp16 = const()[name = tensor("op_28542_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2881_cast_fp16 = mul(x = var_28541_cast_fp16, y = var_28542_to_fp16)[name = tensor("aw_chunk_2881_cast_fp16")]; + tensor var_28545_equation_0 = const()[name = tensor("op_28545_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28545_cast_fp16 = einsum(equation = var_28545_equation_0, values = (var_28383_cast_fp16, var_27832_cast_fp16))[name = tensor("op_28545_cast_fp16")]; + tensor var_28546_to_fp16 = const()[name = tensor("op_28546_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2883_cast_fp16 = mul(x = var_28545_cast_fp16, y = var_28546_to_fp16)[name = tensor("aw_chunk_2883_cast_fp16")]; + tensor var_28549_equation_0 = const()[name = tensor("op_28549_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28549_cast_fp16 = einsum(equation = var_28549_equation_0, values = (var_28383_cast_fp16, var_27839_cast_fp16))[name = tensor("op_28549_cast_fp16")]; + tensor var_28550_to_fp16 = const()[name = tensor("op_28550_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2885_cast_fp16 = mul(x = var_28549_cast_fp16, y = var_28550_to_fp16)[name = tensor("aw_chunk_2885_cast_fp16")]; + tensor var_28553_equation_0 = const()[name = tensor("op_28553_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28553_cast_fp16 = einsum(equation = var_28553_equation_0, values = (var_28383_cast_fp16, var_27846_cast_fp16))[name = tensor("op_28553_cast_fp16")]; + tensor var_28554_to_fp16 = const()[name = tensor("op_28554_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2887_cast_fp16 = mul(x = var_28553_cast_fp16, y = var_28554_to_fp16)[name = tensor("aw_chunk_2887_cast_fp16")]; + tensor var_28557_equation_0 = const()[name = tensor("op_28557_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28557_cast_fp16 = einsum(equation = var_28557_equation_0, values = (var_28387_cast_fp16, var_27853_cast_fp16))[name = tensor("op_28557_cast_fp16")]; + tensor var_28558_to_fp16 = const()[name = tensor("op_28558_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2889_cast_fp16 = mul(x = var_28557_cast_fp16, y = var_28558_to_fp16)[name = tensor("aw_chunk_2889_cast_fp16")]; + tensor var_28561_equation_0 = const()[name = tensor("op_28561_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28561_cast_fp16 = einsum(equation = var_28561_equation_0, values = (var_28387_cast_fp16, var_27860_cast_fp16))[name = tensor("op_28561_cast_fp16")]; + tensor var_28562_to_fp16 = const()[name = tensor("op_28562_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2891_cast_fp16 = mul(x = var_28561_cast_fp16, y = var_28562_to_fp16)[name = tensor("aw_chunk_2891_cast_fp16")]; + tensor var_28565_equation_0 = const()[name = tensor("op_28565_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28565_cast_fp16 = einsum(equation = var_28565_equation_0, values = (var_28387_cast_fp16, var_27867_cast_fp16))[name = tensor("op_28565_cast_fp16")]; + tensor var_28566_to_fp16 = const()[name = tensor("op_28566_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2893_cast_fp16 = mul(x = var_28565_cast_fp16, y = var_28566_to_fp16)[name = tensor("aw_chunk_2893_cast_fp16")]; + tensor var_28569_equation_0 = const()[name = tensor("op_28569_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28569_cast_fp16 = einsum(equation = var_28569_equation_0, values = (var_28387_cast_fp16, var_27874_cast_fp16))[name = tensor("op_28569_cast_fp16")]; + tensor var_28570_to_fp16 = const()[name = tensor("op_28570_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2895_cast_fp16 = mul(x = var_28569_cast_fp16, y = var_28570_to_fp16)[name = tensor("aw_chunk_2895_cast_fp16")]; + tensor var_28573_equation_0 = const()[name = tensor("op_28573_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28573_cast_fp16 = einsum(equation = var_28573_equation_0, values = (var_28391_cast_fp16, var_27881_cast_fp16))[name = tensor("op_28573_cast_fp16")]; + tensor var_28574_to_fp16 = const()[name = tensor("op_28574_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2897_cast_fp16 = mul(x = var_28573_cast_fp16, y = var_28574_to_fp16)[name = tensor("aw_chunk_2897_cast_fp16")]; + tensor var_28577_equation_0 = const()[name = tensor("op_28577_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28577_cast_fp16 = einsum(equation = var_28577_equation_0, values = (var_28391_cast_fp16, var_27888_cast_fp16))[name = tensor("op_28577_cast_fp16")]; + tensor var_28578_to_fp16 = const()[name = tensor("op_28578_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2899_cast_fp16 = mul(x = var_28577_cast_fp16, y = var_28578_to_fp16)[name = tensor("aw_chunk_2899_cast_fp16")]; + tensor var_28581_equation_0 = const()[name = tensor("op_28581_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28581_cast_fp16 = einsum(equation = var_28581_equation_0, values = (var_28391_cast_fp16, var_27895_cast_fp16))[name = tensor("op_28581_cast_fp16")]; + tensor var_28582_to_fp16 = const()[name = tensor("op_28582_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2901_cast_fp16 = mul(x = var_28581_cast_fp16, y = var_28582_to_fp16)[name = tensor("aw_chunk_2901_cast_fp16")]; + tensor var_28585_equation_0 = const()[name = tensor("op_28585_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28585_cast_fp16 = einsum(equation = var_28585_equation_0, values = (var_28391_cast_fp16, var_27902_cast_fp16))[name = tensor("op_28585_cast_fp16")]; + tensor var_28586_to_fp16 = const()[name = tensor("op_28586_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2903_cast_fp16 = mul(x = var_28585_cast_fp16, y = var_28586_to_fp16)[name = tensor("aw_chunk_2903_cast_fp16")]; + tensor var_28589_equation_0 = const()[name = tensor("op_28589_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28589_cast_fp16 = einsum(equation = var_28589_equation_0, values = (var_28395_cast_fp16, var_27909_cast_fp16))[name = tensor("op_28589_cast_fp16")]; + tensor var_28590_to_fp16 = const()[name = tensor("op_28590_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2905_cast_fp16 = mul(x = var_28589_cast_fp16, y = var_28590_to_fp16)[name = tensor("aw_chunk_2905_cast_fp16")]; + tensor var_28593_equation_0 = const()[name = tensor("op_28593_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28593_cast_fp16 = einsum(equation = var_28593_equation_0, values = (var_28395_cast_fp16, var_27916_cast_fp16))[name = tensor("op_28593_cast_fp16")]; + tensor var_28594_to_fp16 = const()[name = tensor("op_28594_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2907_cast_fp16 = mul(x = var_28593_cast_fp16, y = var_28594_to_fp16)[name = tensor("aw_chunk_2907_cast_fp16")]; + tensor var_28597_equation_0 = const()[name = tensor("op_28597_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28597_cast_fp16 = einsum(equation = var_28597_equation_0, values = (var_28395_cast_fp16, var_27923_cast_fp16))[name = tensor("op_28597_cast_fp16")]; + tensor var_28598_to_fp16 = const()[name = tensor("op_28598_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2909_cast_fp16 = mul(x = var_28597_cast_fp16, y = var_28598_to_fp16)[name = tensor("aw_chunk_2909_cast_fp16")]; + tensor var_28601_equation_0 = const()[name = tensor("op_28601_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28601_cast_fp16 = einsum(equation = var_28601_equation_0, values = (var_28395_cast_fp16, var_27930_cast_fp16))[name = tensor("op_28601_cast_fp16")]; + tensor var_28602_to_fp16 = const()[name = tensor("op_28602_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2911_cast_fp16 = mul(x = var_28601_cast_fp16, y = var_28602_to_fp16)[name = tensor("aw_chunk_2911_cast_fp16")]; + tensor var_28605_equation_0 = const()[name = tensor("op_28605_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28605_cast_fp16 = einsum(equation = var_28605_equation_0, values = (var_28399_cast_fp16, var_27937_cast_fp16))[name = tensor("op_28605_cast_fp16")]; + tensor var_28606_to_fp16 = const()[name = tensor("op_28606_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2913_cast_fp16 = mul(x = var_28605_cast_fp16, y = var_28606_to_fp16)[name = tensor("aw_chunk_2913_cast_fp16")]; + tensor var_28609_equation_0 = const()[name = tensor("op_28609_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28609_cast_fp16 = einsum(equation = var_28609_equation_0, values = (var_28399_cast_fp16, var_27944_cast_fp16))[name = tensor("op_28609_cast_fp16")]; + tensor var_28610_to_fp16 = const()[name = tensor("op_28610_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2915_cast_fp16 = mul(x = var_28609_cast_fp16, y = var_28610_to_fp16)[name = tensor("aw_chunk_2915_cast_fp16")]; + tensor var_28613_equation_0 = const()[name = tensor("op_28613_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28613_cast_fp16 = einsum(equation = var_28613_equation_0, values = (var_28399_cast_fp16, var_27951_cast_fp16))[name = tensor("op_28613_cast_fp16")]; + tensor var_28614_to_fp16 = const()[name = tensor("op_28614_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2917_cast_fp16 = mul(x = var_28613_cast_fp16, y = var_28614_to_fp16)[name = tensor("aw_chunk_2917_cast_fp16")]; + tensor var_28617_equation_0 = const()[name = tensor("op_28617_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28617_cast_fp16 = einsum(equation = var_28617_equation_0, values = (var_28399_cast_fp16, var_27958_cast_fp16))[name = tensor("op_28617_cast_fp16")]; + tensor var_28618_to_fp16 = const()[name = tensor("op_28618_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2919_cast_fp16 = mul(x = var_28617_cast_fp16, y = var_28618_to_fp16)[name = tensor("aw_chunk_2919_cast_fp16")]; + tensor var_28621_equation_0 = const()[name = tensor("op_28621_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28621_cast_fp16 = einsum(equation = var_28621_equation_0, values = (var_28403_cast_fp16, var_27965_cast_fp16))[name = tensor("op_28621_cast_fp16")]; + tensor var_28622_to_fp16 = const()[name = tensor("op_28622_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2921_cast_fp16 = mul(x = var_28621_cast_fp16, y = var_28622_to_fp16)[name = tensor("aw_chunk_2921_cast_fp16")]; + tensor var_28625_equation_0 = const()[name = tensor("op_28625_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28625_cast_fp16 = einsum(equation = var_28625_equation_0, values = (var_28403_cast_fp16, var_27972_cast_fp16))[name = tensor("op_28625_cast_fp16")]; + tensor var_28626_to_fp16 = const()[name = tensor("op_28626_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2923_cast_fp16 = mul(x = var_28625_cast_fp16, y = var_28626_to_fp16)[name = tensor("aw_chunk_2923_cast_fp16")]; + tensor var_28629_equation_0 = const()[name = tensor("op_28629_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28629_cast_fp16 = einsum(equation = var_28629_equation_0, values = (var_28403_cast_fp16, var_27979_cast_fp16))[name = tensor("op_28629_cast_fp16")]; + tensor var_28630_to_fp16 = const()[name = tensor("op_28630_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2925_cast_fp16 = mul(x = var_28629_cast_fp16, y = var_28630_to_fp16)[name = tensor("aw_chunk_2925_cast_fp16")]; + tensor var_28633_equation_0 = const()[name = tensor("op_28633_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28633_cast_fp16 = einsum(equation = var_28633_equation_0, values = (var_28403_cast_fp16, var_27986_cast_fp16))[name = tensor("op_28633_cast_fp16")]; + tensor var_28634_to_fp16 = const()[name = tensor("op_28634_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2927_cast_fp16 = mul(x = var_28633_cast_fp16, y = var_28634_to_fp16)[name = tensor("aw_chunk_2927_cast_fp16")]; + tensor var_28637_equation_0 = const()[name = tensor("op_28637_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28637_cast_fp16 = einsum(equation = var_28637_equation_0, values = (var_28407_cast_fp16, var_27993_cast_fp16))[name = tensor("op_28637_cast_fp16")]; + tensor var_28638_to_fp16 = const()[name = tensor("op_28638_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2929_cast_fp16 = mul(x = var_28637_cast_fp16, y = var_28638_to_fp16)[name = tensor("aw_chunk_2929_cast_fp16")]; + tensor var_28641_equation_0 = const()[name = tensor("op_28641_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28641_cast_fp16 = einsum(equation = var_28641_equation_0, values = (var_28407_cast_fp16, var_28000_cast_fp16))[name = tensor("op_28641_cast_fp16")]; + tensor var_28642_to_fp16 = const()[name = tensor("op_28642_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2931_cast_fp16 = mul(x = var_28641_cast_fp16, y = var_28642_to_fp16)[name = tensor("aw_chunk_2931_cast_fp16")]; + tensor var_28645_equation_0 = const()[name = tensor("op_28645_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28645_cast_fp16 = einsum(equation = var_28645_equation_0, values = (var_28407_cast_fp16, var_28007_cast_fp16))[name = tensor("op_28645_cast_fp16")]; + tensor var_28646_to_fp16 = const()[name = tensor("op_28646_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2933_cast_fp16 = mul(x = var_28645_cast_fp16, y = var_28646_to_fp16)[name = tensor("aw_chunk_2933_cast_fp16")]; + tensor var_28649_equation_0 = const()[name = tensor("op_28649_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28649_cast_fp16 = einsum(equation = var_28649_equation_0, values = (var_28407_cast_fp16, var_28014_cast_fp16))[name = tensor("op_28649_cast_fp16")]; + tensor var_28650_to_fp16 = const()[name = tensor("op_28650_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2935_cast_fp16 = mul(x = var_28649_cast_fp16, y = var_28650_to_fp16)[name = tensor("aw_chunk_2935_cast_fp16")]; + tensor var_28653_equation_0 = const()[name = tensor("op_28653_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28653_cast_fp16 = einsum(equation = var_28653_equation_0, values = (var_28411_cast_fp16, var_28021_cast_fp16))[name = tensor("op_28653_cast_fp16")]; + tensor var_28654_to_fp16 = const()[name = tensor("op_28654_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2937_cast_fp16 = mul(x = var_28653_cast_fp16, y = var_28654_to_fp16)[name = tensor("aw_chunk_2937_cast_fp16")]; + tensor var_28657_equation_0 = const()[name = tensor("op_28657_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28657_cast_fp16 = einsum(equation = var_28657_equation_0, values = (var_28411_cast_fp16, var_28028_cast_fp16))[name = tensor("op_28657_cast_fp16")]; + tensor var_28658_to_fp16 = const()[name = tensor("op_28658_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2939_cast_fp16 = mul(x = var_28657_cast_fp16, y = var_28658_to_fp16)[name = tensor("aw_chunk_2939_cast_fp16")]; + tensor var_28661_equation_0 = const()[name = tensor("op_28661_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28661_cast_fp16 = einsum(equation = var_28661_equation_0, values = (var_28411_cast_fp16, var_28035_cast_fp16))[name = tensor("op_28661_cast_fp16")]; + tensor var_28662_to_fp16 = const()[name = tensor("op_28662_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2941_cast_fp16 = mul(x = var_28661_cast_fp16, y = var_28662_to_fp16)[name = tensor("aw_chunk_2941_cast_fp16")]; + tensor var_28665_equation_0 = const()[name = tensor("op_28665_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28665_cast_fp16 = einsum(equation = var_28665_equation_0, values = (var_28411_cast_fp16, var_28042_cast_fp16))[name = tensor("op_28665_cast_fp16")]; + tensor var_28666_to_fp16 = const()[name = tensor("op_28666_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2943_cast_fp16 = mul(x = var_28665_cast_fp16, y = var_28666_to_fp16)[name = tensor("aw_chunk_2943_cast_fp16")]; + tensor var_28669_equation_0 = const()[name = tensor("op_28669_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28669_cast_fp16 = einsum(equation = var_28669_equation_0, values = (var_28415_cast_fp16, var_28049_cast_fp16))[name = tensor("op_28669_cast_fp16")]; + tensor var_28670_to_fp16 = const()[name = tensor("op_28670_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2945_cast_fp16 = mul(x = var_28669_cast_fp16, y = var_28670_to_fp16)[name = tensor("aw_chunk_2945_cast_fp16")]; + tensor var_28673_equation_0 = const()[name = tensor("op_28673_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28673_cast_fp16 = einsum(equation = var_28673_equation_0, values = (var_28415_cast_fp16, var_28056_cast_fp16))[name = tensor("op_28673_cast_fp16")]; + tensor var_28674_to_fp16 = const()[name = tensor("op_28674_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2947_cast_fp16 = mul(x = var_28673_cast_fp16, y = var_28674_to_fp16)[name = tensor("aw_chunk_2947_cast_fp16")]; + tensor var_28677_equation_0 = const()[name = tensor("op_28677_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28677_cast_fp16 = einsum(equation = var_28677_equation_0, values = (var_28415_cast_fp16, var_28063_cast_fp16))[name = tensor("op_28677_cast_fp16")]; + tensor var_28678_to_fp16 = const()[name = tensor("op_28678_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2949_cast_fp16 = mul(x = var_28677_cast_fp16, y = var_28678_to_fp16)[name = tensor("aw_chunk_2949_cast_fp16")]; + tensor var_28681_equation_0 = const()[name = tensor("op_28681_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28681_cast_fp16 = einsum(equation = var_28681_equation_0, values = (var_28415_cast_fp16, var_28070_cast_fp16))[name = tensor("op_28681_cast_fp16")]; + tensor var_28682_to_fp16 = const()[name = tensor("op_28682_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2951_cast_fp16 = mul(x = var_28681_cast_fp16, y = var_28682_to_fp16)[name = tensor("aw_chunk_2951_cast_fp16")]; + tensor var_28685_equation_0 = const()[name = tensor("op_28685_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28685_cast_fp16 = einsum(equation = var_28685_equation_0, values = (var_28419_cast_fp16, var_28077_cast_fp16))[name = tensor("op_28685_cast_fp16")]; + tensor var_28686_to_fp16 = const()[name = tensor("op_28686_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2953_cast_fp16 = mul(x = var_28685_cast_fp16, y = var_28686_to_fp16)[name = tensor("aw_chunk_2953_cast_fp16")]; + tensor var_28689_equation_0 = const()[name = tensor("op_28689_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28689_cast_fp16 = einsum(equation = var_28689_equation_0, values = (var_28419_cast_fp16, var_28084_cast_fp16))[name = tensor("op_28689_cast_fp16")]; + tensor var_28690_to_fp16 = const()[name = tensor("op_28690_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2955_cast_fp16 = mul(x = var_28689_cast_fp16, y = var_28690_to_fp16)[name = tensor("aw_chunk_2955_cast_fp16")]; + tensor var_28693_equation_0 = const()[name = tensor("op_28693_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28693_cast_fp16 = einsum(equation = var_28693_equation_0, values = (var_28419_cast_fp16, var_28091_cast_fp16))[name = tensor("op_28693_cast_fp16")]; + tensor var_28694_to_fp16 = const()[name = tensor("op_28694_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2957_cast_fp16 = mul(x = var_28693_cast_fp16, y = var_28694_to_fp16)[name = tensor("aw_chunk_2957_cast_fp16")]; + tensor var_28697_equation_0 = const()[name = tensor("op_28697_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28697_cast_fp16 = einsum(equation = var_28697_equation_0, values = (var_28419_cast_fp16, var_28098_cast_fp16))[name = tensor("op_28697_cast_fp16")]; + tensor var_28698_to_fp16 = const()[name = tensor("op_28698_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2959_cast_fp16 = mul(x = var_28697_cast_fp16, y = var_28698_to_fp16)[name = tensor("aw_chunk_2959_cast_fp16")]; + tensor var_28701_equation_0 = const()[name = tensor("op_28701_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28701_cast_fp16 = einsum(equation = var_28701_equation_0, values = (var_28423_cast_fp16, var_28105_cast_fp16))[name = tensor("op_28701_cast_fp16")]; + tensor var_28702_to_fp16 = const()[name = tensor("op_28702_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2961_cast_fp16 = mul(x = var_28701_cast_fp16, y = var_28702_to_fp16)[name = tensor("aw_chunk_2961_cast_fp16")]; + tensor var_28705_equation_0 = const()[name = tensor("op_28705_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28705_cast_fp16 = einsum(equation = var_28705_equation_0, values = (var_28423_cast_fp16, var_28112_cast_fp16))[name = tensor("op_28705_cast_fp16")]; + tensor var_28706_to_fp16 = const()[name = tensor("op_28706_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2963_cast_fp16 = mul(x = var_28705_cast_fp16, y = var_28706_to_fp16)[name = tensor("aw_chunk_2963_cast_fp16")]; + tensor var_28709_equation_0 = const()[name = tensor("op_28709_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28709_cast_fp16 = einsum(equation = var_28709_equation_0, values = (var_28423_cast_fp16, var_28119_cast_fp16))[name = tensor("op_28709_cast_fp16")]; + tensor var_28710_to_fp16 = const()[name = tensor("op_28710_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2965_cast_fp16 = mul(x = var_28709_cast_fp16, y = var_28710_to_fp16)[name = tensor("aw_chunk_2965_cast_fp16")]; + tensor var_28713_equation_0 = const()[name = tensor("op_28713_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28713_cast_fp16 = einsum(equation = var_28713_equation_0, values = (var_28423_cast_fp16, var_28126_cast_fp16))[name = tensor("op_28713_cast_fp16")]; + tensor var_28714_to_fp16 = const()[name = tensor("op_28714_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2967_cast_fp16 = mul(x = var_28713_cast_fp16, y = var_28714_to_fp16)[name = tensor("aw_chunk_2967_cast_fp16")]; + tensor var_28717_equation_0 = const()[name = tensor("op_28717_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28717_cast_fp16 = einsum(equation = var_28717_equation_0, values = (var_28427_cast_fp16, var_28133_cast_fp16))[name = tensor("op_28717_cast_fp16")]; + tensor var_28718_to_fp16 = const()[name = tensor("op_28718_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2969_cast_fp16 = mul(x = var_28717_cast_fp16, y = var_28718_to_fp16)[name = tensor("aw_chunk_2969_cast_fp16")]; + tensor var_28721_equation_0 = const()[name = tensor("op_28721_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28721_cast_fp16 = einsum(equation = var_28721_equation_0, values = (var_28427_cast_fp16, var_28140_cast_fp16))[name = tensor("op_28721_cast_fp16")]; + tensor var_28722_to_fp16 = const()[name = tensor("op_28722_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2971_cast_fp16 = mul(x = var_28721_cast_fp16, y = var_28722_to_fp16)[name = tensor("aw_chunk_2971_cast_fp16")]; + tensor var_28725_equation_0 = const()[name = tensor("op_28725_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28725_cast_fp16 = einsum(equation = var_28725_equation_0, values = (var_28427_cast_fp16, var_28147_cast_fp16))[name = tensor("op_28725_cast_fp16")]; + tensor var_28726_to_fp16 = const()[name = tensor("op_28726_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2973_cast_fp16 = mul(x = var_28725_cast_fp16, y = var_28726_to_fp16)[name = tensor("aw_chunk_2973_cast_fp16")]; + tensor var_28729_equation_0 = const()[name = tensor("op_28729_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28729_cast_fp16 = einsum(equation = var_28729_equation_0, values = (var_28427_cast_fp16, var_28154_cast_fp16))[name = tensor("op_28729_cast_fp16")]; + tensor var_28730_to_fp16 = const()[name = tensor("op_28730_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2975_cast_fp16 = mul(x = var_28729_cast_fp16, y = var_28730_to_fp16)[name = tensor("aw_chunk_2975_cast_fp16")]; + tensor var_28733_equation_0 = const()[name = tensor("op_28733_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28733_cast_fp16 = einsum(equation = var_28733_equation_0, values = (var_28431_cast_fp16, var_28161_cast_fp16))[name = tensor("op_28733_cast_fp16")]; + tensor var_28734_to_fp16 = const()[name = tensor("op_28734_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2977_cast_fp16 = mul(x = var_28733_cast_fp16, y = var_28734_to_fp16)[name = tensor("aw_chunk_2977_cast_fp16")]; + tensor var_28737_equation_0 = const()[name = tensor("op_28737_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28737_cast_fp16 = einsum(equation = var_28737_equation_0, values = (var_28431_cast_fp16, var_28168_cast_fp16))[name = tensor("op_28737_cast_fp16")]; + tensor var_28738_to_fp16 = const()[name = tensor("op_28738_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2979_cast_fp16 = mul(x = var_28737_cast_fp16, y = var_28738_to_fp16)[name = tensor("aw_chunk_2979_cast_fp16")]; + tensor var_28741_equation_0 = const()[name = tensor("op_28741_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28741_cast_fp16 = einsum(equation = var_28741_equation_0, values = (var_28431_cast_fp16, var_28175_cast_fp16))[name = tensor("op_28741_cast_fp16")]; + tensor var_28742_to_fp16 = const()[name = tensor("op_28742_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2981_cast_fp16 = mul(x = var_28741_cast_fp16, y = var_28742_to_fp16)[name = tensor("aw_chunk_2981_cast_fp16")]; + tensor var_28745_equation_0 = const()[name = tensor("op_28745_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28745_cast_fp16 = einsum(equation = var_28745_equation_0, values = (var_28431_cast_fp16, var_28182_cast_fp16))[name = tensor("op_28745_cast_fp16")]; + tensor var_28746_to_fp16 = const()[name = tensor("op_28746_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2983_cast_fp16 = mul(x = var_28745_cast_fp16, y = var_28746_to_fp16)[name = tensor("aw_chunk_2983_cast_fp16")]; + tensor var_28749_equation_0 = const()[name = tensor("op_28749_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28749_cast_fp16 = einsum(equation = var_28749_equation_0, values = (var_28435_cast_fp16, var_28189_cast_fp16))[name = tensor("op_28749_cast_fp16")]; + tensor var_28750_to_fp16 = const()[name = tensor("op_28750_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2985_cast_fp16 = mul(x = var_28749_cast_fp16, y = var_28750_to_fp16)[name = tensor("aw_chunk_2985_cast_fp16")]; + tensor var_28753_equation_0 = const()[name = tensor("op_28753_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28753_cast_fp16 = einsum(equation = var_28753_equation_0, values = (var_28435_cast_fp16, var_28196_cast_fp16))[name = tensor("op_28753_cast_fp16")]; + tensor var_28754_to_fp16 = const()[name = tensor("op_28754_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2987_cast_fp16 = mul(x = var_28753_cast_fp16, y = var_28754_to_fp16)[name = tensor("aw_chunk_2987_cast_fp16")]; + tensor var_28757_equation_0 = const()[name = tensor("op_28757_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28757_cast_fp16 = einsum(equation = var_28757_equation_0, values = (var_28435_cast_fp16, var_28203_cast_fp16))[name = tensor("op_28757_cast_fp16")]; + tensor var_28758_to_fp16 = const()[name = tensor("op_28758_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2989_cast_fp16 = mul(x = var_28757_cast_fp16, y = var_28758_to_fp16)[name = tensor("aw_chunk_2989_cast_fp16")]; + tensor var_28761_equation_0 = const()[name = tensor("op_28761_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28761_cast_fp16 = einsum(equation = var_28761_equation_0, values = (var_28435_cast_fp16, var_28210_cast_fp16))[name = tensor("op_28761_cast_fp16")]; + tensor var_28762_to_fp16 = const()[name = tensor("op_28762_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2991_cast_fp16 = mul(x = var_28761_cast_fp16, y = var_28762_to_fp16)[name = tensor("aw_chunk_2991_cast_fp16")]; + tensor var_28765_equation_0 = const()[name = tensor("op_28765_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28765_cast_fp16 = einsum(equation = var_28765_equation_0, values = (var_28439_cast_fp16, var_28217_cast_fp16))[name = tensor("op_28765_cast_fp16")]; + tensor var_28766_to_fp16 = const()[name = tensor("op_28766_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2993_cast_fp16 = mul(x = var_28765_cast_fp16, y = var_28766_to_fp16)[name = tensor("aw_chunk_2993_cast_fp16")]; + tensor var_28769_equation_0 = const()[name = tensor("op_28769_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28769_cast_fp16 = einsum(equation = var_28769_equation_0, values = (var_28439_cast_fp16, var_28224_cast_fp16))[name = tensor("op_28769_cast_fp16")]; + tensor var_28770_to_fp16 = const()[name = tensor("op_28770_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2995_cast_fp16 = mul(x = var_28769_cast_fp16, y = var_28770_to_fp16)[name = tensor("aw_chunk_2995_cast_fp16")]; + tensor var_28773_equation_0 = const()[name = tensor("op_28773_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28773_cast_fp16 = einsum(equation = var_28773_equation_0, values = (var_28439_cast_fp16, var_28231_cast_fp16))[name = tensor("op_28773_cast_fp16")]; + tensor var_28774_to_fp16 = const()[name = tensor("op_28774_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2997_cast_fp16 = mul(x = var_28773_cast_fp16, y = var_28774_to_fp16)[name = tensor("aw_chunk_2997_cast_fp16")]; + tensor var_28777_equation_0 = const()[name = tensor("op_28777_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28777_cast_fp16 = einsum(equation = var_28777_equation_0, values = (var_28439_cast_fp16, var_28238_cast_fp16))[name = tensor("op_28777_cast_fp16")]; + tensor var_28778_to_fp16 = const()[name = tensor("op_28778_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_2999_cast_fp16 = mul(x = var_28777_cast_fp16, y = var_28778_to_fp16)[name = tensor("aw_chunk_2999_cast_fp16")]; + tensor var_28781_equation_0 = const()[name = tensor("op_28781_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28781_cast_fp16 = einsum(equation = var_28781_equation_0, values = (var_28443_cast_fp16, var_28245_cast_fp16))[name = tensor("op_28781_cast_fp16")]; + tensor var_28782_to_fp16 = const()[name = tensor("op_28782_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3001_cast_fp16 = mul(x = var_28781_cast_fp16, y = var_28782_to_fp16)[name = tensor("aw_chunk_3001_cast_fp16")]; + tensor var_28785_equation_0 = const()[name = tensor("op_28785_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28785_cast_fp16 = einsum(equation = var_28785_equation_0, values = (var_28443_cast_fp16, var_28252_cast_fp16))[name = tensor("op_28785_cast_fp16")]; + tensor var_28786_to_fp16 = const()[name = tensor("op_28786_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3003_cast_fp16 = mul(x = var_28785_cast_fp16, y = var_28786_to_fp16)[name = tensor("aw_chunk_3003_cast_fp16")]; + tensor var_28789_equation_0 = const()[name = tensor("op_28789_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28789_cast_fp16 = einsum(equation = var_28789_equation_0, values = (var_28443_cast_fp16, var_28259_cast_fp16))[name = tensor("op_28789_cast_fp16")]; + tensor var_28790_to_fp16 = const()[name = tensor("op_28790_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3005_cast_fp16 = mul(x = var_28789_cast_fp16, y = var_28790_to_fp16)[name = tensor("aw_chunk_3005_cast_fp16")]; + tensor var_28793_equation_0 = const()[name = tensor("op_28793_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28793_cast_fp16 = einsum(equation = var_28793_equation_0, values = (var_28443_cast_fp16, var_28266_cast_fp16))[name = tensor("op_28793_cast_fp16")]; + tensor var_28794_to_fp16 = const()[name = tensor("op_28794_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3007_cast_fp16 = mul(x = var_28793_cast_fp16, y = var_28794_to_fp16)[name = tensor("aw_chunk_3007_cast_fp16")]; + tensor var_28797_equation_0 = const()[name = tensor("op_28797_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28797_cast_fp16 = einsum(equation = var_28797_equation_0, values = (var_28447_cast_fp16, var_28273_cast_fp16))[name = tensor("op_28797_cast_fp16")]; + tensor var_28798_to_fp16 = const()[name = tensor("op_28798_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3009_cast_fp16 = mul(x = var_28797_cast_fp16, y = var_28798_to_fp16)[name = tensor("aw_chunk_3009_cast_fp16")]; + tensor var_28801_equation_0 = const()[name = tensor("op_28801_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28801_cast_fp16 = einsum(equation = var_28801_equation_0, values = (var_28447_cast_fp16, var_28280_cast_fp16))[name = tensor("op_28801_cast_fp16")]; + tensor var_28802_to_fp16 = const()[name = tensor("op_28802_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3011_cast_fp16 = mul(x = var_28801_cast_fp16, y = var_28802_to_fp16)[name = tensor("aw_chunk_3011_cast_fp16")]; + tensor var_28805_equation_0 = const()[name = tensor("op_28805_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28805_cast_fp16 = einsum(equation = var_28805_equation_0, values = (var_28447_cast_fp16, var_28287_cast_fp16))[name = tensor("op_28805_cast_fp16")]; + tensor var_28806_to_fp16 = const()[name = tensor("op_28806_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3013_cast_fp16 = mul(x = var_28805_cast_fp16, y = var_28806_to_fp16)[name = tensor("aw_chunk_3013_cast_fp16")]; + tensor var_28809_equation_0 = const()[name = tensor("op_28809_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28809_cast_fp16 = einsum(equation = var_28809_equation_0, values = (var_28447_cast_fp16, var_28294_cast_fp16))[name = tensor("op_28809_cast_fp16")]; + tensor var_28810_to_fp16 = const()[name = tensor("op_28810_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3015_cast_fp16 = mul(x = var_28809_cast_fp16, y = var_28810_to_fp16)[name = tensor("aw_chunk_3015_cast_fp16")]; + tensor var_28813_equation_0 = const()[name = tensor("op_28813_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28813_cast_fp16 = einsum(equation = var_28813_equation_0, values = (var_28451_cast_fp16, var_28301_cast_fp16))[name = tensor("op_28813_cast_fp16")]; + tensor var_28814_to_fp16 = const()[name = tensor("op_28814_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3017_cast_fp16 = mul(x = var_28813_cast_fp16, y = var_28814_to_fp16)[name = tensor("aw_chunk_3017_cast_fp16")]; + tensor var_28817_equation_0 = const()[name = tensor("op_28817_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28817_cast_fp16 = einsum(equation = var_28817_equation_0, values = (var_28451_cast_fp16, var_28308_cast_fp16))[name = tensor("op_28817_cast_fp16")]; + tensor var_28818_to_fp16 = const()[name = tensor("op_28818_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3019_cast_fp16 = mul(x = var_28817_cast_fp16, y = var_28818_to_fp16)[name = tensor("aw_chunk_3019_cast_fp16")]; + tensor var_28821_equation_0 = const()[name = tensor("op_28821_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28821_cast_fp16 = einsum(equation = var_28821_equation_0, values = (var_28451_cast_fp16, var_28315_cast_fp16))[name = tensor("op_28821_cast_fp16")]; + tensor var_28822_to_fp16 = const()[name = tensor("op_28822_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3021_cast_fp16 = mul(x = var_28821_cast_fp16, y = var_28822_to_fp16)[name = tensor("aw_chunk_3021_cast_fp16")]; + tensor var_28825_equation_0 = const()[name = tensor("op_28825_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28825_cast_fp16 = einsum(equation = var_28825_equation_0, values = (var_28451_cast_fp16, var_28322_cast_fp16))[name = tensor("op_28825_cast_fp16")]; + tensor var_28826_to_fp16 = const()[name = tensor("op_28826_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3023_cast_fp16 = mul(x = var_28825_cast_fp16, y = var_28826_to_fp16)[name = tensor("aw_chunk_3023_cast_fp16")]; + tensor var_28829_equation_0 = const()[name = tensor("op_28829_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28829_cast_fp16 = einsum(equation = var_28829_equation_0, values = (var_28455_cast_fp16, var_28329_cast_fp16))[name = tensor("op_28829_cast_fp16")]; + tensor var_28830_to_fp16 = const()[name = tensor("op_28830_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3025_cast_fp16 = mul(x = var_28829_cast_fp16, y = var_28830_to_fp16)[name = tensor("aw_chunk_3025_cast_fp16")]; + tensor var_28833_equation_0 = const()[name = tensor("op_28833_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28833_cast_fp16 = einsum(equation = var_28833_equation_0, values = (var_28455_cast_fp16, var_28336_cast_fp16))[name = tensor("op_28833_cast_fp16")]; + tensor var_28834_to_fp16 = const()[name = tensor("op_28834_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3027_cast_fp16 = mul(x = var_28833_cast_fp16, y = var_28834_to_fp16)[name = tensor("aw_chunk_3027_cast_fp16")]; + tensor var_28837_equation_0 = const()[name = tensor("op_28837_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28837_cast_fp16 = einsum(equation = var_28837_equation_0, values = (var_28455_cast_fp16, var_28343_cast_fp16))[name = tensor("op_28837_cast_fp16")]; + tensor var_28838_to_fp16 = const()[name = tensor("op_28838_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3029_cast_fp16 = mul(x = var_28837_cast_fp16, y = var_28838_to_fp16)[name = tensor("aw_chunk_3029_cast_fp16")]; + tensor var_28841_equation_0 = const()[name = tensor("op_28841_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28841_cast_fp16 = einsum(equation = var_28841_equation_0, values = (var_28455_cast_fp16, var_28350_cast_fp16))[name = tensor("op_28841_cast_fp16")]; + tensor var_28842_to_fp16 = const()[name = tensor("op_28842_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3031_cast_fp16 = mul(x = var_28841_cast_fp16, y = var_28842_to_fp16)[name = tensor("aw_chunk_3031_cast_fp16")]; + tensor var_28845_equation_0 = const()[name = tensor("op_28845_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28845_cast_fp16 = einsum(equation = var_28845_equation_0, values = (var_28459_cast_fp16, var_28357_cast_fp16))[name = tensor("op_28845_cast_fp16")]; + tensor var_28846_to_fp16 = const()[name = tensor("op_28846_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3033_cast_fp16 = mul(x = var_28845_cast_fp16, y = var_28846_to_fp16)[name = tensor("aw_chunk_3033_cast_fp16")]; + tensor var_28849_equation_0 = const()[name = tensor("op_28849_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28849_cast_fp16 = einsum(equation = var_28849_equation_0, values = (var_28459_cast_fp16, var_28364_cast_fp16))[name = tensor("op_28849_cast_fp16")]; + tensor var_28850_to_fp16 = const()[name = tensor("op_28850_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3035_cast_fp16 = mul(x = var_28849_cast_fp16, y = var_28850_to_fp16)[name = tensor("aw_chunk_3035_cast_fp16")]; + tensor var_28853_equation_0 = const()[name = tensor("op_28853_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28853_cast_fp16 = einsum(equation = var_28853_equation_0, values = (var_28459_cast_fp16, var_28371_cast_fp16))[name = tensor("op_28853_cast_fp16")]; + tensor var_28854_to_fp16 = const()[name = tensor("op_28854_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3037_cast_fp16 = mul(x = var_28853_cast_fp16, y = var_28854_to_fp16)[name = tensor("aw_chunk_3037_cast_fp16")]; + tensor var_28857_equation_0 = const()[name = tensor("op_28857_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_28857_cast_fp16 = einsum(equation = var_28857_equation_0, values = (var_28459_cast_fp16, var_28378_cast_fp16))[name = tensor("op_28857_cast_fp16")]; + tensor var_28858_to_fp16 = const()[name = tensor("op_28858_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3039_cast_fp16 = mul(x = var_28857_cast_fp16, y = var_28858_to_fp16)[name = tensor("aw_chunk_3039_cast_fp16")]; + tensor var_28860_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_2881_cast_fp16)[name = tensor("op_28860_cast_fp16")]; + tensor var_28861_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_2883_cast_fp16)[name = tensor("op_28861_cast_fp16")]; + tensor var_28862_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_2885_cast_fp16)[name = tensor("op_28862_cast_fp16")]; + tensor var_28863_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_2887_cast_fp16)[name = tensor("op_28863_cast_fp16")]; + tensor var_28864_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_2889_cast_fp16)[name = tensor("op_28864_cast_fp16")]; + tensor var_28865_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_2891_cast_fp16)[name = tensor("op_28865_cast_fp16")]; + tensor var_28866_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_2893_cast_fp16)[name = tensor("op_28866_cast_fp16")]; + tensor var_28867_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_2895_cast_fp16)[name = tensor("op_28867_cast_fp16")]; + tensor var_28868_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_2897_cast_fp16)[name = tensor("op_28868_cast_fp16")]; + tensor var_28869_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_2899_cast_fp16)[name = tensor("op_28869_cast_fp16")]; + tensor var_28870_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_2901_cast_fp16)[name = tensor("op_28870_cast_fp16")]; + tensor var_28871_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_2903_cast_fp16)[name = tensor("op_28871_cast_fp16")]; + tensor var_28872_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_2905_cast_fp16)[name = tensor("op_28872_cast_fp16")]; + tensor var_28873_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_2907_cast_fp16)[name = tensor("op_28873_cast_fp16")]; + tensor var_28874_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_2909_cast_fp16)[name = tensor("op_28874_cast_fp16")]; + tensor var_28875_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_2911_cast_fp16)[name = tensor("op_28875_cast_fp16")]; + tensor var_28876_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_2913_cast_fp16)[name = tensor("op_28876_cast_fp16")]; + tensor var_28877_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_2915_cast_fp16)[name = tensor("op_28877_cast_fp16")]; + tensor var_28878_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_2917_cast_fp16)[name = tensor("op_28878_cast_fp16")]; + tensor var_28879_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_2919_cast_fp16)[name = tensor("op_28879_cast_fp16")]; + tensor var_28880_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_2921_cast_fp16)[name = tensor("op_28880_cast_fp16")]; + tensor var_28881_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_2923_cast_fp16)[name = tensor("op_28881_cast_fp16")]; + tensor var_28882_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_2925_cast_fp16)[name = tensor("op_28882_cast_fp16")]; + tensor var_28883_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_2927_cast_fp16)[name = tensor("op_28883_cast_fp16")]; + tensor var_28884_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_2929_cast_fp16)[name = tensor("op_28884_cast_fp16")]; + tensor var_28885_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_2931_cast_fp16)[name = tensor("op_28885_cast_fp16")]; + tensor var_28886_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_2933_cast_fp16)[name = tensor("op_28886_cast_fp16")]; + tensor var_28887_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_2935_cast_fp16)[name = tensor("op_28887_cast_fp16")]; + tensor var_28888_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_2937_cast_fp16)[name = tensor("op_28888_cast_fp16")]; + tensor var_28889_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_2939_cast_fp16)[name = tensor("op_28889_cast_fp16")]; + tensor var_28890_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_2941_cast_fp16)[name = tensor("op_28890_cast_fp16")]; + tensor var_28891_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_2943_cast_fp16)[name = tensor("op_28891_cast_fp16")]; + tensor var_28892_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_2945_cast_fp16)[name = tensor("op_28892_cast_fp16")]; + tensor var_28893_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_2947_cast_fp16)[name = tensor("op_28893_cast_fp16")]; + tensor var_28894_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_2949_cast_fp16)[name = tensor("op_28894_cast_fp16")]; + tensor var_28895_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_2951_cast_fp16)[name = tensor("op_28895_cast_fp16")]; + tensor var_28896_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_2953_cast_fp16)[name = tensor("op_28896_cast_fp16")]; + tensor var_28897_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_2955_cast_fp16)[name = tensor("op_28897_cast_fp16")]; + tensor var_28898_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_2957_cast_fp16)[name = tensor("op_28898_cast_fp16")]; + tensor var_28899_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_2959_cast_fp16)[name = tensor("op_28899_cast_fp16")]; + tensor var_28900_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_2961_cast_fp16)[name = tensor("op_28900_cast_fp16")]; + tensor var_28901_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_2963_cast_fp16)[name = tensor("op_28901_cast_fp16")]; + tensor var_28902_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_2965_cast_fp16)[name = tensor("op_28902_cast_fp16")]; + tensor var_28903_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_2967_cast_fp16)[name = tensor("op_28903_cast_fp16")]; + tensor var_28904_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_2969_cast_fp16)[name = tensor("op_28904_cast_fp16")]; + tensor var_28905_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_2971_cast_fp16)[name = tensor("op_28905_cast_fp16")]; + tensor var_28906_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_2973_cast_fp16)[name = tensor("op_28906_cast_fp16")]; + tensor var_28907_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_2975_cast_fp16)[name = tensor("op_28907_cast_fp16")]; + tensor var_28908_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_2977_cast_fp16)[name = tensor("op_28908_cast_fp16")]; + tensor var_28909_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_2979_cast_fp16)[name = tensor("op_28909_cast_fp16")]; + tensor var_28910_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_2981_cast_fp16)[name = tensor("op_28910_cast_fp16")]; + tensor var_28911_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_2983_cast_fp16)[name = tensor("op_28911_cast_fp16")]; + tensor var_28912_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_2985_cast_fp16)[name = tensor("op_28912_cast_fp16")]; + tensor var_28913_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_2987_cast_fp16)[name = tensor("op_28913_cast_fp16")]; + tensor var_28914_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_2989_cast_fp16)[name = tensor("op_28914_cast_fp16")]; + tensor var_28915_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_2991_cast_fp16)[name = tensor("op_28915_cast_fp16")]; + tensor var_28916_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_2993_cast_fp16)[name = tensor("op_28916_cast_fp16")]; + tensor var_28917_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_2995_cast_fp16)[name = tensor("op_28917_cast_fp16")]; + tensor var_28918_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_2997_cast_fp16)[name = tensor("op_28918_cast_fp16")]; + tensor var_28919_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_2999_cast_fp16)[name = tensor("op_28919_cast_fp16")]; + tensor var_28920_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_3001_cast_fp16)[name = tensor("op_28920_cast_fp16")]; + tensor var_28921_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_3003_cast_fp16)[name = tensor("op_28921_cast_fp16")]; + tensor var_28922_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_3005_cast_fp16)[name = tensor("op_28922_cast_fp16")]; + tensor var_28923_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_3007_cast_fp16)[name = tensor("op_28923_cast_fp16")]; + tensor var_28924_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_3009_cast_fp16)[name = tensor("op_28924_cast_fp16")]; + tensor var_28925_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_3011_cast_fp16)[name = tensor("op_28925_cast_fp16")]; + tensor var_28926_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_3013_cast_fp16)[name = tensor("op_28926_cast_fp16")]; + tensor var_28927_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_3015_cast_fp16)[name = tensor("op_28927_cast_fp16")]; + tensor var_28928_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_3017_cast_fp16)[name = tensor("op_28928_cast_fp16")]; + tensor var_28929_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_3019_cast_fp16)[name = tensor("op_28929_cast_fp16")]; + tensor var_28930_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_3021_cast_fp16)[name = tensor("op_28930_cast_fp16")]; + tensor var_28931_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_3023_cast_fp16)[name = tensor("op_28931_cast_fp16")]; + tensor var_28932_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_3025_cast_fp16)[name = tensor("op_28932_cast_fp16")]; + tensor var_28933_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_3027_cast_fp16)[name = tensor("op_28933_cast_fp16")]; + tensor var_28934_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_3029_cast_fp16)[name = tensor("op_28934_cast_fp16")]; + tensor var_28935_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_3031_cast_fp16)[name = tensor("op_28935_cast_fp16")]; + tensor var_28936_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_3033_cast_fp16)[name = tensor("op_28936_cast_fp16")]; + tensor var_28937_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_3035_cast_fp16)[name = tensor("op_28937_cast_fp16")]; + tensor var_28938_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_3037_cast_fp16)[name = tensor("op_28938_cast_fp16")]; + tensor var_28939_cast_fp16 = softmax(axis = var_27685, x = aw_chunk_3039_cast_fp16)[name = tensor("op_28939_cast_fp16")]; + tensor var_28941_equation_0 = const()[name = tensor("op_28941_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_28941_cast_fp16 = einsum(equation = var_28941_equation_0, values = (var_28461_cast_fp16, var_28860_cast_fp16))[name = tensor("op_28941_cast_fp16")]; + tensor var_28943_equation_0 = const()[name = tensor("op_28943_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_28943_cast_fp16 = einsum(equation = var_28943_equation_0, values = (var_28461_cast_fp16, var_28861_cast_fp16))[name = tensor("op_28943_cast_fp16")]; + tensor var_28945_equation_0 = const()[name = tensor("op_28945_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_28945_cast_fp16 = einsum(equation = var_28945_equation_0, values = (var_28461_cast_fp16, var_28862_cast_fp16))[name = tensor("op_28945_cast_fp16")]; + tensor var_28947_equation_0 = const()[name = tensor("op_28947_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_28947_cast_fp16 = einsum(equation = var_28947_equation_0, values = (var_28461_cast_fp16, var_28863_cast_fp16))[name = tensor("op_28947_cast_fp16")]; + tensor var_28949_equation_0 = const()[name = tensor("op_28949_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_28949_cast_fp16 = einsum(equation = var_28949_equation_0, values = (var_28465_cast_fp16, var_28864_cast_fp16))[name = tensor("op_28949_cast_fp16")]; + tensor var_28951_equation_0 = const()[name = tensor("op_28951_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_28951_cast_fp16 = einsum(equation = var_28951_equation_0, values = (var_28465_cast_fp16, var_28865_cast_fp16))[name = tensor("op_28951_cast_fp16")]; + tensor var_28953_equation_0 = const()[name = tensor("op_28953_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_28953_cast_fp16 = einsum(equation = var_28953_equation_0, values = (var_28465_cast_fp16, var_28866_cast_fp16))[name = tensor("op_28953_cast_fp16")]; + tensor var_28955_equation_0 = const()[name = tensor("op_28955_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_28955_cast_fp16 = einsum(equation = var_28955_equation_0, values = (var_28465_cast_fp16, var_28867_cast_fp16))[name = tensor("op_28955_cast_fp16")]; + tensor var_28957_equation_0 = const()[name = tensor("op_28957_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_28957_cast_fp16 = einsum(equation = var_28957_equation_0, values = (var_28469_cast_fp16, var_28868_cast_fp16))[name = tensor("op_28957_cast_fp16")]; + tensor var_28959_equation_0 = const()[name = tensor("op_28959_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_28959_cast_fp16 = einsum(equation = var_28959_equation_0, values = (var_28469_cast_fp16, var_28869_cast_fp16))[name = tensor("op_28959_cast_fp16")]; + tensor var_28961_equation_0 = const()[name = tensor("op_28961_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_28961_cast_fp16 = einsum(equation = var_28961_equation_0, values = (var_28469_cast_fp16, var_28870_cast_fp16))[name = tensor("op_28961_cast_fp16")]; + tensor var_28963_equation_0 = const()[name = tensor("op_28963_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_28963_cast_fp16 = einsum(equation = var_28963_equation_0, values = (var_28469_cast_fp16, var_28871_cast_fp16))[name = tensor("op_28963_cast_fp16")]; + tensor var_28965_equation_0 = const()[name = tensor("op_28965_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_28965_cast_fp16 = einsum(equation = var_28965_equation_0, values = (var_28473_cast_fp16, var_28872_cast_fp16))[name = tensor("op_28965_cast_fp16")]; + tensor var_28967_equation_0 = const()[name = tensor("op_28967_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_28967_cast_fp16 = einsum(equation = var_28967_equation_0, values = (var_28473_cast_fp16, var_28873_cast_fp16))[name = tensor("op_28967_cast_fp16")]; + tensor var_28969_equation_0 = const()[name = tensor("op_28969_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_28969_cast_fp16 = einsum(equation = var_28969_equation_0, values = (var_28473_cast_fp16, var_28874_cast_fp16))[name = tensor("op_28969_cast_fp16")]; + tensor var_28971_equation_0 = const()[name = tensor("op_28971_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_28971_cast_fp16 = einsum(equation = var_28971_equation_0, values = (var_28473_cast_fp16, var_28875_cast_fp16))[name = tensor("op_28971_cast_fp16")]; + tensor var_28973_equation_0 = const()[name = tensor("op_28973_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_28973_cast_fp16 = einsum(equation = var_28973_equation_0, values = (var_28477_cast_fp16, var_28876_cast_fp16))[name = tensor("op_28973_cast_fp16")]; + tensor var_28975_equation_0 = const()[name = tensor("op_28975_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_28975_cast_fp16 = einsum(equation = var_28975_equation_0, values = (var_28477_cast_fp16, var_28877_cast_fp16))[name = tensor("op_28975_cast_fp16")]; + tensor var_28977_equation_0 = const()[name = tensor("op_28977_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_28977_cast_fp16 = einsum(equation = var_28977_equation_0, values = (var_28477_cast_fp16, var_28878_cast_fp16))[name = tensor("op_28977_cast_fp16")]; + tensor var_28979_equation_0 = const()[name = tensor("op_28979_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_28979_cast_fp16 = einsum(equation = var_28979_equation_0, values = (var_28477_cast_fp16, var_28879_cast_fp16))[name = tensor("op_28979_cast_fp16")]; + tensor var_28981_equation_0 = const()[name = tensor("op_28981_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_28981_cast_fp16 = einsum(equation = var_28981_equation_0, values = (var_28481_cast_fp16, var_28880_cast_fp16))[name = tensor("op_28981_cast_fp16")]; + tensor var_28983_equation_0 = const()[name = tensor("op_28983_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_28983_cast_fp16 = einsum(equation = var_28983_equation_0, values = (var_28481_cast_fp16, var_28881_cast_fp16))[name = tensor("op_28983_cast_fp16")]; + tensor var_28985_equation_0 = const()[name = tensor("op_28985_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_28985_cast_fp16 = einsum(equation = var_28985_equation_0, values = (var_28481_cast_fp16, var_28882_cast_fp16))[name = tensor("op_28985_cast_fp16")]; + tensor var_28987_equation_0 = const()[name = tensor("op_28987_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_28987_cast_fp16 = einsum(equation = var_28987_equation_0, values = (var_28481_cast_fp16, var_28883_cast_fp16))[name = tensor("op_28987_cast_fp16")]; + tensor var_28989_equation_0 = const()[name = tensor("op_28989_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_28989_cast_fp16 = einsum(equation = var_28989_equation_0, values = (var_28485_cast_fp16, var_28884_cast_fp16))[name = tensor("op_28989_cast_fp16")]; + tensor var_28991_equation_0 = const()[name = tensor("op_28991_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_28991_cast_fp16 = einsum(equation = var_28991_equation_0, values = (var_28485_cast_fp16, var_28885_cast_fp16))[name = tensor("op_28991_cast_fp16")]; + tensor var_28993_equation_0 = const()[name = tensor("op_28993_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_28993_cast_fp16 = einsum(equation = var_28993_equation_0, values = (var_28485_cast_fp16, var_28886_cast_fp16))[name = tensor("op_28993_cast_fp16")]; + tensor var_28995_equation_0 = const()[name = tensor("op_28995_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_28995_cast_fp16 = einsum(equation = var_28995_equation_0, values = (var_28485_cast_fp16, var_28887_cast_fp16))[name = tensor("op_28995_cast_fp16")]; + tensor var_28997_equation_0 = const()[name = tensor("op_28997_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_28997_cast_fp16 = einsum(equation = var_28997_equation_0, values = (var_28489_cast_fp16, var_28888_cast_fp16))[name = tensor("op_28997_cast_fp16")]; + tensor var_28999_equation_0 = const()[name = tensor("op_28999_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_28999_cast_fp16 = einsum(equation = var_28999_equation_0, values = (var_28489_cast_fp16, var_28889_cast_fp16))[name = tensor("op_28999_cast_fp16")]; + tensor var_29001_equation_0 = const()[name = tensor("op_29001_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_29001_cast_fp16 = einsum(equation = var_29001_equation_0, values = (var_28489_cast_fp16, var_28890_cast_fp16))[name = tensor("op_29001_cast_fp16")]; + tensor var_29003_equation_0 = const()[name = tensor("op_29003_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_29003_cast_fp16 = einsum(equation = var_29003_equation_0, values = (var_28489_cast_fp16, var_28891_cast_fp16))[name = tensor("op_29003_cast_fp16")]; + tensor var_29005_equation_0 = const()[name = tensor("op_29005_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_29005_cast_fp16 = einsum(equation = var_29005_equation_0, values = (var_28493_cast_fp16, var_28892_cast_fp16))[name = tensor("op_29005_cast_fp16")]; + tensor var_29007_equation_0 = const()[name = tensor("op_29007_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_29007_cast_fp16 = einsum(equation = var_29007_equation_0, values = (var_28493_cast_fp16, var_28893_cast_fp16))[name = tensor("op_29007_cast_fp16")]; + tensor var_29009_equation_0 = const()[name = tensor("op_29009_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_29009_cast_fp16 = einsum(equation = var_29009_equation_0, values = (var_28493_cast_fp16, var_28894_cast_fp16))[name = tensor("op_29009_cast_fp16")]; + tensor var_29011_equation_0 = const()[name = tensor("op_29011_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_29011_cast_fp16 = einsum(equation = var_29011_equation_0, values = (var_28493_cast_fp16, var_28895_cast_fp16))[name = tensor("op_29011_cast_fp16")]; + tensor var_29013_equation_0 = const()[name = tensor("op_29013_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_29013_cast_fp16 = einsum(equation = var_29013_equation_0, values = (var_28497_cast_fp16, var_28896_cast_fp16))[name = tensor("op_29013_cast_fp16")]; + tensor var_29015_equation_0 = const()[name = tensor("op_29015_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_29015_cast_fp16 = einsum(equation = var_29015_equation_0, values = (var_28497_cast_fp16, var_28897_cast_fp16))[name = tensor("op_29015_cast_fp16")]; + tensor var_29017_equation_0 = const()[name = tensor("op_29017_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_29017_cast_fp16 = einsum(equation = var_29017_equation_0, values = (var_28497_cast_fp16, var_28898_cast_fp16))[name = tensor("op_29017_cast_fp16")]; + tensor var_29019_equation_0 = const()[name = tensor("op_29019_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_29019_cast_fp16 = einsum(equation = var_29019_equation_0, values = (var_28497_cast_fp16, var_28899_cast_fp16))[name = tensor("op_29019_cast_fp16")]; + tensor var_29021_equation_0 = const()[name = tensor("op_29021_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_29021_cast_fp16 = einsum(equation = var_29021_equation_0, values = (var_28501_cast_fp16, var_28900_cast_fp16))[name = tensor("op_29021_cast_fp16")]; + tensor var_29023_equation_0 = const()[name = tensor("op_29023_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_29023_cast_fp16 = einsum(equation = var_29023_equation_0, values = (var_28501_cast_fp16, var_28901_cast_fp16))[name = tensor("op_29023_cast_fp16")]; + tensor var_29025_equation_0 = const()[name = tensor("op_29025_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_29025_cast_fp16 = einsum(equation = var_29025_equation_0, values = (var_28501_cast_fp16, var_28902_cast_fp16))[name = tensor("op_29025_cast_fp16")]; + tensor var_29027_equation_0 = const()[name = tensor("op_29027_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_29027_cast_fp16 = einsum(equation = var_29027_equation_0, values = (var_28501_cast_fp16, var_28903_cast_fp16))[name = tensor("op_29027_cast_fp16")]; + tensor var_29029_equation_0 = const()[name = tensor("op_29029_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_29029_cast_fp16 = einsum(equation = var_29029_equation_0, values = (var_28505_cast_fp16, var_28904_cast_fp16))[name = tensor("op_29029_cast_fp16")]; + tensor var_29031_equation_0 = const()[name = tensor("op_29031_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_29031_cast_fp16 = einsum(equation = var_29031_equation_0, values = (var_28505_cast_fp16, var_28905_cast_fp16))[name = tensor("op_29031_cast_fp16")]; + tensor var_29033_equation_0 = const()[name = tensor("op_29033_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_29033_cast_fp16 = einsum(equation = var_29033_equation_0, values = (var_28505_cast_fp16, var_28906_cast_fp16))[name = tensor("op_29033_cast_fp16")]; + tensor var_29035_equation_0 = const()[name = tensor("op_29035_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_29035_cast_fp16 = einsum(equation = var_29035_equation_0, values = (var_28505_cast_fp16, var_28907_cast_fp16))[name = tensor("op_29035_cast_fp16")]; + tensor var_29037_equation_0 = const()[name = tensor("op_29037_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_29037_cast_fp16 = einsum(equation = var_29037_equation_0, values = (var_28509_cast_fp16, var_28908_cast_fp16))[name = tensor("op_29037_cast_fp16")]; + tensor var_29039_equation_0 = const()[name = tensor("op_29039_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_29039_cast_fp16 = einsum(equation = var_29039_equation_0, values = (var_28509_cast_fp16, var_28909_cast_fp16))[name = tensor("op_29039_cast_fp16")]; + tensor var_29041_equation_0 = const()[name = tensor("op_29041_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_29041_cast_fp16 = einsum(equation = var_29041_equation_0, values = (var_28509_cast_fp16, var_28910_cast_fp16))[name = tensor("op_29041_cast_fp16")]; + tensor var_29043_equation_0 = const()[name = tensor("op_29043_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_29043_cast_fp16 = einsum(equation = var_29043_equation_0, values = (var_28509_cast_fp16, var_28911_cast_fp16))[name = tensor("op_29043_cast_fp16")]; + tensor var_29045_equation_0 = const()[name = tensor("op_29045_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_29045_cast_fp16 = einsum(equation = var_29045_equation_0, values = (var_28513_cast_fp16, var_28912_cast_fp16))[name = tensor("op_29045_cast_fp16")]; + tensor var_29047_equation_0 = const()[name = tensor("op_29047_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_29047_cast_fp16 = einsum(equation = var_29047_equation_0, values = (var_28513_cast_fp16, var_28913_cast_fp16))[name = tensor("op_29047_cast_fp16")]; + tensor var_29049_equation_0 = const()[name = tensor("op_29049_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_29049_cast_fp16 = einsum(equation = var_29049_equation_0, values = (var_28513_cast_fp16, var_28914_cast_fp16))[name = tensor("op_29049_cast_fp16")]; + tensor var_29051_equation_0 = const()[name = tensor("op_29051_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_29051_cast_fp16 = einsum(equation = var_29051_equation_0, values = (var_28513_cast_fp16, var_28915_cast_fp16))[name = tensor("op_29051_cast_fp16")]; + tensor var_29053_equation_0 = const()[name = tensor("op_29053_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_29053_cast_fp16 = einsum(equation = var_29053_equation_0, values = (var_28517_cast_fp16, var_28916_cast_fp16))[name = tensor("op_29053_cast_fp16")]; + tensor var_29055_equation_0 = const()[name = tensor("op_29055_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_29055_cast_fp16 = einsum(equation = var_29055_equation_0, values = (var_28517_cast_fp16, var_28917_cast_fp16))[name = tensor("op_29055_cast_fp16")]; + tensor var_29057_equation_0 = const()[name = tensor("op_29057_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_29057_cast_fp16 = einsum(equation = var_29057_equation_0, values = (var_28517_cast_fp16, var_28918_cast_fp16))[name = tensor("op_29057_cast_fp16")]; + tensor var_29059_equation_0 = const()[name = tensor("op_29059_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_29059_cast_fp16 = einsum(equation = var_29059_equation_0, values = (var_28517_cast_fp16, var_28919_cast_fp16))[name = tensor("op_29059_cast_fp16")]; + tensor var_29061_equation_0 = const()[name = tensor("op_29061_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_29061_cast_fp16 = einsum(equation = var_29061_equation_0, values = (var_28521_cast_fp16, var_28920_cast_fp16))[name = tensor("op_29061_cast_fp16")]; + tensor var_29063_equation_0 = const()[name = tensor("op_29063_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_29063_cast_fp16 = einsum(equation = var_29063_equation_0, values = (var_28521_cast_fp16, var_28921_cast_fp16))[name = tensor("op_29063_cast_fp16")]; + tensor var_29065_equation_0 = const()[name = tensor("op_29065_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_29065_cast_fp16 = einsum(equation = var_29065_equation_0, values = (var_28521_cast_fp16, var_28922_cast_fp16))[name = tensor("op_29065_cast_fp16")]; + tensor var_29067_equation_0 = const()[name = tensor("op_29067_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_29067_cast_fp16 = einsum(equation = var_29067_equation_0, values = (var_28521_cast_fp16, var_28923_cast_fp16))[name = tensor("op_29067_cast_fp16")]; + tensor var_29069_equation_0 = const()[name = tensor("op_29069_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_29069_cast_fp16 = einsum(equation = var_29069_equation_0, values = (var_28525_cast_fp16, var_28924_cast_fp16))[name = tensor("op_29069_cast_fp16")]; + tensor var_29071_equation_0 = const()[name = tensor("op_29071_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_29071_cast_fp16 = einsum(equation = var_29071_equation_0, values = (var_28525_cast_fp16, var_28925_cast_fp16))[name = tensor("op_29071_cast_fp16")]; + tensor var_29073_equation_0 = const()[name = tensor("op_29073_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_29073_cast_fp16 = einsum(equation = var_29073_equation_0, values = (var_28525_cast_fp16, var_28926_cast_fp16))[name = tensor("op_29073_cast_fp16")]; + tensor var_29075_equation_0 = const()[name = tensor("op_29075_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_29075_cast_fp16 = einsum(equation = var_29075_equation_0, values = (var_28525_cast_fp16, var_28927_cast_fp16))[name = tensor("op_29075_cast_fp16")]; + tensor var_29077_equation_0 = const()[name = tensor("op_29077_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_29077_cast_fp16 = einsum(equation = var_29077_equation_0, values = (var_28529_cast_fp16, var_28928_cast_fp16))[name = tensor("op_29077_cast_fp16")]; + tensor var_29079_equation_0 = const()[name = tensor("op_29079_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_29079_cast_fp16 = einsum(equation = var_29079_equation_0, values = (var_28529_cast_fp16, var_28929_cast_fp16))[name = tensor("op_29079_cast_fp16")]; + tensor var_29081_equation_0 = const()[name = tensor("op_29081_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_29081_cast_fp16 = einsum(equation = var_29081_equation_0, values = (var_28529_cast_fp16, var_28930_cast_fp16))[name = tensor("op_29081_cast_fp16")]; + tensor var_29083_equation_0 = const()[name = tensor("op_29083_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_29083_cast_fp16 = einsum(equation = var_29083_equation_0, values = (var_28529_cast_fp16, var_28931_cast_fp16))[name = tensor("op_29083_cast_fp16")]; + tensor var_29085_equation_0 = const()[name = tensor("op_29085_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_29085_cast_fp16 = einsum(equation = var_29085_equation_0, values = (var_28533_cast_fp16, var_28932_cast_fp16))[name = tensor("op_29085_cast_fp16")]; + tensor var_29087_equation_0 = const()[name = tensor("op_29087_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_29087_cast_fp16 = einsum(equation = var_29087_equation_0, values = (var_28533_cast_fp16, var_28933_cast_fp16))[name = tensor("op_29087_cast_fp16")]; + tensor var_29089_equation_0 = const()[name = tensor("op_29089_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_29089_cast_fp16 = einsum(equation = var_29089_equation_0, values = (var_28533_cast_fp16, var_28934_cast_fp16))[name = tensor("op_29089_cast_fp16")]; + tensor var_29091_equation_0 = const()[name = tensor("op_29091_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_29091_cast_fp16 = einsum(equation = var_29091_equation_0, values = (var_28533_cast_fp16, var_28935_cast_fp16))[name = tensor("op_29091_cast_fp16")]; + tensor var_29093_equation_0 = const()[name = tensor("op_29093_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_29093_cast_fp16 = einsum(equation = var_29093_equation_0, values = (var_28537_cast_fp16, var_28936_cast_fp16))[name = tensor("op_29093_cast_fp16")]; + tensor var_29095_equation_0 = const()[name = tensor("op_29095_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_29095_cast_fp16 = einsum(equation = var_29095_equation_0, values = (var_28537_cast_fp16, var_28937_cast_fp16))[name = tensor("op_29095_cast_fp16")]; + tensor var_29097_equation_0 = const()[name = tensor("op_29097_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_29097_cast_fp16 = einsum(equation = var_29097_equation_0, values = (var_28537_cast_fp16, var_28938_cast_fp16))[name = tensor("op_29097_cast_fp16")]; + tensor var_29099_equation_0 = const()[name = tensor("op_29099_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_29099_cast_fp16 = einsum(equation = var_29099_equation_0, values = (var_28537_cast_fp16, var_28939_cast_fp16))[name = tensor("op_29099_cast_fp16")]; + tensor var_29101_interleave_0 = const()[name = tensor("op_29101_interleave_0"), val = tensor(false)]; + tensor var_29101_cast_fp16 = concat(axis = var_27660, interleave = var_29101_interleave_0, values = (var_28941_cast_fp16, var_28943_cast_fp16, var_28945_cast_fp16, var_28947_cast_fp16))[name = tensor("op_29101_cast_fp16")]; + tensor var_29103_interleave_0 = const()[name = tensor("op_29103_interleave_0"), val = tensor(false)]; + tensor var_29103_cast_fp16 = concat(axis = var_27660, interleave = var_29103_interleave_0, values = (var_28949_cast_fp16, var_28951_cast_fp16, var_28953_cast_fp16, var_28955_cast_fp16))[name = tensor("op_29103_cast_fp16")]; + tensor var_29105_interleave_0 = const()[name = tensor("op_29105_interleave_0"), val = tensor(false)]; + tensor var_29105_cast_fp16 = concat(axis = var_27660, interleave = var_29105_interleave_0, values = (var_28957_cast_fp16, var_28959_cast_fp16, var_28961_cast_fp16, var_28963_cast_fp16))[name = tensor("op_29105_cast_fp16")]; + tensor var_29107_interleave_0 = const()[name = tensor("op_29107_interleave_0"), val = tensor(false)]; + tensor var_29107_cast_fp16 = concat(axis = var_27660, interleave = var_29107_interleave_0, values = (var_28965_cast_fp16, var_28967_cast_fp16, var_28969_cast_fp16, var_28971_cast_fp16))[name = tensor("op_29107_cast_fp16")]; + tensor var_29109_interleave_0 = const()[name = tensor("op_29109_interleave_0"), val = tensor(false)]; + tensor var_29109_cast_fp16 = concat(axis = var_27660, interleave = var_29109_interleave_0, values = (var_28973_cast_fp16, var_28975_cast_fp16, var_28977_cast_fp16, var_28979_cast_fp16))[name = tensor("op_29109_cast_fp16")]; + tensor var_29111_interleave_0 = const()[name = tensor("op_29111_interleave_0"), val = tensor(false)]; + tensor var_29111_cast_fp16 = concat(axis = var_27660, interleave = var_29111_interleave_0, values = (var_28981_cast_fp16, var_28983_cast_fp16, var_28985_cast_fp16, var_28987_cast_fp16))[name = tensor("op_29111_cast_fp16")]; + tensor var_29113_interleave_0 = const()[name = tensor("op_29113_interleave_0"), val = tensor(false)]; + tensor var_29113_cast_fp16 = concat(axis = var_27660, interleave = var_29113_interleave_0, values = (var_28989_cast_fp16, var_28991_cast_fp16, var_28993_cast_fp16, var_28995_cast_fp16))[name = tensor("op_29113_cast_fp16")]; + tensor var_29115_interleave_0 = const()[name = tensor("op_29115_interleave_0"), val = tensor(false)]; + tensor var_29115_cast_fp16 = concat(axis = var_27660, interleave = var_29115_interleave_0, values = (var_28997_cast_fp16, var_28999_cast_fp16, var_29001_cast_fp16, var_29003_cast_fp16))[name = tensor("op_29115_cast_fp16")]; + tensor var_29117_interleave_0 = const()[name = tensor("op_29117_interleave_0"), val = tensor(false)]; + tensor var_29117_cast_fp16 = concat(axis = var_27660, interleave = var_29117_interleave_0, values = (var_29005_cast_fp16, var_29007_cast_fp16, var_29009_cast_fp16, var_29011_cast_fp16))[name = tensor("op_29117_cast_fp16")]; + tensor var_29119_interleave_0 = const()[name = tensor("op_29119_interleave_0"), val = tensor(false)]; + tensor var_29119_cast_fp16 = concat(axis = var_27660, interleave = var_29119_interleave_0, values = (var_29013_cast_fp16, var_29015_cast_fp16, var_29017_cast_fp16, var_29019_cast_fp16))[name = tensor("op_29119_cast_fp16")]; + tensor var_29121_interleave_0 = const()[name = tensor("op_29121_interleave_0"), val = tensor(false)]; + tensor var_29121_cast_fp16 = concat(axis = var_27660, interleave = var_29121_interleave_0, values = (var_29021_cast_fp16, var_29023_cast_fp16, var_29025_cast_fp16, var_29027_cast_fp16))[name = tensor("op_29121_cast_fp16")]; + tensor var_29123_interleave_0 = const()[name = tensor("op_29123_interleave_0"), val = tensor(false)]; + tensor var_29123_cast_fp16 = concat(axis = var_27660, interleave = var_29123_interleave_0, values = (var_29029_cast_fp16, var_29031_cast_fp16, var_29033_cast_fp16, var_29035_cast_fp16))[name = tensor("op_29123_cast_fp16")]; + tensor var_29125_interleave_0 = const()[name = tensor("op_29125_interleave_0"), val = tensor(false)]; + tensor var_29125_cast_fp16 = concat(axis = var_27660, interleave = var_29125_interleave_0, values = (var_29037_cast_fp16, var_29039_cast_fp16, var_29041_cast_fp16, var_29043_cast_fp16))[name = tensor("op_29125_cast_fp16")]; + tensor var_29127_interleave_0 = const()[name = tensor("op_29127_interleave_0"), val = tensor(false)]; + tensor var_29127_cast_fp16 = concat(axis = var_27660, interleave = var_29127_interleave_0, values = (var_29045_cast_fp16, var_29047_cast_fp16, var_29049_cast_fp16, var_29051_cast_fp16))[name = tensor("op_29127_cast_fp16")]; + tensor var_29129_interleave_0 = const()[name = tensor("op_29129_interleave_0"), val = tensor(false)]; + tensor var_29129_cast_fp16 = concat(axis = var_27660, interleave = var_29129_interleave_0, values = (var_29053_cast_fp16, var_29055_cast_fp16, var_29057_cast_fp16, var_29059_cast_fp16))[name = tensor("op_29129_cast_fp16")]; + tensor var_29131_interleave_0 = const()[name = tensor("op_29131_interleave_0"), val = tensor(false)]; + tensor var_29131_cast_fp16 = concat(axis = var_27660, interleave = var_29131_interleave_0, values = (var_29061_cast_fp16, var_29063_cast_fp16, var_29065_cast_fp16, var_29067_cast_fp16))[name = tensor("op_29131_cast_fp16")]; + tensor var_29133_interleave_0 = const()[name = tensor("op_29133_interleave_0"), val = tensor(false)]; + tensor var_29133_cast_fp16 = concat(axis = var_27660, interleave = var_29133_interleave_0, values = (var_29069_cast_fp16, var_29071_cast_fp16, var_29073_cast_fp16, var_29075_cast_fp16))[name = tensor("op_29133_cast_fp16")]; + tensor var_29135_interleave_0 = const()[name = tensor("op_29135_interleave_0"), val = tensor(false)]; + tensor var_29135_cast_fp16 = concat(axis = var_27660, interleave = var_29135_interleave_0, values = (var_29077_cast_fp16, var_29079_cast_fp16, var_29081_cast_fp16, var_29083_cast_fp16))[name = tensor("op_29135_cast_fp16")]; + tensor var_29137_interleave_0 = const()[name = tensor("op_29137_interleave_0"), val = tensor(false)]; + tensor var_29137_cast_fp16 = concat(axis = var_27660, interleave = var_29137_interleave_0, values = (var_29085_cast_fp16, var_29087_cast_fp16, var_29089_cast_fp16, var_29091_cast_fp16))[name = tensor("op_29137_cast_fp16")]; + tensor var_29139_interleave_0 = const()[name = tensor("op_29139_interleave_0"), val = tensor(false)]; + tensor var_29139_cast_fp16 = concat(axis = var_27660, interleave = var_29139_interleave_0, values = (var_29093_cast_fp16, var_29095_cast_fp16, var_29097_cast_fp16, var_29099_cast_fp16))[name = tensor("op_29139_cast_fp16")]; + tensor input_145_interleave_0 = const()[name = tensor("input_145_interleave_0"), val = tensor(false)]; + tensor input_145_cast_fp16 = concat(axis = var_27685, interleave = input_145_interleave_0, values = (var_29101_cast_fp16, var_29103_cast_fp16, var_29105_cast_fp16, var_29107_cast_fp16, var_29109_cast_fp16, var_29111_cast_fp16, var_29113_cast_fp16, var_29115_cast_fp16, var_29117_cast_fp16, var_29119_cast_fp16, var_29121_cast_fp16, var_29123_cast_fp16, var_29125_cast_fp16, var_29127_cast_fp16, var_29129_cast_fp16, var_29131_cast_fp16, var_29133_cast_fp16, var_29135_cast_fp16, var_29137_cast_fp16, var_29139_cast_fp16))[name = tensor("input_145_cast_fp16")]; + tensor var_29144 = const()[name = tensor("op_29144"), val = tensor([1, 1])]; + tensor var_29146 = const()[name = tensor("op_29146"), val = tensor([1, 1])]; + tensor obj_75_pad_type_0 = const()[name = tensor("obj_75_pad_type_0"), val = tensor("custom")]; + tensor obj_75_pad_0 = const()[name = tensor("obj_75_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_18_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_18_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(732495680)))]; + tensor layers_18_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_18_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(735772544)))]; + tensor obj_75_cast_fp16 = conv(bias = layers_18_self_attn_o_proj_bias_to_fp16, dilations = var_29146, groups = var_27685, pad = obj_75_pad_0, pad_type = obj_75_pad_type_0, strides = var_29144, weight = layers_18_self_attn_o_proj_weight_to_fp16, x = input_145_cast_fp16)[name = tensor("obj_75_cast_fp16")]; + tensor inputs_75_cast_fp16 = add(x = inputs_73_cast_fp16, y = obj_75_cast_fp16)[name = tensor("inputs_75_cast_fp16")]; + tensor var_29152 = const()[name = tensor("op_29152"), val = tensor([1])]; + tensor channels_mean_75_cast_fp16 = reduce_mean(axes = var_29152, keep_dims = var_27686, x = inputs_75_cast_fp16)[name = tensor("channels_mean_75_cast_fp16")]; + tensor zero_mean_75_cast_fp16 = sub(x = inputs_75_cast_fp16, y = channels_mean_75_cast_fp16)[name = tensor("zero_mean_75_cast_fp16")]; + tensor zero_mean_sq_75_cast_fp16 = mul(x = zero_mean_75_cast_fp16, y = zero_mean_75_cast_fp16)[name = tensor("zero_mean_sq_75_cast_fp16")]; + tensor var_29156 = const()[name = tensor("op_29156"), val = tensor([1])]; + tensor var_29157_cast_fp16 = reduce_mean(axes = var_29156, keep_dims = var_27686, x = zero_mean_sq_75_cast_fp16)[name = tensor("op_29157_cast_fp16")]; + tensor var_29158_to_fp16 = const()[name = tensor("op_29158_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_29159_cast_fp16 = add(x = var_29157_cast_fp16, y = var_29158_to_fp16)[name = tensor("op_29159_cast_fp16")]; + tensor denom_75_epsilon_0_to_fp16 = const()[name = tensor("denom_75_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_75_cast_fp16 = rsqrt(epsilon = denom_75_epsilon_0_to_fp16, x = var_29159_cast_fp16)[name = tensor("denom_75_cast_fp16")]; + tensor out_75_cast_fp16 = mul(x = zero_mean_75_cast_fp16, y = denom_75_cast_fp16)[name = tensor("out_75_cast_fp16")]; + tensor input_147_gamma_0_to_fp16 = const()[name = tensor("input_147_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(735775168)))]; + tensor input_147_beta_0_to_fp16 = const()[name = tensor("input_147_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(735777792)))]; + tensor input_147_epsilon_0_to_fp16 = const()[name = tensor("input_147_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_147_cast_fp16 = batch_norm(beta = input_147_beta_0_to_fp16, epsilon = input_147_epsilon_0_to_fp16, gamma = input_147_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_75_cast_fp16)[name = tensor("input_147_cast_fp16")]; + tensor var_29170 = const()[name = tensor("op_29170"), val = tensor([1, 1])]; + tensor var_29172 = const()[name = tensor("op_29172"), val = tensor([1, 1])]; + tensor input_149_pad_type_0 = const()[name = tensor("input_149_pad_type_0"), val = tensor("custom")]; + tensor input_149_pad_0 = const()[name = tensor("input_149_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_18_fc1_weight_to_fp16 = const()[name = tensor("layers_18_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(735780416)))]; + tensor layers_18_fc1_bias_to_fp16 = const()[name = tensor("layers_18_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(748887680)))]; + tensor input_149_cast_fp16 = conv(bias = layers_18_fc1_bias_to_fp16, dilations = var_29172, groups = var_27685, pad = input_149_pad_0, pad_type = input_149_pad_type_0, strides = var_29170, weight = layers_18_fc1_weight_to_fp16, x = input_147_cast_fp16)[name = tensor("input_149_cast_fp16")]; + tensor input_151_mode_0 = const()[name = tensor("input_151_mode_0"), val = tensor("EXACT")]; + tensor input_151_cast_fp16 = gelu(mode = input_151_mode_0, x = input_149_cast_fp16)[name = tensor("input_151_cast_fp16")]; + tensor var_29178 = const()[name = tensor("op_29178"), val = tensor([1, 1])]; + tensor var_29180 = const()[name = tensor("op_29180"), val = tensor([1, 1])]; + tensor hidden_states_41_pad_type_0 = const()[name = tensor("hidden_states_41_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_41_pad_0 = const()[name = tensor("hidden_states_41_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_18_fc2_weight_to_fp16 = const()[name = tensor("layers_18_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(748897984)))]; + tensor layers_18_fc2_bias_to_fp16 = const()[name = tensor("layers_18_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(762005248)))]; + tensor hidden_states_41_cast_fp16 = conv(bias = layers_18_fc2_bias_to_fp16, dilations = var_29180, groups = var_27685, pad = hidden_states_41_pad_0, pad_type = hidden_states_41_pad_type_0, strides = var_29178, weight = layers_18_fc2_weight_to_fp16, x = input_151_cast_fp16)[name = tensor("hidden_states_41_cast_fp16")]; + tensor inputs_77_cast_fp16 = add(x = inputs_75_cast_fp16, y = hidden_states_41_cast_fp16)[name = tensor("inputs_77_cast_fp16")]; + tensor var_29187 = const()[name = tensor("op_29187"), val = tensor(3)]; + tensor var_29212 = const()[name = tensor("op_29212"), val = tensor(1)]; + tensor var_29213 = const()[name = tensor("op_29213"), val = tensor(true)]; + tensor var_29223 = const()[name = tensor("op_29223"), val = tensor([1])]; + tensor channels_mean_77_cast_fp16 = reduce_mean(axes = var_29223, keep_dims = var_29213, x = inputs_77_cast_fp16)[name = tensor("channels_mean_77_cast_fp16")]; + tensor zero_mean_77_cast_fp16 = sub(x = inputs_77_cast_fp16, y = channels_mean_77_cast_fp16)[name = tensor("zero_mean_77_cast_fp16")]; + tensor zero_mean_sq_77_cast_fp16 = mul(x = zero_mean_77_cast_fp16, y = zero_mean_77_cast_fp16)[name = tensor("zero_mean_sq_77_cast_fp16")]; + tensor var_29227 = const()[name = tensor("op_29227"), val = tensor([1])]; + tensor var_29228_cast_fp16 = reduce_mean(axes = var_29227, keep_dims = var_29213, x = zero_mean_sq_77_cast_fp16)[name = tensor("op_29228_cast_fp16")]; + tensor var_29229_to_fp16 = const()[name = tensor("op_29229_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_29230_cast_fp16 = add(x = var_29228_cast_fp16, y = var_29229_to_fp16)[name = tensor("op_29230_cast_fp16")]; + tensor denom_77_epsilon_0_to_fp16 = const()[name = tensor("denom_77_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_77_cast_fp16 = rsqrt(epsilon = denom_77_epsilon_0_to_fp16, x = var_29230_cast_fp16)[name = tensor("denom_77_cast_fp16")]; + tensor out_77_cast_fp16 = mul(x = zero_mean_77_cast_fp16, y = denom_77_cast_fp16)[name = tensor("out_77_cast_fp16")]; + tensor obj_77_gamma_0_to_fp16 = const()[name = tensor("obj_77_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(762007872)))]; + tensor obj_77_beta_0_to_fp16 = const()[name = tensor("obj_77_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(762010496)))]; + tensor obj_77_epsilon_0_to_fp16 = const()[name = tensor("obj_77_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_77_cast_fp16 = batch_norm(beta = obj_77_beta_0_to_fp16, epsilon = obj_77_epsilon_0_to_fp16, gamma = obj_77_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_77_cast_fp16)[name = tensor("obj_77_cast_fp16")]; + tensor var_29245 = const()[name = tensor("op_29245"), val = tensor([1, 1])]; + tensor var_29247 = const()[name = tensor("op_29247"), val = tensor([1, 1])]; + tensor query_39_pad_type_0 = const()[name = tensor("query_39_pad_type_0"), val = tensor("custom")]; + tensor query_39_pad_0 = const()[name = tensor("query_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_19_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_19_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(762013120)))]; + tensor layers_19_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_19_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(765289984)))]; + tensor query_39_cast_fp16 = conv(bias = layers_19_self_attn_q_proj_bias_to_fp16, dilations = var_29247, groups = var_29212, pad = query_39_pad_0, pad_type = query_39_pad_type_0, strides = var_29245, weight = layers_19_self_attn_q_proj_weight_to_fp16, x = obj_77_cast_fp16)[name = tensor("query_39_cast_fp16")]; + tensor var_29251 = const()[name = tensor("op_29251"), val = tensor([1, 1])]; + tensor var_29253 = const()[name = tensor("op_29253"), val = tensor([1, 1])]; + tensor key_39_pad_type_0 = const()[name = tensor("key_39_pad_type_0"), val = tensor("custom")]; + tensor key_39_pad_0 = const()[name = tensor("key_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_19_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_19_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(765292608)))]; + tensor key_39_cast_fp16 = conv(dilations = var_29253, groups = var_29212, pad = key_39_pad_0, pad_type = key_39_pad_type_0, strides = var_29251, weight = layers_19_self_attn_k_proj_weight_to_fp16, x = obj_77_cast_fp16)[name = tensor("key_39_cast_fp16")]; + tensor var_29258 = const()[name = tensor("op_29258"), val = tensor([1, 1])]; + tensor var_29260 = const()[name = tensor("op_29260"), val = tensor([1, 1])]; + tensor value_39_pad_type_0 = const()[name = tensor("value_39_pad_type_0"), val = tensor("custom")]; + tensor value_39_pad_0 = const()[name = tensor("value_39_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_19_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_19_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(768569472)))]; + tensor layers_19_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_19_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(771846336)))]; + tensor value_39_cast_fp16 = conv(bias = layers_19_self_attn_v_proj_bias_to_fp16, dilations = var_29260, groups = var_29212, pad = value_39_pad_0, pad_type = value_39_pad_type_0, strides = var_29258, weight = layers_19_self_attn_v_proj_weight_to_fp16, x = obj_77_cast_fp16)[name = tensor("value_39_cast_fp16")]; + tensor var_29267_begin_0 = const()[name = tensor("op_29267_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_29267_end_0 = const()[name = tensor("op_29267_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_29267_end_mask_0 = const()[name = tensor("op_29267_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_29267_cast_fp16 = slice_by_index(begin = var_29267_begin_0, end = var_29267_end_0, end_mask = var_29267_end_mask_0, x = query_39_cast_fp16)[name = tensor("op_29267_cast_fp16")]; + tensor var_29271_begin_0 = const()[name = tensor("op_29271_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_29271_end_0 = const()[name = tensor("op_29271_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_29271_end_mask_0 = const()[name = tensor("op_29271_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_29271_cast_fp16 = slice_by_index(begin = var_29271_begin_0, end = var_29271_end_0, end_mask = var_29271_end_mask_0, x = query_39_cast_fp16)[name = tensor("op_29271_cast_fp16")]; + tensor var_29275_begin_0 = const()[name = tensor("op_29275_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_29275_end_0 = const()[name = tensor("op_29275_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_29275_end_mask_0 = const()[name = tensor("op_29275_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_29275_cast_fp16 = slice_by_index(begin = var_29275_begin_0, end = var_29275_end_0, end_mask = var_29275_end_mask_0, x = query_39_cast_fp16)[name = tensor("op_29275_cast_fp16")]; + tensor var_29279_begin_0 = const()[name = tensor("op_29279_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_29279_end_0 = const()[name = tensor("op_29279_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_29279_end_mask_0 = const()[name = tensor("op_29279_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_29279_cast_fp16 = slice_by_index(begin = var_29279_begin_0, end = var_29279_end_0, end_mask = var_29279_end_mask_0, x = query_39_cast_fp16)[name = tensor("op_29279_cast_fp16")]; + tensor var_29283_begin_0 = const()[name = tensor("op_29283_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_29283_end_0 = const()[name = tensor("op_29283_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_29283_end_mask_0 = const()[name = tensor("op_29283_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_29283_cast_fp16 = slice_by_index(begin = var_29283_begin_0, end = var_29283_end_0, end_mask = var_29283_end_mask_0, x = query_39_cast_fp16)[name = tensor("op_29283_cast_fp16")]; + tensor var_29287_begin_0 = const()[name = tensor("op_29287_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_29287_end_0 = const()[name = tensor("op_29287_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_29287_end_mask_0 = const()[name = tensor("op_29287_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_29287_cast_fp16 = slice_by_index(begin = var_29287_begin_0, end = var_29287_end_0, end_mask = var_29287_end_mask_0, x = query_39_cast_fp16)[name = tensor("op_29287_cast_fp16")]; + tensor var_29291_begin_0 = const()[name = tensor("op_29291_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_29291_end_0 = const()[name = tensor("op_29291_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_29291_end_mask_0 = const()[name = tensor("op_29291_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_29291_cast_fp16 = slice_by_index(begin = var_29291_begin_0, end = var_29291_end_0, end_mask = var_29291_end_mask_0, x = query_39_cast_fp16)[name = tensor("op_29291_cast_fp16")]; + tensor var_29295_begin_0 = const()[name = tensor("op_29295_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_29295_end_0 = const()[name = tensor("op_29295_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_29295_end_mask_0 = const()[name = tensor("op_29295_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_29295_cast_fp16 = slice_by_index(begin = var_29295_begin_0, end = var_29295_end_0, end_mask = var_29295_end_mask_0, x = query_39_cast_fp16)[name = tensor("op_29295_cast_fp16")]; + tensor var_29299_begin_0 = const()[name = tensor("op_29299_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_29299_end_0 = const()[name = tensor("op_29299_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_29299_end_mask_0 = const()[name = tensor("op_29299_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_29299_cast_fp16 = slice_by_index(begin = var_29299_begin_0, end = var_29299_end_0, end_mask = var_29299_end_mask_0, x = query_39_cast_fp16)[name = tensor("op_29299_cast_fp16")]; + tensor var_29303_begin_0 = const()[name = tensor("op_29303_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_29303_end_0 = const()[name = tensor("op_29303_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_29303_end_mask_0 = const()[name = tensor("op_29303_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_29303_cast_fp16 = slice_by_index(begin = var_29303_begin_0, end = var_29303_end_0, end_mask = var_29303_end_mask_0, x = query_39_cast_fp16)[name = tensor("op_29303_cast_fp16")]; + tensor var_29307_begin_0 = const()[name = tensor("op_29307_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_29307_end_0 = const()[name = tensor("op_29307_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_29307_end_mask_0 = const()[name = tensor("op_29307_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_29307_cast_fp16 = slice_by_index(begin = var_29307_begin_0, end = var_29307_end_0, end_mask = var_29307_end_mask_0, x = query_39_cast_fp16)[name = tensor("op_29307_cast_fp16")]; + tensor var_29311_begin_0 = const()[name = tensor("op_29311_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_29311_end_0 = const()[name = tensor("op_29311_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_29311_end_mask_0 = const()[name = tensor("op_29311_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_29311_cast_fp16 = slice_by_index(begin = var_29311_begin_0, end = var_29311_end_0, end_mask = var_29311_end_mask_0, x = query_39_cast_fp16)[name = tensor("op_29311_cast_fp16")]; + tensor var_29315_begin_0 = const()[name = tensor("op_29315_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_29315_end_0 = const()[name = tensor("op_29315_end_0"), val = tensor([1, 832, 1, 1500])]; + tensor var_29315_end_mask_0 = const()[name = tensor("op_29315_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_29315_cast_fp16 = slice_by_index(begin = var_29315_begin_0, end = var_29315_end_0, end_mask = var_29315_end_mask_0, x = query_39_cast_fp16)[name = tensor("op_29315_cast_fp16")]; + tensor var_29319_begin_0 = const()[name = tensor("op_29319_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_29319_end_0 = const()[name = tensor("op_29319_end_0"), val = tensor([1, 896, 1, 1500])]; + tensor var_29319_end_mask_0 = const()[name = tensor("op_29319_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_29319_cast_fp16 = slice_by_index(begin = var_29319_begin_0, end = var_29319_end_0, end_mask = var_29319_end_mask_0, x = query_39_cast_fp16)[name = tensor("op_29319_cast_fp16")]; + tensor var_29323_begin_0 = const()[name = tensor("op_29323_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_29323_end_0 = const()[name = tensor("op_29323_end_0"), val = tensor([1, 960, 1, 1500])]; + tensor var_29323_end_mask_0 = const()[name = tensor("op_29323_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_29323_cast_fp16 = slice_by_index(begin = var_29323_begin_0, end = var_29323_end_0, end_mask = var_29323_end_mask_0, x = query_39_cast_fp16)[name = tensor("op_29323_cast_fp16")]; + tensor var_29327_begin_0 = const()[name = tensor("op_29327_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_29327_end_0 = const()[name = tensor("op_29327_end_0"), val = tensor([1, 1024, 1, 1500])]; + tensor var_29327_end_mask_0 = const()[name = tensor("op_29327_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_29327_cast_fp16 = slice_by_index(begin = var_29327_begin_0, end = var_29327_end_0, end_mask = var_29327_end_mask_0, x = query_39_cast_fp16)[name = tensor("op_29327_cast_fp16")]; + tensor var_29331_begin_0 = const()[name = tensor("op_29331_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_29331_end_0 = const()[name = tensor("op_29331_end_0"), val = tensor([1, 1088, 1, 1500])]; + tensor var_29331_end_mask_0 = const()[name = tensor("op_29331_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_29331_cast_fp16 = slice_by_index(begin = var_29331_begin_0, end = var_29331_end_0, end_mask = var_29331_end_mask_0, x = query_39_cast_fp16)[name = tensor("op_29331_cast_fp16")]; + tensor var_29335_begin_0 = const()[name = tensor("op_29335_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_29335_end_0 = const()[name = tensor("op_29335_end_0"), val = tensor([1, 1152, 1, 1500])]; + tensor var_29335_end_mask_0 = const()[name = tensor("op_29335_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_29335_cast_fp16 = slice_by_index(begin = var_29335_begin_0, end = var_29335_end_0, end_mask = var_29335_end_mask_0, x = query_39_cast_fp16)[name = tensor("op_29335_cast_fp16")]; + tensor var_29339_begin_0 = const()[name = tensor("op_29339_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_29339_end_0 = const()[name = tensor("op_29339_end_0"), val = tensor([1, 1216, 1, 1500])]; + tensor var_29339_end_mask_0 = const()[name = tensor("op_29339_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_29339_cast_fp16 = slice_by_index(begin = var_29339_begin_0, end = var_29339_end_0, end_mask = var_29339_end_mask_0, x = query_39_cast_fp16)[name = tensor("op_29339_cast_fp16")]; + tensor var_29343_begin_0 = const()[name = tensor("op_29343_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_29343_end_0 = const()[name = tensor("op_29343_end_0"), val = tensor([1, 1280, 1, 1500])]; + tensor var_29343_end_mask_0 = const()[name = tensor("op_29343_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_29343_cast_fp16 = slice_by_index(begin = var_29343_begin_0, end = var_29343_end_0, end_mask = var_29343_end_mask_0, x = query_39_cast_fp16)[name = tensor("op_29343_cast_fp16")]; + tensor var_29352_begin_0 = const()[name = tensor("op_29352_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_29352_end_0 = const()[name = tensor("op_29352_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_29352_end_mask_0 = const()[name = tensor("op_29352_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29352_cast_fp16 = slice_by_index(begin = var_29352_begin_0, end = var_29352_end_0, end_mask = var_29352_end_mask_0, x = var_29267_cast_fp16)[name = tensor("op_29352_cast_fp16")]; + tensor var_29359_begin_0 = const()[name = tensor("op_29359_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_29359_end_0 = const()[name = tensor("op_29359_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_29359_end_mask_0 = const()[name = tensor("op_29359_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29359_cast_fp16 = slice_by_index(begin = var_29359_begin_0, end = var_29359_end_0, end_mask = var_29359_end_mask_0, x = var_29267_cast_fp16)[name = tensor("op_29359_cast_fp16")]; + tensor var_29366_begin_0 = const()[name = tensor("op_29366_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_29366_end_0 = const()[name = tensor("op_29366_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_29366_end_mask_0 = const()[name = tensor("op_29366_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29366_cast_fp16 = slice_by_index(begin = var_29366_begin_0, end = var_29366_end_0, end_mask = var_29366_end_mask_0, x = var_29267_cast_fp16)[name = tensor("op_29366_cast_fp16")]; + tensor var_29373_begin_0 = const()[name = tensor("op_29373_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_29373_end_0 = const()[name = tensor("op_29373_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_29373_end_mask_0 = const()[name = tensor("op_29373_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29373_cast_fp16 = slice_by_index(begin = var_29373_begin_0, end = var_29373_end_0, end_mask = var_29373_end_mask_0, x = var_29267_cast_fp16)[name = tensor("op_29373_cast_fp16")]; + tensor var_29380_begin_0 = const()[name = tensor("op_29380_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_29380_end_0 = const()[name = tensor("op_29380_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_29380_end_mask_0 = const()[name = tensor("op_29380_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29380_cast_fp16 = slice_by_index(begin = var_29380_begin_0, end = var_29380_end_0, end_mask = var_29380_end_mask_0, x = var_29271_cast_fp16)[name = tensor("op_29380_cast_fp16")]; + tensor var_29387_begin_0 = const()[name = tensor("op_29387_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_29387_end_0 = const()[name = tensor("op_29387_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_29387_end_mask_0 = const()[name = tensor("op_29387_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29387_cast_fp16 = slice_by_index(begin = var_29387_begin_0, end = var_29387_end_0, end_mask = var_29387_end_mask_0, x = var_29271_cast_fp16)[name = tensor("op_29387_cast_fp16")]; + tensor var_29394_begin_0 = const()[name = tensor("op_29394_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_29394_end_0 = const()[name = tensor("op_29394_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_29394_end_mask_0 = const()[name = tensor("op_29394_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29394_cast_fp16 = slice_by_index(begin = var_29394_begin_0, end = var_29394_end_0, end_mask = var_29394_end_mask_0, x = var_29271_cast_fp16)[name = tensor("op_29394_cast_fp16")]; + tensor var_29401_begin_0 = const()[name = tensor("op_29401_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_29401_end_0 = const()[name = tensor("op_29401_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_29401_end_mask_0 = const()[name = tensor("op_29401_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29401_cast_fp16 = slice_by_index(begin = var_29401_begin_0, end = var_29401_end_0, end_mask = var_29401_end_mask_0, x = var_29271_cast_fp16)[name = tensor("op_29401_cast_fp16")]; + tensor var_29408_begin_0 = const()[name = tensor("op_29408_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_29408_end_0 = const()[name = tensor("op_29408_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_29408_end_mask_0 = const()[name = tensor("op_29408_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29408_cast_fp16 = slice_by_index(begin = var_29408_begin_0, end = var_29408_end_0, end_mask = var_29408_end_mask_0, x = var_29275_cast_fp16)[name = tensor("op_29408_cast_fp16")]; + tensor var_29415_begin_0 = const()[name = tensor("op_29415_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_29415_end_0 = const()[name = tensor("op_29415_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_29415_end_mask_0 = const()[name = tensor("op_29415_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29415_cast_fp16 = slice_by_index(begin = var_29415_begin_0, end = var_29415_end_0, end_mask = var_29415_end_mask_0, x = var_29275_cast_fp16)[name = tensor("op_29415_cast_fp16")]; + tensor var_29422_begin_0 = const()[name = tensor("op_29422_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_29422_end_0 = const()[name = tensor("op_29422_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_29422_end_mask_0 = const()[name = tensor("op_29422_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29422_cast_fp16 = slice_by_index(begin = var_29422_begin_0, end = var_29422_end_0, end_mask = var_29422_end_mask_0, x = var_29275_cast_fp16)[name = tensor("op_29422_cast_fp16")]; + tensor var_29429_begin_0 = const()[name = tensor("op_29429_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_29429_end_0 = const()[name = tensor("op_29429_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_29429_end_mask_0 = const()[name = tensor("op_29429_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29429_cast_fp16 = slice_by_index(begin = var_29429_begin_0, end = var_29429_end_0, end_mask = var_29429_end_mask_0, x = var_29275_cast_fp16)[name = tensor("op_29429_cast_fp16")]; + tensor var_29436_begin_0 = const()[name = tensor("op_29436_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_29436_end_0 = const()[name = tensor("op_29436_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_29436_end_mask_0 = const()[name = tensor("op_29436_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29436_cast_fp16 = slice_by_index(begin = var_29436_begin_0, end = var_29436_end_0, end_mask = var_29436_end_mask_0, x = var_29279_cast_fp16)[name = tensor("op_29436_cast_fp16")]; + tensor var_29443_begin_0 = const()[name = tensor("op_29443_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_29443_end_0 = const()[name = tensor("op_29443_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_29443_end_mask_0 = const()[name = tensor("op_29443_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29443_cast_fp16 = slice_by_index(begin = var_29443_begin_0, end = var_29443_end_0, end_mask = var_29443_end_mask_0, x = var_29279_cast_fp16)[name = tensor("op_29443_cast_fp16")]; + tensor var_29450_begin_0 = const()[name = tensor("op_29450_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_29450_end_0 = const()[name = tensor("op_29450_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_29450_end_mask_0 = const()[name = tensor("op_29450_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29450_cast_fp16 = slice_by_index(begin = var_29450_begin_0, end = var_29450_end_0, end_mask = var_29450_end_mask_0, x = var_29279_cast_fp16)[name = tensor("op_29450_cast_fp16")]; + tensor var_29457_begin_0 = const()[name = tensor("op_29457_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_29457_end_0 = const()[name = tensor("op_29457_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_29457_end_mask_0 = const()[name = tensor("op_29457_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29457_cast_fp16 = slice_by_index(begin = var_29457_begin_0, end = var_29457_end_0, end_mask = var_29457_end_mask_0, x = var_29279_cast_fp16)[name = tensor("op_29457_cast_fp16")]; + tensor var_29464_begin_0 = const()[name = tensor("op_29464_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_29464_end_0 = const()[name = tensor("op_29464_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_29464_end_mask_0 = const()[name = tensor("op_29464_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29464_cast_fp16 = slice_by_index(begin = var_29464_begin_0, end = var_29464_end_0, end_mask = var_29464_end_mask_0, x = var_29283_cast_fp16)[name = tensor("op_29464_cast_fp16")]; + tensor var_29471_begin_0 = const()[name = tensor("op_29471_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_29471_end_0 = const()[name = tensor("op_29471_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_29471_end_mask_0 = const()[name = tensor("op_29471_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29471_cast_fp16 = slice_by_index(begin = var_29471_begin_0, end = var_29471_end_0, end_mask = var_29471_end_mask_0, x = var_29283_cast_fp16)[name = tensor("op_29471_cast_fp16")]; + tensor var_29478_begin_0 = const()[name = tensor("op_29478_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_29478_end_0 = const()[name = tensor("op_29478_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_29478_end_mask_0 = const()[name = tensor("op_29478_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29478_cast_fp16 = slice_by_index(begin = var_29478_begin_0, end = var_29478_end_0, end_mask = var_29478_end_mask_0, x = var_29283_cast_fp16)[name = tensor("op_29478_cast_fp16")]; + tensor var_29485_begin_0 = const()[name = tensor("op_29485_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_29485_end_0 = const()[name = tensor("op_29485_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_29485_end_mask_0 = const()[name = tensor("op_29485_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29485_cast_fp16 = slice_by_index(begin = var_29485_begin_0, end = var_29485_end_0, end_mask = var_29485_end_mask_0, x = var_29283_cast_fp16)[name = tensor("op_29485_cast_fp16")]; + tensor var_29492_begin_0 = const()[name = tensor("op_29492_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_29492_end_0 = const()[name = tensor("op_29492_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_29492_end_mask_0 = const()[name = tensor("op_29492_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29492_cast_fp16 = slice_by_index(begin = var_29492_begin_0, end = var_29492_end_0, end_mask = var_29492_end_mask_0, x = var_29287_cast_fp16)[name = tensor("op_29492_cast_fp16")]; + tensor var_29499_begin_0 = const()[name = tensor("op_29499_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_29499_end_0 = const()[name = tensor("op_29499_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_29499_end_mask_0 = const()[name = tensor("op_29499_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29499_cast_fp16 = slice_by_index(begin = var_29499_begin_0, end = var_29499_end_0, end_mask = var_29499_end_mask_0, x = var_29287_cast_fp16)[name = tensor("op_29499_cast_fp16")]; + tensor var_29506_begin_0 = const()[name = tensor("op_29506_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_29506_end_0 = const()[name = tensor("op_29506_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_29506_end_mask_0 = const()[name = tensor("op_29506_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29506_cast_fp16 = slice_by_index(begin = var_29506_begin_0, end = var_29506_end_0, end_mask = var_29506_end_mask_0, x = var_29287_cast_fp16)[name = tensor("op_29506_cast_fp16")]; + tensor var_29513_begin_0 = const()[name = tensor("op_29513_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_29513_end_0 = const()[name = tensor("op_29513_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_29513_end_mask_0 = const()[name = tensor("op_29513_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29513_cast_fp16 = slice_by_index(begin = var_29513_begin_0, end = var_29513_end_0, end_mask = var_29513_end_mask_0, x = var_29287_cast_fp16)[name = tensor("op_29513_cast_fp16")]; + tensor var_29520_begin_0 = const()[name = tensor("op_29520_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_29520_end_0 = const()[name = tensor("op_29520_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_29520_end_mask_0 = const()[name = tensor("op_29520_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29520_cast_fp16 = slice_by_index(begin = var_29520_begin_0, end = var_29520_end_0, end_mask = var_29520_end_mask_0, x = var_29291_cast_fp16)[name = tensor("op_29520_cast_fp16")]; + tensor var_29527_begin_0 = const()[name = tensor("op_29527_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_29527_end_0 = const()[name = tensor("op_29527_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_29527_end_mask_0 = const()[name = tensor("op_29527_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29527_cast_fp16 = slice_by_index(begin = var_29527_begin_0, end = var_29527_end_0, end_mask = var_29527_end_mask_0, x = var_29291_cast_fp16)[name = tensor("op_29527_cast_fp16")]; + tensor var_29534_begin_0 = const()[name = tensor("op_29534_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_29534_end_0 = const()[name = tensor("op_29534_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_29534_end_mask_0 = const()[name = tensor("op_29534_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29534_cast_fp16 = slice_by_index(begin = var_29534_begin_0, end = var_29534_end_0, end_mask = var_29534_end_mask_0, x = var_29291_cast_fp16)[name = tensor("op_29534_cast_fp16")]; + tensor var_29541_begin_0 = const()[name = tensor("op_29541_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_29541_end_0 = const()[name = tensor("op_29541_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_29541_end_mask_0 = const()[name = tensor("op_29541_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29541_cast_fp16 = slice_by_index(begin = var_29541_begin_0, end = var_29541_end_0, end_mask = var_29541_end_mask_0, x = var_29291_cast_fp16)[name = tensor("op_29541_cast_fp16")]; + tensor var_29548_begin_0 = const()[name = tensor("op_29548_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_29548_end_0 = const()[name = tensor("op_29548_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_29548_end_mask_0 = const()[name = tensor("op_29548_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29548_cast_fp16 = slice_by_index(begin = var_29548_begin_0, end = var_29548_end_0, end_mask = var_29548_end_mask_0, x = var_29295_cast_fp16)[name = tensor("op_29548_cast_fp16")]; + tensor var_29555_begin_0 = const()[name = tensor("op_29555_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_29555_end_0 = const()[name = tensor("op_29555_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_29555_end_mask_0 = const()[name = tensor("op_29555_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29555_cast_fp16 = slice_by_index(begin = var_29555_begin_0, end = var_29555_end_0, end_mask = var_29555_end_mask_0, x = var_29295_cast_fp16)[name = tensor("op_29555_cast_fp16")]; + tensor var_29562_begin_0 = const()[name = tensor("op_29562_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_29562_end_0 = const()[name = tensor("op_29562_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_29562_end_mask_0 = const()[name = tensor("op_29562_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29562_cast_fp16 = slice_by_index(begin = var_29562_begin_0, end = var_29562_end_0, end_mask = var_29562_end_mask_0, x = var_29295_cast_fp16)[name = tensor("op_29562_cast_fp16")]; + tensor var_29569_begin_0 = const()[name = tensor("op_29569_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_29569_end_0 = const()[name = tensor("op_29569_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_29569_end_mask_0 = const()[name = tensor("op_29569_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29569_cast_fp16 = slice_by_index(begin = var_29569_begin_0, end = var_29569_end_0, end_mask = var_29569_end_mask_0, x = var_29295_cast_fp16)[name = tensor("op_29569_cast_fp16")]; + tensor var_29576_begin_0 = const()[name = tensor("op_29576_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_29576_end_0 = const()[name = tensor("op_29576_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_29576_end_mask_0 = const()[name = tensor("op_29576_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29576_cast_fp16 = slice_by_index(begin = var_29576_begin_0, end = var_29576_end_0, end_mask = var_29576_end_mask_0, x = var_29299_cast_fp16)[name = tensor("op_29576_cast_fp16")]; + tensor var_29583_begin_0 = const()[name = tensor("op_29583_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_29583_end_0 = const()[name = tensor("op_29583_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_29583_end_mask_0 = const()[name = tensor("op_29583_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29583_cast_fp16 = slice_by_index(begin = var_29583_begin_0, end = var_29583_end_0, end_mask = var_29583_end_mask_0, x = var_29299_cast_fp16)[name = tensor("op_29583_cast_fp16")]; + tensor var_29590_begin_0 = const()[name = tensor("op_29590_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_29590_end_0 = const()[name = tensor("op_29590_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_29590_end_mask_0 = const()[name = tensor("op_29590_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29590_cast_fp16 = slice_by_index(begin = var_29590_begin_0, end = var_29590_end_0, end_mask = var_29590_end_mask_0, x = var_29299_cast_fp16)[name = tensor("op_29590_cast_fp16")]; + tensor var_29597_begin_0 = const()[name = tensor("op_29597_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_29597_end_0 = const()[name = tensor("op_29597_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_29597_end_mask_0 = const()[name = tensor("op_29597_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29597_cast_fp16 = slice_by_index(begin = var_29597_begin_0, end = var_29597_end_0, end_mask = var_29597_end_mask_0, x = var_29299_cast_fp16)[name = tensor("op_29597_cast_fp16")]; + tensor var_29604_begin_0 = const()[name = tensor("op_29604_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_29604_end_0 = const()[name = tensor("op_29604_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_29604_end_mask_0 = const()[name = tensor("op_29604_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29604_cast_fp16 = slice_by_index(begin = var_29604_begin_0, end = var_29604_end_0, end_mask = var_29604_end_mask_0, x = var_29303_cast_fp16)[name = tensor("op_29604_cast_fp16")]; + tensor var_29611_begin_0 = const()[name = tensor("op_29611_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_29611_end_0 = const()[name = tensor("op_29611_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_29611_end_mask_0 = const()[name = tensor("op_29611_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29611_cast_fp16 = slice_by_index(begin = var_29611_begin_0, end = var_29611_end_0, end_mask = var_29611_end_mask_0, x = var_29303_cast_fp16)[name = tensor("op_29611_cast_fp16")]; + tensor var_29618_begin_0 = const()[name = tensor("op_29618_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_29618_end_0 = const()[name = tensor("op_29618_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_29618_end_mask_0 = const()[name = tensor("op_29618_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29618_cast_fp16 = slice_by_index(begin = var_29618_begin_0, end = var_29618_end_0, end_mask = var_29618_end_mask_0, x = var_29303_cast_fp16)[name = tensor("op_29618_cast_fp16")]; + tensor var_29625_begin_0 = const()[name = tensor("op_29625_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_29625_end_0 = const()[name = tensor("op_29625_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_29625_end_mask_0 = const()[name = tensor("op_29625_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29625_cast_fp16 = slice_by_index(begin = var_29625_begin_0, end = var_29625_end_0, end_mask = var_29625_end_mask_0, x = var_29303_cast_fp16)[name = tensor("op_29625_cast_fp16")]; + tensor var_29632_begin_0 = const()[name = tensor("op_29632_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_29632_end_0 = const()[name = tensor("op_29632_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_29632_end_mask_0 = const()[name = tensor("op_29632_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29632_cast_fp16 = slice_by_index(begin = var_29632_begin_0, end = var_29632_end_0, end_mask = var_29632_end_mask_0, x = var_29307_cast_fp16)[name = tensor("op_29632_cast_fp16")]; + tensor var_29639_begin_0 = const()[name = tensor("op_29639_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_29639_end_0 = const()[name = tensor("op_29639_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_29639_end_mask_0 = const()[name = tensor("op_29639_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29639_cast_fp16 = slice_by_index(begin = var_29639_begin_0, end = var_29639_end_0, end_mask = var_29639_end_mask_0, x = var_29307_cast_fp16)[name = tensor("op_29639_cast_fp16")]; + tensor var_29646_begin_0 = const()[name = tensor("op_29646_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_29646_end_0 = const()[name = tensor("op_29646_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_29646_end_mask_0 = const()[name = tensor("op_29646_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29646_cast_fp16 = slice_by_index(begin = var_29646_begin_0, end = var_29646_end_0, end_mask = var_29646_end_mask_0, x = var_29307_cast_fp16)[name = tensor("op_29646_cast_fp16")]; + tensor var_29653_begin_0 = const()[name = tensor("op_29653_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_29653_end_0 = const()[name = tensor("op_29653_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_29653_end_mask_0 = const()[name = tensor("op_29653_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29653_cast_fp16 = slice_by_index(begin = var_29653_begin_0, end = var_29653_end_0, end_mask = var_29653_end_mask_0, x = var_29307_cast_fp16)[name = tensor("op_29653_cast_fp16")]; + tensor var_29660_begin_0 = const()[name = tensor("op_29660_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_29660_end_0 = const()[name = tensor("op_29660_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_29660_end_mask_0 = const()[name = tensor("op_29660_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29660_cast_fp16 = slice_by_index(begin = var_29660_begin_0, end = var_29660_end_0, end_mask = var_29660_end_mask_0, x = var_29311_cast_fp16)[name = tensor("op_29660_cast_fp16")]; + tensor var_29667_begin_0 = const()[name = tensor("op_29667_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_29667_end_0 = const()[name = tensor("op_29667_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_29667_end_mask_0 = const()[name = tensor("op_29667_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29667_cast_fp16 = slice_by_index(begin = var_29667_begin_0, end = var_29667_end_0, end_mask = var_29667_end_mask_0, x = var_29311_cast_fp16)[name = tensor("op_29667_cast_fp16")]; + tensor var_29674_begin_0 = const()[name = tensor("op_29674_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_29674_end_0 = const()[name = tensor("op_29674_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_29674_end_mask_0 = const()[name = tensor("op_29674_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29674_cast_fp16 = slice_by_index(begin = var_29674_begin_0, end = var_29674_end_0, end_mask = var_29674_end_mask_0, x = var_29311_cast_fp16)[name = tensor("op_29674_cast_fp16")]; + tensor var_29681_begin_0 = const()[name = tensor("op_29681_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_29681_end_0 = const()[name = tensor("op_29681_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_29681_end_mask_0 = const()[name = tensor("op_29681_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29681_cast_fp16 = slice_by_index(begin = var_29681_begin_0, end = var_29681_end_0, end_mask = var_29681_end_mask_0, x = var_29311_cast_fp16)[name = tensor("op_29681_cast_fp16")]; + tensor var_29688_begin_0 = const()[name = tensor("op_29688_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_29688_end_0 = const()[name = tensor("op_29688_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_29688_end_mask_0 = const()[name = tensor("op_29688_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29688_cast_fp16 = slice_by_index(begin = var_29688_begin_0, end = var_29688_end_0, end_mask = var_29688_end_mask_0, x = var_29315_cast_fp16)[name = tensor("op_29688_cast_fp16")]; + tensor var_29695_begin_0 = const()[name = tensor("op_29695_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_29695_end_0 = const()[name = tensor("op_29695_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_29695_end_mask_0 = const()[name = tensor("op_29695_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29695_cast_fp16 = slice_by_index(begin = var_29695_begin_0, end = var_29695_end_0, end_mask = var_29695_end_mask_0, x = var_29315_cast_fp16)[name = tensor("op_29695_cast_fp16")]; + tensor var_29702_begin_0 = const()[name = tensor("op_29702_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_29702_end_0 = const()[name = tensor("op_29702_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_29702_end_mask_0 = const()[name = tensor("op_29702_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29702_cast_fp16 = slice_by_index(begin = var_29702_begin_0, end = var_29702_end_0, end_mask = var_29702_end_mask_0, x = var_29315_cast_fp16)[name = tensor("op_29702_cast_fp16")]; + tensor var_29709_begin_0 = const()[name = tensor("op_29709_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_29709_end_0 = const()[name = tensor("op_29709_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_29709_end_mask_0 = const()[name = tensor("op_29709_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29709_cast_fp16 = slice_by_index(begin = var_29709_begin_0, end = var_29709_end_0, end_mask = var_29709_end_mask_0, x = var_29315_cast_fp16)[name = tensor("op_29709_cast_fp16")]; + tensor var_29716_begin_0 = const()[name = tensor("op_29716_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_29716_end_0 = const()[name = tensor("op_29716_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_29716_end_mask_0 = const()[name = tensor("op_29716_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29716_cast_fp16 = slice_by_index(begin = var_29716_begin_0, end = var_29716_end_0, end_mask = var_29716_end_mask_0, x = var_29319_cast_fp16)[name = tensor("op_29716_cast_fp16")]; + tensor var_29723_begin_0 = const()[name = tensor("op_29723_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_29723_end_0 = const()[name = tensor("op_29723_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_29723_end_mask_0 = const()[name = tensor("op_29723_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29723_cast_fp16 = slice_by_index(begin = var_29723_begin_0, end = var_29723_end_0, end_mask = var_29723_end_mask_0, x = var_29319_cast_fp16)[name = tensor("op_29723_cast_fp16")]; + tensor var_29730_begin_0 = const()[name = tensor("op_29730_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_29730_end_0 = const()[name = tensor("op_29730_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_29730_end_mask_0 = const()[name = tensor("op_29730_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29730_cast_fp16 = slice_by_index(begin = var_29730_begin_0, end = var_29730_end_0, end_mask = var_29730_end_mask_0, x = var_29319_cast_fp16)[name = tensor("op_29730_cast_fp16")]; + tensor var_29737_begin_0 = const()[name = tensor("op_29737_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_29737_end_0 = const()[name = tensor("op_29737_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_29737_end_mask_0 = const()[name = tensor("op_29737_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29737_cast_fp16 = slice_by_index(begin = var_29737_begin_0, end = var_29737_end_0, end_mask = var_29737_end_mask_0, x = var_29319_cast_fp16)[name = tensor("op_29737_cast_fp16")]; + tensor var_29744_begin_0 = const()[name = tensor("op_29744_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_29744_end_0 = const()[name = tensor("op_29744_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_29744_end_mask_0 = const()[name = tensor("op_29744_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29744_cast_fp16 = slice_by_index(begin = var_29744_begin_0, end = var_29744_end_0, end_mask = var_29744_end_mask_0, x = var_29323_cast_fp16)[name = tensor("op_29744_cast_fp16")]; + tensor var_29751_begin_0 = const()[name = tensor("op_29751_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_29751_end_0 = const()[name = tensor("op_29751_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_29751_end_mask_0 = const()[name = tensor("op_29751_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29751_cast_fp16 = slice_by_index(begin = var_29751_begin_0, end = var_29751_end_0, end_mask = var_29751_end_mask_0, x = var_29323_cast_fp16)[name = tensor("op_29751_cast_fp16")]; + tensor var_29758_begin_0 = const()[name = tensor("op_29758_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_29758_end_0 = const()[name = tensor("op_29758_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_29758_end_mask_0 = const()[name = tensor("op_29758_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29758_cast_fp16 = slice_by_index(begin = var_29758_begin_0, end = var_29758_end_0, end_mask = var_29758_end_mask_0, x = var_29323_cast_fp16)[name = tensor("op_29758_cast_fp16")]; + tensor var_29765_begin_0 = const()[name = tensor("op_29765_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_29765_end_0 = const()[name = tensor("op_29765_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_29765_end_mask_0 = const()[name = tensor("op_29765_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29765_cast_fp16 = slice_by_index(begin = var_29765_begin_0, end = var_29765_end_0, end_mask = var_29765_end_mask_0, x = var_29323_cast_fp16)[name = tensor("op_29765_cast_fp16")]; + tensor var_29772_begin_0 = const()[name = tensor("op_29772_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_29772_end_0 = const()[name = tensor("op_29772_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_29772_end_mask_0 = const()[name = tensor("op_29772_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29772_cast_fp16 = slice_by_index(begin = var_29772_begin_0, end = var_29772_end_0, end_mask = var_29772_end_mask_0, x = var_29327_cast_fp16)[name = tensor("op_29772_cast_fp16")]; + tensor var_29779_begin_0 = const()[name = tensor("op_29779_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_29779_end_0 = const()[name = tensor("op_29779_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_29779_end_mask_0 = const()[name = tensor("op_29779_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29779_cast_fp16 = slice_by_index(begin = var_29779_begin_0, end = var_29779_end_0, end_mask = var_29779_end_mask_0, x = var_29327_cast_fp16)[name = tensor("op_29779_cast_fp16")]; + tensor var_29786_begin_0 = const()[name = tensor("op_29786_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_29786_end_0 = const()[name = tensor("op_29786_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_29786_end_mask_0 = const()[name = tensor("op_29786_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29786_cast_fp16 = slice_by_index(begin = var_29786_begin_0, end = var_29786_end_0, end_mask = var_29786_end_mask_0, x = var_29327_cast_fp16)[name = tensor("op_29786_cast_fp16")]; + tensor var_29793_begin_0 = const()[name = tensor("op_29793_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_29793_end_0 = const()[name = tensor("op_29793_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_29793_end_mask_0 = const()[name = tensor("op_29793_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29793_cast_fp16 = slice_by_index(begin = var_29793_begin_0, end = var_29793_end_0, end_mask = var_29793_end_mask_0, x = var_29327_cast_fp16)[name = tensor("op_29793_cast_fp16")]; + tensor var_29800_begin_0 = const()[name = tensor("op_29800_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_29800_end_0 = const()[name = tensor("op_29800_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_29800_end_mask_0 = const()[name = tensor("op_29800_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29800_cast_fp16 = slice_by_index(begin = var_29800_begin_0, end = var_29800_end_0, end_mask = var_29800_end_mask_0, x = var_29331_cast_fp16)[name = tensor("op_29800_cast_fp16")]; + tensor var_29807_begin_0 = const()[name = tensor("op_29807_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_29807_end_0 = const()[name = tensor("op_29807_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_29807_end_mask_0 = const()[name = tensor("op_29807_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29807_cast_fp16 = slice_by_index(begin = var_29807_begin_0, end = var_29807_end_0, end_mask = var_29807_end_mask_0, x = var_29331_cast_fp16)[name = tensor("op_29807_cast_fp16")]; + tensor var_29814_begin_0 = const()[name = tensor("op_29814_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_29814_end_0 = const()[name = tensor("op_29814_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_29814_end_mask_0 = const()[name = tensor("op_29814_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29814_cast_fp16 = slice_by_index(begin = var_29814_begin_0, end = var_29814_end_0, end_mask = var_29814_end_mask_0, x = var_29331_cast_fp16)[name = tensor("op_29814_cast_fp16")]; + tensor var_29821_begin_0 = const()[name = tensor("op_29821_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_29821_end_0 = const()[name = tensor("op_29821_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_29821_end_mask_0 = const()[name = tensor("op_29821_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29821_cast_fp16 = slice_by_index(begin = var_29821_begin_0, end = var_29821_end_0, end_mask = var_29821_end_mask_0, x = var_29331_cast_fp16)[name = tensor("op_29821_cast_fp16")]; + tensor var_29828_begin_0 = const()[name = tensor("op_29828_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_29828_end_0 = const()[name = tensor("op_29828_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_29828_end_mask_0 = const()[name = tensor("op_29828_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29828_cast_fp16 = slice_by_index(begin = var_29828_begin_0, end = var_29828_end_0, end_mask = var_29828_end_mask_0, x = var_29335_cast_fp16)[name = tensor("op_29828_cast_fp16")]; + tensor var_29835_begin_0 = const()[name = tensor("op_29835_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_29835_end_0 = const()[name = tensor("op_29835_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_29835_end_mask_0 = const()[name = tensor("op_29835_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29835_cast_fp16 = slice_by_index(begin = var_29835_begin_0, end = var_29835_end_0, end_mask = var_29835_end_mask_0, x = var_29335_cast_fp16)[name = tensor("op_29835_cast_fp16")]; + tensor var_29842_begin_0 = const()[name = tensor("op_29842_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_29842_end_0 = const()[name = tensor("op_29842_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_29842_end_mask_0 = const()[name = tensor("op_29842_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29842_cast_fp16 = slice_by_index(begin = var_29842_begin_0, end = var_29842_end_0, end_mask = var_29842_end_mask_0, x = var_29335_cast_fp16)[name = tensor("op_29842_cast_fp16")]; + tensor var_29849_begin_0 = const()[name = tensor("op_29849_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_29849_end_0 = const()[name = tensor("op_29849_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_29849_end_mask_0 = const()[name = tensor("op_29849_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29849_cast_fp16 = slice_by_index(begin = var_29849_begin_0, end = var_29849_end_0, end_mask = var_29849_end_mask_0, x = var_29335_cast_fp16)[name = tensor("op_29849_cast_fp16")]; + tensor var_29856_begin_0 = const()[name = tensor("op_29856_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_29856_end_0 = const()[name = tensor("op_29856_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_29856_end_mask_0 = const()[name = tensor("op_29856_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29856_cast_fp16 = slice_by_index(begin = var_29856_begin_0, end = var_29856_end_0, end_mask = var_29856_end_mask_0, x = var_29339_cast_fp16)[name = tensor("op_29856_cast_fp16")]; + tensor var_29863_begin_0 = const()[name = tensor("op_29863_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_29863_end_0 = const()[name = tensor("op_29863_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_29863_end_mask_0 = const()[name = tensor("op_29863_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29863_cast_fp16 = slice_by_index(begin = var_29863_begin_0, end = var_29863_end_0, end_mask = var_29863_end_mask_0, x = var_29339_cast_fp16)[name = tensor("op_29863_cast_fp16")]; + tensor var_29870_begin_0 = const()[name = tensor("op_29870_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_29870_end_0 = const()[name = tensor("op_29870_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_29870_end_mask_0 = const()[name = tensor("op_29870_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29870_cast_fp16 = slice_by_index(begin = var_29870_begin_0, end = var_29870_end_0, end_mask = var_29870_end_mask_0, x = var_29339_cast_fp16)[name = tensor("op_29870_cast_fp16")]; + tensor var_29877_begin_0 = const()[name = tensor("op_29877_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_29877_end_0 = const()[name = tensor("op_29877_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_29877_end_mask_0 = const()[name = tensor("op_29877_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29877_cast_fp16 = slice_by_index(begin = var_29877_begin_0, end = var_29877_end_0, end_mask = var_29877_end_mask_0, x = var_29339_cast_fp16)[name = tensor("op_29877_cast_fp16")]; + tensor var_29884_begin_0 = const()[name = tensor("op_29884_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_29884_end_0 = const()[name = tensor("op_29884_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_29884_end_mask_0 = const()[name = tensor("op_29884_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29884_cast_fp16 = slice_by_index(begin = var_29884_begin_0, end = var_29884_end_0, end_mask = var_29884_end_mask_0, x = var_29343_cast_fp16)[name = tensor("op_29884_cast_fp16")]; + tensor var_29891_begin_0 = const()[name = tensor("op_29891_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_29891_end_0 = const()[name = tensor("op_29891_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_29891_end_mask_0 = const()[name = tensor("op_29891_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29891_cast_fp16 = slice_by_index(begin = var_29891_begin_0, end = var_29891_end_0, end_mask = var_29891_end_mask_0, x = var_29343_cast_fp16)[name = tensor("op_29891_cast_fp16")]; + tensor var_29898_begin_0 = const()[name = tensor("op_29898_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_29898_end_0 = const()[name = tensor("op_29898_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_29898_end_mask_0 = const()[name = tensor("op_29898_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29898_cast_fp16 = slice_by_index(begin = var_29898_begin_0, end = var_29898_end_0, end_mask = var_29898_end_mask_0, x = var_29343_cast_fp16)[name = tensor("op_29898_cast_fp16")]; + tensor var_29905_begin_0 = const()[name = tensor("op_29905_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_29905_end_0 = const()[name = tensor("op_29905_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_29905_end_mask_0 = const()[name = tensor("op_29905_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29905_cast_fp16 = slice_by_index(begin = var_29905_begin_0, end = var_29905_end_0, end_mask = var_29905_end_mask_0, x = var_29343_cast_fp16)[name = tensor("op_29905_cast_fp16")]; + tensor k_39_perm_0 = const()[name = tensor("k_39_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_29910_begin_0 = const()[name = tensor("op_29910_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_29910_end_0 = const()[name = tensor("op_29910_end_0"), val = tensor([1, 1500, 1, 64])]; + tensor var_29910_end_mask_0 = const()[name = tensor("op_29910_end_mask_0"), val = tensor([true, true, true, false])]; + tensor transpose_12 = transpose(perm = k_39_perm_0, x = key_39_cast_fp16)[name = tensor("transpose_12")]; + tensor var_29910_cast_fp16 = slice_by_index(begin = var_29910_begin_0, end = var_29910_end_0, end_mask = var_29910_end_mask_0, x = transpose_12)[name = tensor("op_29910_cast_fp16")]; + tensor var_29914_begin_0 = const()[name = tensor("op_29914_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor var_29914_end_0 = const()[name = tensor("op_29914_end_0"), val = tensor([1, 1500, 1, 128])]; + tensor var_29914_end_mask_0 = const()[name = tensor("op_29914_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29914_cast_fp16 = slice_by_index(begin = var_29914_begin_0, end = var_29914_end_0, end_mask = var_29914_end_mask_0, x = transpose_12)[name = tensor("op_29914_cast_fp16")]; + tensor var_29918_begin_0 = const()[name = tensor("op_29918_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_29918_end_0 = const()[name = tensor("op_29918_end_0"), val = tensor([1, 1500, 1, 192])]; + tensor var_29918_end_mask_0 = const()[name = tensor("op_29918_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29918_cast_fp16 = slice_by_index(begin = var_29918_begin_0, end = var_29918_end_0, end_mask = var_29918_end_mask_0, x = transpose_12)[name = tensor("op_29918_cast_fp16")]; + tensor var_29922_begin_0 = const()[name = tensor("op_29922_begin_0"), val = tensor([0, 0, 0, 192])]; + tensor var_29922_end_0 = const()[name = tensor("op_29922_end_0"), val = tensor([1, 1500, 1, 256])]; + tensor var_29922_end_mask_0 = const()[name = tensor("op_29922_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29922_cast_fp16 = slice_by_index(begin = var_29922_begin_0, end = var_29922_end_0, end_mask = var_29922_end_mask_0, x = transpose_12)[name = tensor("op_29922_cast_fp16")]; + tensor var_29926_begin_0 = const()[name = tensor("op_29926_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_29926_end_0 = const()[name = tensor("op_29926_end_0"), val = tensor([1, 1500, 1, 320])]; + tensor var_29926_end_mask_0 = const()[name = tensor("op_29926_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29926_cast_fp16 = slice_by_index(begin = var_29926_begin_0, end = var_29926_end_0, end_mask = var_29926_end_mask_0, x = transpose_12)[name = tensor("op_29926_cast_fp16")]; + tensor var_29930_begin_0 = const()[name = tensor("op_29930_begin_0"), val = tensor([0, 0, 0, 320])]; + tensor var_29930_end_0 = const()[name = tensor("op_29930_end_0"), val = tensor([1, 1500, 1, 384])]; + tensor var_29930_end_mask_0 = const()[name = tensor("op_29930_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29930_cast_fp16 = slice_by_index(begin = var_29930_begin_0, end = var_29930_end_0, end_mask = var_29930_end_mask_0, x = transpose_12)[name = tensor("op_29930_cast_fp16")]; + tensor var_29934_begin_0 = const()[name = tensor("op_29934_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_29934_end_0 = const()[name = tensor("op_29934_end_0"), val = tensor([1, 1500, 1, 448])]; + tensor var_29934_end_mask_0 = const()[name = tensor("op_29934_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29934_cast_fp16 = slice_by_index(begin = var_29934_begin_0, end = var_29934_end_0, end_mask = var_29934_end_mask_0, x = transpose_12)[name = tensor("op_29934_cast_fp16")]; + tensor var_29938_begin_0 = const()[name = tensor("op_29938_begin_0"), val = tensor([0, 0, 0, 448])]; + tensor var_29938_end_0 = const()[name = tensor("op_29938_end_0"), val = tensor([1, 1500, 1, 512])]; + tensor var_29938_end_mask_0 = const()[name = tensor("op_29938_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29938_cast_fp16 = slice_by_index(begin = var_29938_begin_0, end = var_29938_end_0, end_mask = var_29938_end_mask_0, x = transpose_12)[name = tensor("op_29938_cast_fp16")]; + tensor var_29942_begin_0 = const()[name = tensor("op_29942_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_29942_end_0 = const()[name = tensor("op_29942_end_0"), val = tensor([1, 1500, 1, 576])]; + tensor var_29942_end_mask_0 = const()[name = tensor("op_29942_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29942_cast_fp16 = slice_by_index(begin = var_29942_begin_0, end = var_29942_end_0, end_mask = var_29942_end_mask_0, x = transpose_12)[name = tensor("op_29942_cast_fp16")]; + tensor var_29946_begin_0 = const()[name = tensor("op_29946_begin_0"), val = tensor([0, 0, 0, 576])]; + tensor var_29946_end_0 = const()[name = tensor("op_29946_end_0"), val = tensor([1, 1500, 1, 640])]; + tensor var_29946_end_mask_0 = const()[name = tensor("op_29946_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29946_cast_fp16 = slice_by_index(begin = var_29946_begin_0, end = var_29946_end_0, end_mask = var_29946_end_mask_0, x = transpose_12)[name = tensor("op_29946_cast_fp16")]; + tensor var_29950_begin_0 = const()[name = tensor("op_29950_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_29950_end_0 = const()[name = tensor("op_29950_end_0"), val = tensor([1, 1500, 1, 704])]; + tensor var_29950_end_mask_0 = const()[name = tensor("op_29950_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29950_cast_fp16 = slice_by_index(begin = var_29950_begin_0, end = var_29950_end_0, end_mask = var_29950_end_mask_0, x = transpose_12)[name = tensor("op_29950_cast_fp16")]; + tensor var_29954_begin_0 = const()[name = tensor("op_29954_begin_0"), val = tensor([0, 0, 0, 704])]; + tensor var_29954_end_0 = const()[name = tensor("op_29954_end_0"), val = tensor([1, 1500, 1, 768])]; + tensor var_29954_end_mask_0 = const()[name = tensor("op_29954_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29954_cast_fp16 = slice_by_index(begin = var_29954_begin_0, end = var_29954_end_0, end_mask = var_29954_end_mask_0, x = transpose_12)[name = tensor("op_29954_cast_fp16")]; + tensor var_29958_begin_0 = const()[name = tensor("op_29958_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_29958_end_0 = const()[name = tensor("op_29958_end_0"), val = tensor([1, 1500, 1, 832])]; + tensor var_29958_end_mask_0 = const()[name = tensor("op_29958_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29958_cast_fp16 = slice_by_index(begin = var_29958_begin_0, end = var_29958_end_0, end_mask = var_29958_end_mask_0, x = transpose_12)[name = tensor("op_29958_cast_fp16")]; + tensor var_29962_begin_0 = const()[name = tensor("op_29962_begin_0"), val = tensor([0, 0, 0, 832])]; + tensor var_29962_end_0 = const()[name = tensor("op_29962_end_0"), val = tensor([1, 1500, 1, 896])]; + tensor var_29962_end_mask_0 = const()[name = tensor("op_29962_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29962_cast_fp16 = slice_by_index(begin = var_29962_begin_0, end = var_29962_end_0, end_mask = var_29962_end_mask_0, x = transpose_12)[name = tensor("op_29962_cast_fp16")]; + tensor var_29966_begin_0 = const()[name = tensor("op_29966_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_29966_end_0 = const()[name = tensor("op_29966_end_0"), val = tensor([1, 1500, 1, 960])]; + tensor var_29966_end_mask_0 = const()[name = tensor("op_29966_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29966_cast_fp16 = slice_by_index(begin = var_29966_begin_0, end = var_29966_end_0, end_mask = var_29966_end_mask_0, x = transpose_12)[name = tensor("op_29966_cast_fp16")]; + tensor var_29970_begin_0 = const()[name = tensor("op_29970_begin_0"), val = tensor([0, 0, 0, 960])]; + tensor var_29970_end_0 = const()[name = tensor("op_29970_end_0"), val = tensor([1, 1500, 1, 1024])]; + tensor var_29970_end_mask_0 = const()[name = tensor("op_29970_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29970_cast_fp16 = slice_by_index(begin = var_29970_begin_0, end = var_29970_end_0, end_mask = var_29970_end_mask_0, x = transpose_12)[name = tensor("op_29970_cast_fp16")]; + tensor var_29974_begin_0 = const()[name = tensor("op_29974_begin_0"), val = tensor([0, 0, 0, 1024])]; + tensor var_29974_end_0 = const()[name = tensor("op_29974_end_0"), val = tensor([1, 1500, 1, 1088])]; + tensor var_29974_end_mask_0 = const()[name = tensor("op_29974_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29974_cast_fp16 = slice_by_index(begin = var_29974_begin_0, end = var_29974_end_0, end_mask = var_29974_end_mask_0, x = transpose_12)[name = tensor("op_29974_cast_fp16")]; + tensor var_29978_begin_0 = const()[name = tensor("op_29978_begin_0"), val = tensor([0, 0, 0, 1088])]; + tensor var_29978_end_0 = const()[name = tensor("op_29978_end_0"), val = tensor([1, 1500, 1, 1152])]; + tensor var_29978_end_mask_0 = const()[name = tensor("op_29978_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29978_cast_fp16 = slice_by_index(begin = var_29978_begin_0, end = var_29978_end_0, end_mask = var_29978_end_mask_0, x = transpose_12)[name = tensor("op_29978_cast_fp16")]; + tensor var_29982_begin_0 = const()[name = tensor("op_29982_begin_0"), val = tensor([0, 0, 0, 1152])]; + tensor var_29982_end_0 = const()[name = tensor("op_29982_end_0"), val = tensor([1, 1500, 1, 1216])]; + tensor var_29982_end_mask_0 = const()[name = tensor("op_29982_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29982_cast_fp16 = slice_by_index(begin = var_29982_begin_0, end = var_29982_end_0, end_mask = var_29982_end_mask_0, x = transpose_12)[name = tensor("op_29982_cast_fp16")]; + tensor var_29986_begin_0 = const()[name = tensor("op_29986_begin_0"), val = tensor([0, 0, 0, 1216])]; + tensor var_29986_end_0 = const()[name = tensor("op_29986_end_0"), val = tensor([1, 1500, 1, 1280])]; + tensor var_29986_end_mask_0 = const()[name = tensor("op_29986_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_29986_cast_fp16 = slice_by_index(begin = var_29986_begin_0, end = var_29986_end_0, end_mask = var_29986_end_mask_0, x = transpose_12)[name = tensor("op_29986_cast_fp16")]; + tensor var_29988_begin_0 = const()[name = tensor("op_29988_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_29988_end_0 = const()[name = tensor("op_29988_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_29988_end_mask_0 = const()[name = tensor("op_29988_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_29988_cast_fp16 = slice_by_index(begin = var_29988_begin_0, end = var_29988_end_0, end_mask = var_29988_end_mask_0, x = value_39_cast_fp16)[name = tensor("op_29988_cast_fp16")]; + tensor var_29992_begin_0 = const()[name = tensor("op_29992_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_29992_end_0 = const()[name = tensor("op_29992_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_29992_end_mask_0 = const()[name = tensor("op_29992_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_29992_cast_fp16 = slice_by_index(begin = var_29992_begin_0, end = var_29992_end_0, end_mask = var_29992_end_mask_0, x = value_39_cast_fp16)[name = tensor("op_29992_cast_fp16")]; + tensor var_29996_begin_0 = const()[name = tensor("op_29996_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_29996_end_0 = const()[name = tensor("op_29996_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_29996_end_mask_0 = const()[name = tensor("op_29996_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_29996_cast_fp16 = slice_by_index(begin = var_29996_begin_0, end = var_29996_end_0, end_mask = var_29996_end_mask_0, x = value_39_cast_fp16)[name = tensor("op_29996_cast_fp16")]; + tensor var_30000_begin_0 = const()[name = tensor("op_30000_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_30000_end_0 = const()[name = tensor("op_30000_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_30000_end_mask_0 = const()[name = tensor("op_30000_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_30000_cast_fp16 = slice_by_index(begin = var_30000_begin_0, end = var_30000_end_0, end_mask = var_30000_end_mask_0, x = value_39_cast_fp16)[name = tensor("op_30000_cast_fp16")]; + tensor var_30004_begin_0 = const()[name = tensor("op_30004_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_30004_end_0 = const()[name = tensor("op_30004_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_30004_end_mask_0 = const()[name = tensor("op_30004_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_30004_cast_fp16 = slice_by_index(begin = var_30004_begin_0, end = var_30004_end_0, end_mask = var_30004_end_mask_0, x = value_39_cast_fp16)[name = tensor("op_30004_cast_fp16")]; + tensor var_30008_begin_0 = const()[name = tensor("op_30008_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_30008_end_0 = const()[name = tensor("op_30008_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_30008_end_mask_0 = const()[name = tensor("op_30008_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_30008_cast_fp16 = slice_by_index(begin = var_30008_begin_0, end = var_30008_end_0, end_mask = var_30008_end_mask_0, x = value_39_cast_fp16)[name = tensor("op_30008_cast_fp16")]; + tensor var_30012_begin_0 = const()[name = tensor("op_30012_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_30012_end_0 = const()[name = tensor("op_30012_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_30012_end_mask_0 = const()[name = tensor("op_30012_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_30012_cast_fp16 = slice_by_index(begin = var_30012_begin_0, end = var_30012_end_0, end_mask = var_30012_end_mask_0, x = value_39_cast_fp16)[name = tensor("op_30012_cast_fp16")]; + tensor var_30016_begin_0 = const()[name = tensor("op_30016_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_30016_end_0 = const()[name = tensor("op_30016_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_30016_end_mask_0 = const()[name = tensor("op_30016_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_30016_cast_fp16 = slice_by_index(begin = var_30016_begin_0, end = var_30016_end_0, end_mask = var_30016_end_mask_0, x = value_39_cast_fp16)[name = tensor("op_30016_cast_fp16")]; + tensor var_30020_begin_0 = const()[name = tensor("op_30020_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_30020_end_0 = const()[name = tensor("op_30020_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_30020_end_mask_0 = const()[name = tensor("op_30020_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_30020_cast_fp16 = slice_by_index(begin = var_30020_begin_0, end = var_30020_end_0, end_mask = var_30020_end_mask_0, x = value_39_cast_fp16)[name = tensor("op_30020_cast_fp16")]; + tensor var_30024_begin_0 = const()[name = tensor("op_30024_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_30024_end_0 = const()[name = tensor("op_30024_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_30024_end_mask_0 = const()[name = tensor("op_30024_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_30024_cast_fp16 = slice_by_index(begin = var_30024_begin_0, end = var_30024_end_0, end_mask = var_30024_end_mask_0, x = value_39_cast_fp16)[name = tensor("op_30024_cast_fp16")]; + tensor var_30028_begin_0 = const()[name = tensor("op_30028_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_30028_end_0 = const()[name = tensor("op_30028_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_30028_end_mask_0 = const()[name = tensor("op_30028_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_30028_cast_fp16 = slice_by_index(begin = var_30028_begin_0, end = var_30028_end_0, end_mask = var_30028_end_mask_0, x = value_39_cast_fp16)[name = tensor("op_30028_cast_fp16")]; + tensor var_30032_begin_0 = const()[name = tensor("op_30032_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_30032_end_0 = const()[name = tensor("op_30032_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_30032_end_mask_0 = const()[name = tensor("op_30032_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_30032_cast_fp16 = slice_by_index(begin = var_30032_begin_0, end = var_30032_end_0, end_mask = var_30032_end_mask_0, x = value_39_cast_fp16)[name = tensor("op_30032_cast_fp16")]; + tensor var_30036_begin_0 = const()[name = tensor("op_30036_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_30036_end_0 = const()[name = tensor("op_30036_end_0"), val = tensor([1, 832, 1, 1500])]; + tensor var_30036_end_mask_0 = const()[name = tensor("op_30036_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_30036_cast_fp16 = slice_by_index(begin = var_30036_begin_0, end = var_30036_end_0, end_mask = var_30036_end_mask_0, x = value_39_cast_fp16)[name = tensor("op_30036_cast_fp16")]; + tensor var_30040_begin_0 = const()[name = tensor("op_30040_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_30040_end_0 = const()[name = tensor("op_30040_end_0"), val = tensor([1, 896, 1, 1500])]; + tensor var_30040_end_mask_0 = const()[name = tensor("op_30040_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_30040_cast_fp16 = slice_by_index(begin = var_30040_begin_0, end = var_30040_end_0, end_mask = var_30040_end_mask_0, x = value_39_cast_fp16)[name = tensor("op_30040_cast_fp16")]; + tensor var_30044_begin_0 = const()[name = tensor("op_30044_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_30044_end_0 = const()[name = tensor("op_30044_end_0"), val = tensor([1, 960, 1, 1500])]; + tensor var_30044_end_mask_0 = const()[name = tensor("op_30044_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_30044_cast_fp16 = slice_by_index(begin = var_30044_begin_0, end = var_30044_end_0, end_mask = var_30044_end_mask_0, x = value_39_cast_fp16)[name = tensor("op_30044_cast_fp16")]; + tensor var_30048_begin_0 = const()[name = tensor("op_30048_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_30048_end_0 = const()[name = tensor("op_30048_end_0"), val = tensor([1, 1024, 1, 1500])]; + tensor var_30048_end_mask_0 = const()[name = tensor("op_30048_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_30048_cast_fp16 = slice_by_index(begin = var_30048_begin_0, end = var_30048_end_0, end_mask = var_30048_end_mask_0, x = value_39_cast_fp16)[name = tensor("op_30048_cast_fp16")]; + tensor var_30052_begin_0 = const()[name = tensor("op_30052_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_30052_end_0 = const()[name = tensor("op_30052_end_0"), val = tensor([1, 1088, 1, 1500])]; + tensor var_30052_end_mask_0 = const()[name = tensor("op_30052_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_30052_cast_fp16 = slice_by_index(begin = var_30052_begin_0, end = var_30052_end_0, end_mask = var_30052_end_mask_0, x = value_39_cast_fp16)[name = tensor("op_30052_cast_fp16")]; + tensor var_30056_begin_0 = const()[name = tensor("op_30056_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_30056_end_0 = const()[name = tensor("op_30056_end_0"), val = tensor([1, 1152, 1, 1500])]; + tensor var_30056_end_mask_0 = const()[name = tensor("op_30056_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_30056_cast_fp16 = slice_by_index(begin = var_30056_begin_0, end = var_30056_end_0, end_mask = var_30056_end_mask_0, x = value_39_cast_fp16)[name = tensor("op_30056_cast_fp16")]; + tensor var_30060_begin_0 = const()[name = tensor("op_30060_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_30060_end_0 = const()[name = tensor("op_30060_end_0"), val = tensor([1, 1216, 1, 1500])]; + tensor var_30060_end_mask_0 = const()[name = tensor("op_30060_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_30060_cast_fp16 = slice_by_index(begin = var_30060_begin_0, end = var_30060_end_0, end_mask = var_30060_end_mask_0, x = value_39_cast_fp16)[name = tensor("op_30060_cast_fp16")]; + tensor var_30064_begin_0 = const()[name = tensor("op_30064_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_30064_end_0 = const()[name = tensor("op_30064_end_0"), val = tensor([1, 1280, 1, 1500])]; + tensor var_30064_end_mask_0 = const()[name = tensor("op_30064_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_30064_cast_fp16 = slice_by_index(begin = var_30064_begin_0, end = var_30064_end_0, end_mask = var_30064_end_mask_0, x = value_39_cast_fp16)[name = tensor("op_30064_cast_fp16")]; + tensor var_30068_equation_0 = const()[name = tensor("op_30068_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30068_cast_fp16 = einsum(equation = var_30068_equation_0, values = (var_29910_cast_fp16, var_29352_cast_fp16))[name = tensor("op_30068_cast_fp16")]; + tensor var_30069_to_fp16 = const()[name = tensor("op_30069_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3041_cast_fp16 = mul(x = var_30068_cast_fp16, y = var_30069_to_fp16)[name = tensor("aw_chunk_3041_cast_fp16")]; + tensor var_30072_equation_0 = const()[name = tensor("op_30072_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30072_cast_fp16 = einsum(equation = var_30072_equation_0, values = (var_29910_cast_fp16, var_29359_cast_fp16))[name = tensor("op_30072_cast_fp16")]; + tensor var_30073_to_fp16 = const()[name = tensor("op_30073_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3043_cast_fp16 = mul(x = var_30072_cast_fp16, y = var_30073_to_fp16)[name = tensor("aw_chunk_3043_cast_fp16")]; + tensor var_30076_equation_0 = const()[name = tensor("op_30076_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30076_cast_fp16 = einsum(equation = var_30076_equation_0, values = (var_29910_cast_fp16, var_29366_cast_fp16))[name = tensor("op_30076_cast_fp16")]; + tensor var_30077_to_fp16 = const()[name = tensor("op_30077_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3045_cast_fp16 = mul(x = var_30076_cast_fp16, y = var_30077_to_fp16)[name = tensor("aw_chunk_3045_cast_fp16")]; + tensor var_30080_equation_0 = const()[name = tensor("op_30080_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30080_cast_fp16 = einsum(equation = var_30080_equation_0, values = (var_29910_cast_fp16, var_29373_cast_fp16))[name = tensor("op_30080_cast_fp16")]; + tensor var_30081_to_fp16 = const()[name = tensor("op_30081_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3047_cast_fp16 = mul(x = var_30080_cast_fp16, y = var_30081_to_fp16)[name = tensor("aw_chunk_3047_cast_fp16")]; + tensor var_30084_equation_0 = const()[name = tensor("op_30084_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30084_cast_fp16 = einsum(equation = var_30084_equation_0, values = (var_29914_cast_fp16, var_29380_cast_fp16))[name = tensor("op_30084_cast_fp16")]; + tensor var_30085_to_fp16 = const()[name = tensor("op_30085_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3049_cast_fp16 = mul(x = var_30084_cast_fp16, y = var_30085_to_fp16)[name = tensor("aw_chunk_3049_cast_fp16")]; + tensor var_30088_equation_0 = const()[name = tensor("op_30088_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30088_cast_fp16 = einsum(equation = var_30088_equation_0, values = (var_29914_cast_fp16, var_29387_cast_fp16))[name = tensor("op_30088_cast_fp16")]; + tensor var_30089_to_fp16 = const()[name = tensor("op_30089_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3051_cast_fp16 = mul(x = var_30088_cast_fp16, y = var_30089_to_fp16)[name = tensor("aw_chunk_3051_cast_fp16")]; + tensor var_30092_equation_0 = const()[name = tensor("op_30092_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30092_cast_fp16 = einsum(equation = var_30092_equation_0, values = (var_29914_cast_fp16, var_29394_cast_fp16))[name = tensor("op_30092_cast_fp16")]; + tensor var_30093_to_fp16 = const()[name = tensor("op_30093_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3053_cast_fp16 = mul(x = var_30092_cast_fp16, y = var_30093_to_fp16)[name = tensor("aw_chunk_3053_cast_fp16")]; + tensor var_30096_equation_0 = const()[name = tensor("op_30096_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30096_cast_fp16 = einsum(equation = var_30096_equation_0, values = (var_29914_cast_fp16, var_29401_cast_fp16))[name = tensor("op_30096_cast_fp16")]; + tensor var_30097_to_fp16 = const()[name = tensor("op_30097_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3055_cast_fp16 = mul(x = var_30096_cast_fp16, y = var_30097_to_fp16)[name = tensor("aw_chunk_3055_cast_fp16")]; + tensor var_30100_equation_0 = const()[name = tensor("op_30100_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30100_cast_fp16 = einsum(equation = var_30100_equation_0, values = (var_29918_cast_fp16, var_29408_cast_fp16))[name = tensor("op_30100_cast_fp16")]; + tensor var_30101_to_fp16 = const()[name = tensor("op_30101_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3057_cast_fp16 = mul(x = var_30100_cast_fp16, y = var_30101_to_fp16)[name = tensor("aw_chunk_3057_cast_fp16")]; + tensor var_30104_equation_0 = const()[name = tensor("op_30104_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30104_cast_fp16 = einsum(equation = var_30104_equation_0, values = (var_29918_cast_fp16, var_29415_cast_fp16))[name = tensor("op_30104_cast_fp16")]; + tensor var_30105_to_fp16 = const()[name = tensor("op_30105_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3059_cast_fp16 = mul(x = var_30104_cast_fp16, y = var_30105_to_fp16)[name = tensor("aw_chunk_3059_cast_fp16")]; + tensor var_30108_equation_0 = const()[name = tensor("op_30108_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30108_cast_fp16 = einsum(equation = var_30108_equation_0, values = (var_29918_cast_fp16, var_29422_cast_fp16))[name = tensor("op_30108_cast_fp16")]; + tensor var_30109_to_fp16 = const()[name = tensor("op_30109_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3061_cast_fp16 = mul(x = var_30108_cast_fp16, y = var_30109_to_fp16)[name = tensor("aw_chunk_3061_cast_fp16")]; + tensor var_30112_equation_0 = const()[name = tensor("op_30112_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30112_cast_fp16 = einsum(equation = var_30112_equation_0, values = (var_29918_cast_fp16, var_29429_cast_fp16))[name = tensor("op_30112_cast_fp16")]; + tensor var_30113_to_fp16 = const()[name = tensor("op_30113_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3063_cast_fp16 = mul(x = var_30112_cast_fp16, y = var_30113_to_fp16)[name = tensor("aw_chunk_3063_cast_fp16")]; + tensor var_30116_equation_0 = const()[name = tensor("op_30116_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30116_cast_fp16 = einsum(equation = var_30116_equation_0, values = (var_29922_cast_fp16, var_29436_cast_fp16))[name = tensor("op_30116_cast_fp16")]; + tensor var_30117_to_fp16 = const()[name = tensor("op_30117_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3065_cast_fp16 = mul(x = var_30116_cast_fp16, y = var_30117_to_fp16)[name = tensor("aw_chunk_3065_cast_fp16")]; + tensor var_30120_equation_0 = const()[name = tensor("op_30120_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30120_cast_fp16 = einsum(equation = var_30120_equation_0, values = (var_29922_cast_fp16, var_29443_cast_fp16))[name = tensor("op_30120_cast_fp16")]; + tensor var_30121_to_fp16 = const()[name = tensor("op_30121_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3067_cast_fp16 = mul(x = var_30120_cast_fp16, y = var_30121_to_fp16)[name = tensor("aw_chunk_3067_cast_fp16")]; + tensor var_30124_equation_0 = const()[name = tensor("op_30124_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30124_cast_fp16 = einsum(equation = var_30124_equation_0, values = (var_29922_cast_fp16, var_29450_cast_fp16))[name = tensor("op_30124_cast_fp16")]; + tensor var_30125_to_fp16 = const()[name = tensor("op_30125_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3069_cast_fp16 = mul(x = var_30124_cast_fp16, y = var_30125_to_fp16)[name = tensor("aw_chunk_3069_cast_fp16")]; + tensor var_30128_equation_0 = const()[name = tensor("op_30128_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30128_cast_fp16 = einsum(equation = var_30128_equation_0, values = (var_29922_cast_fp16, var_29457_cast_fp16))[name = tensor("op_30128_cast_fp16")]; + tensor var_30129_to_fp16 = const()[name = tensor("op_30129_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3071_cast_fp16 = mul(x = var_30128_cast_fp16, y = var_30129_to_fp16)[name = tensor("aw_chunk_3071_cast_fp16")]; + tensor var_30132_equation_0 = const()[name = tensor("op_30132_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30132_cast_fp16 = einsum(equation = var_30132_equation_0, values = (var_29926_cast_fp16, var_29464_cast_fp16))[name = tensor("op_30132_cast_fp16")]; + tensor var_30133_to_fp16 = const()[name = tensor("op_30133_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3073_cast_fp16 = mul(x = var_30132_cast_fp16, y = var_30133_to_fp16)[name = tensor("aw_chunk_3073_cast_fp16")]; + tensor var_30136_equation_0 = const()[name = tensor("op_30136_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30136_cast_fp16 = einsum(equation = var_30136_equation_0, values = (var_29926_cast_fp16, var_29471_cast_fp16))[name = tensor("op_30136_cast_fp16")]; + tensor var_30137_to_fp16 = const()[name = tensor("op_30137_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3075_cast_fp16 = mul(x = var_30136_cast_fp16, y = var_30137_to_fp16)[name = tensor("aw_chunk_3075_cast_fp16")]; + tensor var_30140_equation_0 = const()[name = tensor("op_30140_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30140_cast_fp16 = einsum(equation = var_30140_equation_0, values = (var_29926_cast_fp16, var_29478_cast_fp16))[name = tensor("op_30140_cast_fp16")]; + tensor var_30141_to_fp16 = const()[name = tensor("op_30141_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3077_cast_fp16 = mul(x = var_30140_cast_fp16, y = var_30141_to_fp16)[name = tensor("aw_chunk_3077_cast_fp16")]; + tensor var_30144_equation_0 = const()[name = tensor("op_30144_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30144_cast_fp16 = einsum(equation = var_30144_equation_0, values = (var_29926_cast_fp16, var_29485_cast_fp16))[name = tensor("op_30144_cast_fp16")]; + tensor var_30145_to_fp16 = const()[name = tensor("op_30145_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3079_cast_fp16 = mul(x = var_30144_cast_fp16, y = var_30145_to_fp16)[name = tensor("aw_chunk_3079_cast_fp16")]; + tensor var_30148_equation_0 = const()[name = tensor("op_30148_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30148_cast_fp16 = einsum(equation = var_30148_equation_0, values = (var_29930_cast_fp16, var_29492_cast_fp16))[name = tensor("op_30148_cast_fp16")]; + tensor var_30149_to_fp16 = const()[name = tensor("op_30149_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3081_cast_fp16 = mul(x = var_30148_cast_fp16, y = var_30149_to_fp16)[name = tensor("aw_chunk_3081_cast_fp16")]; + tensor var_30152_equation_0 = const()[name = tensor("op_30152_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30152_cast_fp16 = einsum(equation = var_30152_equation_0, values = (var_29930_cast_fp16, var_29499_cast_fp16))[name = tensor("op_30152_cast_fp16")]; + tensor var_30153_to_fp16 = const()[name = tensor("op_30153_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3083_cast_fp16 = mul(x = var_30152_cast_fp16, y = var_30153_to_fp16)[name = tensor("aw_chunk_3083_cast_fp16")]; + tensor var_30156_equation_0 = const()[name = tensor("op_30156_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30156_cast_fp16 = einsum(equation = var_30156_equation_0, values = (var_29930_cast_fp16, var_29506_cast_fp16))[name = tensor("op_30156_cast_fp16")]; + tensor var_30157_to_fp16 = const()[name = tensor("op_30157_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3085_cast_fp16 = mul(x = var_30156_cast_fp16, y = var_30157_to_fp16)[name = tensor("aw_chunk_3085_cast_fp16")]; + tensor var_30160_equation_0 = const()[name = tensor("op_30160_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30160_cast_fp16 = einsum(equation = var_30160_equation_0, values = (var_29930_cast_fp16, var_29513_cast_fp16))[name = tensor("op_30160_cast_fp16")]; + tensor var_30161_to_fp16 = const()[name = tensor("op_30161_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3087_cast_fp16 = mul(x = var_30160_cast_fp16, y = var_30161_to_fp16)[name = tensor("aw_chunk_3087_cast_fp16")]; + tensor var_30164_equation_0 = const()[name = tensor("op_30164_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30164_cast_fp16 = einsum(equation = var_30164_equation_0, values = (var_29934_cast_fp16, var_29520_cast_fp16))[name = tensor("op_30164_cast_fp16")]; + tensor var_30165_to_fp16 = const()[name = tensor("op_30165_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3089_cast_fp16 = mul(x = var_30164_cast_fp16, y = var_30165_to_fp16)[name = tensor("aw_chunk_3089_cast_fp16")]; + tensor var_30168_equation_0 = const()[name = tensor("op_30168_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30168_cast_fp16 = einsum(equation = var_30168_equation_0, values = (var_29934_cast_fp16, var_29527_cast_fp16))[name = tensor("op_30168_cast_fp16")]; + tensor var_30169_to_fp16 = const()[name = tensor("op_30169_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3091_cast_fp16 = mul(x = var_30168_cast_fp16, y = var_30169_to_fp16)[name = tensor("aw_chunk_3091_cast_fp16")]; + tensor var_30172_equation_0 = const()[name = tensor("op_30172_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30172_cast_fp16 = einsum(equation = var_30172_equation_0, values = (var_29934_cast_fp16, var_29534_cast_fp16))[name = tensor("op_30172_cast_fp16")]; + tensor var_30173_to_fp16 = const()[name = tensor("op_30173_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3093_cast_fp16 = mul(x = var_30172_cast_fp16, y = var_30173_to_fp16)[name = tensor("aw_chunk_3093_cast_fp16")]; + tensor var_30176_equation_0 = const()[name = tensor("op_30176_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30176_cast_fp16 = einsum(equation = var_30176_equation_0, values = (var_29934_cast_fp16, var_29541_cast_fp16))[name = tensor("op_30176_cast_fp16")]; + tensor var_30177_to_fp16 = const()[name = tensor("op_30177_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3095_cast_fp16 = mul(x = var_30176_cast_fp16, y = var_30177_to_fp16)[name = tensor("aw_chunk_3095_cast_fp16")]; + tensor var_30180_equation_0 = const()[name = tensor("op_30180_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30180_cast_fp16 = einsum(equation = var_30180_equation_0, values = (var_29938_cast_fp16, var_29548_cast_fp16))[name = tensor("op_30180_cast_fp16")]; + tensor var_30181_to_fp16 = const()[name = tensor("op_30181_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3097_cast_fp16 = mul(x = var_30180_cast_fp16, y = var_30181_to_fp16)[name = tensor("aw_chunk_3097_cast_fp16")]; + tensor var_30184_equation_0 = const()[name = tensor("op_30184_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30184_cast_fp16 = einsum(equation = var_30184_equation_0, values = (var_29938_cast_fp16, var_29555_cast_fp16))[name = tensor("op_30184_cast_fp16")]; + tensor var_30185_to_fp16 = const()[name = tensor("op_30185_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3099_cast_fp16 = mul(x = var_30184_cast_fp16, y = var_30185_to_fp16)[name = tensor("aw_chunk_3099_cast_fp16")]; + tensor var_30188_equation_0 = const()[name = tensor("op_30188_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30188_cast_fp16 = einsum(equation = var_30188_equation_0, values = (var_29938_cast_fp16, var_29562_cast_fp16))[name = tensor("op_30188_cast_fp16")]; + tensor var_30189_to_fp16 = const()[name = tensor("op_30189_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3101_cast_fp16 = mul(x = var_30188_cast_fp16, y = var_30189_to_fp16)[name = tensor("aw_chunk_3101_cast_fp16")]; + tensor var_30192_equation_0 = const()[name = tensor("op_30192_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30192_cast_fp16 = einsum(equation = var_30192_equation_0, values = (var_29938_cast_fp16, var_29569_cast_fp16))[name = tensor("op_30192_cast_fp16")]; + tensor var_30193_to_fp16 = const()[name = tensor("op_30193_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3103_cast_fp16 = mul(x = var_30192_cast_fp16, y = var_30193_to_fp16)[name = tensor("aw_chunk_3103_cast_fp16")]; + tensor var_30196_equation_0 = const()[name = tensor("op_30196_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30196_cast_fp16 = einsum(equation = var_30196_equation_0, values = (var_29942_cast_fp16, var_29576_cast_fp16))[name = tensor("op_30196_cast_fp16")]; + tensor var_30197_to_fp16 = const()[name = tensor("op_30197_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3105_cast_fp16 = mul(x = var_30196_cast_fp16, y = var_30197_to_fp16)[name = tensor("aw_chunk_3105_cast_fp16")]; + tensor var_30200_equation_0 = const()[name = tensor("op_30200_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30200_cast_fp16 = einsum(equation = var_30200_equation_0, values = (var_29942_cast_fp16, var_29583_cast_fp16))[name = tensor("op_30200_cast_fp16")]; + tensor var_30201_to_fp16 = const()[name = tensor("op_30201_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3107_cast_fp16 = mul(x = var_30200_cast_fp16, y = var_30201_to_fp16)[name = tensor("aw_chunk_3107_cast_fp16")]; + tensor var_30204_equation_0 = const()[name = tensor("op_30204_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30204_cast_fp16 = einsum(equation = var_30204_equation_0, values = (var_29942_cast_fp16, var_29590_cast_fp16))[name = tensor("op_30204_cast_fp16")]; + tensor var_30205_to_fp16 = const()[name = tensor("op_30205_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3109_cast_fp16 = mul(x = var_30204_cast_fp16, y = var_30205_to_fp16)[name = tensor("aw_chunk_3109_cast_fp16")]; + tensor var_30208_equation_0 = const()[name = tensor("op_30208_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30208_cast_fp16 = einsum(equation = var_30208_equation_0, values = (var_29942_cast_fp16, var_29597_cast_fp16))[name = tensor("op_30208_cast_fp16")]; + tensor var_30209_to_fp16 = const()[name = tensor("op_30209_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3111_cast_fp16 = mul(x = var_30208_cast_fp16, y = var_30209_to_fp16)[name = tensor("aw_chunk_3111_cast_fp16")]; + tensor var_30212_equation_0 = const()[name = tensor("op_30212_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30212_cast_fp16 = einsum(equation = var_30212_equation_0, values = (var_29946_cast_fp16, var_29604_cast_fp16))[name = tensor("op_30212_cast_fp16")]; + tensor var_30213_to_fp16 = const()[name = tensor("op_30213_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3113_cast_fp16 = mul(x = var_30212_cast_fp16, y = var_30213_to_fp16)[name = tensor("aw_chunk_3113_cast_fp16")]; + tensor var_30216_equation_0 = const()[name = tensor("op_30216_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30216_cast_fp16 = einsum(equation = var_30216_equation_0, values = (var_29946_cast_fp16, var_29611_cast_fp16))[name = tensor("op_30216_cast_fp16")]; + tensor var_30217_to_fp16 = const()[name = tensor("op_30217_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3115_cast_fp16 = mul(x = var_30216_cast_fp16, y = var_30217_to_fp16)[name = tensor("aw_chunk_3115_cast_fp16")]; + tensor var_30220_equation_0 = const()[name = tensor("op_30220_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30220_cast_fp16 = einsum(equation = var_30220_equation_0, values = (var_29946_cast_fp16, var_29618_cast_fp16))[name = tensor("op_30220_cast_fp16")]; + tensor var_30221_to_fp16 = const()[name = tensor("op_30221_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3117_cast_fp16 = mul(x = var_30220_cast_fp16, y = var_30221_to_fp16)[name = tensor("aw_chunk_3117_cast_fp16")]; + tensor var_30224_equation_0 = const()[name = tensor("op_30224_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30224_cast_fp16 = einsum(equation = var_30224_equation_0, values = (var_29946_cast_fp16, var_29625_cast_fp16))[name = tensor("op_30224_cast_fp16")]; + tensor var_30225_to_fp16 = const()[name = tensor("op_30225_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3119_cast_fp16 = mul(x = var_30224_cast_fp16, y = var_30225_to_fp16)[name = tensor("aw_chunk_3119_cast_fp16")]; + tensor var_30228_equation_0 = const()[name = tensor("op_30228_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30228_cast_fp16 = einsum(equation = var_30228_equation_0, values = (var_29950_cast_fp16, var_29632_cast_fp16))[name = tensor("op_30228_cast_fp16")]; + tensor var_30229_to_fp16 = const()[name = tensor("op_30229_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3121_cast_fp16 = mul(x = var_30228_cast_fp16, y = var_30229_to_fp16)[name = tensor("aw_chunk_3121_cast_fp16")]; + tensor var_30232_equation_0 = const()[name = tensor("op_30232_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30232_cast_fp16 = einsum(equation = var_30232_equation_0, values = (var_29950_cast_fp16, var_29639_cast_fp16))[name = tensor("op_30232_cast_fp16")]; + tensor var_30233_to_fp16 = const()[name = tensor("op_30233_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3123_cast_fp16 = mul(x = var_30232_cast_fp16, y = var_30233_to_fp16)[name = tensor("aw_chunk_3123_cast_fp16")]; + tensor var_30236_equation_0 = const()[name = tensor("op_30236_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30236_cast_fp16 = einsum(equation = var_30236_equation_0, values = (var_29950_cast_fp16, var_29646_cast_fp16))[name = tensor("op_30236_cast_fp16")]; + tensor var_30237_to_fp16 = const()[name = tensor("op_30237_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3125_cast_fp16 = mul(x = var_30236_cast_fp16, y = var_30237_to_fp16)[name = tensor("aw_chunk_3125_cast_fp16")]; + tensor var_30240_equation_0 = const()[name = tensor("op_30240_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30240_cast_fp16 = einsum(equation = var_30240_equation_0, values = (var_29950_cast_fp16, var_29653_cast_fp16))[name = tensor("op_30240_cast_fp16")]; + tensor var_30241_to_fp16 = const()[name = tensor("op_30241_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3127_cast_fp16 = mul(x = var_30240_cast_fp16, y = var_30241_to_fp16)[name = tensor("aw_chunk_3127_cast_fp16")]; + tensor var_30244_equation_0 = const()[name = tensor("op_30244_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30244_cast_fp16 = einsum(equation = var_30244_equation_0, values = (var_29954_cast_fp16, var_29660_cast_fp16))[name = tensor("op_30244_cast_fp16")]; + tensor var_30245_to_fp16 = const()[name = tensor("op_30245_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3129_cast_fp16 = mul(x = var_30244_cast_fp16, y = var_30245_to_fp16)[name = tensor("aw_chunk_3129_cast_fp16")]; + tensor var_30248_equation_0 = const()[name = tensor("op_30248_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30248_cast_fp16 = einsum(equation = var_30248_equation_0, values = (var_29954_cast_fp16, var_29667_cast_fp16))[name = tensor("op_30248_cast_fp16")]; + tensor var_30249_to_fp16 = const()[name = tensor("op_30249_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3131_cast_fp16 = mul(x = var_30248_cast_fp16, y = var_30249_to_fp16)[name = tensor("aw_chunk_3131_cast_fp16")]; + tensor var_30252_equation_0 = const()[name = tensor("op_30252_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30252_cast_fp16 = einsum(equation = var_30252_equation_0, values = (var_29954_cast_fp16, var_29674_cast_fp16))[name = tensor("op_30252_cast_fp16")]; + tensor var_30253_to_fp16 = const()[name = tensor("op_30253_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3133_cast_fp16 = mul(x = var_30252_cast_fp16, y = var_30253_to_fp16)[name = tensor("aw_chunk_3133_cast_fp16")]; + tensor var_30256_equation_0 = const()[name = tensor("op_30256_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30256_cast_fp16 = einsum(equation = var_30256_equation_0, values = (var_29954_cast_fp16, var_29681_cast_fp16))[name = tensor("op_30256_cast_fp16")]; + tensor var_30257_to_fp16 = const()[name = tensor("op_30257_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3135_cast_fp16 = mul(x = var_30256_cast_fp16, y = var_30257_to_fp16)[name = tensor("aw_chunk_3135_cast_fp16")]; + tensor var_30260_equation_0 = const()[name = tensor("op_30260_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30260_cast_fp16 = einsum(equation = var_30260_equation_0, values = (var_29958_cast_fp16, var_29688_cast_fp16))[name = tensor("op_30260_cast_fp16")]; + tensor var_30261_to_fp16 = const()[name = tensor("op_30261_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3137_cast_fp16 = mul(x = var_30260_cast_fp16, y = var_30261_to_fp16)[name = tensor("aw_chunk_3137_cast_fp16")]; + tensor var_30264_equation_0 = const()[name = tensor("op_30264_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30264_cast_fp16 = einsum(equation = var_30264_equation_0, values = (var_29958_cast_fp16, var_29695_cast_fp16))[name = tensor("op_30264_cast_fp16")]; + tensor var_30265_to_fp16 = const()[name = tensor("op_30265_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3139_cast_fp16 = mul(x = var_30264_cast_fp16, y = var_30265_to_fp16)[name = tensor("aw_chunk_3139_cast_fp16")]; + tensor var_30268_equation_0 = const()[name = tensor("op_30268_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30268_cast_fp16 = einsum(equation = var_30268_equation_0, values = (var_29958_cast_fp16, var_29702_cast_fp16))[name = tensor("op_30268_cast_fp16")]; + tensor var_30269_to_fp16 = const()[name = tensor("op_30269_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3141_cast_fp16 = mul(x = var_30268_cast_fp16, y = var_30269_to_fp16)[name = tensor("aw_chunk_3141_cast_fp16")]; + tensor var_30272_equation_0 = const()[name = tensor("op_30272_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30272_cast_fp16 = einsum(equation = var_30272_equation_0, values = (var_29958_cast_fp16, var_29709_cast_fp16))[name = tensor("op_30272_cast_fp16")]; + tensor var_30273_to_fp16 = const()[name = tensor("op_30273_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3143_cast_fp16 = mul(x = var_30272_cast_fp16, y = var_30273_to_fp16)[name = tensor("aw_chunk_3143_cast_fp16")]; + tensor var_30276_equation_0 = const()[name = tensor("op_30276_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30276_cast_fp16 = einsum(equation = var_30276_equation_0, values = (var_29962_cast_fp16, var_29716_cast_fp16))[name = tensor("op_30276_cast_fp16")]; + tensor var_30277_to_fp16 = const()[name = tensor("op_30277_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3145_cast_fp16 = mul(x = var_30276_cast_fp16, y = var_30277_to_fp16)[name = tensor("aw_chunk_3145_cast_fp16")]; + tensor var_30280_equation_0 = const()[name = tensor("op_30280_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30280_cast_fp16 = einsum(equation = var_30280_equation_0, values = (var_29962_cast_fp16, var_29723_cast_fp16))[name = tensor("op_30280_cast_fp16")]; + tensor var_30281_to_fp16 = const()[name = tensor("op_30281_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3147_cast_fp16 = mul(x = var_30280_cast_fp16, y = var_30281_to_fp16)[name = tensor("aw_chunk_3147_cast_fp16")]; + tensor var_30284_equation_0 = const()[name = tensor("op_30284_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30284_cast_fp16 = einsum(equation = var_30284_equation_0, values = (var_29962_cast_fp16, var_29730_cast_fp16))[name = tensor("op_30284_cast_fp16")]; + tensor var_30285_to_fp16 = const()[name = tensor("op_30285_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3149_cast_fp16 = mul(x = var_30284_cast_fp16, y = var_30285_to_fp16)[name = tensor("aw_chunk_3149_cast_fp16")]; + tensor var_30288_equation_0 = const()[name = tensor("op_30288_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30288_cast_fp16 = einsum(equation = var_30288_equation_0, values = (var_29962_cast_fp16, var_29737_cast_fp16))[name = tensor("op_30288_cast_fp16")]; + tensor var_30289_to_fp16 = const()[name = tensor("op_30289_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3151_cast_fp16 = mul(x = var_30288_cast_fp16, y = var_30289_to_fp16)[name = tensor("aw_chunk_3151_cast_fp16")]; + tensor var_30292_equation_0 = const()[name = tensor("op_30292_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30292_cast_fp16 = einsum(equation = var_30292_equation_0, values = (var_29966_cast_fp16, var_29744_cast_fp16))[name = tensor("op_30292_cast_fp16")]; + tensor var_30293_to_fp16 = const()[name = tensor("op_30293_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3153_cast_fp16 = mul(x = var_30292_cast_fp16, y = var_30293_to_fp16)[name = tensor("aw_chunk_3153_cast_fp16")]; + tensor var_30296_equation_0 = const()[name = tensor("op_30296_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30296_cast_fp16 = einsum(equation = var_30296_equation_0, values = (var_29966_cast_fp16, var_29751_cast_fp16))[name = tensor("op_30296_cast_fp16")]; + tensor var_30297_to_fp16 = const()[name = tensor("op_30297_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3155_cast_fp16 = mul(x = var_30296_cast_fp16, y = var_30297_to_fp16)[name = tensor("aw_chunk_3155_cast_fp16")]; + tensor var_30300_equation_0 = const()[name = tensor("op_30300_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30300_cast_fp16 = einsum(equation = var_30300_equation_0, values = (var_29966_cast_fp16, var_29758_cast_fp16))[name = tensor("op_30300_cast_fp16")]; + tensor var_30301_to_fp16 = const()[name = tensor("op_30301_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3157_cast_fp16 = mul(x = var_30300_cast_fp16, y = var_30301_to_fp16)[name = tensor("aw_chunk_3157_cast_fp16")]; + tensor var_30304_equation_0 = const()[name = tensor("op_30304_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30304_cast_fp16 = einsum(equation = var_30304_equation_0, values = (var_29966_cast_fp16, var_29765_cast_fp16))[name = tensor("op_30304_cast_fp16")]; + tensor var_30305_to_fp16 = const()[name = tensor("op_30305_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3159_cast_fp16 = mul(x = var_30304_cast_fp16, y = var_30305_to_fp16)[name = tensor("aw_chunk_3159_cast_fp16")]; + tensor var_30308_equation_0 = const()[name = tensor("op_30308_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30308_cast_fp16 = einsum(equation = var_30308_equation_0, values = (var_29970_cast_fp16, var_29772_cast_fp16))[name = tensor("op_30308_cast_fp16")]; + tensor var_30309_to_fp16 = const()[name = tensor("op_30309_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3161_cast_fp16 = mul(x = var_30308_cast_fp16, y = var_30309_to_fp16)[name = tensor("aw_chunk_3161_cast_fp16")]; + tensor var_30312_equation_0 = const()[name = tensor("op_30312_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30312_cast_fp16 = einsum(equation = var_30312_equation_0, values = (var_29970_cast_fp16, var_29779_cast_fp16))[name = tensor("op_30312_cast_fp16")]; + tensor var_30313_to_fp16 = const()[name = tensor("op_30313_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3163_cast_fp16 = mul(x = var_30312_cast_fp16, y = var_30313_to_fp16)[name = tensor("aw_chunk_3163_cast_fp16")]; + tensor var_30316_equation_0 = const()[name = tensor("op_30316_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30316_cast_fp16 = einsum(equation = var_30316_equation_0, values = (var_29970_cast_fp16, var_29786_cast_fp16))[name = tensor("op_30316_cast_fp16")]; + tensor var_30317_to_fp16 = const()[name = tensor("op_30317_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3165_cast_fp16 = mul(x = var_30316_cast_fp16, y = var_30317_to_fp16)[name = tensor("aw_chunk_3165_cast_fp16")]; + tensor var_30320_equation_0 = const()[name = tensor("op_30320_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30320_cast_fp16 = einsum(equation = var_30320_equation_0, values = (var_29970_cast_fp16, var_29793_cast_fp16))[name = tensor("op_30320_cast_fp16")]; + tensor var_30321_to_fp16 = const()[name = tensor("op_30321_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3167_cast_fp16 = mul(x = var_30320_cast_fp16, y = var_30321_to_fp16)[name = tensor("aw_chunk_3167_cast_fp16")]; + tensor var_30324_equation_0 = const()[name = tensor("op_30324_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30324_cast_fp16 = einsum(equation = var_30324_equation_0, values = (var_29974_cast_fp16, var_29800_cast_fp16))[name = tensor("op_30324_cast_fp16")]; + tensor var_30325_to_fp16 = const()[name = tensor("op_30325_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3169_cast_fp16 = mul(x = var_30324_cast_fp16, y = var_30325_to_fp16)[name = tensor("aw_chunk_3169_cast_fp16")]; + tensor var_30328_equation_0 = const()[name = tensor("op_30328_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30328_cast_fp16 = einsum(equation = var_30328_equation_0, values = (var_29974_cast_fp16, var_29807_cast_fp16))[name = tensor("op_30328_cast_fp16")]; + tensor var_30329_to_fp16 = const()[name = tensor("op_30329_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3171_cast_fp16 = mul(x = var_30328_cast_fp16, y = var_30329_to_fp16)[name = tensor("aw_chunk_3171_cast_fp16")]; + tensor var_30332_equation_0 = const()[name = tensor("op_30332_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30332_cast_fp16 = einsum(equation = var_30332_equation_0, values = (var_29974_cast_fp16, var_29814_cast_fp16))[name = tensor("op_30332_cast_fp16")]; + tensor var_30333_to_fp16 = const()[name = tensor("op_30333_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3173_cast_fp16 = mul(x = var_30332_cast_fp16, y = var_30333_to_fp16)[name = tensor("aw_chunk_3173_cast_fp16")]; + tensor var_30336_equation_0 = const()[name = tensor("op_30336_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30336_cast_fp16 = einsum(equation = var_30336_equation_0, values = (var_29974_cast_fp16, var_29821_cast_fp16))[name = tensor("op_30336_cast_fp16")]; + tensor var_30337_to_fp16 = const()[name = tensor("op_30337_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3175_cast_fp16 = mul(x = var_30336_cast_fp16, y = var_30337_to_fp16)[name = tensor("aw_chunk_3175_cast_fp16")]; + tensor var_30340_equation_0 = const()[name = tensor("op_30340_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30340_cast_fp16 = einsum(equation = var_30340_equation_0, values = (var_29978_cast_fp16, var_29828_cast_fp16))[name = tensor("op_30340_cast_fp16")]; + tensor var_30341_to_fp16 = const()[name = tensor("op_30341_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3177_cast_fp16 = mul(x = var_30340_cast_fp16, y = var_30341_to_fp16)[name = tensor("aw_chunk_3177_cast_fp16")]; + tensor var_30344_equation_0 = const()[name = tensor("op_30344_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30344_cast_fp16 = einsum(equation = var_30344_equation_0, values = (var_29978_cast_fp16, var_29835_cast_fp16))[name = tensor("op_30344_cast_fp16")]; + tensor var_30345_to_fp16 = const()[name = tensor("op_30345_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3179_cast_fp16 = mul(x = var_30344_cast_fp16, y = var_30345_to_fp16)[name = tensor("aw_chunk_3179_cast_fp16")]; + tensor var_30348_equation_0 = const()[name = tensor("op_30348_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30348_cast_fp16 = einsum(equation = var_30348_equation_0, values = (var_29978_cast_fp16, var_29842_cast_fp16))[name = tensor("op_30348_cast_fp16")]; + tensor var_30349_to_fp16 = const()[name = tensor("op_30349_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3181_cast_fp16 = mul(x = var_30348_cast_fp16, y = var_30349_to_fp16)[name = tensor("aw_chunk_3181_cast_fp16")]; + tensor var_30352_equation_0 = const()[name = tensor("op_30352_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30352_cast_fp16 = einsum(equation = var_30352_equation_0, values = (var_29978_cast_fp16, var_29849_cast_fp16))[name = tensor("op_30352_cast_fp16")]; + tensor var_30353_to_fp16 = const()[name = tensor("op_30353_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3183_cast_fp16 = mul(x = var_30352_cast_fp16, y = var_30353_to_fp16)[name = tensor("aw_chunk_3183_cast_fp16")]; + tensor var_30356_equation_0 = const()[name = tensor("op_30356_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30356_cast_fp16 = einsum(equation = var_30356_equation_0, values = (var_29982_cast_fp16, var_29856_cast_fp16))[name = tensor("op_30356_cast_fp16")]; + tensor var_30357_to_fp16 = const()[name = tensor("op_30357_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3185_cast_fp16 = mul(x = var_30356_cast_fp16, y = var_30357_to_fp16)[name = tensor("aw_chunk_3185_cast_fp16")]; + tensor var_30360_equation_0 = const()[name = tensor("op_30360_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30360_cast_fp16 = einsum(equation = var_30360_equation_0, values = (var_29982_cast_fp16, var_29863_cast_fp16))[name = tensor("op_30360_cast_fp16")]; + tensor var_30361_to_fp16 = const()[name = tensor("op_30361_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3187_cast_fp16 = mul(x = var_30360_cast_fp16, y = var_30361_to_fp16)[name = tensor("aw_chunk_3187_cast_fp16")]; + tensor var_30364_equation_0 = const()[name = tensor("op_30364_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30364_cast_fp16 = einsum(equation = var_30364_equation_0, values = (var_29982_cast_fp16, var_29870_cast_fp16))[name = tensor("op_30364_cast_fp16")]; + tensor var_30365_to_fp16 = const()[name = tensor("op_30365_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3189_cast_fp16 = mul(x = var_30364_cast_fp16, y = var_30365_to_fp16)[name = tensor("aw_chunk_3189_cast_fp16")]; + tensor var_30368_equation_0 = const()[name = tensor("op_30368_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30368_cast_fp16 = einsum(equation = var_30368_equation_0, values = (var_29982_cast_fp16, var_29877_cast_fp16))[name = tensor("op_30368_cast_fp16")]; + tensor var_30369_to_fp16 = const()[name = tensor("op_30369_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3191_cast_fp16 = mul(x = var_30368_cast_fp16, y = var_30369_to_fp16)[name = tensor("aw_chunk_3191_cast_fp16")]; + tensor var_30372_equation_0 = const()[name = tensor("op_30372_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30372_cast_fp16 = einsum(equation = var_30372_equation_0, values = (var_29986_cast_fp16, var_29884_cast_fp16))[name = tensor("op_30372_cast_fp16")]; + tensor var_30373_to_fp16 = const()[name = tensor("op_30373_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3193_cast_fp16 = mul(x = var_30372_cast_fp16, y = var_30373_to_fp16)[name = tensor("aw_chunk_3193_cast_fp16")]; + tensor var_30376_equation_0 = const()[name = tensor("op_30376_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30376_cast_fp16 = einsum(equation = var_30376_equation_0, values = (var_29986_cast_fp16, var_29891_cast_fp16))[name = tensor("op_30376_cast_fp16")]; + tensor var_30377_to_fp16 = const()[name = tensor("op_30377_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3195_cast_fp16 = mul(x = var_30376_cast_fp16, y = var_30377_to_fp16)[name = tensor("aw_chunk_3195_cast_fp16")]; + tensor var_30380_equation_0 = const()[name = tensor("op_30380_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30380_cast_fp16 = einsum(equation = var_30380_equation_0, values = (var_29986_cast_fp16, var_29898_cast_fp16))[name = tensor("op_30380_cast_fp16")]; + tensor var_30381_to_fp16 = const()[name = tensor("op_30381_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3197_cast_fp16 = mul(x = var_30380_cast_fp16, y = var_30381_to_fp16)[name = tensor("aw_chunk_3197_cast_fp16")]; + tensor var_30384_equation_0 = const()[name = tensor("op_30384_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_30384_cast_fp16 = einsum(equation = var_30384_equation_0, values = (var_29986_cast_fp16, var_29905_cast_fp16))[name = tensor("op_30384_cast_fp16")]; + tensor var_30385_to_fp16 = const()[name = tensor("op_30385_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3199_cast_fp16 = mul(x = var_30384_cast_fp16, y = var_30385_to_fp16)[name = tensor("aw_chunk_3199_cast_fp16")]; + tensor var_30387_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3041_cast_fp16)[name = tensor("op_30387_cast_fp16")]; + tensor var_30388_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3043_cast_fp16)[name = tensor("op_30388_cast_fp16")]; + tensor var_30389_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3045_cast_fp16)[name = tensor("op_30389_cast_fp16")]; + tensor var_30390_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3047_cast_fp16)[name = tensor("op_30390_cast_fp16")]; + tensor var_30391_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3049_cast_fp16)[name = tensor("op_30391_cast_fp16")]; + tensor var_30392_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3051_cast_fp16)[name = tensor("op_30392_cast_fp16")]; + tensor var_30393_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3053_cast_fp16)[name = tensor("op_30393_cast_fp16")]; + tensor var_30394_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3055_cast_fp16)[name = tensor("op_30394_cast_fp16")]; + tensor var_30395_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3057_cast_fp16)[name = tensor("op_30395_cast_fp16")]; + tensor var_30396_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3059_cast_fp16)[name = tensor("op_30396_cast_fp16")]; + tensor var_30397_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3061_cast_fp16)[name = tensor("op_30397_cast_fp16")]; + tensor var_30398_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3063_cast_fp16)[name = tensor("op_30398_cast_fp16")]; + tensor var_30399_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3065_cast_fp16)[name = tensor("op_30399_cast_fp16")]; + tensor var_30400_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3067_cast_fp16)[name = tensor("op_30400_cast_fp16")]; + tensor var_30401_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3069_cast_fp16)[name = tensor("op_30401_cast_fp16")]; + tensor var_30402_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3071_cast_fp16)[name = tensor("op_30402_cast_fp16")]; + tensor var_30403_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3073_cast_fp16)[name = tensor("op_30403_cast_fp16")]; + tensor var_30404_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3075_cast_fp16)[name = tensor("op_30404_cast_fp16")]; + tensor var_30405_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3077_cast_fp16)[name = tensor("op_30405_cast_fp16")]; + tensor var_30406_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3079_cast_fp16)[name = tensor("op_30406_cast_fp16")]; + tensor var_30407_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3081_cast_fp16)[name = tensor("op_30407_cast_fp16")]; + tensor var_30408_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3083_cast_fp16)[name = tensor("op_30408_cast_fp16")]; + tensor var_30409_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3085_cast_fp16)[name = tensor("op_30409_cast_fp16")]; + tensor var_30410_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3087_cast_fp16)[name = tensor("op_30410_cast_fp16")]; + tensor var_30411_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3089_cast_fp16)[name = tensor("op_30411_cast_fp16")]; + tensor var_30412_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3091_cast_fp16)[name = tensor("op_30412_cast_fp16")]; + tensor var_30413_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3093_cast_fp16)[name = tensor("op_30413_cast_fp16")]; + tensor var_30414_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3095_cast_fp16)[name = tensor("op_30414_cast_fp16")]; + tensor var_30415_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3097_cast_fp16)[name = tensor("op_30415_cast_fp16")]; + tensor var_30416_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3099_cast_fp16)[name = tensor("op_30416_cast_fp16")]; + tensor var_30417_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3101_cast_fp16)[name = tensor("op_30417_cast_fp16")]; + tensor var_30418_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3103_cast_fp16)[name = tensor("op_30418_cast_fp16")]; + tensor var_30419_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3105_cast_fp16)[name = tensor("op_30419_cast_fp16")]; + tensor var_30420_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3107_cast_fp16)[name = tensor("op_30420_cast_fp16")]; + tensor var_30421_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3109_cast_fp16)[name = tensor("op_30421_cast_fp16")]; + tensor var_30422_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3111_cast_fp16)[name = tensor("op_30422_cast_fp16")]; + tensor var_30423_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3113_cast_fp16)[name = tensor("op_30423_cast_fp16")]; + tensor var_30424_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3115_cast_fp16)[name = tensor("op_30424_cast_fp16")]; + tensor var_30425_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3117_cast_fp16)[name = tensor("op_30425_cast_fp16")]; + tensor var_30426_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3119_cast_fp16)[name = tensor("op_30426_cast_fp16")]; + tensor var_30427_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3121_cast_fp16)[name = tensor("op_30427_cast_fp16")]; + tensor var_30428_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3123_cast_fp16)[name = tensor("op_30428_cast_fp16")]; + tensor var_30429_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3125_cast_fp16)[name = tensor("op_30429_cast_fp16")]; + tensor var_30430_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3127_cast_fp16)[name = tensor("op_30430_cast_fp16")]; + tensor var_30431_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3129_cast_fp16)[name = tensor("op_30431_cast_fp16")]; + tensor var_30432_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3131_cast_fp16)[name = tensor("op_30432_cast_fp16")]; + tensor var_30433_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3133_cast_fp16)[name = tensor("op_30433_cast_fp16")]; + tensor var_30434_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3135_cast_fp16)[name = tensor("op_30434_cast_fp16")]; + tensor var_30435_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3137_cast_fp16)[name = tensor("op_30435_cast_fp16")]; + tensor var_30436_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3139_cast_fp16)[name = tensor("op_30436_cast_fp16")]; + tensor var_30437_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3141_cast_fp16)[name = tensor("op_30437_cast_fp16")]; + tensor var_30438_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3143_cast_fp16)[name = tensor("op_30438_cast_fp16")]; + tensor var_30439_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3145_cast_fp16)[name = tensor("op_30439_cast_fp16")]; + tensor var_30440_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3147_cast_fp16)[name = tensor("op_30440_cast_fp16")]; + tensor var_30441_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3149_cast_fp16)[name = tensor("op_30441_cast_fp16")]; + tensor var_30442_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3151_cast_fp16)[name = tensor("op_30442_cast_fp16")]; + tensor var_30443_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3153_cast_fp16)[name = tensor("op_30443_cast_fp16")]; + tensor var_30444_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3155_cast_fp16)[name = tensor("op_30444_cast_fp16")]; + tensor var_30445_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3157_cast_fp16)[name = tensor("op_30445_cast_fp16")]; + tensor var_30446_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3159_cast_fp16)[name = tensor("op_30446_cast_fp16")]; + tensor var_30447_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3161_cast_fp16)[name = tensor("op_30447_cast_fp16")]; + tensor var_30448_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3163_cast_fp16)[name = tensor("op_30448_cast_fp16")]; + tensor var_30449_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3165_cast_fp16)[name = tensor("op_30449_cast_fp16")]; + tensor var_30450_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3167_cast_fp16)[name = tensor("op_30450_cast_fp16")]; + tensor var_30451_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3169_cast_fp16)[name = tensor("op_30451_cast_fp16")]; + tensor var_30452_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3171_cast_fp16)[name = tensor("op_30452_cast_fp16")]; + tensor var_30453_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3173_cast_fp16)[name = tensor("op_30453_cast_fp16")]; + tensor var_30454_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3175_cast_fp16)[name = tensor("op_30454_cast_fp16")]; + tensor var_30455_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3177_cast_fp16)[name = tensor("op_30455_cast_fp16")]; + tensor var_30456_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3179_cast_fp16)[name = tensor("op_30456_cast_fp16")]; + tensor var_30457_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3181_cast_fp16)[name = tensor("op_30457_cast_fp16")]; + tensor var_30458_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3183_cast_fp16)[name = tensor("op_30458_cast_fp16")]; + tensor var_30459_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3185_cast_fp16)[name = tensor("op_30459_cast_fp16")]; + tensor var_30460_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3187_cast_fp16)[name = tensor("op_30460_cast_fp16")]; + tensor var_30461_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3189_cast_fp16)[name = tensor("op_30461_cast_fp16")]; + tensor var_30462_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3191_cast_fp16)[name = tensor("op_30462_cast_fp16")]; + tensor var_30463_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3193_cast_fp16)[name = tensor("op_30463_cast_fp16")]; + tensor var_30464_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3195_cast_fp16)[name = tensor("op_30464_cast_fp16")]; + tensor var_30465_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3197_cast_fp16)[name = tensor("op_30465_cast_fp16")]; + tensor var_30466_cast_fp16 = softmax(axis = var_29212, x = aw_chunk_3199_cast_fp16)[name = tensor("op_30466_cast_fp16")]; + tensor var_30468_equation_0 = const()[name = tensor("op_30468_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30468_cast_fp16 = einsum(equation = var_30468_equation_0, values = (var_29988_cast_fp16, var_30387_cast_fp16))[name = tensor("op_30468_cast_fp16")]; + tensor var_30470_equation_0 = const()[name = tensor("op_30470_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30470_cast_fp16 = einsum(equation = var_30470_equation_0, values = (var_29988_cast_fp16, var_30388_cast_fp16))[name = tensor("op_30470_cast_fp16")]; + tensor var_30472_equation_0 = const()[name = tensor("op_30472_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30472_cast_fp16 = einsum(equation = var_30472_equation_0, values = (var_29988_cast_fp16, var_30389_cast_fp16))[name = tensor("op_30472_cast_fp16")]; + tensor var_30474_equation_0 = const()[name = tensor("op_30474_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30474_cast_fp16 = einsum(equation = var_30474_equation_0, values = (var_29988_cast_fp16, var_30390_cast_fp16))[name = tensor("op_30474_cast_fp16")]; + tensor var_30476_equation_0 = const()[name = tensor("op_30476_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30476_cast_fp16 = einsum(equation = var_30476_equation_0, values = (var_29992_cast_fp16, var_30391_cast_fp16))[name = tensor("op_30476_cast_fp16")]; + tensor var_30478_equation_0 = const()[name = tensor("op_30478_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30478_cast_fp16 = einsum(equation = var_30478_equation_0, values = (var_29992_cast_fp16, var_30392_cast_fp16))[name = tensor("op_30478_cast_fp16")]; + tensor var_30480_equation_0 = const()[name = tensor("op_30480_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30480_cast_fp16 = einsum(equation = var_30480_equation_0, values = (var_29992_cast_fp16, var_30393_cast_fp16))[name = tensor("op_30480_cast_fp16")]; + tensor var_30482_equation_0 = const()[name = tensor("op_30482_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30482_cast_fp16 = einsum(equation = var_30482_equation_0, values = (var_29992_cast_fp16, var_30394_cast_fp16))[name = tensor("op_30482_cast_fp16")]; + tensor var_30484_equation_0 = const()[name = tensor("op_30484_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30484_cast_fp16 = einsum(equation = var_30484_equation_0, values = (var_29996_cast_fp16, var_30395_cast_fp16))[name = tensor("op_30484_cast_fp16")]; + tensor var_30486_equation_0 = const()[name = tensor("op_30486_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30486_cast_fp16 = einsum(equation = var_30486_equation_0, values = (var_29996_cast_fp16, var_30396_cast_fp16))[name = tensor("op_30486_cast_fp16")]; + tensor var_30488_equation_0 = const()[name = tensor("op_30488_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30488_cast_fp16 = einsum(equation = var_30488_equation_0, values = (var_29996_cast_fp16, var_30397_cast_fp16))[name = tensor("op_30488_cast_fp16")]; + tensor var_30490_equation_0 = const()[name = tensor("op_30490_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30490_cast_fp16 = einsum(equation = var_30490_equation_0, values = (var_29996_cast_fp16, var_30398_cast_fp16))[name = tensor("op_30490_cast_fp16")]; + tensor var_30492_equation_0 = const()[name = tensor("op_30492_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30492_cast_fp16 = einsum(equation = var_30492_equation_0, values = (var_30000_cast_fp16, var_30399_cast_fp16))[name = tensor("op_30492_cast_fp16")]; + tensor var_30494_equation_0 = const()[name = tensor("op_30494_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30494_cast_fp16 = einsum(equation = var_30494_equation_0, values = (var_30000_cast_fp16, var_30400_cast_fp16))[name = tensor("op_30494_cast_fp16")]; + tensor var_30496_equation_0 = const()[name = tensor("op_30496_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30496_cast_fp16 = einsum(equation = var_30496_equation_0, values = (var_30000_cast_fp16, var_30401_cast_fp16))[name = tensor("op_30496_cast_fp16")]; + tensor var_30498_equation_0 = const()[name = tensor("op_30498_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30498_cast_fp16 = einsum(equation = var_30498_equation_0, values = (var_30000_cast_fp16, var_30402_cast_fp16))[name = tensor("op_30498_cast_fp16")]; + tensor var_30500_equation_0 = const()[name = tensor("op_30500_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30500_cast_fp16 = einsum(equation = var_30500_equation_0, values = (var_30004_cast_fp16, var_30403_cast_fp16))[name = tensor("op_30500_cast_fp16")]; + tensor var_30502_equation_0 = const()[name = tensor("op_30502_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30502_cast_fp16 = einsum(equation = var_30502_equation_0, values = (var_30004_cast_fp16, var_30404_cast_fp16))[name = tensor("op_30502_cast_fp16")]; + tensor var_30504_equation_0 = const()[name = tensor("op_30504_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30504_cast_fp16 = einsum(equation = var_30504_equation_0, values = (var_30004_cast_fp16, var_30405_cast_fp16))[name = tensor("op_30504_cast_fp16")]; + tensor var_30506_equation_0 = const()[name = tensor("op_30506_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30506_cast_fp16 = einsum(equation = var_30506_equation_0, values = (var_30004_cast_fp16, var_30406_cast_fp16))[name = tensor("op_30506_cast_fp16")]; + tensor var_30508_equation_0 = const()[name = tensor("op_30508_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30508_cast_fp16 = einsum(equation = var_30508_equation_0, values = (var_30008_cast_fp16, var_30407_cast_fp16))[name = tensor("op_30508_cast_fp16")]; + tensor var_30510_equation_0 = const()[name = tensor("op_30510_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30510_cast_fp16 = einsum(equation = var_30510_equation_0, values = (var_30008_cast_fp16, var_30408_cast_fp16))[name = tensor("op_30510_cast_fp16")]; + tensor var_30512_equation_0 = const()[name = tensor("op_30512_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30512_cast_fp16 = einsum(equation = var_30512_equation_0, values = (var_30008_cast_fp16, var_30409_cast_fp16))[name = tensor("op_30512_cast_fp16")]; + tensor var_30514_equation_0 = const()[name = tensor("op_30514_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30514_cast_fp16 = einsum(equation = var_30514_equation_0, values = (var_30008_cast_fp16, var_30410_cast_fp16))[name = tensor("op_30514_cast_fp16")]; + tensor var_30516_equation_0 = const()[name = tensor("op_30516_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30516_cast_fp16 = einsum(equation = var_30516_equation_0, values = (var_30012_cast_fp16, var_30411_cast_fp16))[name = tensor("op_30516_cast_fp16")]; + tensor var_30518_equation_0 = const()[name = tensor("op_30518_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30518_cast_fp16 = einsum(equation = var_30518_equation_0, values = (var_30012_cast_fp16, var_30412_cast_fp16))[name = tensor("op_30518_cast_fp16")]; + tensor var_30520_equation_0 = const()[name = tensor("op_30520_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30520_cast_fp16 = einsum(equation = var_30520_equation_0, values = (var_30012_cast_fp16, var_30413_cast_fp16))[name = tensor("op_30520_cast_fp16")]; + tensor var_30522_equation_0 = const()[name = tensor("op_30522_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30522_cast_fp16 = einsum(equation = var_30522_equation_0, values = (var_30012_cast_fp16, var_30414_cast_fp16))[name = tensor("op_30522_cast_fp16")]; + tensor var_30524_equation_0 = const()[name = tensor("op_30524_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30524_cast_fp16 = einsum(equation = var_30524_equation_0, values = (var_30016_cast_fp16, var_30415_cast_fp16))[name = tensor("op_30524_cast_fp16")]; + tensor var_30526_equation_0 = const()[name = tensor("op_30526_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30526_cast_fp16 = einsum(equation = var_30526_equation_0, values = (var_30016_cast_fp16, var_30416_cast_fp16))[name = tensor("op_30526_cast_fp16")]; + tensor var_30528_equation_0 = const()[name = tensor("op_30528_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30528_cast_fp16 = einsum(equation = var_30528_equation_0, values = (var_30016_cast_fp16, var_30417_cast_fp16))[name = tensor("op_30528_cast_fp16")]; + tensor var_30530_equation_0 = const()[name = tensor("op_30530_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30530_cast_fp16 = einsum(equation = var_30530_equation_0, values = (var_30016_cast_fp16, var_30418_cast_fp16))[name = tensor("op_30530_cast_fp16")]; + tensor var_30532_equation_0 = const()[name = tensor("op_30532_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30532_cast_fp16 = einsum(equation = var_30532_equation_0, values = (var_30020_cast_fp16, var_30419_cast_fp16))[name = tensor("op_30532_cast_fp16")]; + tensor var_30534_equation_0 = const()[name = tensor("op_30534_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30534_cast_fp16 = einsum(equation = var_30534_equation_0, values = (var_30020_cast_fp16, var_30420_cast_fp16))[name = tensor("op_30534_cast_fp16")]; + tensor var_30536_equation_0 = const()[name = tensor("op_30536_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30536_cast_fp16 = einsum(equation = var_30536_equation_0, values = (var_30020_cast_fp16, var_30421_cast_fp16))[name = tensor("op_30536_cast_fp16")]; + tensor var_30538_equation_0 = const()[name = tensor("op_30538_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30538_cast_fp16 = einsum(equation = var_30538_equation_0, values = (var_30020_cast_fp16, var_30422_cast_fp16))[name = tensor("op_30538_cast_fp16")]; + tensor var_30540_equation_0 = const()[name = tensor("op_30540_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30540_cast_fp16 = einsum(equation = var_30540_equation_0, values = (var_30024_cast_fp16, var_30423_cast_fp16))[name = tensor("op_30540_cast_fp16")]; + tensor var_30542_equation_0 = const()[name = tensor("op_30542_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30542_cast_fp16 = einsum(equation = var_30542_equation_0, values = (var_30024_cast_fp16, var_30424_cast_fp16))[name = tensor("op_30542_cast_fp16")]; + tensor var_30544_equation_0 = const()[name = tensor("op_30544_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30544_cast_fp16 = einsum(equation = var_30544_equation_0, values = (var_30024_cast_fp16, var_30425_cast_fp16))[name = tensor("op_30544_cast_fp16")]; + tensor var_30546_equation_0 = const()[name = tensor("op_30546_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30546_cast_fp16 = einsum(equation = var_30546_equation_0, values = (var_30024_cast_fp16, var_30426_cast_fp16))[name = tensor("op_30546_cast_fp16")]; + tensor var_30548_equation_0 = const()[name = tensor("op_30548_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30548_cast_fp16 = einsum(equation = var_30548_equation_0, values = (var_30028_cast_fp16, var_30427_cast_fp16))[name = tensor("op_30548_cast_fp16")]; + tensor var_30550_equation_0 = const()[name = tensor("op_30550_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30550_cast_fp16 = einsum(equation = var_30550_equation_0, values = (var_30028_cast_fp16, var_30428_cast_fp16))[name = tensor("op_30550_cast_fp16")]; + tensor var_30552_equation_0 = const()[name = tensor("op_30552_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30552_cast_fp16 = einsum(equation = var_30552_equation_0, values = (var_30028_cast_fp16, var_30429_cast_fp16))[name = tensor("op_30552_cast_fp16")]; + tensor var_30554_equation_0 = const()[name = tensor("op_30554_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30554_cast_fp16 = einsum(equation = var_30554_equation_0, values = (var_30028_cast_fp16, var_30430_cast_fp16))[name = tensor("op_30554_cast_fp16")]; + tensor var_30556_equation_0 = const()[name = tensor("op_30556_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30556_cast_fp16 = einsum(equation = var_30556_equation_0, values = (var_30032_cast_fp16, var_30431_cast_fp16))[name = tensor("op_30556_cast_fp16")]; + tensor var_30558_equation_0 = const()[name = tensor("op_30558_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30558_cast_fp16 = einsum(equation = var_30558_equation_0, values = (var_30032_cast_fp16, var_30432_cast_fp16))[name = tensor("op_30558_cast_fp16")]; + tensor var_30560_equation_0 = const()[name = tensor("op_30560_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30560_cast_fp16 = einsum(equation = var_30560_equation_0, values = (var_30032_cast_fp16, var_30433_cast_fp16))[name = tensor("op_30560_cast_fp16")]; + tensor var_30562_equation_0 = const()[name = tensor("op_30562_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30562_cast_fp16 = einsum(equation = var_30562_equation_0, values = (var_30032_cast_fp16, var_30434_cast_fp16))[name = tensor("op_30562_cast_fp16")]; + tensor var_30564_equation_0 = const()[name = tensor("op_30564_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30564_cast_fp16 = einsum(equation = var_30564_equation_0, values = (var_30036_cast_fp16, var_30435_cast_fp16))[name = tensor("op_30564_cast_fp16")]; + tensor var_30566_equation_0 = const()[name = tensor("op_30566_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30566_cast_fp16 = einsum(equation = var_30566_equation_0, values = (var_30036_cast_fp16, var_30436_cast_fp16))[name = tensor("op_30566_cast_fp16")]; + tensor var_30568_equation_0 = const()[name = tensor("op_30568_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30568_cast_fp16 = einsum(equation = var_30568_equation_0, values = (var_30036_cast_fp16, var_30437_cast_fp16))[name = tensor("op_30568_cast_fp16")]; + tensor var_30570_equation_0 = const()[name = tensor("op_30570_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30570_cast_fp16 = einsum(equation = var_30570_equation_0, values = (var_30036_cast_fp16, var_30438_cast_fp16))[name = tensor("op_30570_cast_fp16")]; + tensor var_30572_equation_0 = const()[name = tensor("op_30572_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30572_cast_fp16 = einsum(equation = var_30572_equation_0, values = (var_30040_cast_fp16, var_30439_cast_fp16))[name = tensor("op_30572_cast_fp16")]; + tensor var_30574_equation_0 = const()[name = tensor("op_30574_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30574_cast_fp16 = einsum(equation = var_30574_equation_0, values = (var_30040_cast_fp16, var_30440_cast_fp16))[name = tensor("op_30574_cast_fp16")]; + tensor var_30576_equation_0 = const()[name = tensor("op_30576_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30576_cast_fp16 = einsum(equation = var_30576_equation_0, values = (var_30040_cast_fp16, var_30441_cast_fp16))[name = tensor("op_30576_cast_fp16")]; + tensor var_30578_equation_0 = const()[name = tensor("op_30578_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30578_cast_fp16 = einsum(equation = var_30578_equation_0, values = (var_30040_cast_fp16, var_30442_cast_fp16))[name = tensor("op_30578_cast_fp16")]; + tensor var_30580_equation_0 = const()[name = tensor("op_30580_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30580_cast_fp16 = einsum(equation = var_30580_equation_0, values = (var_30044_cast_fp16, var_30443_cast_fp16))[name = tensor("op_30580_cast_fp16")]; + tensor var_30582_equation_0 = const()[name = tensor("op_30582_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30582_cast_fp16 = einsum(equation = var_30582_equation_0, values = (var_30044_cast_fp16, var_30444_cast_fp16))[name = tensor("op_30582_cast_fp16")]; + tensor var_30584_equation_0 = const()[name = tensor("op_30584_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30584_cast_fp16 = einsum(equation = var_30584_equation_0, values = (var_30044_cast_fp16, var_30445_cast_fp16))[name = tensor("op_30584_cast_fp16")]; + tensor var_30586_equation_0 = const()[name = tensor("op_30586_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30586_cast_fp16 = einsum(equation = var_30586_equation_0, values = (var_30044_cast_fp16, var_30446_cast_fp16))[name = tensor("op_30586_cast_fp16")]; + tensor var_30588_equation_0 = const()[name = tensor("op_30588_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30588_cast_fp16 = einsum(equation = var_30588_equation_0, values = (var_30048_cast_fp16, var_30447_cast_fp16))[name = tensor("op_30588_cast_fp16")]; + tensor var_30590_equation_0 = const()[name = tensor("op_30590_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30590_cast_fp16 = einsum(equation = var_30590_equation_0, values = (var_30048_cast_fp16, var_30448_cast_fp16))[name = tensor("op_30590_cast_fp16")]; + tensor var_30592_equation_0 = const()[name = tensor("op_30592_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30592_cast_fp16 = einsum(equation = var_30592_equation_0, values = (var_30048_cast_fp16, var_30449_cast_fp16))[name = tensor("op_30592_cast_fp16")]; + tensor var_30594_equation_0 = const()[name = tensor("op_30594_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30594_cast_fp16 = einsum(equation = var_30594_equation_0, values = (var_30048_cast_fp16, var_30450_cast_fp16))[name = tensor("op_30594_cast_fp16")]; + tensor var_30596_equation_0 = const()[name = tensor("op_30596_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30596_cast_fp16 = einsum(equation = var_30596_equation_0, values = (var_30052_cast_fp16, var_30451_cast_fp16))[name = tensor("op_30596_cast_fp16")]; + tensor var_30598_equation_0 = const()[name = tensor("op_30598_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30598_cast_fp16 = einsum(equation = var_30598_equation_0, values = (var_30052_cast_fp16, var_30452_cast_fp16))[name = tensor("op_30598_cast_fp16")]; + tensor var_30600_equation_0 = const()[name = tensor("op_30600_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30600_cast_fp16 = einsum(equation = var_30600_equation_0, values = (var_30052_cast_fp16, var_30453_cast_fp16))[name = tensor("op_30600_cast_fp16")]; + tensor var_30602_equation_0 = const()[name = tensor("op_30602_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30602_cast_fp16 = einsum(equation = var_30602_equation_0, values = (var_30052_cast_fp16, var_30454_cast_fp16))[name = tensor("op_30602_cast_fp16")]; + tensor var_30604_equation_0 = const()[name = tensor("op_30604_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30604_cast_fp16 = einsum(equation = var_30604_equation_0, values = (var_30056_cast_fp16, var_30455_cast_fp16))[name = tensor("op_30604_cast_fp16")]; + tensor var_30606_equation_0 = const()[name = tensor("op_30606_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30606_cast_fp16 = einsum(equation = var_30606_equation_0, values = (var_30056_cast_fp16, var_30456_cast_fp16))[name = tensor("op_30606_cast_fp16")]; + tensor var_30608_equation_0 = const()[name = tensor("op_30608_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30608_cast_fp16 = einsum(equation = var_30608_equation_0, values = (var_30056_cast_fp16, var_30457_cast_fp16))[name = tensor("op_30608_cast_fp16")]; + tensor var_30610_equation_0 = const()[name = tensor("op_30610_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30610_cast_fp16 = einsum(equation = var_30610_equation_0, values = (var_30056_cast_fp16, var_30458_cast_fp16))[name = tensor("op_30610_cast_fp16")]; + tensor var_30612_equation_0 = const()[name = tensor("op_30612_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30612_cast_fp16 = einsum(equation = var_30612_equation_0, values = (var_30060_cast_fp16, var_30459_cast_fp16))[name = tensor("op_30612_cast_fp16")]; + tensor var_30614_equation_0 = const()[name = tensor("op_30614_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30614_cast_fp16 = einsum(equation = var_30614_equation_0, values = (var_30060_cast_fp16, var_30460_cast_fp16))[name = tensor("op_30614_cast_fp16")]; + tensor var_30616_equation_0 = const()[name = tensor("op_30616_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30616_cast_fp16 = einsum(equation = var_30616_equation_0, values = (var_30060_cast_fp16, var_30461_cast_fp16))[name = tensor("op_30616_cast_fp16")]; + tensor var_30618_equation_0 = const()[name = tensor("op_30618_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30618_cast_fp16 = einsum(equation = var_30618_equation_0, values = (var_30060_cast_fp16, var_30462_cast_fp16))[name = tensor("op_30618_cast_fp16")]; + tensor var_30620_equation_0 = const()[name = tensor("op_30620_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30620_cast_fp16 = einsum(equation = var_30620_equation_0, values = (var_30064_cast_fp16, var_30463_cast_fp16))[name = tensor("op_30620_cast_fp16")]; + tensor var_30622_equation_0 = const()[name = tensor("op_30622_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30622_cast_fp16 = einsum(equation = var_30622_equation_0, values = (var_30064_cast_fp16, var_30464_cast_fp16))[name = tensor("op_30622_cast_fp16")]; + tensor var_30624_equation_0 = const()[name = tensor("op_30624_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30624_cast_fp16 = einsum(equation = var_30624_equation_0, values = (var_30064_cast_fp16, var_30465_cast_fp16))[name = tensor("op_30624_cast_fp16")]; + tensor var_30626_equation_0 = const()[name = tensor("op_30626_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_30626_cast_fp16 = einsum(equation = var_30626_equation_0, values = (var_30064_cast_fp16, var_30466_cast_fp16))[name = tensor("op_30626_cast_fp16")]; + tensor var_30628_interleave_0 = const()[name = tensor("op_30628_interleave_0"), val = tensor(false)]; + tensor var_30628_cast_fp16 = concat(axis = var_29187, interleave = var_30628_interleave_0, values = (var_30468_cast_fp16, var_30470_cast_fp16, var_30472_cast_fp16, var_30474_cast_fp16))[name = tensor("op_30628_cast_fp16")]; + tensor var_30630_interleave_0 = const()[name = tensor("op_30630_interleave_0"), val = tensor(false)]; + tensor var_30630_cast_fp16 = concat(axis = var_29187, interleave = var_30630_interleave_0, values = (var_30476_cast_fp16, var_30478_cast_fp16, var_30480_cast_fp16, var_30482_cast_fp16))[name = tensor("op_30630_cast_fp16")]; + tensor var_30632_interleave_0 = const()[name = tensor("op_30632_interleave_0"), val = tensor(false)]; + tensor var_30632_cast_fp16 = concat(axis = var_29187, interleave = var_30632_interleave_0, values = (var_30484_cast_fp16, var_30486_cast_fp16, var_30488_cast_fp16, var_30490_cast_fp16))[name = tensor("op_30632_cast_fp16")]; + tensor var_30634_interleave_0 = const()[name = tensor("op_30634_interleave_0"), val = tensor(false)]; + tensor var_30634_cast_fp16 = concat(axis = var_29187, interleave = var_30634_interleave_0, values = (var_30492_cast_fp16, var_30494_cast_fp16, var_30496_cast_fp16, var_30498_cast_fp16))[name = tensor("op_30634_cast_fp16")]; + tensor var_30636_interleave_0 = const()[name = tensor("op_30636_interleave_0"), val = tensor(false)]; + tensor var_30636_cast_fp16 = concat(axis = var_29187, interleave = var_30636_interleave_0, values = (var_30500_cast_fp16, var_30502_cast_fp16, var_30504_cast_fp16, var_30506_cast_fp16))[name = tensor("op_30636_cast_fp16")]; + tensor var_30638_interleave_0 = const()[name = tensor("op_30638_interleave_0"), val = tensor(false)]; + tensor var_30638_cast_fp16 = concat(axis = var_29187, interleave = var_30638_interleave_0, values = (var_30508_cast_fp16, var_30510_cast_fp16, var_30512_cast_fp16, var_30514_cast_fp16))[name = tensor("op_30638_cast_fp16")]; + tensor var_30640_interleave_0 = const()[name = tensor("op_30640_interleave_0"), val = tensor(false)]; + tensor var_30640_cast_fp16 = concat(axis = var_29187, interleave = var_30640_interleave_0, values = (var_30516_cast_fp16, var_30518_cast_fp16, var_30520_cast_fp16, var_30522_cast_fp16))[name = tensor("op_30640_cast_fp16")]; + tensor var_30642_interleave_0 = const()[name = tensor("op_30642_interleave_0"), val = tensor(false)]; + tensor var_30642_cast_fp16 = concat(axis = var_29187, interleave = var_30642_interleave_0, values = (var_30524_cast_fp16, var_30526_cast_fp16, var_30528_cast_fp16, var_30530_cast_fp16))[name = tensor("op_30642_cast_fp16")]; + tensor var_30644_interleave_0 = const()[name = tensor("op_30644_interleave_0"), val = tensor(false)]; + tensor var_30644_cast_fp16 = concat(axis = var_29187, interleave = var_30644_interleave_0, values = (var_30532_cast_fp16, var_30534_cast_fp16, var_30536_cast_fp16, var_30538_cast_fp16))[name = tensor("op_30644_cast_fp16")]; + tensor var_30646_interleave_0 = const()[name = tensor("op_30646_interleave_0"), val = tensor(false)]; + tensor var_30646_cast_fp16 = concat(axis = var_29187, interleave = var_30646_interleave_0, values = (var_30540_cast_fp16, var_30542_cast_fp16, var_30544_cast_fp16, var_30546_cast_fp16))[name = tensor("op_30646_cast_fp16")]; + tensor var_30648_interleave_0 = const()[name = tensor("op_30648_interleave_0"), val = tensor(false)]; + tensor var_30648_cast_fp16 = concat(axis = var_29187, interleave = var_30648_interleave_0, values = (var_30548_cast_fp16, var_30550_cast_fp16, var_30552_cast_fp16, var_30554_cast_fp16))[name = tensor("op_30648_cast_fp16")]; + tensor var_30650_interleave_0 = const()[name = tensor("op_30650_interleave_0"), val = tensor(false)]; + tensor var_30650_cast_fp16 = concat(axis = var_29187, interleave = var_30650_interleave_0, values = (var_30556_cast_fp16, var_30558_cast_fp16, var_30560_cast_fp16, var_30562_cast_fp16))[name = tensor("op_30650_cast_fp16")]; + tensor var_30652_interleave_0 = const()[name = tensor("op_30652_interleave_0"), val = tensor(false)]; + tensor var_30652_cast_fp16 = concat(axis = var_29187, interleave = var_30652_interleave_0, values = (var_30564_cast_fp16, var_30566_cast_fp16, var_30568_cast_fp16, var_30570_cast_fp16))[name = tensor("op_30652_cast_fp16")]; + tensor var_30654_interleave_0 = const()[name = tensor("op_30654_interleave_0"), val = tensor(false)]; + tensor var_30654_cast_fp16 = concat(axis = var_29187, interleave = var_30654_interleave_0, values = (var_30572_cast_fp16, var_30574_cast_fp16, var_30576_cast_fp16, var_30578_cast_fp16))[name = tensor("op_30654_cast_fp16")]; + tensor var_30656_interleave_0 = const()[name = tensor("op_30656_interleave_0"), val = tensor(false)]; + tensor var_30656_cast_fp16 = concat(axis = var_29187, interleave = var_30656_interleave_0, values = (var_30580_cast_fp16, var_30582_cast_fp16, var_30584_cast_fp16, var_30586_cast_fp16))[name = tensor("op_30656_cast_fp16")]; + tensor var_30658_interleave_0 = const()[name = tensor("op_30658_interleave_0"), val = tensor(false)]; + tensor var_30658_cast_fp16 = concat(axis = var_29187, interleave = var_30658_interleave_0, values = (var_30588_cast_fp16, var_30590_cast_fp16, var_30592_cast_fp16, var_30594_cast_fp16))[name = tensor("op_30658_cast_fp16")]; + tensor var_30660_interleave_0 = const()[name = tensor("op_30660_interleave_0"), val = tensor(false)]; + tensor var_30660_cast_fp16 = concat(axis = var_29187, interleave = var_30660_interleave_0, values = (var_30596_cast_fp16, var_30598_cast_fp16, var_30600_cast_fp16, var_30602_cast_fp16))[name = tensor("op_30660_cast_fp16")]; + tensor var_30662_interleave_0 = const()[name = tensor("op_30662_interleave_0"), val = tensor(false)]; + tensor var_30662_cast_fp16 = concat(axis = var_29187, interleave = var_30662_interleave_0, values = (var_30604_cast_fp16, var_30606_cast_fp16, var_30608_cast_fp16, var_30610_cast_fp16))[name = tensor("op_30662_cast_fp16")]; + tensor var_30664_interleave_0 = const()[name = tensor("op_30664_interleave_0"), val = tensor(false)]; + tensor var_30664_cast_fp16 = concat(axis = var_29187, interleave = var_30664_interleave_0, values = (var_30612_cast_fp16, var_30614_cast_fp16, var_30616_cast_fp16, var_30618_cast_fp16))[name = tensor("op_30664_cast_fp16")]; + tensor var_30666_interleave_0 = const()[name = tensor("op_30666_interleave_0"), val = tensor(false)]; + tensor var_30666_cast_fp16 = concat(axis = var_29187, interleave = var_30666_interleave_0, values = (var_30620_cast_fp16, var_30622_cast_fp16, var_30624_cast_fp16, var_30626_cast_fp16))[name = tensor("op_30666_cast_fp16")]; + tensor input_153_interleave_0 = const()[name = tensor("input_153_interleave_0"), val = tensor(false)]; + tensor input_153_cast_fp16 = concat(axis = var_29212, interleave = input_153_interleave_0, values = (var_30628_cast_fp16, var_30630_cast_fp16, var_30632_cast_fp16, var_30634_cast_fp16, var_30636_cast_fp16, var_30638_cast_fp16, var_30640_cast_fp16, var_30642_cast_fp16, var_30644_cast_fp16, var_30646_cast_fp16, var_30648_cast_fp16, var_30650_cast_fp16, var_30652_cast_fp16, var_30654_cast_fp16, var_30656_cast_fp16, var_30658_cast_fp16, var_30660_cast_fp16, var_30662_cast_fp16, var_30664_cast_fp16, var_30666_cast_fp16))[name = tensor("input_153_cast_fp16")]; + tensor var_30671 = const()[name = tensor("op_30671"), val = tensor([1, 1])]; + tensor var_30673 = const()[name = tensor("op_30673"), val = tensor([1, 1])]; + tensor obj_79_pad_type_0 = const()[name = tensor("obj_79_pad_type_0"), val = tensor("custom")]; + tensor obj_79_pad_0 = const()[name = tensor("obj_79_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_19_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_19_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(771848960)))]; + tensor layers_19_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_19_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(775125824)))]; + tensor obj_79_cast_fp16 = conv(bias = layers_19_self_attn_o_proj_bias_to_fp16, dilations = var_30673, groups = var_29212, pad = obj_79_pad_0, pad_type = obj_79_pad_type_0, strides = var_30671, weight = layers_19_self_attn_o_proj_weight_to_fp16, x = input_153_cast_fp16)[name = tensor("obj_79_cast_fp16")]; + tensor inputs_79_cast_fp16 = add(x = inputs_77_cast_fp16, y = obj_79_cast_fp16)[name = tensor("inputs_79_cast_fp16")]; + tensor var_30679 = const()[name = tensor("op_30679"), val = tensor([1])]; + tensor channels_mean_79_cast_fp16 = reduce_mean(axes = var_30679, keep_dims = var_29213, x = inputs_79_cast_fp16)[name = tensor("channels_mean_79_cast_fp16")]; + tensor zero_mean_79_cast_fp16 = sub(x = inputs_79_cast_fp16, y = channels_mean_79_cast_fp16)[name = tensor("zero_mean_79_cast_fp16")]; + tensor zero_mean_sq_79_cast_fp16 = mul(x = zero_mean_79_cast_fp16, y = zero_mean_79_cast_fp16)[name = tensor("zero_mean_sq_79_cast_fp16")]; + tensor var_30683 = const()[name = tensor("op_30683"), val = tensor([1])]; + tensor var_30684_cast_fp16 = reduce_mean(axes = var_30683, keep_dims = var_29213, x = zero_mean_sq_79_cast_fp16)[name = tensor("op_30684_cast_fp16")]; + tensor var_30685_to_fp16 = const()[name = tensor("op_30685_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_30686_cast_fp16 = add(x = var_30684_cast_fp16, y = var_30685_to_fp16)[name = tensor("op_30686_cast_fp16")]; + tensor denom_79_epsilon_0_to_fp16 = const()[name = tensor("denom_79_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_79_cast_fp16 = rsqrt(epsilon = denom_79_epsilon_0_to_fp16, x = var_30686_cast_fp16)[name = tensor("denom_79_cast_fp16")]; + tensor out_79_cast_fp16 = mul(x = zero_mean_79_cast_fp16, y = denom_79_cast_fp16)[name = tensor("out_79_cast_fp16")]; + tensor input_155_gamma_0_to_fp16 = const()[name = tensor("input_155_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(775128448)))]; + tensor input_155_beta_0_to_fp16 = const()[name = tensor("input_155_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(775131072)))]; + tensor input_155_epsilon_0_to_fp16 = const()[name = tensor("input_155_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_155_cast_fp16 = batch_norm(beta = input_155_beta_0_to_fp16, epsilon = input_155_epsilon_0_to_fp16, gamma = input_155_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_79_cast_fp16)[name = tensor("input_155_cast_fp16")]; + tensor var_30697 = const()[name = tensor("op_30697"), val = tensor([1, 1])]; + tensor var_30699 = const()[name = tensor("op_30699"), val = tensor([1, 1])]; + tensor input_157_pad_type_0 = const()[name = tensor("input_157_pad_type_0"), val = tensor("custom")]; + tensor input_157_pad_0 = const()[name = tensor("input_157_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_19_fc1_weight_to_fp16 = const()[name = tensor("layers_19_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(775133696)))]; + tensor layers_19_fc1_bias_to_fp16 = const()[name = tensor("layers_19_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(788240960)))]; + tensor input_157_cast_fp16 = conv(bias = layers_19_fc1_bias_to_fp16, dilations = var_30699, groups = var_29212, pad = input_157_pad_0, pad_type = input_157_pad_type_0, strides = var_30697, weight = layers_19_fc1_weight_to_fp16, x = input_155_cast_fp16)[name = tensor("input_157_cast_fp16")]; + tensor input_159_mode_0 = const()[name = tensor("input_159_mode_0"), val = tensor("EXACT")]; + tensor input_159_cast_fp16 = gelu(mode = input_159_mode_0, x = input_157_cast_fp16)[name = tensor("input_159_cast_fp16")]; + tensor var_30705 = const()[name = tensor("op_30705"), val = tensor([1, 1])]; + tensor var_30707 = const()[name = tensor("op_30707"), val = tensor([1, 1])]; + tensor hidden_states_43_pad_type_0 = const()[name = tensor("hidden_states_43_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_43_pad_0 = const()[name = tensor("hidden_states_43_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_19_fc2_weight_to_fp16 = const()[name = tensor("layers_19_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(788251264)))]; + tensor layers_19_fc2_bias_to_fp16 = const()[name = tensor("layers_19_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(801358528)))]; + tensor hidden_states_43_cast_fp16 = conv(bias = layers_19_fc2_bias_to_fp16, dilations = var_30707, groups = var_29212, pad = hidden_states_43_pad_0, pad_type = hidden_states_43_pad_type_0, strides = var_30705, weight = layers_19_fc2_weight_to_fp16, x = input_159_cast_fp16)[name = tensor("hidden_states_43_cast_fp16")]; + tensor inputs_81_cast_fp16 = add(x = inputs_79_cast_fp16, y = hidden_states_43_cast_fp16)[name = tensor("inputs_81_cast_fp16")]; + tensor var_30714 = const()[name = tensor("op_30714"), val = tensor(3)]; + tensor var_30739 = const()[name = tensor("op_30739"), val = tensor(1)]; + tensor var_30740 = const()[name = tensor("op_30740"), val = tensor(true)]; + tensor var_30750 = const()[name = tensor("op_30750"), val = tensor([1])]; + tensor channels_mean_81_cast_fp16 = reduce_mean(axes = var_30750, keep_dims = var_30740, x = inputs_81_cast_fp16)[name = tensor("channels_mean_81_cast_fp16")]; + tensor zero_mean_81_cast_fp16 = sub(x = inputs_81_cast_fp16, y = channels_mean_81_cast_fp16)[name = tensor("zero_mean_81_cast_fp16")]; + tensor zero_mean_sq_81_cast_fp16 = mul(x = zero_mean_81_cast_fp16, y = zero_mean_81_cast_fp16)[name = tensor("zero_mean_sq_81_cast_fp16")]; + tensor var_30754 = const()[name = tensor("op_30754"), val = tensor([1])]; + tensor var_30755_cast_fp16 = reduce_mean(axes = var_30754, keep_dims = var_30740, x = zero_mean_sq_81_cast_fp16)[name = tensor("op_30755_cast_fp16")]; + tensor var_30756_to_fp16 = const()[name = tensor("op_30756_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_30757_cast_fp16 = add(x = var_30755_cast_fp16, y = var_30756_to_fp16)[name = tensor("op_30757_cast_fp16")]; + tensor denom_81_epsilon_0_to_fp16 = const()[name = tensor("denom_81_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_81_cast_fp16 = rsqrt(epsilon = denom_81_epsilon_0_to_fp16, x = var_30757_cast_fp16)[name = tensor("denom_81_cast_fp16")]; + tensor out_81_cast_fp16 = mul(x = zero_mean_81_cast_fp16, y = denom_81_cast_fp16)[name = tensor("out_81_cast_fp16")]; + tensor obj_81_gamma_0_to_fp16 = const()[name = tensor("obj_81_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(801361152)))]; + tensor obj_81_beta_0_to_fp16 = const()[name = tensor("obj_81_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(801363776)))]; + tensor obj_81_epsilon_0_to_fp16 = const()[name = tensor("obj_81_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_81_cast_fp16 = batch_norm(beta = obj_81_beta_0_to_fp16, epsilon = obj_81_epsilon_0_to_fp16, gamma = obj_81_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_81_cast_fp16)[name = tensor("obj_81_cast_fp16")]; + tensor var_30772 = const()[name = tensor("op_30772"), val = tensor([1, 1])]; + tensor var_30774 = const()[name = tensor("op_30774"), val = tensor([1, 1])]; + tensor query_41_pad_type_0 = const()[name = tensor("query_41_pad_type_0"), val = tensor("custom")]; + tensor query_41_pad_0 = const()[name = tensor("query_41_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_20_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_20_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(801366400)))]; + tensor layers_20_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_20_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(804643264)))]; + tensor query_41_cast_fp16 = conv(bias = layers_20_self_attn_q_proj_bias_to_fp16, dilations = var_30774, groups = var_30739, pad = query_41_pad_0, pad_type = query_41_pad_type_0, strides = var_30772, weight = layers_20_self_attn_q_proj_weight_to_fp16, x = obj_81_cast_fp16)[name = tensor("query_41_cast_fp16")]; + tensor var_30778 = const()[name = tensor("op_30778"), val = tensor([1, 1])]; + tensor var_30780 = const()[name = tensor("op_30780"), val = tensor([1, 1])]; + tensor key_41_pad_type_0 = const()[name = tensor("key_41_pad_type_0"), val = tensor("custom")]; + tensor key_41_pad_0 = const()[name = tensor("key_41_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_20_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_20_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(804645888)))]; + tensor key_41_cast_fp16 = conv(dilations = var_30780, groups = var_30739, pad = key_41_pad_0, pad_type = key_41_pad_type_0, strides = var_30778, weight = layers_20_self_attn_k_proj_weight_to_fp16, x = obj_81_cast_fp16)[name = tensor("key_41_cast_fp16")]; + tensor var_30785 = const()[name = tensor("op_30785"), val = tensor([1, 1])]; + tensor var_30787 = const()[name = tensor("op_30787"), val = tensor([1, 1])]; + tensor value_41_pad_type_0 = const()[name = tensor("value_41_pad_type_0"), val = tensor("custom")]; + tensor value_41_pad_0 = const()[name = tensor("value_41_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_20_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_20_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(807922752)))]; + tensor layers_20_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_20_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(811199616)))]; + tensor value_41_cast_fp16 = conv(bias = layers_20_self_attn_v_proj_bias_to_fp16, dilations = var_30787, groups = var_30739, pad = value_41_pad_0, pad_type = value_41_pad_type_0, strides = var_30785, weight = layers_20_self_attn_v_proj_weight_to_fp16, x = obj_81_cast_fp16)[name = tensor("value_41_cast_fp16")]; + tensor var_30794_begin_0 = const()[name = tensor("op_30794_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_30794_end_0 = const()[name = tensor("op_30794_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_30794_end_mask_0 = const()[name = tensor("op_30794_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_30794_cast_fp16 = slice_by_index(begin = var_30794_begin_0, end = var_30794_end_0, end_mask = var_30794_end_mask_0, x = query_41_cast_fp16)[name = tensor("op_30794_cast_fp16")]; + tensor var_30798_begin_0 = const()[name = tensor("op_30798_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_30798_end_0 = const()[name = tensor("op_30798_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_30798_end_mask_0 = const()[name = tensor("op_30798_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_30798_cast_fp16 = slice_by_index(begin = var_30798_begin_0, end = var_30798_end_0, end_mask = var_30798_end_mask_0, x = query_41_cast_fp16)[name = tensor("op_30798_cast_fp16")]; + tensor var_30802_begin_0 = const()[name = tensor("op_30802_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_30802_end_0 = const()[name = tensor("op_30802_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_30802_end_mask_0 = const()[name = tensor("op_30802_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_30802_cast_fp16 = slice_by_index(begin = var_30802_begin_0, end = var_30802_end_0, end_mask = var_30802_end_mask_0, x = query_41_cast_fp16)[name = tensor("op_30802_cast_fp16")]; + tensor var_30806_begin_0 = const()[name = tensor("op_30806_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_30806_end_0 = const()[name = tensor("op_30806_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_30806_end_mask_0 = const()[name = tensor("op_30806_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_30806_cast_fp16 = slice_by_index(begin = var_30806_begin_0, end = var_30806_end_0, end_mask = var_30806_end_mask_0, x = query_41_cast_fp16)[name = tensor("op_30806_cast_fp16")]; + tensor var_30810_begin_0 = const()[name = tensor("op_30810_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_30810_end_0 = const()[name = tensor("op_30810_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_30810_end_mask_0 = const()[name = tensor("op_30810_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_30810_cast_fp16 = slice_by_index(begin = var_30810_begin_0, end = var_30810_end_0, end_mask = var_30810_end_mask_0, x = query_41_cast_fp16)[name = tensor("op_30810_cast_fp16")]; + tensor var_30814_begin_0 = const()[name = tensor("op_30814_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_30814_end_0 = const()[name = tensor("op_30814_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_30814_end_mask_0 = const()[name = tensor("op_30814_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_30814_cast_fp16 = slice_by_index(begin = var_30814_begin_0, end = var_30814_end_0, end_mask = var_30814_end_mask_0, x = query_41_cast_fp16)[name = tensor("op_30814_cast_fp16")]; + tensor var_30818_begin_0 = const()[name = tensor("op_30818_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_30818_end_0 = const()[name = tensor("op_30818_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_30818_end_mask_0 = const()[name = tensor("op_30818_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_30818_cast_fp16 = slice_by_index(begin = var_30818_begin_0, end = var_30818_end_0, end_mask = var_30818_end_mask_0, x = query_41_cast_fp16)[name = tensor("op_30818_cast_fp16")]; + tensor var_30822_begin_0 = const()[name = tensor("op_30822_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_30822_end_0 = const()[name = tensor("op_30822_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_30822_end_mask_0 = const()[name = tensor("op_30822_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_30822_cast_fp16 = slice_by_index(begin = var_30822_begin_0, end = var_30822_end_0, end_mask = var_30822_end_mask_0, x = query_41_cast_fp16)[name = tensor("op_30822_cast_fp16")]; + tensor var_30826_begin_0 = const()[name = tensor("op_30826_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_30826_end_0 = const()[name = tensor("op_30826_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_30826_end_mask_0 = const()[name = tensor("op_30826_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_30826_cast_fp16 = slice_by_index(begin = var_30826_begin_0, end = var_30826_end_0, end_mask = var_30826_end_mask_0, x = query_41_cast_fp16)[name = tensor("op_30826_cast_fp16")]; + tensor var_30830_begin_0 = const()[name = tensor("op_30830_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_30830_end_0 = const()[name = tensor("op_30830_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_30830_end_mask_0 = const()[name = tensor("op_30830_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_30830_cast_fp16 = slice_by_index(begin = var_30830_begin_0, end = var_30830_end_0, end_mask = var_30830_end_mask_0, x = query_41_cast_fp16)[name = tensor("op_30830_cast_fp16")]; + tensor var_30834_begin_0 = const()[name = tensor("op_30834_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_30834_end_0 = const()[name = tensor("op_30834_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_30834_end_mask_0 = const()[name = tensor("op_30834_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_30834_cast_fp16 = slice_by_index(begin = var_30834_begin_0, end = var_30834_end_0, end_mask = var_30834_end_mask_0, x = query_41_cast_fp16)[name = tensor("op_30834_cast_fp16")]; + tensor var_30838_begin_0 = const()[name = tensor("op_30838_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_30838_end_0 = const()[name = tensor("op_30838_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_30838_end_mask_0 = const()[name = tensor("op_30838_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_30838_cast_fp16 = slice_by_index(begin = var_30838_begin_0, end = var_30838_end_0, end_mask = var_30838_end_mask_0, x = query_41_cast_fp16)[name = tensor("op_30838_cast_fp16")]; + tensor var_30842_begin_0 = const()[name = tensor("op_30842_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_30842_end_0 = const()[name = tensor("op_30842_end_0"), val = tensor([1, 832, 1, 1500])]; + tensor var_30842_end_mask_0 = const()[name = tensor("op_30842_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_30842_cast_fp16 = slice_by_index(begin = var_30842_begin_0, end = var_30842_end_0, end_mask = var_30842_end_mask_0, x = query_41_cast_fp16)[name = tensor("op_30842_cast_fp16")]; + tensor var_30846_begin_0 = const()[name = tensor("op_30846_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_30846_end_0 = const()[name = tensor("op_30846_end_0"), val = tensor([1, 896, 1, 1500])]; + tensor var_30846_end_mask_0 = const()[name = tensor("op_30846_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_30846_cast_fp16 = slice_by_index(begin = var_30846_begin_0, end = var_30846_end_0, end_mask = var_30846_end_mask_0, x = query_41_cast_fp16)[name = tensor("op_30846_cast_fp16")]; + tensor var_30850_begin_0 = const()[name = tensor("op_30850_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_30850_end_0 = const()[name = tensor("op_30850_end_0"), val = tensor([1, 960, 1, 1500])]; + tensor var_30850_end_mask_0 = const()[name = tensor("op_30850_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_30850_cast_fp16 = slice_by_index(begin = var_30850_begin_0, end = var_30850_end_0, end_mask = var_30850_end_mask_0, x = query_41_cast_fp16)[name = tensor("op_30850_cast_fp16")]; + tensor var_30854_begin_0 = const()[name = tensor("op_30854_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_30854_end_0 = const()[name = tensor("op_30854_end_0"), val = tensor([1, 1024, 1, 1500])]; + tensor var_30854_end_mask_0 = const()[name = tensor("op_30854_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_30854_cast_fp16 = slice_by_index(begin = var_30854_begin_0, end = var_30854_end_0, end_mask = var_30854_end_mask_0, x = query_41_cast_fp16)[name = tensor("op_30854_cast_fp16")]; + tensor var_30858_begin_0 = const()[name = tensor("op_30858_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_30858_end_0 = const()[name = tensor("op_30858_end_0"), val = tensor([1, 1088, 1, 1500])]; + tensor var_30858_end_mask_0 = const()[name = tensor("op_30858_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_30858_cast_fp16 = slice_by_index(begin = var_30858_begin_0, end = var_30858_end_0, end_mask = var_30858_end_mask_0, x = query_41_cast_fp16)[name = tensor("op_30858_cast_fp16")]; + tensor var_30862_begin_0 = const()[name = tensor("op_30862_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_30862_end_0 = const()[name = tensor("op_30862_end_0"), val = tensor([1, 1152, 1, 1500])]; + tensor var_30862_end_mask_0 = const()[name = tensor("op_30862_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_30862_cast_fp16 = slice_by_index(begin = var_30862_begin_0, end = var_30862_end_0, end_mask = var_30862_end_mask_0, x = query_41_cast_fp16)[name = tensor("op_30862_cast_fp16")]; + tensor var_30866_begin_0 = const()[name = tensor("op_30866_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_30866_end_0 = const()[name = tensor("op_30866_end_0"), val = tensor([1, 1216, 1, 1500])]; + tensor var_30866_end_mask_0 = const()[name = tensor("op_30866_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_30866_cast_fp16 = slice_by_index(begin = var_30866_begin_0, end = var_30866_end_0, end_mask = var_30866_end_mask_0, x = query_41_cast_fp16)[name = tensor("op_30866_cast_fp16")]; + tensor var_30870_begin_0 = const()[name = tensor("op_30870_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_30870_end_0 = const()[name = tensor("op_30870_end_0"), val = tensor([1, 1280, 1, 1500])]; + tensor var_30870_end_mask_0 = const()[name = tensor("op_30870_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_30870_cast_fp16 = slice_by_index(begin = var_30870_begin_0, end = var_30870_end_0, end_mask = var_30870_end_mask_0, x = query_41_cast_fp16)[name = tensor("op_30870_cast_fp16")]; + tensor var_30879_begin_0 = const()[name = tensor("op_30879_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_30879_end_0 = const()[name = tensor("op_30879_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_30879_end_mask_0 = const()[name = tensor("op_30879_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_30879_cast_fp16 = slice_by_index(begin = var_30879_begin_0, end = var_30879_end_0, end_mask = var_30879_end_mask_0, x = var_30794_cast_fp16)[name = tensor("op_30879_cast_fp16")]; + tensor var_30886_begin_0 = const()[name = tensor("op_30886_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_30886_end_0 = const()[name = tensor("op_30886_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_30886_end_mask_0 = const()[name = tensor("op_30886_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_30886_cast_fp16 = slice_by_index(begin = var_30886_begin_0, end = var_30886_end_0, end_mask = var_30886_end_mask_0, x = var_30794_cast_fp16)[name = tensor("op_30886_cast_fp16")]; + tensor var_30893_begin_0 = const()[name = tensor("op_30893_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_30893_end_0 = const()[name = tensor("op_30893_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_30893_end_mask_0 = const()[name = tensor("op_30893_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_30893_cast_fp16 = slice_by_index(begin = var_30893_begin_0, end = var_30893_end_0, end_mask = var_30893_end_mask_0, x = var_30794_cast_fp16)[name = tensor("op_30893_cast_fp16")]; + tensor var_30900_begin_0 = const()[name = tensor("op_30900_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_30900_end_0 = const()[name = tensor("op_30900_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_30900_end_mask_0 = const()[name = tensor("op_30900_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_30900_cast_fp16 = slice_by_index(begin = var_30900_begin_0, end = var_30900_end_0, end_mask = var_30900_end_mask_0, x = var_30794_cast_fp16)[name = tensor("op_30900_cast_fp16")]; + tensor var_30907_begin_0 = const()[name = tensor("op_30907_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_30907_end_0 = const()[name = tensor("op_30907_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_30907_end_mask_0 = const()[name = tensor("op_30907_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_30907_cast_fp16 = slice_by_index(begin = var_30907_begin_0, end = var_30907_end_0, end_mask = var_30907_end_mask_0, x = var_30798_cast_fp16)[name = tensor("op_30907_cast_fp16")]; + tensor var_30914_begin_0 = const()[name = tensor("op_30914_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_30914_end_0 = const()[name = tensor("op_30914_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_30914_end_mask_0 = const()[name = tensor("op_30914_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_30914_cast_fp16 = slice_by_index(begin = var_30914_begin_0, end = var_30914_end_0, end_mask = var_30914_end_mask_0, x = var_30798_cast_fp16)[name = tensor("op_30914_cast_fp16")]; + tensor var_30921_begin_0 = const()[name = tensor("op_30921_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_30921_end_0 = const()[name = tensor("op_30921_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_30921_end_mask_0 = const()[name = tensor("op_30921_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_30921_cast_fp16 = slice_by_index(begin = var_30921_begin_0, end = var_30921_end_0, end_mask = var_30921_end_mask_0, x = var_30798_cast_fp16)[name = tensor("op_30921_cast_fp16")]; + tensor var_30928_begin_0 = const()[name = tensor("op_30928_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_30928_end_0 = const()[name = tensor("op_30928_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_30928_end_mask_0 = const()[name = tensor("op_30928_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_30928_cast_fp16 = slice_by_index(begin = var_30928_begin_0, end = var_30928_end_0, end_mask = var_30928_end_mask_0, x = var_30798_cast_fp16)[name = tensor("op_30928_cast_fp16")]; + tensor var_30935_begin_0 = const()[name = tensor("op_30935_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_30935_end_0 = const()[name = tensor("op_30935_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_30935_end_mask_0 = const()[name = tensor("op_30935_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_30935_cast_fp16 = slice_by_index(begin = var_30935_begin_0, end = var_30935_end_0, end_mask = var_30935_end_mask_0, x = var_30802_cast_fp16)[name = tensor("op_30935_cast_fp16")]; + tensor var_30942_begin_0 = const()[name = tensor("op_30942_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_30942_end_0 = const()[name = tensor("op_30942_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_30942_end_mask_0 = const()[name = tensor("op_30942_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_30942_cast_fp16 = slice_by_index(begin = var_30942_begin_0, end = var_30942_end_0, end_mask = var_30942_end_mask_0, x = var_30802_cast_fp16)[name = tensor("op_30942_cast_fp16")]; + tensor var_30949_begin_0 = const()[name = tensor("op_30949_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_30949_end_0 = const()[name = tensor("op_30949_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_30949_end_mask_0 = const()[name = tensor("op_30949_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_30949_cast_fp16 = slice_by_index(begin = var_30949_begin_0, end = var_30949_end_0, end_mask = var_30949_end_mask_0, x = var_30802_cast_fp16)[name = tensor("op_30949_cast_fp16")]; + tensor var_30956_begin_0 = const()[name = tensor("op_30956_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_30956_end_0 = const()[name = tensor("op_30956_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_30956_end_mask_0 = const()[name = tensor("op_30956_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_30956_cast_fp16 = slice_by_index(begin = var_30956_begin_0, end = var_30956_end_0, end_mask = var_30956_end_mask_0, x = var_30802_cast_fp16)[name = tensor("op_30956_cast_fp16")]; + tensor var_30963_begin_0 = const()[name = tensor("op_30963_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_30963_end_0 = const()[name = tensor("op_30963_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_30963_end_mask_0 = const()[name = tensor("op_30963_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_30963_cast_fp16 = slice_by_index(begin = var_30963_begin_0, end = var_30963_end_0, end_mask = var_30963_end_mask_0, x = var_30806_cast_fp16)[name = tensor("op_30963_cast_fp16")]; + tensor var_30970_begin_0 = const()[name = tensor("op_30970_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_30970_end_0 = const()[name = tensor("op_30970_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_30970_end_mask_0 = const()[name = tensor("op_30970_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_30970_cast_fp16 = slice_by_index(begin = var_30970_begin_0, end = var_30970_end_0, end_mask = var_30970_end_mask_0, x = var_30806_cast_fp16)[name = tensor("op_30970_cast_fp16")]; + tensor var_30977_begin_0 = const()[name = tensor("op_30977_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_30977_end_0 = const()[name = tensor("op_30977_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_30977_end_mask_0 = const()[name = tensor("op_30977_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_30977_cast_fp16 = slice_by_index(begin = var_30977_begin_0, end = var_30977_end_0, end_mask = var_30977_end_mask_0, x = var_30806_cast_fp16)[name = tensor("op_30977_cast_fp16")]; + tensor var_30984_begin_0 = const()[name = tensor("op_30984_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_30984_end_0 = const()[name = tensor("op_30984_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_30984_end_mask_0 = const()[name = tensor("op_30984_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_30984_cast_fp16 = slice_by_index(begin = var_30984_begin_0, end = var_30984_end_0, end_mask = var_30984_end_mask_0, x = var_30806_cast_fp16)[name = tensor("op_30984_cast_fp16")]; + tensor var_30991_begin_0 = const()[name = tensor("op_30991_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_30991_end_0 = const()[name = tensor("op_30991_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_30991_end_mask_0 = const()[name = tensor("op_30991_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_30991_cast_fp16 = slice_by_index(begin = var_30991_begin_0, end = var_30991_end_0, end_mask = var_30991_end_mask_0, x = var_30810_cast_fp16)[name = tensor("op_30991_cast_fp16")]; + tensor var_30998_begin_0 = const()[name = tensor("op_30998_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_30998_end_0 = const()[name = tensor("op_30998_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_30998_end_mask_0 = const()[name = tensor("op_30998_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_30998_cast_fp16 = slice_by_index(begin = var_30998_begin_0, end = var_30998_end_0, end_mask = var_30998_end_mask_0, x = var_30810_cast_fp16)[name = tensor("op_30998_cast_fp16")]; + tensor var_31005_begin_0 = const()[name = tensor("op_31005_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_31005_end_0 = const()[name = tensor("op_31005_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_31005_end_mask_0 = const()[name = tensor("op_31005_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31005_cast_fp16 = slice_by_index(begin = var_31005_begin_0, end = var_31005_end_0, end_mask = var_31005_end_mask_0, x = var_30810_cast_fp16)[name = tensor("op_31005_cast_fp16")]; + tensor var_31012_begin_0 = const()[name = tensor("op_31012_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_31012_end_0 = const()[name = tensor("op_31012_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_31012_end_mask_0 = const()[name = tensor("op_31012_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31012_cast_fp16 = slice_by_index(begin = var_31012_begin_0, end = var_31012_end_0, end_mask = var_31012_end_mask_0, x = var_30810_cast_fp16)[name = tensor("op_31012_cast_fp16")]; + tensor var_31019_begin_0 = const()[name = tensor("op_31019_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_31019_end_0 = const()[name = tensor("op_31019_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_31019_end_mask_0 = const()[name = tensor("op_31019_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31019_cast_fp16 = slice_by_index(begin = var_31019_begin_0, end = var_31019_end_0, end_mask = var_31019_end_mask_0, x = var_30814_cast_fp16)[name = tensor("op_31019_cast_fp16")]; + tensor var_31026_begin_0 = const()[name = tensor("op_31026_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_31026_end_0 = const()[name = tensor("op_31026_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_31026_end_mask_0 = const()[name = tensor("op_31026_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31026_cast_fp16 = slice_by_index(begin = var_31026_begin_0, end = var_31026_end_0, end_mask = var_31026_end_mask_0, x = var_30814_cast_fp16)[name = tensor("op_31026_cast_fp16")]; + tensor var_31033_begin_0 = const()[name = tensor("op_31033_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_31033_end_0 = const()[name = tensor("op_31033_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_31033_end_mask_0 = const()[name = tensor("op_31033_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31033_cast_fp16 = slice_by_index(begin = var_31033_begin_0, end = var_31033_end_0, end_mask = var_31033_end_mask_0, x = var_30814_cast_fp16)[name = tensor("op_31033_cast_fp16")]; + tensor var_31040_begin_0 = const()[name = tensor("op_31040_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_31040_end_0 = const()[name = tensor("op_31040_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_31040_end_mask_0 = const()[name = tensor("op_31040_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31040_cast_fp16 = slice_by_index(begin = var_31040_begin_0, end = var_31040_end_0, end_mask = var_31040_end_mask_0, x = var_30814_cast_fp16)[name = tensor("op_31040_cast_fp16")]; + tensor var_31047_begin_0 = const()[name = tensor("op_31047_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_31047_end_0 = const()[name = tensor("op_31047_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_31047_end_mask_0 = const()[name = tensor("op_31047_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31047_cast_fp16 = slice_by_index(begin = var_31047_begin_0, end = var_31047_end_0, end_mask = var_31047_end_mask_0, x = var_30818_cast_fp16)[name = tensor("op_31047_cast_fp16")]; + tensor var_31054_begin_0 = const()[name = tensor("op_31054_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_31054_end_0 = const()[name = tensor("op_31054_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_31054_end_mask_0 = const()[name = tensor("op_31054_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31054_cast_fp16 = slice_by_index(begin = var_31054_begin_0, end = var_31054_end_0, end_mask = var_31054_end_mask_0, x = var_30818_cast_fp16)[name = tensor("op_31054_cast_fp16")]; + tensor var_31061_begin_0 = const()[name = tensor("op_31061_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_31061_end_0 = const()[name = tensor("op_31061_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_31061_end_mask_0 = const()[name = tensor("op_31061_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31061_cast_fp16 = slice_by_index(begin = var_31061_begin_0, end = var_31061_end_0, end_mask = var_31061_end_mask_0, x = var_30818_cast_fp16)[name = tensor("op_31061_cast_fp16")]; + tensor var_31068_begin_0 = const()[name = tensor("op_31068_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_31068_end_0 = const()[name = tensor("op_31068_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_31068_end_mask_0 = const()[name = tensor("op_31068_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31068_cast_fp16 = slice_by_index(begin = var_31068_begin_0, end = var_31068_end_0, end_mask = var_31068_end_mask_0, x = var_30818_cast_fp16)[name = tensor("op_31068_cast_fp16")]; + tensor var_31075_begin_0 = const()[name = tensor("op_31075_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_31075_end_0 = const()[name = tensor("op_31075_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_31075_end_mask_0 = const()[name = tensor("op_31075_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31075_cast_fp16 = slice_by_index(begin = var_31075_begin_0, end = var_31075_end_0, end_mask = var_31075_end_mask_0, x = var_30822_cast_fp16)[name = tensor("op_31075_cast_fp16")]; + tensor var_31082_begin_0 = const()[name = tensor("op_31082_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_31082_end_0 = const()[name = tensor("op_31082_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_31082_end_mask_0 = const()[name = tensor("op_31082_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31082_cast_fp16 = slice_by_index(begin = var_31082_begin_0, end = var_31082_end_0, end_mask = var_31082_end_mask_0, x = var_30822_cast_fp16)[name = tensor("op_31082_cast_fp16")]; + tensor var_31089_begin_0 = const()[name = tensor("op_31089_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_31089_end_0 = const()[name = tensor("op_31089_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_31089_end_mask_0 = const()[name = tensor("op_31089_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31089_cast_fp16 = slice_by_index(begin = var_31089_begin_0, end = var_31089_end_0, end_mask = var_31089_end_mask_0, x = var_30822_cast_fp16)[name = tensor("op_31089_cast_fp16")]; + tensor var_31096_begin_0 = const()[name = tensor("op_31096_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_31096_end_0 = const()[name = tensor("op_31096_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_31096_end_mask_0 = const()[name = tensor("op_31096_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31096_cast_fp16 = slice_by_index(begin = var_31096_begin_0, end = var_31096_end_0, end_mask = var_31096_end_mask_0, x = var_30822_cast_fp16)[name = tensor("op_31096_cast_fp16")]; + tensor var_31103_begin_0 = const()[name = tensor("op_31103_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_31103_end_0 = const()[name = tensor("op_31103_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_31103_end_mask_0 = const()[name = tensor("op_31103_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31103_cast_fp16 = slice_by_index(begin = var_31103_begin_0, end = var_31103_end_0, end_mask = var_31103_end_mask_0, x = var_30826_cast_fp16)[name = tensor("op_31103_cast_fp16")]; + tensor var_31110_begin_0 = const()[name = tensor("op_31110_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_31110_end_0 = const()[name = tensor("op_31110_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_31110_end_mask_0 = const()[name = tensor("op_31110_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31110_cast_fp16 = slice_by_index(begin = var_31110_begin_0, end = var_31110_end_0, end_mask = var_31110_end_mask_0, x = var_30826_cast_fp16)[name = tensor("op_31110_cast_fp16")]; + tensor var_31117_begin_0 = const()[name = tensor("op_31117_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_31117_end_0 = const()[name = tensor("op_31117_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_31117_end_mask_0 = const()[name = tensor("op_31117_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31117_cast_fp16 = slice_by_index(begin = var_31117_begin_0, end = var_31117_end_0, end_mask = var_31117_end_mask_0, x = var_30826_cast_fp16)[name = tensor("op_31117_cast_fp16")]; + tensor var_31124_begin_0 = const()[name = tensor("op_31124_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_31124_end_0 = const()[name = tensor("op_31124_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_31124_end_mask_0 = const()[name = tensor("op_31124_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31124_cast_fp16 = slice_by_index(begin = var_31124_begin_0, end = var_31124_end_0, end_mask = var_31124_end_mask_0, x = var_30826_cast_fp16)[name = tensor("op_31124_cast_fp16")]; + tensor var_31131_begin_0 = const()[name = tensor("op_31131_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_31131_end_0 = const()[name = tensor("op_31131_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_31131_end_mask_0 = const()[name = tensor("op_31131_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31131_cast_fp16 = slice_by_index(begin = var_31131_begin_0, end = var_31131_end_0, end_mask = var_31131_end_mask_0, x = var_30830_cast_fp16)[name = tensor("op_31131_cast_fp16")]; + tensor var_31138_begin_0 = const()[name = tensor("op_31138_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_31138_end_0 = const()[name = tensor("op_31138_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_31138_end_mask_0 = const()[name = tensor("op_31138_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31138_cast_fp16 = slice_by_index(begin = var_31138_begin_0, end = var_31138_end_0, end_mask = var_31138_end_mask_0, x = var_30830_cast_fp16)[name = tensor("op_31138_cast_fp16")]; + tensor var_31145_begin_0 = const()[name = tensor("op_31145_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_31145_end_0 = const()[name = tensor("op_31145_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_31145_end_mask_0 = const()[name = tensor("op_31145_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31145_cast_fp16 = slice_by_index(begin = var_31145_begin_0, end = var_31145_end_0, end_mask = var_31145_end_mask_0, x = var_30830_cast_fp16)[name = tensor("op_31145_cast_fp16")]; + tensor var_31152_begin_0 = const()[name = tensor("op_31152_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_31152_end_0 = const()[name = tensor("op_31152_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_31152_end_mask_0 = const()[name = tensor("op_31152_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31152_cast_fp16 = slice_by_index(begin = var_31152_begin_0, end = var_31152_end_0, end_mask = var_31152_end_mask_0, x = var_30830_cast_fp16)[name = tensor("op_31152_cast_fp16")]; + tensor var_31159_begin_0 = const()[name = tensor("op_31159_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_31159_end_0 = const()[name = tensor("op_31159_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_31159_end_mask_0 = const()[name = tensor("op_31159_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31159_cast_fp16 = slice_by_index(begin = var_31159_begin_0, end = var_31159_end_0, end_mask = var_31159_end_mask_0, x = var_30834_cast_fp16)[name = tensor("op_31159_cast_fp16")]; + tensor var_31166_begin_0 = const()[name = tensor("op_31166_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_31166_end_0 = const()[name = tensor("op_31166_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_31166_end_mask_0 = const()[name = tensor("op_31166_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31166_cast_fp16 = slice_by_index(begin = var_31166_begin_0, end = var_31166_end_0, end_mask = var_31166_end_mask_0, x = var_30834_cast_fp16)[name = tensor("op_31166_cast_fp16")]; + tensor var_31173_begin_0 = const()[name = tensor("op_31173_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_31173_end_0 = const()[name = tensor("op_31173_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_31173_end_mask_0 = const()[name = tensor("op_31173_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31173_cast_fp16 = slice_by_index(begin = var_31173_begin_0, end = var_31173_end_0, end_mask = var_31173_end_mask_0, x = var_30834_cast_fp16)[name = tensor("op_31173_cast_fp16")]; + tensor var_31180_begin_0 = const()[name = tensor("op_31180_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_31180_end_0 = const()[name = tensor("op_31180_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_31180_end_mask_0 = const()[name = tensor("op_31180_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31180_cast_fp16 = slice_by_index(begin = var_31180_begin_0, end = var_31180_end_0, end_mask = var_31180_end_mask_0, x = var_30834_cast_fp16)[name = tensor("op_31180_cast_fp16")]; + tensor var_31187_begin_0 = const()[name = tensor("op_31187_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_31187_end_0 = const()[name = tensor("op_31187_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_31187_end_mask_0 = const()[name = tensor("op_31187_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31187_cast_fp16 = slice_by_index(begin = var_31187_begin_0, end = var_31187_end_0, end_mask = var_31187_end_mask_0, x = var_30838_cast_fp16)[name = tensor("op_31187_cast_fp16")]; + tensor var_31194_begin_0 = const()[name = tensor("op_31194_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_31194_end_0 = const()[name = tensor("op_31194_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_31194_end_mask_0 = const()[name = tensor("op_31194_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31194_cast_fp16 = slice_by_index(begin = var_31194_begin_0, end = var_31194_end_0, end_mask = var_31194_end_mask_0, x = var_30838_cast_fp16)[name = tensor("op_31194_cast_fp16")]; + tensor var_31201_begin_0 = const()[name = tensor("op_31201_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_31201_end_0 = const()[name = tensor("op_31201_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_31201_end_mask_0 = const()[name = tensor("op_31201_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31201_cast_fp16 = slice_by_index(begin = var_31201_begin_0, end = var_31201_end_0, end_mask = var_31201_end_mask_0, x = var_30838_cast_fp16)[name = tensor("op_31201_cast_fp16")]; + tensor var_31208_begin_0 = const()[name = tensor("op_31208_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_31208_end_0 = const()[name = tensor("op_31208_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_31208_end_mask_0 = const()[name = tensor("op_31208_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31208_cast_fp16 = slice_by_index(begin = var_31208_begin_0, end = var_31208_end_0, end_mask = var_31208_end_mask_0, x = var_30838_cast_fp16)[name = tensor("op_31208_cast_fp16")]; + tensor var_31215_begin_0 = const()[name = tensor("op_31215_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_31215_end_0 = const()[name = tensor("op_31215_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_31215_end_mask_0 = const()[name = tensor("op_31215_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31215_cast_fp16 = slice_by_index(begin = var_31215_begin_0, end = var_31215_end_0, end_mask = var_31215_end_mask_0, x = var_30842_cast_fp16)[name = tensor("op_31215_cast_fp16")]; + tensor var_31222_begin_0 = const()[name = tensor("op_31222_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_31222_end_0 = const()[name = tensor("op_31222_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_31222_end_mask_0 = const()[name = tensor("op_31222_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31222_cast_fp16 = slice_by_index(begin = var_31222_begin_0, end = var_31222_end_0, end_mask = var_31222_end_mask_0, x = var_30842_cast_fp16)[name = tensor("op_31222_cast_fp16")]; + tensor var_31229_begin_0 = const()[name = tensor("op_31229_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_31229_end_0 = const()[name = tensor("op_31229_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_31229_end_mask_0 = const()[name = tensor("op_31229_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31229_cast_fp16 = slice_by_index(begin = var_31229_begin_0, end = var_31229_end_0, end_mask = var_31229_end_mask_0, x = var_30842_cast_fp16)[name = tensor("op_31229_cast_fp16")]; + tensor var_31236_begin_0 = const()[name = tensor("op_31236_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_31236_end_0 = const()[name = tensor("op_31236_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_31236_end_mask_0 = const()[name = tensor("op_31236_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31236_cast_fp16 = slice_by_index(begin = var_31236_begin_0, end = var_31236_end_0, end_mask = var_31236_end_mask_0, x = var_30842_cast_fp16)[name = tensor("op_31236_cast_fp16")]; + tensor var_31243_begin_0 = const()[name = tensor("op_31243_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_31243_end_0 = const()[name = tensor("op_31243_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_31243_end_mask_0 = const()[name = tensor("op_31243_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31243_cast_fp16 = slice_by_index(begin = var_31243_begin_0, end = var_31243_end_0, end_mask = var_31243_end_mask_0, x = var_30846_cast_fp16)[name = tensor("op_31243_cast_fp16")]; + tensor var_31250_begin_0 = const()[name = tensor("op_31250_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_31250_end_0 = const()[name = tensor("op_31250_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_31250_end_mask_0 = const()[name = tensor("op_31250_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31250_cast_fp16 = slice_by_index(begin = var_31250_begin_0, end = var_31250_end_0, end_mask = var_31250_end_mask_0, x = var_30846_cast_fp16)[name = tensor("op_31250_cast_fp16")]; + tensor var_31257_begin_0 = const()[name = tensor("op_31257_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_31257_end_0 = const()[name = tensor("op_31257_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_31257_end_mask_0 = const()[name = tensor("op_31257_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31257_cast_fp16 = slice_by_index(begin = var_31257_begin_0, end = var_31257_end_0, end_mask = var_31257_end_mask_0, x = var_30846_cast_fp16)[name = tensor("op_31257_cast_fp16")]; + tensor var_31264_begin_0 = const()[name = tensor("op_31264_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_31264_end_0 = const()[name = tensor("op_31264_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_31264_end_mask_0 = const()[name = tensor("op_31264_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31264_cast_fp16 = slice_by_index(begin = var_31264_begin_0, end = var_31264_end_0, end_mask = var_31264_end_mask_0, x = var_30846_cast_fp16)[name = tensor("op_31264_cast_fp16")]; + tensor var_31271_begin_0 = const()[name = tensor("op_31271_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_31271_end_0 = const()[name = tensor("op_31271_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_31271_end_mask_0 = const()[name = tensor("op_31271_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31271_cast_fp16 = slice_by_index(begin = var_31271_begin_0, end = var_31271_end_0, end_mask = var_31271_end_mask_0, x = var_30850_cast_fp16)[name = tensor("op_31271_cast_fp16")]; + tensor var_31278_begin_0 = const()[name = tensor("op_31278_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_31278_end_0 = const()[name = tensor("op_31278_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_31278_end_mask_0 = const()[name = tensor("op_31278_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31278_cast_fp16 = slice_by_index(begin = var_31278_begin_0, end = var_31278_end_0, end_mask = var_31278_end_mask_0, x = var_30850_cast_fp16)[name = tensor("op_31278_cast_fp16")]; + tensor var_31285_begin_0 = const()[name = tensor("op_31285_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_31285_end_0 = const()[name = tensor("op_31285_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_31285_end_mask_0 = const()[name = tensor("op_31285_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31285_cast_fp16 = slice_by_index(begin = var_31285_begin_0, end = var_31285_end_0, end_mask = var_31285_end_mask_0, x = var_30850_cast_fp16)[name = tensor("op_31285_cast_fp16")]; + tensor var_31292_begin_0 = const()[name = tensor("op_31292_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_31292_end_0 = const()[name = tensor("op_31292_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_31292_end_mask_0 = const()[name = tensor("op_31292_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31292_cast_fp16 = slice_by_index(begin = var_31292_begin_0, end = var_31292_end_0, end_mask = var_31292_end_mask_0, x = var_30850_cast_fp16)[name = tensor("op_31292_cast_fp16")]; + tensor var_31299_begin_0 = const()[name = tensor("op_31299_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_31299_end_0 = const()[name = tensor("op_31299_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_31299_end_mask_0 = const()[name = tensor("op_31299_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31299_cast_fp16 = slice_by_index(begin = var_31299_begin_0, end = var_31299_end_0, end_mask = var_31299_end_mask_0, x = var_30854_cast_fp16)[name = tensor("op_31299_cast_fp16")]; + tensor var_31306_begin_0 = const()[name = tensor("op_31306_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_31306_end_0 = const()[name = tensor("op_31306_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_31306_end_mask_0 = const()[name = tensor("op_31306_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31306_cast_fp16 = slice_by_index(begin = var_31306_begin_0, end = var_31306_end_0, end_mask = var_31306_end_mask_0, x = var_30854_cast_fp16)[name = tensor("op_31306_cast_fp16")]; + tensor var_31313_begin_0 = const()[name = tensor("op_31313_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_31313_end_0 = const()[name = tensor("op_31313_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_31313_end_mask_0 = const()[name = tensor("op_31313_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31313_cast_fp16 = slice_by_index(begin = var_31313_begin_0, end = var_31313_end_0, end_mask = var_31313_end_mask_0, x = var_30854_cast_fp16)[name = tensor("op_31313_cast_fp16")]; + tensor var_31320_begin_0 = const()[name = tensor("op_31320_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_31320_end_0 = const()[name = tensor("op_31320_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_31320_end_mask_0 = const()[name = tensor("op_31320_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31320_cast_fp16 = slice_by_index(begin = var_31320_begin_0, end = var_31320_end_0, end_mask = var_31320_end_mask_0, x = var_30854_cast_fp16)[name = tensor("op_31320_cast_fp16")]; + tensor var_31327_begin_0 = const()[name = tensor("op_31327_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_31327_end_0 = const()[name = tensor("op_31327_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_31327_end_mask_0 = const()[name = tensor("op_31327_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31327_cast_fp16 = slice_by_index(begin = var_31327_begin_0, end = var_31327_end_0, end_mask = var_31327_end_mask_0, x = var_30858_cast_fp16)[name = tensor("op_31327_cast_fp16")]; + tensor var_31334_begin_0 = const()[name = tensor("op_31334_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_31334_end_0 = const()[name = tensor("op_31334_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_31334_end_mask_0 = const()[name = tensor("op_31334_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31334_cast_fp16 = slice_by_index(begin = var_31334_begin_0, end = var_31334_end_0, end_mask = var_31334_end_mask_0, x = var_30858_cast_fp16)[name = tensor("op_31334_cast_fp16")]; + tensor var_31341_begin_0 = const()[name = tensor("op_31341_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_31341_end_0 = const()[name = tensor("op_31341_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_31341_end_mask_0 = const()[name = tensor("op_31341_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31341_cast_fp16 = slice_by_index(begin = var_31341_begin_0, end = var_31341_end_0, end_mask = var_31341_end_mask_0, x = var_30858_cast_fp16)[name = tensor("op_31341_cast_fp16")]; + tensor var_31348_begin_0 = const()[name = tensor("op_31348_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_31348_end_0 = const()[name = tensor("op_31348_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_31348_end_mask_0 = const()[name = tensor("op_31348_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31348_cast_fp16 = slice_by_index(begin = var_31348_begin_0, end = var_31348_end_0, end_mask = var_31348_end_mask_0, x = var_30858_cast_fp16)[name = tensor("op_31348_cast_fp16")]; + tensor var_31355_begin_0 = const()[name = tensor("op_31355_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_31355_end_0 = const()[name = tensor("op_31355_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_31355_end_mask_0 = const()[name = tensor("op_31355_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31355_cast_fp16 = slice_by_index(begin = var_31355_begin_0, end = var_31355_end_0, end_mask = var_31355_end_mask_0, x = var_30862_cast_fp16)[name = tensor("op_31355_cast_fp16")]; + tensor var_31362_begin_0 = const()[name = tensor("op_31362_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_31362_end_0 = const()[name = tensor("op_31362_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_31362_end_mask_0 = const()[name = tensor("op_31362_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31362_cast_fp16 = slice_by_index(begin = var_31362_begin_0, end = var_31362_end_0, end_mask = var_31362_end_mask_0, x = var_30862_cast_fp16)[name = tensor("op_31362_cast_fp16")]; + tensor var_31369_begin_0 = const()[name = tensor("op_31369_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_31369_end_0 = const()[name = tensor("op_31369_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_31369_end_mask_0 = const()[name = tensor("op_31369_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31369_cast_fp16 = slice_by_index(begin = var_31369_begin_0, end = var_31369_end_0, end_mask = var_31369_end_mask_0, x = var_30862_cast_fp16)[name = tensor("op_31369_cast_fp16")]; + tensor var_31376_begin_0 = const()[name = tensor("op_31376_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_31376_end_0 = const()[name = tensor("op_31376_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_31376_end_mask_0 = const()[name = tensor("op_31376_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31376_cast_fp16 = slice_by_index(begin = var_31376_begin_0, end = var_31376_end_0, end_mask = var_31376_end_mask_0, x = var_30862_cast_fp16)[name = tensor("op_31376_cast_fp16")]; + tensor var_31383_begin_0 = const()[name = tensor("op_31383_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_31383_end_0 = const()[name = tensor("op_31383_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_31383_end_mask_0 = const()[name = tensor("op_31383_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31383_cast_fp16 = slice_by_index(begin = var_31383_begin_0, end = var_31383_end_0, end_mask = var_31383_end_mask_0, x = var_30866_cast_fp16)[name = tensor("op_31383_cast_fp16")]; + tensor var_31390_begin_0 = const()[name = tensor("op_31390_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_31390_end_0 = const()[name = tensor("op_31390_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_31390_end_mask_0 = const()[name = tensor("op_31390_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31390_cast_fp16 = slice_by_index(begin = var_31390_begin_0, end = var_31390_end_0, end_mask = var_31390_end_mask_0, x = var_30866_cast_fp16)[name = tensor("op_31390_cast_fp16")]; + tensor var_31397_begin_0 = const()[name = tensor("op_31397_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_31397_end_0 = const()[name = tensor("op_31397_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_31397_end_mask_0 = const()[name = tensor("op_31397_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31397_cast_fp16 = slice_by_index(begin = var_31397_begin_0, end = var_31397_end_0, end_mask = var_31397_end_mask_0, x = var_30866_cast_fp16)[name = tensor("op_31397_cast_fp16")]; + tensor var_31404_begin_0 = const()[name = tensor("op_31404_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_31404_end_0 = const()[name = tensor("op_31404_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_31404_end_mask_0 = const()[name = tensor("op_31404_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31404_cast_fp16 = slice_by_index(begin = var_31404_begin_0, end = var_31404_end_0, end_mask = var_31404_end_mask_0, x = var_30866_cast_fp16)[name = tensor("op_31404_cast_fp16")]; + tensor var_31411_begin_0 = const()[name = tensor("op_31411_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_31411_end_0 = const()[name = tensor("op_31411_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_31411_end_mask_0 = const()[name = tensor("op_31411_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31411_cast_fp16 = slice_by_index(begin = var_31411_begin_0, end = var_31411_end_0, end_mask = var_31411_end_mask_0, x = var_30870_cast_fp16)[name = tensor("op_31411_cast_fp16")]; + tensor var_31418_begin_0 = const()[name = tensor("op_31418_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_31418_end_0 = const()[name = tensor("op_31418_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_31418_end_mask_0 = const()[name = tensor("op_31418_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31418_cast_fp16 = slice_by_index(begin = var_31418_begin_0, end = var_31418_end_0, end_mask = var_31418_end_mask_0, x = var_30870_cast_fp16)[name = tensor("op_31418_cast_fp16")]; + tensor var_31425_begin_0 = const()[name = tensor("op_31425_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_31425_end_0 = const()[name = tensor("op_31425_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_31425_end_mask_0 = const()[name = tensor("op_31425_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31425_cast_fp16 = slice_by_index(begin = var_31425_begin_0, end = var_31425_end_0, end_mask = var_31425_end_mask_0, x = var_30870_cast_fp16)[name = tensor("op_31425_cast_fp16")]; + tensor var_31432_begin_0 = const()[name = tensor("op_31432_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_31432_end_0 = const()[name = tensor("op_31432_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_31432_end_mask_0 = const()[name = tensor("op_31432_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31432_cast_fp16 = slice_by_index(begin = var_31432_begin_0, end = var_31432_end_0, end_mask = var_31432_end_mask_0, x = var_30870_cast_fp16)[name = tensor("op_31432_cast_fp16")]; + tensor k_41_perm_0 = const()[name = tensor("k_41_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_31437_begin_0 = const()[name = tensor("op_31437_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_31437_end_0 = const()[name = tensor("op_31437_end_0"), val = tensor([1, 1500, 1, 64])]; + tensor var_31437_end_mask_0 = const()[name = tensor("op_31437_end_mask_0"), val = tensor([true, true, true, false])]; + tensor transpose_11 = transpose(perm = k_41_perm_0, x = key_41_cast_fp16)[name = tensor("transpose_11")]; + tensor var_31437_cast_fp16 = slice_by_index(begin = var_31437_begin_0, end = var_31437_end_0, end_mask = var_31437_end_mask_0, x = transpose_11)[name = tensor("op_31437_cast_fp16")]; + tensor var_31441_begin_0 = const()[name = tensor("op_31441_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor var_31441_end_0 = const()[name = tensor("op_31441_end_0"), val = tensor([1, 1500, 1, 128])]; + tensor var_31441_end_mask_0 = const()[name = tensor("op_31441_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31441_cast_fp16 = slice_by_index(begin = var_31441_begin_0, end = var_31441_end_0, end_mask = var_31441_end_mask_0, x = transpose_11)[name = tensor("op_31441_cast_fp16")]; + tensor var_31445_begin_0 = const()[name = tensor("op_31445_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_31445_end_0 = const()[name = tensor("op_31445_end_0"), val = tensor([1, 1500, 1, 192])]; + tensor var_31445_end_mask_0 = const()[name = tensor("op_31445_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31445_cast_fp16 = slice_by_index(begin = var_31445_begin_0, end = var_31445_end_0, end_mask = var_31445_end_mask_0, x = transpose_11)[name = tensor("op_31445_cast_fp16")]; + tensor var_31449_begin_0 = const()[name = tensor("op_31449_begin_0"), val = tensor([0, 0, 0, 192])]; + tensor var_31449_end_0 = const()[name = tensor("op_31449_end_0"), val = tensor([1, 1500, 1, 256])]; + tensor var_31449_end_mask_0 = const()[name = tensor("op_31449_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31449_cast_fp16 = slice_by_index(begin = var_31449_begin_0, end = var_31449_end_0, end_mask = var_31449_end_mask_0, x = transpose_11)[name = tensor("op_31449_cast_fp16")]; + tensor var_31453_begin_0 = const()[name = tensor("op_31453_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_31453_end_0 = const()[name = tensor("op_31453_end_0"), val = tensor([1, 1500, 1, 320])]; + tensor var_31453_end_mask_0 = const()[name = tensor("op_31453_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31453_cast_fp16 = slice_by_index(begin = var_31453_begin_0, end = var_31453_end_0, end_mask = var_31453_end_mask_0, x = transpose_11)[name = tensor("op_31453_cast_fp16")]; + tensor var_31457_begin_0 = const()[name = tensor("op_31457_begin_0"), val = tensor([0, 0, 0, 320])]; + tensor var_31457_end_0 = const()[name = tensor("op_31457_end_0"), val = tensor([1, 1500, 1, 384])]; + tensor var_31457_end_mask_0 = const()[name = tensor("op_31457_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31457_cast_fp16 = slice_by_index(begin = var_31457_begin_0, end = var_31457_end_0, end_mask = var_31457_end_mask_0, x = transpose_11)[name = tensor("op_31457_cast_fp16")]; + tensor var_31461_begin_0 = const()[name = tensor("op_31461_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_31461_end_0 = const()[name = tensor("op_31461_end_0"), val = tensor([1, 1500, 1, 448])]; + tensor var_31461_end_mask_0 = const()[name = tensor("op_31461_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31461_cast_fp16 = slice_by_index(begin = var_31461_begin_0, end = var_31461_end_0, end_mask = var_31461_end_mask_0, x = transpose_11)[name = tensor("op_31461_cast_fp16")]; + tensor var_31465_begin_0 = const()[name = tensor("op_31465_begin_0"), val = tensor([0, 0, 0, 448])]; + tensor var_31465_end_0 = const()[name = tensor("op_31465_end_0"), val = tensor([1, 1500, 1, 512])]; + tensor var_31465_end_mask_0 = const()[name = tensor("op_31465_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31465_cast_fp16 = slice_by_index(begin = var_31465_begin_0, end = var_31465_end_0, end_mask = var_31465_end_mask_0, x = transpose_11)[name = tensor("op_31465_cast_fp16")]; + tensor var_31469_begin_0 = const()[name = tensor("op_31469_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_31469_end_0 = const()[name = tensor("op_31469_end_0"), val = tensor([1, 1500, 1, 576])]; + tensor var_31469_end_mask_0 = const()[name = tensor("op_31469_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31469_cast_fp16 = slice_by_index(begin = var_31469_begin_0, end = var_31469_end_0, end_mask = var_31469_end_mask_0, x = transpose_11)[name = tensor("op_31469_cast_fp16")]; + tensor var_31473_begin_0 = const()[name = tensor("op_31473_begin_0"), val = tensor([0, 0, 0, 576])]; + tensor var_31473_end_0 = const()[name = tensor("op_31473_end_0"), val = tensor([1, 1500, 1, 640])]; + tensor var_31473_end_mask_0 = const()[name = tensor("op_31473_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31473_cast_fp16 = slice_by_index(begin = var_31473_begin_0, end = var_31473_end_0, end_mask = var_31473_end_mask_0, x = transpose_11)[name = tensor("op_31473_cast_fp16")]; + tensor var_31477_begin_0 = const()[name = tensor("op_31477_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_31477_end_0 = const()[name = tensor("op_31477_end_0"), val = tensor([1, 1500, 1, 704])]; + tensor var_31477_end_mask_0 = const()[name = tensor("op_31477_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31477_cast_fp16 = slice_by_index(begin = var_31477_begin_0, end = var_31477_end_0, end_mask = var_31477_end_mask_0, x = transpose_11)[name = tensor("op_31477_cast_fp16")]; + tensor var_31481_begin_0 = const()[name = tensor("op_31481_begin_0"), val = tensor([0, 0, 0, 704])]; + tensor var_31481_end_0 = const()[name = tensor("op_31481_end_0"), val = tensor([1, 1500, 1, 768])]; + tensor var_31481_end_mask_0 = const()[name = tensor("op_31481_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31481_cast_fp16 = slice_by_index(begin = var_31481_begin_0, end = var_31481_end_0, end_mask = var_31481_end_mask_0, x = transpose_11)[name = tensor("op_31481_cast_fp16")]; + tensor var_31485_begin_0 = const()[name = tensor("op_31485_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_31485_end_0 = const()[name = tensor("op_31485_end_0"), val = tensor([1, 1500, 1, 832])]; + tensor var_31485_end_mask_0 = const()[name = tensor("op_31485_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31485_cast_fp16 = slice_by_index(begin = var_31485_begin_0, end = var_31485_end_0, end_mask = var_31485_end_mask_0, x = transpose_11)[name = tensor("op_31485_cast_fp16")]; + tensor var_31489_begin_0 = const()[name = tensor("op_31489_begin_0"), val = tensor([0, 0, 0, 832])]; + tensor var_31489_end_0 = const()[name = tensor("op_31489_end_0"), val = tensor([1, 1500, 1, 896])]; + tensor var_31489_end_mask_0 = const()[name = tensor("op_31489_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31489_cast_fp16 = slice_by_index(begin = var_31489_begin_0, end = var_31489_end_0, end_mask = var_31489_end_mask_0, x = transpose_11)[name = tensor("op_31489_cast_fp16")]; + tensor var_31493_begin_0 = const()[name = tensor("op_31493_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_31493_end_0 = const()[name = tensor("op_31493_end_0"), val = tensor([1, 1500, 1, 960])]; + tensor var_31493_end_mask_0 = const()[name = tensor("op_31493_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31493_cast_fp16 = slice_by_index(begin = var_31493_begin_0, end = var_31493_end_0, end_mask = var_31493_end_mask_0, x = transpose_11)[name = tensor("op_31493_cast_fp16")]; + tensor var_31497_begin_0 = const()[name = tensor("op_31497_begin_0"), val = tensor([0, 0, 0, 960])]; + tensor var_31497_end_0 = const()[name = tensor("op_31497_end_0"), val = tensor([1, 1500, 1, 1024])]; + tensor var_31497_end_mask_0 = const()[name = tensor("op_31497_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31497_cast_fp16 = slice_by_index(begin = var_31497_begin_0, end = var_31497_end_0, end_mask = var_31497_end_mask_0, x = transpose_11)[name = tensor("op_31497_cast_fp16")]; + tensor var_31501_begin_0 = const()[name = tensor("op_31501_begin_0"), val = tensor([0, 0, 0, 1024])]; + tensor var_31501_end_0 = const()[name = tensor("op_31501_end_0"), val = tensor([1, 1500, 1, 1088])]; + tensor var_31501_end_mask_0 = const()[name = tensor("op_31501_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31501_cast_fp16 = slice_by_index(begin = var_31501_begin_0, end = var_31501_end_0, end_mask = var_31501_end_mask_0, x = transpose_11)[name = tensor("op_31501_cast_fp16")]; + tensor var_31505_begin_0 = const()[name = tensor("op_31505_begin_0"), val = tensor([0, 0, 0, 1088])]; + tensor var_31505_end_0 = const()[name = tensor("op_31505_end_0"), val = tensor([1, 1500, 1, 1152])]; + tensor var_31505_end_mask_0 = const()[name = tensor("op_31505_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31505_cast_fp16 = slice_by_index(begin = var_31505_begin_0, end = var_31505_end_0, end_mask = var_31505_end_mask_0, x = transpose_11)[name = tensor("op_31505_cast_fp16")]; + tensor var_31509_begin_0 = const()[name = tensor("op_31509_begin_0"), val = tensor([0, 0, 0, 1152])]; + tensor var_31509_end_0 = const()[name = tensor("op_31509_end_0"), val = tensor([1, 1500, 1, 1216])]; + tensor var_31509_end_mask_0 = const()[name = tensor("op_31509_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31509_cast_fp16 = slice_by_index(begin = var_31509_begin_0, end = var_31509_end_0, end_mask = var_31509_end_mask_0, x = transpose_11)[name = tensor("op_31509_cast_fp16")]; + tensor var_31513_begin_0 = const()[name = tensor("op_31513_begin_0"), val = tensor([0, 0, 0, 1216])]; + tensor var_31513_end_0 = const()[name = tensor("op_31513_end_0"), val = tensor([1, 1500, 1, 1280])]; + tensor var_31513_end_mask_0 = const()[name = tensor("op_31513_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_31513_cast_fp16 = slice_by_index(begin = var_31513_begin_0, end = var_31513_end_0, end_mask = var_31513_end_mask_0, x = transpose_11)[name = tensor("op_31513_cast_fp16")]; + tensor var_31515_begin_0 = const()[name = tensor("op_31515_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_31515_end_0 = const()[name = tensor("op_31515_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_31515_end_mask_0 = const()[name = tensor("op_31515_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_31515_cast_fp16 = slice_by_index(begin = var_31515_begin_0, end = var_31515_end_0, end_mask = var_31515_end_mask_0, x = value_41_cast_fp16)[name = tensor("op_31515_cast_fp16")]; + tensor var_31519_begin_0 = const()[name = tensor("op_31519_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_31519_end_0 = const()[name = tensor("op_31519_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_31519_end_mask_0 = const()[name = tensor("op_31519_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_31519_cast_fp16 = slice_by_index(begin = var_31519_begin_0, end = var_31519_end_0, end_mask = var_31519_end_mask_0, x = value_41_cast_fp16)[name = tensor("op_31519_cast_fp16")]; + tensor var_31523_begin_0 = const()[name = tensor("op_31523_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_31523_end_0 = const()[name = tensor("op_31523_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_31523_end_mask_0 = const()[name = tensor("op_31523_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_31523_cast_fp16 = slice_by_index(begin = var_31523_begin_0, end = var_31523_end_0, end_mask = var_31523_end_mask_0, x = value_41_cast_fp16)[name = tensor("op_31523_cast_fp16")]; + tensor var_31527_begin_0 = const()[name = tensor("op_31527_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_31527_end_0 = const()[name = tensor("op_31527_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_31527_end_mask_0 = const()[name = tensor("op_31527_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_31527_cast_fp16 = slice_by_index(begin = var_31527_begin_0, end = var_31527_end_0, end_mask = var_31527_end_mask_0, x = value_41_cast_fp16)[name = tensor("op_31527_cast_fp16")]; + tensor var_31531_begin_0 = const()[name = tensor("op_31531_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_31531_end_0 = const()[name = tensor("op_31531_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_31531_end_mask_0 = const()[name = tensor("op_31531_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_31531_cast_fp16 = slice_by_index(begin = var_31531_begin_0, end = var_31531_end_0, end_mask = var_31531_end_mask_0, x = value_41_cast_fp16)[name = tensor("op_31531_cast_fp16")]; + tensor var_31535_begin_0 = const()[name = tensor("op_31535_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_31535_end_0 = const()[name = tensor("op_31535_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_31535_end_mask_0 = const()[name = tensor("op_31535_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_31535_cast_fp16 = slice_by_index(begin = var_31535_begin_0, end = var_31535_end_0, end_mask = var_31535_end_mask_0, x = value_41_cast_fp16)[name = tensor("op_31535_cast_fp16")]; + tensor var_31539_begin_0 = const()[name = tensor("op_31539_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_31539_end_0 = const()[name = tensor("op_31539_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_31539_end_mask_0 = const()[name = tensor("op_31539_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_31539_cast_fp16 = slice_by_index(begin = var_31539_begin_0, end = var_31539_end_0, end_mask = var_31539_end_mask_0, x = value_41_cast_fp16)[name = tensor("op_31539_cast_fp16")]; + tensor var_31543_begin_0 = const()[name = tensor("op_31543_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_31543_end_0 = const()[name = tensor("op_31543_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_31543_end_mask_0 = const()[name = tensor("op_31543_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_31543_cast_fp16 = slice_by_index(begin = var_31543_begin_0, end = var_31543_end_0, end_mask = var_31543_end_mask_0, x = value_41_cast_fp16)[name = tensor("op_31543_cast_fp16")]; + tensor var_31547_begin_0 = const()[name = tensor("op_31547_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_31547_end_0 = const()[name = tensor("op_31547_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_31547_end_mask_0 = const()[name = tensor("op_31547_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_31547_cast_fp16 = slice_by_index(begin = var_31547_begin_0, end = var_31547_end_0, end_mask = var_31547_end_mask_0, x = value_41_cast_fp16)[name = tensor("op_31547_cast_fp16")]; + tensor var_31551_begin_0 = const()[name = tensor("op_31551_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_31551_end_0 = const()[name = tensor("op_31551_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_31551_end_mask_0 = const()[name = tensor("op_31551_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_31551_cast_fp16 = slice_by_index(begin = var_31551_begin_0, end = var_31551_end_0, end_mask = var_31551_end_mask_0, x = value_41_cast_fp16)[name = tensor("op_31551_cast_fp16")]; + tensor var_31555_begin_0 = const()[name = tensor("op_31555_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_31555_end_0 = const()[name = tensor("op_31555_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_31555_end_mask_0 = const()[name = tensor("op_31555_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_31555_cast_fp16 = slice_by_index(begin = var_31555_begin_0, end = var_31555_end_0, end_mask = var_31555_end_mask_0, x = value_41_cast_fp16)[name = tensor("op_31555_cast_fp16")]; + tensor var_31559_begin_0 = const()[name = tensor("op_31559_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_31559_end_0 = const()[name = tensor("op_31559_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_31559_end_mask_0 = const()[name = tensor("op_31559_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_31559_cast_fp16 = slice_by_index(begin = var_31559_begin_0, end = var_31559_end_0, end_mask = var_31559_end_mask_0, x = value_41_cast_fp16)[name = tensor("op_31559_cast_fp16")]; + tensor var_31563_begin_0 = const()[name = tensor("op_31563_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_31563_end_0 = const()[name = tensor("op_31563_end_0"), val = tensor([1, 832, 1, 1500])]; + tensor var_31563_end_mask_0 = const()[name = tensor("op_31563_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_31563_cast_fp16 = slice_by_index(begin = var_31563_begin_0, end = var_31563_end_0, end_mask = var_31563_end_mask_0, x = value_41_cast_fp16)[name = tensor("op_31563_cast_fp16")]; + tensor var_31567_begin_0 = const()[name = tensor("op_31567_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_31567_end_0 = const()[name = tensor("op_31567_end_0"), val = tensor([1, 896, 1, 1500])]; + tensor var_31567_end_mask_0 = const()[name = tensor("op_31567_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_31567_cast_fp16 = slice_by_index(begin = var_31567_begin_0, end = var_31567_end_0, end_mask = var_31567_end_mask_0, x = value_41_cast_fp16)[name = tensor("op_31567_cast_fp16")]; + tensor var_31571_begin_0 = const()[name = tensor("op_31571_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_31571_end_0 = const()[name = tensor("op_31571_end_0"), val = tensor([1, 960, 1, 1500])]; + tensor var_31571_end_mask_0 = const()[name = tensor("op_31571_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_31571_cast_fp16 = slice_by_index(begin = var_31571_begin_0, end = var_31571_end_0, end_mask = var_31571_end_mask_0, x = value_41_cast_fp16)[name = tensor("op_31571_cast_fp16")]; + tensor var_31575_begin_0 = const()[name = tensor("op_31575_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_31575_end_0 = const()[name = tensor("op_31575_end_0"), val = tensor([1, 1024, 1, 1500])]; + tensor var_31575_end_mask_0 = const()[name = tensor("op_31575_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_31575_cast_fp16 = slice_by_index(begin = var_31575_begin_0, end = var_31575_end_0, end_mask = var_31575_end_mask_0, x = value_41_cast_fp16)[name = tensor("op_31575_cast_fp16")]; + tensor var_31579_begin_0 = const()[name = tensor("op_31579_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_31579_end_0 = const()[name = tensor("op_31579_end_0"), val = tensor([1, 1088, 1, 1500])]; + tensor var_31579_end_mask_0 = const()[name = tensor("op_31579_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_31579_cast_fp16 = slice_by_index(begin = var_31579_begin_0, end = var_31579_end_0, end_mask = var_31579_end_mask_0, x = value_41_cast_fp16)[name = tensor("op_31579_cast_fp16")]; + tensor var_31583_begin_0 = const()[name = tensor("op_31583_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_31583_end_0 = const()[name = tensor("op_31583_end_0"), val = tensor([1, 1152, 1, 1500])]; + tensor var_31583_end_mask_0 = const()[name = tensor("op_31583_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_31583_cast_fp16 = slice_by_index(begin = var_31583_begin_0, end = var_31583_end_0, end_mask = var_31583_end_mask_0, x = value_41_cast_fp16)[name = tensor("op_31583_cast_fp16")]; + tensor var_31587_begin_0 = const()[name = tensor("op_31587_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_31587_end_0 = const()[name = tensor("op_31587_end_0"), val = tensor([1, 1216, 1, 1500])]; + tensor var_31587_end_mask_0 = const()[name = tensor("op_31587_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_31587_cast_fp16 = slice_by_index(begin = var_31587_begin_0, end = var_31587_end_0, end_mask = var_31587_end_mask_0, x = value_41_cast_fp16)[name = tensor("op_31587_cast_fp16")]; + tensor var_31591_begin_0 = const()[name = tensor("op_31591_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_31591_end_0 = const()[name = tensor("op_31591_end_0"), val = tensor([1, 1280, 1, 1500])]; + tensor var_31591_end_mask_0 = const()[name = tensor("op_31591_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_31591_cast_fp16 = slice_by_index(begin = var_31591_begin_0, end = var_31591_end_0, end_mask = var_31591_end_mask_0, x = value_41_cast_fp16)[name = tensor("op_31591_cast_fp16")]; + tensor var_31595_equation_0 = const()[name = tensor("op_31595_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31595_cast_fp16 = einsum(equation = var_31595_equation_0, values = (var_31437_cast_fp16, var_30879_cast_fp16))[name = tensor("op_31595_cast_fp16")]; + tensor var_31596_to_fp16 = const()[name = tensor("op_31596_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3201_cast_fp16 = mul(x = var_31595_cast_fp16, y = var_31596_to_fp16)[name = tensor("aw_chunk_3201_cast_fp16")]; + tensor var_31599_equation_0 = const()[name = tensor("op_31599_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31599_cast_fp16 = einsum(equation = var_31599_equation_0, values = (var_31437_cast_fp16, var_30886_cast_fp16))[name = tensor("op_31599_cast_fp16")]; + tensor var_31600_to_fp16 = const()[name = tensor("op_31600_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3203_cast_fp16 = mul(x = var_31599_cast_fp16, y = var_31600_to_fp16)[name = tensor("aw_chunk_3203_cast_fp16")]; + tensor var_31603_equation_0 = const()[name = tensor("op_31603_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31603_cast_fp16 = einsum(equation = var_31603_equation_0, values = (var_31437_cast_fp16, var_30893_cast_fp16))[name = tensor("op_31603_cast_fp16")]; + tensor var_31604_to_fp16 = const()[name = tensor("op_31604_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3205_cast_fp16 = mul(x = var_31603_cast_fp16, y = var_31604_to_fp16)[name = tensor("aw_chunk_3205_cast_fp16")]; + tensor var_31607_equation_0 = const()[name = tensor("op_31607_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31607_cast_fp16 = einsum(equation = var_31607_equation_0, values = (var_31437_cast_fp16, var_30900_cast_fp16))[name = tensor("op_31607_cast_fp16")]; + tensor var_31608_to_fp16 = const()[name = tensor("op_31608_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3207_cast_fp16 = mul(x = var_31607_cast_fp16, y = var_31608_to_fp16)[name = tensor("aw_chunk_3207_cast_fp16")]; + tensor var_31611_equation_0 = const()[name = tensor("op_31611_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31611_cast_fp16 = einsum(equation = var_31611_equation_0, values = (var_31441_cast_fp16, var_30907_cast_fp16))[name = tensor("op_31611_cast_fp16")]; + tensor var_31612_to_fp16 = const()[name = tensor("op_31612_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3209_cast_fp16 = mul(x = var_31611_cast_fp16, y = var_31612_to_fp16)[name = tensor("aw_chunk_3209_cast_fp16")]; + tensor var_31615_equation_0 = const()[name = tensor("op_31615_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31615_cast_fp16 = einsum(equation = var_31615_equation_0, values = (var_31441_cast_fp16, var_30914_cast_fp16))[name = tensor("op_31615_cast_fp16")]; + tensor var_31616_to_fp16 = const()[name = tensor("op_31616_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3211_cast_fp16 = mul(x = var_31615_cast_fp16, y = var_31616_to_fp16)[name = tensor("aw_chunk_3211_cast_fp16")]; + tensor var_31619_equation_0 = const()[name = tensor("op_31619_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31619_cast_fp16 = einsum(equation = var_31619_equation_0, values = (var_31441_cast_fp16, var_30921_cast_fp16))[name = tensor("op_31619_cast_fp16")]; + tensor var_31620_to_fp16 = const()[name = tensor("op_31620_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3213_cast_fp16 = mul(x = var_31619_cast_fp16, y = var_31620_to_fp16)[name = tensor("aw_chunk_3213_cast_fp16")]; + tensor var_31623_equation_0 = const()[name = tensor("op_31623_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31623_cast_fp16 = einsum(equation = var_31623_equation_0, values = (var_31441_cast_fp16, var_30928_cast_fp16))[name = tensor("op_31623_cast_fp16")]; + tensor var_31624_to_fp16 = const()[name = tensor("op_31624_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3215_cast_fp16 = mul(x = var_31623_cast_fp16, y = var_31624_to_fp16)[name = tensor("aw_chunk_3215_cast_fp16")]; + tensor var_31627_equation_0 = const()[name = tensor("op_31627_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31627_cast_fp16 = einsum(equation = var_31627_equation_0, values = (var_31445_cast_fp16, var_30935_cast_fp16))[name = tensor("op_31627_cast_fp16")]; + tensor var_31628_to_fp16 = const()[name = tensor("op_31628_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3217_cast_fp16 = mul(x = var_31627_cast_fp16, y = var_31628_to_fp16)[name = tensor("aw_chunk_3217_cast_fp16")]; + tensor var_31631_equation_0 = const()[name = tensor("op_31631_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31631_cast_fp16 = einsum(equation = var_31631_equation_0, values = (var_31445_cast_fp16, var_30942_cast_fp16))[name = tensor("op_31631_cast_fp16")]; + tensor var_31632_to_fp16 = const()[name = tensor("op_31632_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3219_cast_fp16 = mul(x = var_31631_cast_fp16, y = var_31632_to_fp16)[name = tensor("aw_chunk_3219_cast_fp16")]; + tensor var_31635_equation_0 = const()[name = tensor("op_31635_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31635_cast_fp16 = einsum(equation = var_31635_equation_0, values = (var_31445_cast_fp16, var_30949_cast_fp16))[name = tensor("op_31635_cast_fp16")]; + tensor var_31636_to_fp16 = const()[name = tensor("op_31636_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3221_cast_fp16 = mul(x = var_31635_cast_fp16, y = var_31636_to_fp16)[name = tensor("aw_chunk_3221_cast_fp16")]; + tensor var_31639_equation_0 = const()[name = tensor("op_31639_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31639_cast_fp16 = einsum(equation = var_31639_equation_0, values = (var_31445_cast_fp16, var_30956_cast_fp16))[name = tensor("op_31639_cast_fp16")]; + tensor var_31640_to_fp16 = const()[name = tensor("op_31640_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3223_cast_fp16 = mul(x = var_31639_cast_fp16, y = var_31640_to_fp16)[name = tensor("aw_chunk_3223_cast_fp16")]; + tensor var_31643_equation_0 = const()[name = tensor("op_31643_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31643_cast_fp16 = einsum(equation = var_31643_equation_0, values = (var_31449_cast_fp16, var_30963_cast_fp16))[name = tensor("op_31643_cast_fp16")]; + tensor var_31644_to_fp16 = const()[name = tensor("op_31644_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3225_cast_fp16 = mul(x = var_31643_cast_fp16, y = var_31644_to_fp16)[name = tensor("aw_chunk_3225_cast_fp16")]; + tensor var_31647_equation_0 = const()[name = tensor("op_31647_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31647_cast_fp16 = einsum(equation = var_31647_equation_0, values = (var_31449_cast_fp16, var_30970_cast_fp16))[name = tensor("op_31647_cast_fp16")]; + tensor var_31648_to_fp16 = const()[name = tensor("op_31648_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3227_cast_fp16 = mul(x = var_31647_cast_fp16, y = var_31648_to_fp16)[name = tensor("aw_chunk_3227_cast_fp16")]; + tensor var_31651_equation_0 = const()[name = tensor("op_31651_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31651_cast_fp16 = einsum(equation = var_31651_equation_0, values = (var_31449_cast_fp16, var_30977_cast_fp16))[name = tensor("op_31651_cast_fp16")]; + tensor var_31652_to_fp16 = const()[name = tensor("op_31652_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3229_cast_fp16 = mul(x = var_31651_cast_fp16, y = var_31652_to_fp16)[name = tensor("aw_chunk_3229_cast_fp16")]; + tensor var_31655_equation_0 = const()[name = tensor("op_31655_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31655_cast_fp16 = einsum(equation = var_31655_equation_0, values = (var_31449_cast_fp16, var_30984_cast_fp16))[name = tensor("op_31655_cast_fp16")]; + tensor var_31656_to_fp16 = const()[name = tensor("op_31656_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3231_cast_fp16 = mul(x = var_31655_cast_fp16, y = var_31656_to_fp16)[name = tensor("aw_chunk_3231_cast_fp16")]; + tensor var_31659_equation_0 = const()[name = tensor("op_31659_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31659_cast_fp16 = einsum(equation = var_31659_equation_0, values = (var_31453_cast_fp16, var_30991_cast_fp16))[name = tensor("op_31659_cast_fp16")]; + tensor var_31660_to_fp16 = const()[name = tensor("op_31660_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3233_cast_fp16 = mul(x = var_31659_cast_fp16, y = var_31660_to_fp16)[name = tensor("aw_chunk_3233_cast_fp16")]; + tensor var_31663_equation_0 = const()[name = tensor("op_31663_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31663_cast_fp16 = einsum(equation = var_31663_equation_0, values = (var_31453_cast_fp16, var_30998_cast_fp16))[name = tensor("op_31663_cast_fp16")]; + tensor var_31664_to_fp16 = const()[name = tensor("op_31664_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3235_cast_fp16 = mul(x = var_31663_cast_fp16, y = var_31664_to_fp16)[name = tensor("aw_chunk_3235_cast_fp16")]; + tensor var_31667_equation_0 = const()[name = tensor("op_31667_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31667_cast_fp16 = einsum(equation = var_31667_equation_0, values = (var_31453_cast_fp16, var_31005_cast_fp16))[name = tensor("op_31667_cast_fp16")]; + tensor var_31668_to_fp16 = const()[name = tensor("op_31668_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3237_cast_fp16 = mul(x = var_31667_cast_fp16, y = var_31668_to_fp16)[name = tensor("aw_chunk_3237_cast_fp16")]; + tensor var_31671_equation_0 = const()[name = tensor("op_31671_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31671_cast_fp16 = einsum(equation = var_31671_equation_0, values = (var_31453_cast_fp16, var_31012_cast_fp16))[name = tensor("op_31671_cast_fp16")]; + tensor var_31672_to_fp16 = const()[name = tensor("op_31672_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3239_cast_fp16 = mul(x = var_31671_cast_fp16, y = var_31672_to_fp16)[name = tensor("aw_chunk_3239_cast_fp16")]; + tensor var_31675_equation_0 = const()[name = tensor("op_31675_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31675_cast_fp16 = einsum(equation = var_31675_equation_0, values = (var_31457_cast_fp16, var_31019_cast_fp16))[name = tensor("op_31675_cast_fp16")]; + tensor var_31676_to_fp16 = const()[name = tensor("op_31676_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3241_cast_fp16 = mul(x = var_31675_cast_fp16, y = var_31676_to_fp16)[name = tensor("aw_chunk_3241_cast_fp16")]; + tensor var_31679_equation_0 = const()[name = tensor("op_31679_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31679_cast_fp16 = einsum(equation = var_31679_equation_0, values = (var_31457_cast_fp16, var_31026_cast_fp16))[name = tensor("op_31679_cast_fp16")]; + tensor var_31680_to_fp16 = const()[name = tensor("op_31680_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3243_cast_fp16 = mul(x = var_31679_cast_fp16, y = var_31680_to_fp16)[name = tensor("aw_chunk_3243_cast_fp16")]; + tensor var_31683_equation_0 = const()[name = tensor("op_31683_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31683_cast_fp16 = einsum(equation = var_31683_equation_0, values = (var_31457_cast_fp16, var_31033_cast_fp16))[name = tensor("op_31683_cast_fp16")]; + tensor var_31684_to_fp16 = const()[name = tensor("op_31684_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3245_cast_fp16 = mul(x = var_31683_cast_fp16, y = var_31684_to_fp16)[name = tensor("aw_chunk_3245_cast_fp16")]; + tensor var_31687_equation_0 = const()[name = tensor("op_31687_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31687_cast_fp16 = einsum(equation = var_31687_equation_0, values = (var_31457_cast_fp16, var_31040_cast_fp16))[name = tensor("op_31687_cast_fp16")]; + tensor var_31688_to_fp16 = const()[name = tensor("op_31688_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3247_cast_fp16 = mul(x = var_31687_cast_fp16, y = var_31688_to_fp16)[name = tensor("aw_chunk_3247_cast_fp16")]; + tensor var_31691_equation_0 = const()[name = tensor("op_31691_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31691_cast_fp16 = einsum(equation = var_31691_equation_0, values = (var_31461_cast_fp16, var_31047_cast_fp16))[name = tensor("op_31691_cast_fp16")]; + tensor var_31692_to_fp16 = const()[name = tensor("op_31692_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3249_cast_fp16 = mul(x = var_31691_cast_fp16, y = var_31692_to_fp16)[name = tensor("aw_chunk_3249_cast_fp16")]; + tensor var_31695_equation_0 = const()[name = tensor("op_31695_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31695_cast_fp16 = einsum(equation = var_31695_equation_0, values = (var_31461_cast_fp16, var_31054_cast_fp16))[name = tensor("op_31695_cast_fp16")]; + tensor var_31696_to_fp16 = const()[name = tensor("op_31696_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3251_cast_fp16 = mul(x = var_31695_cast_fp16, y = var_31696_to_fp16)[name = tensor("aw_chunk_3251_cast_fp16")]; + tensor var_31699_equation_0 = const()[name = tensor("op_31699_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31699_cast_fp16 = einsum(equation = var_31699_equation_0, values = (var_31461_cast_fp16, var_31061_cast_fp16))[name = tensor("op_31699_cast_fp16")]; + tensor var_31700_to_fp16 = const()[name = tensor("op_31700_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3253_cast_fp16 = mul(x = var_31699_cast_fp16, y = var_31700_to_fp16)[name = tensor("aw_chunk_3253_cast_fp16")]; + tensor var_31703_equation_0 = const()[name = tensor("op_31703_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31703_cast_fp16 = einsum(equation = var_31703_equation_0, values = (var_31461_cast_fp16, var_31068_cast_fp16))[name = tensor("op_31703_cast_fp16")]; + tensor var_31704_to_fp16 = const()[name = tensor("op_31704_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3255_cast_fp16 = mul(x = var_31703_cast_fp16, y = var_31704_to_fp16)[name = tensor("aw_chunk_3255_cast_fp16")]; + tensor var_31707_equation_0 = const()[name = tensor("op_31707_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31707_cast_fp16 = einsum(equation = var_31707_equation_0, values = (var_31465_cast_fp16, var_31075_cast_fp16))[name = tensor("op_31707_cast_fp16")]; + tensor var_31708_to_fp16 = const()[name = tensor("op_31708_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3257_cast_fp16 = mul(x = var_31707_cast_fp16, y = var_31708_to_fp16)[name = tensor("aw_chunk_3257_cast_fp16")]; + tensor var_31711_equation_0 = const()[name = tensor("op_31711_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31711_cast_fp16 = einsum(equation = var_31711_equation_0, values = (var_31465_cast_fp16, var_31082_cast_fp16))[name = tensor("op_31711_cast_fp16")]; + tensor var_31712_to_fp16 = const()[name = tensor("op_31712_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3259_cast_fp16 = mul(x = var_31711_cast_fp16, y = var_31712_to_fp16)[name = tensor("aw_chunk_3259_cast_fp16")]; + tensor var_31715_equation_0 = const()[name = tensor("op_31715_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31715_cast_fp16 = einsum(equation = var_31715_equation_0, values = (var_31465_cast_fp16, var_31089_cast_fp16))[name = tensor("op_31715_cast_fp16")]; + tensor var_31716_to_fp16 = const()[name = tensor("op_31716_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3261_cast_fp16 = mul(x = var_31715_cast_fp16, y = var_31716_to_fp16)[name = tensor("aw_chunk_3261_cast_fp16")]; + tensor var_31719_equation_0 = const()[name = tensor("op_31719_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31719_cast_fp16 = einsum(equation = var_31719_equation_0, values = (var_31465_cast_fp16, var_31096_cast_fp16))[name = tensor("op_31719_cast_fp16")]; + tensor var_31720_to_fp16 = const()[name = tensor("op_31720_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3263_cast_fp16 = mul(x = var_31719_cast_fp16, y = var_31720_to_fp16)[name = tensor("aw_chunk_3263_cast_fp16")]; + tensor var_31723_equation_0 = const()[name = tensor("op_31723_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31723_cast_fp16 = einsum(equation = var_31723_equation_0, values = (var_31469_cast_fp16, var_31103_cast_fp16))[name = tensor("op_31723_cast_fp16")]; + tensor var_31724_to_fp16 = const()[name = tensor("op_31724_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3265_cast_fp16 = mul(x = var_31723_cast_fp16, y = var_31724_to_fp16)[name = tensor("aw_chunk_3265_cast_fp16")]; + tensor var_31727_equation_0 = const()[name = tensor("op_31727_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31727_cast_fp16 = einsum(equation = var_31727_equation_0, values = (var_31469_cast_fp16, var_31110_cast_fp16))[name = tensor("op_31727_cast_fp16")]; + tensor var_31728_to_fp16 = const()[name = tensor("op_31728_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3267_cast_fp16 = mul(x = var_31727_cast_fp16, y = var_31728_to_fp16)[name = tensor("aw_chunk_3267_cast_fp16")]; + tensor var_31731_equation_0 = const()[name = tensor("op_31731_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31731_cast_fp16 = einsum(equation = var_31731_equation_0, values = (var_31469_cast_fp16, var_31117_cast_fp16))[name = tensor("op_31731_cast_fp16")]; + tensor var_31732_to_fp16 = const()[name = tensor("op_31732_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3269_cast_fp16 = mul(x = var_31731_cast_fp16, y = var_31732_to_fp16)[name = tensor("aw_chunk_3269_cast_fp16")]; + tensor var_31735_equation_0 = const()[name = tensor("op_31735_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31735_cast_fp16 = einsum(equation = var_31735_equation_0, values = (var_31469_cast_fp16, var_31124_cast_fp16))[name = tensor("op_31735_cast_fp16")]; + tensor var_31736_to_fp16 = const()[name = tensor("op_31736_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3271_cast_fp16 = mul(x = var_31735_cast_fp16, y = var_31736_to_fp16)[name = tensor("aw_chunk_3271_cast_fp16")]; + tensor var_31739_equation_0 = const()[name = tensor("op_31739_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31739_cast_fp16 = einsum(equation = var_31739_equation_0, values = (var_31473_cast_fp16, var_31131_cast_fp16))[name = tensor("op_31739_cast_fp16")]; + tensor var_31740_to_fp16 = const()[name = tensor("op_31740_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3273_cast_fp16 = mul(x = var_31739_cast_fp16, y = var_31740_to_fp16)[name = tensor("aw_chunk_3273_cast_fp16")]; + tensor var_31743_equation_0 = const()[name = tensor("op_31743_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31743_cast_fp16 = einsum(equation = var_31743_equation_0, values = (var_31473_cast_fp16, var_31138_cast_fp16))[name = tensor("op_31743_cast_fp16")]; + tensor var_31744_to_fp16 = const()[name = tensor("op_31744_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3275_cast_fp16 = mul(x = var_31743_cast_fp16, y = var_31744_to_fp16)[name = tensor("aw_chunk_3275_cast_fp16")]; + tensor var_31747_equation_0 = const()[name = tensor("op_31747_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31747_cast_fp16 = einsum(equation = var_31747_equation_0, values = (var_31473_cast_fp16, var_31145_cast_fp16))[name = tensor("op_31747_cast_fp16")]; + tensor var_31748_to_fp16 = const()[name = tensor("op_31748_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3277_cast_fp16 = mul(x = var_31747_cast_fp16, y = var_31748_to_fp16)[name = tensor("aw_chunk_3277_cast_fp16")]; + tensor var_31751_equation_0 = const()[name = tensor("op_31751_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31751_cast_fp16 = einsum(equation = var_31751_equation_0, values = (var_31473_cast_fp16, var_31152_cast_fp16))[name = tensor("op_31751_cast_fp16")]; + tensor var_31752_to_fp16 = const()[name = tensor("op_31752_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3279_cast_fp16 = mul(x = var_31751_cast_fp16, y = var_31752_to_fp16)[name = tensor("aw_chunk_3279_cast_fp16")]; + tensor var_31755_equation_0 = const()[name = tensor("op_31755_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31755_cast_fp16 = einsum(equation = var_31755_equation_0, values = (var_31477_cast_fp16, var_31159_cast_fp16))[name = tensor("op_31755_cast_fp16")]; + tensor var_31756_to_fp16 = const()[name = tensor("op_31756_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3281_cast_fp16 = mul(x = var_31755_cast_fp16, y = var_31756_to_fp16)[name = tensor("aw_chunk_3281_cast_fp16")]; + tensor var_31759_equation_0 = const()[name = tensor("op_31759_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31759_cast_fp16 = einsum(equation = var_31759_equation_0, values = (var_31477_cast_fp16, var_31166_cast_fp16))[name = tensor("op_31759_cast_fp16")]; + tensor var_31760_to_fp16 = const()[name = tensor("op_31760_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3283_cast_fp16 = mul(x = var_31759_cast_fp16, y = var_31760_to_fp16)[name = tensor("aw_chunk_3283_cast_fp16")]; + tensor var_31763_equation_0 = const()[name = tensor("op_31763_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31763_cast_fp16 = einsum(equation = var_31763_equation_0, values = (var_31477_cast_fp16, var_31173_cast_fp16))[name = tensor("op_31763_cast_fp16")]; + tensor var_31764_to_fp16 = const()[name = tensor("op_31764_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3285_cast_fp16 = mul(x = var_31763_cast_fp16, y = var_31764_to_fp16)[name = tensor("aw_chunk_3285_cast_fp16")]; + tensor var_31767_equation_0 = const()[name = tensor("op_31767_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31767_cast_fp16 = einsum(equation = var_31767_equation_0, values = (var_31477_cast_fp16, var_31180_cast_fp16))[name = tensor("op_31767_cast_fp16")]; + tensor var_31768_to_fp16 = const()[name = tensor("op_31768_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3287_cast_fp16 = mul(x = var_31767_cast_fp16, y = var_31768_to_fp16)[name = tensor("aw_chunk_3287_cast_fp16")]; + tensor var_31771_equation_0 = const()[name = tensor("op_31771_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31771_cast_fp16 = einsum(equation = var_31771_equation_0, values = (var_31481_cast_fp16, var_31187_cast_fp16))[name = tensor("op_31771_cast_fp16")]; + tensor var_31772_to_fp16 = const()[name = tensor("op_31772_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3289_cast_fp16 = mul(x = var_31771_cast_fp16, y = var_31772_to_fp16)[name = tensor("aw_chunk_3289_cast_fp16")]; + tensor var_31775_equation_0 = const()[name = tensor("op_31775_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31775_cast_fp16 = einsum(equation = var_31775_equation_0, values = (var_31481_cast_fp16, var_31194_cast_fp16))[name = tensor("op_31775_cast_fp16")]; + tensor var_31776_to_fp16 = const()[name = tensor("op_31776_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3291_cast_fp16 = mul(x = var_31775_cast_fp16, y = var_31776_to_fp16)[name = tensor("aw_chunk_3291_cast_fp16")]; + tensor var_31779_equation_0 = const()[name = tensor("op_31779_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31779_cast_fp16 = einsum(equation = var_31779_equation_0, values = (var_31481_cast_fp16, var_31201_cast_fp16))[name = tensor("op_31779_cast_fp16")]; + tensor var_31780_to_fp16 = const()[name = tensor("op_31780_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3293_cast_fp16 = mul(x = var_31779_cast_fp16, y = var_31780_to_fp16)[name = tensor("aw_chunk_3293_cast_fp16")]; + tensor var_31783_equation_0 = const()[name = tensor("op_31783_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31783_cast_fp16 = einsum(equation = var_31783_equation_0, values = (var_31481_cast_fp16, var_31208_cast_fp16))[name = tensor("op_31783_cast_fp16")]; + tensor var_31784_to_fp16 = const()[name = tensor("op_31784_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3295_cast_fp16 = mul(x = var_31783_cast_fp16, y = var_31784_to_fp16)[name = tensor("aw_chunk_3295_cast_fp16")]; + tensor var_31787_equation_0 = const()[name = tensor("op_31787_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31787_cast_fp16 = einsum(equation = var_31787_equation_0, values = (var_31485_cast_fp16, var_31215_cast_fp16))[name = tensor("op_31787_cast_fp16")]; + tensor var_31788_to_fp16 = const()[name = tensor("op_31788_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3297_cast_fp16 = mul(x = var_31787_cast_fp16, y = var_31788_to_fp16)[name = tensor("aw_chunk_3297_cast_fp16")]; + tensor var_31791_equation_0 = const()[name = tensor("op_31791_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31791_cast_fp16 = einsum(equation = var_31791_equation_0, values = (var_31485_cast_fp16, var_31222_cast_fp16))[name = tensor("op_31791_cast_fp16")]; + tensor var_31792_to_fp16 = const()[name = tensor("op_31792_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3299_cast_fp16 = mul(x = var_31791_cast_fp16, y = var_31792_to_fp16)[name = tensor("aw_chunk_3299_cast_fp16")]; + tensor var_31795_equation_0 = const()[name = tensor("op_31795_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31795_cast_fp16 = einsum(equation = var_31795_equation_0, values = (var_31485_cast_fp16, var_31229_cast_fp16))[name = tensor("op_31795_cast_fp16")]; + tensor var_31796_to_fp16 = const()[name = tensor("op_31796_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3301_cast_fp16 = mul(x = var_31795_cast_fp16, y = var_31796_to_fp16)[name = tensor("aw_chunk_3301_cast_fp16")]; + tensor var_31799_equation_0 = const()[name = tensor("op_31799_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31799_cast_fp16 = einsum(equation = var_31799_equation_0, values = (var_31485_cast_fp16, var_31236_cast_fp16))[name = tensor("op_31799_cast_fp16")]; + tensor var_31800_to_fp16 = const()[name = tensor("op_31800_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3303_cast_fp16 = mul(x = var_31799_cast_fp16, y = var_31800_to_fp16)[name = tensor("aw_chunk_3303_cast_fp16")]; + tensor var_31803_equation_0 = const()[name = tensor("op_31803_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31803_cast_fp16 = einsum(equation = var_31803_equation_0, values = (var_31489_cast_fp16, var_31243_cast_fp16))[name = tensor("op_31803_cast_fp16")]; + tensor var_31804_to_fp16 = const()[name = tensor("op_31804_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3305_cast_fp16 = mul(x = var_31803_cast_fp16, y = var_31804_to_fp16)[name = tensor("aw_chunk_3305_cast_fp16")]; + tensor var_31807_equation_0 = const()[name = tensor("op_31807_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31807_cast_fp16 = einsum(equation = var_31807_equation_0, values = (var_31489_cast_fp16, var_31250_cast_fp16))[name = tensor("op_31807_cast_fp16")]; + tensor var_31808_to_fp16 = const()[name = tensor("op_31808_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3307_cast_fp16 = mul(x = var_31807_cast_fp16, y = var_31808_to_fp16)[name = tensor("aw_chunk_3307_cast_fp16")]; + tensor var_31811_equation_0 = const()[name = tensor("op_31811_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31811_cast_fp16 = einsum(equation = var_31811_equation_0, values = (var_31489_cast_fp16, var_31257_cast_fp16))[name = tensor("op_31811_cast_fp16")]; + tensor var_31812_to_fp16 = const()[name = tensor("op_31812_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3309_cast_fp16 = mul(x = var_31811_cast_fp16, y = var_31812_to_fp16)[name = tensor("aw_chunk_3309_cast_fp16")]; + tensor var_31815_equation_0 = const()[name = tensor("op_31815_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31815_cast_fp16 = einsum(equation = var_31815_equation_0, values = (var_31489_cast_fp16, var_31264_cast_fp16))[name = tensor("op_31815_cast_fp16")]; + tensor var_31816_to_fp16 = const()[name = tensor("op_31816_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3311_cast_fp16 = mul(x = var_31815_cast_fp16, y = var_31816_to_fp16)[name = tensor("aw_chunk_3311_cast_fp16")]; + tensor var_31819_equation_0 = const()[name = tensor("op_31819_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31819_cast_fp16 = einsum(equation = var_31819_equation_0, values = (var_31493_cast_fp16, var_31271_cast_fp16))[name = tensor("op_31819_cast_fp16")]; + tensor var_31820_to_fp16 = const()[name = tensor("op_31820_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3313_cast_fp16 = mul(x = var_31819_cast_fp16, y = var_31820_to_fp16)[name = tensor("aw_chunk_3313_cast_fp16")]; + tensor var_31823_equation_0 = const()[name = tensor("op_31823_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31823_cast_fp16 = einsum(equation = var_31823_equation_0, values = (var_31493_cast_fp16, var_31278_cast_fp16))[name = tensor("op_31823_cast_fp16")]; + tensor var_31824_to_fp16 = const()[name = tensor("op_31824_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3315_cast_fp16 = mul(x = var_31823_cast_fp16, y = var_31824_to_fp16)[name = tensor("aw_chunk_3315_cast_fp16")]; + tensor var_31827_equation_0 = const()[name = tensor("op_31827_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31827_cast_fp16 = einsum(equation = var_31827_equation_0, values = (var_31493_cast_fp16, var_31285_cast_fp16))[name = tensor("op_31827_cast_fp16")]; + tensor var_31828_to_fp16 = const()[name = tensor("op_31828_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3317_cast_fp16 = mul(x = var_31827_cast_fp16, y = var_31828_to_fp16)[name = tensor("aw_chunk_3317_cast_fp16")]; + tensor var_31831_equation_0 = const()[name = tensor("op_31831_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31831_cast_fp16 = einsum(equation = var_31831_equation_0, values = (var_31493_cast_fp16, var_31292_cast_fp16))[name = tensor("op_31831_cast_fp16")]; + tensor var_31832_to_fp16 = const()[name = tensor("op_31832_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3319_cast_fp16 = mul(x = var_31831_cast_fp16, y = var_31832_to_fp16)[name = tensor("aw_chunk_3319_cast_fp16")]; + tensor var_31835_equation_0 = const()[name = tensor("op_31835_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31835_cast_fp16 = einsum(equation = var_31835_equation_0, values = (var_31497_cast_fp16, var_31299_cast_fp16))[name = tensor("op_31835_cast_fp16")]; + tensor var_31836_to_fp16 = const()[name = tensor("op_31836_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3321_cast_fp16 = mul(x = var_31835_cast_fp16, y = var_31836_to_fp16)[name = tensor("aw_chunk_3321_cast_fp16")]; + tensor var_31839_equation_0 = const()[name = tensor("op_31839_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31839_cast_fp16 = einsum(equation = var_31839_equation_0, values = (var_31497_cast_fp16, var_31306_cast_fp16))[name = tensor("op_31839_cast_fp16")]; + tensor var_31840_to_fp16 = const()[name = tensor("op_31840_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3323_cast_fp16 = mul(x = var_31839_cast_fp16, y = var_31840_to_fp16)[name = tensor("aw_chunk_3323_cast_fp16")]; + tensor var_31843_equation_0 = const()[name = tensor("op_31843_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31843_cast_fp16 = einsum(equation = var_31843_equation_0, values = (var_31497_cast_fp16, var_31313_cast_fp16))[name = tensor("op_31843_cast_fp16")]; + tensor var_31844_to_fp16 = const()[name = tensor("op_31844_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3325_cast_fp16 = mul(x = var_31843_cast_fp16, y = var_31844_to_fp16)[name = tensor("aw_chunk_3325_cast_fp16")]; + tensor var_31847_equation_0 = const()[name = tensor("op_31847_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31847_cast_fp16 = einsum(equation = var_31847_equation_0, values = (var_31497_cast_fp16, var_31320_cast_fp16))[name = tensor("op_31847_cast_fp16")]; + tensor var_31848_to_fp16 = const()[name = tensor("op_31848_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3327_cast_fp16 = mul(x = var_31847_cast_fp16, y = var_31848_to_fp16)[name = tensor("aw_chunk_3327_cast_fp16")]; + tensor var_31851_equation_0 = const()[name = tensor("op_31851_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31851_cast_fp16 = einsum(equation = var_31851_equation_0, values = (var_31501_cast_fp16, var_31327_cast_fp16))[name = tensor("op_31851_cast_fp16")]; + tensor var_31852_to_fp16 = const()[name = tensor("op_31852_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3329_cast_fp16 = mul(x = var_31851_cast_fp16, y = var_31852_to_fp16)[name = tensor("aw_chunk_3329_cast_fp16")]; + tensor var_31855_equation_0 = const()[name = tensor("op_31855_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31855_cast_fp16 = einsum(equation = var_31855_equation_0, values = (var_31501_cast_fp16, var_31334_cast_fp16))[name = tensor("op_31855_cast_fp16")]; + tensor var_31856_to_fp16 = const()[name = tensor("op_31856_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3331_cast_fp16 = mul(x = var_31855_cast_fp16, y = var_31856_to_fp16)[name = tensor("aw_chunk_3331_cast_fp16")]; + tensor var_31859_equation_0 = const()[name = tensor("op_31859_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31859_cast_fp16 = einsum(equation = var_31859_equation_0, values = (var_31501_cast_fp16, var_31341_cast_fp16))[name = tensor("op_31859_cast_fp16")]; + tensor var_31860_to_fp16 = const()[name = tensor("op_31860_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3333_cast_fp16 = mul(x = var_31859_cast_fp16, y = var_31860_to_fp16)[name = tensor("aw_chunk_3333_cast_fp16")]; + tensor var_31863_equation_0 = const()[name = tensor("op_31863_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31863_cast_fp16 = einsum(equation = var_31863_equation_0, values = (var_31501_cast_fp16, var_31348_cast_fp16))[name = tensor("op_31863_cast_fp16")]; + tensor var_31864_to_fp16 = const()[name = tensor("op_31864_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3335_cast_fp16 = mul(x = var_31863_cast_fp16, y = var_31864_to_fp16)[name = tensor("aw_chunk_3335_cast_fp16")]; + tensor var_31867_equation_0 = const()[name = tensor("op_31867_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31867_cast_fp16 = einsum(equation = var_31867_equation_0, values = (var_31505_cast_fp16, var_31355_cast_fp16))[name = tensor("op_31867_cast_fp16")]; + tensor var_31868_to_fp16 = const()[name = tensor("op_31868_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3337_cast_fp16 = mul(x = var_31867_cast_fp16, y = var_31868_to_fp16)[name = tensor("aw_chunk_3337_cast_fp16")]; + tensor var_31871_equation_0 = const()[name = tensor("op_31871_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31871_cast_fp16 = einsum(equation = var_31871_equation_0, values = (var_31505_cast_fp16, var_31362_cast_fp16))[name = tensor("op_31871_cast_fp16")]; + tensor var_31872_to_fp16 = const()[name = tensor("op_31872_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3339_cast_fp16 = mul(x = var_31871_cast_fp16, y = var_31872_to_fp16)[name = tensor("aw_chunk_3339_cast_fp16")]; + tensor var_31875_equation_0 = const()[name = tensor("op_31875_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31875_cast_fp16 = einsum(equation = var_31875_equation_0, values = (var_31505_cast_fp16, var_31369_cast_fp16))[name = tensor("op_31875_cast_fp16")]; + tensor var_31876_to_fp16 = const()[name = tensor("op_31876_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3341_cast_fp16 = mul(x = var_31875_cast_fp16, y = var_31876_to_fp16)[name = tensor("aw_chunk_3341_cast_fp16")]; + tensor var_31879_equation_0 = const()[name = tensor("op_31879_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31879_cast_fp16 = einsum(equation = var_31879_equation_0, values = (var_31505_cast_fp16, var_31376_cast_fp16))[name = tensor("op_31879_cast_fp16")]; + tensor var_31880_to_fp16 = const()[name = tensor("op_31880_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3343_cast_fp16 = mul(x = var_31879_cast_fp16, y = var_31880_to_fp16)[name = tensor("aw_chunk_3343_cast_fp16")]; + tensor var_31883_equation_0 = const()[name = tensor("op_31883_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31883_cast_fp16 = einsum(equation = var_31883_equation_0, values = (var_31509_cast_fp16, var_31383_cast_fp16))[name = tensor("op_31883_cast_fp16")]; + tensor var_31884_to_fp16 = const()[name = tensor("op_31884_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3345_cast_fp16 = mul(x = var_31883_cast_fp16, y = var_31884_to_fp16)[name = tensor("aw_chunk_3345_cast_fp16")]; + tensor var_31887_equation_0 = const()[name = tensor("op_31887_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31887_cast_fp16 = einsum(equation = var_31887_equation_0, values = (var_31509_cast_fp16, var_31390_cast_fp16))[name = tensor("op_31887_cast_fp16")]; + tensor var_31888_to_fp16 = const()[name = tensor("op_31888_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3347_cast_fp16 = mul(x = var_31887_cast_fp16, y = var_31888_to_fp16)[name = tensor("aw_chunk_3347_cast_fp16")]; + tensor var_31891_equation_0 = const()[name = tensor("op_31891_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31891_cast_fp16 = einsum(equation = var_31891_equation_0, values = (var_31509_cast_fp16, var_31397_cast_fp16))[name = tensor("op_31891_cast_fp16")]; + tensor var_31892_to_fp16 = const()[name = tensor("op_31892_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3349_cast_fp16 = mul(x = var_31891_cast_fp16, y = var_31892_to_fp16)[name = tensor("aw_chunk_3349_cast_fp16")]; + tensor var_31895_equation_0 = const()[name = tensor("op_31895_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31895_cast_fp16 = einsum(equation = var_31895_equation_0, values = (var_31509_cast_fp16, var_31404_cast_fp16))[name = tensor("op_31895_cast_fp16")]; + tensor var_31896_to_fp16 = const()[name = tensor("op_31896_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3351_cast_fp16 = mul(x = var_31895_cast_fp16, y = var_31896_to_fp16)[name = tensor("aw_chunk_3351_cast_fp16")]; + tensor var_31899_equation_0 = const()[name = tensor("op_31899_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31899_cast_fp16 = einsum(equation = var_31899_equation_0, values = (var_31513_cast_fp16, var_31411_cast_fp16))[name = tensor("op_31899_cast_fp16")]; + tensor var_31900_to_fp16 = const()[name = tensor("op_31900_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3353_cast_fp16 = mul(x = var_31899_cast_fp16, y = var_31900_to_fp16)[name = tensor("aw_chunk_3353_cast_fp16")]; + tensor var_31903_equation_0 = const()[name = tensor("op_31903_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31903_cast_fp16 = einsum(equation = var_31903_equation_0, values = (var_31513_cast_fp16, var_31418_cast_fp16))[name = tensor("op_31903_cast_fp16")]; + tensor var_31904_to_fp16 = const()[name = tensor("op_31904_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3355_cast_fp16 = mul(x = var_31903_cast_fp16, y = var_31904_to_fp16)[name = tensor("aw_chunk_3355_cast_fp16")]; + tensor var_31907_equation_0 = const()[name = tensor("op_31907_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31907_cast_fp16 = einsum(equation = var_31907_equation_0, values = (var_31513_cast_fp16, var_31425_cast_fp16))[name = tensor("op_31907_cast_fp16")]; + tensor var_31908_to_fp16 = const()[name = tensor("op_31908_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3357_cast_fp16 = mul(x = var_31907_cast_fp16, y = var_31908_to_fp16)[name = tensor("aw_chunk_3357_cast_fp16")]; + tensor var_31911_equation_0 = const()[name = tensor("op_31911_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_31911_cast_fp16 = einsum(equation = var_31911_equation_0, values = (var_31513_cast_fp16, var_31432_cast_fp16))[name = tensor("op_31911_cast_fp16")]; + tensor var_31912_to_fp16 = const()[name = tensor("op_31912_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3359_cast_fp16 = mul(x = var_31911_cast_fp16, y = var_31912_to_fp16)[name = tensor("aw_chunk_3359_cast_fp16")]; + tensor var_31914_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3201_cast_fp16)[name = tensor("op_31914_cast_fp16")]; + tensor var_31915_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3203_cast_fp16)[name = tensor("op_31915_cast_fp16")]; + tensor var_31916_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3205_cast_fp16)[name = tensor("op_31916_cast_fp16")]; + tensor var_31917_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3207_cast_fp16)[name = tensor("op_31917_cast_fp16")]; + tensor var_31918_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3209_cast_fp16)[name = tensor("op_31918_cast_fp16")]; + tensor var_31919_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3211_cast_fp16)[name = tensor("op_31919_cast_fp16")]; + tensor var_31920_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3213_cast_fp16)[name = tensor("op_31920_cast_fp16")]; + tensor var_31921_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3215_cast_fp16)[name = tensor("op_31921_cast_fp16")]; + tensor var_31922_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3217_cast_fp16)[name = tensor("op_31922_cast_fp16")]; + tensor var_31923_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3219_cast_fp16)[name = tensor("op_31923_cast_fp16")]; + tensor var_31924_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3221_cast_fp16)[name = tensor("op_31924_cast_fp16")]; + tensor var_31925_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3223_cast_fp16)[name = tensor("op_31925_cast_fp16")]; + tensor var_31926_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3225_cast_fp16)[name = tensor("op_31926_cast_fp16")]; + tensor var_31927_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3227_cast_fp16)[name = tensor("op_31927_cast_fp16")]; + tensor var_31928_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3229_cast_fp16)[name = tensor("op_31928_cast_fp16")]; + tensor var_31929_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3231_cast_fp16)[name = tensor("op_31929_cast_fp16")]; + tensor var_31930_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3233_cast_fp16)[name = tensor("op_31930_cast_fp16")]; + tensor var_31931_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3235_cast_fp16)[name = tensor("op_31931_cast_fp16")]; + tensor var_31932_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3237_cast_fp16)[name = tensor("op_31932_cast_fp16")]; + tensor var_31933_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3239_cast_fp16)[name = tensor("op_31933_cast_fp16")]; + tensor var_31934_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3241_cast_fp16)[name = tensor("op_31934_cast_fp16")]; + tensor var_31935_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3243_cast_fp16)[name = tensor("op_31935_cast_fp16")]; + tensor var_31936_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3245_cast_fp16)[name = tensor("op_31936_cast_fp16")]; + tensor var_31937_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3247_cast_fp16)[name = tensor("op_31937_cast_fp16")]; + tensor var_31938_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3249_cast_fp16)[name = tensor("op_31938_cast_fp16")]; + tensor var_31939_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3251_cast_fp16)[name = tensor("op_31939_cast_fp16")]; + tensor var_31940_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3253_cast_fp16)[name = tensor("op_31940_cast_fp16")]; + tensor var_31941_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3255_cast_fp16)[name = tensor("op_31941_cast_fp16")]; + tensor var_31942_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3257_cast_fp16)[name = tensor("op_31942_cast_fp16")]; + tensor var_31943_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3259_cast_fp16)[name = tensor("op_31943_cast_fp16")]; + tensor var_31944_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3261_cast_fp16)[name = tensor("op_31944_cast_fp16")]; + tensor var_31945_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3263_cast_fp16)[name = tensor("op_31945_cast_fp16")]; + tensor var_31946_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3265_cast_fp16)[name = tensor("op_31946_cast_fp16")]; + tensor var_31947_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3267_cast_fp16)[name = tensor("op_31947_cast_fp16")]; + tensor var_31948_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3269_cast_fp16)[name = tensor("op_31948_cast_fp16")]; + tensor var_31949_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3271_cast_fp16)[name = tensor("op_31949_cast_fp16")]; + tensor var_31950_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3273_cast_fp16)[name = tensor("op_31950_cast_fp16")]; + tensor var_31951_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3275_cast_fp16)[name = tensor("op_31951_cast_fp16")]; + tensor var_31952_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3277_cast_fp16)[name = tensor("op_31952_cast_fp16")]; + tensor var_31953_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3279_cast_fp16)[name = tensor("op_31953_cast_fp16")]; + tensor var_31954_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3281_cast_fp16)[name = tensor("op_31954_cast_fp16")]; + tensor var_31955_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3283_cast_fp16)[name = tensor("op_31955_cast_fp16")]; + tensor var_31956_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3285_cast_fp16)[name = tensor("op_31956_cast_fp16")]; + tensor var_31957_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3287_cast_fp16)[name = tensor("op_31957_cast_fp16")]; + tensor var_31958_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3289_cast_fp16)[name = tensor("op_31958_cast_fp16")]; + tensor var_31959_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3291_cast_fp16)[name = tensor("op_31959_cast_fp16")]; + tensor var_31960_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3293_cast_fp16)[name = tensor("op_31960_cast_fp16")]; + tensor var_31961_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3295_cast_fp16)[name = tensor("op_31961_cast_fp16")]; + tensor var_31962_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3297_cast_fp16)[name = tensor("op_31962_cast_fp16")]; + tensor var_31963_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3299_cast_fp16)[name = tensor("op_31963_cast_fp16")]; + tensor var_31964_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3301_cast_fp16)[name = tensor("op_31964_cast_fp16")]; + tensor var_31965_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3303_cast_fp16)[name = tensor("op_31965_cast_fp16")]; + tensor var_31966_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3305_cast_fp16)[name = tensor("op_31966_cast_fp16")]; + tensor var_31967_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3307_cast_fp16)[name = tensor("op_31967_cast_fp16")]; + tensor var_31968_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3309_cast_fp16)[name = tensor("op_31968_cast_fp16")]; + tensor var_31969_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3311_cast_fp16)[name = tensor("op_31969_cast_fp16")]; + tensor var_31970_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3313_cast_fp16)[name = tensor("op_31970_cast_fp16")]; + tensor var_31971_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3315_cast_fp16)[name = tensor("op_31971_cast_fp16")]; + tensor var_31972_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3317_cast_fp16)[name = tensor("op_31972_cast_fp16")]; + tensor var_31973_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3319_cast_fp16)[name = tensor("op_31973_cast_fp16")]; + tensor var_31974_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3321_cast_fp16)[name = tensor("op_31974_cast_fp16")]; + tensor var_31975_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3323_cast_fp16)[name = tensor("op_31975_cast_fp16")]; + tensor var_31976_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3325_cast_fp16)[name = tensor("op_31976_cast_fp16")]; + tensor var_31977_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3327_cast_fp16)[name = tensor("op_31977_cast_fp16")]; + tensor var_31978_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3329_cast_fp16)[name = tensor("op_31978_cast_fp16")]; + tensor var_31979_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3331_cast_fp16)[name = tensor("op_31979_cast_fp16")]; + tensor var_31980_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3333_cast_fp16)[name = tensor("op_31980_cast_fp16")]; + tensor var_31981_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3335_cast_fp16)[name = tensor("op_31981_cast_fp16")]; + tensor var_31982_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3337_cast_fp16)[name = tensor("op_31982_cast_fp16")]; + tensor var_31983_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3339_cast_fp16)[name = tensor("op_31983_cast_fp16")]; + tensor var_31984_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3341_cast_fp16)[name = tensor("op_31984_cast_fp16")]; + tensor var_31985_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3343_cast_fp16)[name = tensor("op_31985_cast_fp16")]; + tensor var_31986_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3345_cast_fp16)[name = tensor("op_31986_cast_fp16")]; + tensor var_31987_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3347_cast_fp16)[name = tensor("op_31987_cast_fp16")]; + tensor var_31988_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3349_cast_fp16)[name = tensor("op_31988_cast_fp16")]; + tensor var_31989_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3351_cast_fp16)[name = tensor("op_31989_cast_fp16")]; + tensor var_31990_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3353_cast_fp16)[name = tensor("op_31990_cast_fp16")]; + tensor var_31991_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3355_cast_fp16)[name = tensor("op_31991_cast_fp16")]; + tensor var_31992_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3357_cast_fp16)[name = tensor("op_31992_cast_fp16")]; + tensor var_31993_cast_fp16 = softmax(axis = var_30739, x = aw_chunk_3359_cast_fp16)[name = tensor("op_31993_cast_fp16")]; + tensor var_31995_equation_0 = const()[name = tensor("op_31995_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_31995_cast_fp16 = einsum(equation = var_31995_equation_0, values = (var_31515_cast_fp16, var_31914_cast_fp16))[name = tensor("op_31995_cast_fp16")]; + tensor var_31997_equation_0 = const()[name = tensor("op_31997_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_31997_cast_fp16 = einsum(equation = var_31997_equation_0, values = (var_31515_cast_fp16, var_31915_cast_fp16))[name = tensor("op_31997_cast_fp16")]; + tensor var_31999_equation_0 = const()[name = tensor("op_31999_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_31999_cast_fp16 = einsum(equation = var_31999_equation_0, values = (var_31515_cast_fp16, var_31916_cast_fp16))[name = tensor("op_31999_cast_fp16")]; + tensor var_32001_equation_0 = const()[name = tensor("op_32001_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32001_cast_fp16 = einsum(equation = var_32001_equation_0, values = (var_31515_cast_fp16, var_31917_cast_fp16))[name = tensor("op_32001_cast_fp16")]; + tensor var_32003_equation_0 = const()[name = tensor("op_32003_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32003_cast_fp16 = einsum(equation = var_32003_equation_0, values = (var_31519_cast_fp16, var_31918_cast_fp16))[name = tensor("op_32003_cast_fp16")]; + tensor var_32005_equation_0 = const()[name = tensor("op_32005_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32005_cast_fp16 = einsum(equation = var_32005_equation_0, values = (var_31519_cast_fp16, var_31919_cast_fp16))[name = tensor("op_32005_cast_fp16")]; + tensor var_32007_equation_0 = const()[name = tensor("op_32007_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32007_cast_fp16 = einsum(equation = var_32007_equation_0, values = (var_31519_cast_fp16, var_31920_cast_fp16))[name = tensor("op_32007_cast_fp16")]; + tensor var_32009_equation_0 = const()[name = tensor("op_32009_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32009_cast_fp16 = einsum(equation = var_32009_equation_0, values = (var_31519_cast_fp16, var_31921_cast_fp16))[name = tensor("op_32009_cast_fp16")]; + tensor var_32011_equation_0 = const()[name = tensor("op_32011_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32011_cast_fp16 = einsum(equation = var_32011_equation_0, values = (var_31523_cast_fp16, var_31922_cast_fp16))[name = tensor("op_32011_cast_fp16")]; + tensor var_32013_equation_0 = const()[name = tensor("op_32013_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32013_cast_fp16 = einsum(equation = var_32013_equation_0, values = (var_31523_cast_fp16, var_31923_cast_fp16))[name = tensor("op_32013_cast_fp16")]; + tensor var_32015_equation_0 = const()[name = tensor("op_32015_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32015_cast_fp16 = einsum(equation = var_32015_equation_0, values = (var_31523_cast_fp16, var_31924_cast_fp16))[name = tensor("op_32015_cast_fp16")]; + tensor var_32017_equation_0 = const()[name = tensor("op_32017_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32017_cast_fp16 = einsum(equation = var_32017_equation_0, values = (var_31523_cast_fp16, var_31925_cast_fp16))[name = tensor("op_32017_cast_fp16")]; + tensor var_32019_equation_0 = const()[name = tensor("op_32019_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32019_cast_fp16 = einsum(equation = var_32019_equation_0, values = (var_31527_cast_fp16, var_31926_cast_fp16))[name = tensor("op_32019_cast_fp16")]; + tensor var_32021_equation_0 = const()[name = tensor("op_32021_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32021_cast_fp16 = einsum(equation = var_32021_equation_0, values = (var_31527_cast_fp16, var_31927_cast_fp16))[name = tensor("op_32021_cast_fp16")]; + tensor var_32023_equation_0 = const()[name = tensor("op_32023_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32023_cast_fp16 = einsum(equation = var_32023_equation_0, values = (var_31527_cast_fp16, var_31928_cast_fp16))[name = tensor("op_32023_cast_fp16")]; + tensor var_32025_equation_0 = const()[name = tensor("op_32025_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32025_cast_fp16 = einsum(equation = var_32025_equation_0, values = (var_31527_cast_fp16, var_31929_cast_fp16))[name = tensor("op_32025_cast_fp16")]; + tensor var_32027_equation_0 = const()[name = tensor("op_32027_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32027_cast_fp16 = einsum(equation = var_32027_equation_0, values = (var_31531_cast_fp16, var_31930_cast_fp16))[name = tensor("op_32027_cast_fp16")]; + tensor var_32029_equation_0 = const()[name = tensor("op_32029_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32029_cast_fp16 = einsum(equation = var_32029_equation_0, values = (var_31531_cast_fp16, var_31931_cast_fp16))[name = tensor("op_32029_cast_fp16")]; + tensor var_32031_equation_0 = const()[name = tensor("op_32031_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32031_cast_fp16 = einsum(equation = var_32031_equation_0, values = (var_31531_cast_fp16, var_31932_cast_fp16))[name = tensor("op_32031_cast_fp16")]; + tensor var_32033_equation_0 = const()[name = tensor("op_32033_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32033_cast_fp16 = einsum(equation = var_32033_equation_0, values = (var_31531_cast_fp16, var_31933_cast_fp16))[name = tensor("op_32033_cast_fp16")]; + tensor var_32035_equation_0 = const()[name = tensor("op_32035_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32035_cast_fp16 = einsum(equation = var_32035_equation_0, values = (var_31535_cast_fp16, var_31934_cast_fp16))[name = tensor("op_32035_cast_fp16")]; + tensor var_32037_equation_0 = const()[name = tensor("op_32037_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32037_cast_fp16 = einsum(equation = var_32037_equation_0, values = (var_31535_cast_fp16, var_31935_cast_fp16))[name = tensor("op_32037_cast_fp16")]; + tensor var_32039_equation_0 = const()[name = tensor("op_32039_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32039_cast_fp16 = einsum(equation = var_32039_equation_0, values = (var_31535_cast_fp16, var_31936_cast_fp16))[name = tensor("op_32039_cast_fp16")]; + tensor var_32041_equation_0 = const()[name = tensor("op_32041_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32041_cast_fp16 = einsum(equation = var_32041_equation_0, values = (var_31535_cast_fp16, var_31937_cast_fp16))[name = tensor("op_32041_cast_fp16")]; + tensor var_32043_equation_0 = const()[name = tensor("op_32043_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32043_cast_fp16 = einsum(equation = var_32043_equation_0, values = (var_31539_cast_fp16, var_31938_cast_fp16))[name = tensor("op_32043_cast_fp16")]; + tensor var_32045_equation_0 = const()[name = tensor("op_32045_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32045_cast_fp16 = einsum(equation = var_32045_equation_0, values = (var_31539_cast_fp16, var_31939_cast_fp16))[name = tensor("op_32045_cast_fp16")]; + tensor var_32047_equation_0 = const()[name = tensor("op_32047_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32047_cast_fp16 = einsum(equation = var_32047_equation_0, values = (var_31539_cast_fp16, var_31940_cast_fp16))[name = tensor("op_32047_cast_fp16")]; + tensor var_32049_equation_0 = const()[name = tensor("op_32049_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32049_cast_fp16 = einsum(equation = var_32049_equation_0, values = (var_31539_cast_fp16, var_31941_cast_fp16))[name = tensor("op_32049_cast_fp16")]; + tensor var_32051_equation_0 = const()[name = tensor("op_32051_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32051_cast_fp16 = einsum(equation = var_32051_equation_0, values = (var_31543_cast_fp16, var_31942_cast_fp16))[name = tensor("op_32051_cast_fp16")]; + tensor var_32053_equation_0 = const()[name = tensor("op_32053_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32053_cast_fp16 = einsum(equation = var_32053_equation_0, values = (var_31543_cast_fp16, var_31943_cast_fp16))[name = tensor("op_32053_cast_fp16")]; + tensor var_32055_equation_0 = const()[name = tensor("op_32055_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32055_cast_fp16 = einsum(equation = var_32055_equation_0, values = (var_31543_cast_fp16, var_31944_cast_fp16))[name = tensor("op_32055_cast_fp16")]; + tensor var_32057_equation_0 = const()[name = tensor("op_32057_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32057_cast_fp16 = einsum(equation = var_32057_equation_0, values = (var_31543_cast_fp16, var_31945_cast_fp16))[name = tensor("op_32057_cast_fp16")]; + tensor var_32059_equation_0 = const()[name = tensor("op_32059_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32059_cast_fp16 = einsum(equation = var_32059_equation_0, values = (var_31547_cast_fp16, var_31946_cast_fp16))[name = tensor("op_32059_cast_fp16")]; + tensor var_32061_equation_0 = const()[name = tensor("op_32061_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32061_cast_fp16 = einsum(equation = var_32061_equation_0, values = (var_31547_cast_fp16, var_31947_cast_fp16))[name = tensor("op_32061_cast_fp16")]; + tensor var_32063_equation_0 = const()[name = tensor("op_32063_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32063_cast_fp16 = einsum(equation = var_32063_equation_0, values = (var_31547_cast_fp16, var_31948_cast_fp16))[name = tensor("op_32063_cast_fp16")]; + tensor var_32065_equation_0 = const()[name = tensor("op_32065_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32065_cast_fp16 = einsum(equation = var_32065_equation_0, values = (var_31547_cast_fp16, var_31949_cast_fp16))[name = tensor("op_32065_cast_fp16")]; + tensor var_32067_equation_0 = const()[name = tensor("op_32067_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32067_cast_fp16 = einsum(equation = var_32067_equation_0, values = (var_31551_cast_fp16, var_31950_cast_fp16))[name = tensor("op_32067_cast_fp16")]; + tensor var_32069_equation_0 = const()[name = tensor("op_32069_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32069_cast_fp16 = einsum(equation = var_32069_equation_0, values = (var_31551_cast_fp16, var_31951_cast_fp16))[name = tensor("op_32069_cast_fp16")]; + tensor var_32071_equation_0 = const()[name = tensor("op_32071_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32071_cast_fp16 = einsum(equation = var_32071_equation_0, values = (var_31551_cast_fp16, var_31952_cast_fp16))[name = tensor("op_32071_cast_fp16")]; + tensor var_32073_equation_0 = const()[name = tensor("op_32073_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32073_cast_fp16 = einsum(equation = var_32073_equation_0, values = (var_31551_cast_fp16, var_31953_cast_fp16))[name = tensor("op_32073_cast_fp16")]; + tensor var_32075_equation_0 = const()[name = tensor("op_32075_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32075_cast_fp16 = einsum(equation = var_32075_equation_0, values = (var_31555_cast_fp16, var_31954_cast_fp16))[name = tensor("op_32075_cast_fp16")]; + tensor var_32077_equation_0 = const()[name = tensor("op_32077_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32077_cast_fp16 = einsum(equation = var_32077_equation_0, values = (var_31555_cast_fp16, var_31955_cast_fp16))[name = tensor("op_32077_cast_fp16")]; + tensor var_32079_equation_0 = const()[name = tensor("op_32079_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32079_cast_fp16 = einsum(equation = var_32079_equation_0, values = (var_31555_cast_fp16, var_31956_cast_fp16))[name = tensor("op_32079_cast_fp16")]; + tensor var_32081_equation_0 = const()[name = tensor("op_32081_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32081_cast_fp16 = einsum(equation = var_32081_equation_0, values = (var_31555_cast_fp16, var_31957_cast_fp16))[name = tensor("op_32081_cast_fp16")]; + tensor var_32083_equation_0 = const()[name = tensor("op_32083_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32083_cast_fp16 = einsum(equation = var_32083_equation_0, values = (var_31559_cast_fp16, var_31958_cast_fp16))[name = tensor("op_32083_cast_fp16")]; + tensor var_32085_equation_0 = const()[name = tensor("op_32085_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32085_cast_fp16 = einsum(equation = var_32085_equation_0, values = (var_31559_cast_fp16, var_31959_cast_fp16))[name = tensor("op_32085_cast_fp16")]; + tensor var_32087_equation_0 = const()[name = tensor("op_32087_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32087_cast_fp16 = einsum(equation = var_32087_equation_0, values = (var_31559_cast_fp16, var_31960_cast_fp16))[name = tensor("op_32087_cast_fp16")]; + tensor var_32089_equation_0 = const()[name = tensor("op_32089_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32089_cast_fp16 = einsum(equation = var_32089_equation_0, values = (var_31559_cast_fp16, var_31961_cast_fp16))[name = tensor("op_32089_cast_fp16")]; + tensor var_32091_equation_0 = const()[name = tensor("op_32091_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32091_cast_fp16 = einsum(equation = var_32091_equation_0, values = (var_31563_cast_fp16, var_31962_cast_fp16))[name = tensor("op_32091_cast_fp16")]; + tensor var_32093_equation_0 = const()[name = tensor("op_32093_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32093_cast_fp16 = einsum(equation = var_32093_equation_0, values = (var_31563_cast_fp16, var_31963_cast_fp16))[name = tensor("op_32093_cast_fp16")]; + tensor var_32095_equation_0 = const()[name = tensor("op_32095_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32095_cast_fp16 = einsum(equation = var_32095_equation_0, values = (var_31563_cast_fp16, var_31964_cast_fp16))[name = tensor("op_32095_cast_fp16")]; + tensor var_32097_equation_0 = const()[name = tensor("op_32097_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32097_cast_fp16 = einsum(equation = var_32097_equation_0, values = (var_31563_cast_fp16, var_31965_cast_fp16))[name = tensor("op_32097_cast_fp16")]; + tensor var_32099_equation_0 = const()[name = tensor("op_32099_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32099_cast_fp16 = einsum(equation = var_32099_equation_0, values = (var_31567_cast_fp16, var_31966_cast_fp16))[name = tensor("op_32099_cast_fp16")]; + tensor var_32101_equation_0 = const()[name = tensor("op_32101_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32101_cast_fp16 = einsum(equation = var_32101_equation_0, values = (var_31567_cast_fp16, var_31967_cast_fp16))[name = tensor("op_32101_cast_fp16")]; + tensor var_32103_equation_0 = const()[name = tensor("op_32103_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32103_cast_fp16 = einsum(equation = var_32103_equation_0, values = (var_31567_cast_fp16, var_31968_cast_fp16))[name = tensor("op_32103_cast_fp16")]; + tensor var_32105_equation_0 = const()[name = tensor("op_32105_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32105_cast_fp16 = einsum(equation = var_32105_equation_0, values = (var_31567_cast_fp16, var_31969_cast_fp16))[name = tensor("op_32105_cast_fp16")]; + tensor var_32107_equation_0 = const()[name = tensor("op_32107_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32107_cast_fp16 = einsum(equation = var_32107_equation_0, values = (var_31571_cast_fp16, var_31970_cast_fp16))[name = tensor("op_32107_cast_fp16")]; + tensor var_32109_equation_0 = const()[name = tensor("op_32109_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32109_cast_fp16 = einsum(equation = var_32109_equation_0, values = (var_31571_cast_fp16, var_31971_cast_fp16))[name = tensor("op_32109_cast_fp16")]; + tensor var_32111_equation_0 = const()[name = tensor("op_32111_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32111_cast_fp16 = einsum(equation = var_32111_equation_0, values = (var_31571_cast_fp16, var_31972_cast_fp16))[name = tensor("op_32111_cast_fp16")]; + tensor var_32113_equation_0 = const()[name = tensor("op_32113_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32113_cast_fp16 = einsum(equation = var_32113_equation_0, values = (var_31571_cast_fp16, var_31973_cast_fp16))[name = tensor("op_32113_cast_fp16")]; + tensor var_32115_equation_0 = const()[name = tensor("op_32115_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32115_cast_fp16 = einsum(equation = var_32115_equation_0, values = (var_31575_cast_fp16, var_31974_cast_fp16))[name = tensor("op_32115_cast_fp16")]; + tensor var_32117_equation_0 = const()[name = tensor("op_32117_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32117_cast_fp16 = einsum(equation = var_32117_equation_0, values = (var_31575_cast_fp16, var_31975_cast_fp16))[name = tensor("op_32117_cast_fp16")]; + tensor var_32119_equation_0 = const()[name = tensor("op_32119_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32119_cast_fp16 = einsum(equation = var_32119_equation_0, values = (var_31575_cast_fp16, var_31976_cast_fp16))[name = tensor("op_32119_cast_fp16")]; + tensor var_32121_equation_0 = const()[name = tensor("op_32121_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32121_cast_fp16 = einsum(equation = var_32121_equation_0, values = (var_31575_cast_fp16, var_31977_cast_fp16))[name = tensor("op_32121_cast_fp16")]; + tensor var_32123_equation_0 = const()[name = tensor("op_32123_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32123_cast_fp16 = einsum(equation = var_32123_equation_0, values = (var_31579_cast_fp16, var_31978_cast_fp16))[name = tensor("op_32123_cast_fp16")]; + tensor var_32125_equation_0 = const()[name = tensor("op_32125_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32125_cast_fp16 = einsum(equation = var_32125_equation_0, values = (var_31579_cast_fp16, var_31979_cast_fp16))[name = tensor("op_32125_cast_fp16")]; + tensor var_32127_equation_0 = const()[name = tensor("op_32127_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32127_cast_fp16 = einsum(equation = var_32127_equation_0, values = (var_31579_cast_fp16, var_31980_cast_fp16))[name = tensor("op_32127_cast_fp16")]; + tensor var_32129_equation_0 = const()[name = tensor("op_32129_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32129_cast_fp16 = einsum(equation = var_32129_equation_0, values = (var_31579_cast_fp16, var_31981_cast_fp16))[name = tensor("op_32129_cast_fp16")]; + tensor var_32131_equation_0 = const()[name = tensor("op_32131_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32131_cast_fp16 = einsum(equation = var_32131_equation_0, values = (var_31583_cast_fp16, var_31982_cast_fp16))[name = tensor("op_32131_cast_fp16")]; + tensor var_32133_equation_0 = const()[name = tensor("op_32133_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32133_cast_fp16 = einsum(equation = var_32133_equation_0, values = (var_31583_cast_fp16, var_31983_cast_fp16))[name = tensor("op_32133_cast_fp16")]; + tensor var_32135_equation_0 = const()[name = tensor("op_32135_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32135_cast_fp16 = einsum(equation = var_32135_equation_0, values = (var_31583_cast_fp16, var_31984_cast_fp16))[name = tensor("op_32135_cast_fp16")]; + tensor var_32137_equation_0 = const()[name = tensor("op_32137_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32137_cast_fp16 = einsum(equation = var_32137_equation_0, values = (var_31583_cast_fp16, var_31985_cast_fp16))[name = tensor("op_32137_cast_fp16")]; + tensor var_32139_equation_0 = const()[name = tensor("op_32139_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32139_cast_fp16 = einsum(equation = var_32139_equation_0, values = (var_31587_cast_fp16, var_31986_cast_fp16))[name = tensor("op_32139_cast_fp16")]; + tensor var_32141_equation_0 = const()[name = tensor("op_32141_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32141_cast_fp16 = einsum(equation = var_32141_equation_0, values = (var_31587_cast_fp16, var_31987_cast_fp16))[name = tensor("op_32141_cast_fp16")]; + tensor var_32143_equation_0 = const()[name = tensor("op_32143_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32143_cast_fp16 = einsum(equation = var_32143_equation_0, values = (var_31587_cast_fp16, var_31988_cast_fp16))[name = tensor("op_32143_cast_fp16")]; + tensor var_32145_equation_0 = const()[name = tensor("op_32145_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32145_cast_fp16 = einsum(equation = var_32145_equation_0, values = (var_31587_cast_fp16, var_31989_cast_fp16))[name = tensor("op_32145_cast_fp16")]; + tensor var_32147_equation_0 = const()[name = tensor("op_32147_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32147_cast_fp16 = einsum(equation = var_32147_equation_0, values = (var_31591_cast_fp16, var_31990_cast_fp16))[name = tensor("op_32147_cast_fp16")]; + tensor var_32149_equation_0 = const()[name = tensor("op_32149_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32149_cast_fp16 = einsum(equation = var_32149_equation_0, values = (var_31591_cast_fp16, var_31991_cast_fp16))[name = tensor("op_32149_cast_fp16")]; + tensor var_32151_equation_0 = const()[name = tensor("op_32151_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32151_cast_fp16 = einsum(equation = var_32151_equation_0, values = (var_31591_cast_fp16, var_31992_cast_fp16))[name = tensor("op_32151_cast_fp16")]; + tensor var_32153_equation_0 = const()[name = tensor("op_32153_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_32153_cast_fp16 = einsum(equation = var_32153_equation_0, values = (var_31591_cast_fp16, var_31993_cast_fp16))[name = tensor("op_32153_cast_fp16")]; + tensor var_32155_interleave_0 = const()[name = tensor("op_32155_interleave_0"), val = tensor(false)]; + tensor var_32155_cast_fp16 = concat(axis = var_30714, interleave = var_32155_interleave_0, values = (var_31995_cast_fp16, var_31997_cast_fp16, var_31999_cast_fp16, var_32001_cast_fp16))[name = tensor("op_32155_cast_fp16")]; + tensor var_32157_interleave_0 = const()[name = tensor("op_32157_interleave_0"), val = tensor(false)]; + tensor var_32157_cast_fp16 = concat(axis = var_30714, interleave = var_32157_interleave_0, values = (var_32003_cast_fp16, var_32005_cast_fp16, var_32007_cast_fp16, var_32009_cast_fp16))[name = tensor("op_32157_cast_fp16")]; + tensor var_32159_interleave_0 = const()[name = tensor("op_32159_interleave_0"), val = tensor(false)]; + tensor var_32159_cast_fp16 = concat(axis = var_30714, interleave = var_32159_interleave_0, values = (var_32011_cast_fp16, var_32013_cast_fp16, var_32015_cast_fp16, var_32017_cast_fp16))[name = tensor("op_32159_cast_fp16")]; + tensor var_32161_interleave_0 = const()[name = tensor("op_32161_interleave_0"), val = tensor(false)]; + tensor var_32161_cast_fp16 = concat(axis = var_30714, interleave = var_32161_interleave_0, values = (var_32019_cast_fp16, var_32021_cast_fp16, var_32023_cast_fp16, var_32025_cast_fp16))[name = tensor("op_32161_cast_fp16")]; + tensor var_32163_interleave_0 = const()[name = tensor("op_32163_interleave_0"), val = tensor(false)]; + tensor var_32163_cast_fp16 = concat(axis = var_30714, interleave = var_32163_interleave_0, values = (var_32027_cast_fp16, var_32029_cast_fp16, var_32031_cast_fp16, var_32033_cast_fp16))[name = tensor("op_32163_cast_fp16")]; + tensor var_32165_interleave_0 = const()[name = tensor("op_32165_interleave_0"), val = tensor(false)]; + tensor var_32165_cast_fp16 = concat(axis = var_30714, interleave = var_32165_interleave_0, values = (var_32035_cast_fp16, var_32037_cast_fp16, var_32039_cast_fp16, var_32041_cast_fp16))[name = tensor("op_32165_cast_fp16")]; + tensor var_32167_interleave_0 = const()[name = tensor("op_32167_interleave_0"), val = tensor(false)]; + tensor var_32167_cast_fp16 = concat(axis = var_30714, interleave = var_32167_interleave_0, values = (var_32043_cast_fp16, var_32045_cast_fp16, var_32047_cast_fp16, var_32049_cast_fp16))[name = tensor("op_32167_cast_fp16")]; + tensor var_32169_interleave_0 = const()[name = tensor("op_32169_interleave_0"), val = tensor(false)]; + tensor var_32169_cast_fp16 = concat(axis = var_30714, interleave = var_32169_interleave_0, values = (var_32051_cast_fp16, var_32053_cast_fp16, var_32055_cast_fp16, var_32057_cast_fp16))[name = tensor("op_32169_cast_fp16")]; + tensor var_32171_interleave_0 = const()[name = tensor("op_32171_interleave_0"), val = tensor(false)]; + tensor var_32171_cast_fp16 = concat(axis = var_30714, interleave = var_32171_interleave_0, values = (var_32059_cast_fp16, var_32061_cast_fp16, var_32063_cast_fp16, var_32065_cast_fp16))[name = tensor("op_32171_cast_fp16")]; + tensor var_32173_interleave_0 = const()[name = tensor("op_32173_interleave_0"), val = tensor(false)]; + tensor var_32173_cast_fp16 = concat(axis = var_30714, interleave = var_32173_interleave_0, values = (var_32067_cast_fp16, var_32069_cast_fp16, var_32071_cast_fp16, var_32073_cast_fp16))[name = tensor("op_32173_cast_fp16")]; + tensor var_32175_interleave_0 = const()[name = tensor("op_32175_interleave_0"), val = tensor(false)]; + tensor var_32175_cast_fp16 = concat(axis = var_30714, interleave = var_32175_interleave_0, values = (var_32075_cast_fp16, var_32077_cast_fp16, var_32079_cast_fp16, var_32081_cast_fp16))[name = tensor("op_32175_cast_fp16")]; + tensor var_32177_interleave_0 = const()[name = tensor("op_32177_interleave_0"), val = tensor(false)]; + tensor var_32177_cast_fp16 = concat(axis = var_30714, interleave = var_32177_interleave_0, values = (var_32083_cast_fp16, var_32085_cast_fp16, var_32087_cast_fp16, var_32089_cast_fp16))[name = tensor("op_32177_cast_fp16")]; + tensor var_32179_interleave_0 = const()[name = tensor("op_32179_interleave_0"), val = tensor(false)]; + tensor var_32179_cast_fp16 = concat(axis = var_30714, interleave = var_32179_interleave_0, values = (var_32091_cast_fp16, var_32093_cast_fp16, var_32095_cast_fp16, var_32097_cast_fp16))[name = tensor("op_32179_cast_fp16")]; + tensor var_32181_interleave_0 = const()[name = tensor("op_32181_interleave_0"), val = tensor(false)]; + tensor var_32181_cast_fp16 = concat(axis = var_30714, interleave = var_32181_interleave_0, values = (var_32099_cast_fp16, var_32101_cast_fp16, var_32103_cast_fp16, var_32105_cast_fp16))[name = tensor("op_32181_cast_fp16")]; + tensor var_32183_interleave_0 = const()[name = tensor("op_32183_interleave_0"), val = tensor(false)]; + tensor var_32183_cast_fp16 = concat(axis = var_30714, interleave = var_32183_interleave_0, values = (var_32107_cast_fp16, var_32109_cast_fp16, var_32111_cast_fp16, var_32113_cast_fp16))[name = tensor("op_32183_cast_fp16")]; + tensor var_32185_interleave_0 = const()[name = tensor("op_32185_interleave_0"), val = tensor(false)]; + tensor var_32185_cast_fp16 = concat(axis = var_30714, interleave = var_32185_interleave_0, values = (var_32115_cast_fp16, var_32117_cast_fp16, var_32119_cast_fp16, var_32121_cast_fp16))[name = tensor("op_32185_cast_fp16")]; + tensor var_32187_interleave_0 = const()[name = tensor("op_32187_interleave_0"), val = tensor(false)]; + tensor var_32187_cast_fp16 = concat(axis = var_30714, interleave = var_32187_interleave_0, values = (var_32123_cast_fp16, var_32125_cast_fp16, var_32127_cast_fp16, var_32129_cast_fp16))[name = tensor("op_32187_cast_fp16")]; + tensor var_32189_interleave_0 = const()[name = tensor("op_32189_interleave_0"), val = tensor(false)]; + tensor var_32189_cast_fp16 = concat(axis = var_30714, interleave = var_32189_interleave_0, values = (var_32131_cast_fp16, var_32133_cast_fp16, var_32135_cast_fp16, var_32137_cast_fp16))[name = tensor("op_32189_cast_fp16")]; + tensor var_32191_interleave_0 = const()[name = tensor("op_32191_interleave_0"), val = tensor(false)]; + tensor var_32191_cast_fp16 = concat(axis = var_30714, interleave = var_32191_interleave_0, values = (var_32139_cast_fp16, var_32141_cast_fp16, var_32143_cast_fp16, var_32145_cast_fp16))[name = tensor("op_32191_cast_fp16")]; + tensor var_32193_interleave_0 = const()[name = tensor("op_32193_interleave_0"), val = tensor(false)]; + tensor var_32193_cast_fp16 = concat(axis = var_30714, interleave = var_32193_interleave_0, values = (var_32147_cast_fp16, var_32149_cast_fp16, var_32151_cast_fp16, var_32153_cast_fp16))[name = tensor("op_32193_cast_fp16")]; + tensor input_161_interleave_0 = const()[name = tensor("input_161_interleave_0"), val = tensor(false)]; + tensor input_161_cast_fp16 = concat(axis = var_30739, interleave = input_161_interleave_0, values = (var_32155_cast_fp16, var_32157_cast_fp16, var_32159_cast_fp16, var_32161_cast_fp16, var_32163_cast_fp16, var_32165_cast_fp16, var_32167_cast_fp16, var_32169_cast_fp16, var_32171_cast_fp16, var_32173_cast_fp16, var_32175_cast_fp16, var_32177_cast_fp16, var_32179_cast_fp16, var_32181_cast_fp16, var_32183_cast_fp16, var_32185_cast_fp16, var_32187_cast_fp16, var_32189_cast_fp16, var_32191_cast_fp16, var_32193_cast_fp16))[name = tensor("input_161_cast_fp16")]; + tensor var_32198 = const()[name = tensor("op_32198"), val = tensor([1, 1])]; + tensor var_32200 = const()[name = tensor("op_32200"), val = tensor([1, 1])]; + tensor obj_83_pad_type_0 = const()[name = tensor("obj_83_pad_type_0"), val = tensor("custom")]; + tensor obj_83_pad_0 = const()[name = tensor("obj_83_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_20_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_20_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(811202240)))]; + tensor layers_20_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_20_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(814479104)))]; + tensor obj_83_cast_fp16 = conv(bias = layers_20_self_attn_o_proj_bias_to_fp16, dilations = var_32200, groups = var_30739, pad = obj_83_pad_0, pad_type = obj_83_pad_type_0, strides = var_32198, weight = layers_20_self_attn_o_proj_weight_to_fp16, x = input_161_cast_fp16)[name = tensor("obj_83_cast_fp16")]; + tensor inputs_83_cast_fp16 = add(x = inputs_81_cast_fp16, y = obj_83_cast_fp16)[name = tensor("inputs_83_cast_fp16")]; + tensor var_32206 = const()[name = tensor("op_32206"), val = tensor([1])]; + tensor channels_mean_83_cast_fp16 = reduce_mean(axes = var_32206, keep_dims = var_30740, x = inputs_83_cast_fp16)[name = tensor("channels_mean_83_cast_fp16")]; + tensor zero_mean_83_cast_fp16 = sub(x = inputs_83_cast_fp16, y = channels_mean_83_cast_fp16)[name = tensor("zero_mean_83_cast_fp16")]; + tensor zero_mean_sq_83_cast_fp16 = mul(x = zero_mean_83_cast_fp16, y = zero_mean_83_cast_fp16)[name = tensor("zero_mean_sq_83_cast_fp16")]; + tensor var_32210 = const()[name = tensor("op_32210"), val = tensor([1])]; + tensor var_32211_cast_fp16 = reduce_mean(axes = var_32210, keep_dims = var_30740, x = zero_mean_sq_83_cast_fp16)[name = tensor("op_32211_cast_fp16")]; + tensor var_32212_to_fp16 = const()[name = tensor("op_32212_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_32213_cast_fp16 = add(x = var_32211_cast_fp16, y = var_32212_to_fp16)[name = tensor("op_32213_cast_fp16")]; + tensor denom_83_epsilon_0_to_fp16 = const()[name = tensor("denom_83_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_83_cast_fp16 = rsqrt(epsilon = denom_83_epsilon_0_to_fp16, x = var_32213_cast_fp16)[name = tensor("denom_83_cast_fp16")]; + tensor out_83_cast_fp16 = mul(x = zero_mean_83_cast_fp16, y = denom_83_cast_fp16)[name = tensor("out_83_cast_fp16")]; + tensor input_163_gamma_0_to_fp16 = const()[name = tensor("input_163_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(814481728)))]; + tensor input_163_beta_0_to_fp16 = const()[name = tensor("input_163_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(814484352)))]; + tensor input_163_epsilon_0_to_fp16 = const()[name = tensor("input_163_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_163_cast_fp16 = batch_norm(beta = input_163_beta_0_to_fp16, epsilon = input_163_epsilon_0_to_fp16, gamma = input_163_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_83_cast_fp16)[name = tensor("input_163_cast_fp16")]; + tensor var_32224 = const()[name = tensor("op_32224"), val = tensor([1, 1])]; + tensor var_32226 = const()[name = tensor("op_32226"), val = tensor([1, 1])]; + tensor input_165_pad_type_0 = const()[name = tensor("input_165_pad_type_0"), val = tensor("custom")]; + tensor input_165_pad_0 = const()[name = tensor("input_165_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_20_fc1_weight_to_fp16 = const()[name = tensor("layers_20_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(814486976)))]; + tensor layers_20_fc1_bias_to_fp16 = const()[name = tensor("layers_20_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(827594240)))]; + tensor input_165_cast_fp16 = conv(bias = layers_20_fc1_bias_to_fp16, dilations = var_32226, groups = var_30739, pad = input_165_pad_0, pad_type = input_165_pad_type_0, strides = var_32224, weight = layers_20_fc1_weight_to_fp16, x = input_163_cast_fp16)[name = tensor("input_165_cast_fp16")]; + tensor input_167_mode_0 = const()[name = tensor("input_167_mode_0"), val = tensor("EXACT")]; + tensor input_167_cast_fp16 = gelu(mode = input_167_mode_0, x = input_165_cast_fp16)[name = tensor("input_167_cast_fp16")]; + tensor var_32232 = const()[name = tensor("op_32232"), val = tensor([1, 1])]; + tensor var_32234 = const()[name = tensor("op_32234"), val = tensor([1, 1])]; + tensor hidden_states_45_pad_type_0 = const()[name = tensor("hidden_states_45_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_45_pad_0 = const()[name = tensor("hidden_states_45_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_20_fc2_weight_to_fp16 = const()[name = tensor("layers_20_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(827604544)))]; + tensor layers_20_fc2_bias_to_fp16 = const()[name = tensor("layers_20_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(840711808)))]; + tensor hidden_states_45_cast_fp16 = conv(bias = layers_20_fc2_bias_to_fp16, dilations = var_32234, groups = var_30739, pad = hidden_states_45_pad_0, pad_type = hidden_states_45_pad_type_0, strides = var_32232, weight = layers_20_fc2_weight_to_fp16, x = input_167_cast_fp16)[name = tensor("hidden_states_45_cast_fp16")]; + tensor inputs_85_cast_fp16 = add(x = inputs_83_cast_fp16, y = hidden_states_45_cast_fp16)[name = tensor("inputs_85_cast_fp16")]; + tensor var_32241 = const()[name = tensor("op_32241"), val = tensor(3)]; + tensor var_32266 = const()[name = tensor("op_32266"), val = tensor(1)]; + tensor var_32267 = const()[name = tensor("op_32267"), val = tensor(true)]; + tensor var_32277 = const()[name = tensor("op_32277"), val = tensor([1])]; + tensor channels_mean_85_cast_fp16 = reduce_mean(axes = var_32277, keep_dims = var_32267, x = inputs_85_cast_fp16)[name = tensor("channels_mean_85_cast_fp16")]; + tensor zero_mean_85_cast_fp16 = sub(x = inputs_85_cast_fp16, y = channels_mean_85_cast_fp16)[name = tensor("zero_mean_85_cast_fp16")]; + tensor zero_mean_sq_85_cast_fp16 = mul(x = zero_mean_85_cast_fp16, y = zero_mean_85_cast_fp16)[name = tensor("zero_mean_sq_85_cast_fp16")]; + tensor var_32281 = const()[name = tensor("op_32281"), val = tensor([1])]; + tensor var_32282_cast_fp16 = reduce_mean(axes = var_32281, keep_dims = var_32267, x = zero_mean_sq_85_cast_fp16)[name = tensor("op_32282_cast_fp16")]; + tensor var_32283_to_fp16 = const()[name = tensor("op_32283_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_32284_cast_fp16 = add(x = var_32282_cast_fp16, y = var_32283_to_fp16)[name = tensor("op_32284_cast_fp16")]; + tensor denom_85_epsilon_0_to_fp16 = const()[name = tensor("denom_85_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_85_cast_fp16 = rsqrt(epsilon = denom_85_epsilon_0_to_fp16, x = var_32284_cast_fp16)[name = tensor("denom_85_cast_fp16")]; + tensor out_85_cast_fp16 = mul(x = zero_mean_85_cast_fp16, y = denom_85_cast_fp16)[name = tensor("out_85_cast_fp16")]; + tensor obj_85_gamma_0_to_fp16 = const()[name = tensor("obj_85_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(840714432)))]; + tensor obj_85_beta_0_to_fp16 = const()[name = tensor("obj_85_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(840717056)))]; + tensor obj_85_epsilon_0_to_fp16 = const()[name = tensor("obj_85_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_85_cast_fp16 = batch_norm(beta = obj_85_beta_0_to_fp16, epsilon = obj_85_epsilon_0_to_fp16, gamma = obj_85_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_85_cast_fp16)[name = tensor("obj_85_cast_fp16")]; + tensor var_32299 = const()[name = tensor("op_32299"), val = tensor([1, 1])]; + tensor var_32301 = const()[name = tensor("op_32301"), val = tensor([1, 1])]; + tensor query_43_pad_type_0 = const()[name = tensor("query_43_pad_type_0"), val = tensor("custom")]; + tensor query_43_pad_0 = const()[name = tensor("query_43_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_21_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_21_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(840719680)))]; + tensor layers_21_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_21_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(843996544)))]; + tensor query_43_cast_fp16 = conv(bias = layers_21_self_attn_q_proj_bias_to_fp16, dilations = var_32301, groups = var_32266, pad = query_43_pad_0, pad_type = query_43_pad_type_0, strides = var_32299, weight = layers_21_self_attn_q_proj_weight_to_fp16, x = obj_85_cast_fp16)[name = tensor("query_43_cast_fp16")]; + tensor var_32305 = const()[name = tensor("op_32305"), val = tensor([1, 1])]; + tensor var_32307 = const()[name = tensor("op_32307"), val = tensor([1, 1])]; + tensor key_43_pad_type_0 = const()[name = tensor("key_43_pad_type_0"), val = tensor("custom")]; + tensor key_43_pad_0 = const()[name = tensor("key_43_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_21_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_21_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(843999168)))]; + tensor key_43_cast_fp16 = conv(dilations = var_32307, groups = var_32266, pad = key_43_pad_0, pad_type = key_43_pad_type_0, strides = var_32305, weight = layers_21_self_attn_k_proj_weight_to_fp16, x = obj_85_cast_fp16)[name = tensor("key_43_cast_fp16")]; + tensor var_32312 = const()[name = tensor("op_32312"), val = tensor([1, 1])]; + tensor var_32314 = const()[name = tensor("op_32314"), val = tensor([1, 1])]; + tensor value_43_pad_type_0 = const()[name = tensor("value_43_pad_type_0"), val = tensor("custom")]; + tensor value_43_pad_0 = const()[name = tensor("value_43_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_21_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_21_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(847276032)))]; + tensor layers_21_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_21_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(850552896)))]; + tensor value_43_cast_fp16 = conv(bias = layers_21_self_attn_v_proj_bias_to_fp16, dilations = var_32314, groups = var_32266, pad = value_43_pad_0, pad_type = value_43_pad_type_0, strides = var_32312, weight = layers_21_self_attn_v_proj_weight_to_fp16, x = obj_85_cast_fp16)[name = tensor("value_43_cast_fp16")]; + tensor var_32321_begin_0 = const()[name = tensor("op_32321_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_32321_end_0 = const()[name = tensor("op_32321_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_32321_end_mask_0 = const()[name = tensor("op_32321_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_32321_cast_fp16 = slice_by_index(begin = var_32321_begin_0, end = var_32321_end_0, end_mask = var_32321_end_mask_0, x = query_43_cast_fp16)[name = tensor("op_32321_cast_fp16")]; + tensor var_32325_begin_0 = const()[name = tensor("op_32325_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_32325_end_0 = const()[name = tensor("op_32325_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_32325_end_mask_0 = const()[name = tensor("op_32325_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_32325_cast_fp16 = slice_by_index(begin = var_32325_begin_0, end = var_32325_end_0, end_mask = var_32325_end_mask_0, x = query_43_cast_fp16)[name = tensor("op_32325_cast_fp16")]; + tensor var_32329_begin_0 = const()[name = tensor("op_32329_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_32329_end_0 = const()[name = tensor("op_32329_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_32329_end_mask_0 = const()[name = tensor("op_32329_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_32329_cast_fp16 = slice_by_index(begin = var_32329_begin_0, end = var_32329_end_0, end_mask = var_32329_end_mask_0, x = query_43_cast_fp16)[name = tensor("op_32329_cast_fp16")]; + tensor var_32333_begin_0 = const()[name = tensor("op_32333_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_32333_end_0 = const()[name = tensor("op_32333_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_32333_end_mask_0 = const()[name = tensor("op_32333_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_32333_cast_fp16 = slice_by_index(begin = var_32333_begin_0, end = var_32333_end_0, end_mask = var_32333_end_mask_0, x = query_43_cast_fp16)[name = tensor("op_32333_cast_fp16")]; + tensor var_32337_begin_0 = const()[name = tensor("op_32337_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_32337_end_0 = const()[name = tensor("op_32337_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_32337_end_mask_0 = const()[name = tensor("op_32337_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_32337_cast_fp16 = slice_by_index(begin = var_32337_begin_0, end = var_32337_end_0, end_mask = var_32337_end_mask_0, x = query_43_cast_fp16)[name = tensor("op_32337_cast_fp16")]; + tensor var_32341_begin_0 = const()[name = tensor("op_32341_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_32341_end_0 = const()[name = tensor("op_32341_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_32341_end_mask_0 = const()[name = tensor("op_32341_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_32341_cast_fp16 = slice_by_index(begin = var_32341_begin_0, end = var_32341_end_0, end_mask = var_32341_end_mask_0, x = query_43_cast_fp16)[name = tensor("op_32341_cast_fp16")]; + tensor var_32345_begin_0 = const()[name = tensor("op_32345_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_32345_end_0 = const()[name = tensor("op_32345_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_32345_end_mask_0 = const()[name = tensor("op_32345_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_32345_cast_fp16 = slice_by_index(begin = var_32345_begin_0, end = var_32345_end_0, end_mask = var_32345_end_mask_0, x = query_43_cast_fp16)[name = tensor("op_32345_cast_fp16")]; + tensor var_32349_begin_0 = const()[name = tensor("op_32349_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_32349_end_0 = const()[name = tensor("op_32349_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_32349_end_mask_0 = const()[name = tensor("op_32349_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_32349_cast_fp16 = slice_by_index(begin = var_32349_begin_0, end = var_32349_end_0, end_mask = var_32349_end_mask_0, x = query_43_cast_fp16)[name = tensor("op_32349_cast_fp16")]; + tensor var_32353_begin_0 = const()[name = tensor("op_32353_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_32353_end_0 = const()[name = tensor("op_32353_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_32353_end_mask_0 = const()[name = tensor("op_32353_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_32353_cast_fp16 = slice_by_index(begin = var_32353_begin_0, end = var_32353_end_0, end_mask = var_32353_end_mask_0, x = query_43_cast_fp16)[name = tensor("op_32353_cast_fp16")]; + tensor var_32357_begin_0 = const()[name = tensor("op_32357_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_32357_end_0 = const()[name = tensor("op_32357_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_32357_end_mask_0 = const()[name = tensor("op_32357_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_32357_cast_fp16 = slice_by_index(begin = var_32357_begin_0, end = var_32357_end_0, end_mask = var_32357_end_mask_0, x = query_43_cast_fp16)[name = tensor("op_32357_cast_fp16")]; + tensor var_32361_begin_0 = const()[name = tensor("op_32361_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_32361_end_0 = const()[name = tensor("op_32361_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_32361_end_mask_0 = const()[name = tensor("op_32361_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_32361_cast_fp16 = slice_by_index(begin = var_32361_begin_0, end = var_32361_end_0, end_mask = var_32361_end_mask_0, x = query_43_cast_fp16)[name = tensor("op_32361_cast_fp16")]; + tensor var_32365_begin_0 = const()[name = tensor("op_32365_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_32365_end_0 = const()[name = tensor("op_32365_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_32365_end_mask_0 = const()[name = tensor("op_32365_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_32365_cast_fp16 = slice_by_index(begin = var_32365_begin_0, end = var_32365_end_0, end_mask = var_32365_end_mask_0, x = query_43_cast_fp16)[name = tensor("op_32365_cast_fp16")]; + tensor var_32369_begin_0 = const()[name = tensor("op_32369_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_32369_end_0 = const()[name = tensor("op_32369_end_0"), val = tensor([1, 832, 1, 1500])]; + tensor var_32369_end_mask_0 = const()[name = tensor("op_32369_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_32369_cast_fp16 = slice_by_index(begin = var_32369_begin_0, end = var_32369_end_0, end_mask = var_32369_end_mask_0, x = query_43_cast_fp16)[name = tensor("op_32369_cast_fp16")]; + tensor var_32373_begin_0 = const()[name = tensor("op_32373_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_32373_end_0 = const()[name = tensor("op_32373_end_0"), val = tensor([1, 896, 1, 1500])]; + tensor var_32373_end_mask_0 = const()[name = tensor("op_32373_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_32373_cast_fp16 = slice_by_index(begin = var_32373_begin_0, end = var_32373_end_0, end_mask = var_32373_end_mask_0, x = query_43_cast_fp16)[name = tensor("op_32373_cast_fp16")]; + tensor var_32377_begin_0 = const()[name = tensor("op_32377_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_32377_end_0 = const()[name = tensor("op_32377_end_0"), val = tensor([1, 960, 1, 1500])]; + tensor var_32377_end_mask_0 = const()[name = tensor("op_32377_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_32377_cast_fp16 = slice_by_index(begin = var_32377_begin_0, end = var_32377_end_0, end_mask = var_32377_end_mask_0, x = query_43_cast_fp16)[name = tensor("op_32377_cast_fp16")]; + tensor var_32381_begin_0 = const()[name = tensor("op_32381_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_32381_end_0 = const()[name = tensor("op_32381_end_0"), val = tensor([1, 1024, 1, 1500])]; + tensor var_32381_end_mask_0 = const()[name = tensor("op_32381_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_32381_cast_fp16 = slice_by_index(begin = var_32381_begin_0, end = var_32381_end_0, end_mask = var_32381_end_mask_0, x = query_43_cast_fp16)[name = tensor("op_32381_cast_fp16")]; + tensor var_32385_begin_0 = const()[name = tensor("op_32385_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_32385_end_0 = const()[name = tensor("op_32385_end_0"), val = tensor([1, 1088, 1, 1500])]; + tensor var_32385_end_mask_0 = const()[name = tensor("op_32385_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_32385_cast_fp16 = slice_by_index(begin = var_32385_begin_0, end = var_32385_end_0, end_mask = var_32385_end_mask_0, x = query_43_cast_fp16)[name = tensor("op_32385_cast_fp16")]; + tensor var_32389_begin_0 = const()[name = tensor("op_32389_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_32389_end_0 = const()[name = tensor("op_32389_end_0"), val = tensor([1, 1152, 1, 1500])]; + tensor var_32389_end_mask_0 = const()[name = tensor("op_32389_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_32389_cast_fp16 = slice_by_index(begin = var_32389_begin_0, end = var_32389_end_0, end_mask = var_32389_end_mask_0, x = query_43_cast_fp16)[name = tensor("op_32389_cast_fp16")]; + tensor var_32393_begin_0 = const()[name = tensor("op_32393_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_32393_end_0 = const()[name = tensor("op_32393_end_0"), val = tensor([1, 1216, 1, 1500])]; + tensor var_32393_end_mask_0 = const()[name = tensor("op_32393_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_32393_cast_fp16 = slice_by_index(begin = var_32393_begin_0, end = var_32393_end_0, end_mask = var_32393_end_mask_0, x = query_43_cast_fp16)[name = tensor("op_32393_cast_fp16")]; + tensor var_32397_begin_0 = const()[name = tensor("op_32397_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_32397_end_0 = const()[name = tensor("op_32397_end_0"), val = tensor([1, 1280, 1, 1500])]; + tensor var_32397_end_mask_0 = const()[name = tensor("op_32397_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_32397_cast_fp16 = slice_by_index(begin = var_32397_begin_0, end = var_32397_end_0, end_mask = var_32397_end_mask_0, x = query_43_cast_fp16)[name = tensor("op_32397_cast_fp16")]; + tensor var_32406_begin_0 = const()[name = tensor("op_32406_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_32406_end_0 = const()[name = tensor("op_32406_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_32406_end_mask_0 = const()[name = tensor("op_32406_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32406_cast_fp16 = slice_by_index(begin = var_32406_begin_0, end = var_32406_end_0, end_mask = var_32406_end_mask_0, x = var_32321_cast_fp16)[name = tensor("op_32406_cast_fp16")]; + tensor var_32413_begin_0 = const()[name = tensor("op_32413_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_32413_end_0 = const()[name = tensor("op_32413_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_32413_end_mask_0 = const()[name = tensor("op_32413_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32413_cast_fp16 = slice_by_index(begin = var_32413_begin_0, end = var_32413_end_0, end_mask = var_32413_end_mask_0, x = var_32321_cast_fp16)[name = tensor("op_32413_cast_fp16")]; + tensor var_32420_begin_0 = const()[name = tensor("op_32420_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_32420_end_0 = const()[name = tensor("op_32420_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_32420_end_mask_0 = const()[name = tensor("op_32420_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32420_cast_fp16 = slice_by_index(begin = var_32420_begin_0, end = var_32420_end_0, end_mask = var_32420_end_mask_0, x = var_32321_cast_fp16)[name = tensor("op_32420_cast_fp16")]; + tensor var_32427_begin_0 = const()[name = tensor("op_32427_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_32427_end_0 = const()[name = tensor("op_32427_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_32427_end_mask_0 = const()[name = tensor("op_32427_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32427_cast_fp16 = slice_by_index(begin = var_32427_begin_0, end = var_32427_end_0, end_mask = var_32427_end_mask_0, x = var_32321_cast_fp16)[name = tensor("op_32427_cast_fp16")]; + tensor var_32434_begin_0 = const()[name = tensor("op_32434_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_32434_end_0 = const()[name = tensor("op_32434_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_32434_end_mask_0 = const()[name = tensor("op_32434_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32434_cast_fp16 = slice_by_index(begin = var_32434_begin_0, end = var_32434_end_0, end_mask = var_32434_end_mask_0, x = var_32325_cast_fp16)[name = tensor("op_32434_cast_fp16")]; + tensor var_32441_begin_0 = const()[name = tensor("op_32441_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_32441_end_0 = const()[name = tensor("op_32441_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_32441_end_mask_0 = const()[name = tensor("op_32441_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32441_cast_fp16 = slice_by_index(begin = var_32441_begin_0, end = var_32441_end_0, end_mask = var_32441_end_mask_0, x = var_32325_cast_fp16)[name = tensor("op_32441_cast_fp16")]; + tensor var_32448_begin_0 = const()[name = tensor("op_32448_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_32448_end_0 = const()[name = tensor("op_32448_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_32448_end_mask_0 = const()[name = tensor("op_32448_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32448_cast_fp16 = slice_by_index(begin = var_32448_begin_0, end = var_32448_end_0, end_mask = var_32448_end_mask_0, x = var_32325_cast_fp16)[name = tensor("op_32448_cast_fp16")]; + tensor var_32455_begin_0 = const()[name = tensor("op_32455_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_32455_end_0 = const()[name = tensor("op_32455_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_32455_end_mask_0 = const()[name = tensor("op_32455_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32455_cast_fp16 = slice_by_index(begin = var_32455_begin_0, end = var_32455_end_0, end_mask = var_32455_end_mask_0, x = var_32325_cast_fp16)[name = tensor("op_32455_cast_fp16")]; + tensor var_32462_begin_0 = const()[name = tensor("op_32462_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_32462_end_0 = const()[name = tensor("op_32462_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_32462_end_mask_0 = const()[name = tensor("op_32462_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32462_cast_fp16 = slice_by_index(begin = var_32462_begin_0, end = var_32462_end_0, end_mask = var_32462_end_mask_0, x = var_32329_cast_fp16)[name = tensor("op_32462_cast_fp16")]; + tensor var_32469_begin_0 = const()[name = tensor("op_32469_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_32469_end_0 = const()[name = tensor("op_32469_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_32469_end_mask_0 = const()[name = tensor("op_32469_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32469_cast_fp16 = slice_by_index(begin = var_32469_begin_0, end = var_32469_end_0, end_mask = var_32469_end_mask_0, x = var_32329_cast_fp16)[name = tensor("op_32469_cast_fp16")]; + tensor var_32476_begin_0 = const()[name = tensor("op_32476_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_32476_end_0 = const()[name = tensor("op_32476_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_32476_end_mask_0 = const()[name = tensor("op_32476_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32476_cast_fp16 = slice_by_index(begin = var_32476_begin_0, end = var_32476_end_0, end_mask = var_32476_end_mask_0, x = var_32329_cast_fp16)[name = tensor("op_32476_cast_fp16")]; + tensor var_32483_begin_0 = const()[name = tensor("op_32483_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_32483_end_0 = const()[name = tensor("op_32483_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_32483_end_mask_0 = const()[name = tensor("op_32483_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32483_cast_fp16 = slice_by_index(begin = var_32483_begin_0, end = var_32483_end_0, end_mask = var_32483_end_mask_0, x = var_32329_cast_fp16)[name = tensor("op_32483_cast_fp16")]; + tensor var_32490_begin_0 = const()[name = tensor("op_32490_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_32490_end_0 = const()[name = tensor("op_32490_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_32490_end_mask_0 = const()[name = tensor("op_32490_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32490_cast_fp16 = slice_by_index(begin = var_32490_begin_0, end = var_32490_end_0, end_mask = var_32490_end_mask_0, x = var_32333_cast_fp16)[name = tensor("op_32490_cast_fp16")]; + tensor var_32497_begin_0 = const()[name = tensor("op_32497_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_32497_end_0 = const()[name = tensor("op_32497_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_32497_end_mask_0 = const()[name = tensor("op_32497_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32497_cast_fp16 = slice_by_index(begin = var_32497_begin_0, end = var_32497_end_0, end_mask = var_32497_end_mask_0, x = var_32333_cast_fp16)[name = tensor("op_32497_cast_fp16")]; + tensor var_32504_begin_0 = const()[name = tensor("op_32504_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_32504_end_0 = const()[name = tensor("op_32504_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_32504_end_mask_0 = const()[name = tensor("op_32504_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32504_cast_fp16 = slice_by_index(begin = var_32504_begin_0, end = var_32504_end_0, end_mask = var_32504_end_mask_0, x = var_32333_cast_fp16)[name = tensor("op_32504_cast_fp16")]; + tensor var_32511_begin_0 = const()[name = tensor("op_32511_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_32511_end_0 = const()[name = tensor("op_32511_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_32511_end_mask_0 = const()[name = tensor("op_32511_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32511_cast_fp16 = slice_by_index(begin = var_32511_begin_0, end = var_32511_end_0, end_mask = var_32511_end_mask_0, x = var_32333_cast_fp16)[name = tensor("op_32511_cast_fp16")]; + tensor var_32518_begin_0 = const()[name = tensor("op_32518_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_32518_end_0 = const()[name = tensor("op_32518_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_32518_end_mask_0 = const()[name = tensor("op_32518_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32518_cast_fp16 = slice_by_index(begin = var_32518_begin_0, end = var_32518_end_0, end_mask = var_32518_end_mask_0, x = var_32337_cast_fp16)[name = tensor("op_32518_cast_fp16")]; + tensor var_32525_begin_0 = const()[name = tensor("op_32525_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_32525_end_0 = const()[name = tensor("op_32525_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_32525_end_mask_0 = const()[name = tensor("op_32525_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32525_cast_fp16 = slice_by_index(begin = var_32525_begin_0, end = var_32525_end_0, end_mask = var_32525_end_mask_0, x = var_32337_cast_fp16)[name = tensor("op_32525_cast_fp16")]; + tensor var_32532_begin_0 = const()[name = tensor("op_32532_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_32532_end_0 = const()[name = tensor("op_32532_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_32532_end_mask_0 = const()[name = tensor("op_32532_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32532_cast_fp16 = slice_by_index(begin = var_32532_begin_0, end = var_32532_end_0, end_mask = var_32532_end_mask_0, x = var_32337_cast_fp16)[name = tensor("op_32532_cast_fp16")]; + tensor var_32539_begin_0 = const()[name = tensor("op_32539_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_32539_end_0 = const()[name = tensor("op_32539_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_32539_end_mask_0 = const()[name = tensor("op_32539_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32539_cast_fp16 = slice_by_index(begin = var_32539_begin_0, end = var_32539_end_0, end_mask = var_32539_end_mask_0, x = var_32337_cast_fp16)[name = tensor("op_32539_cast_fp16")]; + tensor var_32546_begin_0 = const()[name = tensor("op_32546_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_32546_end_0 = const()[name = tensor("op_32546_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_32546_end_mask_0 = const()[name = tensor("op_32546_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32546_cast_fp16 = slice_by_index(begin = var_32546_begin_0, end = var_32546_end_0, end_mask = var_32546_end_mask_0, x = var_32341_cast_fp16)[name = tensor("op_32546_cast_fp16")]; + tensor var_32553_begin_0 = const()[name = tensor("op_32553_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_32553_end_0 = const()[name = tensor("op_32553_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_32553_end_mask_0 = const()[name = tensor("op_32553_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32553_cast_fp16 = slice_by_index(begin = var_32553_begin_0, end = var_32553_end_0, end_mask = var_32553_end_mask_0, x = var_32341_cast_fp16)[name = tensor("op_32553_cast_fp16")]; + tensor var_32560_begin_0 = const()[name = tensor("op_32560_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_32560_end_0 = const()[name = tensor("op_32560_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_32560_end_mask_0 = const()[name = tensor("op_32560_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32560_cast_fp16 = slice_by_index(begin = var_32560_begin_0, end = var_32560_end_0, end_mask = var_32560_end_mask_0, x = var_32341_cast_fp16)[name = tensor("op_32560_cast_fp16")]; + tensor var_32567_begin_0 = const()[name = tensor("op_32567_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_32567_end_0 = const()[name = tensor("op_32567_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_32567_end_mask_0 = const()[name = tensor("op_32567_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32567_cast_fp16 = slice_by_index(begin = var_32567_begin_0, end = var_32567_end_0, end_mask = var_32567_end_mask_0, x = var_32341_cast_fp16)[name = tensor("op_32567_cast_fp16")]; + tensor var_32574_begin_0 = const()[name = tensor("op_32574_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_32574_end_0 = const()[name = tensor("op_32574_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_32574_end_mask_0 = const()[name = tensor("op_32574_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32574_cast_fp16 = slice_by_index(begin = var_32574_begin_0, end = var_32574_end_0, end_mask = var_32574_end_mask_0, x = var_32345_cast_fp16)[name = tensor("op_32574_cast_fp16")]; + tensor var_32581_begin_0 = const()[name = tensor("op_32581_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_32581_end_0 = const()[name = tensor("op_32581_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_32581_end_mask_0 = const()[name = tensor("op_32581_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32581_cast_fp16 = slice_by_index(begin = var_32581_begin_0, end = var_32581_end_0, end_mask = var_32581_end_mask_0, x = var_32345_cast_fp16)[name = tensor("op_32581_cast_fp16")]; + tensor var_32588_begin_0 = const()[name = tensor("op_32588_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_32588_end_0 = const()[name = tensor("op_32588_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_32588_end_mask_0 = const()[name = tensor("op_32588_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32588_cast_fp16 = slice_by_index(begin = var_32588_begin_0, end = var_32588_end_0, end_mask = var_32588_end_mask_0, x = var_32345_cast_fp16)[name = tensor("op_32588_cast_fp16")]; + tensor var_32595_begin_0 = const()[name = tensor("op_32595_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_32595_end_0 = const()[name = tensor("op_32595_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_32595_end_mask_0 = const()[name = tensor("op_32595_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32595_cast_fp16 = slice_by_index(begin = var_32595_begin_0, end = var_32595_end_0, end_mask = var_32595_end_mask_0, x = var_32345_cast_fp16)[name = tensor("op_32595_cast_fp16")]; + tensor var_32602_begin_0 = const()[name = tensor("op_32602_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_32602_end_0 = const()[name = tensor("op_32602_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_32602_end_mask_0 = const()[name = tensor("op_32602_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32602_cast_fp16 = slice_by_index(begin = var_32602_begin_0, end = var_32602_end_0, end_mask = var_32602_end_mask_0, x = var_32349_cast_fp16)[name = tensor("op_32602_cast_fp16")]; + tensor var_32609_begin_0 = const()[name = tensor("op_32609_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_32609_end_0 = const()[name = tensor("op_32609_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_32609_end_mask_0 = const()[name = tensor("op_32609_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32609_cast_fp16 = slice_by_index(begin = var_32609_begin_0, end = var_32609_end_0, end_mask = var_32609_end_mask_0, x = var_32349_cast_fp16)[name = tensor("op_32609_cast_fp16")]; + tensor var_32616_begin_0 = const()[name = tensor("op_32616_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_32616_end_0 = const()[name = tensor("op_32616_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_32616_end_mask_0 = const()[name = tensor("op_32616_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32616_cast_fp16 = slice_by_index(begin = var_32616_begin_0, end = var_32616_end_0, end_mask = var_32616_end_mask_0, x = var_32349_cast_fp16)[name = tensor("op_32616_cast_fp16")]; + tensor var_32623_begin_0 = const()[name = tensor("op_32623_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_32623_end_0 = const()[name = tensor("op_32623_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_32623_end_mask_0 = const()[name = tensor("op_32623_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32623_cast_fp16 = slice_by_index(begin = var_32623_begin_0, end = var_32623_end_0, end_mask = var_32623_end_mask_0, x = var_32349_cast_fp16)[name = tensor("op_32623_cast_fp16")]; + tensor var_32630_begin_0 = const()[name = tensor("op_32630_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_32630_end_0 = const()[name = tensor("op_32630_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_32630_end_mask_0 = const()[name = tensor("op_32630_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32630_cast_fp16 = slice_by_index(begin = var_32630_begin_0, end = var_32630_end_0, end_mask = var_32630_end_mask_0, x = var_32353_cast_fp16)[name = tensor("op_32630_cast_fp16")]; + tensor var_32637_begin_0 = const()[name = tensor("op_32637_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_32637_end_0 = const()[name = tensor("op_32637_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_32637_end_mask_0 = const()[name = tensor("op_32637_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32637_cast_fp16 = slice_by_index(begin = var_32637_begin_0, end = var_32637_end_0, end_mask = var_32637_end_mask_0, x = var_32353_cast_fp16)[name = tensor("op_32637_cast_fp16")]; + tensor var_32644_begin_0 = const()[name = tensor("op_32644_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_32644_end_0 = const()[name = tensor("op_32644_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_32644_end_mask_0 = const()[name = tensor("op_32644_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32644_cast_fp16 = slice_by_index(begin = var_32644_begin_0, end = var_32644_end_0, end_mask = var_32644_end_mask_0, x = var_32353_cast_fp16)[name = tensor("op_32644_cast_fp16")]; + tensor var_32651_begin_0 = const()[name = tensor("op_32651_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_32651_end_0 = const()[name = tensor("op_32651_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_32651_end_mask_0 = const()[name = tensor("op_32651_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32651_cast_fp16 = slice_by_index(begin = var_32651_begin_0, end = var_32651_end_0, end_mask = var_32651_end_mask_0, x = var_32353_cast_fp16)[name = tensor("op_32651_cast_fp16")]; + tensor var_32658_begin_0 = const()[name = tensor("op_32658_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_32658_end_0 = const()[name = tensor("op_32658_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_32658_end_mask_0 = const()[name = tensor("op_32658_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32658_cast_fp16 = slice_by_index(begin = var_32658_begin_0, end = var_32658_end_0, end_mask = var_32658_end_mask_0, x = var_32357_cast_fp16)[name = tensor("op_32658_cast_fp16")]; + tensor var_32665_begin_0 = const()[name = tensor("op_32665_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_32665_end_0 = const()[name = tensor("op_32665_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_32665_end_mask_0 = const()[name = tensor("op_32665_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32665_cast_fp16 = slice_by_index(begin = var_32665_begin_0, end = var_32665_end_0, end_mask = var_32665_end_mask_0, x = var_32357_cast_fp16)[name = tensor("op_32665_cast_fp16")]; + tensor var_32672_begin_0 = const()[name = tensor("op_32672_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_32672_end_0 = const()[name = tensor("op_32672_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_32672_end_mask_0 = const()[name = tensor("op_32672_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32672_cast_fp16 = slice_by_index(begin = var_32672_begin_0, end = var_32672_end_0, end_mask = var_32672_end_mask_0, x = var_32357_cast_fp16)[name = tensor("op_32672_cast_fp16")]; + tensor var_32679_begin_0 = const()[name = tensor("op_32679_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_32679_end_0 = const()[name = tensor("op_32679_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_32679_end_mask_0 = const()[name = tensor("op_32679_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32679_cast_fp16 = slice_by_index(begin = var_32679_begin_0, end = var_32679_end_0, end_mask = var_32679_end_mask_0, x = var_32357_cast_fp16)[name = tensor("op_32679_cast_fp16")]; + tensor var_32686_begin_0 = const()[name = tensor("op_32686_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_32686_end_0 = const()[name = tensor("op_32686_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_32686_end_mask_0 = const()[name = tensor("op_32686_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32686_cast_fp16 = slice_by_index(begin = var_32686_begin_0, end = var_32686_end_0, end_mask = var_32686_end_mask_0, x = var_32361_cast_fp16)[name = tensor("op_32686_cast_fp16")]; + tensor var_32693_begin_0 = const()[name = tensor("op_32693_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_32693_end_0 = const()[name = tensor("op_32693_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_32693_end_mask_0 = const()[name = tensor("op_32693_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32693_cast_fp16 = slice_by_index(begin = var_32693_begin_0, end = var_32693_end_0, end_mask = var_32693_end_mask_0, x = var_32361_cast_fp16)[name = tensor("op_32693_cast_fp16")]; + tensor var_32700_begin_0 = const()[name = tensor("op_32700_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_32700_end_0 = const()[name = tensor("op_32700_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_32700_end_mask_0 = const()[name = tensor("op_32700_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32700_cast_fp16 = slice_by_index(begin = var_32700_begin_0, end = var_32700_end_0, end_mask = var_32700_end_mask_0, x = var_32361_cast_fp16)[name = tensor("op_32700_cast_fp16")]; + tensor var_32707_begin_0 = const()[name = tensor("op_32707_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_32707_end_0 = const()[name = tensor("op_32707_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_32707_end_mask_0 = const()[name = tensor("op_32707_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32707_cast_fp16 = slice_by_index(begin = var_32707_begin_0, end = var_32707_end_0, end_mask = var_32707_end_mask_0, x = var_32361_cast_fp16)[name = tensor("op_32707_cast_fp16")]; + tensor var_32714_begin_0 = const()[name = tensor("op_32714_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_32714_end_0 = const()[name = tensor("op_32714_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_32714_end_mask_0 = const()[name = tensor("op_32714_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32714_cast_fp16 = slice_by_index(begin = var_32714_begin_0, end = var_32714_end_0, end_mask = var_32714_end_mask_0, x = var_32365_cast_fp16)[name = tensor("op_32714_cast_fp16")]; + tensor var_32721_begin_0 = const()[name = tensor("op_32721_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_32721_end_0 = const()[name = tensor("op_32721_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_32721_end_mask_0 = const()[name = tensor("op_32721_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32721_cast_fp16 = slice_by_index(begin = var_32721_begin_0, end = var_32721_end_0, end_mask = var_32721_end_mask_0, x = var_32365_cast_fp16)[name = tensor("op_32721_cast_fp16")]; + tensor var_32728_begin_0 = const()[name = tensor("op_32728_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_32728_end_0 = const()[name = tensor("op_32728_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_32728_end_mask_0 = const()[name = tensor("op_32728_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32728_cast_fp16 = slice_by_index(begin = var_32728_begin_0, end = var_32728_end_0, end_mask = var_32728_end_mask_0, x = var_32365_cast_fp16)[name = tensor("op_32728_cast_fp16")]; + tensor var_32735_begin_0 = const()[name = tensor("op_32735_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_32735_end_0 = const()[name = tensor("op_32735_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_32735_end_mask_0 = const()[name = tensor("op_32735_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32735_cast_fp16 = slice_by_index(begin = var_32735_begin_0, end = var_32735_end_0, end_mask = var_32735_end_mask_0, x = var_32365_cast_fp16)[name = tensor("op_32735_cast_fp16")]; + tensor var_32742_begin_0 = const()[name = tensor("op_32742_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_32742_end_0 = const()[name = tensor("op_32742_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_32742_end_mask_0 = const()[name = tensor("op_32742_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32742_cast_fp16 = slice_by_index(begin = var_32742_begin_0, end = var_32742_end_0, end_mask = var_32742_end_mask_0, x = var_32369_cast_fp16)[name = tensor("op_32742_cast_fp16")]; + tensor var_32749_begin_0 = const()[name = tensor("op_32749_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_32749_end_0 = const()[name = tensor("op_32749_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_32749_end_mask_0 = const()[name = tensor("op_32749_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32749_cast_fp16 = slice_by_index(begin = var_32749_begin_0, end = var_32749_end_0, end_mask = var_32749_end_mask_0, x = var_32369_cast_fp16)[name = tensor("op_32749_cast_fp16")]; + tensor var_32756_begin_0 = const()[name = tensor("op_32756_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_32756_end_0 = const()[name = tensor("op_32756_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_32756_end_mask_0 = const()[name = tensor("op_32756_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32756_cast_fp16 = slice_by_index(begin = var_32756_begin_0, end = var_32756_end_0, end_mask = var_32756_end_mask_0, x = var_32369_cast_fp16)[name = tensor("op_32756_cast_fp16")]; + tensor var_32763_begin_0 = const()[name = tensor("op_32763_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_32763_end_0 = const()[name = tensor("op_32763_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_32763_end_mask_0 = const()[name = tensor("op_32763_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32763_cast_fp16 = slice_by_index(begin = var_32763_begin_0, end = var_32763_end_0, end_mask = var_32763_end_mask_0, x = var_32369_cast_fp16)[name = tensor("op_32763_cast_fp16")]; + tensor var_32770_begin_0 = const()[name = tensor("op_32770_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_32770_end_0 = const()[name = tensor("op_32770_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_32770_end_mask_0 = const()[name = tensor("op_32770_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32770_cast_fp16 = slice_by_index(begin = var_32770_begin_0, end = var_32770_end_0, end_mask = var_32770_end_mask_0, x = var_32373_cast_fp16)[name = tensor("op_32770_cast_fp16")]; + tensor var_32777_begin_0 = const()[name = tensor("op_32777_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_32777_end_0 = const()[name = tensor("op_32777_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_32777_end_mask_0 = const()[name = tensor("op_32777_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32777_cast_fp16 = slice_by_index(begin = var_32777_begin_0, end = var_32777_end_0, end_mask = var_32777_end_mask_0, x = var_32373_cast_fp16)[name = tensor("op_32777_cast_fp16")]; + tensor var_32784_begin_0 = const()[name = tensor("op_32784_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_32784_end_0 = const()[name = tensor("op_32784_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_32784_end_mask_0 = const()[name = tensor("op_32784_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32784_cast_fp16 = slice_by_index(begin = var_32784_begin_0, end = var_32784_end_0, end_mask = var_32784_end_mask_0, x = var_32373_cast_fp16)[name = tensor("op_32784_cast_fp16")]; + tensor var_32791_begin_0 = const()[name = tensor("op_32791_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_32791_end_0 = const()[name = tensor("op_32791_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_32791_end_mask_0 = const()[name = tensor("op_32791_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32791_cast_fp16 = slice_by_index(begin = var_32791_begin_0, end = var_32791_end_0, end_mask = var_32791_end_mask_0, x = var_32373_cast_fp16)[name = tensor("op_32791_cast_fp16")]; + tensor var_32798_begin_0 = const()[name = tensor("op_32798_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_32798_end_0 = const()[name = tensor("op_32798_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_32798_end_mask_0 = const()[name = tensor("op_32798_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32798_cast_fp16 = slice_by_index(begin = var_32798_begin_0, end = var_32798_end_0, end_mask = var_32798_end_mask_0, x = var_32377_cast_fp16)[name = tensor("op_32798_cast_fp16")]; + tensor var_32805_begin_0 = const()[name = tensor("op_32805_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_32805_end_0 = const()[name = tensor("op_32805_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_32805_end_mask_0 = const()[name = tensor("op_32805_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32805_cast_fp16 = slice_by_index(begin = var_32805_begin_0, end = var_32805_end_0, end_mask = var_32805_end_mask_0, x = var_32377_cast_fp16)[name = tensor("op_32805_cast_fp16")]; + tensor var_32812_begin_0 = const()[name = tensor("op_32812_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_32812_end_0 = const()[name = tensor("op_32812_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_32812_end_mask_0 = const()[name = tensor("op_32812_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32812_cast_fp16 = slice_by_index(begin = var_32812_begin_0, end = var_32812_end_0, end_mask = var_32812_end_mask_0, x = var_32377_cast_fp16)[name = tensor("op_32812_cast_fp16")]; + tensor var_32819_begin_0 = const()[name = tensor("op_32819_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_32819_end_0 = const()[name = tensor("op_32819_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_32819_end_mask_0 = const()[name = tensor("op_32819_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32819_cast_fp16 = slice_by_index(begin = var_32819_begin_0, end = var_32819_end_0, end_mask = var_32819_end_mask_0, x = var_32377_cast_fp16)[name = tensor("op_32819_cast_fp16")]; + tensor var_32826_begin_0 = const()[name = tensor("op_32826_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_32826_end_0 = const()[name = tensor("op_32826_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_32826_end_mask_0 = const()[name = tensor("op_32826_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32826_cast_fp16 = slice_by_index(begin = var_32826_begin_0, end = var_32826_end_0, end_mask = var_32826_end_mask_0, x = var_32381_cast_fp16)[name = tensor("op_32826_cast_fp16")]; + tensor var_32833_begin_0 = const()[name = tensor("op_32833_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_32833_end_0 = const()[name = tensor("op_32833_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_32833_end_mask_0 = const()[name = tensor("op_32833_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32833_cast_fp16 = slice_by_index(begin = var_32833_begin_0, end = var_32833_end_0, end_mask = var_32833_end_mask_0, x = var_32381_cast_fp16)[name = tensor("op_32833_cast_fp16")]; + tensor var_32840_begin_0 = const()[name = tensor("op_32840_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_32840_end_0 = const()[name = tensor("op_32840_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_32840_end_mask_0 = const()[name = tensor("op_32840_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32840_cast_fp16 = slice_by_index(begin = var_32840_begin_0, end = var_32840_end_0, end_mask = var_32840_end_mask_0, x = var_32381_cast_fp16)[name = tensor("op_32840_cast_fp16")]; + tensor var_32847_begin_0 = const()[name = tensor("op_32847_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_32847_end_0 = const()[name = tensor("op_32847_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_32847_end_mask_0 = const()[name = tensor("op_32847_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32847_cast_fp16 = slice_by_index(begin = var_32847_begin_0, end = var_32847_end_0, end_mask = var_32847_end_mask_0, x = var_32381_cast_fp16)[name = tensor("op_32847_cast_fp16")]; + tensor var_32854_begin_0 = const()[name = tensor("op_32854_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_32854_end_0 = const()[name = tensor("op_32854_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_32854_end_mask_0 = const()[name = tensor("op_32854_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32854_cast_fp16 = slice_by_index(begin = var_32854_begin_0, end = var_32854_end_0, end_mask = var_32854_end_mask_0, x = var_32385_cast_fp16)[name = tensor("op_32854_cast_fp16")]; + tensor var_32861_begin_0 = const()[name = tensor("op_32861_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_32861_end_0 = const()[name = tensor("op_32861_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_32861_end_mask_0 = const()[name = tensor("op_32861_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32861_cast_fp16 = slice_by_index(begin = var_32861_begin_0, end = var_32861_end_0, end_mask = var_32861_end_mask_0, x = var_32385_cast_fp16)[name = tensor("op_32861_cast_fp16")]; + tensor var_32868_begin_0 = const()[name = tensor("op_32868_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_32868_end_0 = const()[name = tensor("op_32868_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_32868_end_mask_0 = const()[name = tensor("op_32868_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32868_cast_fp16 = slice_by_index(begin = var_32868_begin_0, end = var_32868_end_0, end_mask = var_32868_end_mask_0, x = var_32385_cast_fp16)[name = tensor("op_32868_cast_fp16")]; + tensor var_32875_begin_0 = const()[name = tensor("op_32875_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_32875_end_0 = const()[name = tensor("op_32875_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_32875_end_mask_0 = const()[name = tensor("op_32875_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32875_cast_fp16 = slice_by_index(begin = var_32875_begin_0, end = var_32875_end_0, end_mask = var_32875_end_mask_0, x = var_32385_cast_fp16)[name = tensor("op_32875_cast_fp16")]; + tensor var_32882_begin_0 = const()[name = tensor("op_32882_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_32882_end_0 = const()[name = tensor("op_32882_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_32882_end_mask_0 = const()[name = tensor("op_32882_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32882_cast_fp16 = slice_by_index(begin = var_32882_begin_0, end = var_32882_end_0, end_mask = var_32882_end_mask_0, x = var_32389_cast_fp16)[name = tensor("op_32882_cast_fp16")]; + tensor var_32889_begin_0 = const()[name = tensor("op_32889_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_32889_end_0 = const()[name = tensor("op_32889_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_32889_end_mask_0 = const()[name = tensor("op_32889_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32889_cast_fp16 = slice_by_index(begin = var_32889_begin_0, end = var_32889_end_0, end_mask = var_32889_end_mask_0, x = var_32389_cast_fp16)[name = tensor("op_32889_cast_fp16")]; + tensor var_32896_begin_0 = const()[name = tensor("op_32896_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_32896_end_0 = const()[name = tensor("op_32896_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_32896_end_mask_0 = const()[name = tensor("op_32896_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32896_cast_fp16 = slice_by_index(begin = var_32896_begin_0, end = var_32896_end_0, end_mask = var_32896_end_mask_0, x = var_32389_cast_fp16)[name = tensor("op_32896_cast_fp16")]; + tensor var_32903_begin_0 = const()[name = tensor("op_32903_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_32903_end_0 = const()[name = tensor("op_32903_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_32903_end_mask_0 = const()[name = tensor("op_32903_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32903_cast_fp16 = slice_by_index(begin = var_32903_begin_0, end = var_32903_end_0, end_mask = var_32903_end_mask_0, x = var_32389_cast_fp16)[name = tensor("op_32903_cast_fp16")]; + tensor var_32910_begin_0 = const()[name = tensor("op_32910_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_32910_end_0 = const()[name = tensor("op_32910_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_32910_end_mask_0 = const()[name = tensor("op_32910_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32910_cast_fp16 = slice_by_index(begin = var_32910_begin_0, end = var_32910_end_0, end_mask = var_32910_end_mask_0, x = var_32393_cast_fp16)[name = tensor("op_32910_cast_fp16")]; + tensor var_32917_begin_0 = const()[name = tensor("op_32917_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_32917_end_0 = const()[name = tensor("op_32917_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_32917_end_mask_0 = const()[name = tensor("op_32917_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32917_cast_fp16 = slice_by_index(begin = var_32917_begin_0, end = var_32917_end_0, end_mask = var_32917_end_mask_0, x = var_32393_cast_fp16)[name = tensor("op_32917_cast_fp16")]; + tensor var_32924_begin_0 = const()[name = tensor("op_32924_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_32924_end_0 = const()[name = tensor("op_32924_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_32924_end_mask_0 = const()[name = tensor("op_32924_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32924_cast_fp16 = slice_by_index(begin = var_32924_begin_0, end = var_32924_end_0, end_mask = var_32924_end_mask_0, x = var_32393_cast_fp16)[name = tensor("op_32924_cast_fp16")]; + tensor var_32931_begin_0 = const()[name = tensor("op_32931_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_32931_end_0 = const()[name = tensor("op_32931_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_32931_end_mask_0 = const()[name = tensor("op_32931_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32931_cast_fp16 = slice_by_index(begin = var_32931_begin_0, end = var_32931_end_0, end_mask = var_32931_end_mask_0, x = var_32393_cast_fp16)[name = tensor("op_32931_cast_fp16")]; + tensor var_32938_begin_0 = const()[name = tensor("op_32938_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_32938_end_0 = const()[name = tensor("op_32938_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_32938_end_mask_0 = const()[name = tensor("op_32938_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32938_cast_fp16 = slice_by_index(begin = var_32938_begin_0, end = var_32938_end_0, end_mask = var_32938_end_mask_0, x = var_32397_cast_fp16)[name = tensor("op_32938_cast_fp16")]; + tensor var_32945_begin_0 = const()[name = tensor("op_32945_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_32945_end_0 = const()[name = tensor("op_32945_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_32945_end_mask_0 = const()[name = tensor("op_32945_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32945_cast_fp16 = slice_by_index(begin = var_32945_begin_0, end = var_32945_end_0, end_mask = var_32945_end_mask_0, x = var_32397_cast_fp16)[name = tensor("op_32945_cast_fp16")]; + tensor var_32952_begin_0 = const()[name = tensor("op_32952_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_32952_end_0 = const()[name = tensor("op_32952_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_32952_end_mask_0 = const()[name = tensor("op_32952_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32952_cast_fp16 = slice_by_index(begin = var_32952_begin_0, end = var_32952_end_0, end_mask = var_32952_end_mask_0, x = var_32397_cast_fp16)[name = tensor("op_32952_cast_fp16")]; + tensor var_32959_begin_0 = const()[name = tensor("op_32959_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_32959_end_0 = const()[name = tensor("op_32959_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_32959_end_mask_0 = const()[name = tensor("op_32959_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32959_cast_fp16 = slice_by_index(begin = var_32959_begin_0, end = var_32959_end_0, end_mask = var_32959_end_mask_0, x = var_32397_cast_fp16)[name = tensor("op_32959_cast_fp16")]; + tensor k_43_perm_0 = const()[name = tensor("k_43_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_32964_begin_0 = const()[name = tensor("op_32964_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_32964_end_0 = const()[name = tensor("op_32964_end_0"), val = tensor([1, 1500, 1, 64])]; + tensor var_32964_end_mask_0 = const()[name = tensor("op_32964_end_mask_0"), val = tensor([true, true, true, false])]; + tensor transpose_10 = transpose(perm = k_43_perm_0, x = key_43_cast_fp16)[name = tensor("transpose_10")]; + tensor var_32964_cast_fp16 = slice_by_index(begin = var_32964_begin_0, end = var_32964_end_0, end_mask = var_32964_end_mask_0, x = transpose_10)[name = tensor("op_32964_cast_fp16")]; + tensor var_32968_begin_0 = const()[name = tensor("op_32968_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor var_32968_end_0 = const()[name = tensor("op_32968_end_0"), val = tensor([1, 1500, 1, 128])]; + tensor var_32968_end_mask_0 = const()[name = tensor("op_32968_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32968_cast_fp16 = slice_by_index(begin = var_32968_begin_0, end = var_32968_end_0, end_mask = var_32968_end_mask_0, x = transpose_10)[name = tensor("op_32968_cast_fp16")]; + tensor var_32972_begin_0 = const()[name = tensor("op_32972_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_32972_end_0 = const()[name = tensor("op_32972_end_0"), val = tensor([1, 1500, 1, 192])]; + tensor var_32972_end_mask_0 = const()[name = tensor("op_32972_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32972_cast_fp16 = slice_by_index(begin = var_32972_begin_0, end = var_32972_end_0, end_mask = var_32972_end_mask_0, x = transpose_10)[name = tensor("op_32972_cast_fp16")]; + tensor var_32976_begin_0 = const()[name = tensor("op_32976_begin_0"), val = tensor([0, 0, 0, 192])]; + tensor var_32976_end_0 = const()[name = tensor("op_32976_end_0"), val = tensor([1, 1500, 1, 256])]; + tensor var_32976_end_mask_0 = const()[name = tensor("op_32976_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32976_cast_fp16 = slice_by_index(begin = var_32976_begin_0, end = var_32976_end_0, end_mask = var_32976_end_mask_0, x = transpose_10)[name = tensor("op_32976_cast_fp16")]; + tensor var_32980_begin_0 = const()[name = tensor("op_32980_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_32980_end_0 = const()[name = tensor("op_32980_end_0"), val = tensor([1, 1500, 1, 320])]; + tensor var_32980_end_mask_0 = const()[name = tensor("op_32980_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32980_cast_fp16 = slice_by_index(begin = var_32980_begin_0, end = var_32980_end_0, end_mask = var_32980_end_mask_0, x = transpose_10)[name = tensor("op_32980_cast_fp16")]; + tensor var_32984_begin_0 = const()[name = tensor("op_32984_begin_0"), val = tensor([0, 0, 0, 320])]; + tensor var_32984_end_0 = const()[name = tensor("op_32984_end_0"), val = tensor([1, 1500, 1, 384])]; + tensor var_32984_end_mask_0 = const()[name = tensor("op_32984_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32984_cast_fp16 = slice_by_index(begin = var_32984_begin_0, end = var_32984_end_0, end_mask = var_32984_end_mask_0, x = transpose_10)[name = tensor("op_32984_cast_fp16")]; + tensor var_32988_begin_0 = const()[name = tensor("op_32988_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_32988_end_0 = const()[name = tensor("op_32988_end_0"), val = tensor([1, 1500, 1, 448])]; + tensor var_32988_end_mask_0 = const()[name = tensor("op_32988_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32988_cast_fp16 = slice_by_index(begin = var_32988_begin_0, end = var_32988_end_0, end_mask = var_32988_end_mask_0, x = transpose_10)[name = tensor("op_32988_cast_fp16")]; + tensor var_32992_begin_0 = const()[name = tensor("op_32992_begin_0"), val = tensor([0, 0, 0, 448])]; + tensor var_32992_end_0 = const()[name = tensor("op_32992_end_0"), val = tensor([1, 1500, 1, 512])]; + tensor var_32992_end_mask_0 = const()[name = tensor("op_32992_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32992_cast_fp16 = slice_by_index(begin = var_32992_begin_0, end = var_32992_end_0, end_mask = var_32992_end_mask_0, x = transpose_10)[name = tensor("op_32992_cast_fp16")]; + tensor var_32996_begin_0 = const()[name = tensor("op_32996_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_32996_end_0 = const()[name = tensor("op_32996_end_0"), val = tensor([1, 1500, 1, 576])]; + tensor var_32996_end_mask_0 = const()[name = tensor("op_32996_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_32996_cast_fp16 = slice_by_index(begin = var_32996_begin_0, end = var_32996_end_0, end_mask = var_32996_end_mask_0, x = transpose_10)[name = tensor("op_32996_cast_fp16")]; + tensor var_33000_begin_0 = const()[name = tensor("op_33000_begin_0"), val = tensor([0, 0, 0, 576])]; + tensor var_33000_end_0 = const()[name = tensor("op_33000_end_0"), val = tensor([1, 1500, 1, 640])]; + tensor var_33000_end_mask_0 = const()[name = tensor("op_33000_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_33000_cast_fp16 = slice_by_index(begin = var_33000_begin_0, end = var_33000_end_0, end_mask = var_33000_end_mask_0, x = transpose_10)[name = tensor("op_33000_cast_fp16")]; + tensor var_33004_begin_0 = const()[name = tensor("op_33004_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_33004_end_0 = const()[name = tensor("op_33004_end_0"), val = tensor([1, 1500, 1, 704])]; + tensor var_33004_end_mask_0 = const()[name = tensor("op_33004_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_33004_cast_fp16 = slice_by_index(begin = var_33004_begin_0, end = var_33004_end_0, end_mask = var_33004_end_mask_0, x = transpose_10)[name = tensor("op_33004_cast_fp16")]; + tensor var_33008_begin_0 = const()[name = tensor("op_33008_begin_0"), val = tensor([0, 0, 0, 704])]; + tensor var_33008_end_0 = const()[name = tensor("op_33008_end_0"), val = tensor([1, 1500, 1, 768])]; + tensor var_33008_end_mask_0 = const()[name = tensor("op_33008_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_33008_cast_fp16 = slice_by_index(begin = var_33008_begin_0, end = var_33008_end_0, end_mask = var_33008_end_mask_0, x = transpose_10)[name = tensor("op_33008_cast_fp16")]; + tensor var_33012_begin_0 = const()[name = tensor("op_33012_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_33012_end_0 = const()[name = tensor("op_33012_end_0"), val = tensor([1, 1500, 1, 832])]; + tensor var_33012_end_mask_0 = const()[name = tensor("op_33012_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_33012_cast_fp16 = slice_by_index(begin = var_33012_begin_0, end = var_33012_end_0, end_mask = var_33012_end_mask_0, x = transpose_10)[name = tensor("op_33012_cast_fp16")]; + tensor var_33016_begin_0 = const()[name = tensor("op_33016_begin_0"), val = tensor([0, 0, 0, 832])]; + tensor var_33016_end_0 = const()[name = tensor("op_33016_end_0"), val = tensor([1, 1500, 1, 896])]; + tensor var_33016_end_mask_0 = const()[name = tensor("op_33016_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_33016_cast_fp16 = slice_by_index(begin = var_33016_begin_0, end = var_33016_end_0, end_mask = var_33016_end_mask_0, x = transpose_10)[name = tensor("op_33016_cast_fp16")]; + tensor var_33020_begin_0 = const()[name = tensor("op_33020_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_33020_end_0 = const()[name = tensor("op_33020_end_0"), val = tensor([1, 1500, 1, 960])]; + tensor var_33020_end_mask_0 = const()[name = tensor("op_33020_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_33020_cast_fp16 = slice_by_index(begin = var_33020_begin_0, end = var_33020_end_0, end_mask = var_33020_end_mask_0, x = transpose_10)[name = tensor("op_33020_cast_fp16")]; + tensor var_33024_begin_0 = const()[name = tensor("op_33024_begin_0"), val = tensor([0, 0, 0, 960])]; + tensor var_33024_end_0 = const()[name = tensor("op_33024_end_0"), val = tensor([1, 1500, 1, 1024])]; + tensor var_33024_end_mask_0 = const()[name = tensor("op_33024_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_33024_cast_fp16 = slice_by_index(begin = var_33024_begin_0, end = var_33024_end_0, end_mask = var_33024_end_mask_0, x = transpose_10)[name = tensor("op_33024_cast_fp16")]; + tensor var_33028_begin_0 = const()[name = tensor("op_33028_begin_0"), val = tensor([0, 0, 0, 1024])]; + tensor var_33028_end_0 = const()[name = tensor("op_33028_end_0"), val = tensor([1, 1500, 1, 1088])]; + tensor var_33028_end_mask_0 = const()[name = tensor("op_33028_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_33028_cast_fp16 = slice_by_index(begin = var_33028_begin_0, end = var_33028_end_0, end_mask = var_33028_end_mask_0, x = transpose_10)[name = tensor("op_33028_cast_fp16")]; + tensor var_33032_begin_0 = const()[name = tensor("op_33032_begin_0"), val = tensor([0, 0, 0, 1088])]; + tensor var_33032_end_0 = const()[name = tensor("op_33032_end_0"), val = tensor([1, 1500, 1, 1152])]; + tensor var_33032_end_mask_0 = const()[name = tensor("op_33032_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_33032_cast_fp16 = slice_by_index(begin = var_33032_begin_0, end = var_33032_end_0, end_mask = var_33032_end_mask_0, x = transpose_10)[name = tensor("op_33032_cast_fp16")]; + tensor var_33036_begin_0 = const()[name = tensor("op_33036_begin_0"), val = tensor([0, 0, 0, 1152])]; + tensor var_33036_end_0 = const()[name = tensor("op_33036_end_0"), val = tensor([1, 1500, 1, 1216])]; + tensor var_33036_end_mask_0 = const()[name = tensor("op_33036_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_33036_cast_fp16 = slice_by_index(begin = var_33036_begin_0, end = var_33036_end_0, end_mask = var_33036_end_mask_0, x = transpose_10)[name = tensor("op_33036_cast_fp16")]; + tensor var_33040_begin_0 = const()[name = tensor("op_33040_begin_0"), val = tensor([0, 0, 0, 1216])]; + tensor var_33040_end_0 = const()[name = tensor("op_33040_end_0"), val = tensor([1, 1500, 1, 1280])]; + tensor var_33040_end_mask_0 = const()[name = tensor("op_33040_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_33040_cast_fp16 = slice_by_index(begin = var_33040_begin_0, end = var_33040_end_0, end_mask = var_33040_end_mask_0, x = transpose_10)[name = tensor("op_33040_cast_fp16")]; + tensor var_33042_begin_0 = const()[name = tensor("op_33042_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_33042_end_0 = const()[name = tensor("op_33042_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_33042_end_mask_0 = const()[name = tensor("op_33042_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_33042_cast_fp16 = slice_by_index(begin = var_33042_begin_0, end = var_33042_end_0, end_mask = var_33042_end_mask_0, x = value_43_cast_fp16)[name = tensor("op_33042_cast_fp16")]; + tensor var_33046_begin_0 = const()[name = tensor("op_33046_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_33046_end_0 = const()[name = tensor("op_33046_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_33046_end_mask_0 = const()[name = tensor("op_33046_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_33046_cast_fp16 = slice_by_index(begin = var_33046_begin_0, end = var_33046_end_0, end_mask = var_33046_end_mask_0, x = value_43_cast_fp16)[name = tensor("op_33046_cast_fp16")]; + tensor var_33050_begin_0 = const()[name = tensor("op_33050_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_33050_end_0 = const()[name = tensor("op_33050_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_33050_end_mask_0 = const()[name = tensor("op_33050_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_33050_cast_fp16 = slice_by_index(begin = var_33050_begin_0, end = var_33050_end_0, end_mask = var_33050_end_mask_0, x = value_43_cast_fp16)[name = tensor("op_33050_cast_fp16")]; + tensor var_33054_begin_0 = const()[name = tensor("op_33054_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_33054_end_0 = const()[name = tensor("op_33054_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_33054_end_mask_0 = const()[name = tensor("op_33054_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_33054_cast_fp16 = slice_by_index(begin = var_33054_begin_0, end = var_33054_end_0, end_mask = var_33054_end_mask_0, x = value_43_cast_fp16)[name = tensor("op_33054_cast_fp16")]; + tensor var_33058_begin_0 = const()[name = tensor("op_33058_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_33058_end_0 = const()[name = tensor("op_33058_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_33058_end_mask_0 = const()[name = tensor("op_33058_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_33058_cast_fp16 = slice_by_index(begin = var_33058_begin_0, end = var_33058_end_0, end_mask = var_33058_end_mask_0, x = value_43_cast_fp16)[name = tensor("op_33058_cast_fp16")]; + tensor var_33062_begin_0 = const()[name = tensor("op_33062_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_33062_end_0 = const()[name = tensor("op_33062_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_33062_end_mask_0 = const()[name = tensor("op_33062_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_33062_cast_fp16 = slice_by_index(begin = var_33062_begin_0, end = var_33062_end_0, end_mask = var_33062_end_mask_0, x = value_43_cast_fp16)[name = tensor("op_33062_cast_fp16")]; + tensor var_33066_begin_0 = const()[name = tensor("op_33066_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_33066_end_0 = const()[name = tensor("op_33066_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_33066_end_mask_0 = const()[name = tensor("op_33066_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_33066_cast_fp16 = slice_by_index(begin = var_33066_begin_0, end = var_33066_end_0, end_mask = var_33066_end_mask_0, x = value_43_cast_fp16)[name = tensor("op_33066_cast_fp16")]; + tensor var_33070_begin_0 = const()[name = tensor("op_33070_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_33070_end_0 = const()[name = tensor("op_33070_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_33070_end_mask_0 = const()[name = tensor("op_33070_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_33070_cast_fp16 = slice_by_index(begin = var_33070_begin_0, end = var_33070_end_0, end_mask = var_33070_end_mask_0, x = value_43_cast_fp16)[name = tensor("op_33070_cast_fp16")]; + tensor var_33074_begin_0 = const()[name = tensor("op_33074_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_33074_end_0 = const()[name = tensor("op_33074_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_33074_end_mask_0 = const()[name = tensor("op_33074_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_33074_cast_fp16 = slice_by_index(begin = var_33074_begin_0, end = var_33074_end_0, end_mask = var_33074_end_mask_0, x = value_43_cast_fp16)[name = tensor("op_33074_cast_fp16")]; + tensor var_33078_begin_0 = const()[name = tensor("op_33078_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_33078_end_0 = const()[name = tensor("op_33078_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_33078_end_mask_0 = const()[name = tensor("op_33078_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_33078_cast_fp16 = slice_by_index(begin = var_33078_begin_0, end = var_33078_end_0, end_mask = var_33078_end_mask_0, x = value_43_cast_fp16)[name = tensor("op_33078_cast_fp16")]; + tensor var_33082_begin_0 = const()[name = tensor("op_33082_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_33082_end_0 = const()[name = tensor("op_33082_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_33082_end_mask_0 = const()[name = tensor("op_33082_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_33082_cast_fp16 = slice_by_index(begin = var_33082_begin_0, end = var_33082_end_0, end_mask = var_33082_end_mask_0, x = value_43_cast_fp16)[name = tensor("op_33082_cast_fp16")]; + tensor var_33086_begin_0 = const()[name = tensor("op_33086_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_33086_end_0 = const()[name = tensor("op_33086_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_33086_end_mask_0 = const()[name = tensor("op_33086_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_33086_cast_fp16 = slice_by_index(begin = var_33086_begin_0, end = var_33086_end_0, end_mask = var_33086_end_mask_0, x = value_43_cast_fp16)[name = tensor("op_33086_cast_fp16")]; + tensor var_33090_begin_0 = const()[name = tensor("op_33090_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_33090_end_0 = const()[name = tensor("op_33090_end_0"), val = tensor([1, 832, 1, 1500])]; + tensor var_33090_end_mask_0 = const()[name = tensor("op_33090_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_33090_cast_fp16 = slice_by_index(begin = var_33090_begin_0, end = var_33090_end_0, end_mask = var_33090_end_mask_0, x = value_43_cast_fp16)[name = tensor("op_33090_cast_fp16")]; + tensor var_33094_begin_0 = const()[name = tensor("op_33094_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_33094_end_0 = const()[name = tensor("op_33094_end_0"), val = tensor([1, 896, 1, 1500])]; + tensor var_33094_end_mask_0 = const()[name = tensor("op_33094_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_33094_cast_fp16 = slice_by_index(begin = var_33094_begin_0, end = var_33094_end_0, end_mask = var_33094_end_mask_0, x = value_43_cast_fp16)[name = tensor("op_33094_cast_fp16")]; + tensor var_33098_begin_0 = const()[name = tensor("op_33098_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_33098_end_0 = const()[name = tensor("op_33098_end_0"), val = tensor([1, 960, 1, 1500])]; + tensor var_33098_end_mask_0 = const()[name = tensor("op_33098_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_33098_cast_fp16 = slice_by_index(begin = var_33098_begin_0, end = var_33098_end_0, end_mask = var_33098_end_mask_0, x = value_43_cast_fp16)[name = tensor("op_33098_cast_fp16")]; + tensor var_33102_begin_0 = const()[name = tensor("op_33102_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_33102_end_0 = const()[name = tensor("op_33102_end_0"), val = tensor([1, 1024, 1, 1500])]; + tensor var_33102_end_mask_0 = const()[name = tensor("op_33102_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_33102_cast_fp16 = slice_by_index(begin = var_33102_begin_0, end = var_33102_end_0, end_mask = var_33102_end_mask_0, x = value_43_cast_fp16)[name = tensor("op_33102_cast_fp16")]; + tensor var_33106_begin_0 = const()[name = tensor("op_33106_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_33106_end_0 = const()[name = tensor("op_33106_end_0"), val = tensor([1, 1088, 1, 1500])]; + tensor var_33106_end_mask_0 = const()[name = tensor("op_33106_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_33106_cast_fp16 = slice_by_index(begin = var_33106_begin_0, end = var_33106_end_0, end_mask = var_33106_end_mask_0, x = value_43_cast_fp16)[name = tensor("op_33106_cast_fp16")]; + tensor var_33110_begin_0 = const()[name = tensor("op_33110_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_33110_end_0 = const()[name = tensor("op_33110_end_0"), val = tensor([1, 1152, 1, 1500])]; + tensor var_33110_end_mask_0 = const()[name = tensor("op_33110_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_33110_cast_fp16 = slice_by_index(begin = var_33110_begin_0, end = var_33110_end_0, end_mask = var_33110_end_mask_0, x = value_43_cast_fp16)[name = tensor("op_33110_cast_fp16")]; + tensor var_33114_begin_0 = const()[name = tensor("op_33114_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_33114_end_0 = const()[name = tensor("op_33114_end_0"), val = tensor([1, 1216, 1, 1500])]; + tensor var_33114_end_mask_0 = const()[name = tensor("op_33114_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_33114_cast_fp16 = slice_by_index(begin = var_33114_begin_0, end = var_33114_end_0, end_mask = var_33114_end_mask_0, x = value_43_cast_fp16)[name = tensor("op_33114_cast_fp16")]; + tensor var_33118_begin_0 = const()[name = tensor("op_33118_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_33118_end_0 = const()[name = tensor("op_33118_end_0"), val = tensor([1, 1280, 1, 1500])]; + tensor var_33118_end_mask_0 = const()[name = tensor("op_33118_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_33118_cast_fp16 = slice_by_index(begin = var_33118_begin_0, end = var_33118_end_0, end_mask = var_33118_end_mask_0, x = value_43_cast_fp16)[name = tensor("op_33118_cast_fp16")]; + tensor var_33122_equation_0 = const()[name = tensor("op_33122_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33122_cast_fp16 = einsum(equation = var_33122_equation_0, values = (var_32964_cast_fp16, var_32406_cast_fp16))[name = tensor("op_33122_cast_fp16")]; + tensor var_33123_to_fp16 = const()[name = tensor("op_33123_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3361_cast_fp16 = mul(x = var_33122_cast_fp16, y = var_33123_to_fp16)[name = tensor("aw_chunk_3361_cast_fp16")]; + tensor var_33126_equation_0 = const()[name = tensor("op_33126_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33126_cast_fp16 = einsum(equation = var_33126_equation_0, values = (var_32964_cast_fp16, var_32413_cast_fp16))[name = tensor("op_33126_cast_fp16")]; + tensor var_33127_to_fp16 = const()[name = tensor("op_33127_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3363_cast_fp16 = mul(x = var_33126_cast_fp16, y = var_33127_to_fp16)[name = tensor("aw_chunk_3363_cast_fp16")]; + tensor var_33130_equation_0 = const()[name = tensor("op_33130_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33130_cast_fp16 = einsum(equation = var_33130_equation_0, values = (var_32964_cast_fp16, var_32420_cast_fp16))[name = tensor("op_33130_cast_fp16")]; + tensor var_33131_to_fp16 = const()[name = tensor("op_33131_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3365_cast_fp16 = mul(x = var_33130_cast_fp16, y = var_33131_to_fp16)[name = tensor("aw_chunk_3365_cast_fp16")]; + tensor var_33134_equation_0 = const()[name = tensor("op_33134_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33134_cast_fp16 = einsum(equation = var_33134_equation_0, values = (var_32964_cast_fp16, var_32427_cast_fp16))[name = tensor("op_33134_cast_fp16")]; + tensor var_33135_to_fp16 = const()[name = tensor("op_33135_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3367_cast_fp16 = mul(x = var_33134_cast_fp16, y = var_33135_to_fp16)[name = tensor("aw_chunk_3367_cast_fp16")]; + tensor var_33138_equation_0 = const()[name = tensor("op_33138_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33138_cast_fp16 = einsum(equation = var_33138_equation_0, values = (var_32968_cast_fp16, var_32434_cast_fp16))[name = tensor("op_33138_cast_fp16")]; + tensor var_33139_to_fp16 = const()[name = tensor("op_33139_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3369_cast_fp16 = mul(x = var_33138_cast_fp16, y = var_33139_to_fp16)[name = tensor("aw_chunk_3369_cast_fp16")]; + tensor var_33142_equation_0 = const()[name = tensor("op_33142_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33142_cast_fp16 = einsum(equation = var_33142_equation_0, values = (var_32968_cast_fp16, var_32441_cast_fp16))[name = tensor("op_33142_cast_fp16")]; + tensor var_33143_to_fp16 = const()[name = tensor("op_33143_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3371_cast_fp16 = mul(x = var_33142_cast_fp16, y = var_33143_to_fp16)[name = tensor("aw_chunk_3371_cast_fp16")]; + tensor var_33146_equation_0 = const()[name = tensor("op_33146_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33146_cast_fp16 = einsum(equation = var_33146_equation_0, values = (var_32968_cast_fp16, var_32448_cast_fp16))[name = tensor("op_33146_cast_fp16")]; + tensor var_33147_to_fp16 = const()[name = tensor("op_33147_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3373_cast_fp16 = mul(x = var_33146_cast_fp16, y = var_33147_to_fp16)[name = tensor("aw_chunk_3373_cast_fp16")]; + tensor var_33150_equation_0 = const()[name = tensor("op_33150_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33150_cast_fp16 = einsum(equation = var_33150_equation_0, values = (var_32968_cast_fp16, var_32455_cast_fp16))[name = tensor("op_33150_cast_fp16")]; + tensor var_33151_to_fp16 = const()[name = tensor("op_33151_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3375_cast_fp16 = mul(x = var_33150_cast_fp16, y = var_33151_to_fp16)[name = tensor("aw_chunk_3375_cast_fp16")]; + tensor var_33154_equation_0 = const()[name = tensor("op_33154_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33154_cast_fp16 = einsum(equation = var_33154_equation_0, values = (var_32972_cast_fp16, var_32462_cast_fp16))[name = tensor("op_33154_cast_fp16")]; + tensor var_33155_to_fp16 = const()[name = tensor("op_33155_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3377_cast_fp16 = mul(x = var_33154_cast_fp16, y = var_33155_to_fp16)[name = tensor("aw_chunk_3377_cast_fp16")]; + tensor var_33158_equation_0 = const()[name = tensor("op_33158_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33158_cast_fp16 = einsum(equation = var_33158_equation_0, values = (var_32972_cast_fp16, var_32469_cast_fp16))[name = tensor("op_33158_cast_fp16")]; + tensor var_33159_to_fp16 = const()[name = tensor("op_33159_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3379_cast_fp16 = mul(x = var_33158_cast_fp16, y = var_33159_to_fp16)[name = tensor("aw_chunk_3379_cast_fp16")]; + tensor var_33162_equation_0 = const()[name = tensor("op_33162_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33162_cast_fp16 = einsum(equation = var_33162_equation_0, values = (var_32972_cast_fp16, var_32476_cast_fp16))[name = tensor("op_33162_cast_fp16")]; + tensor var_33163_to_fp16 = const()[name = tensor("op_33163_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3381_cast_fp16 = mul(x = var_33162_cast_fp16, y = var_33163_to_fp16)[name = tensor("aw_chunk_3381_cast_fp16")]; + tensor var_33166_equation_0 = const()[name = tensor("op_33166_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33166_cast_fp16 = einsum(equation = var_33166_equation_0, values = (var_32972_cast_fp16, var_32483_cast_fp16))[name = tensor("op_33166_cast_fp16")]; + tensor var_33167_to_fp16 = const()[name = tensor("op_33167_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3383_cast_fp16 = mul(x = var_33166_cast_fp16, y = var_33167_to_fp16)[name = tensor("aw_chunk_3383_cast_fp16")]; + tensor var_33170_equation_0 = const()[name = tensor("op_33170_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33170_cast_fp16 = einsum(equation = var_33170_equation_0, values = (var_32976_cast_fp16, var_32490_cast_fp16))[name = tensor("op_33170_cast_fp16")]; + tensor var_33171_to_fp16 = const()[name = tensor("op_33171_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3385_cast_fp16 = mul(x = var_33170_cast_fp16, y = var_33171_to_fp16)[name = tensor("aw_chunk_3385_cast_fp16")]; + tensor var_33174_equation_0 = const()[name = tensor("op_33174_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33174_cast_fp16 = einsum(equation = var_33174_equation_0, values = (var_32976_cast_fp16, var_32497_cast_fp16))[name = tensor("op_33174_cast_fp16")]; + tensor var_33175_to_fp16 = const()[name = tensor("op_33175_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3387_cast_fp16 = mul(x = var_33174_cast_fp16, y = var_33175_to_fp16)[name = tensor("aw_chunk_3387_cast_fp16")]; + tensor var_33178_equation_0 = const()[name = tensor("op_33178_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33178_cast_fp16 = einsum(equation = var_33178_equation_0, values = (var_32976_cast_fp16, var_32504_cast_fp16))[name = tensor("op_33178_cast_fp16")]; + tensor var_33179_to_fp16 = const()[name = tensor("op_33179_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3389_cast_fp16 = mul(x = var_33178_cast_fp16, y = var_33179_to_fp16)[name = tensor("aw_chunk_3389_cast_fp16")]; + tensor var_33182_equation_0 = const()[name = tensor("op_33182_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33182_cast_fp16 = einsum(equation = var_33182_equation_0, values = (var_32976_cast_fp16, var_32511_cast_fp16))[name = tensor("op_33182_cast_fp16")]; + tensor var_33183_to_fp16 = const()[name = tensor("op_33183_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3391_cast_fp16 = mul(x = var_33182_cast_fp16, y = var_33183_to_fp16)[name = tensor("aw_chunk_3391_cast_fp16")]; + tensor var_33186_equation_0 = const()[name = tensor("op_33186_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33186_cast_fp16 = einsum(equation = var_33186_equation_0, values = (var_32980_cast_fp16, var_32518_cast_fp16))[name = tensor("op_33186_cast_fp16")]; + tensor var_33187_to_fp16 = const()[name = tensor("op_33187_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3393_cast_fp16 = mul(x = var_33186_cast_fp16, y = var_33187_to_fp16)[name = tensor("aw_chunk_3393_cast_fp16")]; + tensor var_33190_equation_0 = const()[name = tensor("op_33190_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33190_cast_fp16 = einsum(equation = var_33190_equation_0, values = (var_32980_cast_fp16, var_32525_cast_fp16))[name = tensor("op_33190_cast_fp16")]; + tensor var_33191_to_fp16 = const()[name = tensor("op_33191_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3395_cast_fp16 = mul(x = var_33190_cast_fp16, y = var_33191_to_fp16)[name = tensor("aw_chunk_3395_cast_fp16")]; + tensor var_33194_equation_0 = const()[name = tensor("op_33194_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33194_cast_fp16 = einsum(equation = var_33194_equation_0, values = (var_32980_cast_fp16, var_32532_cast_fp16))[name = tensor("op_33194_cast_fp16")]; + tensor var_33195_to_fp16 = const()[name = tensor("op_33195_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3397_cast_fp16 = mul(x = var_33194_cast_fp16, y = var_33195_to_fp16)[name = tensor("aw_chunk_3397_cast_fp16")]; + tensor var_33198_equation_0 = const()[name = tensor("op_33198_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33198_cast_fp16 = einsum(equation = var_33198_equation_0, values = (var_32980_cast_fp16, var_32539_cast_fp16))[name = tensor("op_33198_cast_fp16")]; + tensor var_33199_to_fp16 = const()[name = tensor("op_33199_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3399_cast_fp16 = mul(x = var_33198_cast_fp16, y = var_33199_to_fp16)[name = tensor("aw_chunk_3399_cast_fp16")]; + tensor var_33202_equation_0 = const()[name = tensor("op_33202_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33202_cast_fp16 = einsum(equation = var_33202_equation_0, values = (var_32984_cast_fp16, var_32546_cast_fp16))[name = tensor("op_33202_cast_fp16")]; + tensor var_33203_to_fp16 = const()[name = tensor("op_33203_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3401_cast_fp16 = mul(x = var_33202_cast_fp16, y = var_33203_to_fp16)[name = tensor("aw_chunk_3401_cast_fp16")]; + tensor var_33206_equation_0 = const()[name = tensor("op_33206_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33206_cast_fp16 = einsum(equation = var_33206_equation_0, values = (var_32984_cast_fp16, var_32553_cast_fp16))[name = tensor("op_33206_cast_fp16")]; + tensor var_33207_to_fp16 = const()[name = tensor("op_33207_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3403_cast_fp16 = mul(x = var_33206_cast_fp16, y = var_33207_to_fp16)[name = tensor("aw_chunk_3403_cast_fp16")]; + tensor var_33210_equation_0 = const()[name = tensor("op_33210_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33210_cast_fp16 = einsum(equation = var_33210_equation_0, values = (var_32984_cast_fp16, var_32560_cast_fp16))[name = tensor("op_33210_cast_fp16")]; + tensor var_33211_to_fp16 = const()[name = tensor("op_33211_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3405_cast_fp16 = mul(x = var_33210_cast_fp16, y = var_33211_to_fp16)[name = tensor("aw_chunk_3405_cast_fp16")]; + tensor var_33214_equation_0 = const()[name = tensor("op_33214_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33214_cast_fp16 = einsum(equation = var_33214_equation_0, values = (var_32984_cast_fp16, var_32567_cast_fp16))[name = tensor("op_33214_cast_fp16")]; + tensor var_33215_to_fp16 = const()[name = tensor("op_33215_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3407_cast_fp16 = mul(x = var_33214_cast_fp16, y = var_33215_to_fp16)[name = tensor("aw_chunk_3407_cast_fp16")]; + tensor var_33218_equation_0 = const()[name = tensor("op_33218_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33218_cast_fp16 = einsum(equation = var_33218_equation_0, values = (var_32988_cast_fp16, var_32574_cast_fp16))[name = tensor("op_33218_cast_fp16")]; + tensor var_33219_to_fp16 = const()[name = tensor("op_33219_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3409_cast_fp16 = mul(x = var_33218_cast_fp16, y = var_33219_to_fp16)[name = tensor("aw_chunk_3409_cast_fp16")]; + tensor var_33222_equation_0 = const()[name = tensor("op_33222_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33222_cast_fp16 = einsum(equation = var_33222_equation_0, values = (var_32988_cast_fp16, var_32581_cast_fp16))[name = tensor("op_33222_cast_fp16")]; + tensor var_33223_to_fp16 = const()[name = tensor("op_33223_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3411_cast_fp16 = mul(x = var_33222_cast_fp16, y = var_33223_to_fp16)[name = tensor("aw_chunk_3411_cast_fp16")]; + tensor var_33226_equation_0 = const()[name = tensor("op_33226_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33226_cast_fp16 = einsum(equation = var_33226_equation_0, values = (var_32988_cast_fp16, var_32588_cast_fp16))[name = tensor("op_33226_cast_fp16")]; + tensor var_33227_to_fp16 = const()[name = tensor("op_33227_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3413_cast_fp16 = mul(x = var_33226_cast_fp16, y = var_33227_to_fp16)[name = tensor("aw_chunk_3413_cast_fp16")]; + tensor var_33230_equation_0 = const()[name = tensor("op_33230_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33230_cast_fp16 = einsum(equation = var_33230_equation_0, values = (var_32988_cast_fp16, var_32595_cast_fp16))[name = tensor("op_33230_cast_fp16")]; + tensor var_33231_to_fp16 = const()[name = tensor("op_33231_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3415_cast_fp16 = mul(x = var_33230_cast_fp16, y = var_33231_to_fp16)[name = tensor("aw_chunk_3415_cast_fp16")]; + tensor var_33234_equation_0 = const()[name = tensor("op_33234_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33234_cast_fp16 = einsum(equation = var_33234_equation_0, values = (var_32992_cast_fp16, var_32602_cast_fp16))[name = tensor("op_33234_cast_fp16")]; + tensor var_33235_to_fp16 = const()[name = tensor("op_33235_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3417_cast_fp16 = mul(x = var_33234_cast_fp16, y = var_33235_to_fp16)[name = tensor("aw_chunk_3417_cast_fp16")]; + tensor var_33238_equation_0 = const()[name = tensor("op_33238_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33238_cast_fp16 = einsum(equation = var_33238_equation_0, values = (var_32992_cast_fp16, var_32609_cast_fp16))[name = tensor("op_33238_cast_fp16")]; + tensor var_33239_to_fp16 = const()[name = tensor("op_33239_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3419_cast_fp16 = mul(x = var_33238_cast_fp16, y = var_33239_to_fp16)[name = tensor("aw_chunk_3419_cast_fp16")]; + tensor var_33242_equation_0 = const()[name = tensor("op_33242_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33242_cast_fp16 = einsum(equation = var_33242_equation_0, values = (var_32992_cast_fp16, var_32616_cast_fp16))[name = tensor("op_33242_cast_fp16")]; + tensor var_33243_to_fp16 = const()[name = tensor("op_33243_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3421_cast_fp16 = mul(x = var_33242_cast_fp16, y = var_33243_to_fp16)[name = tensor("aw_chunk_3421_cast_fp16")]; + tensor var_33246_equation_0 = const()[name = tensor("op_33246_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33246_cast_fp16 = einsum(equation = var_33246_equation_0, values = (var_32992_cast_fp16, var_32623_cast_fp16))[name = tensor("op_33246_cast_fp16")]; + tensor var_33247_to_fp16 = const()[name = tensor("op_33247_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3423_cast_fp16 = mul(x = var_33246_cast_fp16, y = var_33247_to_fp16)[name = tensor("aw_chunk_3423_cast_fp16")]; + tensor var_33250_equation_0 = const()[name = tensor("op_33250_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33250_cast_fp16 = einsum(equation = var_33250_equation_0, values = (var_32996_cast_fp16, var_32630_cast_fp16))[name = tensor("op_33250_cast_fp16")]; + tensor var_33251_to_fp16 = const()[name = tensor("op_33251_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3425_cast_fp16 = mul(x = var_33250_cast_fp16, y = var_33251_to_fp16)[name = tensor("aw_chunk_3425_cast_fp16")]; + tensor var_33254_equation_0 = const()[name = tensor("op_33254_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33254_cast_fp16 = einsum(equation = var_33254_equation_0, values = (var_32996_cast_fp16, var_32637_cast_fp16))[name = tensor("op_33254_cast_fp16")]; + tensor var_33255_to_fp16 = const()[name = tensor("op_33255_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3427_cast_fp16 = mul(x = var_33254_cast_fp16, y = var_33255_to_fp16)[name = tensor("aw_chunk_3427_cast_fp16")]; + tensor var_33258_equation_0 = const()[name = tensor("op_33258_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33258_cast_fp16 = einsum(equation = var_33258_equation_0, values = (var_32996_cast_fp16, var_32644_cast_fp16))[name = tensor("op_33258_cast_fp16")]; + tensor var_33259_to_fp16 = const()[name = tensor("op_33259_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3429_cast_fp16 = mul(x = var_33258_cast_fp16, y = var_33259_to_fp16)[name = tensor("aw_chunk_3429_cast_fp16")]; + tensor var_33262_equation_0 = const()[name = tensor("op_33262_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33262_cast_fp16 = einsum(equation = var_33262_equation_0, values = (var_32996_cast_fp16, var_32651_cast_fp16))[name = tensor("op_33262_cast_fp16")]; + tensor var_33263_to_fp16 = const()[name = tensor("op_33263_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3431_cast_fp16 = mul(x = var_33262_cast_fp16, y = var_33263_to_fp16)[name = tensor("aw_chunk_3431_cast_fp16")]; + tensor var_33266_equation_0 = const()[name = tensor("op_33266_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33266_cast_fp16 = einsum(equation = var_33266_equation_0, values = (var_33000_cast_fp16, var_32658_cast_fp16))[name = tensor("op_33266_cast_fp16")]; + tensor var_33267_to_fp16 = const()[name = tensor("op_33267_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3433_cast_fp16 = mul(x = var_33266_cast_fp16, y = var_33267_to_fp16)[name = tensor("aw_chunk_3433_cast_fp16")]; + tensor var_33270_equation_0 = const()[name = tensor("op_33270_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33270_cast_fp16 = einsum(equation = var_33270_equation_0, values = (var_33000_cast_fp16, var_32665_cast_fp16))[name = tensor("op_33270_cast_fp16")]; + tensor var_33271_to_fp16 = const()[name = tensor("op_33271_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3435_cast_fp16 = mul(x = var_33270_cast_fp16, y = var_33271_to_fp16)[name = tensor("aw_chunk_3435_cast_fp16")]; + tensor var_33274_equation_0 = const()[name = tensor("op_33274_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33274_cast_fp16 = einsum(equation = var_33274_equation_0, values = (var_33000_cast_fp16, var_32672_cast_fp16))[name = tensor("op_33274_cast_fp16")]; + tensor var_33275_to_fp16 = const()[name = tensor("op_33275_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3437_cast_fp16 = mul(x = var_33274_cast_fp16, y = var_33275_to_fp16)[name = tensor("aw_chunk_3437_cast_fp16")]; + tensor var_33278_equation_0 = const()[name = tensor("op_33278_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33278_cast_fp16 = einsum(equation = var_33278_equation_0, values = (var_33000_cast_fp16, var_32679_cast_fp16))[name = tensor("op_33278_cast_fp16")]; + tensor var_33279_to_fp16 = const()[name = tensor("op_33279_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3439_cast_fp16 = mul(x = var_33278_cast_fp16, y = var_33279_to_fp16)[name = tensor("aw_chunk_3439_cast_fp16")]; + tensor var_33282_equation_0 = const()[name = tensor("op_33282_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33282_cast_fp16 = einsum(equation = var_33282_equation_0, values = (var_33004_cast_fp16, var_32686_cast_fp16))[name = tensor("op_33282_cast_fp16")]; + tensor var_33283_to_fp16 = const()[name = tensor("op_33283_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3441_cast_fp16 = mul(x = var_33282_cast_fp16, y = var_33283_to_fp16)[name = tensor("aw_chunk_3441_cast_fp16")]; + tensor var_33286_equation_0 = const()[name = tensor("op_33286_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33286_cast_fp16 = einsum(equation = var_33286_equation_0, values = (var_33004_cast_fp16, var_32693_cast_fp16))[name = tensor("op_33286_cast_fp16")]; + tensor var_33287_to_fp16 = const()[name = tensor("op_33287_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3443_cast_fp16 = mul(x = var_33286_cast_fp16, y = var_33287_to_fp16)[name = tensor("aw_chunk_3443_cast_fp16")]; + tensor var_33290_equation_0 = const()[name = tensor("op_33290_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33290_cast_fp16 = einsum(equation = var_33290_equation_0, values = (var_33004_cast_fp16, var_32700_cast_fp16))[name = tensor("op_33290_cast_fp16")]; + tensor var_33291_to_fp16 = const()[name = tensor("op_33291_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3445_cast_fp16 = mul(x = var_33290_cast_fp16, y = var_33291_to_fp16)[name = tensor("aw_chunk_3445_cast_fp16")]; + tensor var_33294_equation_0 = const()[name = tensor("op_33294_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33294_cast_fp16 = einsum(equation = var_33294_equation_0, values = (var_33004_cast_fp16, var_32707_cast_fp16))[name = tensor("op_33294_cast_fp16")]; + tensor var_33295_to_fp16 = const()[name = tensor("op_33295_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3447_cast_fp16 = mul(x = var_33294_cast_fp16, y = var_33295_to_fp16)[name = tensor("aw_chunk_3447_cast_fp16")]; + tensor var_33298_equation_0 = const()[name = tensor("op_33298_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33298_cast_fp16 = einsum(equation = var_33298_equation_0, values = (var_33008_cast_fp16, var_32714_cast_fp16))[name = tensor("op_33298_cast_fp16")]; + tensor var_33299_to_fp16 = const()[name = tensor("op_33299_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3449_cast_fp16 = mul(x = var_33298_cast_fp16, y = var_33299_to_fp16)[name = tensor("aw_chunk_3449_cast_fp16")]; + tensor var_33302_equation_0 = const()[name = tensor("op_33302_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33302_cast_fp16 = einsum(equation = var_33302_equation_0, values = (var_33008_cast_fp16, var_32721_cast_fp16))[name = tensor("op_33302_cast_fp16")]; + tensor var_33303_to_fp16 = const()[name = tensor("op_33303_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3451_cast_fp16 = mul(x = var_33302_cast_fp16, y = var_33303_to_fp16)[name = tensor("aw_chunk_3451_cast_fp16")]; + tensor var_33306_equation_0 = const()[name = tensor("op_33306_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33306_cast_fp16 = einsum(equation = var_33306_equation_0, values = (var_33008_cast_fp16, var_32728_cast_fp16))[name = tensor("op_33306_cast_fp16")]; + tensor var_33307_to_fp16 = const()[name = tensor("op_33307_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3453_cast_fp16 = mul(x = var_33306_cast_fp16, y = var_33307_to_fp16)[name = tensor("aw_chunk_3453_cast_fp16")]; + tensor var_33310_equation_0 = const()[name = tensor("op_33310_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33310_cast_fp16 = einsum(equation = var_33310_equation_0, values = (var_33008_cast_fp16, var_32735_cast_fp16))[name = tensor("op_33310_cast_fp16")]; + tensor var_33311_to_fp16 = const()[name = tensor("op_33311_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3455_cast_fp16 = mul(x = var_33310_cast_fp16, y = var_33311_to_fp16)[name = tensor("aw_chunk_3455_cast_fp16")]; + tensor var_33314_equation_0 = const()[name = tensor("op_33314_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33314_cast_fp16 = einsum(equation = var_33314_equation_0, values = (var_33012_cast_fp16, var_32742_cast_fp16))[name = tensor("op_33314_cast_fp16")]; + tensor var_33315_to_fp16 = const()[name = tensor("op_33315_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3457_cast_fp16 = mul(x = var_33314_cast_fp16, y = var_33315_to_fp16)[name = tensor("aw_chunk_3457_cast_fp16")]; + tensor var_33318_equation_0 = const()[name = tensor("op_33318_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33318_cast_fp16 = einsum(equation = var_33318_equation_0, values = (var_33012_cast_fp16, var_32749_cast_fp16))[name = tensor("op_33318_cast_fp16")]; + tensor var_33319_to_fp16 = const()[name = tensor("op_33319_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3459_cast_fp16 = mul(x = var_33318_cast_fp16, y = var_33319_to_fp16)[name = tensor("aw_chunk_3459_cast_fp16")]; + tensor var_33322_equation_0 = const()[name = tensor("op_33322_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33322_cast_fp16 = einsum(equation = var_33322_equation_0, values = (var_33012_cast_fp16, var_32756_cast_fp16))[name = tensor("op_33322_cast_fp16")]; + tensor var_33323_to_fp16 = const()[name = tensor("op_33323_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3461_cast_fp16 = mul(x = var_33322_cast_fp16, y = var_33323_to_fp16)[name = tensor("aw_chunk_3461_cast_fp16")]; + tensor var_33326_equation_0 = const()[name = tensor("op_33326_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33326_cast_fp16 = einsum(equation = var_33326_equation_0, values = (var_33012_cast_fp16, var_32763_cast_fp16))[name = tensor("op_33326_cast_fp16")]; + tensor var_33327_to_fp16 = const()[name = tensor("op_33327_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3463_cast_fp16 = mul(x = var_33326_cast_fp16, y = var_33327_to_fp16)[name = tensor("aw_chunk_3463_cast_fp16")]; + tensor var_33330_equation_0 = const()[name = tensor("op_33330_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33330_cast_fp16 = einsum(equation = var_33330_equation_0, values = (var_33016_cast_fp16, var_32770_cast_fp16))[name = tensor("op_33330_cast_fp16")]; + tensor var_33331_to_fp16 = const()[name = tensor("op_33331_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3465_cast_fp16 = mul(x = var_33330_cast_fp16, y = var_33331_to_fp16)[name = tensor("aw_chunk_3465_cast_fp16")]; + tensor var_33334_equation_0 = const()[name = tensor("op_33334_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33334_cast_fp16 = einsum(equation = var_33334_equation_0, values = (var_33016_cast_fp16, var_32777_cast_fp16))[name = tensor("op_33334_cast_fp16")]; + tensor var_33335_to_fp16 = const()[name = tensor("op_33335_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3467_cast_fp16 = mul(x = var_33334_cast_fp16, y = var_33335_to_fp16)[name = tensor("aw_chunk_3467_cast_fp16")]; + tensor var_33338_equation_0 = const()[name = tensor("op_33338_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33338_cast_fp16 = einsum(equation = var_33338_equation_0, values = (var_33016_cast_fp16, var_32784_cast_fp16))[name = tensor("op_33338_cast_fp16")]; + tensor var_33339_to_fp16 = const()[name = tensor("op_33339_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3469_cast_fp16 = mul(x = var_33338_cast_fp16, y = var_33339_to_fp16)[name = tensor("aw_chunk_3469_cast_fp16")]; + tensor var_33342_equation_0 = const()[name = tensor("op_33342_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33342_cast_fp16 = einsum(equation = var_33342_equation_0, values = (var_33016_cast_fp16, var_32791_cast_fp16))[name = tensor("op_33342_cast_fp16")]; + tensor var_33343_to_fp16 = const()[name = tensor("op_33343_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3471_cast_fp16 = mul(x = var_33342_cast_fp16, y = var_33343_to_fp16)[name = tensor("aw_chunk_3471_cast_fp16")]; + tensor var_33346_equation_0 = const()[name = tensor("op_33346_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33346_cast_fp16 = einsum(equation = var_33346_equation_0, values = (var_33020_cast_fp16, var_32798_cast_fp16))[name = tensor("op_33346_cast_fp16")]; + tensor var_33347_to_fp16 = const()[name = tensor("op_33347_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3473_cast_fp16 = mul(x = var_33346_cast_fp16, y = var_33347_to_fp16)[name = tensor("aw_chunk_3473_cast_fp16")]; + tensor var_33350_equation_0 = const()[name = tensor("op_33350_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33350_cast_fp16 = einsum(equation = var_33350_equation_0, values = (var_33020_cast_fp16, var_32805_cast_fp16))[name = tensor("op_33350_cast_fp16")]; + tensor var_33351_to_fp16 = const()[name = tensor("op_33351_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3475_cast_fp16 = mul(x = var_33350_cast_fp16, y = var_33351_to_fp16)[name = tensor("aw_chunk_3475_cast_fp16")]; + tensor var_33354_equation_0 = const()[name = tensor("op_33354_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33354_cast_fp16 = einsum(equation = var_33354_equation_0, values = (var_33020_cast_fp16, var_32812_cast_fp16))[name = tensor("op_33354_cast_fp16")]; + tensor var_33355_to_fp16 = const()[name = tensor("op_33355_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3477_cast_fp16 = mul(x = var_33354_cast_fp16, y = var_33355_to_fp16)[name = tensor("aw_chunk_3477_cast_fp16")]; + tensor var_33358_equation_0 = const()[name = tensor("op_33358_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33358_cast_fp16 = einsum(equation = var_33358_equation_0, values = (var_33020_cast_fp16, var_32819_cast_fp16))[name = tensor("op_33358_cast_fp16")]; + tensor var_33359_to_fp16 = const()[name = tensor("op_33359_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3479_cast_fp16 = mul(x = var_33358_cast_fp16, y = var_33359_to_fp16)[name = tensor("aw_chunk_3479_cast_fp16")]; + tensor var_33362_equation_0 = const()[name = tensor("op_33362_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33362_cast_fp16 = einsum(equation = var_33362_equation_0, values = (var_33024_cast_fp16, var_32826_cast_fp16))[name = tensor("op_33362_cast_fp16")]; + tensor var_33363_to_fp16 = const()[name = tensor("op_33363_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3481_cast_fp16 = mul(x = var_33362_cast_fp16, y = var_33363_to_fp16)[name = tensor("aw_chunk_3481_cast_fp16")]; + tensor var_33366_equation_0 = const()[name = tensor("op_33366_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33366_cast_fp16 = einsum(equation = var_33366_equation_0, values = (var_33024_cast_fp16, var_32833_cast_fp16))[name = tensor("op_33366_cast_fp16")]; + tensor var_33367_to_fp16 = const()[name = tensor("op_33367_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3483_cast_fp16 = mul(x = var_33366_cast_fp16, y = var_33367_to_fp16)[name = tensor("aw_chunk_3483_cast_fp16")]; + tensor var_33370_equation_0 = const()[name = tensor("op_33370_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33370_cast_fp16 = einsum(equation = var_33370_equation_0, values = (var_33024_cast_fp16, var_32840_cast_fp16))[name = tensor("op_33370_cast_fp16")]; + tensor var_33371_to_fp16 = const()[name = tensor("op_33371_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3485_cast_fp16 = mul(x = var_33370_cast_fp16, y = var_33371_to_fp16)[name = tensor("aw_chunk_3485_cast_fp16")]; + tensor var_33374_equation_0 = const()[name = tensor("op_33374_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33374_cast_fp16 = einsum(equation = var_33374_equation_0, values = (var_33024_cast_fp16, var_32847_cast_fp16))[name = tensor("op_33374_cast_fp16")]; + tensor var_33375_to_fp16 = const()[name = tensor("op_33375_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3487_cast_fp16 = mul(x = var_33374_cast_fp16, y = var_33375_to_fp16)[name = tensor("aw_chunk_3487_cast_fp16")]; + tensor var_33378_equation_0 = const()[name = tensor("op_33378_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33378_cast_fp16 = einsum(equation = var_33378_equation_0, values = (var_33028_cast_fp16, var_32854_cast_fp16))[name = tensor("op_33378_cast_fp16")]; + tensor var_33379_to_fp16 = const()[name = tensor("op_33379_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3489_cast_fp16 = mul(x = var_33378_cast_fp16, y = var_33379_to_fp16)[name = tensor("aw_chunk_3489_cast_fp16")]; + tensor var_33382_equation_0 = const()[name = tensor("op_33382_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33382_cast_fp16 = einsum(equation = var_33382_equation_0, values = (var_33028_cast_fp16, var_32861_cast_fp16))[name = tensor("op_33382_cast_fp16")]; + tensor var_33383_to_fp16 = const()[name = tensor("op_33383_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3491_cast_fp16 = mul(x = var_33382_cast_fp16, y = var_33383_to_fp16)[name = tensor("aw_chunk_3491_cast_fp16")]; + tensor var_33386_equation_0 = const()[name = tensor("op_33386_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33386_cast_fp16 = einsum(equation = var_33386_equation_0, values = (var_33028_cast_fp16, var_32868_cast_fp16))[name = tensor("op_33386_cast_fp16")]; + tensor var_33387_to_fp16 = const()[name = tensor("op_33387_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3493_cast_fp16 = mul(x = var_33386_cast_fp16, y = var_33387_to_fp16)[name = tensor("aw_chunk_3493_cast_fp16")]; + tensor var_33390_equation_0 = const()[name = tensor("op_33390_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33390_cast_fp16 = einsum(equation = var_33390_equation_0, values = (var_33028_cast_fp16, var_32875_cast_fp16))[name = tensor("op_33390_cast_fp16")]; + tensor var_33391_to_fp16 = const()[name = tensor("op_33391_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3495_cast_fp16 = mul(x = var_33390_cast_fp16, y = var_33391_to_fp16)[name = tensor("aw_chunk_3495_cast_fp16")]; + tensor var_33394_equation_0 = const()[name = tensor("op_33394_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33394_cast_fp16 = einsum(equation = var_33394_equation_0, values = (var_33032_cast_fp16, var_32882_cast_fp16))[name = tensor("op_33394_cast_fp16")]; + tensor var_33395_to_fp16 = const()[name = tensor("op_33395_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3497_cast_fp16 = mul(x = var_33394_cast_fp16, y = var_33395_to_fp16)[name = tensor("aw_chunk_3497_cast_fp16")]; + tensor var_33398_equation_0 = const()[name = tensor("op_33398_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33398_cast_fp16 = einsum(equation = var_33398_equation_0, values = (var_33032_cast_fp16, var_32889_cast_fp16))[name = tensor("op_33398_cast_fp16")]; + tensor var_33399_to_fp16 = const()[name = tensor("op_33399_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3499_cast_fp16 = mul(x = var_33398_cast_fp16, y = var_33399_to_fp16)[name = tensor("aw_chunk_3499_cast_fp16")]; + tensor var_33402_equation_0 = const()[name = tensor("op_33402_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33402_cast_fp16 = einsum(equation = var_33402_equation_0, values = (var_33032_cast_fp16, var_32896_cast_fp16))[name = tensor("op_33402_cast_fp16")]; + tensor var_33403_to_fp16 = const()[name = tensor("op_33403_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3501_cast_fp16 = mul(x = var_33402_cast_fp16, y = var_33403_to_fp16)[name = tensor("aw_chunk_3501_cast_fp16")]; + tensor var_33406_equation_0 = const()[name = tensor("op_33406_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33406_cast_fp16 = einsum(equation = var_33406_equation_0, values = (var_33032_cast_fp16, var_32903_cast_fp16))[name = tensor("op_33406_cast_fp16")]; + tensor var_33407_to_fp16 = const()[name = tensor("op_33407_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3503_cast_fp16 = mul(x = var_33406_cast_fp16, y = var_33407_to_fp16)[name = tensor("aw_chunk_3503_cast_fp16")]; + tensor var_33410_equation_0 = const()[name = tensor("op_33410_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33410_cast_fp16 = einsum(equation = var_33410_equation_0, values = (var_33036_cast_fp16, var_32910_cast_fp16))[name = tensor("op_33410_cast_fp16")]; + tensor var_33411_to_fp16 = const()[name = tensor("op_33411_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3505_cast_fp16 = mul(x = var_33410_cast_fp16, y = var_33411_to_fp16)[name = tensor("aw_chunk_3505_cast_fp16")]; + tensor var_33414_equation_0 = const()[name = tensor("op_33414_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33414_cast_fp16 = einsum(equation = var_33414_equation_0, values = (var_33036_cast_fp16, var_32917_cast_fp16))[name = tensor("op_33414_cast_fp16")]; + tensor var_33415_to_fp16 = const()[name = tensor("op_33415_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3507_cast_fp16 = mul(x = var_33414_cast_fp16, y = var_33415_to_fp16)[name = tensor("aw_chunk_3507_cast_fp16")]; + tensor var_33418_equation_0 = const()[name = tensor("op_33418_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33418_cast_fp16 = einsum(equation = var_33418_equation_0, values = (var_33036_cast_fp16, var_32924_cast_fp16))[name = tensor("op_33418_cast_fp16")]; + tensor var_33419_to_fp16 = const()[name = tensor("op_33419_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3509_cast_fp16 = mul(x = var_33418_cast_fp16, y = var_33419_to_fp16)[name = tensor("aw_chunk_3509_cast_fp16")]; + tensor var_33422_equation_0 = const()[name = tensor("op_33422_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33422_cast_fp16 = einsum(equation = var_33422_equation_0, values = (var_33036_cast_fp16, var_32931_cast_fp16))[name = tensor("op_33422_cast_fp16")]; + tensor var_33423_to_fp16 = const()[name = tensor("op_33423_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3511_cast_fp16 = mul(x = var_33422_cast_fp16, y = var_33423_to_fp16)[name = tensor("aw_chunk_3511_cast_fp16")]; + tensor var_33426_equation_0 = const()[name = tensor("op_33426_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33426_cast_fp16 = einsum(equation = var_33426_equation_0, values = (var_33040_cast_fp16, var_32938_cast_fp16))[name = tensor("op_33426_cast_fp16")]; + tensor var_33427_to_fp16 = const()[name = tensor("op_33427_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3513_cast_fp16 = mul(x = var_33426_cast_fp16, y = var_33427_to_fp16)[name = tensor("aw_chunk_3513_cast_fp16")]; + tensor var_33430_equation_0 = const()[name = tensor("op_33430_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33430_cast_fp16 = einsum(equation = var_33430_equation_0, values = (var_33040_cast_fp16, var_32945_cast_fp16))[name = tensor("op_33430_cast_fp16")]; + tensor var_33431_to_fp16 = const()[name = tensor("op_33431_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3515_cast_fp16 = mul(x = var_33430_cast_fp16, y = var_33431_to_fp16)[name = tensor("aw_chunk_3515_cast_fp16")]; + tensor var_33434_equation_0 = const()[name = tensor("op_33434_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33434_cast_fp16 = einsum(equation = var_33434_equation_0, values = (var_33040_cast_fp16, var_32952_cast_fp16))[name = tensor("op_33434_cast_fp16")]; + tensor var_33435_to_fp16 = const()[name = tensor("op_33435_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3517_cast_fp16 = mul(x = var_33434_cast_fp16, y = var_33435_to_fp16)[name = tensor("aw_chunk_3517_cast_fp16")]; + tensor var_33438_equation_0 = const()[name = tensor("op_33438_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_33438_cast_fp16 = einsum(equation = var_33438_equation_0, values = (var_33040_cast_fp16, var_32959_cast_fp16))[name = tensor("op_33438_cast_fp16")]; + tensor var_33439_to_fp16 = const()[name = tensor("op_33439_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3519_cast_fp16 = mul(x = var_33438_cast_fp16, y = var_33439_to_fp16)[name = tensor("aw_chunk_3519_cast_fp16")]; + tensor var_33441_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3361_cast_fp16)[name = tensor("op_33441_cast_fp16")]; + tensor var_33442_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3363_cast_fp16)[name = tensor("op_33442_cast_fp16")]; + tensor var_33443_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3365_cast_fp16)[name = tensor("op_33443_cast_fp16")]; + tensor var_33444_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3367_cast_fp16)[name = tensor("op_33444_cast_fp16")]; + tensor var_33445_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3369_cast_fp16)[name = tensor("op_33445_cast_fp16")]; + tensor var_33446_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3371_cast_fp16)[name = tensor("op_33446_cast_fp16")]; + tensor var_33447_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3373_cast_fp16)[name = tensor("op_33447_cast_fp16")]; + tensor var_33448_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3375_cast_fp16)[name = tensor("op_33448_cast_fp16")]; + tensor var_33449_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3377_cast_fp16)[name = tensor("op_33449_cast_fp16")]; + tensor var_33450_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3379_cast_fp16)[name = tensor("op_33450_cast_fp16")]; + tensor var_33451_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3381_cast_fp16)[name = tensor("op_33451_cast_fp16")]; + tensor var_33452_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3383_cast_fp16)[name = tensor("op_33452_cast_fp16")]; + tensor var_33453_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3385_cast_fp16)[name = tensor("op_33453_cast_fp16")]; + tensor var_33454_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3387_cast_fp16)[name = tensor("op_33454_cast_fp16")]; + tensor var_33455_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3389_cast_fp16)[name = tensor("op_33455_cast_fp16")]; + tensor var_33456_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3391_cast_fp16)[name = tensor("op_33456_cast_fp16")]; + tensor var_33457_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3393_cast_fp16)[name = tensor("op_33457_cast_fp16")]; + tensor var_33458_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3395_cast_fp16)[name = tensor("op_33458_cast_fp16")]; + tensor var_33459_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3397_cast_fp16)[name = tensor("op_33459_cast_fp16")]; + tensor var_33460_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3399_cast_fp16)[name = tensor("op_33460_cast_fp16")]; + tensor var_33461_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3401_cast_fp16)[name = tensor("op_33461_cast_fp16")]; + tensor var_33462_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3403_cast_fp16)[name = tensor("op_33462_cast_fp16")]; + tensor var_33463_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3405_cast_fp16)[name = tensor("op_33463_cast_fp16")]; + tensor var_33464_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3407_cast_fp16)[name = tensor("op_33464_cast_fp16")]; + tensor var_33465_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3409_cast_fp16)[name = tensor("op_33465_cast_fp16")]; + tensor var_33466_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3411_cast_fp16)[name = tensor("op_33466_cast_fp16")]; + tensor var_33467_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3413_cast_fp16)[name = tensor("op_33467_cast_fp16")]; + tensor var_33468_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3415_cast_fp16)[name = tensor("op_33468_cast_fp16")]; + tensor var_33469_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3417_cast_fp16)[name = tensor("op_33469_cast_fp16")]; + tensor var_33470_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3419_cast_fp16)[name = tensor("op_33470_cast_fp16")]; + tensor var_33471_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3421_cast_fp16)[name = tensor("op_33471_cast_fp16")]; + tensor var_33472_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3423_cast_fp16)[name = tensor("op_33472_cast_fp16")]; + tensor var_33473_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3425_cast_fp16)[name = tensor("op_33473_cast_fp16")]; + tensor var_33474_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3427_cast_fp16)[name = tensor("op_33474_cast_fp16")]; + tensor var_33475_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3429_cast_fp16)[name = tensor("op_33475_cast_fp16")]; + tensor var_33476_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3431_cast_fp16)[name = tensor("op_33476_cast_fp16")]; + tensor var_33477_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3433_cast_fp16)[name = tensor("op_33477_cast_fp16")]; + tensor var_33478_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3435_cast_fp16)[name = tensor("op_33478_cast_fp16")]; + tensor var_33479_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3437_cast_fp16)[name = tensor("op_33479_cast_fp16")]; + tensor var_33480_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3439_cast_fp16)[name = tensor("op_33480_cast_fp16")]; + tensor var_33481_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3441_cast_fp16)[name = tensor("op_33481_cast_fp16")]; + tensor var_33482_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3443_cast_fp16)[name = tensor("op_33482_cast_fp16")]; + tensor var_33483_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3445_cast_fp16)[name = tensor("op_33483_cast_fp16")]; + tensor var_33484_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3447_cast_fp16)[name = tensor("op_33484_cast_fp16")]; + tensor var_33485_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3449_cast_fp16)[name = tensor("op_33485_cast_fp16")]; + tensor var_33486_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3451_cast_fp16)[name = tensor("op_33486_cast_fp16")]; + tensor var_33487_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3453_cast_fp16)[name = tensor("op_33487_cast_fp16")]; + tensor var_33488_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3455_cast_fp16)[name = tensor("op_33488_cast_fp16")]; + tensor var_33489_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3457_cast_fp16)[name = tensor("op_33489_cast_fp16")]; + tensor var_33490_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3459_cast_fp16)[name = tensor("op_33490_cast_fp16")]; + tensor var_33491_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3461_cast_fp16)[name = tensor("op_33491_cast_fp16")]; + tensor var_33492_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3463_cast_fp16)[name = tensor("op_33492_cast_fp16")]; + tensor var_33493_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3465_cast_fp16)[name = tensor("op_33493_cast_fp16")]; + tensor var_33494_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3467_cast_fp16)[name = tensor("op_33494_cast_fp16")]; + tensor var_33495_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3469_cast_fp16)[name = tensor("op_33495_cast_fp16")]; + tensor var_33496_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3471_cast_fp16)[name = tensor("op_33496_cast_fp16")]; + tensor var_33497_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3473_cast_fp16)[name = tensor("op_33497_cast_fp16")]; + tensor var_33498_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3475_cast_fp16)[name = tensor("op_33498_cast_fp16")]; + tensor var_33499_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3477_cast_fp16)[name = tensor("op_33499_cast_fp16")]; + tensor var_33500_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3479_cast_fp16)[name = tensor("op_33500_cast_fp16")]; + tensor var_33501_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3481_cast_fp16)[name = tensor("op_33501_cast_fp16")]; + tensor var_33502_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3483_cast_fp16)[name = tensor("op_33502_cast_fp16")]; + tensor var_33503_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3485_cast_fp16)[name = tensor("op_33503_cast_fp16")]; + tensor var_33504_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3487_cast_fp16)[name = tensor("op_33504_cast_fp16")]; + tensor var_33505_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3489_cast_fp16)[name = tensor("op_33505_cast_fp16")]; + tensor var_33506_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3491_cast_fp16)[name = tensor("op_33506_cast_fp16")]; + tensor var_33507_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3493_cast_fp16)[name = tensor("op_33507_cast_fp16")]; + tensor var_33508_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3495_cast_fp16)[name = tensor("op_33508_cast_fp16")]; + tensor var_33509_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3497_cast_fp16)[name = tensor("op_33509_cast_fp16")]; + tensor var_33510_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3499_cast_fp16)[name = tensor("op_33510_cast_fp16")]; + tensor var_33511_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3501_cast_fp16)[name = tensor("op_33511_cast_fp16")]; + tensor var_33512_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3503_cast_fp16)[name = tensor("op_33512_cast_fp16")]; + tensor var_33513_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3505_cast_fp16)[name = tensor("op_33513_cast_fp16")]; + tensor var_33514_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3507_cast_fp16)[name = tensor("op_33514_cast_fp16")]; + tensor var_33515_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3509_cast_fp16)[name = tensor("op_33515_cast_fp16")]; + tensor var_33516_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3511_cast_fp16)[name = tensor("op_33516_cast_fp16")]; + tensor var_33517_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3513_cast_fp16)[name = tensor("op_33517_cast_fp16")]; + tensor var_33518_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3515_cast_fp16)[name = tensor("op_33518_cast_fp16")]; + tensor var_33519_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3517_cast_fp16)[name = tensor("op_33519_cast_fp16")]; + tensor var_33520_cast_fp16 = softmax(axis = var_32266, x = aw_chunk_3519_cast_fp16)[name = tensor("op_33520_cast_fp16")]; + tensor var_33522_equation_0 = const()[name = tensor("op_33522_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33522_cast_fp16 = einsum(equation = var_33522_equation_0, values = (var_33042_cast_fp16, var_33441_cast_fp16))[name = tensor("op_33522_cast_fp16")]; + tensor var_33524_equation_0 = const()[name = tensor("op_33524_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33524_cast_fp16 = einsum(equation = var_33524_equation_0, values = (var_33042_cast_fp16, var_33442_cast_fp16))[name = tensor("op_33524_cast_fp16")]; + tensor var_33526_equation_0 = const()[name = tensor("op_33526_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33526_cast_fp16 = einsum(equation = var_33526_equation_0, values = (var_33042_cast_fp16, var_33443_cast_fp16))[name = tensor("op_33526_cast_fp16")]; + tensor var_33528_equation_0 = const()[name = tensor("op_33528_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33528_cast_fp16 = einsum(equation = var_33528_equation_0, values = (var_33042_cast_fp16, var_33444_cast_fp16))[name = tensor("op_33528_cast_fp16")]; + tensor var_33530_equation_0 = const()[name = tensor("op_33530_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33530_cast_fp16 = einsum(equation = var_33530_equation_0, values = (var_33046_cast_fp16, var_33445_cast_fp16))[name = tensor("op_33530_cast_fp16")]; + tensor var_33532_equation_0 = const()[name = tensor("op_33532_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33532_cast_fp16 = einsum(equation = var_33532_equation_0, values = (var_33046_cast_fp16, var_33446_cast_fp16))[name = tensor("op_33532_cast_fp16")]; + tensor var_33534_equation_0 = const()[name = tensor("op_33534_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33534_cast_fp16 = einsum(equation = var_33534_equation_0, values = (var_33046_cast_fp16, var_33447_cast_fp16))[name = tensor("op_33534_cast_fp16")]; + tensor var_33536_equation_0 = const()[name = tensor("op_33536_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33536_cast_fp16 = einsum(equation = var_33536_equation_0, values = (var_33046_cast_fp16, var_33448_cast_fp16))[name = tensor("op_33536_cast_fp16")]; + tensor var_33538_equation_0 = const()[name = tensor("op_33538_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33538_cast_fp16 = einsum(equation = var_33538_equation_0, values = (var_33050_cast_fp16, var_33449_cast_fp16))[name = tensor("op_33538_cast_fp16")]; + tensor var_33540_equation_0 = const()[name = tensor("op_33540_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33540_cast_fp16 = einsum(equation = var_33540_equation_0, values = (var_33050_cast_fp16, var_33450_cast_fp16))[name = tensor("op_33540_cast_fp16")]; + tensor var_33542_equation_0 = const()[name = tensor("op_33542_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33542_cast_fp16 = einsum(equation = var_33542_equation_0, values = (var_33050_cast_fp16, var_33451_cast_fp16))[name = tensor("op_33542_cast_fp16")]; + tensor var_33544_equation_0 = const()[name = tensor("op_33544_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33544_cast_fp16 = einsum(equation = var_33544_equation_0, values = (var_33050_cast_fp16, var_33452_cast_fp16))[name = tensor("op_33544_cast_fp16")]; + tensor var_33546_equation_0 = const()[name = tensor("op_33546_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33546_cast_fp16 = einsum(equation = var_33546_equation_0, values = (var_33054_cast_fp16, var_33453_cast_fp16))[name = tensor("op_33546_cast_fp16")]; + tensor var_33548_equation_0 = const()[name = tensor("op_33548_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33548_cast_fp16 = einsum(equation = var_33548_equation_0, values = (var_33054_cast_fp16, var_33454_cast_fp16))[name = tensor("op_33548_cast_fp16")]; + tensor var_33550_equation_0 = const()[name = tensor("op_33550_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33550_cast_fp16 = einsum(equation = var_33550_equation_0, values = (var_33054_cast_fp16, var_33455_cast_fp16))[name = tensor("op_33550_cast_fp16")]; + tensor var_33552_equation_0 = const()[name = tensor("op_33552_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33552_cast_fp16 = einsum(equation = var_33552_equation_0, values = (var_33054_cast_fp16, var_33456_cast_fp16))[name = tensor("op_33552_cast_fp16")]; + tensor var_33554_equation_0 = const()[name = tensor("op_33554_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33554_cast_fp16 = einsum(equation = var_33554_equation_0, values = (var_33058_cast_fp16, var_33457_cast_fp16))[name = tensor("op_33554_cast_fp16")]; + tensor var_33556_equation_0 = const()[name = tensor("op_33556_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33556_cast_fp16 = einsum(equation = var_33556_equation_0, values = (var_33058_cast_fp16, var_33458_cast_fp16))[name = tensor("op_33556_cast_fp16")]; + tensor var_33558_equation_0 = const()[name = tensor("op_33558_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33558_cast_fp16 = einsum(equation = var_33558_equation_0, values = (var_33058_cast_fp16, var_33459_cast_fp16))[name = tensor("op_33558_cast_fp16")]; + tensor var_33560_equation_0 = const()[name = tensor("op_33560_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33560_cast_fp16 = einsum(equation = var_33560_equation_0, values = (var_33058_cast_fp16, var_33460_cast_fp16))[name = tensor("op_33560_cast_fp16")]; + tensor var_33562_equation_0 = const()[name = tensor("op_33562_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33562_cast_fp16 = einsum(equation = var_33562_equation_0, values = (var_33062_cast_fp16, var_33461_cast_fp16))[name = tensor("op_33562_cast_fp16")]; + tensor var_33564_equation_0 = const()[name = tensor("op_33564_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33564_cast_fp16 = einsum(equation = var_33564_equation_0, values = (var_33062_cast_fp16, var_33462_cast_fp16))[name = tensor("op_33564_cast_fp16")]; + tensor var_33566_equation_0 = const()[name = tensor("op_33566_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33566_cast_fp16 = einsum(equation = var_33566_equation_0, values = (var_33062_cast_fp16, var_33463_cast_fp16))[name = tensor("op_33566_cast_fp16")]; + tensor var_33568_equation_0 = const()[name = tensor("op_33568_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33568_cast_fp16 = einsum(equation = var_33568_equation_0, values = (var_33062_cast_fp16, var_33464_cast_fp16))[name = tensor("op_33568_cast_fp16")]; + tensor var_33570_equation_0 = const()[name = tensor("op_33570_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33570_cast_fp16 = einsum(equation = var_33570_equation_0, values = (var_33066_cast_fp16, var_33465_cast_fp16))[name = tensor("op_33570_cast_fp16")]; + tensor var_33572_equation_0 = const()[name = tensor("op_33572_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33572_cast_fp16 = einsum(equation = var_33572_equation_0, values = (var_33066_cast_fp16, var_33466_cast_fp16))[name = tensor("op_33572_cast_fp16")]; + tensor var_33574_equation_0 = const()[name = tensor("op_33574_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33574_cast_fp16 = einsum(equation = var_33574_equation_0, values = (var_33066_cast_fp16, var_33467_cast_fp16))[name = tensor("op_33574_cast_fp16")]; + tensor var_33576_equation_0 = const()[name = tensor("op_33576_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33576_cast_fp16 = einsum(equation = var_33576_equation_0, values = (var_33066_cast_fp16, var_33468_cast_fp16))[name = tensor("op_33576_cast_fp16")]; + tensor var_33578_equation_0 = const()[name = tensor("op_33578_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33578_cast_fp16 = einsum(equation = var_33578_equation_0, values = (var_33070_cast_fp16, var_33469_cast_fp16))[name = tensor("op_33578_cast_fp16")]; + tensor var_33580_equation_0 = const()[name = tensor("op_33580_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33580_cast_fp16 = einsum(equation = var_33580_equation_0, values = (var_33070_cast_fp16, var_33470_cast_fp16))[name = tensor("op_33580_cast_fp16")]; + tensor var_33582_equation_0 = const()[name = tensor("op_33582_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33582_cast_fp16 = einsum(equation = var_33582_equation_0, values = (var_33070_cast_fp16, var_33471_cast_fp16))[name = tensor("op_33582_cast_fp16")]; + tensor var_33584_equation_0 = const()[name = tensor("op_33584_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33584_cast_fp16 = einsum(equation = var_33584_equation_0, values = (var_33070_cast_fp16, var_33472_cast_fp16))[name = tensor("op_33584_cast_fp16")]; + tensor var_33586_equation_0 = const()[name = tensor("op_33586_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33586_cast_fp16 = einsum(equation = var_33586_equation_0, values = (var_33074_cast_fp16, var_33473_cast_fp16))[name = tensor("op_33586_cast_fp16")]; + tensor var_33588_equation_0 = const()[name = tensor("op_33588_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33588_cast_fp16 = einsum(equation = var_33588_equation_0, values = (var_33074_cast_fp16, var_33474_cast_fp16))[name = tensor("op_33588_cast_fp16")]; + tensor var_33590_equation_0 = const()[name = tensor("op_33590_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33590_cast_fp16 = einsum(equation = var_33590_equation_0, values = (var_33074_cast_fp16, var_33475_cast_fp16))[name = tensor("op_33590_cast_fp16")]; + tensor var_33592_equation_0 = const()[name = tensor("op_33592_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33592_cast_fp16 = einsum(equation = var_33592_equation_0, values = (var_33074_cast_fp16, var_33476_cast_fp16))[name = tensor("op_33592_cast_fp16")]; + tensor var_33594_equation_0 = const()[name = tensor("op_33594_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33594_cast_fp16 = einsum(equation = var_33594_equation_0, values = (var_33078_cast_fp16, var_33477_cast_fp16))[name = tensor("op_33594_cast_fp16")]; + tensor var_33596_equation_0 = const()[name = tensor("op_33596_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33596_cast_fp16 = einsum(equation = var_33596_equation_0, values = (var_33078_cast_fp16, var_33478_cast_fp16))[name = tensor("op_33596_cast_fp16")]; + tensor var_33598_equation_0 = const()[name = tensor("op_33598_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33598_cast_fp16 = einsum(equation = var_33598_equation_0, values = (var_33078_cast_fp16, var_33479_cast_fp16))[name = tensor("op_33598_cast_fp16")]; + tensor var_33600_equation_0 = const()[name = tensor("op_33600_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33600_cast_fp16 = einsum(equation = var_33600_equation_0, values = (var_33078_cast_fp16, var_33480_cast_fp16))[name = tensor("op_33600_cast_fp16")]; + tensor var_33602_equation_0 = const()[name = tensor("op_33602_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33602_cast_fp16 = einsum(equation = var_33602_equation_0, values = (var_33082_cast_fp16, var_33481_cast_fp16))[name = tensor("op_33602_cast_fp16")]; + tensor var_33604_equation_0 = const()[name = tensor("op_33604_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33604_cast_fp16 = einsum(equation = var_33604_equation_0, values = (var_33082_cast_fp16, var_33482_cast_fp16))[name = tensor("op_33604_cast_fp16")]; + tensor var_33606_equation_0 = const()[name = tensor("op_33606_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33606_cast_fp16 = einsum(equation = var_33606_equation_0, values = (var_33082_cast_fp16, var_33483_cast_fp16))[name = tensor("op_33606_cast_fp16")]; + tensor var_33608_equation_0 = const()[name = tensor("op_33608_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33608_cast_fp16 = einsum(equation = var_33608_equation_0, values = (var_33082_cast_fp16, var_33484_cast_fp16))[name = tensor("op_33608_cast_fp16")]; + tensor var_33610_equation_0 = const()[name = tensor("op_33610_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33610_cast_fp16 = einsum(equation = var_33610_equation_0, values = (var_33086_cast_fp16, var_33485_cast_fp16))[name = tensor("op_33610_cast_fp16")]; + tensor var_33612_equation_0 = const()[name = tensor("op_33612_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33612_cast_fp16 = einsum(equation = var_33612_equation_0, values = (var_33086_cast_fp16, var_33486_cast_fp16))[name = tensor("op_33612_cast_fp16")]; + tensor var_33614_equation_0 = const()[name = tensor("op_33614_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33614_cast_fp16 = einsum(equation = var_33614_equation_0, values = (var_33086_cast_fp16, var_33487_cast_fp16))[name = tensor("op_33614_cast_fp16")]; + tensor var_33616_equation_0 = const()[name = tensor("op_33616_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33616_cast_fp16 = einsum(equation = var_33616_equation_0, values = (var_33086_cast_fp16, var_33488_cast_fp16))[name = tensor("op_33616_cast_fp16")]; + tensor var_33618_equation_0 = const()[name = tensor("op_33618_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33618_cast_fp16 = einsum(equation = var_33618_equation_0, values = (var_33090_cast_fp16, var_33489_cast_fp16))[name = tensor("op_33618_cast_fp16")]; + tensor var_33620_equation_0 = const()[name = tensor("op_33620_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33620_cast_fp16 = einsum(equation = var_33620_equation_0, values = (var_33090_cast_fp16, var_33490_cast_fp16))[name = tensor("op_33620_cast_fp16")]; + tensor var_33622_equation_0 = const()[name = tensor("op_33622_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33622_cast_fp16 = einsum(equation = var_33622_equation_0, values = (var_33090_cast_fp16, var_33491_cast_fp16))[name = tensor("op_33622_cast_fp16")]; + tensor var_33624_equation_0 = const()[name = tensor("op_33624_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33624_cast_fp16 = einsum(equation = var_33624_equation_0, values = (var_33090_cast_fp16, var_33492_cast_fp16))[name = tensor("op_33624_cast_fp16")]; + tensor var_33626_equation_0 = const()[name = tensor("op_33626_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33626_cast_fp16 = einsum(equation = var_33626_equation_0, values = (var_33094_cast_fp16, var_33493_cast_fp16))[name = tensor("op_33626_cast_fp16")]; + tensor var_33628_equation_0 = const()[name = tensor("op_33628_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33628_cast_fp16 = einsum(equation = var_33628_equation_0, values = (var_33094_cast_fp16, var_33494_cast_fp16))[name = tensor("op_33628_cast_fp16")]; + tensor var_33630_equation_0 = const()[name = tensor("op_33630_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33630_cast_fp16 = einsum(equation = var_33630_equation_0, values = (var_33094_cast_fp16, var_33495_cast_fp16))[name = tensor("op_33630_cast_fp16")]; + tensor var_33632_equation_0 = const()[name = tensor("op_33632_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33632_cast_fp16 = einsum(equation = var_33632_equation_0, values = (var_33094_cast_fp16, var_33496_cast_fp16))[name = tensor("op_33632_cast_fp16")]; + tensor var_33634_equation_0 = const()[name = tensor("op_33634_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33634_cast_fp16 = einsum(equation = var_33634_equation_0, values = (var_33098_cast_fp16, var_33497_cast_fp16))[name = tensor("op_33634_cast_fp16")]; + tensor var_33636_equation_0 = const()[name = tensor("op_33636_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33636_cast_fp16 = einsum(equation = var_33636_equation_0, values = (var_33098_cast_fp16, var_33498_cast_fp16))[name = tensor("op_33636_cast_fp16")]; + tensor var_33638_equation_0 = const()[name = tensor("op_33638_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33638_cast_fp16 = einsum(equation = var_33638_equation_0, values = (var_33098_cast_fp16, var_33499_cast_fp16))[name = tensor("op_33638_cast_fp16")]; + tensor var_33640_equation_0 = const()[name = tensor("op_33640_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33640_cast_fp16 = einsum(equation = var_33640_equation_0, values = (var_33098_cast_fp16, var_33500_cast_fp16))[name = tensor("op_33640_cast_fp16")]; + tensor var_33642_equation_0 = const()[name = tensor("op_33642_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33642_cast_fp16 = einsum(equation = var_33642_equation_0, values = (var_33102_cast_fp16, var_33501_cast_fp16))[name = tensor("op_33642_cast_fp16")]; + tensor var_33644_equation_0 = const()[name = tensor("op_33644_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33644_cast_fp16 = einsum(equation = var_33644_equation_0, values = (var_33102_cast_fp16, var_33502_cast_fp16))[name = tensor("op_33644_cast_fp16")]; + tensor var_33646_equation_0 = const()[name = tensor("op_33646_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33646_cast_fp16 = einsum(equation = var_33646_equation_0, values = (var_33102_cast_fp16, var_33503_cast_fp16))[name = tensor("op_33646_cast_fp16")]; + tensor var_33648_equation_0 = const()[name = tensor("op_33648_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33648_cast_fp16 = einsum(equation = var_33648_equation_0, values = (var_33102_cast_fp16, var_33504_cast_fp16))[name = tensor("op_33648_cast_fp16")]; + tensor var_33650_equation_0 = const()[name = tensor("op_33650_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33650_cast_fp16 = einsum(equation = var_33650_equation_0, values = (var_33106_cast_fp16, var_33505_cast_fp16))[name = tensor("op_33650_cast_fp16")]; + tensor var_33652_equation_0 = const()[name = tensor("op_33652_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33652_cast_fp16 = einsum(equation = var_33652_equation_0, values = (var_33106_cast_fp16, var_33506_cast_fp16))[name = tensor("op_33652_cast_fp16")]; + tensor var_33654_equation_0 = const()[name = tensor("op_33654_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33654_cast_fp16 = einsum(equation = var_33654_equation_0, values = (var_33106_cast_fp16, var_33507_cast_fp16))[name = tensor("op_33654_cast_fp16")]; + tensor var_33656_equation_0 = const()[name = tensor("op_33656_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33656_cast_fp16 = einsum(equation = var_33656_equation_0, values = (var_33106_cast_fp16, var_33508_cast_fp16))[name = tensor("op_33656_cast_fp16")]; + tensor var_33658_equation_0 = const()[name = tensor("op_33658_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33658_cast_fp16 = einsum(equation = var_33658_equation_0, values = (var_33110_cast_fp16, var_33509_cast_fp16))[name = tensor("op_33658_cast_fp16")]; + tensor var_33660_equation_0 = const()[name = tensor("op_33660_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33660_cast_fp16 = einsum(equation = var_33660_equation_0, values = (var_33110_cast_fp16, var_33510_cast_fp16))[name = tensor("op_33660_cast_fp16")]; + tensor var_33662_equation_0 = const()[name = tensor("op_33662_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33662_cast_fp16 = einsum(equation = var_33662_equation_0, values = (var_33110_cast_fp16, var_33511_cast_fp16))[name = tensor("op_33662_cast_fp16")]; + tensor var_33664_equation_0 = const()[name = tensor("op_33664_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33664_cast_fp16 = einsum(equation = var_33664_equation_0, values = (var_33110_cast_fp16, var_33512_cast_fp16))[name = tensor("op_33664_cast_fp16")]; + tensor var_33666_equation_0 = const()[name = tensor("op_33666_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33666_cast_fp16 = einsum(equation = var_33666_equation_0, values = (var_33114_cast_fp16, var_33513_cast_fp16))[name = tensor("op_33666_cast_fp16")]; + tensor var_33668_equation_0 = const()[name = tensor("op_33668_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33668_cast_fp16 = einsum(equation = var_33668_equation_0, values = (var_33114_cast_fp16, var_33514_cast_fp16))[name = tensor("op_33668_cast_fp16")]; + tensor var_33670_equation_0 = const()[name = tensor("op_33670_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33670_cast_fp16 = einsum(equation = var_33670_equation_0, values = (var_33114_cast_fp16, var_33515_cast_fp16))[name = tensor("op_33670_cast_fp16")]; + tensor var_33672_equation_0 = const()[name = tensor("op_33672_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33672_cast_fp16 = einsum(equation = var_33672_equation_0, values = (var_33114_cast_fp16, var_33516_cast_fp16))[name = tensor("op_33672_cast_fp16")]; + tensor var_33674_equation_0 = const()[name = tensor("op_33674_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33674_cast_fp16 = einsum(equation = var_33674_equation_0, values = (var_33118_cast_fp16, var_33517_cast_fp16))[name = tensor("op_33674_cast_fp16")]; + tensor var_33676_equation_0 = const()[name = tensor("op_33676_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33676_cast_fp16 = einsum(equation = var_33676_equation_0, values = (var_33118_cast_fp16, var_33518_cast_fp16))[name = tensor("op_33676_cast_fp16")]; + tensor var_33678_equation_0 = const()[name = tensor("op_33678_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33678_cast_fp16 = einsum(equation = var_33678_equation_0, values = (var_33118_cast_fp16, var_33519_cast_fp16))[name = tensor("op_33678_cast_fp16")]; + tensor var_33680_equation_0 = const()[name = tensor("op_33680_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_33680_cast_fp16 = einsum(equation = var_33680_equation_0, values = (var_33118_cast_fp16, var_33520_cast_fp16))[name = tensor("op_33680_cast_fp16")]; + tensor var_33682_interleave_0 = const()[name = tensor("op_33682_interleave_0"), val = tensor(false)]; + tensor var_33682_cast_fp16 = concat(axis = var_32241, interleave = var_33682_interleave_0, values = (var_33522_cast_fp16, var_33524_cast_fp16, var_33526_cast_fp16, var_33528_cast_fp16))[name = tensor("op_33682_cast_fp16")]; + tensor var_33684_interleave_0 = const()[name = tensor("op_33684_interleave_0"), val = tensor(false)]; + tensor var_33684_cast_fp16 = concat(axis = var_32241, interleave = var_33684_interleave_0, values = (var_33530_cast_fp16, var_33532_cast_fp16, var_33534_cast_fp16, var_33536_cast_fp16))[name = tensor("op_33684_cast_fp16")]; + tensor var_33686_interleave_0 = const()[name = tensor("op_33686_interleave_0"), val = tensor(false)]; + tensor var_33686_cast_fp16 = concat(axis = var_32241, interleave = var_33686_interleave_0, values = (var_33538_cast_fp16, var_33540_cast_fp16, var_33542_cast_fp16, var_33544_cast_fp16))[name = tensor("op_33686_cast_fp16")]; + tensor var_33688_interleave_0 = const()[name = tensor("op_33688_interleave_0"), val = tensor(false)]; + tensor var_33688_cast_fp16 = concat(axis = var_32241, interleave = var_33688_interleave_0, values = (var_33546_cast_fp16, var_33548_cast_fp16, var_33550_cast_fp16, var_33552_cast_fp16))[name = tensor("op_33688_cast_fp16")]; + tensor var_33690_interleave_0 = const()[name = tensor("op_33690_interleave_0"), val = tensor(false)]; + tensor var_33690_cast_fp16 = concat(axis = var_32241, interleave = var_33690_interleave_0, values = (var_33554_cast_fp16, var_33556_cast_fp16, var_33558_cast_fp16, var_33560_cast_fp16))[name = tensor("op_33690_cast_fp16")]; + tensor var_33692_interleave_0 = const()[name = tensor("op_33692_interleave_0"), val = tensor(false)]; + tensor var_33692_cast_fp16 = concat(axis = var_32241, interleave = var_33692_interleave_0, values = (var_33562_cast_fp16, var_33564_cast_fp16, var_33566_cast_fp16, var_33568_cast_fp16))[name = tensor("op_33692_cast_fp16")]; + tensor var_33694_interleave_0 = const()[name = tensor("op_33694_interleave_0"), val = tensor(false)]; + tensor var_33694_cast_fp16 = concat(axis = var_32241, interleave = var_33694_interleave_0, values = (var_33570_cast_fp16, var_33572_cast_fp16, var_33574_cast_fp16, var_33576_cast_fp16))[name = tensor("op_33694_cast_fp16")]; + tensor var_33696_interleave_0 = const()[name = tensor("op_33696_interleave_0"), val = tensor(false)]; + tensor var_33696_cast_fp16 = concat(axis = var_32241, interleave = var_33696_interleave_0, values = (var_33578_cast_fp16, var_33580_cast_fp16, var_33582_cast_fp16, var_33584_cast_fp16))[name = tensor("op_33696_cast_fp16")]; + tensor var_33698_interleave_0 = const()[name = tensor("op_33698_interleave_0"), val = tensor(false)]; + tensor var_33698_cast_fp16 = concat(axis = var_32241, interleave = var_33698_interleave_0, values = (var_33586_cast_fp16, var_33588_cast_fp16, var_33590_cast_fp16, var_33592_cast_fp16))[name = tensor("op_33698_cast_fp16")]; + tensor var_33700_interleave_0 = const()[name = tensor("op_33700_interleave_0"), val = tensor(false)]; + tensor var_33700_cast_fp16 = concat(axis = var_32241, interleave = var_33700_interleave_0, values = (var_33594_cast_fp16, var_33596_cast_fp16, var_33598_cast_fp16, var_33600_cast_fp16))[name = tensor("op_33700_cast_fp16")]; + tensor var_33702_interleave_0 = const()[name = tensor("op_33702_interleave_0"), val = tensor(false)]; + tensor var_33702_cast_fp16 = concat(axis = var_32241, interleave = var_33702_interleave_0, values = (var_33602_cast_fp16, var_33604_cast_fp16, var_33606_cast_fp16, var_33608_cast_fp16))[name = tensor("op_33702_cast_fp16")]; + tensor var_33704_interleave_0 = const()[name = tensor("op_33704_interleave_0"), val = tensor(false)]; + tensor var_33704_cast_fp16 = concat(axis = var_32241, interleave = var_33704_interleave_0, values = (var_33610_cast_fp16, var_33612_cast_fp16, var_33614_cast_fp16, var_33616_cast_fp16))[name = tensor("op_33704_cast_fp16")]; + tensor var_33706_interleave_0 = const()[name = tensor("op_33706_interleave_0"), val = tensor(false)]; + tensor var_33706_cast_fp16 = concat(axis = var_32241, interleave = var_33706_interleave_0, values = (var_33618_cast_fp16, var_33620_cast_fp16, var_33622_cast_fp16, var_33624_cast_fp16))[name = tensor("op_33706_cast_fp16")]; + tensor var_33708_interleave_0 = const()[name = tensor("op_33708_interleave_0"), val = tensor(false)]; + tensor var_33708_cast_fp16 = concat(axis = var_32241, interleave = var_33708_interleave_0, values = (var_33626_cast_fp16, var_33628_cast_fp16, var_33630_cast_fp16, var_33632_cast_fp16))[name = tensor("op_33708_cast_fp16")]; + tensor var_33710_interleave_0 = const()[name = tensor("op_33710_interleave_0"), val = tensor(false)]; + tensor var_33710_cast_fp16 = concat(axis = var_32241, interleave = var_33710_interleave_0, values = (var_33634_cast_fp16, var_33636_cast_fp16, var_33638_cast_fp16, var_33640_cast_fp16))[name = tensor("op_33710_cast_fp16")]; + tensor var_33712_interleave_0 = const()[name = tensor("op_33712_interleave_0"), val = tensor(false)]; + tensor var_33712_cast_fp16 = concat(axis = var_32241, interleave = var_33712_interleave_0, values = (var_33642_cast_fp16, var_33644_cast_fp16, var_33646_cast_fp16, var_33648_cast_fp16))[name = tensor("op_33712_cast_fp16")]; + tensor var_33714_interleave_0 = const()[name = tensor("op_33714_interleave_0"), val = tensor(false)]; + tensor var_33714_cast_fp16 = concat(axis = var_32241, interleave = var_33714_interleave_0, values = (var_33650_cast_fp16, var_33652_cast_fp16, var_33654_cast_fp16, var_33656_cast_fp16))[name = tensor("op_33714_cast_fp16")]; + tensor var_33716_interleave_0 = const()[name = tensor("op_33716_interleave_0"), val = tensor(false)]; + tensor var_33716_cast_fp16 = concat(axis = var_32241, interleave = var_33716_interleave_0, values = (var_33658_cast_fp16, var_33660_cast_fp16, var_33662_cast_fp16, var_33664_cast_fp16))[name = tensor("op_33716_cast_fp16")]; + tensor var_33718_interleave_0 = const()[name = tensor("op_33718_interleave_0"), val = tensor(false)]; + tensor var_33718_cast_fp16 = concat(axis = var_32241, interleave = var_33718_interleave_0, values = (var_33666_cast_fp16, var_33668_cast_fp16, var_33670_cast_fp16, var_33672_cast_fp16))[name = tensor("op_33718_cast_fp16")]; + tensor var_33720_interleave_0 = const()[name = tensor("op_33720_interleave_0"), val = tensor(false)]; + tensor var_33720_cast_fp16 = concat(axis = var_32241, interleave = var_33720_interleave_0, values = (var_33674_cast_fp16, var_33676_cast_fp16, var_33678_cast_fp16, var_33680_cast_fp16))[name = tensor("op_33720_cast_fp16")]; + tensor input_169_interleave_0 = const()[name = tensor("input_169_interleave_0"), val = tensor(false)]; + tensor input_169_cast_fp16 = concat(axis = var_32266, interleave = input_169_interleave_0, values = (var_33682_cast_fp16, var_33684_cast_fp16, var_33686_cast_fp16, var_33688_cast_fp16, var_33690_cast_fp16, var_33692_cast_fp16, var_33694_cast_fp16, var_33696_cast_fp16, var_33698_cast_fp16, var_33700_cast_fp16, var_33702_cast_fp16, var_33704_cast_fp16, var_33706_cast_fp16, var_33708_cast_fp16, var_33710_cast_fp16, var_33712_cast_fp16, var_33714_cast_fp16, var_33716_cast_fp16, var_33718_cast_fp16, var_33720_cast_fp16))[name = tensor("input_169_cast_fp16")]; + tensor var_33725 = const()[name = tensor("op_33725"), val = tensor([1, 1])]; + tensor var_33727 = const()[name = tensor("op_33727"), val = tensor([1, 1])]; + tensor obj_87_pad_type_0 = const()[name = tensor("obj_87_pad_type_0"), val = tensor("custom")]; + tensor obj_87_pad_0 = const()[name = tensor("obj_87_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_21_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_21_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(850555520)))]; + tensor layers_21_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_21_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(853832384)))]; + tensor obj_87_cast_fp16 = conv(bias = layers_21_self_attn_o_proj_bias_to_fp16, dilations = var_33727, groups = var_32266, pad = obj_87_pad_0, pad_type = obj_87_pad_type_0, strides = var_33725, weight = layers_21_self_attn_o_proj_weight_to_fp16, x = input_169_cast_fp16)[name = tensor("obj_87_cast_fp16")]; + tensor inputs_87_cast_fp16 = add(x = inputs_85_cast_fp16, y = obj_87_cast_fp16)[name = tensor("inputs_87_cast_fp16")]; + tensor var_33733 = const()[name = tensor("op_33733"), val = tensor([1])]; + tensor channels_mean_87_cast_fp16 = reduce_mean(axes = var_33733, keep_dims = var_32267, x = inputs_87_cast_fp16)[name = tensor("channels_mean_87_cast_fp16")]; + tensor zero_mean_87_cast_fp16 = sub(x = inputs_87_cast_fp16, y = channels_mean_87_cast_fp16)[name = tensor("zero_mean_87_cast_fp16")]; + tensor zero_mean_sq_87_cast_fp16 = mul(x = zero_mean_87_cast_fp16, y = zero_mean_87_cast_fp16)[name = tensor("zero_mean_sq_87_cast_fp16")]; + tensor var_33737 = const()[name = tensor("op_33737"), val = tensor([1])]; + tensor var_33738_cast_fp16 = reduce_mean(axes = var_33737, keep_dims = var_32267, x = zero_mean_sq_87_cast_fp16)[name = tensor("op_33738_cast_fp16")]; + tensor var_33739_to_fp16 = const()[name = tensor("op_33739_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_33740_cast_fp16 = add(x = var_33738_cast_fp16, y = var_33739_to_fp16)[name = tensor("op_33740_cast_fp16")]; + tensor denom_87_epsilon_0_to_fp16 = const()[name = tensor("denom_87_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_87_cast_fp16 = rsqrt(epsilon = denom_87_epsilon_0_to_fp16, x = var_33740_cast_fp16)[name = tensor("denom_87_cast_fp16")]; + tensor out_87_cast_fp16 = mul(x = zero_mean_87_cast_fp16, y = denom_87_cast_fp16)[name = tensor("out_87_cast_fp16")]; + tensor input_171_gamma_0_to_fp16 = const()[name = tensor("input_171_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(853835008)))]; + tensor input_171_beta_0_to_fp16 = const()[name = tensor("input_171_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(853837632)))]; + tensor input_171_epsilon_0_to_fp16 = const()[name = tensor("input_171_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_171_cast_fp16 = batch_norm(beta = input_171_beta_0_to_fp16, epsilon = input_171_epsilon_0_to_fp16, gamma = input_171_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_87_cast_fp16)[name = tensor("input_171_cast_fp16")]; + tensor var_33751 = const()[name = tensor("op_33751"), val = tensor([1, 1])]; + tensor var_33753 = const()[name = tensor("op_33753"), val = tensor([1, 1])]; + tensor input_173_pad_type_0 = const()[name = tensor("input_173_pad_type_0"), val = tensor("custom")]; + tensor input_173_pad_0 = const()[name = tensor("input_173_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_21_fc1_weight_to_fp16 = const()[name = tensor("layers_21_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(853840256)))]; + tensor layers_21_fc1_bias_to_fp16 = const()[name = tensor("layers_21_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(866947520)))]; + tensor input_173_cast_fp16 = conv(bias = layers_21_fc1_bias_to_fp16, dilations = var_33753, groups = var_32266, pad = input_173_pad_0, pad_type = input_173_pad_type_0, strides = var_33751, weight = layers_21_fc1_weight_to_fp16, x = input_171_cast_fp16)[name = tensor("input_173_cast_fp16")]; + tensor input_175_mode_0 = const()[name = tensor("input_175_mode_0"), val = tensor("EXACT")]; + tensor input_175_cast_fp16 = gelu(mode = input_175_mode_0, x = input_173_cast_fp16)[name = tensor("input_175_cast_fp16")]; + tensor var_33759 = const()[name = tensor("op_33759"), val = tensor([1, 1])]; + tensor var_33761 = const()[name = tensor("op_33761"), val = tensor([1, 1])]; + tensor hidden_states_47_pad_type_0 = const()[name = tensor("hidden_states_47_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_47_pad_0 = const()[name = tensor("hidden_states_47_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_21_fc2_weight_to_fp16 = const()[name = tensor("layers_21_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(866957824)))]; + tensor layers_21_fc2_bias_to_fp16 = const()[name = tensor("layers_21_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(880065088)))]; + tensor hidden_states_47_cast_fp16 = conv(bias = layers_21_fc2_bias_to_fp16, dilations = var_33761, groups = var_32266, pad = hidden_states_47_pad_0, pad_type = hidden_states_47_pad_type_0, strides = var_33759, weight = layers_21_fc2_weight_to_fp16, x = input_175_cast_fp16)[name = tensor("hidden_states_47_cast_fp16")]; + tensor inputs_89_cast_fp16 = add(x = inputs_87_cast_fp16, y = hidden_states_47_cast_fp16)[name = tensor("inputs_89_cast_fp16")]; + tensor var_33768 = const()[name = tensor("op_33768"), val = tensor(3)]; + tensor var_33793 = const()[name = tensor("op_33793"), val = tensor(1)]; + tensor var_33794 = const()[name = tensor("op_33794"), val = tensor(true)]; + tensor var_33804 = const()[name = tensor("op_33804"), val = tensor([1])]; + tensor channels_mean_89_cast_fp16 = reduce_mean(axes = var_33804, keep_dims = var_33794, x = inputs_89_cast_fp16)[name = tensor("channels_mean_89_cast_fp16")]; + tensor zero_mean_89_cast_fp16 = sub(x = inputs_89_cast_fp16, y = channels_mean_89_cast_fp16)[name = tensor("zero_mean_89_cast_fp16")]; + tensor zero_mean_sq_89_cast_fp16 = mul(x = zero_mean_89_cast_fp16, y = zero_mean_89_cast_fp16)[name = tensor("zero_mean_sq_89_cast_fp16")]; + tensor var_33808 = const()[name = tensor("op_33808"), val = tensor([1])]; + tensor var_33809_cast_fp16 = reduce_mean(axes = var_33808, keep_dims = var_33794, x = zero_mean_sq_89_cast_fp16)[name = tensor("op_33809_cast_fp16")]; + tensor var_33810_to_fp16 = const()[name = tensor("op_33810_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_33811_cast_fp16 = add(x = var_33809_cast_fp16, y = var_33810_to_fp16)[name = tensor("op_33811_cast_fp16")]; + tensor denom_89_epsilon_0_to_fp16 = const()[name = tensor("denom_89_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_89_cast_fp16 = rsqrt(epsilon = denom_89_epsilon_0_to_fp16, x = var_33811_cast_fp16)[name = tensor("denom_89_cast_fp16")]; + tensor out_89_cast_fp16 = mul(x = zero_mean_89_cast_fp16, y = denom_89_cast_fp16)[name = tensor("out_89_cast_fp16")]; + tensor obj_89_gamma_0_to_fp16 = const()[name = tensor("obj_89_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(880067712)))]; + tensor obj_89_beta_0_to_fp16 = const()[name = tensor("obj_89_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(880070336)))]; + tensor obj_89_epsilon_0_to_fp16 = const()[name = tensor("obj_89_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_89_cast_fp16 = batch_norm(beta = obj_89_beta_0_to_fp16, epsilon = obj_89_epsilon_0_to_fp16, gamma = obj_89_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_89_cast_fp16)[name = tensor("obj_89_cast_fp16")]; + tensor var_33826 = const()[name = tensor("op_33826"), val = tensor([1, 1])]; + tensor var_33828 = const()[name = tensor("op_33828"), val = tensor([1, 1])]; + tensor query_45_pad_type_0 = const()[name = tensor("query_45_pad_type_0"), val = tensor("custom")]; + tensor query_45_pad_0 = const()[name = tensor("query_45_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_22_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_22_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(880072960)))]; + tensor layers_22_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_22_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(883349824)))]; + tensor query_45_cast_fp16 = conv(bias = layers_22_self_attn_q_proj_bias_to_fp16, dilations = var_33828, groups = var_33793, pad = query_45_pad_0, pad_type = query_45_pad_type_0, strides = var_33826, weight = layers_22_self_attn_q_proj_weight_to_fp16, x = obj_89_cast_fp16)[name = tensor("query_45_cast_fp16")]; + tensor var_33832 = const()[name = tensor("op_33832"), val = tensor([1, 1])]; + tensor var_33834 = const()[name = tensor("op_33834"), val = tensor([1, 1])]; + tensor key_45_pad_type_0 = const()[name = tensor("key_45_pad_type_0"), val = tensor("custom")]; + tensor key_45_pad_0 = const()[name = tensor("key_45_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_22_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_22_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(883352448)))]; + tensor key_45_cast_fp16 = conv(dilations = var_33834, groups = var_33793, pad = key_45_pad_0, pad_type = key_45_pad_type_0, strides = var_33832, weight = layers_22_self_attn_k_proj_weight_to_fp16, x = obj_89_cast_fp16)[name = tensor("key_45_cast_fp16")]; + tensor var_33839 = const()[name = tensor("op_33839"), val = tensor([1, 1])]; + tensor var_33841 = const()[name = tensor("op_33841"), val = tensor([1, 1])]; + tensor value_45_pad_type_0 = const()[name = tensor("value_45_pad_type_0"), val = tensor("custom")]; + tensor value_45_pad_0 = const()[name = tensor("value_45_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_22_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_22_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(886629312)))]; + tensor layers_22_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_22_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(889906176)))]; + tensor value_45_cast_fp16 = conv(bias = layers_22_self_attn_v_proj_bias_to_fp16, dilations = var_33841, groups = var_33793, pad = value_45_pad_0, pad_type = value_45_pad_type_0, strides = var_33839, weight = layers_22_self_attn_v_proj_weight_to_fp16, x = obj_89_cast_fp16)[name = tensor("value_45_cast_fp16")]; + tensor var_33848_begin_0 = const()[name = tensor("op_33848_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_33848_end_0 = const()[name = tensor("op_33848_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_33848_end_mask_0 = const()[name = tensor("op_33848_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_33848_cast_fp16 = slice_by_index(begin = var_33848_begin_0, end = var_33848_end_0, end_mask = var_33848_end_mask_0, x = query_45_cast_fp16)[name = tensor("op_33848_cast_fp16")]; + tensor var_33852_begin_0 = const()[name = tensor("op_33852_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_33852_end_0 = const()[name = tensor("op_33852_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_33852_end_mask_0 = const()[name = tensor("op_33852_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_33852_cast_fp16 = slice_by_index(begin = var_33852_begin_0, end = var_33852_end_0, end_mask = var_33852_end_mask_0, x = query_45_cast_fp16)[name = tensor("op_33852_cast_fp16")]; + tensor var_33856_begin_0 = const()[name = tensor("op_33856_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_33856_end_0 = const()[name = tensor("op_33856_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_33856_end_mask_0 = const()[name = tensor("op_33856_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_33856_cast_fp16 = slice_by_index(begin = var_33856_begin_0, end = var_33856_end_0, end_mask = var_33856_end_mask_0, x = query_45_cast_fp16)[name = tensor("op_33856_cast_fp16")]; + tensor var_33860_begin_0 = const()[name = tensor("op_33860_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_33860_end_0 = const()[name = tensor("op_33860_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_33860_end_mask_0 = const()[name = tensor("op_33860_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_33860_cast_fp16 = slice_by_index(begin = var_33860_begin_0, end = var_33860_end_0, end_mask = var_33860_end_mask_0, x = query_45_cast_fp16)[name = tensor("op_33860_cast_fp16")]; + tensor var_33864_begin_0 = const()[name = tensor("op_33864_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_33864_end_0 = const()[name = tensor("op_33864_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_33864_end_mask_0 = const()[name = tensor("op_33864_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_33864_cast_fp16 = slice_by_index(begin = var_33864_begin_0, end = var_33864_end_0, end_mask = var_33864_end_mask_0, x = query_45_cast_fp16)[name = tensor("op_33864_cast_fp16")]; + tensor var_33868_begin_0 = const()[name = tensor("op_33868_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_33868_end_0 = const()[name = tensor("op_33868_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_33868_end_mask_0 = const()[name = tensor("op_33868_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_33868_cast_fp16 = slice_by_index(begin = var_33868_begin_0, end = var_33868_end_0, end_mask = var_33868_end_mask_0, x = query_45_cast_fp16)[name = tensor("op_33868_cast_fp16")]; + tensor var_33872_begin_0 = const()[name = tensor("op_33872_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_33872_end_0 = const()[name = tensor("op_33872_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_33872_end_mask_0 = const()[name = tensor("op_33872_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_33872_cast_fp16 = slice_by_index(begin = var_33872_begin_0, end = var_33872_end_0, end_mask = var_33872_end_mask_0, x = query_45_cast_fp16)[name = tensor("op_33872_cast_fp16")]; + tensor var_33876_begin_0 = const()[name = tensor("op_33876_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_33876_end_0 = const()[name = tensor("op_33876_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_33876_end_mask_0 = const()[name = tensor("op_33876_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_33876_cast_fp16 = slice_by_index(begin = var_33876_begin_0, end = var_33876_end_0, end_mask = var_33876_end_mask_0, x = query_45_cast_fp16)[name = tensor("op_33876_cast_fp16")]; + tensor var_33880_begin_0 = const()[name = tensor("op_33880_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_33880_end_0 = const()[name = tensor("op_33880_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_33880_end_mask_0 = const()[name = tensor("op_33880_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_33880_cast_fp16 = slice_by_index(begin = var_33880_begin_0, end = var_33880_end_0, end_mask = var_33880_end_mask_0, x = query_45_cast_fp16)[name = tensor("op_33880_cast_fp16")]; + tensor var_33884_begin_0 = const()[name = tensor("op_33884_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_33884_end_0 = const()[name = tensor("op_33884_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_33884_end_mask_0 = const()[name = tensor("op_33884_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_33884_cast_fp16 = slice_by_index(begin = var_33884_begin_0, end = var_33884_end_0, end_mask = var_33884_end_mask_0, x = query_45_cast_fp16)[name = tensor("op_33884_cast_fp16")]; + tensor var_33888_begin_0 = const()[name = tensor("op_33888_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_33888_end_0 = const()[name = tensor("op_33888_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_33888_end_mask_0 = const()[name = tensor("op_33888_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_33888_cast_fp16 = slice_by_index(begin = var_33888_begin_0, end = var_33888_end_0, end_mask = var_33888_end_mask_0, x = query_45_cast_fp16)[name = tensor("op_33888_cast_fp16")]; + tensor var_33892_begin_0 = const()[name = tensor("op_33892_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_33892_end_0 = const()[name = tensor("op_33892_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_33892_end_mask_0 = const()[name = tensor("op_33892_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_33892_cast_fp16 = slice_by_index(begin = var_33892_begin_0, end = var_33892_end_0, end_mask = var_33892_end_mask_0, x = query_45_cast_fp16)[name = tensor("op_33892_cast_fp16")]; + tensor var_33896_begin_0 = const()[name = tensor("op_33896_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_33896_end_0 = const()[name = tensor("op_33896_end_0"), val = tensor([1, 832, 1, 1500])]; + tensor var_33896_end_mask_0 = const()[name = tensor("op_33896_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_33896_cast_fp16 = slice_by_index(begin = var_33896_begin_0, end = var_33896_end_0, end_mask = var_33896_end_mask_0, x = query_45_cast_fp16)[name = tensor("op_33896_cast_fp16")]; + tensor var_33900_begin_0 = const()[name = tensor("op_33900_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_33900_end_0 = const()[name = tensor("op_33900_end_0"), val = tensor([1, 896, 1, 1500])]; + tensor var_33900_end_mask_0 = const()[name = tensor("op_33900_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_33900_cast_fp16 = slice_by_index(begin = var_33900_begin_0, end = var_33900_end_0, end_mask = var_33900_end_mask_0, x = query_45_cast_fp16)[name = tensor("op_33900_cast_fp16")]; + tensor var_33904_begin_0 = const()[name = tensor("op_33904_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_33904_end_0 = const()[name = tensor("op_33904_end_0"), val = tensor([1, 960, 1, 1500])]; + tensor var_33904_end_mask_0 = const()[name = tensor("op_33904_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_33904_cast_fp16 = slice_by_index(begin = var_33904_begin_0, end = var_33904_end_0, end_mask = var_33904_end_mask_0, x = query_45_cast_fp16)[name = tensor("op_33904_cast_fp16")]; + tensor var_33908_begin_0 = const()[name = tensor("op_33908_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_33908_end_0 = const()[name = tensor("op_33908_end_0"), val = tensor([1, 1024, 1, 1500])]; + tensor var_33908_end_mask_0 = const()[name = tensor("op_33908_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_33908_cast_fp16 = slice_by_index(begin = var_33908_begin_0, end = var_33908_end_0, end_mask = var_33908_end_mask_0, x = query_45_cast_fp16)[name = tensor("op_33908_cast_fp16")]; + tensor var_33912_begin_0 = const()[name = tensor("op_33912_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_33912_end_0 = const()[name = tensor("op_33912_end_0"), val = tensor([1, 1088, 1, 1500])]; + tensor var_33912_end_mask_0 = const()[name = tensor("op_33912_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_33912_cast_fp16 = slice_by_index(begin = var_33912_begin_0, end = var_33912_end_0, end_mask = var_33912_end_mask_0, x = query_45_cast_fp16)[name = tensor("op_33912_cast_fp16")]; + tensor var_33916_begin_0 = const()[name = tensor("op_33916_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_33916_end_0 = const()[name = tensor("op_33916_end_0"), val = tensor([1, 1152, 1, 1500])]; + tensor var_33916_end_mask_0 = const()[name = tensor("op_33916_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_33916_cast_fp16 = slice_by_index(begin = var_33916_begin_0, end = var_33916_end_0, end_mask = var_33916_end_mask_0, x = query_45_cast_fp16)[name = tensor("op_33916_cast_fp16")]; + tensor var_33920_begin_0 = const()[name = tensor("op_33920_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_33920_end_0 = const()[name = tensor("op_33920_end_0"), val = tensor([1, 1216, 1, 1500])]; + tensor var_33920_end_mask_0 = const()[name = tensor("op_33920_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_33920_cast_fp16 = slice_by_index(begin = var_33920_begin_0, end = var_33920_end_0, end_mask = var_33920_end_mask_0, x = query_45_cast_fp16)[name = tensor("op_33920_cast_fp16")]; + tensor var_33924_begin_0 = const()[name = tensor("op_33924_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_33924_end_0 = const()[name = tensor("op_33924_end_0"), val = tensor([1, 1280, 1, 1500])]; + tensor var_33924_end_mask_0 = const()[name = tensor("op_33924_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_33924_cast_fp16 = slice_by_index(begin = var_33924_begin_0, end = var_33924_end_0, end_mask = var_33924_end_mask_0, x = query_45_cast_fp16)[name = tensor("op_33924_cast_fp16")]; + tensor var_33933_begin_0 = const()[name = tensor("op_33933_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_33933_end_0 = const()[name = tensor("op_33933_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_33933_end_mask_0 = const()[name = tensor("op_33933_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_33933_cast_fp16 = slice_by_index(begin = var_33933_begin_0, end = var_33933_end_0, end_mask = var_33933_end_mask_0, x = var_33848_cast_fp16)[name = tensor("op_33933_cast_fp16")]; + tensor var_33940_begin_0 = const()[name = tensor("op_33940_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_33940_end_0 = const()[name = tensor("op_33940_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_33940_end_mask_0 = const()[name = tensor("op_33940_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_33940_cast_fp16 = slice_by_index(begin = var_33940_begin_0, end = var_33940_end_0, end_mask = var_33940_end_mask_0, x = var_33848_cast_fp16)[name = tensor("op_33940_cast_fp16")]; + tensor var_33947_begin_0 = const()[name = tensor("op_33947_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_33947_end_0 = const()[name = tensor("op_33947_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_33947_end_mask_0 = const()[name = tensor("op_33947_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_33947_cast_fp16 = slice_by_index(begin = var_33947_begin_0, end = var_33947_end_0, end_mask = var_33947_end_mask_0, x = var_33848_cast_fp16)[name = tensor("op_33947_cast_fp16")]; + tensor var_33954_begin_0 = const()[name = tensor("op_33954_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_33954_end_0 = const()[name = tensor("op_33954_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_33954_end_mask_0 = const()[name = tensor("op_33954_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_33954_cast_fp16 = slice_by_index(begin = var_33954_begin_0, end = var_33954_end_0, end_mask = var_33954_end_mask_0, x = var_33848_cast_fp16)[name = tensor("op_33954_cast_fp16")]; + tensor var_33961_begin_0 = const()[name = tensor("op_33961_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_33961_end_0 = const()[name = tensor("op_33961_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_33961_end_mask_0 = const()[name = tensor("op_33961_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_33961_cast_fp16 = slice_by_index(begin = var_33961_begin_0, end = var_33961_end_0, end_mask = var_33961_end_mask_0, x = var_33852_cast_fp16)[name = tensor("op_33961_cast_fp16")]; + tensor var_33968_begin_0 = const()[name = tensor("op_33968_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_33968_end_0 = const()[name = tensor("op_33968_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_33968_end_mask_0 = const()[name = tensor("op_33968_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_33968_cast_fp16 = slice_by_index(begin = var_33968_begin_0, end = var_33968_end_0, end_mask = var_33968_end_mask_0, x = var_33852_cast_fp16)[name = tensor("op_33968_cast_fp16")]; + tensor var_33975_begin_0 = const()[name = tensor("op_33975_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_33975_end_0 = const()[name = tensor("op_33975_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_33975_end_mask_0 = const()[name = tensor("op_33975_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_33975_cast_fp16 = slice_by_index(begin = var_33975_begin_0, end = var_33975_end_0, end_mask = var_33975_end_mask_0, x = var_33852_cast_fp16)[name = tensor("op_33975_cast_fp16")]; + tensor var_33982_begin_0 = const()[name = tensor("op_33982_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_33982_end_0 = const()[name = tensor("op_33982_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_33982_end_mask_0 = const()[name = tensor("op_33982_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_33982_cast_fp16 = slice_by_index(begin = var_33982_begin_0, end = var_33982_end_0, end_mask = var_33982_end_mask_0, x = var_33852_cast_fp16)[name = tensor("op_33982_cast_fp16")]; + tensor var_33989_begin_0 = const()[name = tensor("op_33989_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_33989_end_0 = const()[name = tensor("op_33989_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_33989_end_mask_0 = const()[name = tensor("op_33989_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_33989_cast_fp16 = slice_by_index(begin = var_33989_begin_0, end = var_33989_end_0, end_mask = var_33989_end_mask_0, x = var_33856_cast_fp16)[name = tensor("op_33989_cast_fp16")]; + tensor var_33996_begin_0 = const()[name = tensor("op_33996_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_33996_end_0 = const()[name = tensor("op_33996_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_33996_end_mask_0 = const()[name = tensor("op_33996_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_33996_cast_fp16 = slice_by_index(begin = var_33996_begin_0, end = var_33996_end_0, end_mask = var_33996_end_mask_0, x = var_33856_cast_fp16)[name = tensor("op_33996_cast_fp16")]; + tensor var_34003_begin_0 = const()[name = tensor("op_34003_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_34003_end_0 = const()[name = tensor("op_34003_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_34003_end_mask_0 = const()[name = tensor("op_34003_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34003_cast_fp16 = slice_by_index(begin = var_34003_begin_0, end = var_34003_end_0, end_mask = var_34003_end_mask_0, x = var_33856_cast_fp16)[name = tensor("op_34003_cast_fp16")]; + tensor var_34010_begin_0 = const()[name = tensor("op_34010_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_34010_end_0 = const()[name = tensor("op_34010_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_34010_end_mask_0 = const()[name = tensor("op_34010_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34010_cast_fp16 = slice_by_index(begin = var_34010_begin_0, end = var_34010_end_0, end_mask = var_34010_end_mask_0, x = var_33856_cast_fp16)[name = tensor("op_34010_cast_fp16")]; + tensor var_34017_begin_0 = const()[name = tensor("op_34017_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_34017_end_0 = const()[name = tensor("op_34017_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_34017_end_mask_0 = const()[name = tensor("op_34017_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34017_cast_fp16 = slice_by_index(begin = var_34017_begin_0, end = var_34017_end_0, end_mask = var_34017_end_mask_0, x = var_33860_cast_fp16)[name = tensor("op_34017_cast_fp16")]; + tensor var_34024_begin_0 = const()[name = tensor("op_34024_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_34024_end_0 = const()[name = tensor("op_34024_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_34024_end_mask_0 = const()[name = tensor("op_34024_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34024_cast_fp16 = slice_by_index(begin = var_34024_begin_0, end = var_34024_end_0, end_mask = var_34024_end_mask_0, x = var_33860_cast_fp16)[name = tensor("op_34024_cast_fp16")]; + tensor var_34031_begin_0 = const()[name = tensor("op_34031_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_34031_end_0 = const()[name = tensor("op_34031_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_34031_end_mask_0 = const()[name = tensor("op_34031_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34031_cast_fp16 = slice_by_index(begin = var_34031_begin_0, end = var_34031_end_0, end_mask = var_34031_end_mask_0, x = var_33860_cast_fp16)[name = tensor("op_34031_cast_fp16")]; + tensor var_34038_begin_0 = const()[name = tensor("op_34038_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_34038_end_0 = const()[name = tensor("op_34038_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_34038_end_mask_0 = const()[name = tensor("op_34038_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34038_cast_fp16 = slice_by_index(begin = var_34038_begin_0, end = var_34038_end_0, end_mask = var_34038_end_mask_0, x = var_33860_cast_fp16)[name = tensor("op_34038_cast_fp16")]; + tensor var_34045_begin_0 = const()[name = tensor("op_34045_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_34045_end_0 = const()[name = tensor("op_34045_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_34045_end_mask_0 = const()[name = tensor("op_34045_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34045_cast_fp16 = slice_by_index(begin = var_34045_begin_0, end = var_34045_end_0, end_mask = var_34045_end_mask_0, x = var_33864_cast_fp16)[name = tensor("op_34045_cast_fp16")]; + tensor var_34052_begin_0 = const()[name = tensor("op_34052_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_34052_end_0 = const()[name = tensor("op_34052_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_34052_end_mask_0 = const()[name = tensor("op_34052_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34052_cast_fp16 = slice_by_index(begin = var_34052_begin_0, end = var_34052_end_0, end_mask = var_34052_end_mask_0, x = var_33864_cast_fp16)[name = tensor("op_34052_cast_fp16")]; + tensor var_34059_begin_0 = const()[name = tensor("op_34059_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_34059_end_0 = const()[name = tensor("op_34059_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_34059_end_mask_0 = const()[name = tensor("op_34059_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34059_cast_fp16 = slice_by_index(begin = var_34059_begin_0, end = var_34059_end_0, end_mask = var_34059_end_mask_0, x = var_33864_cast_fp16)[name = tensor("op_34059_cast_fp16")]; + tensor var_34066_begin_0 = const()[name = tensor("op_34066_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_34066_end_0 = const()[name = tensor("op_34066_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_34066_end_mask_0 = const()[name = tensor("op_34066_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34066_cast_fp16 = slice_by_index(begin = var_34066_begin_0, end = var_34066_end_0, end_mask = var_34066_end_mask_0, x = var_33864_cast_fp16)[name = tensor("op_34066_cast_fp16")]; + tensor var_34073_begin_0 = const()[name = tensor("op_34073_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_34073_end_0 = const()[name = tensor("op_34073_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_34073_end_mask_0 = const()[name = tensor("op_34073_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34073_cast_fp16 = slice_by_index(begin = var_34073_begin_0, end = var_34073_end_0, end_mask = var_34073_end_mask_0, x = var_33868_cast_fp16)[name = tensor("op_34073_cast_fp16")]; + tensor var_34080_begin_0 = const()[name = tensor("op_34080_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_34080_end_0 = const()[name = tensor("op_34080_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_34080_end_mask_0 = const()[name = tensor("op_34080_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34080_cast_fp16 = slice_by_index(begin = var_34080_begin_0, end = var_34080_end_0, end_mask = var_34080_end_mask_0, x = var_33868_cast_fp16)[name = tensor("op_34080_cast_fp16")]; + tensor var_34087_begin_0 = const()[name = tensor("op_34087_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_34087_end_0 = const()[name = tensor("op_34087_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_34087_end_mask_0 = const()[name = tensor("op_34087_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34087_cast_fp16 = slice_by_index(begin = var_34087_begin_0, end = var_34087_end_0, end_mask = var_34087_end_mask_0, x = var_33868_cast_fp16)[name = tensor("op_34087_cast_fp16")]; + tensor var_34094_begin_0 = const()[name = tensor("op_34094_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_34094_end_0 = const()[name = tensor("op_34094_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_34094_end_mask_0 = const()[name = tensor("op_34094_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34094_cast_fp16 = slice_by_index(begin = var_34094_begin_0, end = var_34094_end_0, end_mask = var_34094_end_mask_0, x = var_33868_cast_fp16)[name = tensor("op_34094_cast_fp16")]; + tensor var_34101_begin_0 = const()[name = tensor("op_34101_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_34101_end_0 = const()[name = tensor("op_34101_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_34101_end_mask_0 = const()[name = tensor("op_34101_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34101_cast_fp16 = slice_by_index(begin = var_34101_begin_0, end = var_34101_end_0, end_mask = var_34101_end_mask_0, x = var_33872_cast_fp16)[name = tensor("op_34101_cast_fp16")]; + tensor var_34108_begin_0 = const()[name = tensor("op_34108_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_34108_end_0 = const()[name = tensor("op_34108_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_34108_end_mask_0 = const()[name = tensor("op_34108_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34108_cast_fp16 = slice_by_index(begin = var_34108_begin_0, end = var_34108_end_0, end_mask = var_34108_end_mask_0, x = var_33872_cast_fp16)[name = tensor("op_34108_cast_fp16")]; + tensor var_34115_begin_0 = const()[name = tensor("op_34115_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_34115_end_0 = const()[name = tensor("op_34115_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_34115_end_mask_0 = const()[name = tensor("op_34115_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34115_cast_fp16 = slice_by_index(begin = var_34115_begin_0, end = var_34115_end_0, end_mask = var_34115_end_mask_0, x = var_33872_cast_fp16)[name = tensor("op_34115_cast_fp16")]; + tensor var_34122_begin_0 = const()[name = tensor("op_34122_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_34122_end_0 = const()[name = tensor("op_34122_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_34122_end_mask_0 = const()[name = tensor("op_34122_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34122_cast_fp16 = slice_by_index(begin = var_34122_begin_0, end = var_34122_end_0, end_mask = var_34122_end_mask_0, x = var_33872_cast_fp16)[name = tensor("op_34122_cast_fp16")]; + tensor var_34129_begin_0 = const()[name = tensor("op_34129_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_34129_end_0 = const()[name = tensor("op_34129_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_34129_end_mask_0 = const()[name = tensor("op_34129_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34129_cast_fp16 = slice_by_index(begin = var_34129_begin_0, end = var_34129_end_0, end_mask = var_34129_end_mask_0, x = var_33876_cast_fp16)[name = tensor("op_34129_cast_fp16")]; + tensor var_34136_begin_0 = const()[name = tensor("op_34136_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_34136_end_0 = const()[name = tensor("op_34136_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_34136_end_mask_0 = const()[name = tensor("op_34136_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34136_cast_fp16 = slice_by_index(begin = var_34136_begin_0, end = var_34136_end_0, end_mask = var_34136_end_mask_0, x = var_33876_cast_fp16)[name = tensor("op_34136_cast_fp16")]; + tensor var_34143_begin_0 = const()[name = tensor("op_34143_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_34143_end_0 = const()[name = tensor("op_34143_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_34143_end_mask_0 = const()[name = tensor("op_34143_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34143_cast_fp16 = slice_by_index(begin = var_34143_begin_0, end = var_34143_end_0, end_mask = var_34143_end_mask_0, x = var_33876_cast_fp16)[name = tensor("op_34143_cast_fp16")]; + tensor var_34150_begin_0 = const()[name = tensor("op_34150_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_34150_end_0 = const()[name = tensor("op_34150_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_34150_end_mask_0 = const()[name = tensor("op_34150_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34150_cast_fp16 = slice_by_index(begin = var_34150_begin_0, end = var_34150_end_0, end_mask = var_34150_end_mask_0, x = var_33876_cast_fp16)[name = tensor("op_34150_cast_fp16")]; + tensor var_34157_begin_0 = const()[name = tensor("op_34157_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_34157_end_0 = const()[name = tensor("op_34157_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_34157_end_mask_0 = const()[name = tensor("op_34157_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34157_cast_fp16 = slice_by_index(begin = var_34157_begin_0, end = var_34157_end_0, end_mask = var_34157_end_mask_0, x = var_33880_cast_fp16)[name = tensor("op_34157_cast_fp16")]; + tensor var_34164_begin_0 = const()[name = tensor("op_34164_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_34164_end_0 = const()[name = tensor("op_34164_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_34164_end_mask_0 = const()[name = tensor("op_34164_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34164_cast_fp16 = slice_by_index(begin = var_34164_begin_0, end = var_34164_end_0, end_mask = var_34164_end_mask_0, x = var_33880_cast_fp16)[name = tensor("op_34164_cast_fp16")]; + tensor var_34171_begin_0 = const()[name = tensor("op_34171_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_34171_end_0 = const()[name = tensor("op_34171_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_34171_end_mask_0 = const()[name = tensor("op_34171_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34171_cast_fp16 = slice_by_index(begin = var_34171_begin_0, end = var_34171_end_0, end_mask = var_34171_end_mask_0, x = var_33880_cast_fp16)[name = tensor("op_34171_cast_fp16")]; + tensor var_34178_begin_0 = const()[name = tensor("op_34178_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_34178_end_0 = const()[name = tensor("op_34178_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_34178_end_mask_0 = const()[name = tensor("op_34178_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34178_cast_fp16 = slice_by_index(begin = var_34178_begin_0, end = var_34178_end_0, end_mask = var_34178_end_mask_0, x = var_33880_cast_fp16)[name = tensor("op_34178_cast_fp16")]; + tensor var_34185_begin_0 = const()[name = tensor("op_34185_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_34185_end_0 = const()[name = tensor("op_34185_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_34185_end_mask_0 = const()[name = tensor("op_34185_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34185_cast_fp16 = slice_by_index(begin = var_34185_begin_0, end = var_34185_end_0, end_mask = var_34185_end_mask_0, x = var_33884_cast_fp16)[name = tensor("op_34185_cast_fp16")]; + tensor var_34192_begin_0 = const()[name = tensor("op_34192_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_34192_end_0 = const()[name = tensor("op_34192_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_34192_end_mask_0 = const()[name = tensor("op_34192_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34192_cast_fp16 = slice_by_index(begin = var_34192_begin_0, end = var_34192_end_0, end_mask = var_34192_end_mask_0, x = var_33884_cast_fp16)[name = tensor("op_34192_cast_fp16")]; + tensor var_34199_begin_0 = const()[name = tensor("op_34199_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_34199_end_0 = const()[name = tensor("op_34199_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_34199_end_mask_0 = const()[name = tensor("op_34199_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34199_cast_fp16 = slice_by_index(begin = var_34199_begin_0, end = var_34199_end_0, end_mask = var_34199_end_mask_0, x = var_33884_cast_fp16)[name = tensor("op_34199_cast_fp16")]; + tensor var_34206_begin_0 = const()[name = tensor("op_34206_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_34206_end_0 = const()[name = tensor("op_34206_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_34206_end_mask_0 = const()[name = tensor("op_34206_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34206_cast_fp16 = slice_by_index(begin = var_34206_begin_0, end = var_34206_end_0, end_mask = var_34206_end_mask_0, x = var_33884_cast_fp16)[name = tensor("op_34206_cast_fp16")]; + tensor var_34213_begin_0 = const()[name = tensor("op_34213_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_34213_end_0 = const()[name = tensor("op_34213_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_34213_end_mask_0 = const()[name = tensor("op_34213_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34213_cast_fp16 = slice_by_index(begin = var_34213_begin_0, end = var_34213_end_0, end_mask = var_34213_end_mask_0, x = var_33888_cast_fp16)[name = tensor("op_34213_cast_fp16")]; + tensor var_34220_begin_0 = const()[name = tensor("op_34220_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_34220_end_0 = const()[name = tensor("op_34220_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_34220_end_mask_0 = const()[name = tensor("op_34220_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34220_cast_fp16 = slice_by_index(begin = var_34220_begin_0, end = var_34220_end_0, end_mask = var_34220_end_mask_0, x = var_33888_cast_fp16)[name = tensor("op_34220_cast_fp16")]; + tensor var_34227_begin_0 = const()[name = tensor("op_34227_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_34227_end_0 = const()[name = tensor("op_34227_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_34227_end_mask_0 = const()[name = tensor("op_34227_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34227_cast_fp16 = slice_by_index(begin = var_34227_begin_0, end = var_34227_end_0, end_mask = var_34227_end_mask_0, x = var_33888_cast_fp16)[name = tensor("op_34227_cast_fp16")]; + tensor var_34234_begin_0 = const()[name = tensor("op_34234_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_34234_end_0 = const()[name = tensor("op_34234_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_34234_end_mask_0 = const()[name = tensor("op_34234_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34234_cast_fp16 = slice_by_index(begin = var_34234_begin_0, end = var_34234_end_0, end_mask = var_34234_end_mask_0, x = var_33888_cast_fp16)[name = tensor("op_34234_cast_fp16")]; + tensor var_34241_begin_0 = const()[name = tensor("op_34241_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_34241_end_0 = const()[name = tensor("op_34241_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_34241_end_mask_0 = const()[name = tensor("op_34241_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34241_cast_fp16 = slice_by_index(begin = var_34241_begin_0, end = var_34241_end_0, end_mask = var_34241_end_mask_0, x = var_33892_cast_fp16)[name = tensor("op_34241_cast_fp16")]; + tensor var_34248_begin_0 = const()[name = tensor("op_34248_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_34248_end_0 = const()[name = tensor("op_34248_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_34248_end_mask_0 = const()[name = tensor("op_34248_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34248_cast_fp16 = slice_by_index(begin = var_34248_begin_0, end = var_34248_end_0, end_mask = var_34248_end_mask_0, x = var_33892_cast_fp16)[name = tensor("op_34248_cast_fp16")]; + tensor var_34255_begin_0 = const()[name = tensor("op_34255_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_34255_end_0 = const()[name = tensor("op_34255_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_34255_end_mask_0 = const()[name = tensor("op_34255_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34255_cast_fp16 = slice_by_index(begin = var_34255_begin_0, end = var_34255_end_0, end_mask = var_34255_end_mask_0, x = var_33892_cast_fp16)[name = tensor("op_34255_cast_fp16")]; + tensor var_34262_begin_0 = const()[name = tensor("op_34262_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_34262_end_0 = const()[name = tensor("op_34262_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_34262_end_mask_0 = const()[name = tensor("op_34262_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34262_cast_fp16 = slice_by_index(begin = var_34262_begin_0, end = var_34262_end_0, end_mask = var_34262_end_mask_0, x = var_33892_cast_fp16)[name = tensor("op_34262_cast_fp16")]; + tensor var_34269_begin_0 = const()[name = tensor("op_34269_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_34269_end_0 = const()[name = tensor("op_34269_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_34269_end_mask_0 = const()[name = tensor("op_34269_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34269_cast_fp16 = slice_by_index(begin = var_34269_begin_0, end = var_34269_end_0, end_mask = var_34269_end_mask_0, x = var_33896_cast_fp16)[name = tensor("op_34269_cast_fp16")]; + tensor var_34276_begin_0 = const()[name = tensor("op_34276_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_34276_end_0 = const()[name = tensor("op_34276_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_34276_end_mask_0 = const()[name = tensor("op_34276_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34276_cast_fp16 = slice_by_index(begin = var_34276_begin_0, end = var_34276_end_0, end_mask = var_34276_end_mask_0, x = var_33896_cast_fp16)[name = tensor("op_34276_cast_fp16")]; + tensor var_34283_begin_0 = const()[name = tensor("op_34283_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_34283_end_0 = const()[name = tensor("op_34283_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_34283_end_mask_0 = const()[name = tensor("op_34283_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34283_cast_fp16 = slice_by_index(begin = var_34283_begin_0, end = var_34283_end_0, end_mask = var_34283_end_mask_0, x = var_33896_cast_fp16)[name = tensor("op_34283_cast_fp16")]; + tensor var_34290_begin_0 = const()[name = tensor("op_34290_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_34290_end_0 = const()[name = tensor("op_34290_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_34290_end_mask_0 = const()[name = tensor("op_34290_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34290_cast_fp16 = slice_by_index(begin = var_34290_begin_0, end = var_34290_end_0, end_mask = var_34290_end_mask_0, x = var_33896_cast_fp16)[name = tensor("op_34290_cast_fp16")]; + tensor var_34297_begin_0 = const()[name = tensor("op_34297_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_34297_end_0 = const()[name = tensor("op_34297_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_34297_end_mask_0 = const()[name = tensor("op_34297_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34297_cast_fp16 = slice_by_index(begin = var_34297_begin_0, end = var_34297_end_0, end_mask = var_34297_end_mask_0, x = var_33900_cast_fp16)[name = tensor("op_34297_cast_fp16")]; + tensor var_34304_begin_0 = const()[name = tensor("op_34304_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_34304_end_0 = const()[name = tensor("op_34304_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_34304_end_mask_0 = const()[name = tensor("op_34304_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34304_cast_fp16 = slice_by_index(begin = var_34304_begin_0, end = var_34304_end_0, end_mask = var_34304_end_mask_0, x = var_33900_cast_fp16)[name = tensor("op_34304_cast_fp16")]; + tensor var_34311_begin_0 = const()[name = tensor("op_34311_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_34311_end_0 = const()[name = tensor("op_34311_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_34311_end_mask_0 = const()[name = tensor("op_34311_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34311_cast_fp16 = slice_by_index(begin = var_34311_begin_0, end = var_34311_end_0, end_mask = var_34311_end_mask_0, x = var_33900_cast_fp16)[name = tensor("op_34311_cast_fp16")]; + tensor var_34318_begin_0 = const()[name = tensor("op_34318_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_34318_end_0 = const()[name = tensor("op_34318_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_34318_end_mask_0 = const()[name = tensor("op_34318_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34318_cast_fp16 = slice_by_index(begin = var_34318_begin_0, end = var_34318_end_0, end_mask = var_34318_end_mask_0, x = var_33900_cast_fp16)[name = tensor("op_34318_cast_fp16")]; + tensor var_34325_begin_0 = const()[name = tensor("op_34325_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_34325_end_0 = const()[name = tensor("op_34325_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_34325_end_mask_0 = const()[name = tensor("op_34325_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34325_cast_fp16 = slice_by_index(begin = var_34325_begin_0, end = var_34325_end_0, end_mask = var_34325_end_mask_0, x = var_33904_cast_fp16)[name = tensor("op_34325_cast_fp16")]; + tensor var_34332_begin_0 = const()[name = tensor("op_34332_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_34332_end_0 = const()[name = tensor("op_34332_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_34332_end_mask_0 = const()[name = tensor("op_34332_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34332_cast_fp16 = slice_by_index(begin = var_34332_begin_0, end = var_34332_end_0, end_mask = var_34332_end_mask_0, x = var_33904_cast_fp16)[name = tensor("op_34332_cast_fp16")]; + tensor var_34339_begin_0 = const()[name = tensor("op_34339_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_34339_end_0 = const()[name = tensor("op_34339_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_34339_end_mask_0 = const()[name = tensor("op_34339_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34339_cast_fp16 = slice_by_index(begin = var_34339_begin_0, end = var_34339_end_0, end_mask = var_34339_end_mask_0, x = var_33904_cast_fp16)[name = tensor("op_34339_cast_fp16")]; + tensor var_34346_begin_0 = const()[name = tensor("op_34346_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_34346_end_0 = const()[name = tensor("op_34346_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_34346_end_mask_0 = const()[name = tensor("op_34346_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34346_cast_fp16 = slice_by_index(begin = var_34346_begin_0, end = var_34346_end_0, end_mask = var_34346_end_mask_0, x = var_33904_cast_fp16)[name = tensor("op_34346_cast_fp16")]; + tensor var_34353_begin_0 = const()[name = tensor("op_34353_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_34353_end_0 = const()[name = tensor("op_34353_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_34353_end_mask_0 = const()[name = tensor("op_34353_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34353_cast_fp16 = slice_by_index(begin = var_34353_begin_0, end = var_34353_end_0, end_mask = var_34353_end_mask_0, x = var_33908_cast_fp16)[name = tensor("op_34353_cast_fp16")]; + tensor var_34360_begin_0 = const()[name = tensor("op_34360_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_34360_end_0 = const()[name = tensor("op_34360_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_34360_end_mask_0 = const()[name = tensor("op_34360_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34360_cast_fp16 = slice_by_index(begin = var_34360_begin_0, end = var_34360_end_0, end_mask = var_34360_end_mask_0, x = var_33908_cast_fp16)[name = tensor("op_34360_cast_fp16")]; + tensor var_34367_begin_0 = const()[name = tensor("op_34367_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_34367_end_0 = const()[name = tensor("op_34367_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_34367_end_mask_0 = const()[name = tensor("op_34367_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34367_cast_fp16 = slice_by_index(begin = var_34367_begin_0, end = var_34367_end_0, end_mask = var_34367_end_mask_0, x = var_33908_cast_fp16)[name = tensor("op_34367_cast_fp16")]; + tensor var_34374_begin_0 = const()[name = tensor("op_34374_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_34374_end_0 = const()[name = tensor("op_34374_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_34374_end_mask_0 = const()[name = tensor("op_34374_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34374_cast_fp16 = slice_by_index(begin = var_34374_begin_0, end = var_34374_end_0, end_mask = var_34374_end_mask_0, x = var_33908_cast_fp16)[name = tensor("op_34374_cast_fp16")]; + tensor var_34381_begin_0 = const()[name = tensor("op_34381_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_34381_end_0 = const()[name = tensor("op_34381_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_34381_end_mask_0 = const()[name = tensor("op_34381_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34381_cast_fp16 = slice_by_index(begin = var_34381_begin_0, end = var_34381_end_0, end_mask = var_34381_end_mask_0, x = var_33912_cast_fp16)[name = tensor("op_34381_cast_fp16")]; + tensor var_34388_begin_0 = const()[name = tensor("op_34388_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_34388_end_0 = const()[name = tensor("op_34388_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_34388_end_mask_0 = const()[name = tensor("op_34388_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34388_cast_fp16 = slice_by_index(begin = var_34388_begin_0, end = var_34388_end_0, end_mask = var_34388_end_mask_0, x = var_33912_cast_fp16)[name = tensor("op_34388_cast_fp16")]; + tensor var_34395_begin_0 = const()[name = tensor("op_34395_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_34395_end_0 = const()[name = tensor("op_34395_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_34395_end_mask_0 = const()[name = tensor("op_34395_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34395_cast_fp16 = slice_by_index(begin = var_34395_begin_0, end = var_34395_end_0, end_mask = var_34395_end_mask_0, x = var_33912_cast_fp16)[name = tensor("op_34395_cast_fp16")]; + tensor var_34402_begin_0 = const()[name = tensor("op_34402_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_34402_end_0 = const()[name = tensor("op_34402_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_34402_end_mask_0 = const()[name = tensor("op_34402_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34402_cast_fp16 = slice_by_index(begin = var_34402_begin_0, end = var_34402_end_0, end_mask = var_34402_end_mask_0, x = var_33912_cast_fp16)[name = tensor("op_34402_cast_fp16")]; + tensor var_34409_begin_0 = const()[name = tensor("op_34409_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_34409_end_0 = const()[name = tensor("op_34409_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_34409_end_mask_0 = const()[name = tensor("op_34409_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34409_cast_fp16 = slice_by_index(begin = var_34409_begin_0, end = var_34409_end_0, end_mask = var_34409_end_mask_0, x = var_33916_cast_fp16)[name = tensor("op_34409_cast_fp16")]; + tensor var_34416_begin_0 = const()[name = tensor("op_34416_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_34416_end_0 = const()[name = tensor("op_34416_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_34416_end_mask_0 = const()[name = tensor("op_34416_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34416_cast_fp16 = slice_by_index(begin = var_34416_begin_0, end = var_34416_end_0, end_mask = var_34416_end_mask_0, x = var_33916_cast_fp16)[name = tensor("op_34416_cast_fp16")]; + tensor var_34423_begin_0 = const()[name = tensor("op_34423_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_34423_end_0 = const()[name = tensor("op_34423_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_34423_end_mask_0 = const()[name = tensor("op_34423_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34423_cast_fp16 = slice_by_index(begin = var_34423_begin_0, end = var_34423_end_0, end_mask = var_34423_end_mask_0, x = var_33916_cast_fp16)[name = tensor("op_34423_cast_fp16")]; + tensor var_34430_begin_0 = const()[name = tensor("op_34430_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_34430_end_0 = const()[name = tensor("op_34430_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_34430_end_mask_0 = const()[name = tensor("op_34430_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34430_cast_fp16 = slice_by_index(begin = var_34430_begin_0, end = var_34430_end_0, end_mask = var_34430_end_mask_0, x = var_33916_cast_fp16)[name = tensor("op_34430_cast_fp16")]; + tensor var_34437_begin_0 = const()[name = tensor("op_34437_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_34437_end_0 = const()[name = tensor("op_34437_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_34437_end_mask_0 = const()[name = tensor("op_34437_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34437_cast_fp16 = slice_by_index(begin = var_34437_begin_0, end = var_34437_end_0, end_mask = var_34437_end_mask_0, x = var_33920_cast_fp16)[name = tensor("op_34437_cast_fp16")]; + tensor var_34444_begin_0 = const()[name = tensor("op_34444_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_34444_end_0 = const()[name = tensor("op_34444_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_34444_end_mask_0 = const()[name = tensor("op_34444_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34444_cast_fp16 = slice_by_index(begin = var_34444_begin_0, end = var_34444_end_0, end_mask = var_34444_end_mask_0, x = var_33920_cast_fp16)[name = tensor("op_34444_cast_fp16")]; + tensor var_34451_begin_0 = const()[name = tensor("op_34451_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_34451_end_0 = const()[name = tensor("op_34451_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_34451_end_mask_0 = const()[name = tensor("op_34451_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34451_cast_fp16 = slice_by_index(begin = var_34451_begin_0, end = var_34451_end_0, end_mask = var_34451_end_mask_0, x = var_33920_cast_fp16)[name = tensor("op_34451_cast_fp16")]; + tensor var_34458_begin_0 = const()[name = tensor("op_34458_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_34458_end_0 = const()[name = tensor("op_34458_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_34458_end_mask_0 = const()[name = tensor("op_34458_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34458_cast_fp16 = slice_by_index(begin = var_34458_begin_0, end = var_34458_end_0, end_mask = var_34458_end_mask_0, x = var_33920_cast_fp16)[name = tensor("op_34458_cast_fp16")]; + tensor var_34465_begin_0 = const()[name = tensor("op_34465_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_34465_end_0 = const()[name = tensor("op_34465_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_34465_end_mask_0 = const()[name = tensor("op_34465_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34465_cast_fp16 = slice_by_index(begin = var_34465_begin_0, end = var_34465_end_0, end_mask = var_34465_end_mask_0, x = var_33924_cast_fp16)[name = tensor("op_34465_cast_fp16")]; + tensor var_34472_begin_0 = const()[name = tensor("op_34472_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_34472_end_0 = const()[name = tensor("op_34472_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_34472_end_mask_0 = const()[name = tensor("op_34472_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34472_cast_fp16 = slice_by_index(begin = var_34472_begin_0, end = var_34472_end_0, end_mask = var_34472_end_mask_0, x = var_33924_cast_fp16)[name = tensor("op_34472_cast_fp16")]; + tensor var_34479_begin_0 = const()[name = tensor("op_34479_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_34479_end_0 = const()[name = tensor("op_34479_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_34479_end_mask_0 = const()[name = tensor("op_34479_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34479_cast_fp16 = slice_by_index(begin = var_34479_begin_0, end = var_34479_end_0, end_mask = var_34479_end_mask_0, x = var_33924_cast_fp16)[name = tensor("op_34479_cast_fp16")]; + tensor var_34486_begin_0 = const()[name = tensor("op_34486_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_34486_end_0 = const()[name = tensor("op_34486_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_34486_end_mask_0 = const()[name = tensor("op_34486_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34486_cast_fp16 = slice_by_index(begin = var_34486_begin_0, end = var_34486_end_0, end_mask = var_34486_end_mask_0, x = var_33924_cast_fp16)[name = tensor("op_34486_cast_fp16")]; + tensor k_45_perm_0 = const()[name = tensor("k_45_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_34491_begin_0 = const()[name = tensor("op_34491_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_34491_end_0 = const()[name = tensor("op_34491_end_0"), val = tensor([1, 1500, 1, 64])]; + tensor var_34491_end_mask_0 = const()[name = tensor("op_34491_end_mask_0"), val = tensor([true, true, true, false])]; + tensor transpose_9 = transpose(perm = k_45_perm_0, x = key_45_cast_fp16)[name = tensor("transpose_9")]; + tensor var_34491_cast_fp16 = slice_by_index(begin = var_34491_begin_0, end = var_34491_end_0, end_mask = var_34491_end_mask_0, x = transpose_9)[name = tensor("op_34491_cast_fp16")]; + tensor var_34495_begin_0 = const()[name = tensor("op_34495_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor var_34495_end_0 = const()[name = tensor("op_34495_end_0"), val = tensor([1, 1500, 1, 128])]; + tensor var_34495_end_mask_0 = const()[name = tensor("op_34495_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34495_cast_fp16 = slice_by_index(begin = var_34495_begin_0, end = var_34495_end_0, end_mask = var_34495_end_mask_0, x = transpose_9)[name = tensor("op_34495_cast_fp16")]; + tensor var_34499_begin_0 = const()[name = tensor("op_34499_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_34499_end_0 = const()[name = tensor("op_34499_end_0"), val = tensor([1, 1500, 1, 192])]; + tensor var_34499_end_mask_0 = const()[name = tensor("op_34499_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34499_cast_fp16 = slice_by_index(begin = var_34499_begin_0, end = var_34499_end_0, end_mask = var_34499_end_mask_0, x = transpose_9)[name = tensor("op_34499_cast_fp16")]; + tensor var_34503_begin_0 = const()[name = tensor("op_34503_begin_0"), val = tensor([0, 0, 0, 192])]; + tensor var_34503_end_0 = const()[name = tensor("op_34503_end_0"), val = tensor([1, 1500, 1, 256])]; + tensor var_34503_end_mask_0 = const()[name = tensor("op_34503_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34503_cast_fp16 = slice_by_index(begin = var_34503_begin_0, end = var_34503_end_0, end_mask = var_34503_end_mask_0, x = transpose_9)[name = tensor("op_34503_cast_fp16")]; + tensor var_34507_begin_0 = const()[name = tensor("op_34507_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_34507_end_0 = const()[name = tensor("op_34507_end_0"), val = tensor([1, 1500, 1, 320])]; + tensor var_34507_end_mask_0 = const()[name = tensor("op_34507_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34507_cast_fp16 = slice_by_index(begin = var_34507_begin_0, end = var_34507_end_0, end_mask = var_34507_end_mask_0, x = transpose_9)[name = tensor("op_34507_cast_fp16")]; + tensor var_34511_begin_0 = const()[name = tensor("op_34511_begin_0"), val = tensor([0, 0, 0, 320])]; + tensor var_34511_end_0 = const()[name = tensor("op_34511_end_0"), val = tensor([1, 1500, 1, 384])]; + tensor var_34511_end_mask_0 = const()[name = tensor("op_34511_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34511_cast_fp16 = slice_by_index(begin = var_34511_begin_0, end = var_34511_end_0, end_mask = var_34511_end_mask_0, x = transpose_9)[name = tensor("op_34511_cast_fp16")]; + tensor var_34515_begin_0 = const()[name = tensor("op_34515_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_34515_end_0 = const()[name = tensor("op_34515_end_0"), val = tensor([1, 1500, 1, 448])]; + tensor var_34515_end_mask_0 = const()[name = tensor("op_34515_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34515_cast_fp16 = slice_by_index(begin = var_34515_begin_0, end = var_34515_end_0, end_mask = var_34515_end_mask_0, x = transpose_9)[name = tensor("op_34515_cast_fp16")]; + tensor var_34519_begin_0 = const()[name = tensor("op_34519_begin_0"), val = tensor([0, 0, 0, 448])]; + tensor var_34519_end_0 = const()[name = tensor("op_34519_end_0"), val = tensor([1, 1500, 1, 512])]; + tensor var_34519_end_mask_0 = const()[name = tensor("op_34519_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34519_cast_fp16 = slice_by_index(begin = var_34519_begin_0, end = var_34519_end_0, end_mask = var_34519_end_mask_0, x = transpose_9)[name = tensor("op_34519_cast_fp16")]; + tensor var_34523_begin_0 = const()[name = tensor("op_34523_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_34523_end_0 = const()[name = tensor("op_34523_end_0"), val = tensor([1, 1500, 1, 576])]; + tensor var_34523_end_mask_0 = const()[name = tensor("op_34523_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34523_cast_fp16 = slice_by_index(begin = var_34523_begin_0, end = var_34523_end_0, end_mask = var_34523_end_mask_0, x = transpose_9)[name = tensor("op_34523_cast_fp16")]; + tensor var_34527_begin_0 = const()[name = tensor("op_34527_begin_0"), val = tensor([0, 0, 0, 576])]; + tensor var_34527_end_0 = const()[name = tensor("op_34527_end_0"), val = tensor([1, 1500, 1, 640])]; + tensor var_34527_end_mask_0 = const()[name = tensor("op_34527_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34527_cast_fp16 = slice_by_index(begin = var_34527_begin_0, end = var_34527_end_0, end_mask = var_34527_end_mask_0, x = transpose_9)[name = tensor("op_34527_cast_fp16")]; + tensor var_34531_begin_0 = const()[name = tensor("op_34531_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_34531_end_0 = const()[name = tensor("op_34531_end_0"), val = tensor([1, 1500, 1, 704])]; + tensor var_34531_end_mask_0 = const()[name = tensor("op_34531_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34531_cast_fp16 = slice_by_index(begin = var_34531_begin_0, end = var_34531_end_0, end_mask = var_34531_end_mask_0, x = transpose_9)[name = tensor("op_34531_cast_fp16")]; + tensor var_34535_begin_0 = const()[name = tensor("op_34535_begin_0"), val = tensor([0, 0, 0, 704])]; + tensor var_34535_end_0 = const()[name = tensor("op_34535_end_0"), val = tensor([1, 1500, 1, 768])]; + tensor var_34535_end_mask_0 = const()[name = tensor("op_34535_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34535_cast_fp16 = slice_by_index(begin = var_34535_begin_0, end = var_34535_end_0, end_mask = var_34535_end_mask_0, x = transpose_9)[name = tensor("op_34535_cast_fp16")]; + tensor var_34539_begin_0 = const()[name = tensor("op_34539_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_34539_end_0 = const()[name = tensor("op_34539_end_0"), val = tensor([1, 1500, 1, 832])]; + tensor var_34539_end_mask_0 = const()[name = tensor("op_34539_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34539_cast_fp16 = slice_by_index(begin = var_34539_begin_0, end = var_34539_end_0, end_mask = var_34539_end_mask_0, x = transpose_9)[name = tensor("op_34539_cast_fp16")]; + tensor var_34543_begin_0 = const()[name = tensor("op_34543_begin_0"), val = tensor([0, 0, 0, 832])]; + tensor var_34543_end_0 = const()[name = tensor("op_34543_end_0"), val = tensor([1, 1500, 1, 896])]; + tensor var_34543_end_mask_0 = const()[name = tensor("op_34543_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34543_cast_fp16 = slice_by_index(begin = var_34543_begin_0, end = var_34543_end_0, end_mask = var_34543_end_mask_0, x = transpose_9)[name = tensor("op_34543_cast_fp16")]; + tensor var_34547_begin_0 = const()[name = tensor("op_34547_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_34547_end_0 = const()[name = tensor("op_34547_end_0"), val = tensor([1, 1500, 1, 960])]; + tensor var_34547_end_mask_0 = const()[name = tensor("op_34547_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34547_cast_fp16 = slice_by_index(begin = var_34547_begin_0, end = var_34547_end_0, end_mask = var_34547_end_mask_0, x = transpose_9)[name = tensor("op_34547_cast_fp16")]; + tensor var_34551_begin_0 = const()[name = tensor("op_34551_begin_0"), val = tensor([0, 0, 0, 960])]; + tensor var_34551_end_0 = const()[name = tensor("op_34551_end_0"), val = tensor([1, 1500, 1, 1024])]; + tensor var_34551_end_mask_0 = const()[name = tensor("op_34551_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34551_cast_fp16 = slice_by_index(begin = var_34551_begin_0, end = var_34551_end_0, end_mask = var_34551_end_mask_0, x = transpose_9)[name = tensor("op_34551_cast_fp16")]; + tensor var_34555_begin_0 = const()[name = tensor("op_34555_begin_0"), val = tensor([0, 0, 0, 1024])]; + tensor var_34555_end_0 = const()[name = tensor("op_34555_end_0"), val = tensor([1, 1500, 1, 1088])]; + tensor var_34555_end_mask_0 = const()[name = tensor("op_34555_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34555_cast_fp16 = slice_by_index(begin = var_34555_begin_0, end = var_34555_end_0, end_mask = var_34555_end_mask_0, x = transpose_9)[name = tensor("op_34555_cast_fp16")]; + tensor var_34559_begin_0 = const()[name = tensor("op_34559_begin_0"), val = tensor([0, 0, 0, 1088])]; + tensor var_34559_end_0 = const()[name = tensor("op_34559_end_0"), val = tensor([1, 1500, 1, 1152])]; + tensor var_34559_end_mask_0 = const()[name = tensor("op_34559_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34559_cast_fp16 = slice_by_index(begin = var_34559_begin_0, end = var_34559_end_0, end_mask = var_34559_end_mask_0, x = transpose_9)[name = tensor("op_34559_cast_fp16")]; + tensor var_34563_begin_0 = const()[name = tensor("op_34563_begin_0"), val = tensor([0, 0, 0, 1152])]; + tensor var_34563_end_0 = const()[name = tensor("op_34563_end_0"), val = tensor([1, 1500, 1, 1216])]; + tensor var_34563_end_mask_0 = const()[name = tensor("op_34563_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34563_cast_fp16 = slice_by_index(begin = var_34563_begin_0, end = var_34563_end_0, end_mask = var_34563_end_mask_0, x = transpose_9)[name = tensor("op_34563_cast_fp16")]; + tensor var_34567_begin_0 = const()[name = tensor("op_34567_begin_0"), val = tensor([0, 0, 0, 1216])]; + tensor var_34567_end_0 = const()[name = tensor("op_34567_end_0"), val = tensor([1, 1500, 1, 1280])]; + tensor var_34567_end_mask_0 = const()[name = tensor("op_34567_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_34567_cast_fp16 = slice_by_index(begin = var_34567_begin_0, end = var_34567_end_0, end_mask = var_34567_end_mask_0, x = transpose_9)[name = tensor("op_34567_cast_fp16")]; + tensor var_34569_begin_0 = const()[name = tensor("op_34569_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_34569_end_0 = const()[name = tensor("op_34569_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_34569_end_mask_0 = const()[name = tensor("op_34569_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_34569_cast_fp16 = slice_by_index(begin = var_34569_begin_0, end = var_34569_end_0, end_mask = var_34569_end_mask_0, x = value_45_cast_fp16)[name = tensor("op_34569_cast_fp16")]; + tensor var_34573_begin_0 = const()[name = tensor("op_34573_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_34573_end_0 = const()[name = tensor("op_34573_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_34573_end_mask_0 = const()[name = tensor("op_34573_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_34573_cast_fp16 = slice_by_index(begin = var_34573_begin_0, end = var_34573_end_0, end_mask = var_34573_end_mask_0, x = value_45_cast_fp16)[name = tensor("op_34573_cast_fp16")]; + tensor var_34577_begin_0 = const()[name = tensor("op_34577_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_34577_end_0 = const()[name = tensor("op_34577_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_34577_end_mask_0 = const()[name = tensor("op_34577_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_34577_cast_fp16 = slice_by_index(begin = var_34577_begin_0, end = var_34577_end_0, end_mask = var_34577_end_mask_0, x = value_45_cast_fp16)[name = tensor("op_34577_cast_fp16")]; + tensor var_34581_begin_0 = const()[name = tensor("op_34581_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_34581_end_0 = const()[name = tensor("op_34581_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_34581_end_mask_0 = const()[name = tensor("op_34581_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_34581_cast_fp16 = slice_by_index(begin = var_34581_begin_0, end = var_34581_end_0, end_mask = var_34581_end_mask_0, x = value_45_cast_fp16)[name = tensor("op_34581_cast_fp16")]; + tensor var_34585_begin_0 = const()[name = tensor("op_34585_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_34585_end_0 = const()[name = tensor("op_34585_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_34585_end_mask_0 = const()[name = tensor("op_34585_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_34585_cast_fp16 = slice_by_index(begin = var_34585_begin_0, end = var_34585_end_0, end_mask = var_34585_end_mask_0, x = value_45_cast_fp16)[name = tensor("op_34585_cast_fp16")]; + tensor var_34589_begin_0 = const()[name = tensor("op_34589_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_34589_end_0 = const()[name = tensor("op_34589_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_34589_end_mask_0 = const()[name = tensor("op_34589_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_34589_cast_fp16 = slice_by_index(begin = var_34589_begin_0, end = var_34589_end_0, end_mask = var_34589_end_mask_0, x = value_45_cast_fp16)[name = tensor("op_34589_cast_fp16")]; + tensor var_34593_begin_0 = const()[name = tensor("op_34593_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_34593_end_0 = const()[name = tensor("op_34593_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_34593_end_mask_0 = const()[name = tensor("op_34593_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_34593_cast_fp16 = slice_by_index(begin = var_34593_begin_0, end = var_34593_end_0, end_mask = var_34593_end_mask_0, x = value_45_cast_fp16)[name = tensor("op_34593_cast_fp16")]; + tensor var_34597_begin_0 = const()[name = tensor("op_34597_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_34597_end_0 = const()[name = tensor("op_34597_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_34597_end_mask_0 = const()[name = tensor("op_34597_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_34597_cast_fp16 = slice_by_index(begin = var_34597_begin_0, end = var_34597_end_0, end_mask = var_34597_end_mask_0, x = value_45_cast_fp16)[name = tensor("op_34597_cast_fp16")]; + tensor var_34601_begin_0 = const()[name = tensor("op_34601_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_34601_end_0 = const()[name = tensor("op_34601_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_34601_end_mask_0 = const()[name = tensor("op_34601_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_34601_cast_fp16 = slice_by_index(begin = var_34601_begin_0, end = var_34601_end_0, end_mask = var_34601_end_mask_0, x = value_45_cast_fp16)[name = tensor("op_34601_cast_fp16")]; + tensor var_34605_begin_0 = const()[name = tensor("op_34605_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_34605_end_0 = const()[name = tensor("op_34605_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_34605_end_mask_0 = const()[name = tensor("op_34605_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_34605_cast_fp16 = slice_by_index(begin = var_34605_begin_0, end = var_34605_end_0, end_mask = var_34605_end_mask_0, x = value_45_cast_fp16)[name = tensor("op_34605_cast_fp16")]; + tensor var_34609_begin_0 = const()[name = tensor("op_34609_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_34609_end_0 = const()[name = tensor("op_34609_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_34609_end_mask_0 = const()[name = tensor("op_34609_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_34609_cast_fp16 = slice_by_index(begin = var_34609_begin_0, end = var_34609_end_0, end_mask = var_34609_end_mask_0, x = value_45_cast_fp16)[name = tensor("op_34609_cast_fp16")]; + tensor var_34613_begin_0 = const()[name = tensor("op_34613_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_34613_end_0 = const()[name = tensor("op_34613_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_34613_end_mask_0 = const()[name = tensor("op_34613_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_34613_cast_fp16 = slice_by_index(begin = var_34613_begin_0, end = var_34613_end_0, end_mask = var_34613_end_mask_0, x = value_45_cast_fp16)[name = tensor("op_34613_cast_fp16")]; + tensor var_34617_begin_0 = const()[name = tensor("op_34617_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_34617_end_0 = const()[name = tensor("op_34617_end_0"), val = tensor([1, 832, 1, 1500])]; + tensor var_34617_end_mask_0 = const()[name = tensor("op_34617_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_34617_cast_fp16 = slice_by_index(begin = var_34617_begin_0, end = var_34617_end_0, end_mask = var_34617_end_mask_0, x = value_45_cast_fp16)[name = tensor("op_34617_cast_fp16")]; + tensor var_34621_begin_0 = const()[name = tensor("op_34621_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_34621_end_0 = const()[name = tensor("op_34621_end_0"), val = tensor([1, 896, 1, 1500])]; + tensor var_34621_end_mask_0 = const()[name = tensor("op_34621_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_34621_cast_fp16 = slice_by_index(begin = var_34621_begin_0, end = var_34621_end_0, end_mask = var_34621_end_mask_0, x = value_45_cast_fp16)[name = tensor("op_34621_cast_fp16")]; + tensor var_34625_begin_0 = const()[name = tensor("op_34625_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_34625_end_0 = const()[name = tensor("op_34625_end_0"), val = tensor([1, 960, 1, 1500])]; + tensor var_34625_end_mask_0 = const()[name = tensor("op_34625_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_34625_cast_fp16 = slice_by_index(begin = var_34625_begin_0, end = var_34625_end_0, end_mask = var_34625_end_mask_0, x = value_45_cast_fp16)[name = tensor("op_34625_cast_fp16")]; + tensor var_34629_begin_0 = const()[name = tensor("op_34629_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_34629_end_0 = const()[name = tensor("op_34629_end_0"), val = tensor([1, 1024, 1, 1500])]; + tensor var_34629_end_mask_0 = const()[name = tensor("op_34629_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_34629_cast_fp16 = slice_by_index(begin = var_34629_begin_0, end = var_34629_end_0, end_mask = var_34629_end_mask_0, x = value_45_cast_fp16)[name = tensor("op_34629_cast_fp16")]; + tensor var_34633_begin_0 = const()[name = tensor("op_34633_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_34633_end_0 = const()[name = tensor("op_34633_end_0"), val = tensor([1, 1088, 1, 1500])]; + tensor var_34633_end_mask_0 = const()[name = tensor("op_34633_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_34633_cast_fp16 = slice_by_index(begin = var_34633_begin_0, end = var_34633_end_0, end_mask = var_34633_end_mask_0, x = value_45_cast_fp16)[name = tensor("op_34633_cast_fp16")]; + tensor var_34637_begin_0 = const()[name = tensor("op_34637_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_34637_end_0 = const()[name = tensor("op_34637_end_0"), val = tensor([1, 1152, 1, 1500])]; + tensor var_34637_end_mask_0 = const()[name = tensor("op_34637_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_34637_cast_fp16 = slice_by_index(begin = var_34637_begin_0, end = var_34637_end_0, end_mask = var_34637_end_mask_0, x = value_45_cast_fp16)[name = tensor("op_34637_cast_fp16")]; + tensor var_34641_begin_0 = const()[name = tensor("op_34641_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_34641_end_0 = const()[name = tensor("op_34641_end_0"), val = tensor([1, 1216, 1, 1500])]; + tensor var_34641_end_mask_0 = const()[name = tensor("op_34641_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_34641_cast_fp16 = slice_by_index(begin = var_34641_begin_0, end = var_34641_end_0, end_mask = var_34641_end_mask_0, x = value_45_cast_fp16)[name = tensor("op_34641_cast_fp16")]; + tensor var_34645_begin_0 = const()[name = tensor("op_34645_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_34645_end_0 = const()[name = tensor("op_34645_end_0"), val = tensor([1, 1280, 1, 1500])]; + tensor var_34645_end_mask_0 = const()[name = tensor("op_34645_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_34645_cast_fp16 = slice_by_index(begin = var_34645_begin_0, end = var_34645_end_0, end_mask = var_34645_end_mask_0, x = value_45_cast_fp16)[name = tensor("op_34645_cast_fp16")]; + tensor var_34649_equation_0 = const()[name = tensor("op_34649_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34649_cast_fp16 = einsum(equation = var_34649_equation_0, values = (var_34491_cast_fp16, var_33933_cast_fp16))[name = tensor("op_34649_cast_fp16")]; + tensor var_34650_to_fp16 = const()[name = tensor("op_34650_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3521_cast_fp16 = mul(x = var_34649_cast_fp16, y = var_34650_to_fp16)[name = tensor("aw_chunk_3521_cast_fp16")]; + tensor var_34653_equation_0 = const()[name = tensor("op_34653_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34653_cast_fp16 = einsum(equation = var_34653_equation_0, values = (var_34491_cast_fp16, var_33940_cast_fp16))[name = tensor("op_34653_cast_fp16")]; + tensor var_34654_to_fp16 = const()[name = tensor("op_34654_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3523_cast_fp16 = mul(x = var_34653_cast_fp16, y = var_34654_to_fp16)[name = tensor("aw_chunk_3523_cast_fp16")]; + tensor var_34657_equation_0 = const()[name = tensor("op_34657_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34657_cast_fp16 = einsum(equation = var_34657_equation_0, values = (var_34491_cast_fp16, var_33947_cast_fp16))[name = tensor("op_34657_cast_fp16")]; + tensor var_34658_to_fp16 = const()[name = tensor("op_34658_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3525_cast_fp16 = mul(x = var_34657_cast_fp16, y = var_34658_to_fp16)[name = tensor("aw_chunk_3525_cast_fp16")]; + tensor var_34661_equation_0 = const()[name = tensor("op_34661_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34661_cast_fp16 = einsum(equation = var_34661_equation_0, values = (var_34491_cast_fp16, var_33954_cast_fp16))[name = tensor("op_34661_cast_fp16")]; + tensor var_34662_to_fp16 = const()[name = tensor("op_34662_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3527_cast_fp16 = mul(x = var_34661_cast_fp16, y = var_34662_to_fp16)[name = tensor("aw_chunk_3527_cast_fp16")]; + tensor var_34665_equation_0 = const()[name = tensor("op_34665_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34665_cast_fp16 = einsum(equation = var_34665_equation_0, values = (var_34495_cast_fp16, var_33961_cast_fp16))[name = tensor("op_34665_cast_fp16")]; + tensor var_34666_to_fp16 = const()[name = tensor("op_34666_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3529_cast_fp16 = mul(x = var_34665_cast_fp16, y = var_34666_to_fp16)[name = tensor("aw_chunk_3529_cast_fp16")]; + tensor var_34669_equation_0 = const()[name = tensor("op_34669_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34669_cast_fp16 = einsum(equation = var_34669_equation_0, values = (var_34495_cast_fp16, var_33968_cast_fp16))[name = tensor("op_34669_cast_fp16")]; + tensor var_34670_to_fp16 = const()[name = tensor("op_34670_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3531_cast_fp16 = mul(x = var_34669_cast_fp16, y = var_34670_to_fp16)[name = tensor("aw_chunk_3531_cast_fp16")]; + tensor var_34673_equation_0 = const()[name = tensor("op_34673_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34673_cast_fp16 = einsum(equation = var_34673_equation_0, values = (var_34495_cast_fp16, var_33975_cast_fp16))[name = tensor("op_34673_cast_fp16")]; + tensor var_34674_to_fp16 = const()[name = tensor("op_34674_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3533_cast_fp16 = mul(x = var_34673_cast_fp16, y = var_34674_to_fp16)[name = tensor("aw_chunk_3533_cast_fp16")]; + tensor var_34677_equation_0 = const()[name = tensor("op_34677_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34677_cast_fp16 = einsum(equation = var_34677_equation_0, values = (var_34495_cast_fp16, var_33982_cast_fp16))[name = tensor("op_34677_cast_fp16")]; + tensor var_34678_to_fp16 = const()[name = tensor("op_34678_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3535_cast_fp16 = mul(x = var_34677_cast_fp16, y = var_34678_to_fp16)[name = tensor("aw_chunk_3535_cast_fp16")]; + tensor var_34681_equation_0 = const()[name = tensor("op_34681_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34681_cast_fp16 = einsum(equation = var_34681_equation_0, values = (var_34499_cast_fp16, var_33989_cast_fp16))[name = tensor("op_34681_cast_fp16")]; + tensor var_34682_to_fp16 = const()[name = tensor("op_34682_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3537_cast_fp16 = mul(x = var_34681_cast_fp16, y = var_34682_to_fp16)[name = tensor("aw_chunk_3537_cast_fp16")]; + tensor var_34685_equation_0 = const()[name = tensor("op_34685_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34685_cast_fp16 = einsum(equation = var_34685_equation_0, values = (var_34499_cast_fp16, var_33996_cast_fp16))[name = tensor("op_34685_cast_fp16")]; + tensor var_34686_to_fp16 = const()[name = tensor("op_34686_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3539_cast_fp16 = mul(x = var_34685_cast_fp16, y = var_34686_to_fp16)[name = tensor("aw_chunk_3539_cast_fp16")]; + tensor var_34689_equation_0 = const()[name = tensor("op_34689_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34689_cast_fp16 = einsum(equation = var_34689_equation_0, values = (var_34499_cast_fp16, var_34003_cast_fp16))[name = tensor("op_34689_cast_fp16")]; + tensor var_34690_to_fp16 = const()[name = tensor("op_34690_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3541_cast_fp16 = mul(x = var_34689_cast_fp16, y = var_34690_to_fp16)[name = tensor("aw_chunk_3541_cast_fp16")]; + tensor var_34693_equation_0 = const()[name = tensor("op_34693_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34693_cast_fp16 = einsum(equation = var_34693_equation_0, values = (var_34499_cast_fp16, var_34010_cast_fp16))[name = tensor("op_34693_cast_fp16")]; + tensor var_34694_to_fp16 = const()[name = tensor("op_34694_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3543_cast_fp16 = mul(x = var_34693_cast_fp16, y = var_34694_to_fp16)[name = tensor("aw_chunk_3543_cast_fp16")]; + tensor var_34697_equation_0 = const()[name = tensor("op_34697_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34697_cast_fp16 = einsum(equation = var_34697_equation_0, values = (var_34503_cast_fp16, var_34017_cast_fp16))[name = tensor("op_34697_cast_fp16")]; + tensor var_34698_to_fp16 = const()[name = tensor("op_34698_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3545_cast_fp16 = mul(x = var_34697_cast_fp16, y = var_34698_to_fp16)[name = tensor("aw_chunk_3545_cast_fp16")]; + tensor var_34701_equation_0 = const()[name = tensor("op_34701_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34701_cast_fp16 = einsum(equation = var_34701_equation_0, values = (var_34503_cast_fp16, var_34024_cast_fp16))[name = tensor("op_34701_cast_fp16")]; + tensor var_34702_to_fp16 = const()[name = tensor("op_34702_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3547_cast_fp16 = mul(x = var_34701_cast_fp16, y = var_34702_to_fp16)[name = tensor("aw_chunk_3547_cast_fp16")]; + tensor var_34705_equation_0 = const()[name = tensor("op_34705_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34705_cast_fp16 = einsum(equation = var_34705_equation_0, values = (var_34503_cast_fp16, var_34031_cast_fp16))[name = tensor("op_34705_cast_fp16")]; + tensor var_34706_to_fp16 = const()[name = tensor("op_34706_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3549_cast_fp16 = mul(x = var_34705_cast_fp16, y = var_34706_to_fp16)[name = tensor("aw_chunk_3549_cast_fp16")]; + tensor var_34709_equation_0 = const()[name = tensor("op_34709_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34709_cast_fp16 = einsum(equation = var_34709_equation_0, values = (var_34503_cast_fp16, var_34038_cast_fp16))[name = tensor("op_34709_cast_fp16")]; + tensor var_34710_to_fp16 = const()[name = tensor("op_34710_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3551_cast_fp16 = mul(x = var_34709_cast_fp16, y = var_34710_to_fp16)[name = tensor("aw_chunk_3551_cast_fp16")]; + tensor var_34713_equation_0 = const()[name = tensor("op_34713_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34713_cast_fp16 = einsum(equation = var_34713_equation_0, values = (var_34507_cast_fp16, var_34045_cast_fp16))[name = tensor("op_34713_cast_fp16")]; + tensor var_34714_to_fp16 = const()[name = tensor("op_34714_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3553_cast_fp16 = mul(x = var_34713_cast_fp16, y = var_34714_to_fp16)[name = tensor("aw_chunk_3553_cast_fp16")]; + tensor var_34717_equation_0 = const()[name = tensor("op_34717_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34717_cast_fp16 = einsum(equation = var_34717_equation_0, values = (var_34507_cast_fp16, var_34052_cast_fp16))[name = tensor("op_34717_cast_fp16")]; + tensor var_34718_to_fp16 = const()[name = tensor("op_34718_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3555_cast_fp16 = mul(x = var_34717_cast_fp16, y = var_34718_to_fp16)[name = tensor("aw_chunk_3555_cast_fp16")]; + tensor var_34721_equation_0 = const()[name = tensor("op_34721_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34721_cast_fp16 = einsum(equation = var_34721_equation_0, values = (var_34507_cast_fp16, var_34059_cast_fp16))[name = tensor("op_34721_cast_fp16")]; + tensor var_34722_to_fp16 = const()[name = tensor("op_34722_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3557_cast_fp16 = mul(x = var_34721_cast_fp16, y = var_34722_to_fp16)[name = tensor("aw_chunk_3557_cast_fp16")]; + tensor var_34725_equation_0 = const()[name = tensor("op_34725_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34725_cast_fp16 = einsum(equation = var_34725_equation_0, values = (var_34507_cast_fp16, var_34066_cast_fp16))[name = tensor("op_34725_cast_fp16")]; + tensor var_34726_to_fp16 = const()[name = tensor("op_34726_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3559_cast_fp16 = mul(x = var_34725_cast_fp16, y = var_34726_to_fp16)[name = tensor("aw_chunk_3559_cast_fp16")]; + tensor var_34729_equation_0 = const()[name = tensor("op_34729_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34729_cast_fp16 = einsum(equation = var_34729_equation_0, values = (var_34511_cast_fp16, var_34073_cast_fp16))[name = tensor("op_34729_cast_fp16")]; + tensor var_34730_to_fp16 = const()[name = tensor("op_34730_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3561_cast_fp16 = mul(x = var_34729_cast_fp16, y = var_34730_to_fp16)[name = tensor("aw_chunk_3561_cast_fp16")]; + tensor var_34733_equation_0 = const()[name = tensor("op_34733_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34733_cast_fp16 = einsum(equation = var_34733_equation_0, values = (var_34511_cast_fp16, var_34080_cast_fp16))[name = tensor("op_34733_cast_fp16")]; + tensor var_34734_to_fp16 = const()[name = tensor("op_34734_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3563_cast_fp16 = mul(x = var_34733_cast_fp16, y = var_34734_to_fp16)[name = tensor("aw_chunk_3563_cast_fp16")]; + tensor var_34737_equation_0 = const()[name = tensor("op_34737_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34737_cast_fp16 = einsum(equation = var_34737_equation_0, values = (var_34511_cast_fp16, var_34087_cast_fp16))[name = tensor("op_34737_cast_fp16")]; + tensor var_34738_to_fp16 = const()[name = tensor("op_34738_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3565_cast_fp16 = mul(x = var_34737_cast_fp16, y = var_34738_to_fp16)[name = tensor("aw_chunk_3565_cast_fp16")]; + tensor var_34741_equation_0 = const()[name = tensor("op_34741_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34741_cast_fp16 = einsum(equation = var_34741_equation_0, values = (var_34511_cast_fp16, var_34094_cast_fp16))[name = tensor("op_34741_cast_fp16")]; + tensor var_34742_to_fp16 = const()[name = tensor("op_34742_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3567_cast_fp16 = mul(x = var_34741_cast_fp16, y = var_34742_to_fp16)[name = tensor("aw_chunk_3567_cast_fp16")]; + tensor var_34745_equation_0 = const()[name = tensor("op_34745_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34745_cast_fp16 = einsum(equation = var_34745_equation_0, values = (var_34515_cast_fp16, var_34101_cast_fp16))[name = tensor("op_34745_cast_fp16")]; + tensor var_34746_to_fp16 = const()[name = tensor("op_34746_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3569_cast_fp16 = mul(x = var_34745_cast_fp16, y = var_34746_to_fp16)[name = tensor("aw_chunk_3569_cast_fp16")]; + tensor var_34749_equation_0 = const()[name = tensor("op_34749_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34749_cast_fp16 = einsum(equation = var_34749_equation_0, values = (var_34515_cast_fp16, var_34108_cast_fp16))[name = tensor("op_34749_cast_fp16")]; + tensor var_34750_to_fp16 = const()[name = tensor("op_34750_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3571_cast_fp16 = mul(x = var_34749_cast_fp16, y = var_34750_to_fp16)[name = tensor("aw_chunk_3571_cast_fp16")]; + tensor var_34753_equation_0 = const()[name = tensor("op_34753_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34753_cast_fp16 = einsum(equation = var_34753_equation_0, values = (var_34515_cast_fp16, var_34115_cast_fp16))[name = tensor("op_34753_cast_fp16")]; + tensor var_34754_to_fp16 = const()[name = tensor("op_34754_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3573_cast_fp16 = mul(x = var_34753_cast_fp16, y = var_34754_to_fp16)[name = tensor("aw_chunk_3573_cast_fp16")]; + tensor var_34757_equation_0 = const()[name = tensor("op_34757_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34757_cast_fp16 = einsum(equation = var_34757_equation_0, values = (var_34515_cast_fp16, var_34122_cast_fp16))[name = tensor("op_34757_cast_fp16")]; + tensor var_34758_to_fp16 = const()[name = tensor("op_34758_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3575_cast_fp16 = mul(x = var_34757_cast_fp16, y = var_34758_to_fp16)[name = tensor("aw_chunk_3575_cast_fp16")]; + tensor var_34761_equation_0 = const()[name = tensor("op_34761_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34761_cast_fp16 = einsum(equation = var_34761_equation_0, values = (var_34519_cast_fp16, var_34129_cast_fp16))[name = tensor("op_34761_cast_fp16")]; + tensor var_34762_to_fp16 = const()[name = tensor("op_34762_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3577_cast_fp16 = mul(x = var_34761_cast_fp16, y = var_34762_to_fp16)[name = tensor("aw_chunk_3577_cast_fp16")]; + tensor var_34765_equation_0 = const()[name = tensor("op_34765_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34765_cast_fp16 = einsum(equation = var_34765_equation_0, values = (var_34519_cast_fp16, var_34136_cast_fp16))[name = tensor("op_34765_cast_fp16")]; + tensor var_34766_to_fp16 = const()[name = tensor("op_34766_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3579_cast_fp16 = mul(x = var_34765_cast_fp16, y = var_34766_to_fp16)[name = tensor("aw_chunk_3579_cast_fp16")]; + tensor var_34769_equation_0 = const()[name = tensor("op_34769_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34769_cast_fp16 = einsum(equation = var_34769_equation_0, values = (var_34519_cast_fp16, var_34143_cast_fp16))[name = tensor("op_34769_cast_fp16")]; + tensor var_34770_to_fp16 = const()[name = tensor("op_34770_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3581_cast_fp16 = mul(x = var_34769_cast_fp16, y = var_34770_to_fp16)[name = tensor("aw_chunk_3581_cast_fp16")]; + tensor var_34773_equation_0 = const()[name = tensor("op_34773_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34773_cast_fp16 = einsum(equation = var_34773_equation_0, values = (var_34519_cast_fp16, var_34150_cast_fp16))[name = tensor("op_34773_cast_fp16")]; + tensor var_34774_to_fp16 = const()[name = tensor("op_34774_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3583_cast_fp16 = mul(x = var_34773_cast_fp16, y = var_34774_to_fp16)[name = tensor("aw_chunk_3583_cast_fp16")]; + tensor var_34777_equation_0 = const()[name = tensor("op_34777_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34777_cast_fp16 = einsum(equation = var_34777_equation_0, values = (var_34523_cast_fp16, var_34157_cast_fp16))[name = tensor("op_34777_cast_fp16")]; + tensor var_34778_to_fp16 = const()[name = tensor("op_34778_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3585_cast_fp16 = mul(x = var_34777_cast_fp16, y = var_34778_to_fp16)[name = tensor("aw_chunk_3585_cast_fp16")]; + tensor var_34781_equation_0 = const()[name = tensor("op_34781_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34781_cast_fp16 = einsum(equation = var_34781_equation_0, values = (var_34523_cast_fp16, var_34164_cast_fp16))[name = tensor("op_34781_cast_fp16")]; + tensor var_34782_to_fp16 = const()[name = tensor("op_34782_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3587_cast_fp16 = mul(x = var_34781_cast_fp16, y = var_34782_to_fp16)[name = tensor("aw_chunk_3587_cast_fp16")]; + tensor var_34785_equation_0 = const()[name = tensor("op_34785_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34785_cast_fp16 = einsum(equation = var_34785_equation_0, values = (var_34523_cast_fp16, var_34171_cast_fp16))[name = tensor("op_34785_cast_fp16")]; + tensor var_34786_to_fp16 = const()[name = tensor("op_34786_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3589_cast_fp16 = mul(x = var_34785_cast_fp16, y = var_34786_to_fp16)[name = tensor("aw_chunk_3589_cast_fp16")]; + tensor var_34789_equation_0 = const()[name = tensor("op_34789_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34789_cast_fp16 = einsum(equation = var_34789_equation_0, values = (var_34523_cast_fp16, var_34178_cast_fp16))[name = tensor("op_34789_cast_fp16")]; + tensor var_34790_to_fp16 = const()[name = tensor("op_34790_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3591_cast_fp16 = mul(x = var_34789_cast_fp16, y = var_34790_to_fp16)[name = tensor("aw_chunk_3591_cast_fp16")]; + tensor var_34793_equation_0 = const()[name = tensor("op_34793_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34793_cast_fp16 = einsum(equation = var_34793_equation_0, values = (var_34527_cast_fp16, var_34185_cast_fp16))[name = tensor("op_34793_cast_fp16")]; + tensor var_34794_to_fp16 = const()[name = tensor("op_34794_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3593_cast_fp16 = mul(x = var_34793_cast_fp16, y = var_34794_to_fp16)[name = tensor("aw_chunk_3593_cast_fp16")]; + tensor var_34797_equation_0 = const()[name = tensor("op_34797_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34797_cast_fp16 = einsum(equation = var_34797_equation_0, values = (var_34527_cast_fp16, var_34192_cast_fp16))[name = tensor("op_34797_cast_fp16")]; + tensor var_34798_to_fp16 = const()[name = tensor("op_34798_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3595_cast_fp16 = mul(x = var_34797_cast_fp16, y = var_34798_to_fp16)[name = tensor("aw_chunk_3595_cast_fp16")]; + tensor var_34801_equation_0 = const()[name = tensor("op_34801_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34801_cast_fp16 = einsum(equation = var_34801_equation_0, values = (var_34527_cast_fp16, var_34199_cast_fp16))[name = tensor("op_34801_cast_fp16")]; + tensor var_34802_to_fp16 = const()[name = tensor("op_34802_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3597_cast_fp16 = mul(x = var_34801_cast_fp16, y = var_34802_to_fp16)[name = tensor("aw_chunk_3597_cast_fp16")]; + tensor var_34805_equation_0 = const()[name = tensor("op_34805_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34805_cast_fp16 = einsum(equation = var_34805_equation_0, values = (var_34527_cast_fp16, var_34206_cast_fp16))[name = tensor("op_34805_cast_fp16")]; + tensor var_34806_to_fp16 = const()[name = tensor("op_34806_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3599_cast_fp16 = mul(x = var_34805_cast_fp16, y = var_34806_to_fp16)[name = tensor("aw_chunk_3599_cast_fp16")]; + tensor var_34809_equation_0 = const()[name = tensor("op_34809_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34809_cast_fp16 = einsum(equation = var_34809_equation_0, values = (var_34531_cast_fp16, var_34213_cast_fp16))[name = tensor("op_34809_cast_fp16")]; + tensor var_34810_to_fp16 = const()[name = tensor("op_34810_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3601_cast_fp16 = mul(x = var_34809_cast_fp16, y = var_34810_to_fp16)[name = tensor("aw_chunk_3601_cast_fp16")]; + tensor var_34813_equation_0 = const()[name = tensor("op_34813_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34813_cast_fp16 = einsum(equation = var_34813_equation_0, values = (var_34531_cast_fp16, var_34220_cast_fp16))[name = tensor("op_34813_cast_fp16")]; + tensor var_34814_to_fp16 = const()[name = tensor("op_34814_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3603_cast_fp16 = mul(x = var_34813_cast_fp16, y = var_34814_to_fp16)[name = tensor("aw_chunk_3603_cast_fp16")]; + tensor var_34817_equation_0 = const()[name = tensor("op_34817_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34817_cast_fp16 = einsum(equation = var_34817_equation_0, values = (var_34531_cast_fp16, var_34227_cast_fp16))[name = tensor("op_34817_cast_fp16")]; + tensor var_34818_to_fp16 = const()[name = tensor("op_34818_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3605_cast_fp16 = mul(x = var_34817_cast_fp16, y = var_34818_to_fp16)[name = tensor("aw_chunk_3605_cast_fp16")]; + tensor var_34821_equation_0 = const()[name = tensor("op_34821_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34821_cast_fp16 = einsum(equation = var_34821_equation_0, values = (var_34531_cast_fp16, var_34234_cast_fp16))[name = tensor("op_34821_cast_fp16")]; + tensor var_34822_to_fp16 = const()[name = tensor("op_34822_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3607_cast_fp16 = mul(x = var_34821_cast_fp16, y = var_34822_to_fp16)[name = tensor("aw_chunk_3607_cast_fp16")]; + tensor var_34825_equation_0 = const()[name = tensor("op_34825_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34825_cast_fp16 = einsum(equation = var_34825_equation_0, values = (var_34535_cast_fp16, var_34241_cast_fp16))[name = tensor("op_34825_cast_fp16")]; + tensor var_34826_to_fp16 = const()[name = tensor("op_34826_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3609_cast_fp16 = mul(x = var_34825_cast_fp16, y = var_34826_to_fp16)[name = tensor("aw_chunk_3609_cast_fp16")]; + tensor var_34829_equation_0 = const()[name = tensor("op_34829_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34829_cast_fp16 = einsum(equation = var_34829_equation_0, values = (var_34535_cast_fp16, var_34248_cast_fp16))[name = tensor("op_34829_cast_fp16")]; + tensor var_34830_to_fp16 = const()[name = tensor("op_34830_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3611_cast_fp16 = mul(x = var_34829_cast_fp16, y = var_34830_to_fp16)[name = tensor("aw_chunk_3611_cast_fp16")]; + tensor var_34833_equation_0 = const()[name = tensor("op_34833_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34833_cast_fp16 = einsum(equation = var_34833_equation_0, values = (var_34535_cast_fp16, var_34255_cast_fp16))[name = tensor("op_34833_cast_fp16")]; + tensor var_34834_to_fp16 = const()[name = tensor("op_34834_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3613_cast_fp16 = mul(x = var_34833_cast_fp16, y = var_34834_to_fp16)[name = tensor("aw_chunk_3613_cast_fp16")]; + tensor var_34837_equation_0 = const()[name = tensor("op_34837_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34837_cast_fp16 = einsum(equation = var_34837_equation_0, values = (var_34535_cast_fp16, var_34262_cast_fp16))[name = tensor("op_34837_cast_fp16")]; + tensor var_34838_to_fp16 = const()[name = tensor("op_34838_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3615_cast_fp16 = mul(x = var_34837_cast_fp16, y = var_34838_to_fp16)[name = tensor("aw_chunk_3615_cast_fp16")]; + tensor var_34841_equation_0 = const()[name = tensor("op_34841_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34841_cast_fp16 = einsum(equation = var_34841_equation_0, values = (var_34539_cast_fp16, var_34269_cast_fp16))[name = tensor("op_34841_cast_fp16")]; + tensor var_34842_to_fp16 = const()[name = tensor("op_34842_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3617_cast_fp16 = mul(x = var_34841_cast_fp16, y = var_34842_to_fp16)[name = tensor("aw_chunk_3617_cast_fp16")]; + tensor var_34845_equation_0 = const()[name = tensor("op_34845_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34845_cast_fp16 = einsum(equation = var_34845_equation_0, values = (var_34539_cast_fp16, var_34276_cast_fp16))[name = tensor("op_34845_cast_fp16")]; + tensor var_34846_to_fp16 = const()[name = tensor("op_34846_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3619_cast_fp16 = mul(x = var_34845_cast_fp16, y = var_34846_to_fp16)[name = tensor("aw_chunk_3619_cast_fp16")]; + tensor var_34849_equation_0 = const()[name = tensor("op_34849_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34849_cast_fp16 = einsum(equation = var_34849_equation_0, values = (var_34539_cast_fp16, var_34283_cast_fp16))[name = tensor("op_34849_cast_fp16")]; + tensor var_34850_to_fp16 = const()[name = tensor("op_34850_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3621_cast_fp16 = mul(x = var_34849_cast_fp16, y = var_34850_to_fp16)[name = tensor("aw_chunk_3621_cast_fp16")]; + tensor var_34853_equation_0 = const()[name = tensor("op_34853_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34853_cast_fp16 = einsum(equation = var_34853_equation_0, values = (var_34539_cast_fp16, var_34290_cast_fp16))[name = tensor("op_34853_cast_fp16")]; + tensor var_34854_to_fp16 = const()[name = tensor("op_34854_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3623_cast_fp16 = mul(x = var_34853_cast_fp16, y = var_34854_to_fp16)[name = tensor("aw_chunk_3623_cast_fp16")]; + tensor var_34857_equation_0 = const()[name = tensor("op_34857_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34857_cast_fp16 = einsum(equation = var_34857_equation_0, values = (var_34543_cast_fp16, var_34297_cast_fp16))[name = tensor("op_34857_cast_fp16")]; + tensor var_34858_to_fp16 = const()[name = tensor("op_34858_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3625_cast_fp16 = mul(x = var_34857_cast_fp16, y = var_34858_to_fp16)[name = tensor("aw_chunk_3625_cast_fp16")]; + tensor var_34861_equation_0 = const()[name = tensor("op_34861_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34861_cast_fp16 = einsum(equation = var_34861_equation_0, values = (var_34543_cast_fp16, var_34304_cast_fp16))[name = tensor("op_34861_cast_fp16")]; + tensor var_34862_to_fp16 = const()[name = tensor("op_34862_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3627_cast_fp16 = mul(x = var_34861_cast_fp16, y = var_34862_to_fp16)[name = tensor("aw_chunk_3627_cast_fp16")]; + tensor var_34865_equation_0 = const()[name = tensor("op_34865_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34865_cast_fp16 = einsum(equation = var_34865_equation_0, values = (var_34543_cast_fp16, var_34311_cast_fp16))[name = tensor("op_34865_cast_fp16")]; + tensor var_34866_to_fp16 = const()[name = tensor("op_34866_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3629_cast_fp16 = mul(x = var_34865_cast_fp16, y = var_34866_to_fp16)[name = tensor("aw_chunk_3629_cast_fp16")]; + tensor var_34869_equation_0 = const()[name = tensor("op_34869_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34869_cast_fp16 = einsum(equation = var_34869_equation_0, values = (var_34543_cast_fp16, var_34318_cast_fp16))[name = tensor("op_34869_cast_fp16")]; + tensor var_34870_to_fp16 = const()[name = tensor("op_34870_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3631_cast_fp16 = mul(x = var_34869_cast_fp16, y = var_34870_to_fp16)[name = tensor("aw_chunk_3631_cast_fp16")]; + tensor var_34873_equation_0 = const()[name = tensor("op_34873_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34873_cast_fp16 = einsum(equation = var_34873_equation_0, values = (var_34547_cast_fp16, var_34325_cast_fp16))[name = tensor("op_34873_cast_fp16")]; + tensor var_34874_to_fp16 = const()[name = tensor("op_34874_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3633_cast_fp16 = mul(x = var_34873_cast_fp16, y = var_34874_to_fp16)[name = tensor("aw_chunk_3633_cast_fp16")]; + tensor var_34877_equation_0 = const()[name = tensor("op_34877_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34877_cast_fp16 = einsum(equation = var_34877_equation_0, values = (var_34547_cast_fp16, var_34332_cast_fp16))[name = tensor("op_34877_cast_fp16")]; + tensor var_34878_to_fp16 = const()[name = tensor("op_34878_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3635_cast_fp16 = mul(x = var_34877_cast_fp16, y = var_34878_to_fp16)[name = tensor("aw_chunk_3635_cast_fp16")]; + tensor var_34881_equation_0 = const()[name = tensor("op_34881_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34881_cast_fp16 = einsum(equation = var_34881_equation_0, values = (var_34547_cast_fp16, var_34339_cast_fp16))[name = tensor("op_34881_cast_fp16")]; + tensor var_34882_to_fp16 = const()[name = tensor("op_34882_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3637_cast_fp16 = mul(x = var_34881_cast_fp16, y = var_34882_to_fp16)[name = tensor("aw_chunk_3637_cast_fp16")]; + tensor var_34885_equation_0 = const()[name = tensor("op_34885_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34885_cast_fp16 = einsum(equation = var_34885_equation_0, values = (var_34547_cast_fp16, var_34346_cast_fp16))[name = tensor("op_34885_cast_fp16")]; + tensor var_34886_to_fp16 = const()[name = tensor("op_34886_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3639_cast_fp16 = mul(x = var_34885_cast_fp16, y = var_34886_to_fp16)[name = tensor("aw_chunk_3639_cast_fp16")]; + tensor var_34889_equation_0 = const()[name = tensor("op_34889_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34889_cast_fp16 = einsum(equation = var_34889_equation_0, values = (var_34551_cast_fp16, var_34353_cast_fp16))[name = tensor("op_34889_cast_fp16")]; + tensor var_34890_to_fp16 = const()[name = tensor("op_34890_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3641_cast_fp16 = mul(x = var_34889_cast_fp16, y = var_34890_to_fp16)[name = tensor("aw_chunk_3641_cast_fp16")]; + tensor var_34893_equation_0 = const()[name = tensor("op_34893_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34893_cast_fp16 = einsum(equation = var_34893_equation_0, values = (var_34551_cast_fp16, var_34360_cast_fp16))[name = tensor("op_34893_cast_fp16")]; + tensor var_34894_to_fp16 = const()[name = tensor("op_34894_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3643_cast_fp16 = mul(x = var_34893_cast_fp16, y = var_34894_to_fp16)[name = tensor("aw_chunk_3643_cast_fp16")]; + tensor var_34897_equation_0 = const()[name = tensor("op_34897_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34897_cast_fp16 = einsum(equation = var_34897_equation_0, values = (var_34551_cast_fp16, var_34367_cast_fp16))[name = tensor("op_34897_cast_fp16")]; + tensor var_34898_to_fp16 = const()[name = tensor("op_34898_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3645_cast_fp16 = mul(x = var_34897_cast_fp16, y = var_34898_to_fp16)[name = tensor("aw_chunk_3645_cast_fp16")]; + tensor var_34901_equation_0 = const()[name = tensor("op_34901_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34901_cast_fp16 = einsum(equation = var_34901_equation_0, values = (var_34551_cast_fp16, var_34374_cast_fp16))[name = tensor("op_34901_cast_fp16")]; + tensor var_34902_to_fp16 = const()[name = tensor("op_34902_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3647_cast_fp16 = mul(x = var_34901_cast_fp16, y = var_34902_to_fp16)[name = tensor("aw_chunk_3647_cast_fp16")]; + tensor var_34905_equation_0 = const()[name = tensor("op_34905_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34905_cast_fp16 = einsum(equation = var_34905_equation_0, values = (var_34555_cast_fp16, var_34381_cast_fp16))[name = tensor("op_34905_cast_fp16")]; + tensor var_34906_to_fp16 = const()[name = tensor("op_34906_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3649_cast_fp16 = mul(x = var_34905_cast_fp16, y = var_34906_to_fp16)[name = tensor("aw_chunk_3649_cast_fp16")]; + tensor var_34909_equation_0 = const()[name = tensor("op_34909_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34909_cast_fp16 = einsum(equation = var_34909_equation_0, values = (var_34555_cast_fp16, var_34388_cast_fp16))[name = tensor("op_34909_cast_fp16")]; + tensor var_34910_to_fp16 = const()[name = tensor("op_34910_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3651_cast_fp16 = mul(x = var_34909_cast_fp16, y = var_34910_to_fp16)[name = tensor("aw_chunk_3651_cast_fp16")]; + tensor var_34913_equation_0 = const()[name = tensor("op_34913_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34913_cast_fp16 = einsum(equation = var_34913_equation_0, values = (var_34555_cast_fp16, var_34395_cast_fp16))[name = tensor("op_34913_cast_fp16")]; + tensor var_34914_to_fp16 = const()[name = tensor("op_34914_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3653_cast_fp16 = mul(x = var_34913_cast_fp16, y = var_34914_to_fp16)[name = tensor("aw_chunk_3653_cast_fp16")]; + tensor var_34917_equation_0 = const()[name = tensor("op_34917_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34917_cast_fp16 = einsum(equation = var_34917_equation_0, values = (var_34555_cast_fp16, var_34402_cast_fp16))[name = tensor("op_34917_cast_fp16")]; + tensor var_34918_to_fp16 = const()[name = tensor("op_34918_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3655_cast_fp16 = mul(x = var_34917_cast_fp16, y = var_34918_to_fp16)[name = tensor("aw_chunk_3655_cast_fp16")]; + tensor var_34921_equation_0 = const()[name = tensor("op_34921_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34921_cast_fp16 = einsum(equation = var_34921_equation_0, values = (var_34559_cast_fp16, var_34409_cast_fp16))[name = tensor("op_34921_cast_fp16")]; + tensor var_34922_to_fp16 = const()[name = tensor("op_34922_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3657_cast_fp16 = mul(x = var_34921_cast_fp16, y = var_34922_to_fp16)[name = tensor("aw_chunk_3657_cast_fp16")]; + tensor var_34925_equation_0 = const()[name = tensor("op_34925_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34925_cast_fp16 = einsum(equation = var_34925_equation_0, values = (var_34559_cast_fp16, var_34416_cast_fp16))[name = tensor("op_34925_cast_fp16")]; + tensor var_34926_to_fp16 = const()[name = tensor("op_34926_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3659_cast_fp16 = mul(x = var_34925_cast_fp16, y = var_34926_to_fp16)[name = tensor("aw_chunk_3659_cast_fp16")]; + tensor var_34929_equation_0 = const()[name = tensor("op_34929_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34929_cast_fp16 = einsum(equation = var_34929_equation_0, values = (var_34559_cast_fp16, var_34423_cast_fp16))[name = tensor("op_34929_cast_fp16")]; + tensor var_34930_to_fp16 = const()[name = tensor("op_34930_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3661_cast_fp16 = mul(x = var_34929_cast_fp16, y = var_34930_to_fp16)[name = tensor("aw_chunk_3661_cast_fp16")]; + tensor var_34933_equation_0 = const()[name = tensor("op_34933_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34933_cast_fp16 = einsum(equation = var_34933_equation_0, values = (var_34559_cast_fp16, var_34430_cast_fp16))[name = tensor("op_34933_cast_fp16")]; + tensor var_34934_to_fp16 = const()[name = tensor("op_34934_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3663_cast_fp16 = mul(x = var_34933_cast_fp16, y = var_34934_to_fp16)[name = tensor("aw_chunk_3663_cast_fp16")]; + tensor var_34937_equation_0 = const()[name = tensor("op_34937_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34937_cast_fp16 = einsum(equation = var_34937_equation_0, values = (var_34563_cast_fp16, var_34437_cast_fp16))[name = tensor("op_34937_cast_fp16")]; + tensor var_34938_to_fp16 = const()[name = tensor("op_34938_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3665_cast_fp16 = mul(x = var_34937_cast_fp16, y = var_34938_to_fp16)[name = tensor("aw_chunk_3665_cast_fp16")]; + tensor var_34941_equation_0 = const()[name = tensor("op_34941_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34941_cast_fp16 = einsum(equation = var_34941_equation_0, values = (var_34563_cast_fp16, var_34444_cast_fp16))[name = tensor("op_34941_cast_fp16")]; + tensor var_34942_to_fp16 = const()[name = tensor("op_34942_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3667_cast_fp16 = mul(x = var_34941_cast_fp16, y = var_34942_to_fp16)[name = tensor("aw_chunk_3667_cast_fp16")]; + tensor var_34945_equation_0 = const()[name = tensor("op_34945_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34945_cast_fp16 = einsum(equation = var_34945_equation_0, values = (var_34563_cast_fp16, var_34451_cast_fp16))[name = tensor("op_34945_cast_fp16")]; + tensor var_34946_to_fp16 = const()[name = tensor("op_34946_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3669_cast_fp16 = mul(x = var_34945_cast_fp16, y = var_34946_to_fp16)[name = tensor("aw_chunk_3669_cast_fp16")]; + tensor var_34949_equation_0 = const()[name = tensor("op_34949_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34949_cast_fp16 = einsum(equation = var_34949_equation_0, values = (var_34563_cast_fp16, var_34458_cast_fp16))[name = tensor("op_34949_cast_fp16")]; + tensor var_34950_to_fp16 = const()[name = tensor("op_34950_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3671_cast_fp16 = mul(x = var_34949_cast_fp16, y = var_34950_to_fp16)[name = tensor("aw_chunk_3671_cast_fp16")]; + tensor var_34953_equation_0 = const()[name = tensor("op_34953_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34953_cast_fp16 = einsum(equation = var_34953_equation_0, values = (var_34567_cast_fp16, var_34465_cast_fp16))[name = tensor("op_34953_cast_fp16")]; + tensor var_34954_to_fp16 = const()[name = tensor("op_34954_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3673_cast_fp16 = mul(x = var_34953_cast_fp16, y = var_34954_to_fp16)[name = tensor("aw_chunk_3673_cast_fp16")]; + tensor var_34957_equation_0 = const()[name = tensor("op_34957_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34957_cast_fp16 = einsum(equation = var_34957_equation_0, values = (var_34567_cast_fp16, var_34472_cast_fp16))[name = tensor("op_34957_cast_fp16")]; + tensor var_34958_to_fp16 = const()[name = tensor("op_34958_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3675_cast_fp16 = mul(x = var_34957_cast_fp16, y = var_34958_to_fp16)[name = tensor("aw_chunk_3675_cast_fp16")]; + tensor var_34961_equation_0 = const()[name = tensor("op_34961_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34961_cast_fp16 = einsum(equation = var_34961_equation_0, values = (var_34567_cast_fp16, var_34479_cast_fp16))[name = tensor("op_34961_cast_fp16")]; + tensor var_34962_to_fp16 = const()[name = tensor("op_34962_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3677_cast_fp16 = mul(x = var_34961_cast_fp16, y = var_34962_to_fp16)[name = tensor("aw_chunk_3677_cast_fp16")]; + tensor var_34965_equation_0 = const()[name = tensor("op_34965_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_34965_cast_fp16 = einsum(equation = var_34965_equation_0, values = (var_34567_cast_fp16, var_34486_cast_fp16))[name = tensor("op_34965_cast_fp16")]; + tensor var_34966_to_fp16 = const()[name = tensor("op_34966_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3679_cast_fp16 = mul(x = var_34965_cast_fp16, y = var_34966_to_fp16)[name = tensor("aw_chunk_3679_cast_fp16")]; + tensor var_34968_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3521_cast_fp16)[name = tensor("op_34968_cast_fp16")]; + tensor var_34969_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3523_cast_fp16)[name = tensor("op_34969_cast_fp16")]; + tensor var_34970_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3525_cast_fp16)[name = tensor("op_34970_cast_fp16")]; + tensor var_34971_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3527_cast_fp16)[name = tensor("op_34971_cast_fp16")]; + tensor var_34972_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3529_cast_fp16)[name = tensor("op_34972_cast_fp16")]; + tensor var_34973_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3531_cast_fp16)[name = tensor("op_34973_cast_fp16")]; + tensor var_34974_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3533_cast_fp16)[name = tensor("op_34974_cast_fp16")]; + tensor var_34975_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3535_cast_fp16)[name = tensor("op_34975_cast_fp16")]; + tensor var_34976_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3537_cast_fp16)[name = tensor("op_34976_cast_fp16")]; + tensor var_34977_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3539_cast_fp16)[name = tensor("op_34977_cast_fp16")]; + tensor var_34978_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3541_cast_fp16)[name = tensor("op_34978_cast_fp16")]; + tensor var_34979_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3543_cast_fp16)[name = tensor("op_34979_cast_fp16")]; + tensor var_34980_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3545_cast_fp16)[name = tensor("op_34980_cast_fp16")]; + tensor var_34981_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3547_cast_fp16)[name = tensor("op_34981_cast_fp16")]; + tensor var_34982_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3549_cast_fp16)[name = tensor("op_34982_cast_fp16")]; + tensor var_34983_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3551_cast_fp16)[name = tensor("op_34983_cast_fp16")]; + tensor var_34984_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3553_cast_fp16)[name = tensor("op_34984_cast_fp16")]; + tensor var_34985_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3555_cast_fp16)[name = tensor("op_34985_cast_fp16")]; + tensor var_34986_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3557_cast_fp16)[name = tensor("op_34986_cast_fp16")]; + tensor var_34987_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3559_cast_fp16)[name = tensor("op_34987_cast_fp16")]; + tensor var_34988_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3561_cast_fp16)[name = tensor("op_34988_cast_fp16")]; + tensor var_34989_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3563_cast_fp16)[name = tensor("op_34989_cast_fp16")]; + tensor var_34990_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3565_cast_fp16)[name = tensor("op_34990_cast_fp16")]; + tensor var_34991_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3567_cast_fp16)[name = tensor("op_34991_cast_fp16")]; + tensor var_34992_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3569_cast_fp16)[name = tensor("op_34992_cast_fp16")]; + tensor var_34993_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3571_cast_fp16)[name = tensor("op_34993_cast_fp16")]; + tensor var_34994_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3573_cast_fp16)[name = tensor("op_34994_cast_fp16")]; + tensor var_34995_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3575_cast_fp16)[name = tensor("op_34995_cast_fp16")]; + tensor var_34996_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3577_cast_fp16)[name = tensor("op_34996_cast_fp16")]; + tensor var_34997_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3579_cast_fp16)[name = tensor("op_34997_cast_fp16")]; + tensor var_34998_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3581_cast_fp16)[name = tensor("op_34998_cast_fp16")]; + tensor var_34999_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3583_cast_fp16)[name = tensor("op_34999_cast_fp16")]; + tensor var_35000_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3585_cast_fp16)[name = tensor("op_35000_cast_fp16")]; + tensor var_35001_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3587_cast_fp16)[name = tensor("op_35001_cast_fp16")]; + tensor var_35002_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3589_cast_fp16)[name = tensor("op_35002_cast_fp16")]; + tensor var_35003_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3591_cast_fp16)[name = tensor("op_35003_cast_fp16")]; + tensor var_35004_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3593_cast_fp16)[name = tensor("op_35004_cast_fp16")]; + tensor var_35005_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3595_cast_fp16)[name = tensor("op_35005_cast_fp16")]; + tensor var_35006_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3597_cast_fp16)[name = tensor("op_35006_cast_fp16")]; + tensor var_35007_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3599_cast_fp16)[name = tensor("op_35007_cast_fp16")]; + tensor var_35008_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3601_cast_fp16)[name = tensor("op_35008_cast_fp16")]; + tensor var_35009_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3603_cast_fp16)[name = tensor("op_35009_cast_fp16")]; + tensor var_35010_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3605_cast_fp16)[name = tensor("op_35010_cast_fp16")]; + tensor var_35011_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3607_cast_fp16)[name = tensor("op_35011_cast_fp16")]; + tensor var_35012_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3609_cast_fp16)[name = tensor("op_35012_cast_fp16")]; + tensor var_35013_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3611_cast_fp16)[name = tensor("op_35013_cast_fp16")]; + tensor var_35014_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3613_cast_fp16)[name = tensor("op_35014_cast_fp16")]; + tensor var_35015_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3615_cast_fp16)[name = tensor("op_35015_cast_fp16")]; + tensor var_35016_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3617_cast_fp16)[name = tensor("op_35016_cast_fp16")]; + tensor var_35017_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3619_cast_fp16)[name = tensor("op_35017_cast_fp16")]; + tensor var_35018_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3621_cast_fp16)[name = tensor("op_35018_cast_fp16")]; + tensor var_35019_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3623_cast_fp16)[name = tensor("op_35019_cast_fp16")]; + tensor var_35020_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3625_cast_fp16)[name = tensor("op_35020_cast_fp16")]; + tensor var_35021_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3627_cast_fp16)[name = tensor("op_35021_cast_fp16")]; + tensor var_35022_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3629_cast_fp16)[name = tensor("op_35022_cast_fp16")]; + tensor var_35023_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3631_cast_fp16)[name = tensor("op_35023_cast_fp16")]; + tensor var_35024_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3633_cast_fp16)[name = tensor("op_35024_cast_fp16")]; + tensor var_35025_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3635_cast_fp16)[name = tensor("op_35025_cast_fp16")]; + tensor var_35026_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3637_cast_fp16)[name = tensor("op_35026_cast_fp16")]; + tensor var_35027_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3639_cast_fp16)[name = tensor("op_35027_cast_fp16")]; + tensor var_35028_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3641_cast_fp16)[name = tensor("op_35028_cast_fp16")]; + tensor var_35029_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3643_cast_fp16)[name = tensor("op_35029_cast_fp16")]; + tensor var_35030_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3645_cast_fp16)[name = tensor("op_35030_cast_fp16")]; + tensor var_35031_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3647_cast_fp16)[name = tensor("op_35031_cast_fp16")]; + tensor var_35032_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3649_cast_fp16)[name = tensor("op_35032_cast_fp16")]; + tensor var_35033_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3651_cast_fp16)[name = tensor("op_35033_cast_fp16")]; + tensor var_35034_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3653_cast_fp16)[name = tensor("op_35034_cast_fp16")]; + tensor var_35035_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3655_cast_fp16)[name = tensor("op_35035_cast_fp16")]; + tensor var_35036_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3657_cast_fp16)[name = tensor("op_35036_cast_fp16")]; + tensor var_35037_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3659_cast_fp16)[name = tensor("op_35037_cast_fp16")]; + tensor var_35038_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3661_cast_fp16)[name = tensor("op_35038_cast_fp16")]; + tensor var_35039_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3663_cast_fp16)[name = tensor("op_35039_cast_fp16")]; + tensor var_35040_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3665_cast_fp16)[name = tensor("op_35040_cast_fp16")]; + tensor var_35041_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3667_cast_fp16)[name = tensor("op_35041_cast_fp16")]; + tensor var_35042_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3669_cast_fp16)[name = tensor("op_35042_cast_fp16")]; + tensor var_35043_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3671_cast_fp16)[name = tensor("op_35043_cast_fp16")]; + tensor var_35044_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3673_cast_fp16)[name = tensor("op_35044_cast_fp16")]; + tensor var_35045_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3675_cast_fp16)[name = tensor("op_35045_cast_fp16")]; + tensor var_35046_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3677_cast_fp16)[name = tensor("op_35046_cast_fp16")]; + tensor var_35047_cast_fp16 = softmax(axis = var_33793, x = aw_chunk_3679_cast_fp16)[name = tensor("op_35047_cast_fp16")]; + tensor var_35049_equation_0 = const()[name = tensor("op_35049_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35049_cast_fp16 = einsum(equation = var_35049_equation_0, values = (var_34569_cast_fp16, var_34968_cast_fp16))[name = tensor("op_35049_cast_fp16")]; + tensor var_35051_equation_0 = const()[name = tensor("op_35051_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35051_cast_fp16 = einsum(equation = var_35051_equation_0, values = (var_34569_cast_fp16, var_34969_cast_fp16))[name = tensor("op_35051_cast_fp16")]; + tensor var_35053_equation_0 = const()[name = tensor("op_35053_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35053_cast_fp16 = einsum(equation = var_35053_equation_0, values = (var_34569_cast_fp16, var_34970_cast_fp16))[name = tensor("op_35053_cast_fp16")]; + tensor var_35055_equation_0 = const()[name = tensor("op_35055_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35055_cast_fp16 = einsum(equation = var_35055_equation_0, values = (var_34569_cast_fp16, var_34971_cast_fp16))[name = tensor("op_35055_cast_fp16")]; + tensor var_35057_equation_0 = const()[name = tensor("op_35057_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35057_cast_fp16 = einsum(equation = var_35057_equation_0, values = (var_34573_cast_fp16, var_34972_cast_fp16))[name = tensor("op_35057_cast_fp16")]; + tensor var_35059_equation_0 = const()[name = tensor("op_35059_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35059_cast_fp16 = einsum(equation = var_35059_equation_0, values = (var_34573_cast_fp16, var_34973_cast_fp16))[name = tensor("op_35059_cast_fp16")]; + tensor var_35061_equation_0 = const()[name = tensor("op_35061_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35061_cast_fp16 = einsum(equation = var_35061_equation_0, values = (var_34573_cast_fp16, var_34974_cast_fp16))[name = tensor("op_35061_cast_fp16")]; + tensor var_35063_equation_0 = const()[name = tensor("op_35063_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35063_cast_fp16 = einsum(equation = var_35063_equation_0, values = (var_34573_cast_fp16, var_34975_cast_fp16))[name = tensor("op_35063_cast_fp16")]; + tensor var_35065_equation_0 = const()[name = tensor("op_35065_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35065_cast_fp16 = einsum(equation = var_35065_equation_0, values = (var_34577_cast_fp16, var_34976_cast_fp16))[name = tensor("op_35065_cast_fp16")]; + tensor var_35067_equation_0 = const()[name = tensor("op_35067_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35067_cast_fp16 = einsum(equation = var_35067_equation_0, values = (var_34577_cast_fp16, var_34977_cast_fp16))[name = tensor("op_35067_cast_fp16")]; + tensor var_35069_equation_0 = const()[name = tensor("op_35069_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35069_cast_fp16 = einsum(equation = var_35069_equation_0, values = (var_34577_cast_fp16, var_34978_cast_fp16))[name = tensor("op_35069_cast_fp16")]; + tensor var_35071_equation_0 = const()[name = tensor("op_35071_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35071_cast_fp16 = einsum(equation = var_35071_equation_0, values = (var_34577_cast_fp16, var_34979_cast_fp16))[name = tensor("op_35071_cast_fp16")]; + tensor var_35073_equation_0 = const()[name = tensor("op_35073_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35073_cast_fp16 = einsum(equation = var_35073_equation_0, values = (var_34581_cast_fp16, var_34980_cast_fp16))[name = tensor("op_35073_cast_fp16")]; + tensor var_35075_equation_0 = const()[name = tensor("op_35075_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35075_cast_fp16 = einsum(equation = var_35075_equation_0, values = (var_34581_cast_fp16, var_34981_cast_fp16))[name = tensor("op_35075_cast_fp16")]; + tensor var_35077_equation_0 = const()[name = tensor("op_35077_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35077_cast_fp16 = einsum(equation = var_35077_equation_0, values = (var_34581_cast_fp16, var_34982_cast_fp16))[name = tensor("op_35077_cast_fp16")]; + tensor var_35079_equation_0 = const()[name = tensor("op_35079_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35079_cast_fp16 = einsum(equation = var_35079_equation_0, values = (var_34581_cast_fp16, var_34983_cast_fp16))[name = tensor("op_35079_cast_fp16")]; + tensor var_35081_equation_0 = const()[name = tensor("op_35081_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35081_cast_fp16 = einsum(equation = var_35081_equation_0, values = (var_34585_cast_fp16, var_34984_cast_fp16))[name = tensor("op_35081_cast_fp16")]; + tensor var_35083_equation_0 = const()[name = tensor("op_35083_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35083_cast_fp16 = einsum(equation = var_35083_equation_0, values = (var_34585_cast_fp16, var_34985_cast_fp16))[name = tensor("op_35083_cast_fp16")]; + tensor var_35085_equation_0 = const()[name = tensor("op_35085_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35085_cast_fp16 = einsum(equation = var_35085_equation_0, values = (var_34585_cast_fp16, var_34986_cast_fp16))[name = tensor("op_35085_cast_fp16")]; + tensor var_35087_equation_0 = const()[name = tensor("op_35087_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35087_cast_fp16 = einsum(equation = var_35087_equation_0, values = (var_34585_cast_fp16, var_34987_cast_fp16))[name = tensor("op_35087_cast_fp16")]; + tensor var_35089_equation_0 = const()[name = tensor("op_35089_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35089_cast_fp16 = einsum(equation = var_35089_equation_0, values = (var_34589_cast_fp16, var_34988_cast_fp16))[name = tensor("op_35089_cast_fp16")]; + tensor var_35091_equation_0 = const()[name = tensor("op_35091_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35091_cast_fp16 = einsum(equation = var_35091_equation_0, values = (var_34589_cast_fp16, var_34989_cast_fp16))[name = tensor("op_35091_cast_fp16")]; + tensor var_35093_equation_0 = const()[name = tensor("op_35093_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35093_cast_fp16 = einsum(equation = var_35093_equation_0, values = (var_34589_cast_fp16, var_34990_cast_fp16))[name = tensor("op_35093_cast_fp16")]; + tensor var_35095_equation_0 = const()[name = tensor("op_35095_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35095_cast_fp16 = einsum(equation = var_35095_equation_0, values = (var_34589_cast_fp16, var_34991_cast_fp16))[name = tensor("op_35095_cast_fp16")]; + tensor var_35097_equation_0 = const()[name = tensor("op_35097_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35097_cast_fp16 = einsum(equation = var_35097_equation_0, values = (var_34593_cast_fp16, var_34992_cast_fp16))[name = tensor("op_35097_cast_fp16")]; + tensor var_35099_equation_0 = const()[name = tensor("op_35099_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35099_cast_fp16 = einsum(equation = var_35099_equation_0, values = (var_34593_cast_fp16, var_34993_cast_fp16))[name = tensor("op_35099_cast_fp16")]; + tensor var_35101_equation_0 = const()[name = tensor("op_35101_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35101_cast_fp16 = einsum(equation = var_35101_equation_0, values = (var_34593_cast_fp16, var_34994_cast_fp16))[name = tensor("op_35101_cast_fp16")]; + tensor var_35103_equation_0 = const()[name = tensor("op_35103_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35103_cast_fp16 = einsum(equation = var_35103_equation_0, values = (var_34593_cast_fp16, var_34995_cast_fp16))[name = tensor("op_35103_cast_fp16")]; + tensor var_35105_equation_0 = const()[name = tensor("op_35105_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35105_cast_fp16 = einsum(equation = var_35105_equation_0, values = (var_34597_cast_fp16, var_34996_cast_fp16))[name = tensor("op_35105_cast_fp16")]; + tensor var_35107_equation_0 = const()[name = tensor("op_35107_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35107_cast_fp16 = einsum(equation = var_35107_equation_0, values = (var_34597_cast_fp16, var_34997_cast_fp16))[name = tensor("op_35107_cast_fp16")]; + tensor var_35109_equation_0 = const()[name = tensor("op_35109_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35109_cast_fp16 = einsum(equation = var_35109_equation_0, values = (var_34597_cast_fp16, var_34998_cast_fp16))[name = tensor("op_35109_cast_fp16")]; + tensor var_35111_equation_0 = const()[name = tensor("op_35111_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35111_cast_fp16 = einsum(equation = var_35111_equation_0, values = (var_34597_cast_fp16, var_34999_cast_fp16))[name = tensor("op_35111_cast_fp16")]; + tensor var_35113_equation_0 = const()[name = tensor("op_35113_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35113_cast_fp16 = einsum(equation = var_35113_equation_0, values = (var_34601_cast_fp16, var_35000_cast_fp16))[name = tensor("op_35113_cast_fp16")]; + tensor var_35115_equation_0 = const()[name = tensor("op_35115_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35115_cast_fp16 = einsum(equation = var_35115_equation_0, values = (var_34601_cast_fp16, var_35001_cast_fp16))[name = tensor("op_35115_cast_fp16")]; + tensor var_35117_equation_0 = const()[name = tensor("op_35117_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35117_cast_fp16 = einsum(equation = var_35117_equation_0, values = (var_34601_cast_fp16, var_35002_cast_fp16))[name = tensor("op_35117_cast_fp16")]; + tensor var_35119_equation_0 = const()[name = tensor("op_35119_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35119_cast_fp16 = einsum(equation = var_35119_equation_0, values = (var_34601_cast_fp16, var_35003_cast_fp16))[name = tensor("op_35119_cast_fp16")]; + tensor var_35121_equation_0 = const()[name = tensor("op_35121_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35121_cast_fp16 = einsum(equation = var_35121_equation_0, values = (var_34605_cast_fp16, var_35004_cast_fp16))[name = tensor("op_35121_cast_fp16")]; + tensor var_35123_equation_0 = const()[name = tensor("op_35123_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35123_cast_fp16 = einsum(equation = var_35123_equation_0, values = (var_34605_cast_fp16, var_35005_cast_fp16))[name = tensor("op_35123_cast_fp16")]; + tensor var_35125_equation_0 = const()[name = tensor("op_35125_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35125_cast_fp16 = einsum(equation = var_35125_equation_0, values = (var_34605_cast_fp16, var_35006_cast_fp16))[name = tensor("op_35125_cast_fp16")]; + tensor var_35127_equation_0 = const()[name = tensor("op_35127_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35127_cast_fp16 = einsum(equation = var_35127_equation_0, values = (var_34605_cast_fp16, var_35007_cast_fp16))[name = tensor("op_35127_cast_fp16")]; + tensor var_35129_equation_0 = const()[name = tensor("op_35129_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35129_cast_fp16 = einsum(equation = var_35129_equation_0, values = (var_34609_cast_fp16, var_35008_cast_fp16))[name = tensor("op_35129_cast_fp16")]; + tensor var_35131_equation_0 = const()[name = tensor("op_35131_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35131_cast_fp16 = einsum(equation = var_35131_equation_0, values = (var_34609_cast_fp16, var_35009_cast_fp16))[name = tensor("op_35131_cast_fp16")]; + tensor var_35133_equation_0 = const()[name = tensor("op_35133_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35133_cast_fp16 = einsum(equation = var_35133_equation_0, values = (var_34609_cast_fp16, var_35010_cast_fp16))[name = tensor("op_35133_cast_fp16")]; + tensor var_35135_equation_0 = const()[name = tensor("op_35135_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35135_cast_fp16 = einsum(equation = var_35135_equation_0, values = (var_34609_cast_fp16, var_35011_cast_fp16))[name = tensor("op_35135_cast_fp16")]; + tensor var_35137_equation_0 = const()[name = tensor("op_35137_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35137_cast_fp16 = einsum(equation = var_35137_equation_0, values = (var_34613_cast_fp16, var_35012_cast_fp16))[name = tensor("op_35137_cast_fp16")]; + tensor var_35139_equation_0 = const()[name = tensor("op_35139_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35139_cast_fp16 = einsum(equation = var_35139_equation_0, values = (var_34613_cast_fp16, var_35013_cast_fp16))[name = tensor("op_35139_cast_fp16")]; + tensor var_35141_equation_0 = const()[name = tensor("op_35141_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35141_cast_fp16 = einsum(equation = var_35141_equation_0, values = (var_34613_cast_fp16, var_35014_cast_fp16))[name = tensor("op_35141_cast_fp16")]; + tensor var_35143_equation_0 = const()[name = tensor("op_35143_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35143_cast_fp16 = einsum(equation = var_35143_equation_0, values = (var_34613_cast_fp16, var_35015_cast_fp16))[name = tensor("op_35143_cast_fp16")]; + tensor var_35145_equation_0 = const()[name = tensor("op_35145_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35145_cast_fp16 = einsum(equation = var_35145_equation_0, values = (var_34617_cast_fp16, var_35016_cast_fp16))[name = tensor("op_35145_cast_fp16")]; + tensor var_35147_equation_0 = const()[name = tensor("op_35147_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35147_cast_fp16 = einsum(equation = var_35147_equation_0, values = (var_34617_cast_fp16, var_35017_cast_fp16))[name = tensor("op_35147_cast_fp16")]; + tensor var_35149_equation_0 = const()[name = tensor("op_35149_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35149_cast_fp16 = einsum(equation = var_35149_equation_0, values = (var_34617_cast_fp16, var_35018_cast_fp16))[name = tensor("op_35149_cast_fp16")]; + tensor var_35151_equation_0 = const()[name = tensor("op_35151_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35151_cast_fp16 = einsum(equation = var_35151_equation_0, values = (var_34617_cast_fp16, var_35019_cast_fp16))[name = tensor("op_35151_cast_fp16")]; + tensor var_35153_equation_0 = const()[name = tensor("op_35153_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35153_cast_fp16 = einsum(equation = var_35153_equation_0, values = (var_34621_cast_fp16, var_35020_cast_fp16))[name = tensor("op_35153_cast_fp16")]; + tensor var_35155_equation_0 = const()[name = tensor("op_35155_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35155_cast_fp16 = einsum(equation = var_35155_equation_0, values = (var_34621_cast_fp16, var_35021_cast_fp16))[name = tensor("op_35155_cast_fp16")]; + tensor var_35157_equation_0 = const()[name = tensor("op_35157_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35157_cast_fp16 = einsum(equation = var_35157_equation_0, values = (var_34621_cast_fp16, var_35022_cast_fp16))[name = tensor("op_35157_cast_fp16")]; + tensor var_35159_equation_0 = const()[name = tensor("op_35159_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35159_cast_fp16 = einsum(equation = var_35159_equation_0, values = (var_34621_cast_fp16, var_35023_cast_fp16))[name = tensor("op_35159_cast_fp16")]; + tensor var_35161_equation_0 = const()[name = tensor("op_35161_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35161_cast_fp16 = einsum(equation = var_35161_equation_0, values = (var_34625_cast_fp16, var_35024_cast_fp16))[name = tensor("op_35161_cast_fp16")]; + tensor var_35163_equation_0 = const()[name = tensor("op_35163_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35163_cast_fp16 = einsum(equation = var_35163_equation_0, values = (var_34625_cast_fp16, var_35025_cast_fp16))[name = tensor("op_35163_cast_fp16")]; + tensor var_35165_equation_0 = const()[name = tensor("op_35165_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35165_cast_fp16 = einsum(equation = var_35165_equation_0, values = (var_34625_cast_fp16, var_35026_cast_fp16))[name = tensor("op_35165_cast_fp16")]; + tensor var_35167_equation_0 = const()[name = tensor("op_35167_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35167_cast_fp16 = einsum(equation = var_35167_equation_0, values = (var_34625_cast_fp16, var_35027_cast_fp16))[name = tensor("op_35167_cast_fp16")]; + tensor var_35169_equation_0 = const()[name = tensor("op_35169_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35169_cast_fp16 = einsum(equation = var_35169_equation_0, values = (var_34629_cast_fp16, var_35028_cast_fp16))[name = tensor("op_35169_cast_fp16")]; + tensor var_35171_equation_0 = const()[name = tensor("op_35171_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35171_cast_fp16 = einsum(equation = var_35171_equation_0, values = (var_34629_cast_fp16, var_35029_cast_fp16))[name = tensor("op_35171_cast_fp16")]; + tensor var_35173_equation_0 = const()[name = tensor("op_35173_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35173_cast_fp16 = einsum(equation = var_35173_equation_0, values = (var_34629_cast_fp16, var_35030_cast_fp16))[name = tensor("op_35173_cast_fp16")]; + tensor var_35175_equation_0 = const()[name = tensor("op_35175_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35175_cast_fp16 = einsum(equation = var_35175_equation_0, values = (var_34629_cast_fp16, var_35031_cast_fp16))[name = tensor("op_35175_cast_fp16")]; + tensor var_35177_equation_0 = const()[name = tensor("op_35177_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35177_cast_fp16 = einsum(equation = var_35177_equation_0, values = (var_34633_cast_fp16, var_35032_cast_fp16))[name = tensor("op_35177_cast_fp16")]; + tensor var_35179_equation_0 = const()[name = tensor("op_35179_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35179_cast_fp16 = einsum(equation = var_35179_equation_0, values = (var_34633_cast_fp16, var_35033_cast_fp16))[name = tensor("op_35179_cast_fp16")]; + tensor var_35181_equation_0 = const()[name = tensor("op_35181_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35181_cast_fp16 = einsum(equation = var_35181_equation_0, values = (var_34633_cast_fp16, var_35034_cast_fp16))[name = tensor("op_35181_cast_fp16")]; + tensor var_35183_equation_0 = const()[name = tensor("op_35183_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35183_cast_fp16 = einsum(equation = var_35183_equation_0, values = (var_34633_cast_fp16, var_35035_cast_fp16))[name = tensor("op_35183_cast_fp16")]; + tensor var_35185_equation_0 = const()[name = tensor("op_35185_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35185_cast_fp16 = einsum(equation = var_35185_equation_0, values = (var_34637_cast_fp16, var_35036_cast_fp16))[name = tensor("op_35185_cast_fp16")]; + tensor var_35187_equation_0 = const()[name = tensor("op_35187_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35187_cast_fp16 = einsum(equation = var_35187_equation_0, values = (var_34637_cast_fp16, var_35037_cast_fp16))[name = tensor("op_35187_cast_fp16")]; + tensor var_35189_equation_0 = const()[name = tensor("op_35189_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35189_cast_fp16 = einsum(equation = var_35189_equation_0, values = (var_34637_cast_fp16, var_35038_cast_fp16))[name = tensor("op_35189_cast_fp16")]; + tensor var_35191_equation_0 = const()[name = tensor("op_35191_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35191_cast_fp16 = einsum(equation = var_35191_equation_0, values = (var_34637_cast_fp16, var_35039_cast_fp16))[name = tensor("op_35191_cast_fp16")]; + tensor var_35193_equation_0 = const()[name = tensor("op_35193_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35193_cast_fp16 = einsum(equation = var_35193_equation_0, values = (var_34641_cast_fp16, var_35040_cast_fp16))[name = tensor("op_35193_cast_fp16")]; + tensor var_35195_equation_0 = const()[name = tensor("op_35195_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35195_cast_fp16 = einsum(equation = var_35195_equation_0, values = (var_34641_cast_fp16, var_35041_cast_fp16))[name = tensor("op_35195_cast_fp16")]; + tensor var_35197_equation_0 = const()[name = tensor("op_35197_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35197_cast_fp16 = einsum(equation = var_35197_equation_0, values = (var_34641_cast_fp16, var_35042_cast_fp16))[name = tensor("op_35197_cast_fp16")]; + tensor var_35199_equation_0 = const()[name = tensor("op_35199_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35199_cast_fp16 = einsum(equation = var_35199_equation_0, values = (var_34641_cast_fp16, var_35043_cast_fp16))[name = tensor("op_35199_cast_fp16")]; + tensor var_35201_equation_0 = const()[name = tensor("op_35201_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35201_cast_fp16 = einsum(equation = var_35201_equation_0, values = (var_34645_cast_fp16, var_35044_cast_fp16))[name = tensor("op_35201_cast_fp16")]; + tensor var_35203_equation_0 = const()[name = tensor("op_35203_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35203_cast_fp16 = einsum(equation = var_35203_equation_0, values = (var_34645_cast_fp16, var_35045_cast_fp16))[name = tensor("op_35203_cast_fp16")]; + tensor var_35205_equation_0 = const()[name = tensor("op_35205_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35205_cast_fp16 = einsum(equation = var_35205_equation_0, values = (var_34645_cast_fp16, var_35046_cast_fp16))[name = tensor("op_35205_cast_fp16")]; + tensor var_35207_equation_0 = const()[name = tensor("op_35207_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_35207_cast_fp16 = einsum(equation = var_35207_equation_0, values = (var_34645_cast_fp16, var_35047_cast_fp16))[name = tensor("op_35207_cast_fp16")]; + tensor var_35209_interleave_0 = const()[name = tensor("op_35209_interleave_0"), val = tensor(false)]; + tensor var_35209_cast_fp16 = concat(axis = var_33768, interleave = var_35209_interleave_0, values = (var_35049_cast_fp16, var_35051_cast_fp16, var_35053_cast_fp16, var_35055_cast_fp16))[name = tensor("op_35209_cast_fp16")]; + tensor var_35211_interleave_0 = const()[name = tensor("op_35211_interleave_0"), val = tensor(false)]; + tensor var_35211_cast_fp16 = concat(axis = var_33768, interleave = var_35211_interleave_0, values = (var_35057_cast_fp16, var_35059_cast_fp16, var_35061_cast_fp16, var_35063_cast_fp16))[name = tensor("op_35211_cast_fp16")]; + tensor var_35213_interleave_0 = const()[name = tensor("op_35213_interleave_0"), val = tensor(false)]; + tensor var_35213_cast_fp16 = concat(axis = var_33768, interleave = var_35213_interleave_0, values = (var_35065_cast_fp16, var_35067_cast_fp16, var_35069_cast_fp16, var_35071_cast_fp16))[name = tensor("op_35213_cast_fp16")]; + tensor var_35215_interleave_0 = const()[name = tensor("op_35215_interleave_0"), val = tensor(false)]; + tensor var_35215_cast_fp16 = concat(axis = var_33768, interleave = var_35215_interleave_0, values = (var_35073_cast_fp16, var_35075_cast_fp16, var_35077_cast_fp16, var_35079_cast_fp16))[name = tensor("op_35215_cast_fp16")]; + tensor var_35217_interleave_0 = const()[name = tensor("op_35217_interleave_0"), val = tensor(false)]; + tensor var_35217_cast_fp16 = concat(axis = var_33768, interleave = var_35217_interleave_0, values = (var_35081_cast_fp16, var_35083_cast_fp16, var_35085_cast_fp16, var_35087_cast_fp16))[name = tensor("op_35217_cast_fp16")]; + tensor var_35219_interleave_0 = const()[name = tensor("op_35219_interleave_0"), val = tensor(false)]; + tensor var_35219_cast_fp16 = concat(axis = var_33768, interleave = var_35219_interleave_0, values = (var_35089_cast_fp16, var_35091_cast_fp16, var_35093_cast_fp16, var_35095_cast_fp16))[name = tensor("op_35219_cast_fp16")]; + tensor var_35221_interleave_0 = const()[name = tensor("op_35221_interleave_0"), val = tensor(false)]; + tensor var_35221_cast_fp16 = concat(axis = var_33768, interleave = var_35221_interleave_0, values = (var_35097_cast_fp16, var_35099_cast_fp16, var_35101_cast_fp16, var_35103_cast_fp16))[name = tensor("op_35221_cast_fp16")]; + tensor var_35223_interleave_0 = const()[name = tensor("op_35223_interleave_0"), val = tensor(false)]; + tensor var_35223_cast_fp16 = concat(axis = var_33768, interleave = var_35223_interleave_0, values = (var_35105_cast_fp16, var_35107_cast_fp16, var_35109_cast_fp16, var_35111_cast_fp16))[name = tensor("op_35223_cast_fp16")]; + tensor var_35225_interleave_0 = const()[name = tensor("op_35225_interleave_0"), val = tensor(false)]; + tensor var_35225_cast_fp16 = concat(axis = var_33768, interleave = var_35225_interleave_0, values = (var_35113_cast_fp16, var_35115_cast_fp16, var_35117_cast_fp16, var_35119_cast_fp16))[name = tensor("op_35225_cast_fp16")]; + tensor var_35227_interleave_0 = const()[name = tensor("op_35227_interleave_0"), val = tensor(false)]; + tensor var_35227_cast_fp16 = concat(axis = var_33768, interleave = var_35227_interleave_0, values = (var_35121_cast_fp16, var_35123_cast_fp16, var_35125_cast_fp16, var_35127_cast_fp16))[name = tensor("op_35227_cast_fp16")]; + tensor var_35229_interleave_0 = const()[name = tensor("op_35229_interleave_0"), val = tensor(false)]; + tensor var_35229_cast_fp16 = concat(axis = var_33768, interleave = var_35229_interleave_0, values = (var_35129_cast_fp16, var_35131_cast_fp16, var_35133_cast_fp16, var_35135_cast_fp16))[name = tensor("op_35229_cast_fp16")]; + tensor var_35231_interleave_0 = const()[name = tensor("op_35231_interleave_0"), val = tensor(false)]; + tensor var_35231_cast_fp16 = concat(axis = var_33768, interleave = var_35231_interleave_0, values = (var_35137_cast_fp16, var_35139_cast_fp16, var_35141_cast_fp16, var_35143_cast_fp16))[name = tensor("op_35231_cast_fp16")]; + tensor var_35233_interleave_0 = const()[name = tensor("op_35233_interleave_0"), val = tensor(false)]; + tensor var_35233_cast_fp16 = concat(axis = var_33768, interleave = var_35233_interleave_0, values = (var_35145_cast_fp16, var_35147_cast_fp16, var_35149_cast_fp16, var_35151_cast_fp16))[name = tensor("op_35233_cast_fp16")]; + tensor var_35235_interleave_0 = const()[name = tensor("op_35235_interleave_0"), val = tensor(false)]; + tensor var_35235_cast_fp16 = concat(axis = var_33768, interleave = var_35235_interleave_0, values = (var_35153_cast_fp16, var_35155_cast_fp16, var_35157_cast_fp16, var_35159_cast_fp16))[name = tensor("op_35235_cast_fp16")]; + tensor var_35237_interleave_0 = const()[name = tensor("op_35237_interleave_0"), val = tensor(false)]; + tensor var_35237_cast_fp16 = concat(axis = var_33768, interleave = var_35237_interleave_0, values = (var_35161_cast_fp16, var_35163_cast_fp16, var_35165_cast_fp16, var_35167_cast_fp16))[name = tensor("op_35237_cast_fp16")]; + tensor var_35239_interleave_0 = const()[name = tensor("op_35239_interleave_0"), val = tensor(false)]; + tensor var_35239_cast_fp16 = concat(axis = var_33768, interleave = var_35239_interleave_0, values = (var_35169_cast_fp16, var_35171_cast_fp16, var_35173_cast_fp16, var_35175_cast_fp16))[name = tensor("op_35239_cast_fp16")]; + tensor var_35241_interleave_0 = const()[name = tensor("op_35241_interleave_0"), val = tensor(false)]; + tensor var_35241_cast_fp16 = concat(axis = var_33768, interleave = var_35241_interleave_0, values = (var_35177_cast_fp16, var_35179_cast_fp16, var_35181_cast_fp16, var_35183_cast_fp16))[name = tensor("op_35241_cast_fp16")]; + tensor var_35243_interleave_0 = const()[name = tensor("op_35243_interleave_0"), val = tensor(false)]; + tensor var_35243_cast_fp16 = concat(axis = var_33768, interleave = var_35243_interleave_0, values = (var_35185_cast_fp16, var_35187_cast_fp16, var_35189_cast_fp16, var_35191_cast_fp16))[name = tensor("op_35243_cast_fp16")]; + tensor var_35245_interleave_0 = const()[name = tensor("op_35245_interleave_0"), val = tensor(false)]; + tensor var_35245_cast_fp16 = concat(axis = var_33768, interleave = var_35245_interleave_0, values = (var_35193_cast_fp16, var_35195_cast_fp16, var_35197_cast_fp16, var_35199_cast_fp16))[name = tensor("op_35245_cast_fp16")]; + tensor var_35247_interleave_0 = const()[name = tensor("op_35247_interleave_0"), val = tensor(false)]; + tensor var_35247_cast_fp16 = concat(axis = var_33768, interleave = var_35247_interleave_0, values = (var_35201_cast_fp16, var_35203_cast_fp16, var_35205_cast_fp16, var_35207_cast_fp16))[name = tensor("op_35247_cast_fp16")]; + tensor input_177_interleave_0 = const()[name = tensor("input_177_interleave_0"), val = tensor(false)]; + tensor input_177_cast_fp16 = concat(axis = var_33793, interleave = input_177_interleave_0, values = (var_35209_cast_fp16, var_35211_cast_fp16, var_35213_cast_fp16, var_35215_cast_fp16, var_35217_cast_fp16, var_35219_cast_fp16, var_35221_cast_fp16, var_35223_cast_fp16, var_35225_cast_fp16, var_35227_cast_fp16, var_35229_cast_fp16, var_35231_cast_fp16, var_35233_cast_fp16, var_35235_cast_fp16, var_35237_cast_fp16, var_35239_cast_fp16, var_35241_cast_fp16, var_35243_cast_fp16, var_35245_cast_fp16, var_35247_cast_fp16))[name = tensor("input_177_cast_fp16")]; + tensor var_35252 = const()[name = tensor("op_35252"), val = tensor([1, 1])]; + tensor var_35254 = const()[name = tensor("op_35254"), val = tensor([1, 1])]; + tensor obj_91_pad_type_0 = const()[name = tensor("obj_91_pad_type_0"), val = tensor("custom")]; + tensor obj_91_pad_0 = const()[name = tensor("obj_91_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_22_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_22_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(889908800)))]; + tensor layers_22_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_22_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(893185664)))]; + tensor obj_91_cast_fp16 = conv(bias = layers_22_self_attn_o_proj_bias_to_fp16, dilations = var_35254, groups = var_33793, pad = obj_91_pad_0, pad_type = obj_91_pad_type_0, strides = var_35252, weight = layers_22_self_attn_o_proj_weight_to_fp16, x = input_177_cast_fp16)[name = tensor("obj_91_cast_fp16")]; + tensor inputs_91_cast_fp16 = add(x = inputs_89_cast_fp16, y = obj_91_cast_fp16)[name = tensor("inputs_91_cast_fp16")]; + tensor var_35260 = const()[name = tensor("op_35260"), val = tensor([1])]; + tensor channels_mean_91_cast_fp16 = reduce_mean(axes = var_35260, keep_dims = var_33794, x = inputs_91_cast_fp16)[name = tensor("channels_mean_91_cast_fp16")]; + tensor zero_mean_91_cast_fp16 = sub(x = inputs_91_cast_fp16, y = channels_mean_91_cast_fp16)[name = tensor("zero_mean_91_cast_fp16")]; + tensor zero_mean_sq_91_cast_fp16 = mul(x = zero_mean_91_cast_fp16, y = zero_mean_91_cast_fp16)[name = tensor("zero_mean_sq_91_cast_fp16")]; + tensor var_35264 = const()[name = tensor("op_35264"), val = tensor([1])]; + tensor var_35265_cast_fp16 = reduce_mean(axes = var_35264, keep_dims = var_33794, x = zero_mean_sq_91_cast_fp16)[name = tensor("op_35265_cast_fp16")]; + tensor var_35266_to_fp16 = const()[name = tensor("op_35266_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_35267_cast_fp16 = add(x = var_35265_cast_fp16, y = var_35266_to_fp16)[name = tensor("op_35267_cast_fp16")]; + tensor denom_91_epsilon_0_to_fp16 = const()[name = tensor("denom_91_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_91_cast_fp16 = rsqrt(epsilon = denom_91_epsilon_0_to_fp16, x = var_35267_cast_fp16)[name = tensor("denom_91_cast_fp16")]; + tensor out_91_cast_fp16 = mul(x = zero_mean_91_cast_fp16, y = denom_91_cast_fp16)[name = tensor("out_91_cast_fp16")]; + tensor input_179_gamma_0_to_fp16 = const()[name = tensor("input_179_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(893188288)))]; + tensor input_179_beta_0_to_fp16 = const()[name = tensor("input_179_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(893190912)))]; + tensor input_179_epsilon_0_to_fp16 = const()[name = tensor("input_179_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_179_cast_fp16 = batch_norm(beta = input_179_beta_0_to_fp16, epsilon = input_179_epsilon_0_to_fp16, gamma = input_179_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_91_cast_fp16)[name = tensor("input_179_cast_fp16")]; + tensor var_35278 = const()[name = tensor("op_35278"), val = tensor([1, 1])]; + tensor var_35280 = const()[name = tensor("op_35280"), val = tensor([1, 1])]; + tensor input_181_pad_type_0 = const()[name = tensor("input_181_pad_type_0"), val = tensor("custom")]; + tensor input_181_pad_0 = const()[name = tensor("input_181_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_22_fc1_weight_to_fp16 = const()[name = tensor("layers_22_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(893193536)))]; + tensor layers_22_fc1_bias_to_fp16 = const()[name = tensor("layers_22_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(906300800)))]; + tensor input_181_cast_fp16 = conv(bias = layers_22_fc1_bias_to_fp16, dilations = var_35280, groups = var_33793, pad = input_181_pad_0, pad_type = input_181_pad_type_0, strides = var_35278, weight = layers_22_fc1_weight_to_fp16, x = input_179_cast_fp16)[name = tensor("input_181_cast_fp16")]; + tensor input_183_mode_0 = const()[name = tensor("input_183_mode_0"), val = tensor("EXACT")]; + tensor input_183_cast_fp16 = gelu(mode = input_183_mode_0, x = input_181_cast_fp16)[name = tensor("input_183_cast_fp16")]; + tensor var_35286 = const()[name = tensor("op_35286"), val = tensor([1, 1])]; + tensor var_35288 = const()[name = tensor("op_35288"), val = tensor([1, 1])]; + tensor hidden_states_49_pad_type_0 = const()[name = tensor("hidden_states_49_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_49_pad_0 = const()[name = tensor("hidden_states_49_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_22_fc2_weight_to_fp16 = const()[name = tensor("layers_22_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(906311104)))]; + tensor layers_22_fc2_bias_to_fp16 = const()[name = tensor("layers_22_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(919418368)))]; + tensor hidden_states_49_cast_fp16 = conv(bias = layers_22_fc2_bias_to_fp16, dilations = var_35288, groups = var_33793, pad = hidden_states_49_pad_0, pad_type = hidden_states_49_pad_type_0, strides = var_35286, weight = layers_22_fc2_weight_to_fp16, x = input_183_cast_fp16)[name = tensor("hidden_states_49_cast_fp16")]; + tensor inputs_93_cast_fp16 = add(x = inputs_91_cast_fp16, y = hidden_states_49_cast_fp16)[name = tensor("inputs_93_cast_fp16")]; + tensor var_35295 = const()[name = tensor("op_35295"), val = tensor(3)]; + tensor var_35320 = const()[name = tensor("op_35320"), val = tensor(1)]; + tensor var_35321 = const()[name = tensor("op_35321"), val = tensor(true)]; + tensor var_35331 = const()[name = tensor("op_35331"), val = tensor([1])]; + tensor channels_mean_93_cast_fp16 = reduce_mean(axes = var_35331, keep_dims = var_35321, x = inputs_93_cast_fp16)[name = tensor("channels_mean_93_cast_fp16")]; + tensor zero_mean_93_cast_fp16 = sub(x = inputs_93_cast_fp16, y = channels_mean_93_cast_fp16)[name = tensor("zero_mean_93_cast_fp16")]; + tensor zero_mean_sq_93_cast_fp16 = mul(x = zero_mean_93_cast_fp16, y = zero_mean_93_cast_fp16)[name = tensor("zero_mean_sq_93_cast_fp16")]; + tensor var_35335 = const()[name = tensor("op_35335"), val = tensor([1])]; + tensor var_35336_cast_fp16 = reduce_mean(axes = var_35335, keep_dims = var_35321, x = zero_mean_sq_93_cast_fp16)[name = tensor("op_35336_cast_fp16")]; + tensor var_35337_to_fp16 = const()[name = tensor("op_35337_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_35338_cast_fp16 = add(x = var_35336_cast_fp16, y = var_35337_to_fp16)[name = tensor("op_35338_cast_fp16")]; + tensor denom_93_epsilon_0_to_fp16 = const()[name = tensor("denom_93_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_93_cast_fp16 = rsqrt(epsilon = denom_93_epsilon_0_to_fp16, x = var_35338_cast_fp16)[name = tensor("denom_93_cast_fp16")]; + tensor out_93_cast_fp16 = mul(x = zero_mean_93_cast_fp16, y = denom_93_cast_fp16)[name = tensor("out_93_cast_fp16")]; + tensor obj_93_gamma_0_to_fp16 = const()[name = tensor("obj_93_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(919420992)))]; + tensor obj_93_beta_0_to_fp16 = const()[name = tensor("obj_93_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(919423616)))]; + tensor obj_93_epsilon_0_to_fp16 = const()[name = tensor("obj_93_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_93_cast_fp16 = batch_norm(beta = obj_93_beta_0_to_fp16, epsilon = obj_93_epsilon_0_to_fp16, gamma = obj_93_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_93_cast_fp16)[name = tensor("obj_93_cast_fp16")]; + tensor var_35353 = const()[name = tensor("op_35353"), val = tensor([1, 1])]; + tensor var_35355 = const()[name = tensor("op_35355"), val = tensor([1, 1])]; + tensor query_47_pad_type_0 = const()[name = tensor("query_47_pad_type_0"), val = tensor("custom")]; + tensor query_47_pad_0 = const()[name = tensor("query_47_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_23_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_23_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(919426240)))]; + tensor layers_23_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_23_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(922703104)))]; + tensor query_47_cast_fp16 = conv(bias = layers_23_self_attn_q_proj_bias_to_fp16, dilations = var_35355, groups = var_35320, pad = query_47_pad_0, pad_type = query_47_pad_type_0, strides = var_35353, weight = layers_23_self_attn_q_proj_weight_to_fp16, x = obj_93_cast_fp16)[name = tensor("query_47_cast_fp16")]; + tensor var_35359 = const()[name = tensor("op_35359"), val = tensor([1, 1])]; + tensor var_35361 = const()[name = tensor("op_35361"), val = tensor([1, 1])]; + tensor key_47_pad_type_0 = const()[name = tensor("key_47_pad_type_0"), val = tensor("custom")]; + tensor key_47_pad_0 = const()[name = tensor("key_47_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_23_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_23_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(922705728)))]; + tensor key_47_cast_fp16 = conv(dilations = var_35361, groups = var_35320, pad = key_47_pad_0, pad_type = key_47_pad_type_0, strides = var_35359, weight = layers_23_self_attn_k_proj_weight_to_fp16, x = obj_93_cast_fp16)[name = tensor("key_47_cast_fp16")]; + tensor var_35366 = const()[name = tensor("op_35366"), val = tensor([1, 1])]; + tensor var_35368 = const()[name = tensor("op_35368"), val = tensor([1, 1])]; + tensor value_47_pad_type_0 = const()[name = tensor("value_47_pad_type_0"), val = tensor("custom")]; + tensor value_47_pad_0 = const()[name = tensor("value_47_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_23_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_23_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(925982592)))]; + tensor layers_23_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_23_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(929259456)))]; + tensor value_47_cast_fp16 = conv(bias = layers_23_self_attn_v_proj_bias_to_fp16, dilations = var_35368, groups = var_35320, pad = value_47_pad_0, pad_type = value_47_pad_type_0, strides = var_35366, weight = layers_23_self_attn_v_proj_weight_to_fp16, x = obj_93_cast_fp16)[name = tensor("value_47_cast_fp16")]; + tensor var_35375_begin_0 = const()[name = tensor("op_35375_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_35375_end_0 = const()[name = tensor("op_35375_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_35375_end_mask_0 = const()[name = tensor("op_35375_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_35375_cast_fp16 = slice_by_index(begin = var_35375_begin_0, end = var_35375_end_0, end_mask = var_35375_end_mask_0, x = query_47_cast_fp16)[name = tensor("op_35375_cast_fp16")]; + tensor var_35379_begin_0 = const()[name = tensor("op_35379_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_35379_end_0 = const()[name = tensor("op_35379_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_35379_end_mask_0 = const()[name = tensor("op_35379_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_35379_cast_fp16 = slice_by_index(begin = var_35379_begin_0, end = var_35379_end_0, end_mask = var_35379_end_mask_0, x = query_47_cast_fp16)[name = tensor("op_35379_cast_fp16")]; + tensor var_35383_begin_0 = const()[name = tensor("op_35383_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_35383_end_0 = const()[name = tensor("op_35383_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_35383_end_mask_0 = const()[name = tensor("op_35383_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_35383_cast_fp16 = slice_by_index(begin = var_35383_begin_0, end = var_35383_end_0, end_mask = var_35383_end_mask_0, x = query_47_cast_fp16)[name = tensor("op_35383_cast_fp16")]; + tensor var_35387_begin_0 = const()[name = tensor("op_35387_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_35387_end_0 = const()[name = tensor("op_35387_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_35387_end_mask_0 = const()[name = tensor("op_35387_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_35387_cast_fp16 = slice_by_index(begin = var_35387_begin_0, end = var_35387_end_0, end_mask = var_35387_end_mask_0, x = query_47_cast_fp16)[name = tensor("op_35387_cast_fp16")]; + tensor var_35391_begin_0 = const()[name = tensor("op_35391_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_35391_end_0 = const()[name = tensor("op_35391_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_35391_end_mask_0 = const()[name = tensor("op_35391_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_35391_cast_fp16 = slice_by_index(begin = var_35391_begin_0, end = var_35391_end_0, end_mask = var_35391_end_mask_0, x = query_47_cast_fp16)[name = tensor("op_35391_cast_fp16")]; + tensor var_35395_begin_0 = const()[name = tensor("op_35395_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_35395_end_0 = const()[name = tensor("op_35395_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_35395_end_mask_0 = const()[name = tensor("op_35395_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_35395_cast_fp16 = slice_by_index(begin = var_35395_begin_0, end = var_35395_end_0, end_mask = var_35395_end_mask_0, x = query_47_cast_fp16)[name = tensor("op_35395_cast_fp16")]; + tensor var_35399_begin_0 = const()[name = tensor("op_35399_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_35399_end_0 = const()[name = tensor("op_35399_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_35399_end_mask_0 = const()[name = tensor("op_35399_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_35399_cast_fp16 = slice_by_index(begin = var_35399_begin_0, end = var_35399_end_0, end_mask = var_35399_end_mask_0, x = query_47_cast_fp16)[name = tensor("op_35399_cast_fp16")]; + tensor var_35403_begin_0 = const()[name = tensor("op_35403_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_35403_end_0 = const()[name = tensor("op_35403_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_35403_end_mask_0 = const()[name = tensor("op_35403_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_35403_cast_fp16 = slice_by_index(begin = var_35403_begin_0, end = var_35403_end_0, end_mask = var_35403_end_mask_0, x = query_47_cast_fp16)[name = tensor("op_35403_cast_fp16")]; + tensor var_35407_begin_0 = const()[name = tensor("op_35407_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_35407_end_0 = const()[name = tensor("op_35407_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_35407_end_mask_0 = const()[name = tensor("op_35407_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_35407_cast_fp16 = slice_by_index(begin = var_35407_begin_0, end = var_35407_end_0, end_mask = var_35407_end_mask_0, x = query_47_cast_fp16)[name = tensor("op_35407_cast_fp16")]; + tensor var_35411_begin_0 = const()[name = tensor("op_35411_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_35411_end_0 = const()[name = tensor("op_35411_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_35411_end_mask_0 = const()[name = tensor("op_35411_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_35411_cast_fp16 = slice_by_index(begin = var_35411_begin_0, end = var_35411_end_0, end_mask = var_35411_end_mask_0, x = query_47_cast_fp16)[name = tensor("op_35411_cast_fp16")]; + tensor var_35415_begin_0 = const()[name = tensor("op_35415_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_35415_end_0 = const()[name = tensor("op_35415_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_35415_end_mask_0 = const()[name = tensor("op_35415_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_35415_cast_fp16 = slice_by_index(begin = var_35415_begin_0, end = var_35415_end_0, end_mask = var_35415_end_mask_0, x = query_47_cast_fp16)[name = tensor("op_35415_cast_fp16")]; + tensor var_35419_begin_0 = const()[name = tensor("op_35419_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_35419_end_0 = const()[name = tensor("op_35419_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_35419_end_mask_0 = const()[name = tensor("op_35419_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_35419_cast_fp16 = slice_by_index(begin = var_35419_begin_0, end = var_35419_end_0, end_mask = var_35419_end_mask_0, x = query_47_cast_fp16)[name = tensor("op_35419_cast_fp16")]; + tensor var_35423_begin_0 = const()[name = tensor("op_35423_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_35423_end_0 = const()[name = tensor("op_35423_end_0"), val = tensor([1, 832, 1, 1500])]; + tensor var_35423_end_mask_0 = const()[name = tensor("op_35423_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_35423_cast_fp16 = slice_by_index(begin = var_35423_begin_0, end = var_35423_end_0, end_mask = var_35423_end_mask_0, x = query_47_cast_fp16)[name = tensor("op_35423_cast_fp16")]; + tensor var_35427_begin_0 = const()[name = tensor("op_35427_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_35427_end_0 = const()[name = tensor("op_35427_end_0"), val = tensor([1, 896, 1, 1500])]; + tensor var_35427_end_mask_0 = const()[name = tensor("op_35427_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_35427_cast_fp16 = slice_by_index(begin = var_35427_begin_0, end = var_35427_end_0, end_mask = var_35427_end_mask_0, x = query_47_cast_fp16)[name = tensor("op_35427_cast_fp16")]; + tensor var_35431_begin_0 = const()[name = tensor("op_35431_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_35431_end_0 = const()[name = tensor("op_35431_end_0"), val = tensor([1, 960, 1, 1500])]; + tensor var_35431_end_mask_0 = const()[name = tensor("op_35431_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_35431_cast_fp16 = slice_by_index(begin = var_35431_begin_0, end = var_35431_end_0, end_mask = var_35431_end_mask_0, x = query_47_cast_fp16)[name = tensor("op_35431_cast_fp16")]; + tensor var_35435_begin_0 = const()[name = tensor("op_35435_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_35435_end_0 = const()[name = tensor("op_35435_end_0"), val = tensor([1, 1024, 1, 1500])]; + tensor var_35435_end_mask_0 = const()[name = tensor("op_35435_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_35435_cast_fp16 = slice_by_index(begin = var_35435_begin_0, end = var_35435_end_0, end_mask = var_35435_end_mask_0, x = query_47_cast_fp16)[name = tensor("op_35435_cast_fp16")]; + tensor var_35439_begin_0 = const()[name = tensor("op_35439_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_35439_end_0 = const()[name = tensor("op_35439_end_0"), val = tensor([1, 1088, 1, 1500])]; + tensor var_35439_end_mask_0 = const()[name = tensor("op_35439_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_35439_cast_fp16 = slice_by_index(begin = var_35439_begin_0, end = var_35439_end_0, end_mask = var_35439_end_mask_0, x = query_47_cast_fp16)[name = tensor("op_35439_cast_fp16")]; + tensor var_35443_begin_0 = const()[name = tensor("op_35443_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_35443_end_0 = const()[name = tensor("op_35443_end_0"), val = tensor([1, 1152, 1, 1500])]; + tensor var_35443_end_mask_0 = const()[name = tensor("op_35443_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_35443_cast_fp16 = slice_by_index(begin = var_35443_begin_0, end = var_35443_end_0, end_mask = var_35443_end_mask_0, x = query_47_cast_fp16)[name = tensor("op_35443_cast_fp16")]; + tensor var_35447_begin_0 = const()[name = tensor("op_35447_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_35447_end_0 = const()[name = tensor("op_35447_end_0"), val = tensor([1, 1216, 1, 1500])]; + tensor var_35447_end_mask_0 = const()[name = tensor("op_35447_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_35447_cast_fp16 = slice_by_index(begin = var_35447_begin_0, end = var_35447_end_0, end_mask = var_35447_end_mask_0, x = query_47_cast_fp16)[name = tensor("op_35447_cast_fp16")]; + tensor var_35451_begin_0 = const()[name = tensor("op_35451_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_35451_end_0 = const()[name = tensor("op_35451_end_0"), val = tensor([1, 1280, 1, 1500])]; + tensor var_35451_end_mask_0 = const()[name = tensor("op_35451_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_35451_cast_fp16 = slice_by_index(begin = var_35451_begin_0, end = var_35451_end_0, end_mask = var_35451_end_mask_0, x = query_47_cast_fp16)[name = tensor("op_35451_cast_fp16")]; + tensor var_35460_begin_0 = const()[name = tensor("op_35460_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_35460_end_0 = const()[name = tensor("op_35460_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_35460_end_mask_0 = const()[name = tensor("op_35460_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35460_cast_fp16 = slice_by_index(begin = var_35460_begin_0, end = var_35460_end_0, end_mask = var_35460_end_mask_0, x = var_35375_cast_fp16)[name = tensor("op_35460_cast_fp16")]; + tensor var_35467_begin_0 = const()[name = tensor("op_35467_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_35467_end_0 = const()[name = tensor("op_35467_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_35467_end_mask_0 = const()[name = tensor("op_35467_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35467_cast_fp16 = slice_by_index(begin = var_35467_begin_0, end = var_35467_end_0, end_mask = var_35467_end_mask_0, x = var_35375_cast_fp16)[name = tensor("op_35467_cast_fp16")]; + tensor var_35474_begin_0 = const()[name = tensor("op_35474_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_35474_end_0 = const()[name = tensor("op_35474_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_35474_end_mask_0 = const()[name = tensor("op_35474_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35474_cast_fp16 = slice_by_index(begin = var_35474_begin_0, end = var_35474_end_0, end_mask = var_35474_end_mask_0, x = var_35375_cast_fp16)[name = tensor("op_35474_cast_fp16")]; + tensor var_35481_begin_0 = const()[name = tensor("op_35481_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_35481_end_0 = const()[name = tensor("op_35481_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_35481_end_mask_0 = const()[name = tensor("op_35481_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35481_cast_fp16 = slice_by_index(begin = var_35481_begin_0, end = var_35481_end_0, end_mask = var_35481_end_mask_0, x = var_35375_cast_fp16)[name = tensor("op_35481_cast_fp16")]; + tensor var_35488_begin_0 = const()[name = tensor("op_35488_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_35488_end_0 = const()[name = tensor("op_35488_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_35488_end_mask_0 = const()[name = tensor("op_35488_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35488_cast_fp16 = slice_by_index(begin = var_35488_begin_0, end = var_35488_end_0, end_mask = var_35488_end_mask_0, x = var_35379_cast_fp16)[name = tensor("op_35488_cast_fp16")]; + tensor var_35495_begin_0 = const()[name = tensor("op_35495_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_35495_end_0 = const()[name = tensor("op_35495_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_35495_end_mask_0 = const()[name = tensor("op_35495_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35495_cast_fp16 = slice_by_index(begin = var_35495_begin_0, end = var_35495_end_0, end_mask = var_35495_end_mask_0, x = var_35379_cast_fp16)[name = tensor("op_35495_cast_fp16")]; + tensor var_35502_begin_0 = const()[name = tensor("op_35502_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_35502_end_0 = const()[name = tensor("op_35502_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_35502_end_mask_0 = const()[name = tensor("op_35502_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35502_cast_fp16 = slice_by_index(begin = var_35502_begin_0, end = var_35502_end_0, end_mask = var_35502_end_mask_0, x = var_35379_cast_fp16)[name = tensor("op_35502_cast_fp16")]; + tensor var_35509_begin_0 = const()[name = tensor("op_35509_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_35509_end_0 = const()[name = tensor("op_35509_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_35509_end_mask_0 = const()[name = tensor("op_35509_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35509_cast_fp16 = slice_by_index(begin = var_35509_begin_0, end = var_35509_end_0, end_mask = var_35509_end_mask_0, x = var_35379_cast_fp16)[name = tensor("op_35509_cast_fp16")]; + tensor var_35516_begin_0 = const()[name = tensor("op_35516_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_35516_end_0 = const()[name = tensor("op_35516_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_35516_end_mask_0 = const()[name = tensor("op_35516_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35516_cast_fp16 = slice_by_index(begin = var_35516_begin_0, end = var_35516_end_0, end_mask = var_35516_end_mask_0, x = var_35383_cast_fp16)[name = tensor("op_35516_cast_fp16")]; + tensor var_35523_begin_0 = const()[name = tensor("op_35523_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_35523_end_0 = const()[name = tensor("op_35523_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_35523_end_mask_0 = const()[name = tensor("op_35523_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35523_cast_fp16 = slice_by_index(begin = var_35523_begin_0, end = var_35523_end_0, end_mask = var_35523_end_mask_0, x = var_35383_cast_fp16)[name = tensor("op_35523_cast_fp16")]; + tensor var_35530_begin_0 = const()[name = tensor("op_35530_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_35530_end_0 = const()[name = tensor("op_35530_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_35530_end_mask_0 = const()[name = tensor("op_35530_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35530_cast_fp16 = slice_by_index(begin = var_35530_begin_0, end = var_35530_end_0, end_mask = var_35530_end_mask_0, x = var_35383_cast_fp16)[name = tensor("op_35530_cast_fp16")]; + tensor var_35537_begin_0 = const()[name = tensor("op_35537_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_35537_end_0 = const()[name = tensor("op_35537_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_35537_end_mask_0 = const()[name = tensor("op_35537_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35537_cast_fp16 = slice_by_index(begin = var_35537_begin_0, end = var_35537_end_0, end_mask = var_35537_end_mask_0, x = var_35383_cast_fp16)[name = tensor("op_35537_cast_fp16")]; + tensor var_35544_begin_0 = const()[name = tensor("op_35544_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_35544_end_0 = const()[name = tensor("op_35544_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_35544_end_mask_0 = const()[name = tensor("op_35544_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35544_cast_fp16 = slice_by_index(begin = var_35544_begin_0, end = var_35544_end_0, end_mask = var_35544_end_mask_0, x = var_35387_cast_fp16)[name = tensor("op_35544_cast_fp16")]; + tensor var_35551_begin_0 = const()[name = tensor("op_35551_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_35551_end_0 = const()[name = tensor("op_35551_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_35551_end_mask_0 = const()[name = tensor("op_35551_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35551_cast_fp16 = slice_by_index(begin = var_35551_begin_0, end = var_35551_end_0, end_mask = var_35551_end_mask_0, x = var_35387_cast_fp16)[name = tensor("op_35551_cast_fp16")]; + tensor var_35558_begin_0 = const()[name = tensor("op_35558_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_35558_end_0 = const()[name = tensor("op_35558_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_35558_end_mask_0 = const()[name = tensor("op_35558_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35558_cast_fp16 = slice_by_index(begin = var_35558_begin_0, end = var_35558_end_0, end_mask = var_35558_end_mask_0, x = var_35387_cast_fp16)[name = tensor("op_35558_cast_fp16")]; + tensor var_35565_begin_0 = const()[name = tensor("op_35565_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_35565_end_0 = const()[name = tensor("op_35565_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_35565_end_mask_0 = const()[name = tensor("op_35565_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35565_cast_fp16 = slice_by_index(begin = var_35565_begin_0, end = var_35565_end_0, end_mask = var_35565_end_mask_0, x = var_35387_cast_fp16)[name = tensor("op_35565_cast_fp16")]; + tensor var_35572_begin_0 = const()[name = tensor("op_35572_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_35572_end_0 = const()[name = tensor("op_35572_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_35572_end_mask_0 = const()[name = tensor("op_35572_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35572_cast_fp16 = slice_by_index(begin = var_35572_begin_0, end = var_35572_end_0, end_mask = var_35572_end_mask_0, x = var_35391_cast_fp16)[name = tensor("op_35572_cast_fp16")]; + tensor var_35579_begin_0 = const()[name = tensor("op_35579_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_35579_end_0 = const()[name = tensor("op_35579_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_35579_end_mask_0 = const()[name = tensor("op_35579_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35579_cast_fp16 = slice_by_index(begin = var_35579_begin_0, end = var_35579_end_0, end_mask = var_35579_end_mask_0, x = var_35391_cast_fp16)[name = tensor("op_35579_cast_fp16")]; + tensor var_35586_begin_0 = const()[name = tensor("op_35586_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_35586_end_0 = const()[name = tensor("op_35586_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_35586_end_mask_0 = const()[name = tensor("op_35586_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35586_cast_fp16 = slice_by_index(begin = var_35586_begin_0, end = var_35586_end_0, end_mask = var_35586_end_mask_0, x = var_35391_cast_fp16)[name = tensor("op_35586_cast_fp16")]; + tensor var_35593_begin_0 = const()[name = tensor("op_35593_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_35593_end_0 = const()[name = tensor("op_35593_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_35593_end_mask_0 = const()[name = tensor("op_35593_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35593_cast_fp16 = slice_by_index(begin = var_35593_begin_0, end = var_35593_end_0, end_mask = var_35593_end_mask_0, x = var_35391_cast_fp16)[name = tensor("op_35593_cast_fp16")]; + tensor var_35600_begin_0 = const()[name = tensor("op_35600_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_35600_end_0 = const()[name = tensor("op_35600_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_35600_end_mask_0 = const()[name = tensor("op_35600_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35600_cast_fp16 = slice_by_index(begin = var_35600_begin_0, end = var_35600_end_0, end_mask = var_35600_end_mask_0, x = var_35395_cast_fp16)[name = tensor("op_35600_cast_fp16")]; + tensor var_35607_begin_0 = const()[name = tensor("op_35607_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_35607_end_0 = const()[name = tensor("op_35607_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_35607_end_mask_0 = const()[name = tensor("op_35607_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35607_cast_fp16 = slice_by_index(begin = var_35607_begin_0, end = var_35607_end_0, end_mask = var_35607_end_mask_0, x = var_35395_cast_fp16)[name = tensor("op_35607_cast_fp16")]; + tensor var_35614_begin_0 = const()[name = tensor("op_35614_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_35614_end_0 = const()[name = tensor("op_35614_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_35614_end_mask_0 = const()[name = tensor("op_35614_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35614_cast_fp16 = slice_by_index(begin = var_35614_begin_0, end = var_35614_end_0, end_mask = var_35614_end_mask_0, x = var_35395_cast_fp16)[name = tensor("op_35614_cast_fp16")]; + tensor var_35621_begin_0 = const()[name = tensor("op_35621_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_35621_end_0 = const()[name = tensor("op_35621_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_35621_end_mask_0 = const()[name = tensor("op_35621_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35621_cast_fp16 = slice_by_index(begin = var_35621_begin_0, end = var_35621_end_0, end_mask = var_35621_end_mask_0, x = var_35395_cast_fp16)[name = tensor("op_35621_cast_fp16")]; + tensor var_35628_begin_0 = const()[name = tensor("op_35628_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_35628_end_0 = const()[name = tensor("op_35628_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_35628_end_mask_0 = const()[name = tensor("op_35628_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35628_cast_fp16 = slice_by_index(begin = var_35628_begin_0, end = var_35628_end_0, end_mask = var_35628_end_mask_0, x = var_35399_cast_fp16)[name = tensor("op_35628_cast_fp16")]; + tensor var_35635_begin_0 = const()[name = tensor("op_35635_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_35635_end_0 = const()[name = tensor("op_35635_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_35635_end_mask_0 = const()[name = tensor("op_35635_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35635_cast_fp16 = slice_by_index(begin = var_35635_begin_0, end = var_35635_end_0, end_mask = var_35635_end_mask_0, x = var_35399_cast_fp16)[name = tensor("op_35635_cast_fp16")]; + tensor var_35642_begin_0 = const()[name = tensor("op_35642_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_35642_end_0 = const()[name = tensor("op_35642_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_35642_end_mask_0 = const()[name = tensor("op_35642_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35642_cast_fp16 = slice_by_index(begin = var_35642_begin_0, end = var_35642_end_0, end_mask = var_35642_end_mask_0, x = var_35399_cast_fp16)[name = tensor("op_35642_cast_fp16")]; + tensor var_35649_begin_0 = const()[name = tensor("op_35649_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_35649_end_0 = const()[name = tensor("op_35649_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_35649_end_mask_0 = const()[name = tensor("op_35649_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35649_cast_fp16 = slice_by_index(begin = var_35649_begin_0, end = var_35649_end_0, end_mask = var_35649_end_mask_0, x = var_35399_cast_fp16)[name = tensor("op_35649_cast_fp16")]; + tensor var_35656_begin_0 = const()[name = tensor("op_35656_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_35656_end_0 = const()[name = tensor("op_35656_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_35656_end_mask_0 = const()[name = tensor("op_35656_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35656_cast_fp16 = slice_by_index(begin = var_35656_begin_0, end = var_35656_end_0, end_mask = var_35656_end_mask_0, x = var_35403_cast_fp16)[name = tensor("op_35656_cast_fp16")]; + tensor var_35663_begin_0 = const()[name = tensor("op_35663_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_35663_end_0 = const()[name = tensor("op_35663_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_35663_end_mask_0 = const()[name = tensor("op_35663_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35663_cast_fp16 = slice_by_index(begin = var_35663_begin_0, end = var_35663_end_0, end_mask = var_35663_end_mask_0, x = var_35403_cast_fp16)[name = tensor("op_35663_cast_fp16")]; + tensor var_35670_begin_0 = const()[name = tensor("op_35670_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_35670_end_0 = const()[name = tensor("op_35670_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_35670_end_mask_0 = const()[name = tensor("op_35670_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35670_cast_fp16 = slice_by_index(begin = var_35670_begin_0, end = var_35670_end_0, end_mask = var_35670_end_mask_0, x = var_35403_cast_fp16)[name = tensor("op_35670_cast_fp16")]; + tensor var_35677_begin_0 = const()[name = tensor("op_35677_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_35677_end_0 = const()[name = tensor("op_35677_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_35677_end_mask_0 = const()[name = tensor("op_35677_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35677_cast_fp16 = slice_by_index(begin = var_35677_begin_0, end = var_35677_end_0, end_mask = var_35677_end_mask_0, x = var_35403_cast_fp16)[name = tensor("op_35677_cast_fp16")]; + tensor var_35684_begin_0 = const()[name = tensor("op_35684_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_35684_end_0 = const()[name = tensor("op_35684_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_35684_end_mask_0 = const()[name = tensor("op_35684_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35684_cast_fp16 = slice_by_index(begin = var_35684_begin_0, end = var_35684_end_0, end_mask = var_35684_end_mask_0, x = var_35407_cast_fp16)[name = tensor("op_35684_cast_fp16")]; + tensor var_35691_begin_0 = const()[name = tensor("op_35691_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_35691_end_0 = const()[name = tensor("op_35691_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_35691_end_mask_0 = const()[name = tensor("op_35691_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35691_cast_fp16 = slice_by_index(begin = var_35691_begin_0, end = var_35691_end_0, end_mask = var_35691_end_mask_0, x = var_35407_cast_fp16)[name = tensor("op_35691_cast_fp16")]; + tensor var_35698_begin_0 = const()[name = tensor("op_35698_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_35698_end_0 = const()[name = tensor("op_35698_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_35698_end_mask_0 = const()[name = tensor("op_35698_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35698_cast_fp16 = slice_by_index(begin = var_35698_begin_0, end = var_35698_end_0, end_mask = var_35698_end_mask_0, x = var_35407_cast_fp16)[name = tensor("op_35698_cast_fp16")]; + tensor var_35705_begin_0 = const()[name = tensor("op_35705_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_35705_end_0 = const()[name = tensor("op_35705_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_35705_end_mask_0 = const()[name = tensor("op_35705_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35705_cast_fp16 = slice_by_index(begin = var_35705_begin_0, end = var_35705_end_0, end_mask = var_35705_end_mask_0, x = var_35407_cast_fp16)[name = tensor("op_35705_cast_fp16")]; + tensor var_35712_begin_0 = const()[name = tensor("op_35712_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_35712_end_0 = const()[name = tensor("op_35712_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_35712_end_mask_0 = const()[name = tensor("op_35712_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35712_cast_fp16 = slice_by_index(begin = var_35712_begin_0, end = var_35712_end_0, end_mask = var_35712_end_mask_0, x = var_35411_cast_fp16)[name = tensor("op_35712_cast_fp16")]; + tensor var_35719_begin_0 = const()[name = tensor("op_35719_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_35719_end_0 = const()[name = tensor("op_35719_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_35719_end_mask_0 = const()[name = tensor("op_35719_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35719_cast_fp16 = slice_by_index(begin = var_35719_begin_0, end = var_35719_end_0, end_mask = var_35719_end_mask_0, x = var_35411_cast_fp16)[name = tensor("op_35719_cast_fp16")]; + tensor var_35726_begin_0 = const()[name = tensor("op_35726_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_35726_end_0 = const()[name = tensor("op_35726_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_35726_end_mask_0 = const()[name = tensor("op_35726_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35726_cast_fp16 = slice_by_index(begin = var_35726_begin_0, end = var_35726_end_0, end_mask = var_35726_end_mask_0, x = var_35411_cast_fp16)[name = tensor("op_35726_cast_fp16")]; + tensor var_35733_begin_0 = const()[name = tensor("op_35733_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_35733_end_0 = const()[name = tensor("op_35733_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_35733_end_mask_0 = const()[name = tensor("op_35733_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35733_cast_fp16 = slice_by_index(begin = var_35733_begin_0, end = var_35733_end_0, end_mask = var_35733_end_mask_0, x = var_35411_cast_fp16)[name = tensor("op_35733_cast_fp16")]; + tensor var_35740_begin_0 = const()[name = tensor("op_35740_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_35740_end_0 = const()[name = tensor("op_35740_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_35740_end_mask_0 = const()[name = tensor("op_35740_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35740_cast_fp16 = slice_by_index(begin = var_35740_begin_0, end = var_35740_end_0, end_mask = var_35740_end_mask_0, x = var_35415_cast_fp16)[name = tensor("op_35740_cast_fp16")]; + tensor var_35747_begin_0 = const()[name = tensor("op_35747_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_35747_end_0 = const()[name = tensor("op_35747_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_35747_end_mask_0 = const()[name = tensor("op_35747_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35747_cast_fp16 = slice_by_index(begin = var_35747_begin_0, end = var_35747_end_0, end_mask = var_35747_end_mask_0, x = var_35415_cast_fp16)[name = tensor("op_35747_cast_fp16")]; + tensor var_35754_begin_0 = const()[name = tensor("op_35754_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_35754_end_0 = const()[name = tensor("op_35754_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_35754_end_mask_0 = const()[name = tensor("op_35754_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35754_cast_fp16 = slice_by_index(begin = var_35754_begin_0, end = var_35754_end_0, end_mask = var_35754_end_mask_0, x = var_35415_cast_fp16)[name = tensor("op_35754_cast_fp16")]; + tensor var_35761_begin_0 = const()[name = tensor("op_35761_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_35761_end_0 = const()[name = tensor("op_35761_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_35761_end_mask_0 = const()[name = tensor("op_35761_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35761_cast_fp16 = slice_by_index(begin = var_35761_begin_0, end = var_35761_end_0, end_mask = var_35761_end_mask_0, x = var_35415_cast_fp16)[name = tensor("op_35761_cast_fp16")]; + tensor var_35768_begin_0 = const()[name = tensor("op_35768_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_35768_end_0 = const()[name = tensor("op_35768_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_35768_end_mask_0 = const()[name = tensor("op_35768_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35768_cast_fp16 = slice_by_index(begin = var_35768_begin_0, end = var_35768_end_0, end_mask = var_35768_end_mask_0, x = var_35419_cast_fp16)[name = tensor("op_35768_cast_fp16")]; + tensor var_35775_begin_0 = const()[name = tensor("op_35775_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_35775_end_0 = const()[name = tensor("op_35775_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_35775_end_mask_0 = const()[name = tensor("op_35775_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35775_cast_fp16 = slice_by_index(begin = var_35775_begin_0, end = var_35775_end_0, end_mask = var_35775_end_mask_0, x = var_35419_cast_fp16)[name = tensor("op_35775_cast_fp16")]; + tensor var_35782_begin_0 = const()[name = tensor("op_35782_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_35782_end_0 = const()[name = tensor("op_35782_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_35782_end_mask_0 = const()[name = tensor("op_35782_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35782_cast_fp16 = slice_by_index(begin = var_35782_begin_0, end = var_35782_end_0, end_mask = var_35782_end_mask_0, x = var_35419_cast_fp16)[name = tensor("op_35782_cast_fp16")]; + tensor var_35789_begin_0 = const()[name = tensor("op_35789_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_35789_end_0 = const()[name = tensor("op_35789_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_35789_end_mask_0 = const()[name = tensor("op_35789_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35789_cast_fp16 = slice_by_index(begin = var_35789_begin_0, end = var_35789_end_0, end_mask = var_35789_end_mask_0, x = var_35419_cast_fp16)[name = tensor("op_35789_cast_fp16")]; + tensor var_35796_begin_0 = const()[name = tensor("op_35796_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_35796_end_0 = const()[name = tensor("op_35796_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_35796_end_mask_0 = const()[name = tensor("op_35796_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35796_cast_fp16 = slice_by_index(begin = var_35796_begin_0, end = var_35796_end_0, end_mask = var_35796_end_mask_0, x = var_35423_cast_fp16)[name = tensor("op_35796_cast_fp16")]; + tensor var_35803_begin_0 = const()[name = tensor("op_35803_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_35803_end_0 = const()[name = tensor("op_35803_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_35803_end_mask_0 = const()[name = tensor("op_35803_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35803_cast_fp16 = slice_by_index(begin = var_35803_begin_0, end = var_35803_end_0, end_mask = var_35803_end_mask_0, x = var_35423_cast_fp16)[name = tensor("op_35803_cast_fp16")]; + tensor var_35810_begin_0 = const()[name = tensor("op_35810_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_35810_end_0 = const()[name = tensor("op_35810_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_35810_end_mask_0 = const()[name = tensor("op_35810_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35810_cast_fp16 = slice_by_index(begin = var_35810_begin_0, end = var_35810_end_0, end_mask = var_35810_end_mask_0, x = var_35423_cast_fp16)[name = tensor("op_35810_cast_fp16")]; + tensor var_35817_begin_0 = const()[name = tensor("op_35817_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_35817_end_0 = const()[name = tensor("op_35817_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_35817_end_mask_0 = const()[name = tensor("op_35817_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35817_cast_fp16 = slice_by_index(begin = var_35817_begin_0, end = var_35817_end_0, end_mask = var_35817_end_mask_0, x = var_35423_cast_fp16)[name = tensor("op_35817_cast_fp16")]; + tensor var_35824_begin_0 = const()[name = tensor("op_35824_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_35824_end_0 = const()[name = tensor("op_35824_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_35824_end_mask_0 = const()[name = tensor("op_35824_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35824_cast_fp16 = slice_by_index(begin = var_35824_begin_0, end = var_35824_end_0, end_mask = var_35824_end_mask_0, x = var_35427_cast_fp16)[name = tensor("op_35824_cast_fp16")]; + tensor var_35831_begin_0 = const()[name = tensor("op_35831_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_35831_end_0 = const()[name = tensor("op_35831_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_35831_end_mask_0 = const()[name = tensor("op_35831_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35831_cast_fp16 = slice_by_index(begin = var_35831_begin_0, end = var_35831_end_0, end_mask = var_35831_end_mask_0, x = var_35427_cast_fp16)[name = tensor("op_35831_cast_fp16")]; + tensor var_35838_begin_0 = const()[name = tensor("op_35838_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_35838_end_0 = const()[name = tensor("op_35838_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_35838_end_mask_0 = const()[name = tensor("op_35838_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35838_cast_fp16 = slice_by_index(begin = var_35838_begin_0, end = var_35838_end_0, end_mask = var_35838_end_mask_0, x = var_35427_cast_fp16)[name = tensor("op_35838_cast_fp16")]; + tensor var_35845_begin_0 = const()[name = tensor("op_35845_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_35845_end_0 = const()[name = tensor("op_35845_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_35845_end_mask_0 = const()[name = tensor("op_35845_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35845_cast_fp16 = slice_by_index(begin = var_35845_begin_0, end = var_35845_end_0, end_mask = var_35845_end_mask_0, x = var_35427_cast_fp16)[name = tensor("op_35845_cast_fp16")]; + tensor var_35852_begin_0 = const()[name = tensor("op_35852_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_35852_end_0 = const()[name = tensor("op_35852_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_35852_end_mask_0 = const()[name = tensor("op_35852_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35852_cast_fp16 = slice_by_index(begin = var_35852_begin_0, end = var_35852_end_0, end_mask = var_35852_end_mask_0, x = var_35431_cast_fp16)[name = tensor("op_35852_cast_fp16")]; + tensor var_35859_begin_0 = const()[name = tensor("op_35859_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_35859_end_0 = const()[name = tensor("op_35859_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_35859_end_mask_0 = const()[name = tensor("op_35859_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35859_cast_fp16 = slice_by_index(begin = var_35859_begin_0, end = var_35859_end_0, end_mask = var_35859_end_mask_0, x = var_35431_cast_fp16)[name = tensor("op_35859_cast_fp16")]; + tensor var_35866_begin_0 = const()[name = tensor("op_35866_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_35866_end_0 = const()[name = tensor("op_35866_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_35866_end_mask_0 = const()[name = tensor("op_35866_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35866_cast_fp16 = slice_by_index(begin = var_35866_begin_0, end = var_35866_end_0, end_mask = var_35866_end_mask_0, x = var_35431_cast_fp16)[name = tensor("op_35866_cast_fp16")]; + tensor var_35873_begin_0 = const()[name = tensor("op_35873_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_35873_end_0 = const()[name = tensor("op_35873_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_35873_end_mask_0 = const()[name = tensor("op_35873_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35873_cast_fp16 = slice_by_index(begin = var_35873_begin_0, end = var_35873_end_0, end_mask = var_35873_end_mask_0, x = var_35431_cast_fp16)[name = tensor("op_35873_cast_fp16")]; + tensor var_35880_begin_0 = const()[name = tensor("op_35880_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_35880_end_0 = const()[name = tensor("op_35880_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_35880_end_mask_0 = const()[name = tensor("op_35880_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35880_cast_fp16 = slice_by_index(begin = var_35880_begin_0, end = var_35880_end_0, end_mask = var_35880_end_mask_0, x = var_35435_cast_fp16)[name = tensor("op_35880_cast_fp16")]; + tensor var_35887_begin_0 = const()[name = tensor("op_35887_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_35887_end_0 = const()[name = tensor("op_35887_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_35887_end_mask_0 = const()[name = tensor("op_35887_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35887_cast_fp16 = slice_by_index(begin = var_35887_begin_0, end = var_35887_end_0, end_mask = var_35887_end_mask_0, x = var_35435_cast_fp16)[name = tensor("op_35887_cast_fp16")]; + tensor var_35894_begin_0 = const()[name = tensor("op_35894_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_35894_end_0 = const()[name = tensor("op_35894_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_35894_end_mask_0 = const()[name = tensor("op_35894_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35894_cast_fp16 = slice_by_index(begin = var_35894_begin_0, end = var_35894_end_0, end_mask = var_35894_end_mask_0, x = var_35435_cast_fp16)[name = tensor("op_35894_cast_fp16")]; + tensor var_35901_begin_0 = const()[name = tensor("op_35901_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_35901_end_0 = const()[name = tensor("op_35901_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_35901_end_mask_0 = const()[name = tensor("op_35901_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35901_cast_fp16 = slice_by_index(begin = var_35901_begin_0, end = var_35901_end_0, end_mask = var_35901_end_mask_0, x = var_35435_cast_fp16)[name = tensor("op_35901_cast_fp16")]; + tensor var_35908_begin_0 = const()[name = tensor("op_35908_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_35908_end_0 = const()[name = tensor("op_35908_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_35908_end_mask_0 = const()[name = tensor("op_35908_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35908_cast_fp16 = slice_by_index(begin = var_35908_begin_0, end = var_35908_end_0, end_mask = var_35908_end_mask_0, x = var_35439_cast_fp16)[name = tensor("op_35908_cast_fp16")]; + tensor var_35915_begin_0 = const()[name = tensor("op_35915_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_35915_end_0 = const()[name = tensor("op_35915_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_35915_end_mask_0 = const()[name = tensor("op_35915_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35915_cast_fp16 = slice_by_index(begin = var_35915_begin_0, end = var_35915_end_0, end_mask = var_35915_end_mask_0, x = var_35439_cast_fp16)[name = tensor("op_35915_cast_fp16")]; + tensor var_35922_begin_0 = const()[name = tensor("op_35922_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_35922_end_0 = const()[name = tensor("op_35922_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_35922_end_mask_0 = const()[name = tensor("op_35922_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35922_cast_fp16 = slice_by_index(begin = var_35922_begin_0, end = var_35922_end_0, end_mask = var_35922_end_mask_0, x = var_35439_cast_fp16)[name = tensor("op_35922_cast_fp16")]; + tensor var_35929_begin_0 = const()[name = tensor("op_35929_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_35929_end_0 = const()[name = tensor("op_35929_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_35929_end_mask_0 = const()[name = tensor("op_35929_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35929_cast_fp16 = slice_by_index(begin = var_35929_begin_0, end = var_35929_end_0, end_mask = var_35929_end_mask_0, x = var_35439_cast_fp16)[name = tensor("op_35929_cast_fp16")]; + tensor var_35936_begin_0 = const()[name = tensor("op_35936_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_35936_end_0 = const()[name = tensor("op_35936_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_35936_end_mask_0 = const()[name = tensor("op_35936_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35936_cast_fp16 = slice_by_index(begin = var_35936_begin_0, end = var_35936_end_0, end_mask = var_35936_end_mask_0, x = var_35443_cast_fp16)[name = tensor("op_35936_cast_fp16")]; + tensor var_35943_begin_0 = const()[name = tensor("op_35943_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_35943_end_0 = const()[name = tensor("op_35943_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_35943_end_mask_0 = const()[name = tensor("op_35943_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35943_cast_fp16 = slice_by_index(begin = var_35943_begin_0, end = var_35943_end_0, end_mask = var_35943_end_mask_0, x = var_35443_cast_fp16)[name = tensor("op_35943_cast_fp16")]; + tensor var_35950_begin_0 = const()[name = tensor("op_35950_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_35950_end_0 = const()[name = tensor("op_35950_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_35950_end_mask_0 = const()[name = tensor("op_35950_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35950_cast_fp16 = slice_by_index(begin = var_35950_begin_0, end = var_35950_end_0, end_mask = var_35950_end_mask_0, x = var_35443_cast_fp16)[name = tensor("op_35950_cast_fp16")]; + tensor var_35957_begin_0 = const()[name = tensor("op_35957_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_35957_end_0 = const()[name = tensor("op_35957_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_35957_end_mask_0 = const()[name = tensor("op_35957_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35957_cast_fp16 = slice_by_index(begin = var_35957_begin_0, end = var_35957_end_0, end_mask = var_35957_end_mask_0, x = var_35443_cast_fp16)[name = tensor("op_35957_cast_fp16")]; + tensor var_35964_begin_0 = const()[name = tensor("op_35964_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_35964_end_0 = const()[name = tensor("op_35964_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_35964_end_mask_0 = const()[name = tensor("op_35964_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35964_cast_fp16 = slice_by_index(begin = var_35964_begin_0, end = var_35964_end_0, end_mask = var_35964_end_mask_0, x = var_35447_cast_fp16)[name = tensor("op_35964_cast_fp16")]; + tensor var_35971_begin_0 = const()[name = tensor("op_35971_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_35971_end_0 = const()[name = tensor("op_35971_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_35971_end_mask_0 = const()[name = tensor("op_35971_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35971_cast_fp16 = slice_by_index(begin = var_35971_begin_0, end = var_35971_end_0, end_mask = var_35971_end_mask_0, x = var_35447_cast_fp16)[name = tensor("op_35971_cast_fp16")]; + tensor var_35978_begin_0 = const()[name = tensor("op_35978_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_35978_end_0 = const()[name = tensor("op_35978_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_35978_end_mask_0 = const()[name = tensor("op_35978_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35978_cast_fp16 = slice_by_index(begin = var_35978_begin_0, end = var_35978_end_0, end_mask = var_35978_end_mask_0, x = var_35447_cast_fp16)[name = tensor("op_35978_cast_fp16")]; + tensor var_35985_begin_0 = const()[name = tensor("op_35985_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_35985_end_0 = const()[name = tensor("op_35985_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_35985_end_mask_0 = const()[name = tensor("op_35985_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35985_cast_fp16 = slice_by_index(begin = var_35985_begin_0, end = var_35985_end_0, end_mask = var_35985_end_mask_0, x = var_35447_cast_fp16)[name = tensor("op_35985_cast_fp16")]; + tensor var_35992_begin_0 = const()[name = tensor("op_35992_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_35992_end_0 = const()[name = tensor("op_35992_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_35992_end_mask_0 = const()[name = tensor("op_35992_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35992_cast_fp16 = slice_by_index(begin = var_35992_begin_0, end = var_35992_end_0, end_mask = var_35992_end_mask_0, x = var_35451_cast_fp16)[name = tensor("op_35992_cast_fp16")]; + tensor var_35999_begin_0 = const()[name = tensor("op_35999_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_35999_end_0 = const()[name = tensor("op_35999_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_35999_end_mask_0 = const()[name = tensor("op_35999_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_35999_cast_fp16 = slice_by_index(begin = var_35999_begin_0, end = var_35999_end_0, end_mask = var_35999_end_mask_0, x = var_35451_cast_fp16)[name = tensor("op_35999_cast_fp16")]; + tensor var_36006_begin_0 = const()[name = tensor("op_36006_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_36006_end_0 = const()[name = tensor("op_36006_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_36006_end_mask_0 = const()[name = tensor("op_36006_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_36006_cast_fp16 = slice_by_index(begin = var_36006_begin_0, end = var_36006_end_0, end_mask = var_36006_end_mask_0, x = var_35451_cast_fp16)[name = tensor("op_36006_cast_fp16")]; + tensor var_36013_begin_0 = const()[name = tensor("op_36013_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_36013_end_0 = const()[name = tensor("op_36013_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_36013_end_mask_0 = const()[name = tensor("op_36013_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_36013_cast_fp16 = slice_by_index(begin = var_36013_begin_0, end = var_36013_end_0, end_mask = var_36013_end_mask_0, x = var_35451_cast_fp16)[name = tensor("op_36013_cast_fp16")]; + tensor k_47_perm_0 = const()[name = tensor("k_47_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_36018_begin_0 = const()[name = tensor("op_36018_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_36018_end_0 = const()[name = tensor("op_36018_end_0"), val = tensor([1, 1500, 1, 64])]; + tensor var_36018_end_mask_0 = const()[name = tensor("op_36018_end_mask_0"), val = tensor([true, true, true, false])]; + tensor transpose_8 = transpose(perm = k_47_perm_0, x = key_47_cast_fp16)[name = tensor("transpose_8")]; + tensor var_36018_cast_fp16 = slice_by_index(begin = var_36018_begin_0, end = var_36018_end_0, end_mask = var_36018_end_mask_0, x = transpose_8)[name = tensor("op_36018_cast_fp16")]; + tensor var_36022_begin_0 = const()[name = tensor("op_36022_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor var_36022_end_0 = const()[name = tensor("op_36022_end_0"), val = tensor([1, 1500, 1, 128])]; + tensor var_36022_end_mask_0 = const()[name = tensor("op_36022_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_36022_cast_fp16 = slice_by_index(begin = var_36022_begin_0, end = var_36022_end_0, end_mask = var_36022_end_mask_0, x = transpose_8)[name = tensor("op_36022_cast_fp16")]; + tensor var_36026_begin_0 = const()[name = tensor("op_36026_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_36026_end_0 = const()[name = tensor("op_36026_end_0"), val = tensor([1, 1500, 1, 192])]; + tensor var_36026_end_mask_0 = const()[name = tensor("op_36026_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_36026_cast_fp16 = slice_by_index(begin = var_36026_begin_0, end = var_36026_end_0, end_mask = var_36026_end_mask_0, x = transpose_8)[name = tensor("op_36026_cast_fp16")]; + tensor var_36030_begin_0 = const()[name = tensor("op_36030_begin_0"), val = tensor([0, 0, 0, 192])]; + tensor var_36030_end_0 = const()[name = tensor("op_36030_end_0"), val = tensor([1, 1500, 1, 256])]; + tensor var_36030_end_mask_0 = const()[name = tensor("op_36030_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_36030_cast_fp16 = slice_by_index(begin = var_36030_begin_0, end = var_36030_end_0, end_mask = var_36030_end_mask_0, x = transpose_8)[name = tensor("op_36030_cast_fp16")]; + tensor var_36034_begin_0 = const()[name = tensor("op_36034_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_36034_end_0 = const()[name = tensor("op_36034_end_0"), val = tensor([1, 1500, 1, 320])]; + tensor var_36034_end_mask_0 = const()[name = tensor("op_36034_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_36034_cast_fp16 = slice_by_index(begin = var_36034_begin_0, end = var_36034_end_0, end_mask = var_36034_end_mask_0, x = transpose_8)[name = tensor("op_36034_cast_fp16")]; + tensor var_36038_begin_0 = const()[name = tensor("op_36038_begin_0"), val = tensor([0, 0, 0, 320])]; + tensor var_36038_end_0 = const()[name = tensor("op_36038_end_0"), val = tensor([1, 1500, 1, 384])]; + tensor var_36038_end_mask_0 = const()[name = tensor("op_36038_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_36038_cast_fp16 = slice_by_index(begin = var_36038_begin_0, end = var_36038_end_0, end_mask = var_36038_end_mask_0, x = transpose_8)[name = tensor("op_36038_cast_fp16")]; + tensor var_36042_begin_0 = const()[name = tensor("op_36042_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_36042_end_0 = const()[name = tensor("op_36042_end_0"), val = tensor([1, 1500, 1, 448])]; + tensor var_36042_end_mask_0 = const()[name = tensor("op_36042_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_36042_cast_fp16 = slice_by_index(begin = var_36042_begin_0, end = var_36042_end_0, end_mask = var_36042_end_mask_0, x = transpose_8)[name = tensor("op_36042_cast_fp16")]; + tensor var_36046_begin_0 = const()[name = tensor("op_36046_begin_0"), val = tensor([0, 0, 0, 448])]; + tensor var_36046_end_0 = const()[name = tensor("op_36046_end_0"), val = tensor([1, 1500, 1, 512])]; + tensor var_36046_end_mask_0 = const()[name = tensor("op_36046_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_36046_cast_fp16 = slice_by_index(begin = var_36046_begin_0, end = var_36046_end_0, end_mask = var_36046_end_mask_0, x = transpose_8)[name = tensor("op_36046_cast_fp16")]; + tensor var_36050_begin_0 = const()[name = tensor("op_36050_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_36050_end_0 = const()[name = tensor("op_36050_end_0"), val = tensor([1, 1500, 1, 576])]; + tensor var_36050_end_mask_0 = const()[name = tensor("op_36050_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_36050_cast_fp16 = slice_by_index(begin = var_36050_begin_0, end = var_36050_end_0, end_mask = var_36050_end_mask_0, x = transpose_8)[name = tensor("op_36050_cast_fp16")]; + tensor var_36054_begin_0 = const()[name = tensor("op_36054_begin_0"), val = tensor([0, 0, 0, 576])]; + tensor var_36054_end_0 = const()[name = tensor("op_36054_end_0"), val = tensor([1, 1500, 1, 640])]; + tensor var_36054_end_mask_0 = const()[name = tensor("op_36054_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_36054_cast_fp16 = slice_by_index(begin = var_36054_begin_0, end = var_36054_end_0, end_mask = var_36054_end_mask_0, x = transpose_8)[name = tensor("op_36054_cast_fp16")]; + tensor var_36058_begin_0 = const()[name = tensor("op_36058_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_36058_end_0 = const()[name = tensor("op_36058_end_0"), val = tensor([1, 1500, 1, 704])]; + tensor var_36058_end_mask_0 = const()[name = tensor("op_36058_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_36058_cast_fp16 = slice_by_index(begin = var_36058_begin_0, end = var_36058_end_0, end_mask = var_36058_end_mask_0, x = transpose_8)[name = tensor("op_36058_cast_fp16")]; + tensor var_36062_begin_0 = const()[name = tensor("op_36062_begin_0"), val = tensor([0, 0, 0, 704])]; + tensor var_36062_end_0 = const()[name = tensor("op_36062_end_0"), val = tensor([1, 1500, 1, 768])]; + tensor var_36062_end_mask_0 = const()[name = tensor("op_36062_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_36062_cast_fp16 = slice_by_index(begin = var_36062_begin_0, end = var_36062_end_0, end_mask = var_36062_end_mask_0, x = transpose_8)[name = tensor("op_36062_cast_fp16")]; + tensor var_36066_begin_0 = const()[name = tensor("op_36066_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_36066_end_0 = const()[name = tensor("op_36066_end_0"), val = tensor([1, 1500, 1, 832])]; + tensor var_36066_end_mask_0 = const()[name = tensor("op_36066_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_36066_cast_fp16 = slice_by_index(begin = var_36066_begin_0, end = var_36066_end_0, end_mask = var_36066_end_mask_0, x = transpose_8)[name = tensor("op_36066_cast_fp16")]; + tensor var_36070_begin_0 = const()[name = tensor("op_36070_begin_0"), val = tensor([0, 0, 0, 832])]; + tensor var_36070_end_0 = const()[name = tensor("op_36070_end_0"), val = tensor([1, 1500, 1, 896])]; + tensor var_36070_end_mask_0 = const()[name = tensor("op_36070_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_36070_cast_fp16 = slice_by_index(begin = var_36070_begin_0, end = var_36070_end_0, end_mask = var_36070_end_mask_0, x = transpose_8)[name = tensor("op_36070_cast_fp16")]; + tensor var_36074_begin_0 = const()[name = tensor("op_36074_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_36074_end_0 = const()[name = tensor("op_36074_end_0"), val = tensor([1, 1500, 1, 960])]; + tensor var_36074_end_mask_0 = const()[name = tensor("op_36074_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_36074_cast_fp16 = slice_by_index(begin = var_36074_begin_0, end = var_36074_end_0, end_mask = var_36074_end_mask_0, x = transpose_8)[name = tensor("op_36074_cast_fp16")]; + tensor var_36078_begin_0 = const()[name = tensor("op_36078_begin_0"), val = tensor([0, 0, 0, 960])]; + tensor var_36078_end_0 = const()[name = tensor("op_36078_end_0"), val = tensor([1, 1500, 1, 1024])]; + tensor var_36078_end_mask_0 = const()[name = tensor("op_36078_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_36078_cast_fp16 = slice_by_index(begin = var_36078_begin_0, end = var_36078_end_0, end_mask = var_36078_end_mask_0, x = transpose_8)[name = tensor("op_36078_cast_fp16")]; + tensor var_36082_begin_0 = const()[name = tensor("op_36082_begin_0"), val = tensor([0, 0, 0, 1024])]; + tensor var_36082_end_0 = const()[name = tensor("op_36082_end_0"), val = tensor([1, 1500, 1, 1088])]; + tensor var_36082_end_mask_0 = const()[name = tensor("op_36082_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_36082_cast_fp16 = slice_by_index(begin = var_36082_begin_0, end = var_36082_end_0, end_mask = var_36082_end_mask_0, x = transpose_8)[name = tensor("op_36082_cast_fp16")]; + tensor var_36086_begin_0 = const()[name = tensor("op_36086_begin_0"), val = tensor([0, 0, 0, 1088])]; + tensor var_36086_end_0 = const()[name = tensor("op_36086_end_0"), val = tensor([1, 1500, 1, 1152])]; + tensor var_36086_end_mask_0 = const()[name = tensor("op_36086_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_36086_cast_fp16 = slice_by_index(begin = var_36086_begin_0, end = var_36086_end_0, end_mask = var_36086_end_mask_0, x = transpose_8)[name = tensor("op_36086_cast_fp16")]; + tensor var_36090_begin_0 = const()[name = tensor("op_36090_begin_0"), val = tensor([0, 0, 0, 1152])]; + tensor var_36090_end_0 = const()[name = tensor("op_36090_end_0"), val = tensor([1, 1500, 1, 1216])]; + tensor var_36090_end_mask_0 = const()[name = tensor("op_36090_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_36090_cast_fp16 = slice_by_index(begin = var_36090_begin_0, end = var_36090_end_0, end_mask = var_36090_end_mask_0, x = transpose_8)[name = tensor("op_36090_cast_fp16")]; + tensor var_36094_begin_0 = const()[name = tensor("op_36094_begin_0"), val = tensor([0, 0, 0, 1216])]; + tensor var_36094_end_0 = const()[name = tensor("op_36094_end_0"), val = tensor([1, 1500, 1, 1280])]; + tensor var_36094_end_mask_0 = const()[name = tensor("op_36094_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_36094_cast_fp16 = slice_by_index(begin = var_36094_begin_0, end = var_36094_end_0, end_mask = var_36094_end_mask_0, x = transpose_8)[name = tensor("op_36094_cast_fp16")]; + tensor var_36096_begin_0 = const()[name = tensor("op_36096_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_36096_end_0 = const()[name = tensor("op_36096_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_36096_end_mask_0 = const()[name = tensor("op_36096_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_36096_cast_fp16 = slice_by_index(begin = var_36096_begin_0, end = var_36096_end_0, end_mask = var_36096_end_mask_0, x = value_47_cast_fp16)[name = tensor("op_36096_cast_fp16")]; + tensor var_36100_begin_0 = const()[name = tensor("op_36100_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_36100_end_0 = const()[name = tensor("op_36100_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_36100_end_mask_0 = const()[name = tensor("op_36100_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_36100_cast_fp16 = slice_by_index(begin = var_36100_begin_0, end = var_36100_end_0, end_mask = var_36100_end_mask_0, x = value_47_cast_fp16)[name = tensor("op_36100_cast_fp16")]; + tensor var_36104_begin_0 = const()[name = tensor("op_36104_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_36104_end_0 = const()[name = tensor("op_36104_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_36104_end_mask_0 = const()[name = tensor("op_36104_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_36104_cast_fp16 = slice_by_index(begin = var_36104_begin_0, end = var_36104_end_0, end_mask = var_36104_end_mask_0, x = value_47_cast_fp16)[name = tensor("op_36104_cast_fp16")]; + tensor var_36108_begin_0 = const()[name = tensor("op_36108_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_36108_end_0 = const()[name = tensor("op_36108_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_36108_end_mask_0 = const()[name = tensor("op_36108_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_36108_cast_fp16 = slice_by_index(begin = var_36108_begin_0, end = var_36108_end_0, end_mask = var_36108_end_mask_0, x = value_47_cast_fp16)[name = tensor("op_36108_cast_fp16")]; + tensor var_36112_begin_0 = const()[name = tensor("op_36112_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_36112_end_0 = const()[name = tensor("op_36112_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_36112_end_mask_0 = const()[name = tensor("op_36112_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_36112_cast_fp16 = slice_by_index(begin = var_36112_begin_0, end = var_36112_end_0, end_mask = var_36112_end_mask_0, x = value_47_cast_fp16)[name = tensor("op_36112_cast_fp16")]; + tensor var_36116_begin_0 = const()[name = tensor("op_36116_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_36116_end_0 = const()[name = tensor("op_36116_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_36116_end_mask_0 = const()[name = tensor("op_36116_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_36116_cast_fp16 = slice_by_index(begin = var_36116_begin_0, end = var_36116_end_0, end_mask = var_36116_end_mask_0, x = value_47_cast_fp16)[name = tensor("op_36116_cast_fp16")]; + tensor var_36120_begin_0 = const()[name = tensor("op_36120_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_36120_end_0 = const()[name = tensor("op_36120_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_36120_end_mask_0 = const()[name = tensor("op_36120_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_36120_cast_fp16 = slice_by_index(begin = var_36120_begin_0, end = var_36120_end_0, end_mask = var_36120_end_mask_0, x = value_47_cast_fp16)[name = tensor("op_36120_cast_fp16")]; + tensor var_36124_begin_0 = const()[name = tensor("op_36124_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_36124_end_0 = const()[name = tensor("op_36124_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_36124_end_mask_0 = const()[name = tensor("op_36124_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_36124_cast_fp16 = slice_by_index(begin = var_36124_begin_0, end = var_36124_end_0, end_mask = var_36124_end_mask_0, x = value_47_cast_fp16)[name = tensor("op_36124_cast_fp16")]; + tensor var_36128_begin_0 = const()[name = tensor("op_36128_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_36128_end_0 = const()[name = tensor("op_36128_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_36128_end_mask_0 = const()[name = tensor("op_36128_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_36128_cast_fp16 = slice_by_index(begin = var_36128_begin_0, end = var_36128_end_0, end_mask = var_36128_end_mask_0, x = value_47_cast_fp16)[name = tensor("op_36128_cast_fp16")]; + tensor var_36132_begin_0 = const()[name = tensor("op_36132_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_36132_end_0 = const()[name = tensor("op_36132_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_36132_end_mask_0 = const()[name = tensor("op_36132_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_36132_cast_fp16 = slice_by_index(begin = var_36132_begin_0, end = var_36132_end_0, end_mask = var_36132_end_mask_0, x = value_47_cast_fp16)[name = tensor("op_36132_cast_fp16")]; + tensor var_36136_begin_0 = const()[name = tensor("op_36136_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_36136_end_0 = const()[name = tensor("op_36136_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_36136_end_mask_0 = const()[name = tensor("op_36136_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_36136_cast_fp16 = slice_by_index(begin = var_36136_begin_0, end = var_36136_end_0, end_mask = var_36136_end_mask_0, x = value_47_cast_fp16)[name = tensor("op_36136_cast_fp16")]; + tensor var_36140_begin_0 = const()[name = tensor("op_36140_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_36140_end_0 = const()[name = tensor("op_36140_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_36140_end_mask_0 = const()[name = tensor("op_36140_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_36140_cast_fp16 = slice_by_index(begin = var_36140_begin_0, end = var_36140_end_0, end_mask = var_36140_end_mask_0, x = value_47_cast_fp16)[name = tensor("op_36140_cast_fp16")]; + tensor var_36144_begin_0 = const()[name = tensor("op_36144_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_36144_end_0 = const()[name = tensor("op_36144_end_0"), val = tensor([1, 832, 1, 1500])]; + tensor var_36144_end_mask_0 = const()[name = tensor("op_36144_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_36144_cast_fp16 = slice_by_index(begin = var_36144_begin_0, end = var_36144_end_0, end_mask = var_36144_end_mask_0, x = value_47_cast_fp16)[name = tensor("op_36144_cast_fp16")]; + tensor var_36148_begin_0 = const()[name = tensor("op_36148_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_36148_end_0 = const()[name = tensor("op_36148_end_0"), val = tensor([1, 896, 1, 1500])]; + tensor var_36148_end_mask_0 = const()[name = tensor("op_36148_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_36148_cast_fp16 = slice_by_index(begin = var_36148_begin_0, end = var_36148_end_0, end_mask = var_36148_end_mask_0, x = value_47_cast_fp16)[name = tensor("op_36148_cast_fp16")]; + tensor var_36152_begin_0 = const()[name = tensor("op_36152_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_36152_end_0 = const()[name = tensor("op_36152_end_0"), val = tensor([1, 960, 1, 1500])]; + tensor var_36152_end_mask_0 = const()[name = tensor("op_36152_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_36152_cast_fp16 = slice_by_index(begin = var_36152_begin_0, end = var_36152_end_0, end_mask = var_36152_end_mask_0, x = value_47_cast_fp16)[name = tensor("op_36152_cast_fp16")]; + tensor var_36156_begin_0 = const()[name = tensor("op_36156_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_36156_end_0 = const()[name = tensor("op_36156_end_0"), val = tensor([1, 1024, 1, 1500])]; + tensor var_36156_end_mask_0 = const()[name = tensor("op_36156_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_36156_cast_fp16 = slice_by_index(begin = var_36156_begin_0, end = var_36156_end_0, end_mask = var_36156_end_mask_0, x = value_47_cast_fp16)[name = tensor("op_36156_cast_fp16")]; + tensor var_36160_begin_0 = const()[name = tensor("op_36160_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_36160_end_0 = const()[name = tensor("op_36160_end_0"), val = tensor([1, 1088, 1, 1500])]; + tensor var_36160_end_mask_0 = const()[name = tensor("op_36160_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_36160_cast_fp16 = slice_by_index(begin = var_36160_begin_0, end = var_36160_end_0, end_mask = var_36160_end_mask_0, x = value_47_cast_fp16)[name = tensor("op_36160_cast_fp16")]; + tensor var_36164_begin_0 = const()[name = tensor("op_36164_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_36164_end_0 = const()[name = tensor("op_36164_end_0"), val = tensor([1, 1152, 1, 1500])]; + tensor var_36164_end_mask_0 = const()[name = tensor("op_36164_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_36164_cast_fp16 = slice_by_index(begin = var_36164_begin_0, end = var_36164_end_0, end_mask = var_36164_end_mask_0, x = value_47_cast_fp16)[name = tensor("op_36164_cast_fp16")]; + tensor var_36168_begin_0 = const()[name = tensor("op_36168_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_36168_end_0 = const()[name = tensor("op_36168_end_0"), val = tensor([1, 1216, 1, 1500])]; + tensor var_36168_end_mask_0 = const()[name = tensor("op_36168_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_36168_cast_fp16 = slice_by_index(begin = var_36168_begin_0, end = var_36168_end_0, end_mask = var_36168_end_mask_0, x = value_47_cast_fp16)[name = tensor("op_36168_cast_fp16")]; + tensor var_36172_begin_0 = const()[name = tensor("op_36172_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_36172_end_0 = const()[name = tensor("op_36172_end_0"), val = tensor([1, 1280, 1, 1500])]; + tensor var_36172_end_mask_0 = const()[name = tensor("op_36172_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_36172_cast_fp16 = slice_by_index(begin = var_36172_begin_0, end = var_36172_end_0, end_mask = var_36172_end_mask_0, x = value_47_cast_fp16)[name = tensor("op_36172_cast_fp16")]; + tensor var_36176_equation_0 = const()[name = tensor("op_36176_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36176_cast_fp16 = einsum(equation = var_36176_equation_0, values = (var_36018_cast_fp16, var_35460_cast_fp16))[name = tensor("op_36176_cast_fp16")]; + tensor var_36177_to_fp16 = const()[name = tensor("op_36177_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3681_cast_fp16 = mul(x = var_36176_cast_fp16, y = var_36177_to_fp16)[name = tensor("aw_chunk_3681_cast_fp16")]; + tensor var_36180_equation_0 = const()[name = tensor("op_36180_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36180_cast_fp16 = einsum(equation = var_36180_equation_0, values = (var_36018_cast_fp16, var_35467_cast_fp16))[name = tensor("op_36180_cast_fp16")]; + tensor var_36181_to_fp16 = const()[name = tensor("op_36181_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3683_cast_fp16 = mul(x = var_36180_cast_fp16, y = var_36181_to_fp16)[name = tensor("aw_chunk_3683_cast_fp16")]; + tensor var_36184_equation_0 = const()[name = tensor("op_36184_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36184_cast_fp16 = einsum(equation = var_36184_equation_0, values = (var_36018_cast_fp16, var_35474_cast_fp16))[name = tensor("op_36184_cast_fp16")]; + tensor var_36185_to_fp16 = const()[name = tensor("op_36185_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3685_cast_fp16 = mul(x = var_36184_cast_fp16, y = var_36185_to_fp16)[name = tensor("aw_chunk_3685_cast_fp16")]; + tensor var_36188_equation_0 = const()[name = tensor("op_36188_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36188_cast_fp16 = einsum(equation = var_36188_equation_0, values = (var_36018_cast_fp16, var_35481_cast_fp16))[name = tensor("op_36188_cast_fp16")]; + tensor var_36189_to_fp16 = const()[name = tensor("op_36189_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3687_cast_fp16 = mul(x = var_36188_cast_fp16, y = var_36189_to_fp16)[name = tensor("aw_chunk_3687_cast_fp16")]; + tensor var_36192_equation_0 = const()[name = tensor("op_36192_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36192_cast_fp16 = einsum(equation = var_36192_equation_0, values = (var_36022_cast_fp16, var_35488_cast_fp16))[name = tensor("op_36192_cast_fp16")]; + tensor var_36193_to_fp16 = const()[name = tensor("op_36193_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3689_cast_fp16 = mul(x = var_36192_cast_fp16, y = var_36193_to_fp16)[name = tensor("aw_chunk_3689_cast_fp16")]; + tensor var_36196_equation_0 = const()[name = tensor("op_36196_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36196_cast_fp16 = einsum(equation = var_36196_equation_0, values = (var_36022_cast_fp16, var_35495_cast_fp16))[name = tensor("op_36196_cast_fp16")]; + tensor var_36197_to_fp16 = const()[name = tensor("op_36197_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3691_cast_fp16 = mul(x = var_36196_cast_fp16, y = var_36197_to_fp16)[name = tensor("aw_chunk_3691_cast_fp16")]; + tensor var_36200_equation_0 = const()[name = tensor("op_36200_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36200_cast_fp16 = einsum(equation = var_36200_equation_0, values = (var_36022_cast_fp16, var_35502_cast_fp16))[name = tensor("op_36200_cast_fp16")]; + tensor var_36201_to_fp16 = const()[name = tensor("op_36201_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3693_cast_fp16 = mul(x = var_36200_cast_fp16, y = var_36201_to_fp16)[name = tensor("aw_chunk_3693_cast_fp16")]; + tensor var_36204_equation_0 = const()[name = tensor("op_36204_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36204_cast_fp16 = einsum(equation = var_36204_equation_0, values = (var_36022_cast_fp16, var_35509_cast_fp16))[name = tensor("op_36204_cast_fp16")]; + tensor var_36205_to_fp16 = const()[name = tensor("op_36205_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3695_cast_fp16 = mul(x = var_36204_cast_fp16, y = var_36205_to_fp16)[name = tensor("aw_chunk_3695_cast_fp16")]; + tensor var_36208_equation_0 = const()[name = tensor("op_36208_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36208_cast_fp16 = einsum(equation = var_36208_equation_0, values = (var_36026_cast_fp16, var_35516_cast_fp16))[name = tensor("op_36208_cast_fp16")]; + tensor var_36209_to_fp16 = const()[name = tensor("op_36209_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3697_cast_fp16 = mul(x = var_36208_cast_fp16, y = var_36209_to_fp16)[name = tensor("aw_chunk_3697_cast_fp16")]; + tensor var_36212_equation_0 = const()[name = tensor("op_36212_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36212_cast_fp16 = einsum(equation = var_36212_equation_0, values = (var_36026_cast_fp16, var_35523_cast_fp16))[name = tensor("op_36212_cast_fp16")]; + tensor var_36213_to_fp16 = const()[name = tensor("op_36213_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3699_cast_fp16 = mul(x = var_36212_cast_fp16, y = var_36213_to_fp16)[name = tensor("aw_chunk_3699_cast_fp16")]; + tensor var_36216_equation_0 = const()[name = tensor("op_36216_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36216_cast_fp16 = einsum(equation = var_36216_equation_0, values = (var_36026_cast_fp16, var_35530_cast_fp16))[name = tensor("op_36216_cast_fp16")]; + tensor var_36217_to_fp16 = const()[name = tensor("op_36217_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3701_cast_fp16 = mul(x = var_36216_cast_fp16, y = var_36217_to_fp16)[name = tensor("aw_chunk_3701_cast_fp16")]; + tensor var_36220_equation_0 = const()[name = tensor("op_36220_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36220_cast_fp16 = einsum(equation = var_36220_equation_0, values = (var_36026_cast_fp16, var_35537_cast_fp16))[name = tensor("op_36220_cast_fp16")]; + tensor var_36221_to_fp16 = const()[name = tensor("op_36221_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3703_cast_fp16 = mul(x = var_36220_cast_fp16, y = var_36221_to_fp16)[name = tensor("aw_chunk_3703_cast_fp16")]; + tensor var_36224_equation_0 = const()[name = tensor("op_36224_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36224_cast_fp16 = einsum(equation = var_36224_equation_0, values = (var_36030_cast_fp16, var_35544_cast_fp16))[name = tensor("op_36224_cast_fp16")]; + tensor var_36225_to_fp16 = const()[name = tensor("op_36225_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3705_cast_fp16 = mul(x = var_36224_cast_fp16, y = var_36225_to_fp16)[name = tensor("aw_chunk_3705_cast_fp16")]; + tensor var_36228_equation_0 = const()[name = tensor("op_36228_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36228_cast_fp16 = einsum(equation = var_36228_equation_0, values = (var_36030_cast_fp16, var_35551_cast_fp16))[name = tensor("op_36228_cast_fp16")]; + tensor var_36229_to_fp16 = const()[name = tensor("op_36229_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3707_cast_fp16 = mul(x = var_36228_cast_fp16, y = var_36229_to_fp16)[name = tensor("aw_chunk_3707_cast_fp16")]; + tensor var_36232_equation_0 = const()[name = tensor("op_36232_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36232_cast_fp16 = einsum(equation = var_36232_equation_0, values = (var_36030_cast_fp16, var_35558_cast_fp16))[name = tensor("op_36232_cast_fp16")]; + tensor var_36233_to_fp16 = const()[name = tensor("op_36233_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3709_cast_fp16 = mul(x = var_36232_cast_fp16, y = var_36233_to_fp16)[name = tensor("aw_chunk_3709_cast_fp16")]; + tensor var_36236_equation_0 = const()[name = tensor("op_36236_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36236_cast_fp16 = einsum(equation = var_36236_equation_0, values = (var_36030_cast_fp16, var_35565_cast_fp16))[name = tensor("op_36236_cast_fp16")]; + tensor var_36237_to_fp16 = const()[name = tensor("op_36237_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3711_cast_fp16 = mul(x = var_36236_cast_fp16, y = var_36237_to_fp16)[name = tensor("aw_chunk_3711_cast_fp16")]; + tensor var_36240_equation_0 = const()[name = tensor("op_36240_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36240_cast_fp16 = einsum(equation = var_36240_equation_0, values = (var_36034_cast_fp16, var_35572_cast_fp16))[name = tensor("op_36240_cast_fp16")]; + tensor var_36241_to_fp16 = const()[name = tensor("op_36241_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3713_cast_fp16 = mul(x = var_36240_cast_fp16, y = var_36241_to_fp16)[name = tensor("aw_chunk_3713_cast_fp16")]; + tensor var_36244_equation_0 = const()[name = tensor("op_36244_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36244_cast_fp16 = einsum(equation = var_36244_equation_0, values = (var_36034_cast_fp16, var_35579_cast_fp16))[name = tensor("op_36244_cast_fp16")]; + tensor var_36245_to_fp16 = const()[name = tensor("op_36245_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3715_cast_fp16 = mul(x = var_36244_cast_fp16, y = var_36245_to_fp16)[name = tensor("aw_chunk_3715_cast_fp16")]; + tensor var_36248_equation_0 = const()[name = tensor("op_36248_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36248_cast_fp16 = einsum(equation = var_36248_equation_0, values = (var_36034_cast_fp16, var_35586_cast_fp16))[name = tensor("op_36248_cast_fp16")]; + tensor var_36249_to_fp16 = const()[name = tensor("op_36249_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3717_cast_fp16 = mul(x = var_36248_cast_fp16, y = var_36249_to_fp16)[name = tensor("aw_chunk_3717_cast_fp16")]; + tensor var_36252_equation_0 = const()[name = tensor("op_36252_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36252_cast_fp16 = einsum(equation = var_36252_equation_0, values = (var_36034_cast_fp16, var_35593_cast_fp16))[name = tensor("op_36252_cast_fp16")]; + tensor var_36253_to_fp16 = const()[name = tensor("op_36253_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3719_cast_fp16 = mul(x = var_36252_cast_fp16, y = var_36253_to_fp16)[name = tensor("aw_chunk_3719_cast_fp16")]; + tensor var_36256_equation_0 = const()[name = tensor("op_36256_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36256_cast_fp16 = einsum(equation = var_36256_equation_0, values = (var_36038_cast_fp16, var_35600_cast_fp16))[name = tensor("op_36256_cast_fp16")]; + tensor var_36257_to_fp16 = const()[name = tensor("op_36257_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3721_cast_fp16 = mul(x = var_36256_cast_fp16, y = var_36257_to_fp16)[name = tensor("aw_chunk_3721_cast_fp16")]; + tensor var_36260_equation_0 = const()[name = tensor("op_36260_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36260_cast_fp16 = einsum(equation = var_36260_equation_0, values = (var_36038_cast_fp16, var_35607_cast_fp16))[name = tensor("op_36260_cast_fp16")]; + tensor var_36261_to_fp16 = const()[name = tensor("op_36261_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3723_cast_fp16 = mul(x = var_36260_cast_fp16, y = var_36261_to_fp16)[name = tensor("aw_chunk_3723_cast_fp16")]; + tensor var_36264_equation_0 = const()[name = tensor("op_36264_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36264_cast_fp16 = einsum(equation = var_36264_equation_0, values = (var_36038_cast_fp16, var_35614_cast_fp16))[name = tensor("op_36264_cast_fp16")]; + tensor var_36265_to_fp16 = const()[name = tensor("op_36265_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3725_cast_fp16 = mul(x = var_36264_cast_fp16, y = var_36265_to_fp16)[name = tensor("aw_chunk_3725_cast_fp16")]; + tensor var_36268_equation_0 = const()[name = tensor("op_36268_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36268_cast_fp16 = einsum(equation = var_36268_equation_0, values = (var_36038_cast_fp16, var_35621_cast_fp16))[name = tensor("op_36268_cast_fp16")]; + tensor var_36269_to_fp16 = const()[name = tensor("op_36269_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3727_cast_fp16 = mul(x = var_36268_cast_fp16, y = var_36269_to_fp16)[name = tensor("aw_chunk_3727_cast_fp16")]; + tensor var_36272_equation_0 = const()[name = tensor("op_36272_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36272_cast_fp16 = einsum(equation = var_36272_equation_0, values = (var_36042_cast_fp16, var_35628_cast_fp16))[name = tensor("op_36272_cast_fp16")]; + tensor var_36273_to_fp16 = const()[name = tensor("op_36273_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3729_cast_fp16 = mul(x = var_36272_cast_fp16, y = var_36273_to_fp16)[name = tensor("aw_chunk_3729_cast_fp16")]; + tensor var_36276_equation_0 = const()[name = tensor("op_36276_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36276_cast_fp16 = einsum(equation = var_36276_equation_0, values = (var_36042_cast_fp16, var_35635_cast_fp16))[name = tensor("op_36276_cast_fp16")]; + tensor var_36277_to_fp16 = const()[name = tensor("op_36277_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3731_cast_fp16 = mul(x = var_36276_cast_fp16, y = var_36277_to_fp16)[name = tensor("aw_chunk_3731_cast_fp16")]; + tensor var_36280_equation_0 = const()[name = tensor("op_36280_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36280_cast_fp16 = einsum(equation = var_36280_equation_0, values = (var_36042_cast_fp16, var_35642_cast_fp16))[name = tensor("op_36280_cast_fp16")]; + tensor var_36281_to_fp16 = const()[name = tensor("op_36281_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3733_cast_fp16 = mul(x = var_36280_cast_fp16, y = var_36281_to_fp16)[name = tensor("aw_chunk_3733_cast_fp16")]; + tensor var_36284_equation_0 = const()[name = tensor("op_36284_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36284_cast_fp16 = einsum(equation = var_36284_equation_0, values = (var_36042_cast_fp16, var_35649_cast_fp16))[name = tensor("op_36284_cast_fp16")]; + tensor var_36285_to_fp16 = const()[name = tensor("op_36285_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3735_cast_fp16 = mul(x = var_36284_cast_fp16, y = var_36285_to_fp16)[name = tensor("aw_chunk_3735_cast_fp16")]; + tensor var_36288_equation_0 = const()[name = tensor("op_36288_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36288_cast_fp16 = einsum(equation = var_36288_equation_0, values = (var_36046_cast_fp16, var_35656_cast_fp16))[name = tensor("op_36288_cast_fp16")]; + tensor var_36289_to_fp16 = const()[name = tensor("op_36289_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3737_cast_fp16 = mul(x = var_36288_cast_fp16, y = var_36289_to_fp16)[name = tensor("aw_chunk_3737_cast_fp16")]; + tensor var_36292_equation_0 = const()[name = tensor("op_36292_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36292_cast_fp16 = einsum(equation = var_36292_equation_0, values = (var_36046_cast_fp16, var_35663_cast_fp16))[name = tensor("op_36292_cast_fp16")]; + tensor var_36293_to_fp16 = const()[name = tensor("op_36293_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3739_cast_fp16 = mul(x = var_36292_cast_fp16, y = var_36293_to_fp16)[name = tensor("aw_chunk_3739_cast_fp16")]; + tensor var_36296_equation_0 = const()[name = tensor("op_36296_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36296_cast_fp16 = einsum(equation = var_36296_equation_0, values = (var_36046_cast_fp16, var_35670_cast_fp16))[name = tensor("op_36296_cast_fp16")]; + tensor var_36297_to_fp16 = const()[name = tensor("op_36297_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3741_cast_fp16 = mul(x = var_36296_cast_fp16, y = var_36297_to_fp16)[name = tensor("aw_chunk_3741_cast_fp16")]; + tensor var_36300_equation_0 = const()[name = tensor("op_36300_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36300_cast_fp16 = einsum(equation = var_36300_equation_0, values = (var_36046_cast_fp16, var_35677_cast_fp16))[name = tensor("op_36300_cast_fp16")]; + tensor var_36301_to_fp16 = const()[name = tensor("op_36301_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3743_cast_fp16 = mul(x = var_36300_cast_fp16, y = var_36301_to_fp16)[name = tensor("aw_chunk_3743_cast_fp16")]; + tensor var_36304_equation_0 = const()[name = tensor("op_36304_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36304_cast_fp16 = einsum(equation = var_36304_equation_0, values = (var_36050_cast_fp16, var_35684_cast_fp16))[name = tensor("op_36304_cast_fp16")]; + tensor var_36305_to_fp16 = const()[name = tensor("op_36305_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3745_cast_fp16 = mul(x = var_36304_cast_fp16, y = var_36305_to_fp16)[name = tensor("aw_chunk_3745_cast_fp16")]; + tensor var_36308_equation_0 = const()[name = tensor("op_36308_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36308_cast_fp16 = einsum(equation = var_36308_equation_0, values = (var_36050_cast_fp16, var_35691_cast_fp16))[name = tensor("op_36308_cast_fp16")]; + tensor var_36309_to_fp16 = const()[name = tensor("op_36309_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3747_cast_fp16 = mul(x = var_36308_cast_fp16, y = var_36309_to_fp16)[name = tensor("aw_chunk_3747_cast_fp16")]; + tensor var_36312_equation_0 = const()[name = tensor("op_36312_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36312_cast_fp16 = einsum(equation = var_36312_equation_0, values = (var_36050_cast_fp16, var_35698_cast_fp16))[name = tensor("op_36312_cast_fp16")]; + tensor var_36313_to_fp16 = const()[name = tensor("op_36313_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3749_cast_fp16 = mul(x = var_36312_cast_fp16, y = var_36313_to_fp16)[name = tensor("aw_chunk_3749_cast_fp16")]; + tensor var_36316_equation_0 = const()[name = tensor("op_36316_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36316_cast_fp16 = einsum(equation = var_36316_equation_0, values = (var_36050_cast_fp16, var_35705_cast_fp16))[name = tensor("op_36316_cast_fp16")]; + tensor var_36317_to_fp16 = const()[name = tensor("op_36317_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3751_cast_fp16 = mul(x = var_36316_cast_fp16, y = var_36317_to_fp16)[name = tensor("aw_chunk_3751_cast_fp16")]; + tensor var_36320_equation_0 = const()[name = tensor("op_36320_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36320_cast_fp16 = einsum(equation = var_36320_equation_0, values = (var_36054_cast_fp16, var_35712_cast_fp16))[name = tensor("op_36320_cast_fp16")]; + tensor var_36321_to_fp16 = const()[name = tensor("op_36321_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3753_cast_fp16 = mul(x = var_36320_cast_fp16, y = var_36321_to_fp16)[name = tensor("aw_chunk_3753_cast_fp16")]; + tensor var_36324_equation_0 = const()[name = tensor("op_36324_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36324_cast_fp16 = einsum(equation = var_36324_equation_0, values = (var_36054_cast_fp16, var_35719_cast_fp16))[name = tensor("op_36324_cast_fp16")]; + tensor var_36325_to_fp16 = const()[name = tensor("op_36325_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3755_cast_fp16 = mul(x = var_36324_cast_fp16, y = var_36325_to_fp16)[name = tensor("aw_chunk_3755_cast_fp16")]; + tensor var_36328_equation_0 = const()[name = tensor("op_36328_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36328_cast_fp16 = einsum(equation = var_36328_equation_0, values = (var_36054_cast_fp16, var_35726_cast_fp16))[name = tensor("op_36328_cast_fp16")]; + tensor var_36329_to_fp16 = const()[name = tensor("op_36329_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3757_cast_fp16 = mul(x = var_36328_cast_fp16, y = var_36329_to_fp16)[name = tensor("aw_chunk_3757_cast_fp16")]; + tensor var_36332_equation_0 = const()[name = tensor("op_36332_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36332_cast_fp16 = einsum(equation = var_36332_equation_0, values = (var_36054_cast_fp16, var_35733_cast_fp16))[name = tensor("op_36332_cast_fp16")]; + tensor var_36333_to_fp16 = const()[name = tensor("op_36333_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3759_cast_fp16 = mul(x = var_36332_cast_fp16, y = var_36333_to_fp16)[name = tensor("aw_chunk_3759_cast_fp16")]; + tensor var_36336_equation_0 = const()[name = tensor("op_36336_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36336_cast_fp16 = einsum(equation = var_36336_equation_0, values = (var_36058_cast_fp16, var_35740_cast_fp16))[name = tensor("op_36336_cast_fp16")]; + tensor var_36337_to_fp16 = const()[name = tensor("op_36337_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3761_cast_fp16 = mul(x = var_36336_cast_fp16, y = var_36337_to_fp16)[name = tensor("aw_chunk_3761_cast_fp16")]; + tensor var_36340_equation_0 = const()[name = tensor("op_36340_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36340_cast_fp16 = einsum(equation = var_36340_equation_0, values = (var_36058_cast_fp16, var_35747_cast_fp16))[name = tensor("op_36340_cast_fp16")]; + tensor var_36341_to_fp16 = const()[name = tensor("op_36341_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3763_cast_fp16 = mul(x = var_36340_cast_fp16, y = var_36341_to_fp16)[name = tensor("aw_chunk_3763_cast_fp16")]; + tensor var_36344_equation_0 = const()[name = tensor("op_36344_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36344_cast_fp16 = einsum(equation = var_36344_equation_0, values = (var_36058_cast_fp16, var_35754_cast_fp16))[name = tensor("op_36344_cast_fp16")]; + tensor var_36345_to_fp16 = const()[name = tensor("op_36345_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3765_cast_fp16 = mul(x = var_36344_cast_fp16, y = var_36345_to_fp16)[name = tensor("aw_chunk_3765_cast_fp16")]; + tensor var_36348_equation_0 = const()[name = tensor("op_36348_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36348_cast_fp16 = einsum(equation = var_36348_equation_0, values = (var_36058_cast_fp16, var_35761_cast_fp16))[name = tensor("op_36348_cast_fp16")]; + tensor var_36349_to_fp16 = const()[name = tensor("op_36349_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3767_cast_fp16 = mul(x = var_36348_cast_fp16, y = var_36349_to_fp16)[name = tensor("aw_chunk_3767_cast_fp16")]; + tensor var_36352_equation_0 = const()[name = tensor("op_36352_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36352_cast_fp16 = einsum(equation = var_36352_equation_0, values = (var_36062_cast_fp16, var_35768_cast_fp16))[name = tensor("op_36352_cast_fp16")]; + tensor var_36353_to_fp16 = const()[name = tensor("op_36353_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3769_cast_fp16 = mul(x = var_36352_cast_fp16, y = var_36353_to_fp16)[name = tensor("aw_chunk_3769_cast_fp16")]; + tensor var_36356_equation_0 = const()[name = tensor("op_36356_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36356_cast_fp16 = einsum(equation = var_36356_equation_0, values = (var_36062_cast_fp16, var_35775_cast_fp16))[name = tensor("op_36356_cast_fp16")]; + tensor var_36357_to_fp16 = const()[name = tensor("op_36357_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3771_cast_fp16 = mul(x = var_36356_cast_fp16, y = var_36357_to_fp16)[name = tensor("aw_chunk_3771_cast_fp16")]; + tensor var_36360_equation_0 = const()[name = tensor("op_36360_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36360_cast_fp16 = einsum(equation = var_36360_equation_0, values = (var_36062_cast_fp16, var_35782_cast_fp16))[name = tensor("op_36360_cast_fp16")]; + tensor var_36361_to_fp16 = const()[name = tensor("op_36361_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3773_cast_fp16 = mul(x = var_36360_cast_fp16, y = var_36361_to_fp16)[name = tensor("aw_chunk_3773_cast_fp16")]; + tensor var_36364_equation_0 = const()[name = tensor("op_36364_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36364_cast_fp16 = einsum(equation = var_36364_equation_0, values = (var_36062_cast_fp16, var_35789_cast_fp16))[name = tensor("op_36364_cast_fp16")]; + tensor var_36365_to_fp16 = const()[name = tensor("op_36365_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3775_cast_fp16 = mul(x = var_36364_cast_fp16, y = var_36365_to_fp16)[name = tensor("aw_chunk_3775_cast_fp16")]; + tensor var_36368_equation_0 = const()[name = tensor("op_36368_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36368_cast_fp16 = einsum(equation = var_36368_equation_0, values = (var_36066_cast_fp16, var_35796_cast_fp16))[name = tensor("op_36368_cast_fp16")]; + tensor var_36369_to_fp16 = const()[name = tensor("op_36369_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3777_cast_fp16 = mul(x = var_36368_cast_fp16, y = var_36369_to_fp16)[name = tensor("aw_chunk_3777_cast_fp16")]; + tensor var_36372_equation_0 = const()[name = tensor("op_36372_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36372_cast_fp16 = einsum(equation = var_36372_equation_0, values = (var_36066_cast_fp16, var_35803_cast_fp16))[name = tensor("op_36372_cast_fp16")]; + tensor var_36373_to_fp16 = const()[name = tensor("op_36373_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3779_cast_fp16 = mul(x = var_36372_cast_fp16, y = var_36373_to_fp16)[name = tensor("aw_chunk_3779_cast_fp16")]; + tensor var_36376_equation_0 = const()[name = tensor("op_36376_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36376_cast_fp16 = einsum(equation = var_36376_equation_0, values = (var_36066_cast_fp16, var_35810_cast_fp16))[name = tensor("op_36376_cast_fp16")]; + tensor var_36377_to_fp16 = const()[name = tensor("op_36377_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3781_cast_fp16 = mul(x = var_36376_cast_fp16, y = var_36377_to_fp16)[name = tensor("aw_chunk_3781_cast_fp16")]; + tensor var_36380_equation_0 = const()[name = tensor("op_36380_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36380_cast_fp16 = einsum(equation = var_36380_equation_0, values = (var_36066_cast_fp16, var_35817_cast_fp16))[name = tensor("op_36380_cast_fp16")]; + tensor var_36381_to_fp16 = const()[name = tensor("op_36381_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3783_cast_fp16 = mul(x = var_36380_cast_fp16, y = var_36381_to_fp16)[name = tensor("aw_chunk_3783_cast_fp16")]; + tensor var_36384_equation_0 = const()[name = tensor("op_36384_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36384_cast_fp16 = einsum(equation = var_36384_equation_0, values = (var_36070_cast_fp16, var_35824_cast_fp16))[name = tensor("op_36384_cast_fp16")]; + tensor var_36385_to_fp16 = const()[name = tensor("op_36385_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3785_cast_fp16 = mul(x = var_36384_cast_fp16, y = var_36385_to_fp16)[name = tensor("aw_chunk_3785_cast_fp16")]; + tensor var_36388_equation_0 = const()[name = tensor("op_36388_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36388_cast_fp16 = einsum(equation = var_36388_equation_0, values = (var_36070_cast_fp16, var_35831_cast_fp16))[name = tensor("op_36388_cast_fp16")]; + tensor var_36389_to_fp16 = const()[name = tensor("op_36389_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3787_cast_fp16 = mul(x = var_36388_cast_fp16, y = var_36389_to_fp16)[name = tensor("aw_chunk_3787_cast_fp16")]; + tensor var_36392_equation_0 = const()[name = tensor("op_36392_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36392_cast_fp16 = einsum(equation = var_36392_equation_0, values = (var_36070_cast_fp16, var_35838_cast_fp16))[name = tensor("op_36392_cast_fp16")]; + tensor var_36393_to_fp16 = const()[name = tensor("op_36393_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3789_cast_fp16 = mul(x = var_36392_cast_fp16, y = var_36393_to_fp16)[name = tensor("aw_chunk_3789_cast_fp16")]; + tensor var_36396_equation_0 = const()[name = tensor("op_36396_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36396_cast_fp16 = einsum(equation = var_36396_equation_0, values = (var_36070_cast_fp16, var_35845_cast_fp16))[name = tensor("op_36396_cast_fp16")]; + tensor var_36397_to_fp16 = const()[name = tensor("op_36397_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3791_cast_fp16 = mul(x = var_36396_cast_fp16, y = var_36397_to_fp16)[name = tensor("aw_chunk_3791_cast_fp16")]; + tensor var_36400_equation_0 = const()[name = tensor("op_36400_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36400_cast_fp16 = einsum(equation = var_36400_equation_0, values = (var_36074_cast_fp16, var_35852_cast_fp16))[name = tensor("op_36400_cast_fp16")]; + tensor var_36401_to_fp16 = const()[name = tensor("op_36401_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3793_cast_fp16 = mul(x = var_36400_cast_fp16, y = var_36401_to_fp16)[name = tensor("aw_chunk_3793_cast_fp16")]; + tensor var_36404_equation_0 = const()[name = tensor("op_36404_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36404_cast_fp16 = einsum(equation = var_36404_equation_0, values = (var_36074_cast_fp16, var_35859_cast_fp16))[name = tensor("op_36404_cast_fp16")]; + tensor var_36405_to_fp16 = const()[name = tensor("op_36405_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3795_cast_fp16 = mul(x = var_36404_cast_fp16, y = var_36405_to_fp16)[name = tensor("aw_chunk_3795_cast_fp16")]; + tensor var_36408_equation_0 = const()[name = tensor("op_36408_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36408_cast_fp16 = einsum(equation = var_36408_equation_0, values = (var_36074_cast_fp16, var_35866_cast_fp16))[name = tensor("op_36408_cast_fp16")]; + tensor var_36409_to_fp16 = const()[name = tensor("op_36409_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3797_cast_fp16 = mul(x = var_36408_cast_fp16, y = var_36409_to_fp16)[name = tensor("aw_chunk_3797_cast_fp16")]; + tensor var_36412_equation_0 = const()[name = tensor("op_36412_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36412_cast_fp16 = einsum(equation = var_36412_equation_0, values = (var_36074_cast_fp16, var_35873_cast_fp16))[name = tensor("op_36412_cast_fp16")]; + tensor var_36413_to_fp16 = const()[name = tensor("op_36413_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3799_cast_fp16 = mul(x = var_36412_cast_fp16, y = var_36413_to_fp16)[name = tensor("aw_chunk_3799_cast_fp16")]; + tensor var_36416_equation_0 = const()[name = tensor("op_36416_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36416_cast_fp16 = einsum(equation = var_36416_equation_0, values = (var_36078_cast_fp16, var_35880_cast_fp16))[name = tensor("op_36416_cast_fp16")]; + tensor var_36417_to_fp16 = const()[name = tensor("op_36417_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3801_cast_fp16 = mul(x = var_36416_cast_fp16, y = var_36417_to_fp16)[name = tensor("aw_chunk_3801_cast_fp16")]; + tensor var_36420_equation_0 = const()[name = tensor("op_36420_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36420_cast_fp16 = einsum(equation = var_36420_equation_0, values = (var_36078_cast_fp16, var_35887_cast_fp16))[name = tensor("op_36420_cast_fp16")]; + tensor var_36421_to_fp16 = const()[name = tensor("op_36421_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3803_cast_fp16 = mul(x = var_36420_cast_fp16, y = var_36421_to_fp16)[name = tensor("aw_chunk_3803_cast_fp16")]; + tensor var_36424_equation_0 = const()[name = tensor("op_36424_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36424_cast_fp16 = einsum(equation = var_36424_equation_0, values = (var_36078_cast_fp16, var_35894_cast_fp16))[name = tensor("op_36424_cast_fp16")]; + tensor var_36425_to_fp16 = const()[name = tensor("op_36425_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3805_cast_fp16 = mul(x = var_36424_cast_fp16, y = var_36425_to_fp16)[name = tensor("aw_chunk_3805_cast_fp16")]; + tensor var_36428_equation_0 = const()[name = tensor("op_36428_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36428_cast_fp16 = einsum(equation = var_36428_equation_0, values = (var_36078_cast_fp16, var_35901_cast_fp16))[name = tensor("op_36428_cast_fp16")]; + tensor var_36429_to_fp16 = const()[name = tensor("op_36429_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3807_cast_fp16 = mul(x = var_36428_cast_fp16, y = var_36429_to_fp16)[name = tensor("aw_chunk_3807_cast_fp16")]; + tensor var_36432_equation_0 = const()[name = tensor("op_36432_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36432_cast_fp16 = einsum(equation = var_36432_equation_0, values = (var_36082_cast_fp16, var_35908_cast_fp16))[name = tensor("op_36432_cast_fp16")]; + tensor var_36433_to_fp16 = const()[name = tensor("op_36433_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3809_cast_fp16 = mul(x = var_36432_cast_fp16, y = var_36433_to_fp16)[name = tensor("aw_chunk_3809_cast_fp16")]; + tensor var_36436_equation_0 = const()[name = tensor("op_36436_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36436_cast_fp16 = einsum(equation = var_36436_equation_0, values = (var_36082_cast_fp16, var_35915_cast_fp16))[name = tensor("op_36436_cast_fp16")]; + tensor var_36437_to_fp16 = const()[name = tensor("op_36437_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3811_cast_fp16 = mul(x = var_36436_cast_fp16, y = var_36437_to_fp16)[name = tensor("aw_chunk_3811_cast_fp16")]; + tensor var_36440_equation_0 = const()[name = tensor("op_36440_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36440_cast_fp16 = einsum(equation = var_36440_equation_0, values = (var_36082_cast_fp16, var_35922_cast_fp16))[name = tensor("op_36440_cast_fp16")]; + tensor var_36441_to_fp16 = const()[name = tensor("op_36441_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3813_cast_fp16 = mul(x = var_36440_cast_fp16, y = var_36441_to_fp16)[name = tensor("aw_chunk_3813_cast_fp16")]; + tensor var_36444_equation_0 = const()[name = tensor("op_36444_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36444_cast_fp16 = einsum(equation = var_36444_equation_0, values = (var_36082_cast_fp16, var_35929_cast_fp16))[name = tensor("op_36444_cast_fp16")]; + tensor var_36445_to_fp16 = const()[name = tensor("op_36445_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3815_cast_fp16 = mul(x = var_36444_cast_fp16, y = var_36445_to_fp16)[name = tensor("aw_chunk_3815_cast_fp16")]; + tensor var_36448_equation_0 = const()[name = tensor("op_36448_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36448_cast_fp16 = einsum(equation = var_36448_equation_0, values = (var_36086_cast_fp16, var_35936_cast_fp16))[name = tensor("op_36448_cast_fp16")]; + tensor var_36449_to_fp16 = const()[name = tensor("op_36449_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3817_cast_fp16 = mul(x = var_36448_cast_fp16, y = var_36449_to_fp16)[name = tensor("aw_chunk_3817_cast_fp16")]; + tensor var_36452_equation_0 = const()[name = tensor("op_36452_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36452_cast_fp16 = einsum(equation = var_36452_equation_0, values = (var_36086_cast_fp16, var_35943_cast_fp16))[name = tensor("op_36452_cast_fp16")]; + tensor var_36453_to_fp16 = const()[name = tensor("op_36453_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3819_cast_fp16 = mul(x = var_36452_cast_fp16, y = var_36453_to_fp16)[name = tensor("aw_chunk_3819_cast_fp16")]; + tensor var_36456_equation_0 = const()[name = tensor("op_36456_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36456_cast_fp16 = einsum(equation = var_36456_equation_0, values = (var_36086_cast_fp16, var_35950_cast_fp16))[name = tensor("op_36456_cast_fp16")]; + tensor var_36457_to_fp16 = const()[name = tensor("op_36457_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3821_cast_fp16 = mul(x = var_36456_cast_fp16, y = var_36457_to_fp16)[name = tensor("aw_chunk_3821_cast_fp16")]; + tensor var_36460_equation_0 = const()[name = tensor("op_36460_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36460_cast_fp16 = einsum(equation = var_36460_equation_0, values = (var_36086_cast_fp16, var_35957_cast_fp16))[name = tensor("op_36460_cast_fp16")]; + tensor var_36461_to_fp16 = const()[name = tensor("op_36461_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3823_cast_fp16 = mul(x = var_36460_cast_fp16, y = var_36461_to_fp16)[name = tensor("aw_chunk_3823_cast_fp16")]; + tensor var_36464_equation_0 = const()[name = tensor("op_36464_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36464_cast_fp16 = einsum(equation = var_36464_equation_0, values = (var_36090_cast_fp16, var_35964_cast_fp16))[name = tensor("op_36464_cast_fp16")]; + tensor var_36465_to_fp16 = const()[name = tensor("op_36465_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3825_cast_fp16 = mul(x = var_36464_cast_fp16, y = var_36465_to_fp16)[name = tensor("aw_chunk_3825_cast_fp16")]; + tensor var_36468_equation_0 = const()[name = tensor("op_36468_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36468_cast_fp16 = einsum(equation = var_36468_equation_0, values = (var_36090_cast_fp16, var_35971_cast_fp16))[name = tensor("op_36468_cast_fp16")]; + tensor var_36469_to_fp16 = const()[name = tensor("op_36469_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3827_cast_fp16 = mul(x = var_36468_cast_fp16, y = var_36469_to_fp16)[name = tensor("aw_chunk_3827_cast_fp16")]; + tensor var_36472_equation_0 = const()[name = tensor("op_36472_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36472_cast_fp16 = einsum(equation = var_36472_equation_0, values = (var_36090_cast_fp16, var_35978_cast_fp16))[name = tensor("op_36472_cast_fp16")]; + tensor var_36473_to_fp16 = const()[name = tensor("op_36473_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3829_cast_fp16 = mul(x = var_36472_cast_fp16, y = var_36473_to_fp16)[name = tensor("aw_chunk_3829_cast_fp16")]; + tensor var_36476_equation_0 = const()[name = tensor("op_36476_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36476_cast_fp16 = einsum(equation = var_36476_equation_0, values = (var_36090_cast_fp16, var_35985_cast_fp16))[name = tensor("op_36476_cast_fp16")]; + tensor var_36477_to_fp16 = const()[name = tensor("op_36477_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3831_cast_fp16 = mul(x = var_36476_cast_fp16, y = var_36477_to_fp16)[name = tensor("aw_chunk_3831_cast_fp16")]; + tensor var_36480_equation_0 = const()[name = tensor("op_36480_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36480_cast_fp16 = einsum(equation = var_36480_equation_0, values = (var_36094_cast_fp16, var_35992_cast_fp16))[name = tensor("op_36480_cast_fp16")]; + tensor var_36481_to_fp16 = const()[name = tensor("op_36481_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3833_cast_fp16 = mul(x = var_36480_cast_fp16, y = var_36481_to_fp16)[name = tensor("aw_chunk_3833_cast_fp16")]; + tensor var_36484_equation_0 = const()[name = tensor("op_36484_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36484_cast_fp16 = einsum(equation = var_36484_equation_0, values = (var_36094_cast_fp16, var_35999_cast_fp16))[name = tensor("op_36484_cast_fp16")]; + tensor var_36485_to_fp16 = const()[name = tensor("op_36485_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3835_cast_fp16 = mul(x = var_36484_cast_fp16, y = var_36485_to_fp16)[name = tensor("aw_chunk_3835_cast_fp16")]; + tensor var_36488_equation_0 = const()[name = tensor("op_36488_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36488_cast_fp16 = einsum(equation = var_36488_equation_0, values = (var_36094_cast_fp16, var_36006_cast_fp16))[name = tensor("op_36488_cast_fp16")]; + tensor var_36489_to_fp16 = const()[name = tensor("op_36489_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3837_cast_fp16 = mul(x = var_36488_cast_fp16, y = var_36489_to_fp16)[name = tensor("aw_chunk_3837_cast_fp16")]; + tensor var_36492_equation_0 = const()[name = tensor("op_36492_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_36492_cast_fp16 = einsum(equation = var_36492_equation_0, values = (var_36094_cast_fp16, var_36013_cast_fp16))[name = tensor("op_36492_cast_fp16")]; + tensor var_36493_to_fp16 = const()[name = tensor("op_36493_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3839_cast_fp16 = mul(x = var_36492_cast_fp16, y = var_36493_to_fp16)[name = tensor("aw_chunk_3839_cast_fp16")]; + tensor var_36495_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3681_cast_fp16)[name = tensor("op_36495_cast_fp16")]; + tensor var_36496_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3683_cast_fp16)[name = tensor("op_36496_cast_fp16")]; + tensor var_36497_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3685_cast_fp16)[name = tensor("op_36497_cast_fp16")]; + tensor var_36498_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3687_cast_fp16)[name = tensor("op_36498_cast_fp16")]; + tensor var_36499_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3689_cast_fp16)[name = tensor("op_36499_cast_fp16")]; + tensor var_36500_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3691_cast_fp16)[name = tensor("op_36500_cast_fp16")]; + tensor var_36501_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3693_cast_fp16)[name = tensor("op_36501_cast_fp16")]; + tensor var_36502_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3695_cast_fp16)[name = tensor("op_36502_cast_fp16")]; + tensor var_36503_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3697_cast_fp16)[name = tensor("op_36503_cast_fp16")]; + tensor var_36504_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3699_cast_fp16)[name = tensor("op_36504_cast_fp16")]; + tensor var_36505_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3701_cast_fp16)[name = tensor("op_36505_cast_fp16")]; + tensor var_36506_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3703_cast_fp16)[name = tensor("op_36506_cast_fp16")]; + tensor var_36507_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3705_cast_fp16)[name = tensor("op_36507_cast_fp16")]; + tensor var_36508_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3707_cast_fp16)[name = tensor("op_36508_cast_fp16")]; + tensor var_36509_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3709_cast_fp16)[name = tensor("op_36509_cast_fp16")]; + tensor var_36510_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3711_cast_fp16)[name = tensor("op_36510_cast_fp16")]; + tensor var_36511_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3713_cast_fp16)[name = tensor("op_36511_cast_fp16")]; + tensor var_36512_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3715_cast_fp16)[name = tensor("op_36512_cast_fp16")]; + tensor var_36513_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3717_cast_fp16)[name = tensor("op_36513_cast_fp16")]; + tensor var_36514_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3719_cast_fp16)[name = tensor("op_36514_cast_fp16")]; + tensor var_36515_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3721_cast_fp16)[name = tensor("op_36515_cast_fp16")]; + tensor var_36516_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3723_cast_fp16)[name = tensor("op_36516_cast_fp16")]; + tensor var_36517_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3725_cast_fp16)[name = tensor("op_36517_cast_fp16")]; + tensor var_36518_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3727_cast_fp16)[name = tensor("op_36518_cast_fp16")]; + tensor var_36519_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3729_cast_fp16)[name = tensor("op_36519_cast_fp16")]; + tensor var_36520_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3731_cast_fp16)[name = tensor("op_36520_cast_fp16")]; + tensor var_36521_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3733_cast_fp16)[name = tensor("op_36521_cast_fp16")]; + tensor var_36522_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3735_cast_fp16)[name = tensor("op_36522_cast_fp16")]; + tensor var_36523_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3737_cast_fp16)[name = tensor("op_36523_cast_fp16")]; + tensor var_36524_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3739_cast_fp16)[name = tensor("op_36524_cast_fp16")]; + tensor var_36525_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3741_cast_fp16)[name = tensor("op_36525_cast_fp16")]; + tensor var_36526_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3743_cast_fp16)[name = tensor("op_36526_cast_fp16")]; + tensor var_36527_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3745_cast_fp16)[name = tensor("op_36527_cast_fp16")]; + tensor var_36528_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3747_cast_fp16)[name = tensor("op_36528_cast_fp16")]; + tensor var_36529_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3749_cast_fp16)[name = tensor("op_36529_cast_fp16")]; + tensor var_36530_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3751_cast_fp16)[name = tensor("op_36530_cast_fp16")]; + tensor var_36531_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3753_cast_fp16)[name = tensor("op_36531_cast_fp16")]; + tensor var_36532_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3755_cast_fp16)[name = tensor("op_36532_cast_fp16")]; + tensor var_36533_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3757_cast_fp16)[name = tensor("op_36533_cast_fp16")]; + tensor var_36534_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3759_cast_fp16)[name = tensor("op_36534_cast_fp16")]; + tensor var_36535_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3761_cast_fp16)[name = tensor("op_36535_cast_fp16")]; + tensor var_36536_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3763_cast_fp16)[name = tensor("op_36536_cast_fp16")]; + tensor var_36537_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3765_cast_fp16)[name = tensor("op_36537_cast_fp16")]; + tensor var_36538_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3767_cast_fp16)[name = tensor("op_36538_cast_fp16")]; + tensor var_36539_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3769_cast_fp16)[name = tensor("op_36539_cast_fp16")]; + tensor var_36540_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3771_cast_fp16)[name = tensor("op_36540_cast_fp16")]; + tensor var_36541_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3773_cast_fp16)[name = tensor("op_36541_cast_fp16")]; + tensor var_36542_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3775_cast_fp16)[name = tensor("op_36542_cast_fp16")]; + tensor var_36543_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3777_cast_fp16)[name = tensor("op_36543_cast_fp16")]; + tensor var_36544_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3779_cast_fp16)[name = tensor("op_36544_cast_fp16")]; + tensor var_36545_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3781_cast_fp16)[name = tensor("op_36545_cast_fp16")]; + tensor var_36546_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3783_cast_fp16)[name = tensor("op_36546_cast_fp16")]; + tensor var_36547_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3785_cast_fp16)[name = tensor("op_36547_cast_fp16")]; + tensor var_36548_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3787_cast_fp16)[name = tensor("op_36548_cast_fp16")]; + tensor var_36549_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3789_cast_fp16)[name = tensor("op_36549_cast_fp16")]; + tensor var_36550_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3791_cast_fp16)[name = tensor("op_36550_cast_fp16")]; + tensor var_36551_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3793_cast_fp16)[name = tensor("op_36551_cast_fp16")]; + tensor var_36552_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3795_cast_fp16)[name = tensor("op_36552_cast_fp16")]; + tensor var_36553_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3797_cast_fp16)[name = tensor("op_36553_cast_fp16")]; + tensor var_36554_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3799_cast_fp16)[name = tensor("op_36554_cast_fp16")]; + tensor var_36555_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3801_cast_fp16)[name = tensor("op_36555_cast_fp16")]; + tensor var_36556_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3803_cast_fp16)[name = tensor("op_36556_cast_fp16")]; + tensor var_36557_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3805_cast_fp16)[name = tensor("op_36557_cast_fp16")]; + tensor var_36558_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3807_cast_fp16)[name = tensor("op_36558_cast_fp16")]; + tensor var_36559_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3809_cast_fp16)[name = tensor("op_36559_cast_fp16")]; + tensor var_36560_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3811_cast_fp16)[name = tensor("op_36560_cast_fp16")]; + tensor var_36561_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3813_cast_fp16)[name = tensor("op_36561_cast_fp16")]; + tensor var_36562_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3815_cast_fp16)[name = tensor("op_36562_cast_fp16")]; + tensor var_36563_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3817_cast_fp16)[name = tensor("op_36563_cast_fp16")]; + tensor var_36564_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3819_cast_fp16)[name = tensor("op_36564_cast_fp16")]; + tensor var_36565_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3821_cast_fp16)[name = tensor("op_36565_cast_fp16")]; + tensor var_36566_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3823_cast_fp16)[name = tensor("op_36566_cast_fp16")]; + tensor var_36567_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3825_cast_fp16)[name = tensor("op_36567_cast_fp16")]; + tensor var_36568_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3827_cast_fp16)[name = tensor("op_36568_cast_fp16")]; + tensor var_36569_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3829_cast_fp16)[name = tensor("op_36569_cast_fp16")]; + tensor var_36570_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3831_cast_fp16)[name = tensor("op_36570_cast_fp16")]; + tensor var_36571_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3833_cast_fp16)[name = tensor("op_36571_cast_fp16")]; + tensor var_36572_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3835_cast_fp16)[name = tensor("op_36572_cast_fp16")]; + tensor var_36573_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3837_cast_fp16)[name = tensor("op_36573_cast_fp16")]; + tensor var_36574_cast_fp16 = softmax(axis = var_35320, x = aw_chunk_3839_cast_fp16)[name = tensor("op_36574_cast_fp16")]; + tensor var_36576_equation_0 = const()[name = tensor("op_36576_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36576_cast_fp16 = einsum(equation = var_36576_equation_0, values = (var_36096_cast_fp16, var_36495_cast_fp16))[name = tensor("op_36576_cast_fp16")]; + tensor var_36578_equation_0 = const()[name = tensor("op_36578_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36578_cast_fp16 = einsum(equation = var_36578_equation_0, values = (var_36096_cast_fp16, var_36496_cast_fp16))[name = tensor("op_36578_cast_fp16")]; + tensor var_36580_equation_0 = const()[name = tensor("op_36580_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36580_cast_fp16 = einsum(equation = var_36580_equation_0, values = (var_36096_cast_fp16, var_36497_cast_fp16))[name = tensor("op_36580_cast_fp16")]; + tensor var_36582_equation_0 = const()[name = tensor("op_36582_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36582_cast_fp16 = einsum(equation = var_36582_equation_0, values = (var_36096_cast_fp16, var_36498_cast_fp16))[name = tensor("op_36582_cast_fp16")]; + tensor var_36584_equation_0 = const()[name = tensor("op_36584_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36584_cast_fp16 = einsum(equation = var_36584_equation_0, values = (var_36100_cast_fp16, var_36499_cast_fp16))[name = tensor("op_36584_cast_fp16")]; + tensor var_36586_equation_0 = const()[name = tensor("op_36586_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36586_cast_fp16 = einsum(equation = var_36586_equation_0, values = (var_36100_cast_fp16, var_36500_cast_fp16))[name = tensor("op_36586_cast_fp16")]; + tensor var_36588_equation_0 = const()[name = tensor("op_36588_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36588_cast_fp16 = einsum(equation = var_36588_equation_0, values = (var_36100_cast_fp16, var_36501_cast_fp16))[name = tensor("op_36588_cast_fp16")]; + tensor var_36590_equation_0 = const()[name = tensor("op_36590_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36590_cast_fp16 = einsum(equation = var_36590_equation_0, values = (var_36100_cast_fp16, var_36502_cast_fp16))[name = tensor("op_36590_cast_fp16")]; + tensor var_36592_equation_0 = const()[name = tensor("op_36592_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36592_cast_fp16 = einsum(equation = var_36592_equation_0, values = (var_36104_cast_fp16, var_36503_cast_fp16))[name = tensor("op_36592_cast_fp16")]; + tensor var_36594_equation_0 = const()[name = tensor("op_36594_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36594_cast_fp16 = einsum(equation = var_36594_equation_0, values = (var_36104_cast_fp16, var_36504_cast_fp16))[name = tensor("op_36594_cast_fp16")]; + tensor var_36596_equation_0 = const()[name = tensor("op_36596_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36596_cast_fp16 = einsum(equation = var_36596_equation_0, values = (var_36104_cast_fp16, var_36505_cast_fp16))[name = tensor("op_36596_cast_fp16")]; + tensor var_36598_equation_0 = const()[name = tensor("op_36598_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36598_cast_fp16 = einsum(equation = var_36598_equation_0, values = (var_36104_cast_fp16, var_36506_cast_fp16))[name = tensor("op_36598_cast_fp16")]; + tensor var_36600_equation_0 = const()[name = tensor("op_36600_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36600_cast_fp16 = einsum(equation = var_36600_equation_0, values = (var_36108_cast_fp16, var_36507_cast_fp16))[name = tensor("op_36600_cast_fp16")]; + tensor var_36602_equation_0 = const()[name = tensor("op_36602_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36602_cast_fp16 = einsum(equation = var_36602_equation_0, values = (var_36108_cast_fp16, var_36508_cast_fp16))[name = tensor("op_36602_cast_fp16")]; + tensor var_36604_equation_0 = const()[name = tensor("op_36604_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36604_cast_fp16 = einsum(equation = var_36604_equation_0, values = (var_36108_cast_fp16, var_36509_cast_fp16))[name = tensor("op_36604_cast_fp16")]; + tensor var_36606_equation_0 = const()[name = tensor("op_36606_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36606_cast_fp16 = einsum(equation = var_36606_equation_0, values = (var_36108_cast_fp16, var_36510_cast_fp16))[name = tensor("op_36606_cast_fp16")]; + tensor var_36608_equation_0 = const()[name = tensor("op_36608_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36608_cast_fp16 = einsum(equation = var_36608_equation_0, values = (var_36112_cast_fp16, var_36511_cast_fp16))[name = tensor("op_36608_cast_fp16")]; + tensor var_36610_equation_0 = const()[name = tensor("op_36610_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36610_cast_fp16 = einsum(equation = var_36610_equation_0, values = (var_36112_cast_fp16, var_36512_cast_fp16))[name = tensor("op_36610_cast_fp16")]; + tensor var_36612_equation_0 = const()[name = tensor("op_36612_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36612_cast_fp16 = einsum(equation = var_36612_equation_0, values = (var_36112_cast_fp16, var_36513_cast_fp16))[name = tensor("op_36612_cast_fp16")]; + tensor var_36614_equation_0 = const()[name = tensor("op_36614_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36614_cast_fp16 = einsum(equation = var_36614_equation_0, values = (var_36112_cast_fp16, var_36514_cast_fp16))[name = tensor("op_36614_cast_fp16")]; + tensor var_36616_equation_0 = const()[name = tensor("op_36616_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36616_cast_fp16 = einsum(equation = var_36616_equation_0, values = (var_36116_cast_fp16, var_36515_cast_fp16))[name = tensor("op_36616_cast_fp16")]; + tensor var_36618_equation_0 = const()[name = tensor("op_36618_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36618_cast_fp16 = einsum(equation = var_36618_equation_0, values = (var_36116_cast_fp16, var_36516_cast_fp16))[name = tensor("op_36618_cast_fp16")]; + tensor var_36620_equation_0 = const()[name = tensor("op_36620_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36620_cast_fp16 = einsum(equation = var_36620_equation_0, values = (var_36116_cast_fp16, var_36517_cast_fp16))[name = tensor("op_36620_cast_fp16")]; + tensor var_36622_equation_0 = const()[name = tensor("op_36622_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36622_cast_fp16 = einsum(equation = var_36622_equation_0, values = (var_36116_cast_fp16, var_36518_cast_fp16))[name = tensor("op_36622_cast_fp16")]; + tensor var_36624_equation_0 = const()[name = tensor("op_36624_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36624_cast_fp16 = einsum(equation = var_36624_equation_0, values = (var_36120_cast_fp16, var_36519_cast_fp16))[name = tensor("op_36624_cast_fp16")]; + tensor var_36626_equation_0 = const()[name = tensor("op_36626_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36626_cast_fp16 = einsum(equation = var_36626_equation_0, values = (var_36120_cast_fp16, var_36520_cast_fp16))[name = tensor("op_36626_cast_fp16")]; + tensor var_36628_equation_0 = const()[name = tensor("op_36628_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36628_cast_fp16 = einsum(equation = var_36628_equation_0, values = (var_36120_cast_fp16, var_36521_cast_fp16))[name = tensor("op_36628_cast_fp16")]; + tensor var_36630_equation_0 = const()[name = tensor("op_36630_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36630_cast_fp16 = einsum(equation = var_36630_equation_0, values = (var_36120_cast_fp16, var_36522_cast_fp16))[name = tensor("op_36630_cast_fp16")]; + tensor var_36632_equation_0 = const()[name = tensor("op_36632_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36632_cast_fp16 = einsum(equation = var_36632_equation_0, values = (var_36124_cast_fp16, var_36523_cast_fp16))[name = tensor("op_36632_cast_fp16")]; + tensor var_36634_equation_0 = const()[name = tensor("op_36634_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36634_cast_fp16 = einsum(equation = var_36634_equation_0, values = (var_36124_cast_fp16, var_36524_cast_fp16))[name = tensor("op_36634_cast_fp16")]; + tensor var_36636_equation_0 = const()[name = tensor("op_36636_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36636_cast_fp16 = einsum(equation = var_36636_equation_0, values = (var_36124_cast_fp16, var_36525_cast_fp16))[name = tensor("op_36636_cast_fp16")]; + tensor var_36638_equation_0 = const()[name = tensor("op_36638_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36638_cast_fp16 = einsum(equation = var_36638_equation_0, values = (var_36124_cast_fp16, var_36526_cast_fp16))[name = tensor("op_36638_cast_fp16")]; + tensor var_36640_equation_0 = const()[name = tensor("op_36640_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36640_cast_fp16 = einsum(equation = var_36640_equation_0, values = (var_36128_cast_fp16, var_36527_cast_fp16))[name = tensor("op_36640_cast_fp16")]; + tensor var_36642_equation_0 = const()[name = tensor("op_36642_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36642_cast_fp16 = einsum(equation = var_36642_equation_0, values = (var_36128_cast_fp16, var_36528_cast_fp16))[name = tensor("op_36642_cast_fp16")]; + tensor var_36644_equation_0 = const()[name = tensor("op_36644_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36644_cast_fp16 = einsum(equation = var_36644_equation_0, values = (var_36128_cast_fp16, var_36529_cast_fp16))[name = tensor("op_36644_cast_fp16")]; + tensor var_36646_equation_0 = const()[name = tensor("op_36646_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36646_cast_fp16 = einsum(equation = var_36646_equation_0, values = (var_36128_cast_fp16, var_36530_cast_fp16))[name = tensor("op_36646_cast_fp16")]; + tensor var_36648_equation_0 = const()[name = tensor("op_36648_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36648_cast_fp16 = einsum(equation = var_36648_equation_0, values = (var_36132_cast_fp16, var_36531_cast_fp16))[name = tensor("op_36648_cast_fp16")]; + tensor var_36650_equation_0 = const()[name = tensor("op_36650_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36650_cast_fp16 = einsum(equation = var_36650_equation_0, values = (var_36132_cast_fp16, var_36532_cast_fp16))[name = tensor("op_36650_cast_fp16")]; + tensor var_36652_equation_0 = const()[name = tensor("op_36652_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36652_cast_fp16 = einsum(equation = var_36652_equation_0, values = (var_36132_cast_fp16, var_36533_cast_fp16))[name = tensor("op_36652_cast_fp16")]; + tensor var_36654_equation_0 = const()[name = tensor("op_36654_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36654_cast_fp16 = einsum(equation = var_36654_equation_0, values = (var_36132_cast_fp16, var_36534_cast_fp16))[name = tensor("op_36654_cast_fp16")]; + tensor var_36656_equation_0 = const()[name = tensor("op_36656_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36656_cast_fp16 = einsum(equation = var_36656_equation_0, values = (var_36136_cast_fp16, var_36535_cast_fp16))[name = tensor("op_36656_cast_fp16")]; + tensor var_36658_equation_0 = const()[name = tensor("op_36658_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36658_cast_fp16 = einsum(equation = var_36658_equation_0, values = (var_36136_cast_fp16, var_36536_cast_fp16))[name = tensor("op_36658_cast_fp16")]; + tensor var_36660_equation_0 = const()[name = tensor("op_36660_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36660_cast_fp16 = einsum(equation = var_36660_equation_0, values = (var_36136_cast_fp16, var_36537_cast_fp16))[name = tensor("op_36660_cast_fp16")]; + tensor var_36662_equation_0 = const()[name = tensor("op_36662_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36662_cast_fp16 = einsum(equation = var_36662_equation_0, values = (var_36136_cast_fp16, var_36538_cast_fp16))[name = tensor("op_36662_cast_fp16")]; + tensor var_36664_equation_0 = const()[name = tensor("op_36664_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36664_cast_fp16 = einsum(equation = var_36664_equation_0, values = (var_36140_cast_fp16, var_36539_cast_fp16))[name = tensor("op_36664_cast_fp16")]; + tensor var_36666_equation_0 = const()[name = tensor("op_36666_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36666_cast_fp16 = einsum(equation = var_36666_equation_0, values = (var_36140_cast_fp16, var_36540_cast_fp16))[name = tensor("op_36666_cast_fp16")]; + tensor var_36668_equation_0 = const()[name = tensor("op_36668_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36668_cast_fp16 = einsum(equation = var_36668_equation_0, values = (var_36140_cast_fp16, var_36541_cast_fp16))[name = tensor("op_36668_cast_fp16")]; + tensor var_36670_equation_0 = const()[name = tensor("op_36670_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36670_cast_fp16 = einsum(equation = var_36670_equation_0, values = (var_36140_cast_fp16, var_36542_cast_fp16))[name = tensor("op_36670_cast_fp16")]; + tensor var_36672_equation_0 = const()[name = tensor("op_36672_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36672_cast_fp16 = einsum(equation = var_36672_equation_0, values = (var_36144_cast_fp16, var_36543_cast_fp16))[name = tensor("op_36672_cast_fp16")]; + tensor var_36674_equation_0 = const()[name = tensor("op_36674_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36674_cast_fp16 = einsum(equation = var_36674_equation_0, values = (var_36144_cast_fp16, var_36544_cast_fp16))[name = tensor("op_36674_cast_fp16")]; + tensor var_36676_equation_0 = const()[name = tensor("op_36676_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36676_cast_fp16 = einsum(equation = var_36676_equation_0, values = (var_36144_cast_fp16, var_36545_cast_fp16))[name = tensor("op_36676_cast_fp16")]; + tensor var_36678_equation_0 = const()[name = tensor("op_36678_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36678_cast_fp16 = einsum(equation = var_36678_equation_0, values = (var_36144_cast_fp16, var_36546_cast_fp16))[name = tensor("op_36678_cast_fp16")]; + tensor var_36680_equation_0 = const()[name = tensor("op_36680_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36680_cast_fp16 = einsum(equation = var_36680_equation_0, values = (var_36148_cast_fp16, var_36547_cast_fp16))[name = tensor("op_36680_cast_fp16")]; + tensor var_36682_equation_0 = const()[name = tensor("op_36682_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36682_cast_fp16 = einsum(equation = var_36682_equation_0, values = (var_36148_cast_fp16, var_36548_cast_fp16))[name = tensor("op_36682_cast_fp16")]; + tensor var_36684_equation_0 = const()[name = tensor("op_36684_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36684_cast_fp16 = einsum(equation = var_36684_equation_0, values = (var_36148_cast_fp16, var_36549_cast_fp16))[name = tensor("op_36684_cast_fp16")]; + tensor var_36686_equation_0 = const()[name = tensor("op_36686_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36686_cast_fp16 = einsum(equation = var_36686_equation_0, values = (var_36148_cast_fp16, var_36550_cast_fp16))[name = tensor("op_36686_cast_fp16")]; + tensor var_36688_equation_0 = const()[name = tensor("op_36688_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36688_cast_fp16 = einsum(equation = var_36688_equation_0, values = (var_36152_cast_fp16, var_36551_cast_fp16))[name = tensor("op_36688_cast_fp16")]; + tensor var_36690_equation_0 = const()[name = tensor("op_36690_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36690_cast_fp16 = einsum(equation = var_36690_equation_0, values = (var_36152_cast_fp16, var_36552_cast_fp16))[name = tensor("op_36690_cast_fp16")]; + tensor var_36692_equation_0 = const()[name = tensor("op_36692_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36692_cast_fp16 = einsum(equation = var_36692_equation_0, values = (var_36152_cast_fp16, var_36553_cast_fp16))[name = tensor("op_36692_cast_fp16")]; + tensor var_36694_equation_0 = const()[name = tensor("op_36694_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36694_cast_fp16 = einsum(equation = var_36694_equation_0, values = (var_36152_cast_fp16, var_36554_cast_fp16))[name = tensor("op_36694_cast_fp16")]; + tensor var_36696_equation_0 = const()[name = tensor("op_36696_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36696_cast_fp16 = einsum(equation = var_36696_equation_0, values = (var_36156_cast_fp16, var_36555_cast_fp16))[name = tensor("op_36696_cast_fp16")]; + tensor var_36698_equation_0 = const()[name = tensor("op_36698_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36698_cast_fp16 = einsum(equation = var_36698_equation_0, values = (var_36156_cast_fp16, var_36556_cast_fp16))[name = tensor("op_36698_cast_fp16")]; + tensor var_36700_equation_0 = const()[name = tensor("op_36700_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36700_cast_fp16 = einsum(equation = var_36700_equation_0, values = (var_36156_cast_fp16, var_36557_cast_fp16))[name = tensor("op_36700_cast_fp16")]; + tensor var_36702_equation_0 = const()[name = tensor("op_36702_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36702_cast_fp16 = einsum(equation = var_36702_equation_0, values = (var_36156_cast_fp16, var_36558_cast_fp16))[name = tensor("op_36702_cast_fp16")]; + tensor var_36704_equation_0 = const()[name = tensor("op_36704_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36704_cast_fp16 = einsum(equation = var_36704_equation_0, values = (var_36160_cast_fp16, var_36559_cast_fp16))[name = tensor("op_36704_cast_fp16")]; + tensor var_36706_equation_0 = const()[name = tensor("op_36706_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36706_cast_fp16 = einsum(equation = var_36706_equation_0, values = (var_36160_cast_fp16, var_36560_cast_fp16))[name = tensor("op_36706_cast_fp16")]; + tensor var_36708_equation_0 = const()[name = tensor("op_36708_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36708_cast_fp16 = einsum(equation = var_36708_equation_0, values = (var_36160_cast_fp16, var_36561_cast_fp16))[name = tensor("op_36708_cast_fp16")]; + tensor var_36710_equation_0 = const()[name = tensor("op_36710_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36710_cast_fp16 = einsum(equation = var_36710_equation_0, values = (var_36160_cast_fp16, var_36562_cast_fp16))[name = tensor("op_36710_cast_fp16")]; + tensor var_36712_equation_0 = const()[name = tensor("op_36712_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36712_cast_fp16 = einsum(equation = var_36712_equation_0, values = (var_36164_cast_fp16, var_36563_cast_fp16))[name = tensor("op_36712_cast_fp16")]; + tensor var_36714_equation_0 = const()[name = tensor("op_36714_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36714_cast_fp16 = einsum(equation = var_36714_equation_0, values = (var_36164_cast_fp16, var_36564_cast_fp16))[name = tensor("op_36714_cast_fp16")]; + tensor var_36716_equation_0 = const()[name = tensor("op_36716_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36716_cast_fp16 = einsum(equation = var_36716_equation_0, values = (var_36164_cast_fp16, var_36565_cast_fp16))[name = tensor("op_36716_cast_fp16")]; + tensor var_36718_equation_0 = const()[name = tensor("op_36718_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36718_cast_fp16 = einsum(equation = var_36718_equation_0, values = (var_36164_cast_fp16, var_36566_cast_fp16))[name = tensor("op_36718_cast_fp16")]; + tensor var_36720_equation_0 = const()[name = tensor("op_36720_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36720_cast_fp16 = einsum(equation = var_36720_equation_0, values = (var_36168_cast_fp16, var_36567_cast_fp16))[name = tensor("op_36720_cast_fp16")]; + tensor var_36722_equation_0 = const()[name = tensor("op_36722_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36722_cast_fp16 = einsum(equation = var_36722_equation_0, values = (var_36168_cast_fp16, var_36568_cast_fp16))[name = tensor("op_36722_cast_fp16")]; + tensor var_36724_equation_0 = const()[name = tensor("op_36724_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36724_cast_fp16 = einsum(equation = var_36724_equation_0, values = (var_36168_cast_fp16, var_36569_cast_fp16))[name = tensor("op_36724_cast_fp16")]; + tensor var_36726_equation_0 = const()[name = tensor("op_36726_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36726_cast_fp16 = einsum(equation = var_36726_equation_0, values = (var_36168_cast_fp16, var_36570_cast_fp16))[name = tensor("op_36726_cast_fp16")]; + tensor var_36728_equation_0 = const()[name = tensor("op_36728_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36728_cast_fp16 = einsum(equation = var_36728_equation_0, values = (var_36172_cast_fp16, var_36571_cast_fp16))[name = tensor("op_36728_cast_fp16")]; + tensor var_36730_equation_0 = const()[name = tensor("op_36730_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36730_cast_fp16 = einsum(equation = var_36730_equation_0, values = (var_36172_cast_fp16, var_36572_cast_fp16))[name = tensor("op_36730_cast_fp16")]; + tensor var_36732_equation_0 = const()[name = tensor("op_36732_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36732_cast_fp16 = einsum(equation = var_36732_equation_0, values = (var_36172_cast_fp16, var_36573_cast_fp16))[name = tensor("op_36732_cast_fp16")]; + tensor var_36734_equation_0 = const()[name = tensor("op_36734_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_36734_cast_fp16 = einsum(equation = var_36734_equation_0, values = (var_36172_cast_fp16, var_36574_cast_fp16))[name = tensor("op_36734_cast_fp16")]; + tensor var_36736_interleave_0 = const()[name = tensor("op_36736_interleave_0"), val = tensor(false)]; + tensor var_36736_cast_fp16 = concat(axis = var_35295, interleave = var_36736_interleave_0, values = (var_36576_cast_fp16, var_36578_cast_fp16, var_36580_cast_fp16, var_36582_cast_fp16))[name = tensor("op_36736_cast_fp16")]; + tensor var_36738_interleave_0 = const()[name = tensor("op_36738_interleave_0"), val = tensor(false)]; + tensor var_36738_cast_fp16 = concat(axis = var_35295, interleave = var_36738_interleave_0, values = (var_36584_cast_fp16, var_36586_cast_fp16, var_36588_cast_fp16, var_36590_cast_fp16))[name = tensor("op_36738_cast_fp16")]; + tensor var_36740_interleave_0 = const()[name = tensor("op_36740_interleave_0"), val = tensor(false)]; + tensor var_36740_cast_fp16 = concat(axis = var_35295, interleave = var_36740_interleave_0, values = (var_36592_cast_fp16, var_36594_cast_fp16, var_36596_cast_fp16, var_36598_cast_fp16))[name = tensor("op_36740_cast_fp16")]; + tensor var_36742_interleave_0 = const()[name = tensor("op_36742_interleave_0"), val = tensor(false)]; + tensor var_36742_cast_fp16 = concat(axis = var_35295, interleave = var_36742_interleave_0, values = (var_36600_cast_fp16, var_36602_cast_fp16, var_36604_cast_fp16, var_36606_cast_fp16))[name = tensor("op_36742_cast_fp16")]; + tensor var_36744_interleave_0 = const()[name = tensor("op_36744_interleave_0"), val = tensor(false)]; + tensor var_36744_cast_fp16 = concat(axis = var_35295, interleave = var_36744_interleave_0, values = (var_36608_cast_fp16, var_36610_cast_fp16, var_36612_cast_fp16, var_36614_cast_fp16))[name = tensor("op_36744_cast_fp16")]; + tensor var_36746_interleave_0 = const()[name = tensor("op_36746_interleave_0"), val = tensor(false)]; + tensor var_36746_cast_fp16 = concat(axis = var_35295, interleave = var_36746_interleave_0, values = (var_36616_cast_fp16, var_36618_cast_fp16, var_36620_cast_fp16, var_36622_cast_fp16))[name = tensor("op_36746_cast_fp16")]; + tensor var_36748_interleave_0 = const()[name = tensor("op_36748_interleave_0"), val = tensor(false)]; + tensor var_36748_cast_fp16 = concat(axis = var_35295, interleave = var_36748_interleave_0, values = (var_36624_cast_fp16, var_36626_cast_fp16, var_36628_cast_fp16, var_36630_cast_fp16))[name = tensor("op_36748_cast_fp16")]; + tensor var_36750_interleave_0 = const()[name = tensor("op_36750_interleave_0"), val = tensor(false)]; + tensor var_36750_cast_fp16 = concat(axis = var_35295, interleave = var_36750_interleave_0, values = (var_36632_cast_fp16, var_36634_cast_fp16, var_36636_cast_fp16, var_36638_cast_fp16))[name = tensor("op_36750_cast_fp16")]; + tensor var_36752_interleave_0 = const()[name = tensor("op_36752_interleave_0"), val = tensor(false)]; + tensor var_36752_cast_fp16 = concat(axis = var_35295, interleave = var_36752_interleave_0, values = (var_36640_cast_fp16, var_36642_cast_fp16, var_36644_cast_fp16, var_36646_cast_fp16))[name = tensor("op_36752_cast_fp16")]; + tensor var_36754_interleave_0 = const()[name = tensor("op_36754_interleave_0"), val = tensor(false)]; + tensor var_36754_cast_fp16 = concat(axis = var_35295, interleave = var_36754_interleave_0, values = (var_36648_cast_fp16, var_36650_cast_fp16, var_36652_cast_fp16, var_36654_cast_fp16))[name = tensor("op_36754_cast_fp16")]; + tensor var_36756_interleave_0 = const()[name = tensor("op_36756_interleave_0"), val = tensor(false)]; + tensor var_36756_cast_fp16 = concat(axis = var_35295, interleave = var_36756_interleave_0, values = (var_36656_cast_fp16, var_36658_cast_fp16, var_36660_cast_fp16, var_36662_cast_fp16))[name = tensor("op_36756_cast_fp16")]; + tensor var_36758_interleave_0 = const()[name = tensor("op_36758_interleave_0"), val = tensor(false)]; + tensor var_36758_cast_fp16 = concat(axis = var_35295, interleave = var_36758_interleave_0, values = (var_36664_cast_fp16, var_36666_cast_fp16, var_36668_cast_fp16, var_36670_cast_fp16))[name = tensor("op_36758_cast_fp16")]; + tensor var_36760_interleave_0 = const()[name = tensor("op_36760_interleave_0"), val = tensor(false)]; + tensor var_36760_cast_fp16 = concat(axis = var_35295, interleave = var_36760_interleave_0, values = (var_36672_cast_fp16, var_36674_cast_fp16, var_36676_cast_fp16, var_36678_cast_fp16))[name = tensor("op_36760_cast_fp16")]; + tensor var_36762_interleave_0 = const()[name = tensor("op_36762_interleave_0"), val = tensor(false)]; + tensor var_36762_cast_fp16 = concat(axis = var_35295, interleave = var_36762_interleave_0, values = (var_36680_cast_fp16, var_36682_cast_fp16, var_36684_cast_fp16, var_36686_cast_fp16))[name = tensor("op_36762_cast_fp16")]; + tensor var_36764_interleave_0 = const()[name = tensor("op_36764_interleave_0"), val = tensor(false)]; + tensor var_36764_cast_fp16 = concat(axis = var_35295, interleave = var_36764_interleave_0, values = (var_36688_cast_fp16, var_36690_cast_fp16, var_36692_cast_fp16, var_36694_cast_fp16))[name = tensor("op_36764_cast_fp16")]; + tensor var_36766_interleave_0 = const()[name = tensor("op_36766_interleave_0"), val = tensor(false)]; + tensor var_36766_cast_fp16 = concat(axis = var_35295, interleave = var_36766_interleave_0, values = (var_36696_cast_fp16, var_36698_cast_fp16, var_36700_cast_fp16, var_36702_cast_fp16))[name = tensor("op_36766_cast_fp16")]; + tensor var_36768_interleave_0 = const()[name = tensor("op_36768_interleave_0"), val = tensor(false)]; + tensor var_36768_cast_fp16 = concat(axis = var_35295, interleave = var_36768_interleave_0, values = (var_36704_cast_fp16, var_36706_cast_fp16, var_36708_cast_fp16, var_36710_cast_fp16))[name = tensor("op_36768_cast_fp16")]; + tensor var_36770_interleave_0 = const()[name = tensor("op_36770_interleave_0"), val = tensor(false)]; + tensor var_36770_cast_fp16 = concat(axis = var_35295, interleave = var_36770_interleave_0, values = (var_36712_cast_fp16, var_36714_cast_fp16, var_36716_cast_fp16, var_36718_cast_fp16))[name = tensor("op_36770_cast_fp16")]; + tensor var_36772_interleave_0 = const()[name = tensor("op_36772_interleave_0"), val = tensor(false)]; + tensor var_36772_cast_fp16 = concat(axis = var_35295, interleave = var_36772_interleave_0, values = (var_36720_cast_fp16, var_36722_cast_fp16, var_36724_cast_fp16, var_36726_cast_fp16))[name = tensor("op_36772_cast_fp16")]; + tensor var_36774_interleave_0 = const()[name = tensor("op_36774_interleave_0"), val = tensor(false)]; + tensor var_36774_cast_fp16 = concat(axis = var_35295, interleave = var_36774_interleave_0, values = (var_36728_cast_fp16, var_36730_cast_fp16, var_36732_cast_fp16, var_36734_cast_fp16))[name = tensor("op_36774_cast_fp16")]; + tensor input_185_interleave_0 = const()[name = tensor("input_185_interleave_0"), val = tensor(false)]; + tensor input_185_cast_fp16 = concat(axis = var_35320, interleave = input_185_interleave_0, values = (var_36736_cast_fp16, var_36738_cast_fp16, var_36740_cast_fp16, var_36742_cast_fp16, var_36744_cast_fp16, var_36746_cast_fp16, var_36748_cast_fp16, var_36750_cast_fp16, var_36752_cast_fp16, var_36754_cast_fp16, var_36756_cast_fp16, var_36758_cast_fp16, var_36760_cast_fp16, var_36762_cast_fp16, var_36764_cast_fp16, var_36766_cast_fp16, var_36768_cast_fp16, var_36770_cast_fp16, var_36772_cast_fp16, var_36774_cast_fp16))[name = tensor("input_185_cast_fp16")]; + tensor var_36779 = const()[name = tensor("op_36779"), val = tensor([1, 1])]; + tensor var_36781 = const()[name = tensor("op_36781"), val = tensor([1, 1])]; + tensor obj_95_pad_type_0 = const()[name = tensor("obj_95_pad_type_0"), val = tensor("custom")]; + tensor obj_95_pad_0 = const()[name = tensor("obj_95_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_23_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_23_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(929262080)))]; + tensor layers_23_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_23_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(932538944)))]; + tensor obj_95_cast_fp16 = conv(bias = layers_23_self_attn_o_proj_bias_to_fp16, dilations = var_36781, groups = var_35320, pad = obj_95_pad_0, pad_type = obj_95_pad_type_0, strides = var_36779, weight = layers_23_self_attn_o_proj_weight_to_fp16, x = input_185_cast_fp16)[name = tensor("obj_95_cast_fp16")]; + tensor inputs_95_cast_fp16 = add(x = inputs_93_cast_fp16, y = obj_95_cast_fp16)[name = tensor("inputs_95_cast_fp16")]; + tensor var_36787 = const()[name = tensor("op_36787"), val = tensor([1])]; + tensor channels_mean_95_cast_fp16 = reduce_mean(axes = var_36787, keep_dims = var_35321, x = inputs_95_cast_fp16)[name = tensor("channels_mean_95_cast_fp16")]; + tensor zero_mean_95_cast_fp16 = sub(x = inputs_95_cast_fp16, y = channels_mean_95_cast_fp16)[name = tensor("zero_mean_95_cast_fp16")]; + tensor zero_mean_sq_95_cast_fp16 = mul(x = zero_mean_95_cast_fp16, y = zero_mean_95_cast_fp16)[name = tensor("zero_mean_sq_95_cast_fp16")]; + tensor var_36791 = const()[name = tensor("op_36791"), val = tensor([1])]; + tensor var_36792_cast_fp16 = reduce_mean(axes = var_36791, keep_dims = var_35321, x = zero_mean_sq_95_cast_fp16)[name = tensor("op_36792_cast_fp16")]; + tensor var_36793_to_fp16 = const()[name = tensor("op_36793_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_36794_cast_fp16 = add(x = var_36792_cast_fp16, y = var_36793_to_fp16)[name = tensor("op_36794_cast_fp16")]; + tensor denom_95_epsilon_0_to_fp16 = const()[name = tensor("denom_95_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_95_cast_fp16 = rsqrt(epsilon = denom_95_epsilon_0_to_fp16, x = var_36794_cast_fp16)[name = tensor("denom_95_cast_fp16")]; + tensor out_95_cast_fp16 = mul(x = zero_mean_95_cast_fp16, y = denom_95_cast_fp16)[name = tensor("out_95_cast_fp16")]; + tensor input_187_gamma_0_to_fp16 = const()[name = tensor("input_187_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(932541568)))]; + tensor input_187_beta_0_to_fp16 = const()[name = tensor("input_187_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(932544192)))]; + tensor input_187_epsilon_0_to_fp16 = const()[name = tensor("input_187_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_187_cast_fp16 = batch_norm(beta = input_187_beta_0_to_fp16, epsilon = input_187_epsilon_0_to_fp16, gamma = input_187_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_95_cast_fp16)[name = tensor("input_187_cast_fp16")]; + tensor var_36805 = const()[name = tensor("op_36805"), val = tensor([1, 1])]; + tensor var_36807 = const()[name = tensor("op_36807"), val = tensor([1, 1])]; + tensor input_189_pad_type_0 = const()[name = tensor("input_189_pad_type_0"), val = tensor("custom")]; + tensor input_189_pad_0 = const()[name = tensor("input_189_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_23_fc1_weight_to_fp16 = const()[name = tensor("layers_23_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(932546816)))]; + tensor layers_23_fc1_bias_to_fp16 = const()[name = tensor("layers_23_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(945654080)))]; + tensor input_189_cast_fp16 = conv(bias = layers_23_fc1_bias_to_fp16, dilations = var_36807, groups = var_35320, pad = input_189_pad_0, pad_type = input_189_pad_type_0, strides = var_36805, weight = layers_23_fc1_weight_to_fp16, x = input_187_cast_fp16)[name = tensor("input_189_cast_fp16")]; + tensor input_191_mode_0 = const()[name = tensor("input_191_mode_0"), val = tensor("EXACT")]; + tensor input_191_cast_fp16 = gelu(mode = input_191_mode_0, x = input_189_cast_fp16)[name = tensor("input_191_cast_fp16")]; + tensor var_36813 = const()[name = tensor("op_36813"), val = tensor([1, 1])]; + tensor var_36815 = const()[name = tensor("op_36815"), val = tensor([1, 1])]; + tensor hidden_states_51_pad_type_0 = const()[name = tensor("hidden_states_51_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_51_pad_0 = const()[name = tensor("hidden_states_51_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_23_fc2_weight_to_fp16 = const()[name = tensor("layers_23_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(945664384)))]; + tensor layers_23_fc2_bias_to_fp16 = const()[name = tensor("layers_23_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(958771648)))]; + tensor hidden_states_51_cast_fp16 = conv(bias = layers_23_fc2_bias_to_fp16, dilations = var_36815, groups = var_35320, pad = hidden_states_51_pad_0, pad_type = hidden_states_51_pad_type_0, strides = var_36813, weight = layers_23_fc2_weight_to_fp16, x = input_191_cast_fp16)[name = tensor("hidden_states_51_cast_fp16")]; + tensor inputs_97_cast_fp16 = add(x = inputs_95_cast_fp16, y = hidden_states_51_cast_fp16)[name = tensor("inputs_97_cast_fp16")]; + tensor var_36822 = const()[name = tensor("op_36822"), val = tensor(3)]; + tensor var_36847 = const()[name = tensor("op_36847"), val = tensor(1)]; + tensor var_36848 = const()[name = tensor("op_36848"), val = tensor(true)]; + tensor var_36858 = const()[name = tensor("op_36858"), val = tensor([1])]; + tensor channels_mean_97_cast_fp16 = reduce_mean(axes = var_36858, keep_dims = var_36848, x = inputs_97_cast_fp16)[name = tensor("channels_mean_97_cast_fp16")]; + tensor zero_mean_97_cast_fp16 = sub(x = inputs_97_cast_fp16, y = channels_mean_97_cast_fp16)[name = tensor("zero_mean_97_cast_fp16")]; + tensor zero_mean_sq_97_cast_fp16 = mul(x = zero_mean_97_cast_fp16, y = zero_mean_97_cast_fp16)[name = tensor("zero_mean_sq_97_cast_fp16")]; + tensor var_36862 = const()[name = tensor("op_36862"), val = tensor([1])]; + tensor var_36863_cast_fp16 = reduce_mean(axes = var_36862, keep_dims = var_36848, x = zero_mean_sq_97_cast_fp16)[name = tensor("op_36863_cast_fp16")]; + tensor var_36864_to_fp16 = const()[name = tensor("op_36864_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_36865_cast_fp16 = add(x = var_36863_cast_fp16, y = var_36864_to_fp16)[name = tensor("op_36865_cast_fp16")]; + tensor denom_97_epsilon_0_to_fp16 = const()[name = tensor("denom_97_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_97_cast_fp16 = rsqrt(epsilon = denom_97_epsilon_0_to_fp16, x = var_36865_cast_fp16)[name = tensor("denom_97_cast_fp16")]; + tensor out_97_cast_fp16 = mul(x = zero_mean_97_cast_fp16, y = denom_97_cast_fp16)[name = tensor("out_97_cast_fp16")]; + tensor obj_97_gamma_0_to_fp16 = const()[name = tensor("obj_97_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(958774272)))]; + tensor obj_97_beta_0_to_fp16 = const()[name = tensor("obj_97_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(958776896)))]; + tensor obj_97_epsilon_0_to_fp16 = const()[name = tensor("obj_97_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_97_cast_fp16 = batch_norm(beta = obj_97_beta_0_to_fp16, epsilon = obj_97_epsilon_0_to_fp16, gamma = obj_97_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_97_cast_fp16)[name = tensor("obj_97_cast_fp16")]; + tensor var_36880 = const()[name = tensor("op_36880"), val = tensor([1, 1])]; + tensor var_36882 = const()[name = tensor("op_36882"), val = tensor([1, 1])]; + tensor query_49_pad_type_0 = const()[name = tensor("query_49_pad_type_0"), val = tensor("custom")]; + tensor query_49_pad_0 = const()[name = tensor("query_49_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_24_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_24_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(958779520)))]; + tensor layers_24_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_24_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(962056384)))]; + tensor query_49_cast_fp16 = conv(bias = layers_24_self_attn_q_proj_bias_to_fp16, dilations = var_36882, groups = var_36847, pad = query_49_pad_0, pad_type = query_49_pad_type_0, strides = var_36880, weight = layers_24_self_attn_q_proj_weight_to_fp16, x = obj_97_cast_fp16)[name = tensor("query_49_cast_fp16")]; + tensor var_36886 = const()[name = tensor("op_36886"), val = tensor([1, 1])]; + tensor var_36888 = const()[name = tensor("op_36888"), val = tensor([1, 1])]; + tensor key_49_pad_type_0 = const()[name = tensor("key_49_pad_type_0"), val = tensor("custom")]; + tensor key_49_pad_0 = const()[name = tensor("key_49_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_24_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_24_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(962059008)))]; + tensor key_49_cast_fp16 = conv(dilations = var_36888, groups = var_36847, pad = key_49_pad_0, pad_type = key_49_pad_type_0, strides = var_36886, weight = layers_24_self_attn_k_proj_weight_to_fp16, x = obj_97_cast_fp16)[name = tensor("key_49_cast_fp16")]; + tensor var_36893 = const()[name = tensor("op_36893"), val = tensor([1, 1])]; + tensor var_36895 = const()[name = tensor("op_36895"), val = tensor([1, 1])]; + tensor value_49_pad_type_0 = const()[name = tensor("value_49_pad_type_0"), val = tensor("custom")]; + tensor value_49_pad_0 = const()[name = tensor("value_49_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_24_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_24_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(965335872)))]; + tensor layers_24_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_24_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(968612736)))]; + tensor value_49_cast_fp16 = conv(bias = layers_24_self_attn_v_proj_bias_to_fp16, dilations = var_36895, groups = var_36847, pad = value_49_pad_0, pad_type = value_49_pad_type_0, strides = var_36893, weight = layers_24_self_attn_v_proj_weight_to_fp16, x = obj_97_cast_fp16)[name = tensor("value_49_cast_fp16")]; + tensor var_36902_begin_0 = const()[name = tensor("op_36902_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_36902_end_0 = const()[name = tensor("op_36902_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_36902_end_mask_0 = const()[name = tensor("op_36902_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_36902_cast_fp16 = slice_by_index(begin = var_36902_begin_0, end = var_36902_end_0, end_mask = var_36902_end_mask_0, x = query_49_cast_fp16)[name = tensor("op_36902_cast_fp16")]; + tensor var_36906_begin_0 = const()[name = tensor("op_36906_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_36906_end_0 = const()[name = tensor("op_36906_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_36906_end_mask_0 = const()[name = tensor("op_36906_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_36906_cast_fp16 = slice_by_index(begin = var_36906_begin_0, end = var_36906_end_0, end_mask = var_36906_end_mask_0, x = query_49_cast_fp16)[name = tensor("op_36906_cast_fp16")]; + tensor var_36910_begin_0 = const()[name = tensor("op_36910_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_36910_end_0 = const()[name = tensor("op_36910_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_36910_end_mask_0 = const()[name = tensor("op_36910_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_36910_cast_fp16 = slice_by_index(begin = var_36910_begin_0, end = var_36910_end_0, end_mask = var_36910_end_mask_0, x = query_49_cast_fp16)[name = tensor("op_36910_cast_fp16")]; + tensor var_36914_begin_0 = const()[name = tensor("op_36914_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_36914_end_0 = const()[name = tensor("op_36914_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_36914_end_mask_0 = const()[name = tensor("op_36914_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_36914_cast_fp16 = slice_by_index(begin = var_36914_begin_0, end = var_36914_end_0, end_mask = var_36914_end_mask_0, x = query_49_cast_fp16)[name = tensor("op_36914_cast_fp16")]; + tensor var_36918_begin_0 = const()[name = tensor("op_36918_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_36918_end_0 = const()[name = tensor("op_36918_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_36918_end_mask_0 = const()[name = tensor("op_36918_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_36918_cast_fp16 = slice_by_index(begin = var_36918_begin_0, end = var_36918_end_0, end_mask = var_36918_end_mask_0, x = query_49_cast_fp16)[name = tensor("op_36918_cast_fp16")]; + tensor var_36922_begin_0 = const()[name = tensor("op_36922_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_36922_end_0 = const()[name = tensor("op_36922_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_36922_end_mask_0 = const()[name = tensor("op_36922_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_36922_cast_fp16 = slice_by_index(begin = var_36922_begin_0, end = var_36922_end_0, end_mask = var_36922_end_mask_0, x = query_49_cast_fp16)[name = tensor("op_36922_cast_fp16")]; + tensor var_36926_begin_0 = const()[name = tensor("op_36926_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_36926_end_0 = const()[name = tensor("op_36926_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_36926_end_mask_0 = const()[name = tensor("op_36926_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_36926_cast_fp16 = slice_by_index(begin = var_36926_begin_0, end = var_36926_end_0, end_mask = var_36926_end_mask_0, x = query_49_cast_fp16)[name = tensor("op_36926_cast_fp16")]; + tensor var_36930_begin_0 = const()[name = tensor("op_36930_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_36930_end_0 = const()[name = tensor("op_36930_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_36930_end_mask_0 = const()[name = tensor("op_36930_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_36930_cast_fp16 = slice_by_index(begin = var_36930_begin_0, end = var_36930_end_0, end_mask = var_36930_end_mask_0, x = query_49_cast_fp16)[name = tensor("op_36930_cast_fp16")]; + tensor var_36934_begin_0 = const()[name = tensor("op_36934_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_36934_end_0 = const()[name = tensor("op_36934_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_36934_end_mask_0 = const()[name = tensor("op_36934_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_36934_cast_fp16 = slice_by_index(begin = var_36934_begin_0, end = var_36934_end_0, end_mask = var_36934_end_mask_0, x = query_49_cast_fp16)[name = tensor("op_36934_cast_fp16")]; + tensor var_36938_begin_0 = const()[name = tensor("op_36938_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_36938_end_0 = const()[name = tensor("op_36938_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_36938_end_mask_0 = const()[name = tensor("op_36938_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_36938_cast_fp16 = slice_by_index(begin = var_36938_begin_0, end = var_36938_end_0, end_mask = var_36938_end_mask_0, x = query_49_cast_fp16)[name = tensor("op_36938_cast_fp16")]; + tensor var_36942_begin_0 = const()[name = tensor("op_36942_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_36942_end_0 = const()[name = tensor("op_36942_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_36942_end_mask_0 = const()[name = tensor("op_36942_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_36942_cast_fp16 = slice_by_index(begin = var_36942_begin_0, end = var_36942_end_0, end_mask = var_36942_end_mask_0, x = query_49_cast_fp16)[name = tensor("op_36942_cast_fp16")]; + tensor var_36946_begin_0 = const()[name = tensor("op_36946_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_36946_end_0 = const()[name = tensor("op_36946_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_36946_end_mask_0 = const()[name = tensor("op_36946_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_36946_cast_fp16 = slice_by_index(begin = var_36946_begin_0, end = var_36946_end_0, end_mask = var_36946_end_mask_0, x = query_49_cast_fp16)[name = tensor("op_36946_cast_fp16")]; + tensor var_36950_begin_0 = const()[name = tensor("op_36950_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_36950_end_0 = const()[name = tensor("op_36950_end_0"), val = tensor([1, 832, 1, 1500])]; + tensor var_36950_end_mask_0 = const()[name = tensor("op_36950_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_36950_cast_fp16 = slice_by_index(begin = var_36950_begin_0, end = var_36950_end_0, end_mask = var_36950_end_mask_0, x = query_49_cast_fp16)[name = tensor("op_36950_cast_fp16")]; + tensor var_36954_begin_0 = const()[name = tensor("op_36954_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_36954_end_0 = const()[name = tensor("op_36954_end_0"), val = tensor([1, 896, 1, 1500])]; + tensor var_36954_end_mask_0 = const()[name = tensor("op_36954_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_36954_cast_fp16 = slice_by_index(begin = var_36954_begin_0, end = var_36954_end_0, end_mask = var_36954_end_mask_0, x = query_49_cast_fp16)[name = tensor("op_36954_cast_fp16")]; + tensor var_36958_begin_0 = const()[name = tensor("op_36958_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_36958_end_0 = const()[name = tensor("op_36958_end_0"), val = tensor([1, 960, 1, 1500])]; + tensor var_36958_end_mask_0 = const()[name = tensor("op_36958_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_36958_cast_fp16 = slice_by_index(begin = var_36958_begin_0, end = var_36958_end_0, end_mask = var_36958_end_mask_0, x = query_49_cast_fp16)[name = tensor("op_36958_cast_fp16")]; + tensor var_36962_begin_0 = const()[name = tensor("op_36962_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_36962_end_0 = const()[name = tensor("op_36962_end_0"), val = tensor([1, 1024, 1, 1500])]; + tensor var_36962_end_mask_0 = const()[name = tensor("op_36962_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_36962_cast_fp16 = slice_by_index(begin = var_36962_begin_0, end = var_36962_end_0, end_mask = var_36962_end_mask_0, x = query_49_cast_fp16)[name = tensor("op_36962_cast_fp16")]; + tensor var_36966_begin_0 = const()[name = tensor("op_36966_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_36966_end_0 = const()[name = tensor("op_36966_end_0"), val = tensor([1, 1088, 1, 1500])]; + tensor var_36966_end_mask_0 = const()[name = tensor("op_36966_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_36966_cast_fp16 = slice_by_index(begin = var_36966_begin_0, end = var_36966_end_0, end_mask = var_36966_end_mask_0, x = query_49_cast_fp16)[name = tensor("op_36966_cast_fp16")]; + tensor var_36970_begin_0 = const()[name = tensor("op_36970_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_36970_end_0 = const()[name = tensor("op_36970_end_0"), val = tensor([1, 1152, 1, 1500])]; + tensor var_36970_end_mask_0 = const()[name = tensor("op_36970_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_36970_cast_fp16 = slice_by_index(begin = var_36970_begin_0, end = var_36970_end_0, end_mask = var_36970_end_mask_0, x = query_49_cast_fp16)[name = tensor("op_36970_cast_fp16")]; + tensor var_36974_begin_0 = const()[name = tensor("op_36974_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_36974_end_0 = const()[name = tensor("op_36974_end_0"), val = tensor([1, 1216, 1, 1500])]; + tensor var_36974_end_mask_0 = const()[name = tensor("op_36974_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_36974_cast_fp16 = slice_by_index(begin = var_36974_begin_0, end = var_36974_end_0, end_mask = var_36974_end_mask_0, x = query_49_cast_fp16)[name = tensor("op_36974_cast_fp16")]; + tensor var_36978_begin_0 = const()[name = tensor("op_36978_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_36978_end_0 = const()[name = tensor("op_36978_end_0"), val = tensor([1, 1280, 1, 1500])]; + tensor var_36978_end_mask_0 = const()[name = tensor("op_36978_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_36978_cast_fp16 = slice_by_index(begin = var_36978_begin_0, end = var_36978_end_0, end_mask = var_36978_end_mask_0, x = query_49_cast_fp16)[name = tensor("op_36978_cast_fp16")]; + tensor var_36987_begin_0 = const()[name = tensor("op_36987_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_36987_end_0 = const()[name = tensor("op_36987_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_36987_end_mask_0 = const()[name = tensor("op_36987_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_36987_cast_fp16 = slice_by_index(begin = var_36987_begin_0, end = var_36987_end_0, end_mask = var_36987_end_mask_0, x = var_36902_cast_fp16)[name = tensor("op_36987_cast_fp16")]; + tensor var_36994_begin_0 = const()[name = tensor("op_36994_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_36994_end_0 = const()[name = tensor("op_36994_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_36994_end_mask_0 = const()[name = tensor("op_36994_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_36994_cast_fp16 = slice_by_index(begin = var_36994_begin_0, end = var_36994_end_0, end_mask = var_36994_end_mask_0, x = var_36902_cast_fp16)[name = tensor("op_36994_cast_fp16")]; + tensor var_37001_begin_0 = const()[name = tensor("op_37001_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_37001_end_0 = const()[name = tensor("op_37001_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_37001_end_mask_0 = const()[name = tensor("op_37001_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37001_cast_fp16 = slice_by_index(begin = var_37001_begin_0, end = var_37001_end_0, end_mask = var_37001_end_mask_0, x = var_36902_cast_fp16)[name = tensor("op_37001_cast_fp16")]; + tensor var_37008_begin_0 = const()[name = tensor("op_37008_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_37008_end_0 = const()[name = tensor("op_37008_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_37008_end_mask_0 = const()[name = tensor("op_37008_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37008_cast_fp16 = slice_by_index(begin = var_37008_begin_0, end = var_37008_end_0, end_mask = var_37008_end_mask_0, x = var_36902_cast_fp16)[name = tensor("op_37008_cast_fp16")]; + tensor var_37015_begin_0 = const()[name = tensor("op_37015_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_37015_end_0 = const()[name = tensor("op_37015_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_37015_end_mask_0 = const()[name = tensor("op_37015_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37015_cast_fp16 = slice_by_index(begin = var_37015_begin_0, end = var_37015_end_0, end_mask = var_37015_end_mask_0, x = var_36906_cast_fp16)[name = tensor("op_37015_cast_fp16")]; + tensor var_37022_begin_0 = const()[name = tensor("op_37022_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_37022_end_0 = const()[name = tensor("op_37022_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_37022_end_mask_0 = const()[name = tensor("op_37022_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37022_cast_fp16 = slice_by_index(begin = var_37022_begin_0, end = var_37022_end_0, end_mask = var_37022_end_mask_0, x = var_36906_cast_fp16)[name = tensor("op_37022_cast_fp16")]; + tensor var_37029_begin_0 = const()[name = tensor("op_37029_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_37029_end_0 = const()[name = tensor("op_37029_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_37029_end_mask_0 = const()[name = tensor("op_37029_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37029_cast_fp16 = slice_by_index(begin = var_37029_begin_0, end = var_37029_end_0, end_mask = var_37029_end_mask_0, x = var_36906_cast_fp16)[name = tensor("op_37029_cast_fp16")]; + tensor var_37036_begin_0 = const()[name = tensor("op_37036_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_37036_end_0 = const()[name = tensor("op_37036_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_37036_end_mask_0 = const()[name = tensor("op_37036_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37036_cast_fp16 = slice_by_index(begin = var_37036_begin_0, end = var_37036_end_0, end_mask = var_37036_end_mask_0, x = var_36906_cast_fp16)[name = tensor("op_37036_cast_fp16")]; + tensor var_37043_begin_0 = const()[name = tensor("op_37043_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_37043_end_0 = const()[name = tensor("op_37043_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_37043_end_mask_0 = const()[name = tensor("op_37043_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37043_cast_fp16 = slice_by_index(begin = var_37043_begin_0, end = var_37043_end_0, end_mask = var_37043_end_mask_0, x = var_36910_cast_fp16)[name = tensor("op_37043_cast_fp16")]; + tensor var_37050_begin_0 = const()[name = tensor("op_37050_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_37050_end_0 = const()[name = tensor("op_37050_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_37050_end_mask_0 = const()[name = tensor("op_37050_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37050_cast_fp16 = slice_by_index(begin = var_37050_begin_0, end = var_37050_end_0, end_mask = var_37050_end_mask_0, x = var_36910_cast_fp16)[name = tensor("op_37050_cast_fp16")]; + tensor var_37057_begin_0 = const()[name = tensor("op_37057_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_37057_end_0 = const()[name = tensor("op_37057_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_37057_end_mask_0 = const()[name = tensor("op_37057_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37057_cast_fp16 = slice_by_index(begin = var_37057_begin_0, end = var_37057_end_0, end_mask = var_37057_end_mask_0, x = var_36910_cast_fp16)[name = tensor("op_37057_cast_fp16")]; + tensor var_37064_begin_0 = const()[name = tensor("op_37064_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_37064_end_0 = const()[name = tensor("op_37064_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_37064_end_mask_0 = const()[name = tensor("op_37064_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37064_cast_fp16 = slice_by_index(begin = var_37064_begin_0, end = var_37064_end_0, end_mask = var_37064_end_mask_0, x = var_36910_cast_fp16)[name = tensor("op_37064_cast_fp16")]; + tensor var_37071_begin_0 = const()[name = tensor("op_37071_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_37071_end_0 = const()[name = tensor("op_37071_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_37071_end_mask_0 = const()[name = tensor("op_37071_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37071_cast_fp16 = slice_by_index(begin = var_37071_begin_0, end = var_37071_end_0, end_mask = var_37071_end_mask_0, x = var_36914_cast_fp16)[name = tensor("op_37071_cast_fp16")]; + tensor var_37078_begin_0 = const()[name = tensor("op_37078_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_37078_end_0 = const()[name = tensor("op_37078_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_37078_end_mask_0 = const()[name = tensor("op_37078_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37078_cast_fp16 = slice_by_index(begin = var_37078_begin_0, end = var_37078_end_0, end_mask = var_37078_end_mask_0, x = var_36914_cast_fp16)[name = tensor("op_37078_cast_fp16")]; + tensor var_37085_begin_0 = const()[name = tensor("op_37085_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_37085_end_0 = const()[name = tensor("op_37085_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_37085_end_mask_0 = const()[name = tensor("op_37085_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37085_cast_fp16 = slice_by_index(begin = var_37085_begin_0, end = var_37085_end_0, end_mask = var_37085_end_mask_0, x = var_36914_cast_fp16)[name = tensor("op_37085_cast_fp16")]; + tensor var_37092_begin_0 = const()[name = tensor("op_37092_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_37092_end_0 = const()[name = tensor("op_37092_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_37092_end_mask_0 = const()[name = tensor("op_37092_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37092_cast_fp16 = slice_by_index(begin = var_37092_begin_0, end = var_37092_end_0, end_mask = var_37092_end_mask_0, x = var_36914_cast_fp16)[name = tensor("op_37092_cast_fp16")]; + tensor var_37099_begin_0 = const()[name = tensor("op_37099_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_37099_end_0 = const()[name = tensor("op_37099_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_37099_end_mask_0 = const()[name = tensor("op_37099_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37099_cast_fp16 = slice_by_index(begin = var_37099_begin_0, end = var_37099_end_0, end_mask = var_37099_end_mask_0, x = var_36918_cast_fp16)[name = tensor("op_37099_cast_fp16")]; + tensor var_37106_begin_0 = const()[name = tensor("op_37106_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_37106_end_0 = const()[name = tensor("op_37106_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_37106_end_mask_0 = const()[name = tensor("op_37106_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37106_cast_fp16 = slice_by_index(begin = var_37106_begin_0, end = var_37106_end_0, end_mask = var_37106_end_mask_0, x = var_36918_cast_fp16)[name = tensor("op_37106_cast_fp16")]; + tensor var_37113_begin_0 = const()[name = tensor("op_37113_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_37113_end_0 = const()[name = tensor("op_37113_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_37113_end_mask_0 = const()[name = tensor("op_37113_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37113_cast_fp16 = slice_by_index(begin = var_37113_begin_0, end = var_37113_end_0, end_mask = var_37113_end_mask_0, x = var_36918_cast_fp16)[name = tensor("op_37113_cast_fp16")]; + tensor var_37120_begin_0 = const()[name = tensor("op_37120_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_37120_end_0 = const()[name = tensor("op_37120_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_37120_end_mask_0 = const()[name = tensor("op_37120_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37120_cast_fp16 = slice_by_index(begin = var_37120_begin_0, end = var_37120_end_0, end_mask = var_37120_end_mask_0, x = var_36918_cast_fp16)[name = tensor("op_37120_cast_fp16")]; + tensor var_37127_begin_0 = const()[name = tensor("op_37127_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_37127_end_0 = const()[name = tensor("op_37127_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_37127_end_mask_0 = const()[name = tensor("op_37127_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37127_cast_fp16 = slice_by_index(begin = var_37127_begin_0, end = var_37127_end_0, end_mask = var_37127_end_mask_0, x = var_36922_cast_fp16)[name = tensor("op_37127_cast_fp16")]; + tensor var_37134_begin_0 = const()[name = tensor("op_37134_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_37134_end_0 = const()[name = tensor("op_37134_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_37134_end_mask_0 = const()[name = tensor("op_37134_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37134_cast_fp16 = slice_by_index(begin = var_37134_begin_0, end = var_37134_end_0, end_mask = var_37134_end_mask_0, x = var_36922_cast_fp16)[name = tensor("op_37134_cast_fp16")]; + tensor var_37141_begin_0 = const()[name = tensor("op_37141_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_37141_end_0 = const()[name = tensor("op_37141_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_37141_end_mask_0 = const()[name = tensor("op_37141_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37141_cast_fp16 = slice_by_index(begin = var_37141_begin_0, end = var_37141_end_0, end_mask = var_37141_end_mask_0, x = var_36922_cast_fp16)[name = tensor("op_37141_cast_fp16")]; + tensor var_37148_begin_0 = const()[name = tensor("op_37148_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_37148_end_0 = const()[name = tensor("op_37148_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_37148_end_mask_0 = const()[name = tensor("op_37148_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37148_cast_fp16 = slice_by_index(begin = var_37148_begin_0, end = var_37148_end_0, end_mask = var_37148_end_mask_0, x = var_36922_cast_fp16)[name = tensor("op_37148_cast_fp16")]; + tensor var_37155_begin_0 = const()[name = tensor("op_37155_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_37155_end_0 = const()[name = tensor("op_37155_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_37155_end_mask_0 = const()[name = tensor("op_37155_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37155_cast_fp16 = slice_by_index(begin = var_37155_begin_0, end = var_37155_end_0, end_mask = var_37155_end_mask_0, x = var_36926_cast_fp16)[name = tensor("op_37155_cast_fp16")]; + tensor var_37162_begin_0 = const()[name = tensor("op_37162_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_37162_end_0 = const()[name = tensor("op_37162_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_37162_end_mask_0 = const()[name = tensor("op_37162_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37162_cast_fp16 = slice_by_index(begin = var_37162_begin_0, end = var_37162_end_0, end_mask = var_37162_end_mask_0, x = var_36926_cast_fp16)[name = tensor("op_37162_cast_fp16")]; + tensor var_37169_begin_0 = const()[name = tensor("op_37169_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_37169_end_0 = const()[name = tensor("op_37169_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_37169_end_mask_0 = const()[name = tensor("op_37169_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37169_cast_fp16 = slice_by_index(begin = var_37169_begin_0, end = var_37169_end_0, end_mask = var_37169_end_mask_0, x = var_36926_cast_fp16)[name = tensor("op_37169_cast_fp16")]; + tensor var_37176_begin_0 = const()[name = tensor("op_37176_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_37176_end_0 = const()[name = tensor("op_37176_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_37176_end_mask_0 = const()[name = tensor("op_37176_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37176_cast_fp16 = slice_by_index(begin = var_37176_begin_0, end = var_37176_end_0, end_mask = var_37176_end_mask_0, x = var_36926_cast_fp16)[name = tensor("op_37176_cast_fp16")]; + tensor var_37183_begin_0 = const()[name = tensor("op_37183_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_37183_end_0 = const()[name = tensor("op_37183_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_37183_end_mask_0 = const()[name = tensor("op_37183_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37183_cast_fp16 = slice_by_index(begin = var_37183_begin_0, end = var_37183_end_0, end_mask = var_37183_end_mask_0, x = var_36930_cast_fp16)[name = tensor("op_37183_cast_fp16")]; + tensor var_37190_begin_0 = const()[name = tensor("op_37190_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_37190_end_0 = const()[name = tensor("op_37190_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_37190_end_mask_0 = const()[name = tensor("op_37190_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37190_cast_fp16 = slice_by_index(begin = var_37190_begin_0, end = var_37190_end_0, end_mask = var_37190_end_mask_0, x = var_36930_cast_fp16)[name = tensor("op_37190_cast_fp16")]; + tensor var_37197_begin_0 = const()[name = tensor("op_37197_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_37197_end_0 = const()[name = tensor("op_37197_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_37197_end_mask_0 = const()[name = tensor("op_37197_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37197_cast_fp16 = slice_by_index(begin = var_37197_begin_0, end = var_37197_end_0, end_mask = var_37197_end_mask_0, x = var_36930_cast_fp16)[name = tensor("op_37197_cast_fp16")]; + tensor var_37204_begin_0 = const()[name = tensor("op_37204_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_37204_end_0 = const()[name = tensor("op_37204_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_37204_end_mask_0 = const()[name = tensor("op_37204_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37204_cast_fp16 = slice_by_index(begin = var_37204_begin_0, end = var_37204_end_0, end_mask = var_37204_end_mask_0, x = var_36930_cast_fp16)[name = tensor("op_37204_cast_fp16")]; + tensor var_37211_begin_0 = const()[name = tensor("op_37211_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_37211_end_0 = const()[name = tensor("op_37211_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_37211_end_mask_0 = const()[name = tensor("op_37211_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37211_cast_fp16 = slice_by_index(begin = var_37211_begin_0, end = var_37211_end_0, end_mask = var_37211_end_mask_0, x = var_36934_cast_fp16)[name = tensor("op_37211_cast_fp16")]; + tensor var_37218_begin_0 = const()[name = tensor("op_37218_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_37218_end_0 = const()[name = tensor("op_37218_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_37218_end_mask_0 = const()[name = tensor("op_37218_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37218_cast_fp16 = slice_by_index(begin = var_37218_begin_0, end = var_37218_end_0, end_mask = var_37218_end_mask_0, x = var_36934_cast_fp16)[name = tensor("op_37218_cast_fp16")]; + tensor var_37225_begin_0 = const()[name = tensor("op_37225_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_37225_end_0 = const()[name = tensor("op_37225_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_37225_end_mask_0 = const()[name = tensor("op_37225_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37225_cast_fp16 = slice_by_index(begin = var_37225_begin_0, end = var_37225_end_0, end_mask = var_37225_end_mask_0, x = var_36934_cast_fp16)[name = tensor("op_37225_cast_fp16")]; + tensor var_37232_begin_0 = const()[name = tensor("op_37232_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_37232_end_0 = const()[name = tensor("op_37232_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_37232_end_mask_0 = const()[name = tensor("op_37232_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37232_cast_fp16 = slice_by_index(begin = var_37232_begin_0, end = var_37232_end_0, end_mask = var_37232_end_mask_0, x = var_36934_cast_fp16)[name = tensor("op_37232_cast_fp16")]; + tensor var_37239_begin_0 = const()[name = tensor("op_37239_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_37239_end_0 = const()[name = tensor("op_37239_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_37239_end_mask_0 = const()[name = tensor("op_37239_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37239_cast_fp16 = slice_by_index(begin = var_37239_begin_0, end = var_37239_end_0, end_mask = var_37239_end_mask_0, x = var_36938_cast_fp16)[name = tensor("op_37239_cast_fp16")]; + tensor var_37246_begin_0 = const()[name = tensor("op_37246_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_37246_end_0 = const()[name = tensor("op_37246_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_37246_end_mask_0 = const()[name = tensor("op_37246_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37246_cast_fp16 = slice_by_index(begin = var_37246_begin_0, end = var_37246_end_0, end_mask = var_37246_end_mask_0, x = var_36938_cast_fp16)[name = tensor("op_37246_cast_fp16")]; + tensor var_37253_begin_0 = const()[name = tensor("op_37253_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_37253_end_0 = const()[name = tensor("op_37253_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_37253_end_mask_0 = const()[name = tensor("op_37253_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37253_cast_fp16 = slice_by_index(begin = var_37253_begin_0, end = var_37253_end_0, end_mask = var_37253_end_mask_0, x = var_36938_cast_fp16)[name = tensor("op_37253_cast_fp16")]; + tensor var_37260_begin_0 = const()[name = tensor("op_37260_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_37260_end_0 = const()[name = tensor("op_37260_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_37260_end_mask_0 = const()[name = tensor("op_37260_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37260_cast_fp16 = slice_by_index(begin = var_37260_begin_0, end = var_37260_end_0, end_mask = var_37260_end_mask_0, x = var_36938_cast_fp16)[name = tensor("op_37260_cast_fp16")]; + tensor var_37267_begin_0 = const()[name = tensor("op_37267_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_37267_end_0 = const()[name = tensor("op_37267_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_37267_end_mask_0 = const()[name = tensor("op_37267_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37267_cast_fp16 = slice_by_index(begin = var_37267_begin_0, end = var_37267_end_0, end_mask = var_37267_end_mask_0, x = var_36942_cast_fp16)[name = tensor("op_37267_cast_fp16")]; + tensor var_37274_begin_0 = const()[name = tensor("op_37274_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_37274_end_0 = const()[name = tensor("op_37274_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_37274_end_mask_0 = const()[name = tensor("op_37274_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37274_cast_fp16 = slice_by_index(begin = var_37274_begin_0, end = var_37274_end_0, end_mask = var_37274_end_mask_0, x = var_36942_cast_fp16)[name = tensor("op_37274_cast_fp16")]; + tensor var_37281_begin_0 = const()[name = tensor("op_37281_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_37281_end_0 = const()[name = tensor("op_37281_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_37281_end_mask_0 = const()[name = tensor("op_37281_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37281_cast_fp16 = slice_by_index(begin = var_37281_begin_0, end = var_37281_end_0, end_mask = var_37281_end_mask_0, x = var_36942_cast_fp16)[name = tensor("op_37281_cast_fp16")]; + tensor var_37288_begin_0 = const()[name = tensor("op_37288_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_37288_end_0 = const()[name = tensor("op_37288_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_37288_end_mask_0 = const()[name = tensor("op_37288_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37288_cast_fp16 = slice_by_index(begin = var_37288_begin_0, end = var_37288_end_0, end_mask = var_37288_end_mask_0, x = var_36942_cast_fp16)[name = tensor("op_37288_cast_fp16")]; + tensor var_37295_begin_0 = const()[name = tensor("op_37295_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_37295_end_0 = const()[name = tensor("op_37295_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_37295_end_mask_0 = const()[name = tensor("op_37295_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37295_cast_fp16 = slice_by_index(begin = var_37295_begin_0, end = var_37295_end_0, end_mask = var_37295_end_mask_0, x = var_36946_cast_fp16)[name = tensor("op_37295_cast_fp16")]; + tensor var_37302_begin_0 = const()[name = tensor("op_37302_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_37302_end_0 = const()[name = tensor("op_37302_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_37302_end_mask_0 = const()[name = tensor("op_37302_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37302_cast_fp16 = slice_by_index(begin = var_37302_begin_0, end = var_37302_end_0, end_mask = var_37302_end_mask_0, x = var_36946_cast_fp16)[name = tensor("op_37302_cast_fp16")]; + tensor var_37309_begin_0 = const()[name = tensor("op_37309_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_37309_end_0 = const()[name = tensor("op_37309_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_37309_end_mask_0 = const()[name = tensor("op_37309_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37309_cast_fp16 = slice_by_index(begin = var_37309_begin_0, end = var_37309_end_0, end_mask = var_37309_end_mask_0, x = var_36946_cast_fp16)[name = tensor("op_37309_cast_fp16")]; + tensor var_37316_begin_0 = const()[name = tensor("op_37316_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_37316_end_0 = const()[name = tensor("op_37316_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_37316_end_mask_0 = const()[name = tensor("op_37316_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37316_cast_fp16 = slice_by_index(begin = var_37316_begin_0, end = var_37316_end_0, end_mask = var_37316_end_mask_0, x = var_36946_cast_fp16)[name = tensor("op_37316_cast_fp16")]; + tensor var_37323_begin_0 = const()[name = tensor("op_37323_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_37323_end_0 = const()[name = tensor("op_37323_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_37323_end_mask_0 = const()[name = tensor("op_37323_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37323_cast_fp16 = slice_by_index(begin = var_37323_begin_0, end = var_37323_end_0, end_mask = var_37323_end_mask_0, x = var_36950_cast_fp16)[name = tensor("op_37323_cast_fp16")]; + tensor var_37330_begin_0 = const()[name = tensor("op_37330_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_37330_end_0 = const()[name = tensor("op_37330_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_37330_end_mask_0 = const()[name = tensor("op_37330_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37330_cast_fp16 = slice_by_index(begin = var_37330_begin_0, end = var_37330_end_0, end_mask = var_37330_end_mask_0, x = var_36950_cast_fp16)[name = tensor("op_37330_cast_fp16")]; + tensor var_37337_begin_0 = const()[name = tensor("op_37337_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_37337_end_0 = const()[name = tensor("op_37337_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_37337_end_mask_0 = const()[name = tensor("op_37337_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37337_cast_fp16 = slice_by_index(begin = var_37337_begin_0, end = var_37337_end_0, end_mask = var_37337_end_mask_0, x = var_36950_cast_fp16)[name = tensor("op_37337_cast_fp16")]; + tensor var_37344_begin_0 = const()[name = tensor("op_37344_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_37344_end_0 = const()[name = tensor("op_37344_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_37344_end_mask_0 = const()[name = tensor("op_37344_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37344_cast_fp16 = slice_by_index(begin = var_37344_begin_0, end = var_37344_end_0, end_mask = var_37344_end_mask_0, x = var_36950_cast_fp16)[name = tensor("op_37344_cast_fp16")]; + tensor var_37351_begin_0 = const()[name = tensor("op_37351_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_37351_end_0 = const()[name = tensor("op_37351_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_37351_end_mask_0 = const()[name = tensor("op_37351_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37351_cast_fp16 = slice_by_index(begin = var_37351_begin_0, end = var_37351_end_0, end_mask = var_37351_end_mask_0, x = var_36954_cast_fp16)[name = tensor("op_37351_cast_fp16")]; + tensor var_37358_begin_0 = const()[name = tensor("op_37358_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_37358_end_0 = const()[name = tensor("op_37358_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_37358_end_mask_0 = const()[name = tensor("op_37358_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37358_cast_fp16 = slice_by_index(begin = var_37358_begin_0, end = var_37358_end_0, end_mask = var_37358_end_mask_0, x = var_36954_cast_fp16)[name = tensor("op_37358_cast_fp16")]; + tensor var_37365_begin_0 = const()[name = tensor("op_37365_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_37365_end_0 = const()[name = tensor("op_37365_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_37365_end_mask_0 = const()[name = tensor("op_37365_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37365_cast_fp16 = slice_by_index(begin = var_37365_begin_0, end = var_37365_end_0, end_mask = var_37365_end_mask_0, x = var_36954_cast_fp16)[name = tensor("op_37365_cast_fp16")]; + tensor var_37372_begin_0 = const()[name = tensor("op_37372_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_37372_end_0 = const()[name = tensor("op_37372_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_37372_end_mask_0 = const()[name = tensor("op_37372_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37372_cast_fp16 = slice_by_index(begin = var_37372_begin_0, end = var_37372_end_0, end_mask = var_37372_end_mask_0, x = var_36954_cast_fp16)[name = tensor("op_37372_cast_fp16")]; + tensor var_37379_begin_0 = const()[name = tensor("op_37379_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_37379_end_0 = const()[name = tensor("op_37379_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_37379_end_mask_0 = const()[name = tensor("op_37379_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37379_cast_fp16 = slice_by_index(begin = var_37379_begin_0, end = var_37379_end_0, end_mask = var_37379_end_mask_0, x = var_36958_cast_fp16)[name = tensor("op_37379_cast_fp16")]; + tensor var_37386_begin_0 = const()[name = tensor("op_37386_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_37386_end_0 = const()[name = tensor("op_37386_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_37386_end_mask_0 = const()[name = tensor("op_37386_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37386_cast_fp16 = slice_by_index(begin = var_37386_begin_0, end = var_37386_end_0, end_mask = var_37386_end_mask_0, x = var_36958_cast_fp16)[name = tensor("op_37386_cast_fp16")]; + tensor var_37393_begin_0 = const()[name = tensor("op_37393_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_37393_end_0 = const()[name = tensor("op_37393_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_37393_end_mask_0 = const()[name = tensor("op_37393_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37393_cast_fp16 = slice_by_index(begin = var_37393_begin_0, end = var_37393_end_0, end_mask = var_37393_end_mask_0, x = var_36958_cast_fp16)[name = tensor("op_37393_cast_fp16")]; + tensor var_37400_begin_0 = const()[name = tensor("op_37400_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_37400_end_0 = const()[name = tensor("op_37400_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_37400_end_mask_0 = const()[name = tensor("op_37400_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37400_cast_fp16 = slice_by_index(begin = var_37400_begin_0, end = var_37400_end_0, end_mask = var_37400_end_mask_0, x = var_36958_cast_fp16)[name = tensor("op_37400_cast_fp16")]; + tensor var_37407_begin_0 = const()[name = tensor("op_37407_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_37407_end_0 = const()[name = tensor("op_37407_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_37407_end_mask_0 = const()[name = tensor("op_37407_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37407_cast_fp16 = slice_by_index(begin = var_37407_begin_0, end = var_37407_end_0, end_mask = var_37407_end_mask_0, x = var_36962_cast_fp16)[name = tensor("op_37407_cast_fp16")]; + tensor var_37414_begin_0 = const()[name = tensor("op_37414_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_37414_end_0 = const()[name = tensor("op_37414_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_37414_end_mask_0 = const()[name = tensor("op_37414_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37414_cast_fp16 = slice_by_index(begin = var_37414_begin_0, end = var_37414_end_0, end_mask = var_37414_end_mask_0, x = var_36962_cast_fp16)[name = tensor("op_37414_cast_fp16")]; + tensor var_37421_begin_0 = const()[name = tensor("op_37421_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_37421_end_0 = const()[name = tensor("op_37421_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_37421_end_mask_0 = const()[name = tensor("op_37421_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37421_cast_fp16 = slice_by_index(begin = var_37421_begin_0, end = var_37421_end_0, end_mask = var_37421_end_mask_0, x = var_36962_cast_fp16)[name = tensor("op_37421_cast_fp16")]; + tensor var_37428_begin_0 = const()[name = tensor("op_37428_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_37428_end_0 = const()[name = tensor("op_37428_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_37428_end_mask_0 = const()[name = tensor("op_37428_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37428_cast_fp16 = slice_by_index(begin = var_37428_begin_0, end = var_37428_end_0, end_mask = var_37428_end_mask_0, x = var_36962_cast_fp16)[name = tensor("op_37428_cast_fp16")]; + tensor var_37435_begin_0 = const()[name = tensor("op_37435_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_37435_end_0 = const()[name = tensor("op_37435_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_37435_end_mask_0 = const()[name = tensor("op_37435_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37435_cast_fp16 = slice_by_index(begin = var_37435_begin_0, end = var_37435_end_0, end_mask = var_37435_end_mask_0, x = var_36966_cast_fp16)[name = tensor("op_37435_cast_fp16")]; + tensor var_37442_begin_0 = const()[name = tensor("op_37442_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_37442_end_0 = const()[name = tensor("op_37442_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_37442_end_mask_0 = const()[name = tensor("op_37442_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37442_cast_fp16 = slice_by_index(begin = var_37442_begin_0, end = var_37442_end_0, end_mask = var_37442_end_mask_0, x = var_36966_cast_fp16)[name = tensor("op_37442_cast_fp16")]; + tensor var_37449_begin_0 = const()[name = tensor("op_37449_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_37449_end_0 = const()[name = tensor("op_37449_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_37449_end_mask_0 = const()[name = tensor("op_37449_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37449_cast_fp16 = slice_by_index(begin = var_37449_begin_0, end = var_37449_end_0, end_mask = var_37449_end_mask_0, x = var_36966_cast_fp16)[name = tensor("op_37449_cast_fp16")]; + tensor var_37456_begin_0 = const()[name = tensor("op_37456_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_37456_end_0 = const()[name = tensor("op_37456_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_37456_end_mask_0 = const()[name = tensor("op_37456_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37456_cast_fp16 = slice_by_index(begin = var_37456_begin_0, end = var_37456_end_0, end_mask = var_37456_end_mask_0, x = var_36966_cast_fp16)[name = tensor("op_37456_cast_fp16")]; + tensor var_37463_begin_0 = const()[name = tensor("op_37463_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_37463_end_0 = const()[name = tensor("op_37463_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_37463_end_mask_0 = const()[name = tensor("op_37463_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37463_cast_fp16 = slice_by_index(begin = var_37463_begin_0, end = var_37463_end_0, end_mask = var_37463_end_mask_0, x = var_36970_cast_fp16)[name = tensor("op_37463_cast_fp16")]; + tensor var_37470_begin_0 = const()[name = tensor("op_37470_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_37470_end_0 = const()[name = tensor("op_37470_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_37470_end_mask_0 = const()[name = tensor("op_37470_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37470_cast_fp16 = slice_by_index(begin = var_37470_begin_0, end = var_37470_end_0, end_mask = var_37470_end_mask_0, x = var_36970_cast_fp16)[name = tensor("op_37470_cast_fp16")]; + tensor var_37477_begin_0 = const()[name = tensor("op_37477_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_37477_end_0 = const()[name = tensor("op_37477_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_37477_end_mask_0 = const()[name = tensor("op_37477_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37477_cast_fp16 = slice_by_index(begin = var_37477_begin_0, end = var_37477_end_0, end_mask = var_37477_end_mask_0, x = var_36970_cast_fp16)[name = tensor("op_37477_cast_fp16")]; + tensor var_37484_begin_0 = const()[name = tensor("op_37484_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_37484_end_0 = const()[name = tensor("op_37484_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_37484_end_mask_0 = const()[name = tensor("op_37484_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37484_cast_fp16 = slice_by_index(begin = var_37484_begin_0, end = var_37484_end_0, end_mask = var_37484_end_mask_0, x = var_36970_cast_fp16)[name = tensor("op_37484_cast_fp16")]; + tensor var_37491_begin_0 = const()[name = tensor("op_37491_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_37491_end_0 = const()[name = tensor("op_37491_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_37491_end_mask_0 = const()[name = tensor("op_37491_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37491_cast_fp16 = slice_by_index(begin = var_37491_begin_0, end = var_37491_end_0, end_mask = var_37491_end_mask_0, x = var_36974_cast_fp16)[name = tensor("op_37491_cast_fp16")]; + tensor var_37498_begin_0 = const()[name = tensor("op_37498_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_37498_end_0 = const()[name = tensor("op_37498_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_37498_end_mask_0 = const()[name = tensor("op_37498_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37498_cast_fp16 = slice_by_index(begin = var_37498_begin_0, end = var_37498_end_0, end_mask = var_37498_end_mask_0, x = var_36974_cast_fp16)[name = tensor("op_37498_cast_fp16")]; + tensor var_37505_begin_0 = const()[name = tensor("op_37505_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_37505_end_0 = const()[name = tensor("op_37505_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_37505_end_mask_0 = const()[name = tensor("op_37505_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37505_cast_fp16 = slice_by_index(begin = var_37505_begin_0, end = var_37505_end_0, end_mask = var_37505_end_mask_0, x = var_36974_cast_fp16)[name = tensor("op_37505_cast_fp16")]; + tensor var_37512_begin_0 = const()[name = tensor("op_37512_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_37512_end_0 = const()[name = tensor("op_37512_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_37512_end_mask_0 = const()[name = tensor("op_37512_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37512_cast_fp16 = slice_by_index(begin = var_37512_begin_0, end = var_37512_end_0, end_mask = var_37512_end_mask_0, x = var_36974_cast_fp16)[name = tensor("op_37512_cast_fp16")]; + tensor var_37519_begin_0 = const()[name = tensor("op_37519_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_37519_end_0 = const()[name = tensor("op_37519_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_37519_end_mask_0 = const()[name = tensor("op_37519_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37519_cast_fp16 = slice_by_index(begin = var_37519_begin_0, end = var_37519_end_0, end_mask = var_37519_end_mask_0, x = var_36978_cast_fp16)[name = tensor("op_37519_cast_fp16")]; + tensor var_37526_begin_0 = const()[name = tensor("op_37526_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_37526_end_0 = const()[name = tensor("op_37526_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_37526_end_mask_0 = const()[name = tensor("op_37526_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37526_cast_fp16 = slice_by_index(begin = var_37526_begin_0, end = var_37526_end_0, end_mask = var_37526_end_mask_0, x = var_36978_cast_fp16)[name = tensor("op_37526_cast_fp16")]; + tensor var_37533_begin_0 = const()[name = tensor("op_37533_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_37533_end_0 = const()[name = tensor("op_37533_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_37533_end_mask_0 = const()[name = tensor("op_37533_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37533_cast_fp16 = slice_by_index(begin = var_37533_begin_0, end = var_37533_end_0, end_mask = var_37533_end_mask_0, x = var_36978_cast_fp16)[name = tensor("op_37533_cast_fp16")]; + tensor var_37540_begin_0 = const()[name = tensor("op_37540_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_37540_end_0 = const()[name = tensor("op_37540_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_37540_end_mask_0 = const()[name = tensor("op_37540_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37540_cast_fp16 = slice_by_index(begin = var_37540_begin_0, end = var_37540_end_0, end_mask = var_37540_end_mask_0, x = var_36978_cast_fp16)[name = tensor("op_37540_cast_fp16")]; + tensor k_49_perm_0 = const()[name = tensor("k_49_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_37545_begin_0 = const()[name = tensor("op_37545_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_37545_end_0 = const()[name = tensor("op_37545_end_0"), val = tensor([1, 1500, 1, 64])]; + tensor var_37545_end_mask_0 = const()[name = tensor("op_37545_end_mask_0"), val = tensor([true, true, true, false])]; + tensor transpose_7 = transpose(perm = k_49_perm_0, x = key_49_cast_fp16)[name = tensor("transpose_7")]; + tensor var_37545_cast_fp16 = slice_by_index(begin = var_37545_begin_0, end = var_37545_end_0, end_mask = var_37545_end_mask_0, x = transpose_7)[name = tensor("op_37545_cast_fp16")]; + tensor var_37549_begin_0 = const()[name = tensor("op_37549_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor var_37549_end_0 = const()[name = tensor("op_37549_end_0"), val = tensor([1, 1500, 1, 128])]; + tensor var_37549_end_mask_0 = const()[name = tensor("op_37549_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37549_cast_fp16 = slice_by_index(begin = var_37549_begin_0, end = var_37549_end_0, end_mask = var_37549_end_mask_0, x = transpose_7)[name = tensor("op_37549_cast_fp16")]; + tensor var_37553_begin_0 = const()[name = tensor("op_37553_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_37553_end_0 = const()[name = tensor("op_37553_end_0"), val = tensor([1, 1500, 1, 192])]; + tensor var_37553_end_mask_0 = const()[name = tensor("op_37553_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37553_cast_fp16 = slice_by_index(begin = var_37553_begin_0, end = var_37553_end_0, end_mask = var_37553_end_mask_0, x = transpose_7)[name = tensor("op_37553_cast_fp16")]; + tensor var_37557_begin_0 = const()[name = tensor("op_37557_begin_0"), val = tensor([0, 0, 0, 192])]; + tensor var_37557_end_0 = const()[name = tensor("op_37557_end_0"), val = tensor([1, 1500, 1, 256])]; + tensor var_37557_end_mask_0 = const()[name = tensor("op_37557_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37557_cast_fp16 = slice_by_index(begin = var_37557_begin_0, end = var_37557_end_0, end_mask = var_37557_end_mask_0, x = transpose_7)[name = tensor("op_37557_cast_fp16")]; + tensor var_37561_begin_0 = const()[name = tensor("op_37561_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_37561_end_0 = const()[name = tensor("op_37561_end_0"), val = tensor([1, 1500, 1, 320])]; + tensor var_37561_end_mask_0 = const()[name = tensor("op_37561_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37561_cast_fp16 = slice_by_index(begin = var_37561_begin_0, end = var_37561_end_0, end_mask = var_37561_end_mask_0, x = transpose_7)[name = tensor("op_37561_cast_fp16")]; + tensor var_37565_begin_0 = const()[name = tensor("op_37565_begin_0"), val = tensor([0, 0, 0, 320])]; + tensor var_37565_end_0 = const()[name = tensor("op_37565_end_0"), val = tensor([1, 1500, 1, 384])]; + tensor var_37565_end_mask_0 = const()[name = tensor("op_37565_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37565_cast_fp16 = slice_by_index(begin = var_37565_begin_0, end = var_37565_end_0, end_mask = var_37565_end_mask_0, x = transpose_7)[name = tensor("op_37565_cast_fp16")]; + tensor var_37569_begin_0 = const()[name = tensor("op_37569_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_37569_end_0 = const()[name = tensor("op_37569_end_0"), val = tensor([1, 1500, 1, 448])]; + tensor var_37569_end_mask_0 = const()[name = tensor("op_37569_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37569_cast_fp16 = slice_by_index(begin = var_37569_begin_0, end = var_37569_end_0, end_mask = var_37569_end_mask_0, x = transpose_7)[name = tensor("op_37569_cast_fp16")]; + tensor var_37573_begin_0 = const()[name = tensor("op_37573_begin_0"), val = tensor([0, 0, 0, 448])]; + tensor var_37573_end_0 = const()[name = tensor("op_37573_end_0"), val = tensor([1, 1500, 1, 512])]; + tensor var_37573_end_mask_0 = const()[name = tensor("op_37573_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37573_cast_fp16 = slice_by_index(begin = var_37573_begin_0, end = var_37573_end_0, end_mask = var_37573_end_mask_0, x = transpose_7)[name = tensor("op_37573_cast_fp16")]; + tensor var_37577_begin_0 = const()[name = tensor("op_37577_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_37577_end_0 = const()[name = tensor("op_37577_end_0"), val = tensor([1, 1500, 1, 576])]; + tensor var_37577_end_mask_0 = const()[name = tensor("op_37577_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37577_cast_fp16 = slice_by_index(begin = var_37577_begin_0, end = var_37577_end_0, end_mask = var_37577_end_mask_0, x = transpose_7)[name = tensor("op_37577_cast_fp16")]; + tensor var_37581_begin_0 = const()[name = tensor("op_37581_begin_0"), val = tensor([0, 0, 0, 576])]; + tensor var_37581_end_0 = const()[name = tensor("op_37581_end_0"), val = tensor([1, 1500, 1, 640])]; + tensor var_37581_end_mask_0 = const()[name = tensor("op_37581_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37581_cast_fp16 = slice_by_index(begin = var_37581_begin_0, end = var_37581_end_0, end_mask = var_37581_end_mask_0, x = transpose_7)[name = tensor("op_37581_cast_fp16")]; + tensor var_37585_begin_0 = const()[name = tensor("op_37585_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_37585_end_0 = const()[name = tensor("op_37585_end_0"), val = tensor([1, 1500, 1, 704])]; + tensor var_37585_end_mask_0 = const()[name = tensor("op_37585_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37585_cast_fp16 = slice_by_index(begin = var_37585_begin_0, end = var_37585_end_0, end_mask = var_37585_end_mask_0, x = transpose_7)[name = tensor("op_37585_cast_fp16")]; + tensor var_37589_begin_0 = const()[name = tensor("op_37589_begin_0"), val = tensor([0, 0, 0, 704])]; + tensor var_37589_end_0 = const()[name = tensor("op_37589_end_0"), val = tensor([1, 1500, 1, 768])]; + tensor var_37589_end_mask_0 = const()[name = tensor("op_37589_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37589_cast_fp16 = slice_by_index(begin = var_37589_begin_0, end = var_37589_end_0, end_mask = var_37589_end_mask_0, x = transpose_7)[name = tensor("op_37589_cast_fp16")]; + tensor var_37593_begin_0 = const()[name = tensor("op_37593_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_37593_end_0 = const()[name = tensor("op_37593_end_0"), val = tensor([1, 1500, 1, 832])]; + tensor var_37593_end_mask_0 = const()[name = tensor("op_37593_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37593_cast_fp16 = slice_by_index(begin = var_37593_begin_0, end = var_37593_end_0, end_mask = var_37593_end_mask_0, x = transpose_7)[name = tensor("op_37593_cast_fp16")]; + tensor var_37597_begin_0 = const()[name = tensor("op_37597_begin_0"), val = tensor([0, 0, 0, 832])]; + tensor var_37597_end_0 = const()[name = tensor("op_37597_end_0"), val = tensor([1, 1500, 1, 896])]; + tensor var_37597_end_mask_0 = const()[name = tensor("op_37597_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37597_cast_fp16 = slice_by_index(begin = var_37597_begin_0, end = var_37597_end_0, end_mask = var_37597_end_mask_0, x = transpose_7)[name = tensor("op_37597_cast_fp16")]; + tensor var_37601_begin_0 = const()[name = tensor("op_37601_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_37601_end_0 = const()[name = tensor("op_37601_end_0"), val = tensor([1, 1500, 1, 960])]; + tensor var_37601_end_mask_0 = const()[name = tensor("op_37601_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37601_cast_fp16 = slice_by_index(begin = var_37601_begin_0, end = var_37601_end_0, end_mask = var_37601_end_mask_0, x = transpose_7)[name = tensor("op_37601_cast_fp16")]; + tensor var_37605_begin_0 = const()[name = tensor("op_37605_begin_0"), val = tensor([0, 0, 0, 960])]; + tensor var_37605_end_0 = const()[name = tensor("op_37605_end_0"), val = tensor([1, 1500, 1, 1024])]; + tensor var_37605_end_mask_0 = const()[name = tensor("op_37605_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37605_cast_fp16 = slice_by_index(begin = var_37605_begin_0, end = var_37605_end_0, end_mask = var_37605_end_mask_0, x = transpose_7)[name = tensor("op_37605_cast_fp16")]; + tensor var_37609_begin_0 = const()[name = tensor("op_37609_begin_0"), val = tensor([0, 0, 0, 1024])]; + tensor var_37609_end_0 = const()[name = tensor("op_37609_end_0"), val = tensor([1, 1500, 1, 1088])]; + tensor var_37609_end_mask_0 = const()[name = tensor("op_37609_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37609_cast_fp16 = slice_by_index(begin = var_37609_begin_0, end = var_37609_end_0, end_mask = var_37609_end_mask_0, x = transpose_7)[name = tensor("op_37609_cast_fp16")]; + tensor var_37613_begin_0 = const()[name = tensor("op_37613_begin_0"), val = tensor([0, 0, 0, 1088])]; + tensor var_37613_end_0 = const()[name = tensor("op_37613_end_0"), val = tensor([1, 1500, 1, 1152])]; + tensor var_37613_end_mask_0 = const()[name = tensor("op_37613_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37613_cast_fp16 = slice_by_index(begin = var_37613_begin_0, end = var_37613_end_0, end_mask = var_37613_end_mask_0, x = transpose_7)[name = tensor("op_37613_cast_fp16")]; + tensor var_37617_begin_0 = const()[name = tensor("op_37617_begin_0"), val = tensor([0, 0, 0, 1152])]; + tensor var_37617_end_0 = const()[name = tensor("op_37617_end_0"), val = tensor([1, 1500, 1, 1216])]; + tensor var_37617_end_mask_0 = const()[name = tensor("op_37617_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37617_cast_fp16 = slice_by_index(begin = var_37617_begin_0, end = var_37617_end_0, end_mask = var_37617_end_mask_0, x = transpose_7)[name = tensor("op_37617_cast_fp16")]; + tensor var_37621_begin_0 = const()[name = tensor("op_37621_begin_0"), val = tensor([0, 0, 0, 1216])]; + tensor var_37621_end_0 = const()[name = tensor("op_37621_end_0"), val = tensor([1, 1500, 1, 1280])]; + tensor var_37621_end_mask_0 = const()[name = tensor("op_37621_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_37621_cast_fp16 = slice_by_index(begin = var_37621_begin_0, end = var_37621_end_0, end_mask = var_37621_end_mask_0, x = transpose_7)[name = tensor("op_37621_cast_fp16")]; + tensor var_37623_begin_0 = const()[name = tensor("op_37623_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_37623_end_0 = const()[name = tensor("op_37623_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_37623_end_mask_0 = const()[name = tensor("op_37623_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_37623_cast_fp16 = slice_by_index(begin = var_37623_begin_0, end = var_37623_end_0, end_mask = var_37623_end_mask_0, x = value_49_cast_fp16)[name = tensor("op_37623_cast_fp16")]; + tensor var_37627_begin_0 = const()[name = tensor("op_37627_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_37627_end_0 = const()[name = tensor("op_37627_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_37627_end_mask_0 = const()[name = tensor("op_37627_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_37627_cast_fp16 = slice_by_index(begin = var_37627_begin_0, end = var_37627_end_0, end_mask = var_37627_end_mask_0, x = value_49_cast_fp16)[name = tensor("op_37627_cast_fp16")]; + tensor var_37631_begin_0 = const()[name = tensor("op_37631_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_37631_end_0 = const()[name = tensor("op_37631_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_37631_end_mask_0 = const()[name = tensor("op_37631_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_37631_cast_fp16 = slice_by_index(begin = var_37631_begin_0, end = var_37631_end_0, end_mask = var_37631_end_mask_0, x = value_49_cast_fp16)[name = tensor("op_37631_cast_fp16")]; + tensor var_37635_begin_0 = const()[name = tensor("op_37635_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_37635_end_0 = const()[name = tensor("op_37635_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_37635_end_mask_0 = const()[name = tensor("op_37635_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_37635_cast_fp16 = slice_by_index(begin = var_37635_begin_0, end = var_37635_end_0, end_mask = var_37635_end_mask_0, x = value_49_cast_fp16)[name = tensor("op_37635_cast_fp16")]; + tensor var_37639_begin_0 = const()[name = tensor("op_37639_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_37639_end_0 = const()[name = tensor("op_37639_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_37639_end_mask_0 = const()[name = tensor("op_37639_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_37639_cast_fp16 = slice_by_index(begin = var_37639_begin_0, end = var_37639_end_0, end_mask = var_37639_end_mask_0, x = value_49_cast_fp16)[name = tensor("op_37639_cast_fp16")]; + tensor var_37643_begin_0 = const()[name = tensor("op_37643_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_37643_end_0 = const()[name = tensor("op_37643_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_37643_end_mask_0 = const()[name = tensor("op_37643_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_37643_cast_fp16 = slice_by_index(begin = var_37643_begin_0, end = var_37643_end_0, end_mask = var_37643_end_mask_0, x = value_49_cast_fp16)[name = tensor("op_37643_cast_fp16")]; + tensor var_37647_begin_0 = const()[name = tensor("op_37647_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_37647_end_0 = const()[name = tensor("op_37647_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_37647_end_mask_0 = const()[name = tensor("op_37647_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_37647_cast_fp16 = slice_by_index(begin = var_37647_begin_0, end = var_37647_end_0, end_mask = var_37647_end_mask_0, x = value_49_cast_fp16)[name = tensor("op_37647_cast_fp16")]; + tensor var_37651_begin_0 = const()[name = tensor("op_37651_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_37651_end_0 = const()[name = tensor("op_37651_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_37651_end_mask_0 = const()[name = tensor("op_37651_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_37651_cast_fp16 = slice_by_index(begin = var_37651_begin_0, end = var_37651_end_0, end_mask = var_37651_end_mask_0, x = value_49_cast_fp16)[name = tensor("op_37651_cast_fp16")]; + tensor var_37655_begin_0 = const()[name = tensor("op_37655_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_37655_end_0 = const()[name = tensor("op_37655_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_37655_end_mask_0 = const()[name = tensor("op_37655_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_37655_cast_fp16 = slice_by_index(begin = var_37655_begin_0, end = var_37655_end_0, end_mask = var_37655_end_mask_0, x = value_49_cast_fp16)[name = tensor("op_37655_cast_fp16")]; + tensor var_37659_begin_0 = const()[name = tensor("op_37659_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_37659_end_0 = const()[name = tensor("op_37659_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_37659_end_mask_0 = const()[name = tensor("op_37659_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_37659_cast_fp16 = slice_by_index(begin = var_37659_begin_0, end = var_37659_end_0, end_mask = var_37659_end_mask_0, x = value_49_cast_fp16)[name = tensor("op_37659_cast_fp16")]; + tensor var_37663_begin_0 = const()[name = tensor("op_37663_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_37663_end_0 = const()[name = tensor("op_37663_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_37663_end_mask_0 = const()[name = tensor("op_37663_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_37663_cast_fp16 = slice_by_index(begin = var_37663_begin_0, end = var_37663_end_0, end_mask = var_37663_end_mask_0, x = value_49_cast_fp16)[name = tensor("op_37663_cast_fp16")]; + tensor var_37667_begin_0 = const()[name = tensor("op_37667_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_37667_end_0 = const()[name = tensor("op_37667_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_37667_end_mask_0 = const()[name = tensor("op_37667_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_37667_cast_fp16 = slice_by_index(begin = var_37667_begin_0, end = var_37667_end_0, end_mask = var_37667_end_mask_0, x = value_49_cast_fp16)[name = tensor("op_37667_cast_fp16")]; + tensor var_37671_begin_0 = const()[name = tensor("op_37671_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_37671_end_0 = const()[name = tensor("op_37671_end_0"), val = tensor([1, 832, 1, 1500])]; + tensor var_37671_end_mask_0 = const()[name = tensor("op_37671_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_37671_cast_fp16 = slice_by_index(begin = var_37671_begin_0, end = var_37671_end_0, end_mask = var_37671_end_mask_0, x = value_49_cast_fp16)[name = tensor("op_37671_cast_fp16")]; + tensor var_37675_begin_0 = const()[name = tensor("op_37675_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_37675_end_0 = const()[name = tensor("op_37675_end_0"), val = tensor([1, 896, 1, 1500])]; + tensor var_37675_end_mask_0 = const()[name = tensor("op_37675_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_37675_cast_fp16 = slice_by_index(begin = var_37675_begin_0, end = var_37675_end_0, end_mask = var_37675_end_mask_0, x = value_49_cast_fp16)[name = tensor("op_37675_cast_fp16")]; + tensor var_37679_begin_0 = const()[name = tensor("op_37679_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_37679_end_0 = const()[name = tensor("op_37679_end_0"), val = tensor([1, 960, 1, 1500])]; + tensor var_37679_end_mask_0 = const()[name = tensor("op_37679_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_37679_cast_fp16 = slice_by_index(begin = var_37679_begin_0, end = var_37679_end_0, end_mask = var_37679_end_mask_0, x = value_49_cast_fp16)[name = tensor("op_37679_cast_fp16")]; + tensor var_37683_begin_0 = const()[name = tensor("op_37683_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_37683_end_0 = const()[name = tensor("op_37683_end_0"), val = tensor([1, 1024, 1, 1500])]; + tensor var_37683_end_mask_0 = const()[name = tensor("op_37683_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_37683_cast_fp16 = slice_by_index(begin = var_37683_begin_0, end = var_37683_end_0, end_mask = var_37683_end_mask_0, x = value_49_cast_fp16)[name = tensor("op_37683_cast_fp16")]; + tensor var_37687_begin_0 = const()[name = tensor("op_37687_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_37687_end_0 = const()[name = tensor("op_37687_end_0"), val = tensor([1, 1088, 1, 1500])]; + tensor var_37687_end_mask_0 = const()[name = tensor("op_37687_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_37687_cast_fp16 = slice_by_index(begin = var_37687_begin_0, end = var_37687_end_0, end_mask = var_37687_end_mask_0, x = value_49_cast_fp16)[name = tensor("op_37687_cast_fp16")]; + tensor var_37691_begin_0 = const()[name = tensor("op_37691_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_37691_end_0 = const()[name = tensor("op_37691_end_0"), val = tensor([1, 1152, 1, 1500])]; + tensor var_37691_end_mask_0 = const()[name = tensor("op_37691_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_37691_cast_fp16 = slice_by_index(begin = var_37691_begin_0, end = var_37691_end_0, end_mask = var_37691_end_mask_0, x = value_49_cast_fp16)[name = tensor("op_37691_cast_fp16")]; + tensor var_37695_begin_0 = const()[name = tensor("op_37695_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_37695_end_0 = const()[name = tensor("op_37695_end_0"), val = tensor([1, 1216, 1, 1500])]; + tensor var_37695_end_mask_0 = const()[name = tensor("op_37695_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_37695_cast_fp16 = slice_by_index(begin = var_37695_begin_0, end = var_37695_end_0, end_mask = var_37695_end_mask_0, x = value_49_cast_fp16)[name = tensor("op_37695_cast_fp16")]; + tensor var_37699_begin_0 = const()[name = tensor("op_37699_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_37699_end_0 = const()[name = tensor("op_37699_end_0"), val = tensor([1, 1280, 1, 1500])]; + tensor var_37699_end_mask_0 = const()[name = tensor("op_37699_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_37699_cast_fp16 = slice_by_index(begin = var_37699_begin_0, end = var_37699_end_0, end_mask = var_37699_end_mask_0, x = value_49_cast_fp16)[name = tensor("op_37699_cast_fp16")]; + tensor var_37703_equation_0 = const()[name = tensor("op_37703_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37703_cast_fp16 = einsum(equation = var_37703_equation_0, values = (var_37545_cast_fp16, var_36987_cast_fp16))[name = tensor("op_37703_cast_fp16")]; + tensor var_37704_to_fp16 = const()[name = tensor("op_37704_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3841_cast_fp16 = mul(x = var_37703_cast_fp16, y = var_37704_to_fp16)[name = tensor("aw_chunk_3841_cast_fp16")]; + tensor var_37707_equation_0 = const()[name = tensor("op_37707_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37707_cast_fp16 = einsum(equation = var_37707_equation_0, values = (var_37545_cast_fp16, var_36994_cast_fp16))[name = tensor("op_37707_cast_fp16")]; + tensor var_37708_to_fp16 = const()[name = tensor("op_37708_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3843_cast_fp16 = mul(x = var_37707_cast_fp16, y = var_37708_to_fp16)[name = tensor("aw_chunk_3843_cast_fp16")]; + tensor var_37711_equation_0 = const()[name = tensor("op_37711_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37711_cast_fp16 = einsum(equation = var_37711_equation_0, values = (var_37545_cast_fp16, var_37001_cast_fp16))[name = tensor("op_37711_cast_fp16")]; + tensor var_37712_to_fp16 = const()[name = tensor("op_37712_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3845_cast_fp16 = mul(x = var_37711_cast_fp16, y = var_37712_to_fp16)[name = tensor("aw_chunk_3845_cast_fp16")]; + tensor var_37715_equation_0 = const()[name = tensor("op_37715_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37715_cast_fp16 = einsum(equation = var_37715_equation_0, values = (var_37545_cast_fp16, var_37008_cast_fp16))[name = tensor("op_37715_cast_fp16")]; + tensor var_37716_to_fp16 = const()[name = tensor("op_37716_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3847_cast_fp16 = mul(x = var_37715_cast_fp16, y = var_37716_to_fp16)[name = tensor("aw_chunk_3847_cast_fp16")]; + tensor var_37719_equation_0 = const()[name = tensor("op_37719_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37719_cast_fp16 = einsum(equation = var_37719_equation_0, values = (var_37549_cast_fp16, var_37015_cast_fp16))[name = tensor("op_37719_cast_fp16")]; + tensor var_37720_to_fp16 = const()[name = tensor("op_37720_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3849_cast_fp16 = mul(x = var_37719_cast_fp16, y = var_37720_to_fp16)[name = tensor("aw_chunk_3849_cast_fp16")]; + tensor var_37723_equation_0 = const()[name = tensor("op_37723_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37723_cast_fp16 = einsum(equation = var_37723_equation_0, values = (var_37549_cast_fp16, var_37022_cast_fp16))[name = tensor("op_37723_cast_fp16")]; + tensor var_37724_to_fp16 = const()[name = tensor("op_37724_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3851_cast_fp16 = mul(x = var_37723_cast_fp16, y = var_37724_to_fp16)[name = tensor("aw_chunk_3851_cast_fp16")]; + tensor var_37727_equation_0 = const()[name = tensor("op_37727_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37727_cast_fp16 = einsum(equation = var_37727_equation_0, values = (var_37549_cast_fp16, var_37029_cast_fp16))[name = tensor("op_37727_cast_fp16")]; + tensor var_37728_to_fp16 = const()[name = tensor("op_37728_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3853_cast_fp16 = mul(x = var_37727_cast_fp16, y = var_37728_to_fp16)[name = tensor("aw_chunk_3853_cast_fp16")]; + tensor var_37731_equation_0 = const()[name = tensor("op_37731_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37731_cast_fp16 = einsum(equation = var_37731_equation_0, values = (var_37549_cast_fp16, var_37036_cast_fp16))[name = tensor("op_37731_cast_fp16")]; + tensor var_37732_to_fp16 = const()[name = tensor("op_37732_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3855_cast_fp16 = mul(x = var_37731_cast_fp16, y = var_37732_to_fp16)[name = tensor("aw_chunk_3855_cast_fp16")]; + tensor var_37735_equation_0 = const()[name = tensor("op_37735_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37735_cast_fp16 = einsum(equation = var_37735_equation_0, values = (var_37553_cast_fp16, var_37043_cast_fp16))[name = tensor("op_37735_cast_fp16")]; + tensor var_37736_to_fp16 = const()[name = tensor("op_37736_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3857_cast_fp16 = mul(x = var_37735_cast_fp16, y = var_37736_to_fp16)[name = tensor("aw_chunk_3857_cast_fp16")]; + tensor var_37739_equation_0 = const()[name = tensor("op_37739_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37739_cast_fp16 = einsum(equation = var_37739_equation_0, values = (var_37553_cast_fp16, var_37050_cast_fp16))[name = tensor("op_37739_cast_fp16")]; + tensor var_37740_to_fp16 = const()[name = tensor("op_37740_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3859_cast_fp16 = mul(x = var_37739_cast_fp16, y = var_37740_to_fp16)[name = tensor("aw_chunk_3859_cast_fp16")]; + tensor var_37743_equation_0 = const()[name = tensor("op_37743_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37743_cast_fp16 = einsum(equation = var_37743_equation_0, values = (var_37553_cast_fp16, var_37057_cast_fp16))[name = tensor("op_37743_cast_fp16")]; + tensor var_37744_to_fp16 = const()[name = tensor("op_37744_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3861_cast_fp16 = mul(x = var_37743_cast_fp16, y = var_37744_to_fp16)[name = tensor("aw_chunk_3861_cast_fp16")]; + tensor var_37747_equation_0 = const()[name = tensor("op_37747_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37747_cast_fp16 = einsum(equation = var_37747_equation_0, values = (var_37553_cast_fp16, var_37064_cast_fp16))[name = tensor("op_37747_cast_fp16")]; + tensor var_37748_to_fp16 = const()[name = tensor("op_37748_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3863_cast_fp16 = mul(x = var_37747_cast_fp16, y = var_37748_to_fp16)[name = tensor("aw_chunk_3863_cast_fp16")]; + tensor var_37751_equation_0 = const()[name = tensor("op_37751_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37751_cast_fp16 = einsum(equation = var_37751_equation_0, values = (var_37557_cast_fp16, var_37071_cast_fp16))[name = tensor("op_37751_cast_fp16")]; + tensor var_37752_to_fp16 = const()[name = tensor("op_37752_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3865_cast_fp16 = mul(x = var_37751_cast_fp16, y = var_37752_to_fp16)[name = tensor("aw_chunk_3865_cast_fp16")]; + tensor var_37755_equation_0 = const()[name = tensor("op_37755_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37755_cast_fp16 = einsum(equation = var_37755_equation_0, values = (var_37557_cast_fp16, var_37078_cast_fp16))[name = tensor("op_37755_cast_fp16")]; + tensor var_37756_to_fp16 = const()[name = tensor("op_37756_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3867_cast_fp16 = mul(x = var_37755_cast_fp16, y = var_37756_to_fp16)[name = tensor("aw_chunk_3867_cast_fp16")]; + tensor var_37759_equation_0 = const()[name = tensor("op_37759_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37759_cast_fp16 = einsum(equation = var_37759_equation_0, values = (var_37557_cast_fp16, var_37085_cast_fp16))[name = tensor("op_37759_cast_fp16")]; + tensor var_37760_to_fp16 = const()[name = tensor("op_37760_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3869_cast_fp16 = mul(x = var_37759_cast_fp16, y = var_37760_to_fp16)[name = tensor("aw_chunk_3869_cast_fp16")]; + tensor var_37763_equation_0 = const()[name = tensor("op_37763_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37763_cast_fp16 = einsum(equation = var_37763_equation_0, values = (var_37557_cast_fp16, var_37092_cast_fp16))[name = tensor("op_37763_cast_fp16")]; + tensor var_37764_to_fp16 = const()[name = tensor("op_37764_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3871_cast_fp16 = mul(x = var_37763_cast_fp16, y = var_37764_to_fp16)[name = tensor("aw_chunk_3871_cast_fp16")]; + tensor var_37767_equation_0 = const()[name = tensor("op_37767_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37767_cast_fp16 = einsum(equation = var_37767_equation_0, values = (var_37561_cast_fp16, var_37099_cast_fp16))[name = tensor("op_37767_cast_fp16")]; + tensor var_37768_to_fp16 = const()[name = tensor("op_37768_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3873_cast_fp16 = mul(x = var_37767_cast_fp16, y = var_37768_to_fp16)[name = tensor("aw_chunk_3873_cast_fp16")]; + tensor var_37771_equation_0 = const()[name = tensor("op_37771_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37771_cast_fp16 = einsum(equation = var_37771_equation_0, values = (var_37561_cast_fp16, var_37106_cast_fp16))[name = tensor("op_37771_cast_fp16")]; + tensor var_37772_to_fp16 = const()[name = tensor("op_37772_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3875_cast_fp16 = mul(x = var_37771_cast_fp16, y = var_37772_to_fp16)[name = tensor("aw_chunk_3875_cast_fp16")]; + tensor var_37775_equation_0 = const()[name = tensor("op_37775_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37775_cast_fp16 = einsum(equation = var_37775_equation_0, values = (var_37561_cast_fp16, var_37113_cast_fp16))[name = tensor("op_37775_cast_fp16")]; + tensor var_37776_to_fp16 = const()[name = tensor("op_37776_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3877_cast_fp16 = mul(x = var_37775_cast_fp16, y = var_37776_to_fp16)[name = tensor("aw_chunk_3877_cast_fp16")]; + tensor var_37779_equation_0 = const()[name = tensor("op_37779_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37779_cast_fp16 = einsum(equation = var_37779_equation_0, values = (var_37561_cast_fp16, var_37120_cast_fp16))[name = tensor("op_37779_cast_fp16")]; + tensor var_37780_to_fp16 = const()[name = tensor("op_37780_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3879_cast_fp16 = mul(x = var_37779_cast_fp16, y = var_37780_to_fp16)[name = tensor("aw_chunk_3879_cast_fp16")]; + tensor var_37783_equation_0 = const()[name = tensor("op_37783_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37783_cast_fp16 = einsum(equation = var_37783_equation_0, values = (var_37565_cast_fp16, var_37127_cast_fp16))[name = tensor("op_37783_cast_fp16")]; + tensor var_37784_to_fp16 = const()[name = tensor("op_37784_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3881_cast_fp16 = mul(x = var_37783_cast_fp16, y = var_37784_to_fp16)[name = tensor("aw_chunk_3881_cast_fp16")]; + tensor var_37787_equation_0 = const()[name = tensor("op_37787_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37787_cast_fp16 = einsum(equation = var_37787_equation_0, values = (var_37565_cast_fp16, var_37134_cast_fp16))[name = tensor("op_37787_cast_fp16")]; + tensor var_37788_to_fp16 = const()[name = tensor("op_37788_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3883_cast_fp16 = mul(x = var_37787_cast_fp16, y = var_37788_to_fp16)[name = tensor("aw_chunk_3883_cast_fp16")]; + tensor var_37791_equation_0 = const()[name = tensor("op_37791_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37791_cast_fp16 = einsum(equation = var_37791_equation_0, values = (var_37565_cast_fp16, var_37141_cast_fp16))[name = tensor("op_37791_cast_fp16")]; + tensor var_37792_to_fp16 = const()[name = tensor("op_37792_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3885_cast_fp16 = mul(x = var_37791_cast_fp16, y = var_37792_to_fp16)[name = tensor("aw_chunk_3885_cast_fp16")]; + tensor var_37795_equation_0 = const()[name = tensor("op_37795_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37795_cast_fp16 = einsum(equation = var_37795_equation_0, values = (var_37565_cast_fp16, var_37148_cast_fp16))[name = tensor("op_37795_cast_fp16")]; + tensor var_37796_to_fp16 = const()[name = tensor("op_37796_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3887_cast_fp16 = mul(x = var_37795_cast_fp16, y = var_37796_to_fp16)[name = tensor("aw_chunk_3887_cast_fp16")]; + tensor var_37799_equation_0 = const()[name = tensor("op_37799_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37799_cast_fp16 = einsum(equation = var_37799_equation_0, values = (var_37569_cast_fp16, var_37155_cast_fp16))[name = tensor("op_37799_cast_fp16")]; + tensor var_37800_to_fp16 = const()[name = tensor("op_37800_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3889_cast_fp16 = mul(x = var_37799_cast_fp16, y = var_37800_to_fp16)[name = tensor("aw_chunk_3889_cast_fp16")]; + tensor var_37803_equation_0 = const()[name = tensor("op_37803_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37803_cast_fp16 = einsum(equation = var_37803_equation_0, values = (var_37569_cast_fp16, var_37162_cast_fp16))[name = tensor("op_37803_cast_fp16")]; + tensor var_37804_to_fp16 = const()[name = tensor("op_37804_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3891_cast_fp16 = mul(x = var_37803_cast_fp16, y = var_37804_to_fp16)[name = tensor("aw_chunk_3891_cast_fp16")]; + tensor var_37807_equation_0 = const()[name = tensor("op_37807_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37807_cast_fp16 = einsum(equation = var_37807_equation_0, values = (var_37569_cast_fp16, var_37169_cast_fp16))[name = tensor("op_37807_cast_fp16")]; + tensor var_37808_to_fp16 = const()[name = tensor("op_37808_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3893_cast_fp16 = mul(x = var_37807_cast_fp16, y = var_37808_to_fp16)[name = tensor("aw_chunk_3893_cast_fp16")]; + tensor var_37811_equation_0 = const()[name = tensor("op_37811_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37811_cast_fp16 = einsum(equation = var_37811_equation_0, values = (var_37569_cast_fp16, var_37176_cast_fp16))[name = tensor("op_37811_cast_fp16")]; + tensor var_37812_to_fp16 = const()[name = tensor("op_37812_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3895_cast_fp16 = mul(x = var_37811_cast_fp16, y = var_37812_to_fp16)[name = tensor("aw_chunk_3895_cast_fp16")]; + tensor var_37815_equation_0 = const()[name = tensor("op_37815_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37815_cast_fp16 = einsum(equation = var_37815_equation_0, values = (var_37573_cast_fp16, var_37183_cast_fp16))[name = tensor("op_37815_cast_fp16")]; + tensor var_37816_to_fp16 = const()[name = tensor("op_37816_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3897_cast_fp16 = mul(x = var_37815_cast_fp16, y = var_37816_to_fp16)[name = tensor("aw_chunk_3897_cast_fp16")]; + tensor var_37819_equation_0 = const()[name = tensor("op_37819_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37819_cast_fp16 = einsum(equation = var_37819_equation_0, values = (var_37573_cast_fp16, var_37190_cast_fp16))[name = tensor("op_37819_cast_fp16")]; + tensor var_37820_to_fp16 = const()[name = tensor("op_37820_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3899_cast_fp16 = mul(x = var_37819_cast_fp16, y = var_37820_to_fp16)[name = tensor("aw_chunk_3899_cast_fp16")]; + tensor var_37823_equation_0 = const()[name = tensor("op_37823_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37823_cast_fp16 = einsum(equation = var_37823_equation_0, values = (var_37573_cast_fp16, var_37197_cast_fp16))[name = tensor("op_37823_cast_fp16")]; + tensor var_37824_to_fp16 = const()[name = tensor("op_37824_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3901_cast_fp16 = mul(x = var_37823_cast_fp16, y = var_37824_to_fp16)[name = tensor("aw_chunk_3901_cast_fp16")]; + tensor var_37827_equation_0 = const()[name = tensor("op_37827_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37827_cast_fp16 = einsum(equation = var_37827_equation_0, values = (var_37573_cast_fp16, var_37204_cast_fp16))[name = tensor("op_37827_cast_fp16")]; + tensor var_37828_to_fp16 = const()[name = tensor("op_37828_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3903_cast_fp16 = mul(x = var_37827_cast_fp16, y = var_37828_to_fp16)[name = tensor("aw_chunk_3903_cast_fp16")]; + tensor var_37831_equation_0 = const()[name = tensor("op_37831_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37831_cast_fp16 = einsum(equation = var_37831_equation_0, values = (var_37577_cast_fp16, var_37211_cast_fp16))[name = tensor("op_37831_cast_fp16")]; + tensor var_37832_to_fp16 = const()[name = tensor("op_37832_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3905_cast_fp16 = mul(x = var_37831_cast_fp16, y = var_37832_to_fp16)[name = tensor("aw_chunk_3905_cast_fp16")]; + tensor var_37835_equation_0 = const()[name = tensor("op_37835_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37835_cast_fp16 = einsum(equation = var_37835_equation_0, values = (var_37577_cast_fp16, var_37218_cast_fp16))[name = tensor("op_37835_cast_fp16")]; + tensor var_37836_to_fp16 = const()[name = tensor("op_37836_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3907_cast_fp16 = mul(x = var_37835_cast_fp16, y = var_37836_to_fp16)[name = tensor("aw_chunk_3907_cast_fp16")]; + tensor var_37839_equation_0 = const()[name = tensor("op_37839_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37839_cast_fp16 = einsum(equation = var_37839_equation_0, values = (var_37577_cast_fp16, var_37225_cast_fp16))[name = tensor("op_37839_cast_fp16")]; + tensor var_37840_to_fp16 = const()[name = tensor("op_37840_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3909_cast_fp16 = mul(x = var_37839_cast_fp16, y = var_37840_to_fp16)[name = tensor("aw_chunk_3909_cast_fp16")]; + tensor var_37843_equation_0 = const()[name = tensor("op_37843_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37843_cast_fp16 = einsum(equation = var_37843_equation_0, values = (var_37577_cast_fp16, var_37232_cast_fp16))[name = tensor("op_37843_cast_fp16")]; + tensor var_37844_to_fp16 = const()[name = tensor("op_37844_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3911_cast_fp16 = mul(x = var_37843_cast_fp16, y = var_37844_to_fp16)[name = tensor("aw_chunk_3911_cast_fp16")]; + tensor var_37847_equation_0 = const()[name = tensor("op_37847_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37847_cast_fp16 = einsum(equation = var_37847_equation_0, values = (var_37581_cast_fp16, var_37239_cast_fp16))[name = tensor("op_37847_cast_fp16")]; + tensor var_37848_to_fp16 = const()[name = tensor("op_37848_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3913_cast_fp16 = mul(x = var_37847_cast_fp16, y = var_37848_to_fp16)[name = tensor("aw_chunk_3913_cast_fp16")]; + tensor var_37851_equation_0 = const()[name = tensor("op_37851_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37851_cast_fp16 = einsum(equation = var_37851_equation_0, values = (var_37581_cast_fp16, var_37246_cast_fp16))[name = tensor("op_37851_cast_fp16")]; + tensor var_37852_to_fp16 = const()[name = tensor("op_37852_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3915_cast_fp16 = mul(x = var_37851_cast_fp16, y = var_37852_to_fp16)[name = tensor("aw_chunk_3915_cast_fp16")]; + tensor var_37855_equation_0 = const()[name = tensor("op_37855_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37855_cast_fp16 = einsum(equation = var_37855_equation_0, values = (var_37581_cast_fp16, var_37253_cast_fp16))[name = tensor("op_37855_cast_fp16")]; + tensor var_37856_to_fp16 = const()[name = tensor("op_37856_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3917_cast_fp16 = mul(x = var_37855_cast_fp16, y = var_37856_to_fp16)[name = tensor("aw_chunk_3917_cast_fp16")]; + tensor var_37859_equation_0 = const()[name = tensor("op_37859_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37859_cast_fp16 = einsum(equation = var_37859_equation_0, values = (var_37581_cast_fp16, var_37260_cast_fp16))[name = tensor("op_37859_cast_fp16")]; + tensor var_37860_to_fp16 = const()[name = tensor("op_37860_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3919_cast_fp16 = mul(x = var_37859_cast_fp16, y = var_37860_to_fp16)[name = tensor("aw_chunk_3919_cast_fp16")]; + tensor var_37863_equation_0 = const()[name = tensor("op_37863_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37863_cast_fp16 = einsum(equation = var_37863_equation_0, values = (var_37585_cast_fp16, var_37267_cast_fp16))[name = tensor("op_37863_cast_fp16")]; + tensor var_37864_to_fp16 = const()[name = tensor("op_37864_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3921_cast_fp16 = mul(x = var_37863_cast_fp16, y = var_37864_to_fp16)[name = tensor("aw_chunk_3921_cast_fp16")]; + tensor var_37867_equation_0 = const()[name = tensor("op_37867_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37867_cast_fp16 = einsum(equation = var_37867_equation_0, values = (var_37585_cast_fp16, var_37274_cast_fp16))[name = tensor("op_37867_cast_fp16")]; + tensor var_37868_to_fp16 = const()[name = tensor("op_37868_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3923_cast_fp16 = mul(x = var_37867_cast_fp16, y = var_37868_to_fp16)[name = tensor("aw_chunk_3923_cast_fp16")]; + tensor var_37871_equation_0 = const()[name = tensor("op_37871_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37871_cast_fp16 = einsum(equation = var_37871_equation_0, values = (var_37585_cast_fp16, var_37281_cast_fp16))[name = tensor("op_37871_cast_fp16")]; + tensor var_37872_to_fp16 = const()[name = tensor("op_37872_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3925_cast_fp16 = mul(x = var_37871_cast_fp16, y = var_37872_to_fp16)[name = tensor("aw_chunk_3925_cast_fp16")]; + tensor var_37875_equation_0 = const()[name = tensor("op_37875_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37875_cast_fp16 = einsum(equation = var_37875_equation_0, values = (var_37585_cast_fp16, var_37288_cast_fp16))[name = tensor("op_37875_cast_fp16")]; + tensor var_37876_to_fp16 = const()[name = tensor("op_37876_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3927_cast_fp16 = mul(x = var_37875_cast_fp16, y = var_37876_to_fp16)[name = tensor("aw_chunk_3927_cast_fp16")]; + tensor var_37879_equation_0 = const()[name = tensor("op_37879_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37879_cast_fp16 = einsum(equation = var_37879_equation_0, values = (var_37589_cast_fp16, var_37295_cast_fp16))[name = tensor("op_37879_cast_fp16")]; + tensor var_37880_to_fp16 = const()[name = tensor("op_37880_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3929_cast_fp16 = mul(x = var_37879_cast_fp16, y = var_37880_to_fp16)[name = tensor("aw_chunk_3929_cast_fp16")]; + tensor var_37883_equation_0 = const()[name = tensor("op_37883_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37883_cast_fp16 = einsum(equation = var_37883_equation_0, values = (var_37589_cast_fp16, var_37302_cast_fp16))[name = tensor("op_37883_cast_fp16")]; + tensor var_37884_to_fp16 = const()[name = tensor("op_37884_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3931_cast_fp16 = mul(x = var_37883_cast_fp16, y = var_37884_to_fp16)[name = tensor("aw_chunk_3931_cast_fp16")]; + tensor var_37887_equation_0 = const()[name = tensor("op_37887_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37887_cast_fp16 = einsum(equation = var_37887_equation_0, values = (var_37589_cast_fp16, var_37309_cast_fp16))[name = tensor("op_37887_cast_fp16")]; + tensor var_37888_to_fp16 = const()[name = tensor("op_37888_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3933_cast_fp16 = mul(x = var_37887_cast_fp16, y = var_37888_to_fp16)[name = tensor("aw_chunk_3933_cast_fp16")]; + tensor var_37891_equation_0 = const()[name = tensor("op_37891_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37891_cast_fp16 = einsum(equation = var_37891_equation_0, values = (var_37589_cast_fp16, var_37316_cast_fp16))[name = tensor("op_37891_cast_fp16")]; + tensor var_37892_to_fp16 = const()[name = tensor("op_37892_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3935_cast_fp16 = mul(x = var_37891_cast_fp16, y = var_37892_to_fp16)[name = tensor("aw_chunk_3935_cast_fp16")]; + tensor var_37895_equation_0 = const()[name = tensor("op_37895_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37895_cast_fp16 = einsum(equation = var_37895_equation_0, values = (var_37593_cast_fp16, var_37323_cast_fp16))[name = tensor("op_37895_cast_fp16")]; + tensor var_37896_to_fp16 = const()[name = tensor("op_37896_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3937_cast_fp16 = mul(x = var_37895_cast_fp16, y = var_37896_to_fp16)[name = tensor("aw_chunk_3937_cast_fp16")]; + tensor var_37899_equation_0 = const()[name = tensor("op_37899_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37899_cast_fp16 = einsum(equation = var_37899_equation_0, values = (var_37593_cast_fp16, var_37330_cast_fp16))[name = tensor("op_37899_cast_fp16")]; + tensor var_37900_to_fp16 = const()[name = tensor("op_37900_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3939_cast_fp16 = mul(x = var_37899_cast_fp16, y = var_37900_to_fp16)[name = tensor("aw_chunk_3939_cast_fp16")]; + tensor var_37903_equation_0 = const()[name = tensor("op_37903_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37903_cast_fp16 = einsum(equation = var_37903_equation_0, values = (var_37593_cast_fp16, var_37337_cast_fp16))[name = tensor("op_37903_cast_fp16")]; + tensor var_37904_to_fp16 = const()[name = tensor("op_37904_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3941_cast_fp16 = mul(x = var_37903_cast_fp16, y = var_37904_to_fp16)[name = tensor("aw_chunk_3941_cast_fp16")]; + tensor var_37907_equation_0 = const()[name = tensor("op_37907_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37907_cast_fp16 = einsum(equation = var_37907_equation_0, values = (var_37593_cast_fp16, var_37344_cast_fp16))[name = tensor("op_37907_cast_fp16")]; + tensor var_37908_to_fp16 = const()[name = tensor("op_37908_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3943_cast_fp16 = mul(x = var_37907_cast_fp16, y = var_37908_to_fp16)[name = tensor("aw_chunk_3943_cast_fp16")]; + tensor var_37911_equation_0 = const()[name = tensor("op_37911_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37911_cast_fp16 = einsum(equation = var_37911_equation_0, values = (var_37597_cast_fp16, var_37351_cast_fp16))[name = tensor("op_37911_cast_fp16")]; + tensor var_37912_to_fp16 = const()[name = tensor("op_37912_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3945_cast_fp16 = mul(x = var_37911_cast_fp16, y = var_37912_to_fp16)[name = tensor("aw_chunk_3945_cast_fp16")]; + tensor var_37915_equation_0 = const()[name = tensor("op_37915_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37915_cast_fp16 = einsum(equation = var_37915_equation_0, values = (var_37597_cast_fp16, var_37358_cast_fp16))[name = tensor("op_37915_cast_fp16")]; + tensor var_37916_to_fp16 = const()[name = tensor("op_37916_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3947_cast_fp16 = mul(x = var_37915_cast_fp16, y = var_37916_to_fp16)[name = tensor("aw_chunk_3947_cast_fp16")]; + tensor var_37919_equation_0 = const()[name = tensor("op_37919_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37919_cast_fp16 = einsum(equation = var_37919_equation_0, values = (var_37597_cast_fp16, var_37365_cast_fp16))[name = tensor("op_37919_cast_fp16")]; + tensor var_37920_to_fp16 = const()[name = tensor("op_37920_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3949_cast_fp16 = mul(x = var_37919_cast_fp16, y = var_37920_to_fp16)[name = tensor("aw_chunk_3949_cast_fp16")]; + tensor var_37923_equation_0 = const()[name = tensor("op_37923_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37923_cast_fp16 = einsum(equation = var_37923_equation_0, values = (var_37597_cast_fp16, var_37372_cast_fp16))[name = tensor("op_37923_cast_fp16")]; + tensor var_37924_to_fp16 = const()[name = tensor("op_37924_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3951_cast_fp16 = mul(x = var_37923_cast_fp16, y = var_37924_to_fp16)[name = tensor("aw_chunk_3951_cast_fp16")]; + tensor var_37927_equation_0 = const()[name = tensor("op_37927_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37927_cast_fp16 = einsum(equation = var_37927_equation_0, values = (var_37601_cast_fp16, var_37379_cast_fp16))[name = tensor("op_37927_cast_fp16")]; + tensor var_37928_to_fp16 = const()[name = tensor("op_37928_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3953_cast_fp16 = mul(x = var_37927_cast_fp16, y = var_37928_to_fp16)[name = tensor("aw_chunk_3953_cast_fp16")]; + tensor var_37931_equation_0 = const()[name = tensor("op_37931_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37931_cast_fp16 = einsum(equation = var_37931_equation_0, values = (var_37601_cast_fp16, var_37386_cast_fp16))[name = tensor("op_37931_cast_fp16")]; + tensor var_37932_to_fp16 = const()[name = tensor("op_37932_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3955_cast_fp16 = mul(x = var_37931_cast_fp16, y = var_37932_to_fp16)[name = tensor("aw_chunk_3955_cast_fp16")]; + tensor var_37935_equation_0 = const()[name = tensor("op_37935_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37935_cast_fp16 = einsum(equation = var_37935_equation_0, values = (var_37601_cast_fp16, var_37393_cast_fp16))[name = tensor("op_37935_cast_fp16")]; + tensor var_37936_to_fp16 = const()[name = tensor("op_37936_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3957_cast_fp16 = mul(x = var_37935_cast_fp16, y = var_37936_to_fp16)[name = tensor("aw_chunk_3957_cast_fp16")]; + tensor var_37939_equation_0 = const()[name = tensor("op_37939_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37939_cast_fp16 = einsum(equation = var_37939_equation_0, values = (var_37601_cast_fp16, var_37400_cast_fp16))[name = tensor("op_37939_cast_fp16")]; + tensor var_37940_to_fp16 = const()[name = tensor("op_37940_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3959_cast_fp16 = mul(x = var_37939_cast_fp16, y = var_37940_to_fp16)[name = tensor("aw_chunk_3959_cast_fp16")]; + tensor var_37943_equation_0 = const()[name = tensor("op_37943_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37943_cast_fp16 = einsum(equation = var_37943_equation_0, values = (var_37605_cast_fp16, var_37407_cast_fp16))[name = tensor("op_37943_cast_fp16")]; + tensor var_37944_to_fp16 = const()[name = tensor("op_37944_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3961_cast_fp16 = mul(x = var_37943_cast_fp16, y = var_37944_to_fp16)[name = tensor("aw_chunk_3961_cast_fp16")]; + tensor var_37947_equation_0 = const()[name = tensor("op_37947_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37947_cast_fp16 = einsum(equation = var_37947_equation_0, values = (var_37605_cast_fp16, var_37414_cast_fp16))[name = tensor("op_37947_cast_fp16")]; + tensor var_37948_to_fp16 = const()[name = tensor("op_37948_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3963_cast_fp16 = mul(x = var_37947_cast_fp16, y = var_37948_to_fp16)[name = tensor("aw_chunk_3963_cast_fp16")]; + tensor var_37951_equation_0 = const()[name = tensor("op_37951_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37951_cast_fp16 = einsum(equation = var_37951_equation_0, values = (var_37605_cast_fp16, var_37421_cast_fp16))[name = tensor("op_37951_cast_fp16")]; + tensor var_37952_to_fp16 = const()[name = tensor("op_37952_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3965_cast_fp16 = mul(x = var_37951_cast_fp16, y = var_37952_to_fp16)[name = tensor("aw_chunk_3965_cast_fp16")]; + tensor var_37955_equation_0 = const()[name = tensor("op_37955_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37955_cast_fp16 = einsum(equation = var_37955_equation_0, values = (var_37605_cast_fp16, var_37428_cast_fp16))[name = tensor("op_37955_cast_fp16")]; + tensor var_37956_to_fp16 = const()[name = tensor("op_37956_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3967_cast_fp16 = mul(x = var_37955_cast_fp16, y = var_37956_to_fp16)[name = tensor("aw_chunk_3967_cast_fp16")]; + tensor var_37959_equation_0 = const()[name = tensor("op_37959_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37959_cast_fp16 = einsum(equation = var_37959_equation_0, values = (var_37609_cast_fp16, var_37435_cast_fp16))[name = tensor("op_37959_cast_fp16")]; + tensor var_37960_to_fp16 = const()[name = tensor("op_37960_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3969_cast_fp16 = mul(x = var_37959_cast_fp16, y = var_37960_to_fp16)[name = tensor("aw_chunk_3969_cast_fp16")]; + tensor var_37963_equation_0 = const()[name = tensor("op_37963_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37963_cast_fp16 = einsum(equation = var_37963_equation_0, values = (var_37609_cast_fp16, var_37442_cast_fp16))[name = tensor("op_37963_cast_fp16")]; + tensor var_37964_to_fp16 = const()[name = tensor("op_37964_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3971_cast_fp16 = mul(x = var_37963_cast_fp16, y = var_37964_to_fp16)[name = tensor("aw_chunk_3971_cast_fp16")]; + tensor var_37967_equation_0 = const()[name = tensor("op_37967_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37967_cast_fp16 = einsum(equation = var_37967_equation_0, values = (var_37609_cast_fp16, var_37449_cast_fp16))[name = tensor("op_37967_cast_fp16")]; + tensor var_37968_to_fp16 = const()[name = tensor("op_37968_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3973_cast_fp16 = mul(x = var_37967_cast_fp16, y = var_37968_to_fp16)[name = tensor("aw_chunk_3973_cast_fp16")]; + tensor var_37971_equation_0 = const()[name = tensor("op_37971_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37971_cast_fp16 = einsum(equation = var_37971_equation_0, values = (var_37609_cast_fp16, var_37456_cast_fp16))[name = tensor("op_37971_cast_fp16")]; + tensor var_37972_to_fp16 = const()[name = tensor("op_37972_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3975_cast_fp16 = mul(x = var_37971_cast_fp16, y = var_37972_to_fp16)[name = tensor("aw_chunk_3975_cast_fp16")]; + tensor var_37975_equation_0 = const()[name = tensor("op_37975_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37975_cast_fp16 = einsum(equation = var_37975_equation_0, values = (var_37613_cast_fp16, var_37463_cast_fp16))[name = tensor("op_37975_cast_fp16")]; + tensor var_37976_to_fp16 = const()[name = tensor("op_37976_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3977_cast_fp16 = mul(x = var_37975_cast_fp16, y = var_37976_to_fp16)[name = tensor("aw_chunk_3977_cast_fp16")]; + tensor var_37979_equation_0 = const()[name = tensor("op_37979_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37979_cast_fp16 = einsum(equation = var_37979_equation_0, values = (var_37613_cast_fp16, var_37470_cast_fp16))[name = tensor("op_37979_cast_fp16")]; + tensor var_37980_to_fp16 = const()[name = tensor("op_37980_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3979_cast_fp16 = mul(x = var_37979_cast_fp16, y = var_37980_to_fp16)[name = tensor("aw_chunk_3979_cast_fp16")]; + tensor var_37983_equation_0 = const()[name = tensor("op_37983_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37983_cast_fp16 = einsum(equation = var_37983_equation_0, values = (var_37613_cast_fp16, var_37477_cast_fp16))[name = tensor("op_37983_cast_fp16")]; + tensor var_37984_to_fp16 = const()[name = tensor("op_37984_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3981_cast_fp16 = mul(x = var_37983_cast_fp16, y = var_37984_to_fp16)[name = tensor("aw_chunk_3981_cast_fp16")]; + tensor var_37987_equation_0 = const()[name = tensor("op_37987_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37987_cast_fp16 = einsum(equation = var_37987_equation_0, values = (var_37613_cast_fp16, var_37484_cast_fp16))[name = tensor("op_37987_cast_fp16")]; + tensor var_37988_to_fp16 = const()[name = tensor("op_37988_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3983_cast_fp16 = mul(x = var_37987_cast_fp16, y = var_37988_to_fp16)[name = tensor("aw_chunk_3983_cast_fp16")]; + tensor var_37991_equation_0 = const()[name = tensor("op_37991_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37991_cast_fp16 = einsum(equation = var_37991_equation_0, values = (var_37617_cast_fp16, var_37491_cast_fp16))[name = tensor("op_37991_cast_fp16")]; + tensor var_37992_to_fp16 = const()[name = tensor("op_37992_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3985_cast_fp16 = mul(x = var_37991_cast_fp16, y = var_37992_to_fp16)[name = tensor("aw_chunk_3985_cast_fp16")]; + tensor var_37995_equation_0 = const()[name = tensor("op_37995_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37995_cast_fp16 = einsum(equation = var_37995_equation_0, values = (var_37617_cast_fp16, var_37498_cast_fp16))[name = tensor("op_37995_cast_fp16")]; + tensor var_37996_to_fp16 = const()[name = tensor("op_37996_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3987_cast_fp16 = mul(x = var_37995_cast_fp16, y = var_37996_to_fp16)[name = tensor("aw_chunk_3987_cast_fp16")]; + tensor var_37999_equation_0 = const()[name = tensor("op_37999_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_37999_cast_fp16 = einsum(equation = var_37999_equation_0, values = (var_37617_cast_fp16, var_37505_cast_fp16))[name = tensor("op_37999_cast_fp16")]; + tensor var_38000_to_fp16 = const()[name = tensor("op_38000_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3989_cast_fp16 = mul(x = var_37999_cast_fp16, y = var_38000_to_fp16)[name = tensor("aw_chunk_3989_cast_fp16")]; + tensor var_38003_equation_0 = const()[name = tensor("op_38003_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_38003_cast_fp16 = einsum(equation = var_38003_equation_0, values = (var_37617_cast_fp16, var_37512_cast_fp16))[name = tensor("op_38003_cast_fp16")]; + tensor var_38004_to_fp16 = const()[name = tensor("op_38004_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3991_cast_fp16 = mul(x = var_38003_cast_fp16, y = var_38004_to_fp16)[name = tensor("aw_chunk_3991_cast_fp16")]; + tensor var_38007_equation_0 = const()[name = tensor("op_38007_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_38007_cast_fp16 = einsum(equation = var_38007_equation_0, values = (var_37621_cast_fp16, var_37519_cast_fp16))[name = tensor("op_38007_cast_fp16")]; + tensor var_38008_to_fp16 = const()[name = tensor("op_38008_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3993_cast_fp16 = mul(x = var_38007_cast_fp16, y = var_38008_to_fp16)[name = tensor("aw_chunk_3993_cast_fp16")]; + tensor var_38011_equation_0 = const()[name = tensor("op_38011_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_38011_cast_fp16 = einsum(equation = var_38011_equation_0, values = (var_37621_cast_fp16, var_37526_cast_fp16))[name = tensor("op_38011_cast_fp16")]; + tensor var_38012_to_fp16 = const()[name = tensor("op_38012_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3995_cast_fp16 = mul(x = var_38011_cast_fp16, y = var_38012_to_fp16)[name = tensor("aw_chunk_3995_cast_fp16")]; + tensor var_38015_equation_0 = const()[name = tensor("op_38015_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_38015_cast_fp16 = einsum(equation = var_38015_equation_0, values = (var_37621_cast_fp16, var_37533_cast_fp16))[name = tensor("op_38015_cast_fp16")]; + tensor var_38016_to_fp16 = const()[name = tensor("op_38016_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3997_cast_fp16 = mul(x = var_38015_cast_fp16, y = var_38016_to_fp16)[name = tensor("aw_chunk_3997_cast_fp16")]; + tensor var_38019_equation_0 = const()[name = tensor("op_38019_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_38019_cast_fp16 = einsum(equation = var_38019_equation_0, values = (var_37621_cast_fp16, var_37540_cast_fp16))[name = tensor("op_38019_cast_fp16")]; + tensor var_38020_to_fp16 = const()[name = tensor("op_38020_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_3999_cast_fp16 = mul(x = var_38019_cast_fp16, y = var_38020_to_fp16)[name = tensor("aw_chunk_3999_cast_fp16")]; + tensor var_38022_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3841_cast_fp16)[name = tensor("op_38022_cast_fp16")]; + tensor var_38023_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3843_cast_fp16)[name = tensor("op_38023_cast_fp16")]; + tensor var_38024_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3845_cast_fp16)[name = tensor("op_38024_cast_fp16")]; + tensor var_38025_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3847_cast_fp16)[name = tensor("op_38025_cast_fp16")]; + tensor var_38026_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3849_cast_fp16)[name = tensor("op_38026_cast_fp16")]; + tensor var_38027_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3851_cast_fp16)[name = tensor("op_38027_cast_fp16")]; + tensor var_38028_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3853_cast_fp16)[name = tensor("op_38028_cast_fp16")]; + tensor var_38029_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3855_cast_fp16)[name = tensor("op_38029_cast_fp16")]; + tensor var_38030_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3857_cast_fp16)[name = tensor("op_38030_cast_fp16")]; + tensor var_38031_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3859_cast_fp16)[name = tensor("op_38031_cast_fp16")]; + tensor var_38032_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3861_cast_fp16)[name = tensor("op_38032_cast_fp16")]; + tensor var_38033_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3863_cast_fp16)[name = tensor("op_38033_cast_fp16")]; + tensor var_38034_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3865_cast_fp16)[name = tensor("op_38034_cast_fp16")]; + tensor var_38035_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3867_cast_fp16)[name = tensor("op_38035_cast_fp16")]; + tensor var_38036_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3869_cast_fp16)[name = tensor("op_38036_cast_fp16")]; + tensor var_38037_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3871_cast_fp16)[name = tensor("op_38037_cast_fp16")]; + tensor var_38038_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3873_cast_fp16)[name = tensor("op_38038_cast_fp16")]; + tensor var_38039_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3875_cast_fp16)[name = tensor("op_38039_cast_fp16")]; + tensor var_38040_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3877_cast_fp16)[name = tensor("op_38040_cast_fp16")]; + tensor var_38041_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3879_cast_fp16)[name = tensor("op_38041_cast_fp16")]; + tensor var_38042_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3881_cast_fp16)[name = tensor("op_38042_cast_fp16")]; + tensor var_38043_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3883_cast_fp16)[name = tensor("op_38043_cast_fp16")]; + tensor var_38044_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3885_cast_fp16)[name = tensor("op_38044_cast_fp16")]; + tensor var_38045_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3887_cast_fp16)[name = tensor("op_38045_cast_fp16")]; + tensor var_38046_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3889_cast_fp16)[name = tensor("op_38046_cast_fp16")]; + tensor var_38047_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3891_cast_fp16)[name = tensor("op_38047_cast_fp16")]; + tensor var_38048_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3893_cast_fp16)[name = tensor("op_38048_cast_fp16")]; + tensor var_38049_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3895_cast_fp16)[name = tensor("op_38049_cast_fp16")]; + tensor var_38050_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3897_cast_fp16)[name = tensor("op_38050_cast_fp16")]; + tensor var_38051_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3899_cast_fp16)[name = tensor("op_38051_cast_fp16")]; + tensor var_38052_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3901_cast_fp16)[name = tensor("op_38052_cast_fp16")]; + tensor var_38053_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3903_cast_fp16)[name = tensor("op_38053_cast_fp16")]; + tensor var_38054_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3905_cast_fp16)[name = tensor("op_38054_cast_fp16")]; + tensor var_38055_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3907_cast_fp16)[name = tensor("op_38055_cast_fp16")]; + tensor var_38056_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3909_cast_fp16)[name = tensor("op_38056_cast_fp16")]; + tensor var_38057_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3911_cast_fp16)[name = tensor("op_38057_cast_fp16")]; + tensor var_38058_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3913_cast_fp16)[name = tensor("op_38058_cast_fp16")]; + tensor var_38059_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3915_cast_fp16)[name = tensor("op_38059_cast_fp16")]; + tensor var_38060_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3917_cast_fp16)[name = tensor("op_38060_cast_fp16")]; + tensor var_38061_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3919_cast_fp16)[name = tensor("op_38061_cast_fp16")]; + tensor var_38062_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3921_cast_fp16)[name = tensor("op_38062_cast_fp16")]; + tensor var_38063_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3923_cast_fp16)[name = tensor("op_38063_cast_fp16")]; + tensor var_38064_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3925_cast_fp16)[name = tensor("op_38064_cast_fp16")]; + tensor var_38065_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3927_cast_fp16)[name = tensor("op_38065_cast_fp16")]; + tensor var_38066_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3929_cast_fp16)[name = tensor("op_38066_cast_fp16")]; + tensor var_38067_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3931_cast_fp16)[name = tensor("op_38067_cast_fp16")]; + tensor var_38068_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3933_cast_fp16)[name = tensor("op_38068_cast_fp16")]; + tensor var_38069_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3935_cast_fp16)[name = tensor("op_38069_cast_fp16")]; + tensor var_38070_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3937_cast_fp16)[name = tensor("op_38070_cast_fp16")]; + tensor var_38071_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3939_cast_fp16)[name = tensor("op_38071_cast_fp16")]; + tensor var_38072_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3941_cast_fp16)[name = tensor("op_38072_cast_fp16")]; + tensor var_38073_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3943_cast_fp16)[name = tensor("op_38073_cast_fp16")]; + tensor var_38074_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3945_cast_fp16)[name = tensor("op_38074_cast_fp16")]; + tensor var_38075_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3947_cast_fp16)[name = tensor("op_38075_cast_fp16")]; + tensor var_38076_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3949_cast_fp16)[name = tensor("op_38076_cast_fp16")]; + tensor var_38077_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3951_cast_fp16)[name = tensor("op_38077_cast_fp16")]; + tensor var_38078_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3953_cast_fp16)[name = tensor("op_38078_cast_fp16")]; + tensor var_38079_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3955_cast_fp16)[name = tensor("op_38079_cast_fp16")]; + tensor var_38080_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3957_cast_fp16)[name = tensor("op_38080_cast_fp16")]; + tensor var_38081_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3959_cast_fp16)[name = tensor("op_38081_cast_fp16")]; + tensor var_38082_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3961_cast_fp16)[name = tensor("op_38082_cast_fp16")]; + tensor var_38083_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3963_cast_fp16)[name = tensor("op_38083_cast_fp16")]; + tensor var_38084_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3965_cast_fp16)[name = tensor("op_38084_cast_fp16")]; + tensor var_38085_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3967_cast_fp16)[name = tensor("op_38085_cast_fp16")]; + tensor var_38086_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3969_cast_fp16)[name = tensor("op_38086_cast_fp16")]; + tensor var_38087_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3971_cast_fp16)[name = tensor("op_38087_cast_fp16")]; + tensor var_38088_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3973_cast_fp16)[name = tensor("op_38088_cast_fp16")]; + tensor var_38089_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3975_cast_fp16)[name = tensor("op_38089_cast_fp16")]; + tensor var_38090_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3977_cast_fp16)[name = tensor("op_38090_cast_fp16")]; + tensor var_38091_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3979_cast_fp16)[name = tensor("op_38091_cast_fp16")]; + tensor var_38092_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3981_cast_fp16)[name = tensor("op_38092_cast_fp16")]; + tensor var_38093_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3983_cast_fp16)[name = tensor("op_38093_cast_fp16")]; + tensor var_38094_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3985_cast_fp16)[name = tensor("op_38094_cast_fp16")]; + tensor var_38095_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3987_cast_fp16)[name = tensor("op_38095_cast_fp16")]; + tensor var_38096_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3989_cast_fp16)[name = tensor("op_38096_cast_fp16")]; + tensor var_38097_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3991_cast_fp16)[name = tensor("op_38097_cast_fp16")]; + tensor var_38098_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3993_cast_fp16)[name = tensor("op_38098_cast_fp16")]; + tensor var_38099_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3995_cast_fp16)[name = tensor("op_38099_cast_fp16")]; + tensor var_38100_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3997_cast_fp16)[name = tensor("op_38100_cast_fp16")]; + tensor var_38101_cast_fp16 = softmax(axis = var_36847, x = aw_chunk_3999_cast_fp16)[name = tensor("op_38101_cast_fp16")]; + tensor var_38103_equation_0 = const()[name = tensor("op_38103_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38103_cast_fp16 = einsum(equation = var_38103_equation_0, values = (var_37623_cast_fp16, var_38022_cast_fp16))[name = tensor("op_38103_cast_fp16")]; + tensor var_38105_equation_0 = const()[name = tensor("op_38105_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38105_cast_fp16 = einsum(equation = var_38105_equation_0, values = (var_37623_cast_fp16, var_38023_cast_fp16))[name = tensor("op_38105_cast_fp16")]; + tensor var_38107_equation_0 = const()[name = tensor("op_38107_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38107_cast_fp16 = einsum(equation = var_38107_equation_0, values = (var_37623_cast_fp16, var_38024_cast_fp16))[name = tensor("op_38107_cast_fp16")]; + tensor var_38109_equation_0 = const()[name = tensor("op_38109_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38109_cast_fp16 = einsum(equation = var_38109_equation_0, values = (var_37623_cast_fp16, var_38025_cast_fp16))[name = tensor("op_38109_cast_fp16")]; + tensor var_38111_equation_0 = const()[name = tensor("op_38111_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38111_cast_fp16 = einsum(equation = var_38111_equation_0, values = (var_37627_cast_fp16, var_38026_cast_fp16))[name = tensor("op_38111_cast_fp16")]; + tensor var_38113_equation_0 = const()[name = tensor("op_38113_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38113_cast_fp16 = einsum(equation = var_38113_equation_0, values = (var_37627_cast_fp16, var_38027_cast_fp16))[name = tensor("op_38113_cast_fp16")]; + tensor var_38115_equation_0 = const()[name = tensor("op_38115_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38115_cast_fp16 = einsum(equation = var_38115_equation_0, values = (var_37627_cast_fp16, var_38028_cast_fp16))[name = tensor("op_38115_cast_fp16")]; + tensor var_38117_equation_0 = const()[name = tensor("op_38117_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38117_cast_fp16 = einsum(equation = var_38117_equation_0, values = (var_37627_cast_fp16, var_38029_cast_fp16))[name = tensor("op_38117_cast_fp16")]; + tensor var_38119_equation_0 = const()[name = tensor("op_38119_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38119_cast_fp16 = einsum(equation = var_38119_equation_0, values = (var_37631_cast_fp16, var_38030_cast_fp16))[name = tensor("op_38119_cast_fp16")]; + tensor var_38121_equation_0 = const()[name = tensor("op_38121_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38121_cast_fp16 = einsum(equation = var_38121_equation_0, values = (var_37631_cast_fp16, var_38031_cast_fp16))[name = tensor("op_38121_cast_fp16")]; + tensor var_38123_equation_0 = const()[name = tensor("op_38123_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38123_cast_fp16 = einsum(equation = var_38123_equation_0, values = (var_37631_cast_fp16, var_38032_cast_fp16))[name = tensor("op_38123_cast_fp16")]; + tensor var_38125_equation_0 = const()[name = tensor("op_38125_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38125_cast_fp16 = einsum(equation = var_38125_equation_0, values = (var_37631_cast_fp16, var_38033_cast_fp16))[name = tensor("op_38125_cast_fp16")]; + tensor var_38127_equation_0 = const()[name = tensor("op_38127_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38127_cast_fp16 = einsum(equation = var_38127_equation_0, values = (var_37635_cast_fp16, var_38034_cast_fp16))[name = tensor("op_38127_cast_fp16")]; + tensor var_38129_equation_0 = const()[name = tensor("op_38129_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38129_cast_fp16 = einsum(equation = var_38129_equation_0, values = (var_37635_cast_fp16, var_38035_cast_fp16))[name = tensor("op_38129_cast_fp16")]; + tensor var_38131_equation_0 = const()[name = tensor("op_38131_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38131_cast_fp16 = einsum(equation = var_38131_equation_0, values = (var_37635_cast_fp16, var_38036_cast_fp16))[name = tensor("op_38131_cast_fp16")]; + tensor var_38133_equation_0 = const()[name = tensor("op_38133_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38133_cast_fp16 = einsum(equation = var_38133_equation_0, values = (var_37635_cast_fp16, var_38037_cast_fp16))[name = tensor("op_38133_cast_fp16")]; + tensor var_38135_equation_0 = const()[name = tensor("op_38135_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38135_cast_fp16 = einsum(equation = var_38135_equation_0, values = (var_37639_cast_fp16, var_38038_cast_fp16))[name = tensor("op_38135_cast_fp16")]; + tensor var_38137_equation_0 = const()[name = tensor("op_38137_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38137_cast_fp16 = einsum(equation = var_38137_equation_0, values = (var_37639_cast_fp16, var_38039_cast_fp16))[name = tensor("op_38137_cast_fp16")]; + tensor var_38139_equation_0 = const()[name = tensor("op_38139_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38139_cast_fp16 = einsum(equation = var_38139_equation_0, values = (var_37639_cast_fp16, var_38040_cast_fp16))[name = tensor("op_38139_cast_fp16")]; + tensor var_38141_equation_0 = const()[name = tensor("op_38141_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38141_cast_fp16 = einsum(equation = var_38141_equation_0, values = (var_37639_cast_fp16, var_38041_cast_fp16))[name = tensor("op_38141_cast_fp16")]; + tensor var_38143_equation_0 = const()[name = tensor("op_38143_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38143_cast_fp16 = einsum(equation = var_38143_equation_0, values = (var_37643_cast_fp16, var_38042_cast_fp16))[name = tensor("op_38143_cast_fp16")]; + tensor var_38145_equation_0 = const()[name = tensor("op_38145_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38145_cast_fp16 = einsum(equation = var_38145_equation_0, values = (var_37643_cast_fp16, var_38043_cast_fp16))[name = tensor("op_38145_cast_fp16")]; + tensor var_38147_equation_0 = const()[name = tensor("op_38147_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38147_cast_fp16 = einsum(equation = var_38147_equation_0, values = (var_37643_cast_fp16, var_38044_cast_fp16))[name = tensor("op_38147_cast_fp16")]; + tensor var_38149_equation_0 = const()[name = tensor("op_38149_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38149_cast_fp16 = einsum(equation = var_38149_equation_0, values = (var_37643_cast_fp16, var_38045_cast_fp16))[name = tensor("op_38149_cast_fp16")]; + tensor var_38151_equation_0 = const()[name = tensor("op_38151_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38151_cast_fp16 = einsum(equation = var_38151_equation_0, values = (var_37647_cast_fp16, var_38046_cast_fp16))[name = tensor("op_38151_cast_fp16")]; + tensor var_38153_equation_0 = const()[name = tensor("op_38153_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38153_cast_fp16 = einsum(equation = var_38153_equation_0, values = (var_37647_cast_fp16, var_38047_cast_fp16))[name = tensor("op_38153_cast_fp16")]; + tensor var_38155_equation_0 = const()[name = tensor("op_38155_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38155_cast_fp16 = einsum(equation = var_38155_equation_0, values = (var_37647_cast_fp16, var_38048_cast_fp16))[name = tensor("op_38155_cast_fp16")]; + tensor var_38157_equation_0 = const()[name = tensor("op_38157_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38157_cast_fp16 = einsum(equation = var_38157_equation_0, values = (var_37647_cast_fp16, var_38049_cast_fp16))[name = tensor("op_38157_cast_fp16")]; + tensor var_38159_equation_0 = const()[name = tensor("op_38159_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38159_cast_fp16 = einsum(equation = var_38159_equation_0, values = (var_37651_cast_fp16, var_38050_cast_fp16))[name = tensor("op_38159_cast_fp16")]; + tensor var_38161_equation_0 = const()[name = tensor("op_38161_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38161_cast_fp16 = einsum(equation = var_38161_equation_0, values = (var_37651_cast_fp16, var_38051_cast_fp16))[name = tensor("op_38161_cast_fp16")]; + tensor var_38163_equation_0 = const()[name = tensor("op_38163_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38163_cast_fp16 = einsum(equation = var_38163_equation_0, values = (var_37651_cast_fp16, var_38052_cast_fp16))[name = tensor("op_38163_cast_fp16")]; + tensor var_38165_equation_0 = const()[name = tensor("op_38165_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38165_cast_fp16 = einsum(equation = var_38165_equation_0, values = (var_37651_cast_fp16, var_38053_cast_fp16))[name = tensor("op_38165_cast_fp16")]; + tensor var_38167_equation_0 = const()[name = tensor("op_38167_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38167_cast_fp16 = einsum(equation = var_38167_equation_0, values = (var_37655_cast_fp16, var_38054_cast_fp16))[name = tensor("op_38167_cast_fp16")]; + tensor var_38169_equation_0 = const()[name = tensor("op_38169_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38169_cast_fp16 = einsum(equation = var_38169_equation_0, values = (var_37655_cast_fp16, var_38055_cast_fp16))[name = tensor("op_38169_cast_fp16")]; + tensor var_38171_equation_0 = const()[name = tensor("op_38171_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38171_cast_fp16 = einsum(equation = var_38171_equation_0, values = (var_37655_cast_fp16, var_38056_cast_fp16))[name = tensor("op_38171_cast_fp16")]; + tensor var_38173_equation_0 = const()[name = tensor("op_38173_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38173_cast_fp16 = einsum(equation = var_38173_equation_0, values = (var_37655_cast_fp16, var_38057_cast_fp16))[name = tensor("op_38173_cast_fp16")]; + tensor var_38175_equation_0 = const()[name = tensor("op_38175_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38175_cast_fp16 = einsum(equation = var_38175_equation_0, values = (var_37659_cast_fp16, var_38058_cast_fp16))[name = tensor("op_38175_cast_fp16")]; + tensor var_38177_equation_0 = const()[name = tensor("op_38177_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38177_cast_fp16 = einsum(equation = var_38177_equation_0, values = (var_37659_cast_fp16, var_38059_cast_fp16))[name = tensor("op_38177_cast_fp16")]; + tensor var_38179_equation_0 = const()[name = tensor("op_38179_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38179_cast_fp16 = einsum(equation = var_38179_equation_0, values = (var_37659_cast_fp16, var_38060_cast_fp16))[name = tensor("op_38179_cast_fp16")]; + tensor var_38181_equation_0 = const()[name = tensor("op_38181_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38181_cast_fp16 = einsum(equation = var_38181_equation_0, values = (var_37659_cast_fp16, var_38061_cast_fp16))[name = tensor("op_38181_cast_fp16")]; + tensor var_38183_equation_0 = const()[name = tensor("op_38183_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38183_cast_fp16 = einsum(equation = var_38183_equation_0, values = (var_37663_cast_fp16, var_38062_cast_fp16))[name = tensor("op_38183_cast_fp16")]; + tensor var_38185_equation_0 = const()[name = tensor("op_38185_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38185_cast_fp16 = einsum(equation = var_38185_equation_0, values = (var_37663_cast_fp16, var_38063_cast_fp16))[name = tensor("op_38185_cast_fp16")]; + tensor var_38187_equation_0 = const()[name = tensor("op_38187_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38187_cast_fp16 = einsum(equation = var_38187_equation_0, values = (var_37663_cast_fp16, var_38064_cast_fp16))[name = tensor("op_38187_cast_fp16")]; + tensor var_38189_equation_0 = const()[name = tensor("op_38189_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38189_cast_fp16 = einsum(equation = var_38189_equation_0, values = (var_37663_cast_fp16, var_38065_cast_fp16))[name = tensor("op_38189_cast_fp16")]; + tensor var_38191_equation_0 = const()[name = tensor("op_38191_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38191_cast_fp16 = einsum(equation = var_38191_equation_0, values = (var_37667_cast_fp16, var_38066_cast_fp16))[name = tensor("op_38191_cast_fp16")]; + tensor var_38193_equation_0 = const()[name = tensor("op_38193_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38193_cast_fp16 = einsum(equation = var_38193_equation_0, values = (var_37667_cast_fp16, var_38067_cast_fp16))[name = tensor("op_38193_cast_fp16")]; + tensor var_38195_equation_0 = const()[name = tensor("op_38195_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38195_cast_fp16 = einsum(equation = var_38195_equation_0, values = (var_37667_cast_fp16, var_38068_cast_fp16))[name = tensor("op_38195_cast_fp16")]; + tensor var_38197_equation_0 = const()[name = tensor("op_38197_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38197_cast_fp16 = einsum(equation = var_38197_equation_0, values = (var_37667_cast_fp16, var_38069_cast_fp16))[name = tensor("op_38197_cast_fp16")]; + tensor var_38199_equation_0 = const()[name = tensor("op_38199_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38199_cast_fp16 = einsum(equation = var_38199_equation_0, values = (var_37671_cast_fp16, var_38070_cast_fp16))[name = tensor("op_38199_cast_fp16")]; + tensor var_38201_equation_0 = const()[name = tensor("op_38201_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38201_cast_fp16 = einsum(equation = var_38201_equation_0, values = (var_37671_cast_fp16, var_38071_cast_fp16))[name = tensor("op_38201_cast_fp16")]; + tensor var_38203_equation_0 = const()[name = tensor("op_38203_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38203_cast_fp16 = einsum(equation = var_38203_equation_0, values = (var_37671_cast_fp16, var_38072_cast_fp16))[name = tensor("op_38203_cast_fp16")]; + tensor var_38205_equation_0 = const()[name = tensor("op_38205_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38205_cast_fp16 = einsum(equation = var_38205_equation_0, values = (var_37671_cast_fp16, var_38073_cast_fp16))[name = tensor("op_38205_cast_fp16")]; + tensor var_38207_equation_0 = const()[name = tensor("op_38207_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38207_cast_fp16 = einsum(equation = var_38207_equation_0, values = (var_37675_cast_fp16, var_38074_cast_fp16))[name = tensor("op_38207_cast_fp16")]; + tensor var_38209_equation_0 = const()[name = tensor("op_38209_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38209_cast_fp16 = einsum(equation = var_38209_equation_0, values = (var_37675_cast_fp16, var_38075_cast_fp16))[name = tensor("op_38209_cast_fp16")]; + tensor var_38211_equation_0 = const()[name = tensor("op_38211_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38211_cast_fp16 = einsum(equation = var_38211_equation_0, values = (var_37675_cast_fp16, var_38076_cast_fp16))[name = tensor("op_38211_cast_fp16")]; + tensor var_38213_equation_0 = const()[name = tensor("op_38213_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38213_cast_fp16 = einsum(equation = var_38213_equation_0, values = (var_37675_cast_fp16, var_38077_cast_fp16))[name = tensor("op_38213_cast_fp16")]; + tensor var_38215_equation_0 = const()[name = tensor("op_38215_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38215_cast_fp16 = einsum(equation = var_38215_equation_0, values = (var_37679_cast_fp16, var_38078_cast_fp16))[name = tensor("op_38215_cast_fp16")]; + tensor var_38217_equation_0 = const()[name = tensor("op_38217_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38217_cast_fp16 = einsum(equation = var_38217_equation_0, values = (var_37679_cast_fp16, var_38079_cast_fp16))[name = tensor("op_38217_cast_fp16")]; + tensor var_38219_equation_0 = const()[name = tensor("op_38219_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38219_cast_fp16 = einsum(equation = var_38219_equation_0, values = (var_37679_cast_fp16, var_38080_cast_fp16))[name = tensor("op_38219_cast_fp16")]; + tensor var_38221_equation_0 = const()[name = tensor("op_38221_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38221_cast_fp16 = einsum(equation = var_38221_equation_0, values = (var_37679_cast_fp16, var_38081_cast_fp16))[name = tensor("op_38221_cast_fp16")]; + tensor var_38223_equation_0 = const()[name = tensor("op_38223_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38223_cast_fp16 = einsum(equation = var_38223_equation_0, values = (var_37683_cast_fp16, var_38082_cast_fp16))[name = tensor("op_38223_cast_fp16")]; + tensor var_38225_equation_0 = const()[name = tensor("op_38225_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38225_cast_fp16 = einsum(equation = var_38225_equation_0, values = (var_37683_cast_fp16, var_38083_cast_fp16))[name = tensor("op_38225_cast_fp16")]; + tensor var_38227_equation_0 = const()[name = tensor("op_38227_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38227_cast_fp16 = einsum(equation = var_38227_equation_0, values = (var_37683_cast_fp16, var_38084_cast_fp16))[name = tensor("op_38227_cast_fp16")]; + tensor var_38229_equation_0 = const()[name = tensor("op_38229_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38229_cast_fp16 = einsum(equation = var_38229_equation_0, values = (var_37683_cast_fp16, var_38085_cast_fp16))[name = tensor("op_38229_cast_fp16")]; + tensor var_38231_equation_0 = const()[name = tensor("op_38231_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38231_cast_fp16 = einsum(equation = var_38231_equation_0, values = (var_37687_cast_fp16, var_38086_cast_fp16))[name = tensor("op_38231_cast_fp16")]; + tensor var_38233_equation_0 = const()[name = tensor("op_38233_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38233_cast_fp16 = einsum(equation = var_38233_equation_0, values = (var_37687_cast_fp16, var_38087_cast_fp16))[name = tensor("op_38233_cast_fp16")]; + tensor var_38235_equation_0 = const()[name = tensor("op_38235_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38235_cast_fp16 = einsum(equation = var_38235_equation_0, values = (var_37687_cast_fp16, var_38088_cast_fp16))[name = tensor("op_38235_cast_fp16")]; + tensor var_38237_equation_0 = const()[name = tensor("op_38237_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38237_cast_fp16 = einsum(equation = var_38237_equation_0, values = (var_37687_cast_fp16, var_38089_cast_fp16))[name = tensor("op_38237_cast_fp16")]; + tensor var_38239_equation_0 = const()[name = tensor("op_38239_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38239_cast_fp16 = einsum(equation = var_38239_equation_0, values = (var_37691_cast_fp16, var_38090_cast_fp16))[name = tensor("op_38239_cast_fp16")]; + tensor var_38241_equation_0 = const()[name = tensor("op_38241_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38241_cast_fp16 = einsum(equation = var_38241_equation_0, values = (var_37691_cast_fp16, var_38091_cast_fp16))[name = tensor("op_38241_cast_fp16")]; + tensor var_38243_equation_0 = const()[name = tensor("op_38243_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38243_cast_fp16 = einsum(equation = var_38243_equation_0, values = (var_37691_cast_fp16, var_38092_cast_fp16))[name = tensor("op_38243_cast_fp16")]; + tensor var_38245_equation_0 = const()[name = tensor("op_38245_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38245_cast_fp16 = einsum(equation = var_38245_equation_0, values = (var_37691_cast_fp16, var_38093_cast_fp16))[name = tensor("op_38245_cast_fp16")]; + tensor var_38247_equation_0 = const()[name = tensor("op_38247_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38247_cast_fp16 = einsum(equation = var_38247_equation_0, values = (var_37695_cast_fp16, var_38094_cast_fp16))[name = tensor("op_38247_cast_fp16")]; + tensor var_38249_equation_0 = const()[name = tensor("op_38249_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38249_cast_fp16 = einsum(equation = var_38249_equation_0, values = (var_37695_cast_fp16, var_38095_cast_fp16))[name = tensor("op_38249_cast_fp16")]; + tensor var_38251_equation_0 = const()[name = tensor("op_38251_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38251_cast_fp16 = einsum(equation = var_38251_equation_0, values = (var_37695_cast_fp16, var_38096_cast_fp16))[name = tensor("op_38251_cast_fp16")]; + tensor var_38253_equation_0 = const()[name = tensor("op_38253_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38253_cast_fp16 = einsum(equation = var_38253_equation_0, values = (var_37695_cast_fp16, var_38097_cast_fp16))[name = tensor("op_38253_cast_fp16")]; + tensor var_38255_equation_0 = const()[name = tensor("op_38255_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38255_cast_fp16 = einsum(equation = var_38255_equation_0, values = (var_37699_cast_fp16, var_38098_cast_fp16))[name = tensor("op_38255_cast_fp16")]; + tensor var_38257_equation_0 = const()[name = tensor("op_38257_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38257_cast_fp16 = einsum(equation = var_38257_equation_0, values = (var_37699_cast_fp16, var_38099_cast_fp16))[name = tensor("op_38257_cast_fp16")]; + tensor var_38259_equation_0 = const()[name = tensor("op_38259_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38259_cast_fp16 = einsum(equation = var_38259_equation_0, values = (var_37699_cast_fp16, var_38100_cast_fp16))[name = tensor("op_38259_cast_fp16")]; + tensor var_38261_equation_0 = const()[name = tensor("op_38261_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_38261_cast_fp16 = einsum(equation = var_38261_equation_0, values = (var_37699_cast_fp16, var_38101_cast_fp16))[name = tensor("op_38261_cast_fp16")]; + tensor var_38263_interleave_0 = const()[name = tensor("op_38263_interleave_0"), val = tensor(false)]; + tensor var_38263_cast_fp16 = concat(axis = var_36822, interleave = var_38263_interleave_0, values = (var_38103_cast_fp16, var_38105_cast_fp16, var_38107_cast_fp16, var_38109_cast_fp16))[name = tensor("op_38263_cast_fp16")]; + tensor var_38265_interleave_0 = const()[name = tensor("op_38265_interleave_0"), val = tensor(false)]; + tensor var_38265_cast_fp16 = concat(axis = var_36822, interleave = var_38265_interleave_0, values = (var_38111_cast_fp16, var_38113_cast_fp16, var_38115_cast_fp16, var_38117_cast_fp16))[name = tensor("op_38265_cast_fp16")]; + tensor var_38267_interleave_0 = const()[name = tensor("op_38267_interleave_0"), val = tensor(false)]; + tensor var_38267_cast_fp16 = concat(axis = var_36822, interleave = var_38267_interleave_0, values = (var_38119_cast_fp16, var_38121_cast_fp16, var_38123_cast_fp16, var_38125_cast_fp16))[name = tensor("op_38267_cast_fp16")]; + tensor var_38269_interleave_0 = const()[name = tensor("op_38269_interleave_0"), val = tensor(false)]; + tensor var_38269_cast_fp16 = concat(axis = var_36822, interleave = var_38269_interleave_0, values = (var_38127_cast_fp16, var_38129_cast_fp16, var_38131_cast_fp16, var_38133_cast_fp16))[name = tensor("op_38269_cast_fp16")]; + tensor var_38271_interleave_0 = const()[name = tensor("op_38271_interleave_0"), val = tensor(false)]; + tensor var_38271_cast_fp16 = concat(axis = var_36822, interleave = var_38271_interleave_0, values = (var_38135_cast_fp16, var_38137_cast_fp16, var_38139_cast_fp16, var_38141_cast_fp16))[name = tensor("op_38271_cast_fp16")]; + tensor var_38273_interleave_0 = const()[name = tensor("op_38273_interleave_0"), val = tensor(false)]; + tensor var_38273_cast_fp16 = concat(axis = var_36822, interleave = var_38273_interleave_0, values = (var_38143_cast_fp16, var_38145_cast_fp16, var_38147_cast_fp16, var_38149_cast_fp16))[name = tensor("op_38273_cast_fp16")]; + tensor var_38275_interleave_0 = const()[name = tensor("op_38275_interleave_0"), val = tensor(false)]; + tensor var_38275_cast_fp16 = concat(axis = var_36822, interleave = var_38275_interleave_0, values = (var_38151_cast_fp16, var_38153_cast_fp16, var_38155_cast_fp16, var_38157_cast_fp16))[name = tensor("op_38275_cast_fp16")]; + tensor var_38277_interleave_0 = const()[name = tensor("op_38277_interleave_0"), val = tensor(false)]; + tensor var_38277_cast_fp16 = concat(axis = var_36822, interleave = var_38277_interleave_0, values = (var_38159_cast_fp16, var_38161_cast_fp16, var_38163_cast_fp16, var_38165_cast_fp16))[name = tensor("op_38277_cast_fp16")]; + tensor var_38279_interleave_0 = const()[name = tensor("op_38279_interleave_0"), val = tensor(false)]; + tensor var_38279_cast_fp16 = concat(axis = var_36822, interleave = var_38279_interleave_0, values = (var_38167_cast_fp16, var_38169_cast_fp16, var_38171_cast_fp16, var_38173_cast_fp16))[name = tensor("op_38279_cast_fp16")]; + tensor var_38281_interleave_0 = const()[name = tensor("op_38281_interleave_0"), val = tensor(false)]; + tensor var_38281_cast_fp16 = concat(axis = var_36822, interleave = var_38281_interleave_0, values = (var_38175_cast_fp16, var_38177_cast_fp16, var_38179_cast_fp16, var_38181_cast_fp16))[name = tensor("op_38281_cast_fp16")]; + tensor var_38283_interleave_0 = const()[name = tensor("op_38283_interleave_0"), val = tensor(false)]; + tensor var_38283_cast_fp16 = concat(axis = var_36822, interleave = var_38283_interleave_0, values = (var_38183_cast_fp16, var_38185_cast_fp16, var_38187_cast_fp16, var_38189_cast_fp16))[name = tensor("op_38283_cast_fp16")]; + tensor var_38285_interleave_0 = const()[name = tensor("op_38285_interleave_0"), val = tensor(false)]; + tensor var_38285_cast_fp16 = concat(axis = var_36822, interleave = var_38285_interleave_0, values = (var_38191_cast_fp16, var_38193_cast_fp16, var_38195_cast_fp16, var_38197_cast_fp16))[name = tensor("op_38285_cast_fp16")]; + tensor var_38287_interleave_0 = const()[name = tensor("op_38287_interleave_0"), val = tensor(false)]; + tensor var_38287_cast_fp16 = concat(axis = var_36822, interleave = var_38287_interleave_0, values = (var_38199_cast_fp16, var_38201_cast_fp16, var_38203_cast_fp16, var_38205_cast_fp16))[name = tensor("op_38287_cast_fp16")]; + tensor var_38289_interleave_0 = const()[name = tensor("op_38289_interleave_0"), val = tensor(false)]; + tensor var_38289_cast_fp16 = concat(axis = var_36822, interleave = var_38289_interleave_0, values = (var_38207_cast_fp16, var_38209_cast_fp16, var_38211_cast_fp16, var_38213_cast_fp16))[name = tensor("op_38289_cast_fp16")]; + tensor var_38291_interleave_0 = const()[name = tensor("op_38291_interleave_0"), val = tensor(false)]; + tensor var_38291_cast_fp16 = concat(axis = var_36822, interleave = var_38291_interleave_0, values = (var_38215_cast_fp16, var_38217_cast_fp16, var_38219_cast_fp16, var_38221_cast_fp16))[name = tensor("op_38291_cast_fp16")]; + tensor var_38293_interleave_0 = const()[name = tensor("op_38293_interleave_0"), val = tensor(false)]; + tensor var_38293_cast_fp16 = concat(axis = var_36822, interleave = var_38293_interleave_0, values = (var_38223_cast_fp16, var_38225_cast_fp16, var_38227_cast_fp16, var_38229_cast_fp16))[name = tensor("op_38293_cast_fp16")]; + tensor var_38295_interleave_0 = const()[name = tensor("op_38295_interleave_0"), val = tensor(false)]; + tensor var_38295_cast_fp16 = concat(axis = var_36822, interleave = var_38295_interleave_0, values = (var_38231_cast_fp16, var_38233_cast_fp16, var_38235_cast_fp16, var_38237_cast_fp16))[name = tensor("op_38295_cast_fp16")]; + tensor var_38297_interleave_0 = const()[name = tensor("op_38297_interleave_0"), val = tensor(false)]; + tensor var_38297_cast_fp16 = concat(axis = var_36822, interleave = var_38297_interleave_0, values = (var_38239_cast_fp16, var_38241_cast_fp16, var_38243_cast_fp16, var_38245_cast_fp16))[name = tensor("op_38297_cast_fp16")]; + tensor var_38299_interleave_0 = const()[name = tensor("op_38299_interleave_0"), val = tensor(false)]; + tensor var_38299_cast_fp16 = concat(axis = var_36822, interleave = var_38299_interleave_0, values = (var_38247_cast_fp16, var_38249_cast_fp16, var_38251_cast_fp16, var_38253_cast_fp16))[name = tensor("op_38299_cast_fp16")]; + tensor var_38301_interleave_0 = const()[name = tensor("op_38301_interleave_0"), val = tensor(false)]; + tensor var_38301_cast_fp16 = concat(axis = var_36822, interleave = var_38301_interleave_0, values = (var_38255_cast_fp16, var_38257_cast_fp16, var_38259_cast_fp16, var_38261_cast_fp16))[name = tensor("op_38301_cast_fp16")]; + tensor input_193_interleave_0 = const()[name = tensor("input_193_interleave_0"), val = tensor(false)]; + tensor input_193_cast_fp16 = concat(axis = var_36847, interleave = input_193_interleave_0, values = (var_38263_cast_fp16, var_38265_cast_fp16, var_38267_cast_fp16, var_38269_cast_fp16, var_38271_cast_fp16, var_38273_cast_fp16, var_38275_cast_fp16, var_38277_cast_fp16, var_38279_cast_fp16, var_38281_cast_fp16, var_38283_cast_fp16, var_38285_cast_fp16, var_38287_cast_fp16, var_38289_cast_fp16, var_38291_cast_fp16, var_38293_cast_fp16, var_38295_cast_fp16, var_38297_cast_fp16, var_38299_cast_fp16, var_38301_cast_fp16))[name = tensor("input_193_cast_fp16")]; + tensor var_38306 = const()[name = tensor("op_38306"), val = tensor([1, 1])]; + tensor var_38308 = const()[name = tensor("op_38308"), val = tensor([1, 1])]; + tensor obj_99_pad_type_0 = const()[name = tensor("obj_99_pad_type_0"), val = tensor("custom")]; + tensor obj_99_pad_0 = const()[name = tensor("obj_99_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_24_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_24_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(968615360)))]; + tensor layers_24_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_24_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(971892224)))]; + tensor obj_99_cast_fp16 = conv(bias = layers_24_self_attn_o_proj_bias_to_fp16, dilations = var_38308, groups = var_36847, pad = obj_99_pad_0, pad_type = obj_99_pad_type_0, strides = var_38306, weight = layers_24_self_attn_o_proj_weight_to_fp16, x = input_193_cast_fp16)[name = tensor("obj_99_cast_fp16")]; + tensor inputs_99_cast_fp16 = add(x = inputs_97_cast_fp16, y = obj_99_cast_fp16)[name = tensor("inputs_99_cast_fp16")]; + tensor var_38314 = const()[name = tensor("op_38314"), val = tensor([1])]; + tensor channels_mean_99_cast_fp16 = reduce_mean(axes = var_38314, keep_dims = var_36848, x = inputs_99_cast_fp16)[name = tensor("channels_mean_99_cast_fp16")]; + tensor zero_mean_99_cast_fp16 = sub(x = inputs_99_cast_fp16, y = channels_mean_99_cast_fp16)[name = tensor("zero_mean_99_cast_fp16")]; + tensor zero_mean_sq_99_cast_fp16 = mul(x = zero_mean_99_cast_fp16, y = zero_mean_99_cast_fp16)[name = tensor("zero_mean_sq_99_cast_fp16")]; + tensor var_38318 = const()[name = tensor("op_38318"), val = tensor([1])]; + tensor var_38319_cast_fp16 = reduce_mean(axes = var_38318, keep_dims = var_36848, x = zero_mean_sq_99_cast_fp16)[name = tensor("op_38319_cast_fp16")]; + tensor var_38320_to_fp16 = const()[name = tensor("op_38320_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_38321_cast_fp16 = add(x = var_38319_cast_fp16, y = var_38320_to_fp16)[name = tensor("op_38321_cast_fp16")]; + tensor denom_99_epsilon_0_to_fp16 = const()[name = tensor("denom_99_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_99_cast_fp16 = rsqrt(epsilon = denom_99_epsilon_0_to_fp16, x = var_38321_cast_fp16)[name = tensor("denom_99_cast_fp16")]; + tensor out_99_cast_fp16 = mul(x = zero_mean_99_cast_fp16, y = denom_99_cast_fp16)[name = tensor("out_99_cast_fp16")]; + tensor input_195_gamma_0_to_fp16 = const()[name = tensor("input_195_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(971894848)))]; + tensor input_195_beta_0_to_fp16 = const()[name = tensor("input_195_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(971897472)))]; + tensor input_195_epsilon_0_to_fp16 = const()[name = tensor("input_195_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_195_cast_fp16 = batch_norm(beta = input_195_beta_0_to_fp16, epsilon = input_195_epsilon_0_to_fp16, gamma = input_195_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_99_cast_fp16)[name = tensor("input_195_cast_fp16")]; + tensor var_38332 = const()[name = tensor("op_38332"), val = tensor([1, 1])]; + tensor var_38334 = const()[name = tensor("op_38334"), val = tensor([1, 1])]; + tensor input_197_pad_type_0 = const()[name = tensor("input_197_pad_type_0"), val = tensor("custom")]; + tensor input_197_pad_0 = const()[name = tensor("input_197_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_24_fc1_weight_to_fp16 = const()[name = tensor("layers_24_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(971900096)))]; + tensor layers_24_fc1_bias_to_fp16 = const()[name = tensor("layers_24_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(985007360)))]; + tensor input_197_cast_fp16 = conv(bias = layers_24_fc1_bias_to_fp16, dilations = var_38334, groups = var_36847, pad = input_197_pad_0, pad_type = input_197_pad_type_0, strides = var_38332, weight = layers_24_fc1_weight_to_fp16, x = input_195_cast_fp16)[name = tensor("input_197_cast_fp16")]; + tensor input_199_mode_0 = const()[name = tensor("input_199_mode_0"), val = tensor("EXACT")]; + tensor input_199_cast_fp16 = gelu(mode = input_199_mode_0, x = input_197_cast_fp16)[name = tensor("input_199_cast_fp16")]; + tensor var_38340 = const()[name = tensor("op_38340"), val = tensor([1, 1])]; + tensor var_38342 = const()[name = tensor("op_38342"), val = tensor([1, 1])]; + tensor hidden_states_53_pad_type_0 = const()[name = tensor("hidden_states_53_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_53_pad_0 = const()[name = tensor("hidden_states_53_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_24_fc2_weight_to_fp16 = const()[name = tensor("layers_24_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(985017664)))]; + tensor layers_24_fc2_bias_to_fp16 = const()[name = tensor("layers_24_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(998124928)))]; + tensor hidden_states_53_cast_fp16 = conv(bias = layers_24_fc2_bias_to_fp16, dilations = var_38342, groups = var_36847, pad = hidden_states_53_pad_0, pad_type = hidden_states_53_pad_type_0, strides = var_38340, weight = layers_24_fc2_weight_to_fp16, x = input_199_cast_fp16)[name = tensor("hidden_states_53_cast_fp16")]; + tensor inputs_101_cast_fp16 = add(x = inputs_99_cast_fp16, y = hidden_states_53_cast_fp16)[name = tensor("inputs_101_cast_fp16")]; + tensor var_38349 = const()[name = tensor("op_38349"), val = tensor(3)]; + tensor var_38374 = const()[name = tensor("op_38374"), val = tensor(1)]; + tensor var_38375 = const()[name = tensor("op_38375"), val = tensor(true)]; + tensor var_38385 = const()[name = tensor("op_38385"), val = tensor([1])]; + tensor channels_mean_101_cast_fp16 = reduce_mean(axes = var_38385, keep_dims = var_38375, x = inputs_101_cast_fp16)[name = tensor("channels_mean_101_cast_fp16")]; + tensor zero_mean_101_cast_fp16 = sub(x = inputs_101_cast_fp16, y = channels_mean_101_cast_fp16)[name = tensor("zero_mean_101_cast_fp16")]; + tensor zero_mean_sq_101_cast_fp16 = mul(x = zero_mean_101_cast_fp16, y = zero_mean_101_cast_fp16)[name = tensor("zero_mean_sq_101_cast_fp16")]; + tensor var_38389 = const()[name = tensor("op_38389"), val = tensor([1])]; + tensor var_38390_cast_fp16 = reduce_mean(axes = var_38389, keep_dims = var_38375, x = zero_mean_sq_101_cast_fp16)[name = tensor("op_38390_cast_fp16")]; + tensor var_38391_to_fp16 = const()[name = tensor("op_38391_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_38392_cast_fp16 = add(x = var_38390_cast_fp16, y = var_38391_to_fp16)[name = tensor("op_38392_cast_fp16")]; + tensor denom_101_epsilon_0_to_fp16 = const()[name = tensor("denom_101_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_101_cast_fp16 = rsqrt(epsilon = denom_101_epsilon_0_to_fp16, x = var_38392_cast_fp16)[name = tensor("denom_101_cast_fp16")]; + tensor out_101_cast_fp16 = mul(x = zero_mean_101_cast_fp16, y = denom_101_cast_fp16)[name = tensor("out_101_cast_fp16")]; + tensor obj_101_gamma_0_to_fp16 = const()[name = tensor("obj_101_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(998127552)))]; + tensor obj_101_beta_0_to_fp16 = const()[name = tensor("obj_101_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(998130176)))]; + tensor obj_101_epsilon_0_to_fp16 = const()[name = tensor("obj_101_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_101_cast_fp16 = batch_norm(beta = obj_101_beta_0_to_fp16, epsilon = obj_101_epsilon_0_to_fp16, gamma = obj_101_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_101_cast_fp16)[name = tensor("obj_101_cast_fp16")]; + tensor var_38407 = const()[name = tensor("op_38407"), val = tensor([1, 1])]; + tensor var_38409 = const()[name = tensor("op_38409"), val = tensor([1, 1])]; + tensor query_51_pad_type_0 = const()[name = tensor("query_51_pad_type_0"), val = tensor("custom")]; + tensor query_51_pad_0 = const()[name = tensor("query_51_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_25_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_25_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(998132800)))]; + tensor layers_25_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_25_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1001409664)))]; + tensor query_51_cast_fp16 = conv(bias = layers_25_self_attn_q_proj_bias_to_fp16, dilations = var_38409, groups = var_38374, pad = query_51_pad_0, pad_type = query_51_pad_type_0, strides = var_38407, weight = layers_25_self_attn_q_proj_weight_to_fp16, x = obj_101_cast_fp16)[name = tensor("query_51_cast_fp16")]; + tensor var_38413 = const()[name = tensor("op_38413"), val = tensor([1, 1])]; + tensor var_38415 = const()[name = tensor("op_38415"), val = tensor([1, 1])]; + tensor key_51_pad_type_0 = const()[name = tensor("key_51_pad_type_0"), val = tensor("custom")]; + tensor key_51_pad_0 = const()[name = tensor("key_51_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_25_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_25_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1001412288)))]; + tensor key_51_cast_fp16 = conv(dilations = var_38415, groups = var_38374, pad = key_51_pad_0, pad_type = key_51_pad_type_0, strides = var_38413, weight = layers_25_self_attn_k_proj_weight_to_fp16, x = obj_101_cast_fp16)[name = tensor("key_51_cast_fp16")]; + tensor var_38420 = const()[name = tensor("op_38420"), val = tensor([1, 1])]; + tensor var_38422 = const()[name = tensor("op_38422"), val = tensor([1, 1])]; + tensor value_51_pad_type_0 = const()[name = tensor("value_51_pad_type_0"), val = tensor("custom")]; + tensor value_51_pad_0 = const()[name = tensor("value_51_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_25_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_25_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1004689152)))]; + tensor layers_25_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_25_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1007966016)))]; + tensor value_51_cast_fp16 = conv(bias = layers_25_self_attn_v_proj_bias_to_fp16, dilations = var_38422, groups = var_38374, pad = value_51_pad_0, pad_type = value_51_pad_type_0, strides = var_38420, weight = layers_25_self_attn_v_proj_weight_to_fp16, x = obj_101_cast_fp16)[name = tensor("value_51_cast_fp16")]; + tensor var_38429_begin_0 = const()[name = tensor("op_38429_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_38429_end_0 = const()[name = tensor("op_38429_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_38429_end_mask_0 = const()[name = tensor("op_38429_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_38429_cast_fp16 = slice_by_index(begin = var_38429_begin_0, end = var_38429_end_0, end_mask = var_38429_end_mask_0, x = query_51_cast_fp16)[name = tensor("op_38429_cast_fp16")]; + tensor var_38433_begin_0 = const()[name = tensor("op_38433_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_38433_end_0 = const()[name = tensor("op_38433_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_38433_end_mask_0 = const()[name = tensor("op_38433_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_38433_cast_fp16 = slice_by_index(begin = var_38433_begin_0, end = var_38433_end_0, end_mask = var_38433_end_mask_0, x = query_51_cast_fp16)[name = tensor("op_38433_cast_fp16")]; + tensor var_38437_begin_0 = const()[name = tensor("op_38437_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_38437_end_0 = const()[name = tensor("op_38437_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_38437_end_mask_0 = const()[name = tensor("op_38437_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_38437_cast_fp16 = slice_by_index(begin = var_38437_begin_0, end = var_38437_end_0, end_mask = var_38437_end_mask_0, x = query_51_cast_fp16)[name = tensor("op_38437_cast_fp16")]; + tensor var_38441_begin_0 = const()[name = tensor("op_38441_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_38441_end_0 = const()[name = tensor("op_38441_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_38441_end_mask_0 = const()[name = tensor("op_38441_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_38441_cast_fp16 = slice_by_index(begin = var_38441_begin_0, end = var_38441_end_0, end_mask = var_38441_end_mask_0, x = query_51_cast_fp16)[name = tensor("op_38441_cast_fp16")]; + tensor var_38445_begin_0 = const()[name = tensor("op_38445_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_38445_end_0 = const()[name = tensor("op_38445_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_38445_end_mask_0 = const()[name = tensor("op_38445_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_38445_cast_fp16 = slice_by_index(begin = var_38445_begin_0, end = var_38445_end_0, end_mask = var_38445_end_mask_0, x = query_51_cast_fp16)[name = tensor("op_38445_cast_fp16")]; + tensor var_38449_begin_0 = const()[name = tensor("op_38449_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_38449_end_0 = const()[name = tensor("op_38449_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_38449_end_mask_0 = const()[name = tensor("op_38449_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_38449_cast_fp16 = slice_by_index(begin = var_38449_begin_0, end = var_38449_end_0, end_mask = var_38449_end_mask_0, x = query_51_cast_fp16)[name = tensor("op_38449_cast_fp16")]; + tensor var_38453_begin_0 = const()[name = tensor("op_38453_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_38453_end_0 = const()[name = tensor("op_38453_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_38453_end_mask_0 = const()[name = tensor("op_38453_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_38453_cast_fp16 = slice_by_index(begin = var_38453_begin_0, end = var_38453_end_0, end_mask = var_38453_end_mask_0, x = query_51_cast_fp16)[name = tensor("op_38453_cast_fp16")]; + tensor var_38457_begin_0 = const()[name = tensor("op_38457_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_38457_end_0 = const()[name = tensor("op_38457_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_38457_end_mask_0 = const()[name = tensor("op_38457_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_38457_cast_fp16 = slice_by_index(begin = var_38457_begin_0, end = var_38457_end_0, end_mask = var_38457_end_mask_0, x = query_51_cast_fp16)[name = tensor("op_38457_cast_fp16")]; + tensor var_38461_begin_0 = const()[name = tensor("op_38461_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_38461_end_0 = const()[name = tensor("op_38461_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_38461_end_mask_0 = const()[name = tensor("op_38461_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_38461_cast_fp16 = slice_by_index(begin = var_38461_begin_0, end = var_38461_end_0, end_mask = var_38461_end_mask_0, x = query_51_cast_fp16)[name = tensor("op_38461_cast_fp16")]; + tensor var_38465_begin_0 = const()[name = tensor("op_38465_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_38465_end_0 = const()[name = tensor("op_38465_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_38465_end_mask_0 = const()[name = tensor("op_38465_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_38465_cast_fp16 = slice_by_index(begin = var_38465_begin_0, end = var_38465_end_0, end_mask = var_38465_end_mask_0, x = query_51_cast_fp16)[name = tensor("op_38465_cast_fp16")]; + tensor var_38469_begin_0 = const()[name = tensor("op_38469_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_38469_end_0 = const()[name = tensor("op_38469_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_38469_end_mask_0 = const()[name = tensor("op_38469_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_38469_cast_fp16 = slice_by_index(begin = var_38469_begin_0, end = var_38469_end_0, end_mask = var_38469_end_mask_0, x = query_51_cast_fp16)[name = tensor("op_38469_cast_fp16")]; + tensor var_38473_begin_0 = const()[name = tensor("op_38473_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_38473_end_0 = const()[name = tensor("op_38473_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_38473_end_mask_0 = const()[name = tensor("op_38473_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_38473_cast_fp16 = slice_by_index(begin = var_38473_begin_0, end = var_38473_end_0, end_mask = var_38473_end_mask_0, x = query_51_cast_fp16)[name = tensor("op_38473_cast_fp16")]; + tensor var_38477_begin_0 = const()[name = tensor("op_38477_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_38477_end_0 = const()[name = tensor("op_38477_end_0"), val = tensor([1, 832, 1, 1500])]; + tensor var_38477_end_mask_0 = const()[name = tensor("op_38477_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_38477_cast_fp16 = slice_by_index(begin = var_38477_begin_0, end = var_38477_end_0, end_mask = var_38477_end_mask_0, x = query_51_cast_fp16)[name = tensor("op_38477_cast_fp16")]; + tensor var_38481_begin_0 = const()[name = tensor("op_38481_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_38481_end_0 = const()[name = tensor("op_38481_end_0"), val = tensor([1, 896, 1, 1500])]; + tensor var_38481_end_mask_0 = const()[name = tensor("op_38481_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_38481_cast_fp16 = slice_by_index(begin = var_38481_begin_0, end = var_38481_end_0, end_mask = var_38481_end_mask_0, x = query_51_cast_fp16)[name = tensor("op_38481_cast_fp16")]; + tensor var_38485_begin_0 = const()[name = tensor("op_38485_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_38485_end_0 = const()[name = tensor("op_38485_end_0"), val = tensor([1, 960, 1, 1500])]; + tensor var_38485_end_mask_0 = const()[name = tensor("op_38485_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_38485_cast_fp16 = slice_by_index(begin = var_38485_begin_0, end = var_38485_end_0, end_mask = var_38485_end_mask_0, x = query_51_cast_fp16)[name = tensor("op_38485_cast_fp16")]; + tensor var_38489_begin_0 = const()[name = tensor("op_38489_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_38489_end_0 = const()[name = tensor("op_38489_end_0"), val = tensor([1, 1024, 1, 1500])]; + tensor var_38489_end_mask_0 = const()[name = tensor("op_38489_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_38489_cast_fp16 = slice_by_index(begin = var_38489_begin_0, end = var_38489_end_0, end_mask = var_38489_end_mask_0, x = query_51_cast_fp16)[name = tensor("op_38489_cast_fp16")]; + tensor var_38493_begin_0 = const()[name = tensor("op_38493_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_38493_end_0 = const()[name = tensor("op_38493_end_0"), val = tensor([1, 1088, 1, 1500])]; + tensor var_38493_end_mask_0 = const()[name = tensor("op_38493_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_38493_cast_fp16 = slice_by_index(begin = var_38493_begin_0, end = var_38493_end_0, end_mask = var_38493_end_mask_0, x = query_51_cast_fp16)[name = tensor("op_38493_cast_fp16")]; + tensor var_38497_begin_0 = const()[name = tensor("op_38497_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_38497_end_0 = const()[name = tensor("op_38497_end_0"), val = tensor([1, 1152, 1, 1500])]; + tensor var_38497_end_mask_0 = const()[name = tensor("op_38497_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_38497_cast_fp16 = slice_by_index(begin = var_38497_begin_0, end = var_38497_end_0, end_mask = var_38497_end_mask_0, x = query_51_cast_fp16)[name = tensor("op_38497_cast_fp16")]; + tensor var_38501_begin_0 = const()[name = tensor("op_38501_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_38501_end_0 = const()[name = tensor("op_38501_end_0"), val = tensor([1, 1216, 1, 1500])]; + tensor var_38501_end_mask_0 = const()[name = tensor("op_38501_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_38501_cast_fp16 = slice_by_index(begin = var_38501_begin_0, end = var_38501_end_0, end_mask = var_38501_end_mask_0, x = query_51_cast_fp16)[name = tensor("op_38501_cast_fp16")]; + tensor var_38505_begin_0 = const()[name = tensor("op_38505_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_38505_end_0 = const()[name = tensor("op_38505_end_0"), val = tensor([1, 1280, 1, 1500])]; + tensor var_38505_end_mask_0 = const()[name = tensor("op_38505_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_38505_cast_fp16 = slice_by_index(begin = var_38505_begin_0, end = var_38505_end_0, end_mask = var_38505_end_mask_0, x = query_51_cast_fp16)[name = tensor("op_38505_cast_fp16")]; + tensor var_38514_begin_0 = const()[name = tensor("op_38514_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_38514_end_0 = const()[name = tensor("op_38514_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_38514_end_mask_0 = const()[name = tensor("op_38514_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38514_cast_fp16 = slice_by_index(begin = var_38514_begin_0, end = var_38514_end_0, end_mask = var_38514_end_mask_0, x = var_38429_cast_fp16)[name = tensor("op_38514_cast_fp16")]; + tensor var_38521_begin_0 = const()[name = tensor("op_38521_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_38521_end_0 = const()[name = tensor("op_38521_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_38521_end_mask_0 = const()[name = tensor("op_38521_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38521_cast_fp16 = slice_by_index(begin = var_38521_begin_0, end = var_38521_end_0, end_mask = var_38521_end_mask_0, x = var_38429_cast_fp16)[name = tensor("op_38521_cast_fp16")]; + tensor var_38528_begin_0 = const()[name = tensor("op_38528_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_38528_end_0 = const()[name = tensor("op_38528_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_38528_end_mask_0 = const()[name = tensor("op_38528_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38528_cast_fp16 = slice_by_index(begin = var_38528_begin_0, end = var_38528_end_0, end_mask = var_38528_end_mask_0, x = var_38429_cast_fp16)[name = tensor("op_38528_cast_fp16")]; + tensor var_38535_begin_0 = const()[name = tensor("op_38535_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_38535_end_0 = const()[name = tensor("op_38535_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_38535_end_mask_0 = const()[name = tensor("op_38535_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38535_cast_fp16 = slice_by_index(begin = var_38535_begin_0, end = var_38535_end_0, end_mask = var_38535_end_mask_0, x = var_38429_cast_fp16)[name = tensor("op_38535_cast_fp16")]; + tensor var_38542_begin_0 = const()[name = tensor("op_38542_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_38542_end_0 = const()[name = tensor("op_38542_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_38542_end_mask_0 = const()[name = tensor("op_38542_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38542_cast_fp16 = slice_by_index(begin = var_38542_begin_0, end = var_38542_end_0, end_mask = var_38542_end_mask_0, x = var_38433_cast_fp16)[name = tensor("op_38542_cast_fp16")]; + tensor var_38549_begin_0 = const()[name = tensor("op_38549_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_38549_end_0 = const()[name = tensor("op_38549_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_38549_end_mask_0 = const()[name = tensor("op_38549_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38549_cast_fp16 = slice_by_index(begin = var_38549_begin_0, end = var_38549_end_0, end_mask = var_38549_end_mask_0, x = var_38433_cast_fp16)[name = tensor("op_38549_cast_fp16")]; + tensor var_38556_begin_0 = const()[name = tensor("op_38556_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_38556_end_0 = const()[name = tensor("op_38556_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_38556_end_mask_0 = const()[name = tensor("op_38556_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38556_cast_fp16 = slice_by_index(begin = var_38556_begin_0, end = var_38556_end_0, end_mask = var_38556_end_mask_0, x = var_38433_cast_fp16)[name = tensor("op_38556_cast_fp16")]; + tensor var_38563_begin_0 = const()[name = tensor("op_38563_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_38563_end_0 = const()[name = tensor("op_38563_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_38563_end_mask_0 = const()[name = tensor("op_38563_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38563_cast_fp16 = slice_by_index(begin = var_38563_begin_0, end = var_38563_end_0, end_mask = var_38563_end_mask_0, x = var_38433_cast_fp16)[name = tensor("op_38563_cast_fp16")]; + tensor var_38570_begin_0 = const()[name = tensor("op_38570_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_38570_end_0 = const()[name = tensor("op_38570_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_38570_end_mask_0 = const()[name = tensor("op_38570_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38570_cast_fp16 = slice_by_index(begin = var_38570_begin_0, end = var_38570_end_0, end_mask = var_38570_end_mask_0, x = var_38437_cast_fp16)[name = tensor("op_38570_cast_fp16")]; + tensor var_38577_begin_0 = const()[name = tensor("op_38577_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_38577_end_0 = const()[name = tensor("op_38577_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_38577_end_mask_0 = const()[name = tensor("op_38577_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38577_cast_fp16 = slice_by_index(begin = var_38577_begin_0, end = var_38577_end_0, end_mask = var_38577_end_mask_0, x = var_38437_cast_fp16)[name = tensor("op_38577_cast_fp16")]; + tensor var_38584_begin_0 = const()[name = tensor("op_38584_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_38584_end_0 = const()[name = tensor("op_38584_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_38584_end_mask_0 = const()[name = tensor("op_38584_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38584_cast_fp16 = slice_by_index(begin = var_38584_begin_0, end = var_38584_end_0, end_mask = var_38584_end_mask_0, x = var_38437_cast_fp16)[name = tensor("op_38584_cast_fp16")]; + tensor var_38591_begin_0 = const()[name = tensor("op_38591_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_38591_end_0 = const()[name = tensor("op_38591_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_38591_end_mask_0 = const()[name = tensor("op_38591_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38591_cast_fp16 = slice_by_index(begin = var_38591_begin_0, end = var_38591_end_0, end_mask = var_38591_end_mask_0, x = var_38437_cast_fp16)[name = tensor("op_38591_cast_fp16")]; + tensor var_38598_begin_0 = const()[name = tensor("op_38598_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_38598_end_0 = const()[name = tensor("op_38598_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_38598_end_mask_0 = const()[name = tensor("op_38598_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38598_cast_fp16 = slice_by_index(begin = var_38598_begin_0, end = var_38598_end_0, end_mask = var_38598_end_mask_0, x = var_38441_cast_fp16)[name = tensor("op_38598_cast_fp16")]; + tensor var_38605_begin_0 = const()[name = tensor("op_38605_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_38605_end_0 = const()[name = tensor("op_38605_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_38605_end_mask_0 = const()[name = tensor("op_38605_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38605_cast_fp16 = slice_by_index(begin = var_38605_begin_0, end = var_38605_end_0, end_mask = var_38605_end_mask_0, x = var_38441_cast_fp16)[name = tensor("op_38605_cast_fp16")]; + tensor var_38612_begin_0 = const()[name = tensor("op_38612_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_38612_end_0 = const()[name = tensor("op_38612_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_38612_end_mask_0 = const()[name = tensor("op_38612_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38612_cast_fp16 = slice_by_index(begin = var_38612_begin_0, end = var_38612_end_0, end_mask = var_38612_end_mask_0, x = var_38441_cast_fp16)[name = tensor("op_38612_cast_fp16")]; + tensor var_38619_begin_0 = const()[name = tensor("op_38619_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_38619_end_0 = const()[name = tensor("op_38619_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_38619_end_mask_0 = const()[name = tensor("op_38619_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38619_cast_fp16 = slice_by_index(begin = var_38619_begin_0, end = var_38619_end_0, end_mask = var_38619_end_mask_0, x = var_38441_cast_fp16)[name = tensor("op_38619_cast_fp16")]; + tensor var_38626_begin_0 = const()[name = tensor("op_38626_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_38626_end_0 = const()[name = tensor("op_38626_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_38626_end_mask_0 = const()[name = tensor("op_38626_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38626_cast_fp16 = slice_by_index(begin = var_38626_begin_0, end = var_38626_end_0, end_mask = var_38626_end_mask_0, x = var_38445_cast_fp16)[name = tensor("op_38626_cast_fp16")]; + tensor var_38633_begin_0 = const()[name = tensor("op_38633_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_38633_end_0 = const()[name = tensor("op_38633_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_38633_end_mask_0 = const()[name = tensor("op_38633_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38633_cast_fp16 = slice_by_index(begin = var_38633_begin_0, end = var_38633_end_0, end_mask = var_38633_end_mask_0, x = var_38445_cast_fp16)[name = tensor("op_38633_cast_fp16")]; + tensor var_38640_begin_0 = const()[name = tensor("op_38640_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_38640_end_0 = const()[name = tensor("op_38640_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_38640_end_mask_0 = const()[name = tensor("op_38640_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38640_cast_fp16 = slice_by_index(begin = var_38640_begin_0, end = var_38640_end_0, end_mask = var_38640_end_mask_0, x = var_38445_cast_fp16)[name = tensor("op_38640_cast_fp16")]; + tensor var_38647_begin_0 = const()[name = tensor("op_38647_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_38647_end_0 = const()[name = tensor("op_38647_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_38647_end_mask_0 = const()[name = tensor("op_38647_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38647_cast_fp16 = slice_by_index(begin = var_38647_begin_0, end = var_38647_end_0, end_mask = var_38647_end_mask_0, x = var_38445_cast_fp16)[name = tensor("op_38647_cast_fp16")]; + tensor var_38654_begin_0 = const()[name = tensor("op_38654_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_38654_end_0 = const()[name = tensor("op_38654_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_38654_end_mask_0 = const()[name = tensor("op_38654_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38654_cast_fp16 = slice_by_index(begin = var_38654_begin_0, end = var_38654_end_0, end_mask = var_38654_end_mask_0, x = var_38449_cast_fp16)[name = tensor("op_38654_cast_fp16")]; + tensor var_38661_begin_0 = const()[name = tensor("op_38661_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_38661_end_0 = const()[name = tensor("op_38661_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_38661_end_mask_0 = const()[name = tensor("op_38661_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38661_cast_fp16 = slice_by_index(begin = var_38661_begin_0, end = var_38661_end_0, end_mask = var_38661_end_mask_0, x = var_38449_cast_fp16)[name = tensor("op_38661_cast_fp16")]; + tensor var_38668_begin_0 = const()[name = tensor("op_38668_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_38668_end_0 = const()[name = tensor("op_38668_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_38668_end_mask_0 = const()[name = tensor("op_38668_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38668_cast_fp16 = slice_by_index(begin = var_38668_begin_0, end = var_38668_end_0, end_mask = var_38668_end_mask_0, x = var_38449_cast_fp16)[name = tensor("op_38668_cast_fp16")]; + tensor var_38675_begin_0 = const()[name = tensor("op_38675_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_38675_end_0 = const()[name = tensor("op_38675_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_38675_end_mask_0 = const()[name = tensor("op_38675_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38675_cast_fp16 = slice_by_index(begin = var_38675_begin_0, end = var_38675_end_0, end_mask = var_38675_end_mask_0, x = var_38449_cast_fp16)[name = tensor("op_38675_cast_fp16")]; + tensor var_38682_begin_0 = const()[name = tensor("op_38682_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_38682_end_0 = const()[name = tensor("op_38682_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_38682_end_mask_0 = const()[name = tensor("op_38682_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38682_cast_fp16 = slice_by_index(begin = var_38682_begin_0, end = var_38682_end_0, end_mask = var_38682_end_mask_0, x = var_38453_cast_fp16)[name = tensor("op_38682_cast_fp16")]; + tensor var_38689_begin_0 = const()[name = tensor("op_38689_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_38689_end_0 = const()[name = tensor("op_38689_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_38689_end_mask_0 = const()[name = tensor("op_38689_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38689_cast_fp16 = slice_by_index(begin = var_38689_begin_0, end = var_38689_end_0, end_mask = var_38689_end_mask_0, x = var_38453_cast_fp16)[name = tensor("op_38689_cast_fp16")]; + tensor var_38696_begin_0 = const()[name = tensor("op_38696_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_38696_end_0 = const()[name = tensor("op_38696_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_38696_end_mask_0 = const()[name = tensor("op_38696_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38696_cast_fp16 = slice_by_index(begin = var_38696_begin_0, end = var_38696_end_0, end_mask = var_38696_end_mask_0, x = var_38453_cast_fp16)[name = tensor("op_38696_cast_fp16")]; + tensor var_38703_begin_0 = const()[name = tensor("op_38703_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_38703_end_0 = const()[name = tensor("op_38703_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_38703_end_mask_0 = const()[name = tensor("op_38703_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38703_cast_fp16 = slice_by_index(begin = var_38703_begin_0, end = var_38703_end_0, end_mask = var_38703_end_mask_0, x = var_38453_cast_fp16)[name = tensor("op_38703_cast_fp16")]; + tensor var_38710_begin_0 = const()[name = tensor("op_38710_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_38710_end_0 = const()[name = tensor("op_38710_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_38710_end_mask_0 = const()[name = tensor("op_38710_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38710_cast_fp16 = slice_by_index(begin = var_38710_begin_0, end = var_38710_end_0, end_mask = var_38710_end_mask_0, x = var_38457_cast_fp16)[name = tensor("op_38710_cast_fp16")]; + tensor var_38717_begin_0 = const()[name = tensor("op_38717_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_38717_end_0 = const()[name = tensor("op_38717_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_38717_end_mask_0 = const()[name = tensor("op_38717_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38717_cast_fp16 = slice_by_index(begin = var_38717_begin_0, end = var_38717_end_0, end_mask = var_38717_end_mask_0, x = var_38457_cast_fp16)[name = tensor("op_38717_cast_fp16")]; + tensor var_38724_begin_0 = const()[name = tensor("op_38724_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_38724_end_0 = const()[name = tensor("op_38724_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_38724_end_mask_0 = const()[name = tensor("op_38724_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38724_cast_fp16 = slice_by_index(begin = var_38724_begin_0, end = var_38724_end_0, end_mask = var_38724_end_mask_0, x = var_38457_cast_fp16)[name = tensor("op_38724_cast_fp16")]; + tensor var_38731_begin_0 = const()[name = tensor("op_38731_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_38731_end_0 = const()[name = tensor("op_38731_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_38731_end_mask_0 = const()[name = tensor("op_38731_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38731_cast_fp16 = slice_by_index(begin = var_38731_begin_0, end = var_38731_end_0, end_mask = var_38731_end_mask_0, x = var_38457_cast_fp16)[name = tensor("op_38731_cast_fp16")]; + tensor var_38738_begin_0 = const()[name = tensor("op_38738_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_38738_end_0 = const()[name = tensor("op_38738_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_38738_end_mask_0 = const()[name = tensor("op_38738_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38738_cast_fp16 = slice_by_index(begin = var_38738_begin_0, end = var_38738_end_0, end_mask = var_38738_end_mask_0, x = var_38461_cast_fp16)[name = tensor("op_38738_cast_fp16")]; + tensor var_38745_begin_0 = const()[name = tensor("op_38745_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_38745_end_0 = const()[name = tensor("op_38745_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_38745_end_mask_0 = const()[name = tensor("op_38745_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38745_cast_fp16 = slice_by_index(begin = var_38745_begin_0, end = var_38745_end_0, end_mask = var_38745_end_mask_0, x = var_38461_cast_fp16)[name = tensor("op_38745_cast_fp16")]; + tensor var_38752_begin_0 = const()[name = tensor("op_38752_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_38752_end_0 = const()[name = tensor("op_38752_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_38752_end_mask_0 = const()[name = tensor("op_38752_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38752_cast_fp16 = slice_by_index(begin = var_38752_begin_0, end = var_38752_end_0, end_mask = var_38752_end_mask_0, x = var_38461_cast_fp16)[name = tensor("op_38752_cast_fp16")]; + tensor var_38759_begin_0 = const()[name = tensor("op_38759_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_38759_end_0 = const()[name = tensor("op_38759_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_38759_end_mask_0 = const()[name = tensor("op_38759_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38759_cast_fp16 = slice_by_index(begin = var_38759_begin_0, end = var_38759_end_0, end_mask = var_38759_end_mask_0, x = var_38461_cast_fp16)[name = tensor("op_38759_cast_fp16")]; + tensor var_38766_begin_0 = const()[name = tensor("op_38766_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_38766_end_0 = const()[name = tensor("op_38766_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_38766_end_mask_0 = const()[name = tensor("op_38766_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38766_cast_fp16 = slice_by_index(begin = var_38766_begin_0, end = var_38766_end_0, end_mask = var_38766_end_mask_0, x = var_38465_cast_fp16)[name = tensor("op_38766_cast_fp16")]; + tensor var_38773_begin_0 = const()[name = tensor("op_38773_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_38773_end_0 = const()[name = tensor("op_38773_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_38773_end_mask_0 = const()[name = tensor("op_38773_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38773_cast_fp16 = slice_by_index(begin = var_38773_begin_0, end = var_38773_end_0, end_mask = var_38773_end_mask_0, x = var_38465_cast_fp16)[name = tensor("op_38773_cast_fp16")]; + tensor var_38780_begin_0 = const()[name = tensor("op_38780_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_38780_end_0 = const()[name = tensor("op_38780_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_38780_end_mask_0 = const()[name = tensor("op_38780_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38780_cast_fp16 = slice_by_index(begin = var_38780_begin_0, end = var_38780_end_0, end_mask = var_38780_end_mask_0, x = var_38465_cast_fp16)[name = tensor("op_38780_cast_fp16")]; + tensor var_38787_begin_0 = const()[name = tensor("op_38787_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_38787_end_0 = const()[name = tensor("op_38787_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_38787_end_mask_0 = const()[name = tensor("op_38787_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38787_cast_fp16 = slice_by_index(begin = var_38787_begin_0, end = var_38787_end_0, end_mask = var_38787_end_mask_0, x = var_38465_cast_fp16)[name = tensor("op_38787_cast_fp16")]; + tensor var_38794_begin_0 = const()[name = tensor("op_38794_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_38794_end_0 = const()[name = tensor("op_38794_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_38794_end_mask_0 = const()[name = tensor("op_38794_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38794_cast_fp16 = slice_by_index(begin = var_38794_begin_0, end = var_38794_end_0, end_mask = var_38794_end_mask_0, x = var_38469_cast_fp16)[name = tensor("op_38794_cast_fp16")]; + tensor var_38801_begin_0 = const()[name = tensor("op_38801_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_38801_end_0 = const()[name = tensor("op_38801_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_38801_end_mask_0 = const()[name = tensor("op_38801_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38801_cast_fp16 = slice_by_index(begin = var_38801_begin_0, end = var_38801_end_0, end_mask = var_38801_end_mask_0, x = var_38469_cast_fp16)[name = tensor("op_38801_cast_fp16")]; + tensor var_38808_begin_0 = const()[name = tensor("op_38808_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_38808_end_0 = const()[name = tensor("op_38808_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_38808_end_mask_0 = const()[name = tensor("op_38808_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38808_cast_fp16 = slice_by_index(begin = var_38808_begin_0, end = var_38808_end_0, end_mask = var_38808_end_mask_0, x = var_38469_cast_fp16)[name = tensor("op_38808_cast_fp16")]; + tensor var_38815_begin_0 = const()[name = tensor("op_38815_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_38815_end_0 = const()[name = tensor("op_38815_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_38815_end_mask_0 = const()[name = tensor("op_38815_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38815_cast_fp16 = slice_by_index(begin = var_38815_begin_0, end = var_38815_end_0, end_mask = var_38815_end_mask_0, x = var_38469_cast_fp16)[name = tensor("op_38815_cast_fp16")]; + tensor var_38822_begin_0 = const()[name = tensor("op_38822_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_38822_end_0 = const()[name = tensor("op_38822_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_38822_end_mask_0 = const()[name = tensor("op_38822_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38822_cast_fp16 = slice_by_index(begin = var_38822_begin_0, end = var_38822_end_0, end_mask = var_38822_end_mask_0, x = var_38473_cast_fp16)[name = tensor("op_38822_cast_fp16")]; + tensor var_38829_begin_0 = const()[name = tensor("op_38829_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_38829_end_0 = const()[name = tensor("op_38829_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_38829_end_mask_0 = const()[name = tensor("op_38829_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38829_cast_fp16 = slice_by_index(begin = var_38829_begin_0, end = var_38829_end_0, end_mask = var_38829_end_mask_0, x = var_38473_cast_fp16)[name = tensor("op_38829_cast_fp16")]; + tensor var_38836_begin_0 = const()[name = tensor("op_38836_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_38836_end_0 = const()[name = tensor("op_38836_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_38836_end_mask_0 = const()[name = tensor("op_38836_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38836_cast_fp16 = slice_by_index(begin = var_38836_begin_0, end = var_38836_end_0, end_mask = var_38836_end_mask_0, x = var_38473_cast_fp16)[name = tensor("op_38836_cast_fp16")]; + tensor var_38843_begin_0 = const()[name = tensor("op_38843_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_38843_end_0 = const()[name = tensor("op_38843_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_38843_end_mask_0 = const()[name = tensor("op_38843_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38843_cast_fp16 = slice_by_index(begin = var_38843_begin_0, end = var_38843_end_0, end_mask = var_38843_end_mask_0, x = var_38473_cast_fp16)[name = tensor("op_38843_cast_fp16")]; + tensor var_38850_begin_0 = const()[name = tensor("op_38850_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_38850_end_0 = const()[name = tensor("op_38850_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_38850_end_mask_0 = const()[name = tensor("op_38850_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38850_cast_fp16 = slice_by_index(begin = var_38850_begin_0, end = var_38850_end_0, end_mask = var_38850_end_mask_0, x = var_38477_cast_fp16)[name = tensor("op_38850_cast_fp16")]; + tensor var_38857_begin_0 = const()[name = tensor("op_38857_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_38857_end_0 = const()[name = tensor("op_38857_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_38857_end_mask_0 = const()[name = tensor("op_38857_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38857_cast_fp16 = slice_by_index(begin = var_38857_begin_0, end = var_38857_end_0, end_mask = var_38857_end_mask_0, x = var_38477_cast_fp16)[name = tensor("op_38857_cast_fp16")]; + tensor var_38864_begin_0 = const()[name = tensor("op_38864_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_38864_end_0 = const()[name = tensor("op_38864_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_38864_end_mask_0 = const()[name = tensor("op_38864_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38864_cast_fp16 = slice_by_index(begin = var_38864_begin_0, end = var_38864_end_0, end_mask = var_38864_end_mask_0, x = var_38477_cast_fp16)[name = tensor("op_38864_cast_fp16")]; + tensor var_38871_begin_0 = const()[name = tensor("op_38871_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_38871_end_0 = const()[name = tensor("op_38871_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_38871_end_mask_0 = const()[name = tensor("op_38871_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38871_cast_fp16 = slice_by_index(begin = var_38871_begin_0, end = var_38871_end_0, end_mask = var_38871_end_mask_0, x = var_38477_cast_fp16)[name = tensor("op_38871_cast_fp16")]; + tensor var_38878_begin_0 = const()[name = tensor("op_38878_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_38878_end_0 = const()[name = tensor("op_38878_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_38878_end_mask_0 = const()[name = tensor("op_38878_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38878_cast_fp16 = slice_by_index(begin = var_38878_begin_0, end = var_38878_end_0, end_mask = var_38878_end_mask_0, x = var_38481_cast_fp16)[name = tensor("op_38878_cast_fp16")]; + tensor var_38885_begin_0 = const()[name = tensor("op_38885_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_38885_end_0 = const()[name = tensor("op_38885_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_38885_end_mask_0 = const()[name = tensor("op_38885_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38885_cast_fp16 = slice_by_index(begin = var_38885_begin_0, end = var_38885_end_0, end_mask = var_38885_end_mask_0, x = var_38481_cast_fp16)[name = tensor("op_38885_cast_fp16")]; + tensor var_38892_begin_0 = const()[name = tensor("op_38892_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_38892_end_0 = const()[name = tensor("op_38892_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_38892_end_mask_0 = const()[name = tensor("op_38892_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38892_cast_fp16 = slice_by_index(begin = var_38892_begin_0, end = var_38892_end_0, end_mask = var_38892_end_mask_0, x = var_38481_cast_fp16)[name = tensor("op_38892_cast_fp16")]; + tensor var_38899_begin_0 = const()[name = tensor("op_38899_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_38899_end_0 = const()[name = tensor("op_38899_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_38899_end_mask_0 = const()[name = tensor("op_38899_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38899_cast_fp16 = slice_by_index(begin = var_38899_begin_0, end = var_38899_end_0, end_mask = var_38899_end_mask_0, x = var_38481_cast_fp16)[name = tensor("op_38899_cast_fp16")]; + tensor var_38906_begin_0 = const()[name = tensor("op_38906_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_38906_end_0 = const()[name = tensor("op_38906_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_38906_end_mask_0 = const()[name = tensor("op_38906_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38906_cast_fp16 = slice_by_index(begin = var_38906_begin_0, end = var_38906_end_0, end_mask = var_38906_end_mask_0, x = var_38485_cast_fp16)[name = tensor("op_38906_cast_fp16")]; + tensor var_38913_begin_0 = const()[name = tensor("op_38913_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_38913_end_0 = const()[name = tensor("op_38913_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_38913_end_mask_0 = const()[name = tensor("op_38913_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38913_cast_fp16 = slice_by_index(begin = var_38913_begin_0, end = var_38913_end_0, end_mask = var_38913_end_mask_0, x = var_38485_cast_fp16)[name = tensor("op_38913_cast_fp16")]; + tensor var_38920_begin_0 = const()[name = tensor("op_38920_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_38920_end_0 = const()[name = tensor("op_38920_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_38920_end_mask_0 = const()[name = tensor("op_38920_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38920_cast_fp16 = slice_by_index(begin = var_38920_begin_0, end = var_38920_end_0, end_mask = var_38920_end_mask_0, x = var_38485_cast_fp16)[name = tensor("op_38920_cast_fp16")]; + tensor var_38927_begin_0 = const()[name = tensor("op_38927_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_38927_end_0 = const()[name = tensor("op_38927_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_38927_end_mask_0 = const()[name = tensor("op_38927_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38927_cast_fp16 = slice_by_index(begin = var_38927_begin_0, end = var_38927_end_0, end_mask = var_38927_end_mask_0, x = var_38485_cast_fp16)[name = tensor("op_38927_cast_fp16")]; + tensor var_38934_begin_0 = const()[name = tensor("op_38934_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_38934_end_0 = const()[name = tensor("op_38934_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_38934_end_mask_0 = const()[name = tensor("op_38934_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38934_cast_fp16 = slice_by_index(begin = var_38934_begin_0, end = var_38934_end_0, end_mask = var_38934_end_mask_0, x = var_38489_cast_fp16)[name = tensor("op_38934_cast_fp16")]; + tensor var_38941_begin_0 = const()[name = tensor("op_38941_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_38941_end_0 = const()[name = tensor("op_38941_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_38941_end_mask_0 = const()[name = tensor("op_38941_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38941_cast_fp16 = slice_by_index(begin = var_38941_begin_0, end = var_38941_end_0, end_mask = var_38941_end_mask_0, x = var_38489_cast_fp16)[name = tensor("op_38941_cast_fp16")]; + tensor var_38948_begin_0 = const()[name = tensor("op_38948_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_38948_end_0 = const()[name = tensor("op_38948_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_38948_end_mask_0 = const()[name = tensor("op_38948_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38948_cast_fp16 = slice_by_index(begin = var_38948_begin_0, end = var_38948_end_0, end_mask = var_38948_end_mask_0, x = var_38489_cast_fp16)[name = tensor("op_38948_cast_fp16")]; + tensor var_38955_begin_0 = const()[name = tensor("op_38955_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_38955_end_0 = const()[name = tensor("op_38955_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_38955_end_mask_0 = const()[name = tensor("op_38955_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38955_cast_fp16 = slice_by_index(begin = var_38955_begin_0, end = var_38955_end_0, end_mask = var_38955_end_mask_0, x = var_38489_cast_fp16)[name = tensor("op_38955_cast_fp16")]; + tensor var_38962_begin_0 = const()[name = tensor("op_38962_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_38962_end_0 = const()[name = tensor("op_38962_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_38962_end_mask_0 = const()[name = tensor("op_38962_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38962_cast_fp16 = slice_by_index(begin = var_38962_begin_0, end = var_38962_end_0, end_mask = var_38962_end_mask_0, x = var_38493_cast_fp16)[name = tensor("op_38962_cast_fp16")]; + tensor var_38969_begin_0 = const()[name = tensor("op_38969_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_38969_end_0 = const()[name = tensor("op_38969_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_38969_end_mask_0 = const()[name = tensor("op_38969_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38969_cast_fp16 = slice_by_index(begin = var_38969_begin_0, end = var_38969_end_0, end_mask = var_38969_end_mask_0, x = var_38493_cast_fp16)[name = tensor("op_38969_cast_fp16")]; + tensor var_38976_begin_0 = const()[name = tensor("op_38976_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_38976_end_0 = const()[name = tensor("op_38976_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_38976_end_mask_0 = const()[name = tensor("op_38976_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38976_cast_fp16 = slice_by_index(begin = var_38976_begin_0, end = var_38976_end_0, end_mask = var_38976_end_mask_0, x = var_38493_cast_fp16)[name = tensor("op_38976_cast_fp16")]; + tensor var_38983_begin_0 = const()[name = tensor("op_38983_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_38983_end_0 = const()[name = tensor("op_38983_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_38983_end_mask_0 = const()[name = tensor("op_38983_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38983_cast_fp16 = slice_by_index(begin = var_38983_begin_0, end = var_38983_end_0, end_mask = var_38983_end_mask_0, x = var_38493_cast_fp16)[name = tensor("op_38983_cast_fp16")]; + tensor var_38990_begin_0 = const()[name = tensor("op_38990_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_38990_end_0 = const()[name = tensor("op_38990_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_38990_end_mask_0 = const()[name = tensor("op_38990_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38990_cast_fp16 = slice_by_index(begin = var_38990_begin_0, end = var_38990_end_0, end_mask = var_38990_end_mask_0, x = var_38497_cast_fp16)[name = tensor("op_38990_cast_fp16")]; + tensor var_38997_begin_0 = const()[name = tensor("op_38997_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_38997_end_0 = const()[name = tensor("op_38997_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_38997_end_mask_0 = const()[name = tensor("op_38997_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_38997_cast_fp16 = slice_by_index(begin = var_38997_begin_0, end = var_38997_end_0, end_mask = var_38997_end_mask_0, x = var_38497_cast_fp16)[name = tensor("op_38997_cast_fp16")]; + tensor var_39004_begin_0 = const()[name = tensor("op_39004_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_39004_end_0 = const()[name = tensor("op_39004_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_39004_end_mask_0 = const()[name = tensor("op_39004_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_39004_cast_fp16 = slice_by_index(begin = var_39004_begin_0, end = var_39004_end_0, end_mask = var_39004_end_mask_0, x = var_38497_cast_fp16)[name = tensor("op_39004_cast_fp16")]; + tensor var_39011_begin_0 = const()[name = tensor("op_39011_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_39011_end_0 = const()[name = tensor("op_39011_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_39011_end_mask_0 = const()[name = tensor("op_39011_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_39011_cast_fp16 = slice_by_index(begin = var_39011_begin_0, end = var_39011_end_0, end_mask = var_39011_end_mask_0, x = var_38497_cast_fp16)[name = tensor("op_39011_cast_fp16")]; + tensor var_39018_begin_0 = const()[name = tensor("op_39018_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_39018_end_0 = const()[name = tensor("op_39018_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_39018_end_mask_0 = const()[name = tensor("op_39018_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_39018_cast_fp16 = slice_by_index(begin = var_39018_begin_0, end = var_39018_end_0, end_mask = var_39018_end_mask_0, x = var_38501_cast_fp16)[name = tensor("op_39018_cast_fp16")]; + tensor var_39025_begin_0 = const()[name = tensor("op_39025_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_39025_end_0 = const()[name = tensor("op_39025_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_39025_end_mask_0 = const()[name = tensor("op_39025_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_39025_cast_fp16 = slice_by_index(begin = var_39025_begin_0, end = var_39025_end_0, end_mask = var_39025_end_mask_0, x = var_38501_cast_fp16)[name = tensor("op_39025_cast_fp16")]; + tensor var_39032_begin_0 = const()[name = tensor("op_39032_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_39032_end_0 = const()[name = tensor("op_39032_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_39032_end_mask_0 = const()[name = tensor("op_39032_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_39032_cast_fp16 = slice_by_index(begin = var_39032_begin_0, end = var_39032_end_0, end_mask = var_39032_end_mask_0, x = var_38501_cast_fp16)[name = tensor("op_39032_cast_fp16")]; + tensor var_39039_begin_0 = const()[name = tensor("op_39039_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_39039_end_0 = const()[name = tensor("op_39039_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_39039_end_mask_0 = const()[name = tensor("op_39039_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_39039_cast_fp16 = slice_by_index(begin = var_39039_begin_0, end = var_39039_end_0, end_mask = var_39039_end_mask_0, x = var_38501_cast_fp16)[name = tensor("op_39039_cast_fp16")]; + tensor var_39046_begin_0 = const()[name = tensor("op_39046_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_39046_end_0 = const()[name = tensor("op_39046_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_39046_end_mask_0 = const()[name = tensor("op_39046_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_39046_cast_fp16 = slice_by_index(begin = var_39046_begin_0, end = var_39046_end_0, end_mask = var_39046_end_mask_0, x = var_38505_cast_fp16)[name = tensor("op_39046_cast_fp16")]; + tensor var_39053_begin_0 = const()[name = tensor("op_39053_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_39053_end_0 = const()[name = tensor("op_39053_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_39053_end_mask_0 = const()[name = tensor("op_39053_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_39053_cast_fp16 = slice_by_index(begin = var_39053_begin_0, end = var_39053_end_0, end_mask = var_39053_end_mask_0, x = var_38505_cast_fp16)[name = tensor("op_39053_cast_fp16")]; + tensor var_39060_begin_0 = const()[name = tensor("op_39060_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_39060_end_0 = const()[name = tensor("op_39060_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_39060_end_mask_0 = const()[name = tensor("op_39060_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_39060_cast_fp16 = slice_by_index(begin = var_39060_begin_0, end = var_39060_end_0, end_mask = var_39060_end_mask_0, x = var_38505_cast_fp16)[name = tensor("op_39060_cast_fp16")]; + tensor var_39067_begin_0 = const()[name = tensor("op_39067_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_39067_end_0 = const()[name = tensor("op_39067_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_39067_end_mask_0 = const()[name = tensor("op_39067_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_39067_cast_fp16 = slice_by_index(begin = var_39067_begin_0, end = var_39067_end_0, end_mask = var_39067_end_mask_0, x = var_38505_cast_fp16)[name = tensor("op_39067_cast_fp16")]; + tensor k_51_perm_0 = const()[name = tensor("k_51_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_39072_begin_0 = const()[name = tensor("op_39072_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_39072_end_0 = const()[name = tensor("op_39072_end_0"), val = tensor([1, 1500, 1, 64])]; + tensor var_39072_end_mask_0 = const()[name = tensor("op_39072_end_mask_0"), val = tensor([true, true, true, false])]; + tensor transpose_6 = transpose(perm = k_51_perm_0, x = key_51_cast_fp16)[name = tensor("transpose_6")]; + tensor var_39072_cast_fp16 = slice_by_index(begin = var_39072_begin_0, end = var_39072_end_0, end_mask = var_39072_end_mask_0, x = transpose_6)[name = tensor("op_39072_cast_fp16")]; + tensor var_39076_begin_0 = const()[name = tensor("op_39076_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor var_39076_end_0 = const()[name = tensor("op_39076_end_0"), val = tensor([1, 1500, 1, 128])]; + tensor var_39076_end_mask_0 = const()[name = tensor("op_39076_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_39076_cast_fp16 = slice_by_index(begin = var_39076_begin_0, end = var_39076_end_0, end_mask = var_39076_end_mask_0, x = transpose_6)[name = tensor("op_39076_cast_fp16")]; + tensor var_39080_begin_0 = const()[name = tensor("op_39080_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_39080_end_0 = const()[name = tensor("op_39080_end_0"), val = tensor([1, 1500, 1, 192])]; + tensor var_39080_end_mask_0 = const()[name = tensor("op_39080_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_39080_cast_fp16 = slice_by_index(begin = var_39080_begin_0, end = var_39080_end_0, end_mask = var_39080_end_mask_0, x = transpose_6)[name = tensor("op_39080_cast_fp16")]; + tensor var_39084_begin_0 = const()[name = tensor("op_39084_begin_0"), val = tensor([0, 0, 0, 192])]; + tensor var_39084_end_0 = const()[name = tensor("op_39084_end_0"), val = tensor([1, 1500, 1, 256])]; + tensor var_39084_end_mask_0 = const()[name = tensor("op_39084_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_39084_cast_fp16 = slice_by_index(begin = var_39084_begin_0, end = var_39084_end_0, end_mask = var_39084_end_mask_0, x = transpose_6)[name = tensor("op_39084_cast_fp16")]; + tensor var_39088_begin_0 = const()[name = tensor("op_39088_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_39088_end_0 = const()[name = tensor("op_39088_end_0"), val = tensor([1, 1500, 1, 320])]; + tensor var_39088_end_mask_0 = const()[name = tensor("op_39088_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_39088_cast_fp16 = slice_by_index(begin = var_39088_begin_0, end = var_39088_end_0, end_mask = var_39088_end_mask_0, x = transpose_6)[name = tensor("op_39088_cast_fp16")]; + tensor var_39092_begin_0 = const()[name = tensor("op_39092_begin_0"), val = tensor([0, 0, 0, 320])]; + tensor var_39092_end_0 = const()[name = tensor("op_39092_end_0"), val = tensor([1, 1500, 1, 384])]; + tensor var_39092_end_mask_0 = const()[name = tensor("op_39092_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_39092_cast_fp16 = slice_by_index(begin = var_39092_begin_0, end = var_39092_end_0, end_mask = var_39092_end_mask_0, x = transpose_6)[name = tensor("op_39092_cast_fp16")]; + tensor var_39096_begin_0 = const()[name = tensor("op_39096_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_39096_end_0 = const()[name = tensor("op_39096_end_0"), val = tensor([1, 1500, 1, 448])]; + tensor var_39096_end_mask_0 = const()[name = tensor("op_39096_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_39096_cast_fp16 = slice_by_index(begin = var_39096_begin_0, end = var_39096_end_0, end_mask = var_39096_end_mask_0, x = transpose_6)[name = tensor("op_39096_cast_fp16")]; + tensor var_39100_begin_0 = const()[name = tensor("op_39100_begin_0"), val = tensor([0, 0, 0, 448])]; + tensor var_39100_end_0 = const()[name = tensor("op_39100_end_0"), val = tensor([1, 1500, 1, 512])]; + tensor var_39100_end_mask_0 = const()[name = tensor("op_39100_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_39100_cast_fp16 = slice_by_index(begin = var_39100_begin_0, end = var_39100_end_0, end_mask = var_39100_end_mask_0, x = transpose_6)[name = tensor("op_39100_cast_fp16")]; + tensor var_39104_begin_0 = const()[name = tensor("op_39104_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_39104_end_0 = const()[name = tensor("op_39104_end_0"), val = tensor([1, 1500, 1, 576])]; + tensor var_39104_end_mask_0 = const()[name = tensor("op_39104_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_39104_cast_fp16 = slice_by_index(begin = var_39104_begin_0, end = var_39104_end_0, end_mask = var_39104_end_mask_0, x = transpose_6)[name = tensor("op_39104_cast_fp16")]; + tensor var_39108_begin_0 = const()[name = tensor("op_39108_begin_0"), val = tensor([0, 0, 0, 576])]; + tensor var_39108_end_0 = const()[name = tensor("op_39108_end_0"), val = tensor([1, 1500, 1, 640])]; + tensor var_39108_end_mask_0 = const()[name = tensor("op_39108_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_39108_cast_fp16 = slice_by_index(begin = var_39108_begin_0, end = var_39108_end_0, end_mask = var_39108_end_mask_0, x = transpose_6)[name = tensor("op_39108_cast_fp16")]; + tensor var_39112_begin_0 = const()[name = tensor("op_39112_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_39112_end_0 = const()[name = tensor("op_39112_end_0"), val = tensor([1, 1500, 1, 704])]; + tensor var_39112_end_mask_0 = const()[name = tensor("op_39112_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_39112_cast_fp16 = slice_by_index(begin = var_39112_begin_0, end = var_39112_end_0, end_mask = var_39112_end_mask_0, x = transpose_6)[name = tensor("op_39112_cast_fp16")]; + tensor var_39116_begin_0 = const()[name = tensor("op_39116_begin_0"), val = tensor([0, 0, 0, 704])]; + tensor var_39116_end_0 = const()[name = tensor("op_39116_end_0"), val = tensor([1, 1500, 1, 768])]; + tensor var_39116_end_mask_0 = const()[name = tensor("op_39116_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_39116_cast_fp16 = slice_by_index(begin = var_39116_begin_0, end = var_39116_end_0, end_mask = var_39116_end_mask_0, x = transpose_6)[name = tensor("op_39116_cast_fp16")]; + tensor var_39120_begin_0 = const()[name = tensor("op_39120_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_39120_end_0 = const()[name = tensor("op_39120_end_0"), val = tensor([1, 1500, 1, 832])]; + tensor var_39120_end_mask_0 = const()[name = tensor("op_39120_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_39120_cast_fp16 = slice_by_index(begin = var_39120_begin_0, end = var_39120_end_0, end_mask = var_39120_end_mask_0, x = transpose_6)[name = tensor("op_39120_cast_fp16")]; + tensor var_39124_begin_0 = const()[name = tensor("op_39124_begin_0"), val = tensor([0, 0, 0, 832])]; + tensor var_39124_end_0 = const()[name = tensor("op_39124_end_0"), val = tensor([1, 1500, 1, 896])]; + tensor var_39124_end_mask_0 = const()[name = tensor("op_39124_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_39124_cast_fp16 = slice_by_index(begin = var_39124_begin_0, end = var_39124_end_0, end_mask = var_39124_end_mask_0, x = transpose_6)[name = tensor("op_39124_cast_fp16")]; + tensor var_39128_begin_0 = const()[name = tensor("op_39128_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_39128_end_0 = const()[name = tensor("op_39128_end_0"), val = tensor([1, 1500, 1, 960])]; + tensor var_39128_end_mask_0 = const()[name = tensor("op_39128_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_39128_cast_fp16 = slice_by_index(begin = var_39128_begin_0, end = var_39128_end_0, end_mask = var_39128_end_mask_0, x = transpose_6)[name = tensor("op_39128_cast_fp16")]; + tensor var_39132_begin_0 = const()[name = tensor("op_39132_begin_0"), val = tensor([0, 0, 0, 960])]; + tensor var_39132_end_0 = const()[name = tensor("op_39132_end_0"), val = tensor([1, 1500, 1, 1024])]; + tensor var_39132_end_mask_0 = const()[name = tensor("op_39132_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_39132_cast_fp16 = slice_by_index(begin = var_39132_begin_0, end = var_39132_end_0, end_mask = var_39132_end_mask_0, x = transpose_6)[name = tensor("op_39132_cast_fp16")]; + tensor var_39136_begin_0 = const()[name = tensor("op_39136_begin_0"), val = tensor([0, 0, 0, 1024])]; + tensor var_39136_end_0 = const()[name = tensor("op_39136_end_0"), val = tensor([1, 1500, 1, 1088])]; + tensor var_39136_end_mask_0 = const()[name = tensor("op_39136_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_39136_cast_fp16 = slice_by_index(begin = var_39136_begin_0, end = var_39136_end_0, end_mask = var_39136_end_mask_0, x = transpose_6)[name = tensor("op_39136_cast_fp16")]; + tensor var_39140_begin_0 = const()[name = tensor("op_39140_begin_0"), val = tensor([0, 0, 0, 1088])]; + tensor var_39140_end_0 = const()[name = tensor("op_39140_end_0"), val = tensor([1, 1500, 1, 1152])]; + tensor var_39140_end_mask_0 = const()[name = tensor("op_39140_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_39140_cast_fp16 = slice_by_index(begin = var_39140_begin_0, end = var_39140_end_0, end_mask = var_39140_end_mask_0, x = transpose_6)[name = tensor("op_39140_cast_fp16")]; + tensor var_39144_begin_0 = const()[name = tensor("op_39144_begin_0"), val = tensor([0, 0, 0, 1152])]; + tensor var_39144_end_0 = const()[name = tensor("op_39144_end_0"), val = tensor([1, 1500, 1, 1216])]; + tensor var_39144_end_mask_0 = const()[name = tensor("op_39144_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_39144_cast_fp16 = slice_by_index(begin = var_39144_begin_0, end = var_39144_end_0, end_mask = var_39144_end_mask_0, x = transpose_6)[name = tensor("op_39144_cast_fp16")]; + tensor var_39148_begin_0 = const()[name = tensor("op_39148_begin_0"), val = tensor([0, 0, 0, 1216])]; + tensor var_39148_end_0 = const()[name = tensor("op_39148_end_0"), val = tensor([1, 1500, 1, 1280])]; + tensor var_39148_end_mask_0 = const()[name = tensor("op_39148_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_39148_cast_fp16 = slice_by_index(begin = var_39148_begin_0, end = var_39148_end_0, end_mask = var_39148_end_mask_0, x = transpose_6)[name = tensor("op_39148_cast_fp16")]; + tensor var_39150_begin_0 = const()[name = tensor("op_39150_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_39150_end_0 = const()[name = tensor("op_39150_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_39150_end_mask_0 = const()[name = tensor("op_39150_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_39150_cast_fp16 = slice_by_index(begin = var_39150_begin_0, end = var_39150_end_0, end_mask = var_39150_end_mask_0, x = value_51_cast_fp16)[name = tensor("op_39150_cast_fp16")]; + tensor var_39154_begin_0 = const()[name = tensor("op_39154_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_39154_end_0 = const()[name = tensor("op_39154_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_39154_end_mask_0 = const()[name = tensor("op_39154_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_39154_cast_fp16 = slice_by_index(begin = var_39154_begin_0, end = var_39154_end_0, end_mask = var_39154_end_mask_0, x = value_51_cast_fp16)[name = tensor("op_39154_cast_fp16")]; + tensor var_39158_begin_0 = const()[name = tensor("op_39158_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_39158_end_0 = const()[name = tensor("op_39158_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_39158_end_mask_0 = const()[name = tensor("op_39158_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_39158_cast_fp16 = slice_by_index(begin = var_39158_begin_0, end = var_39158_end_0, end_mask = var_39158_end_mask_0, x = value_51_cast_fp16)[name = tensor("op_39158_cast_fp16")]; + tensor var_39162_begin_0 = const()[name = tensor("op_39162_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_39162_end_0 = const()[name = tensor("op_39162_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_39162_end_mask_0 = const()[name = tensor("op_39162_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_39162_cast_fp16 = slice_by_index(begin = var_39162_begin_0, end = var_39162_end_0, end_mask = var_39162_end_mask_0, x = value_51_cast_fp16)[name = tensor("op_39162_cast_fp16")]; + tensor var_39166_begin_0 = const()[name = tensor("op_39166_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_39166_end_0 = const()[name = tensor("op_39166_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_39166_end_mask_0 = const()[name = tensor("op_39166_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_39166_cast_fp16 = slice_by_index(begin = var_39166_begin_0, end = var_39166_end_0, end_mask = var_39166_end_mask_0, x = value_51_cast_fp16)[name = tensor("op_39166_cast_fp16")]; + tensor var_39170_begin_0 = const()[name = tensor("op_39170_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_39170_end_0 = const()[name = tensor("op_39170_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_39170_end_mask_0 = const()[name = tensor("op_39170_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_39170_cast_fp16 = slice_by_index(begin = var_39170_begin_0, end = var_39170_end_0, end_mask = var_39170_end_mask_0, x = value_51_cast_fp16)[name = tensor("op_39170_cast_fp16")]; + tensor var_39174_begin_0 = const()[name = tensor("op_39174_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_39174_end_0 = const()[name = tensor("op_39174_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_39174_end_mask_0 = const()[name = tensor("op_39174_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_39174_cast_fp16 = slice_by_index(begin = var_39174_begin_0, end = var_39174_end_0, end_mask = var_39174_end_mask_0, x = value_51_cast_fp16)[name = tensor("op_39174_cast_fp16")]; + tensor var_39178_begin_0 = const()[name = tensor("op_39178_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_39178_end_0 = const()[name = tensor("op_39178_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_39178_end_mask_0 = const()[name = tensor("op_39178_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_39178_cast_fp16 = slice_by_index(begin = var_39178_begin_0, end = var_39178_end_0, end_mask = var_39178_end_mask_0, x = value_51_cast_fp16)[name = tensor("op_39178_cast_fp16")]; + tensor var_39182_begin_0 = const()[name = tensor("op_39182_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_39182_end_0 = const()[name = tensor("op_39182_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_39182_end_mask_0 = const()[name = tensor("op_39182_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_39182_cast_fp16 = slice_by_index(begin = var_39182_begin_0, end = var_39182_end_0, end_mask = var_39182_end_mask_0, x = value_51_cast_fp16)[name = tensor("op_39182_cast_fp16")]; + tensor var_39186_begin_0 = const()[name = tensor("op_39186_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_39186_end_0 = const()[name = tensor("op_39186_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_39186_end_mask_0 = const()[name = tensor("op_39186_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_39186_cast_fp16 = slice_by_index(begin = var_39186_begin_0, end = var_39186_end_0, end_mask = var_39186_end_mask_0, x = value_51_cast_fp16)[name = tensor("op_39186_cast_fp16")]; + tensor var_39190_begin_0 = const()[name = tensor("op_39190_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_39190_end_0 = const()[name = tensor("op_39190_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_39190_end_mask_0 = const()[name = tensor("op_39190_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_39190_cast_fp16 = slice_by_index(begin = var_39190_begin_0, end = var_39190_end_0, end_mask = var_39190_end_mask_0, x = value_51_cast_fp16)[name = tensor("op_39190_cast_fp16")]; + tensor var_39194_begin_0 = const()[name = tensor("op_39194_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_39194_end_0 = const()[name = tensor("op_39194_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_39194_end_mask_0 = const()[name = tensor("op_39194_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_39194_cast_fp16 = slice_by_index(begin = var_39194_begin_0, end = var_39194_end_0, end_mask = var_39194_end_mask_0, x = value_51_cast_fp16)[name = tensor("op_39194_cast_fp16")]; + tensor var_39198_begin_0 = const()[name = tensor("op_39198_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_39198_end_0 = const()[name = tensor("op_39198_end_0"), val = tensor([1, 832, 1, 1500])]; + tensor var_39198_end_mask_0 = const()[name = tensor("op_39198_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_39198_cast_fp16 = slice_by_index(begin = var_39198_begin_0, end = var_39198_end_0, end_mask = var_39198_end_mask_0, x = value_51_cast_fp16)[name = tensor("op_39198_cast_fp16")]; + tensor var_39202_begin_0 = const()[name = tensor("op_39202_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_39202_end_0 = const()[name = tensor("op_39202_end_0"), val = tensor([1, 896, 1, 1500])]; + tensor var_39202_end_mask_0 = const()[name = tensor("op_39202_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_39202_cast_fp16 = slice_by_index(begin = var_39202_begin_0, end = var_39202_end_0, end_mask = var_39202_end_mask_0, x = value_51_cast_fp16)[name = tensor("op_39202_cast_fp16")]; + tensor var_39206_begin_0 = const()[name = tensor("op_39206_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_39206_end_0 = const()[name = tensor("op_39206_end_0"), val = tensor([1, 960, 1, 1500])]; + tensor var_39206_end_mask_0 = const()[name = tensor("op_39206_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_39206_cast_fp16 = slice_by_index(begin = var_39206_begin_0, end = var_39206_end_0, end_mask = var_39206_end_mask_0, x = value_51_cast_fp16)[name = tensor("op_39206_cast_fp16")]; + tensor var_39210_begin_0 = const()[name = tensor("op_39210_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_39210_end_0 = const()[name = tensor("op_39210_end_0"), val = tensor([1, 1024, 1, 1500])]; + tensor var_39210_end_mask_0 = const()[name = tensor("op_39210_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_39210_cast_fp16 = slice_by_index(begin = var_39210_begin_0, end = var_39210_end_0, end_mask = var_39210_end_mask_0, x = value_51_cast_fp16)[name = tensor("op_39210_cast_fp16")]; + tensor var_39214_begin_0 = const()[name = tensor("op_39214_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_39214_end_0 = const()[name = tensor("op_39214_end_0"), val = tensor([1, 1088, 1, 1500])]; + tensor var_39214_end_mask_0 = const()[name = tensor("op_39214_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_39214_cast_fp16 = slice_by_index(begin = var_39214_begin_0, end = var_39214_end_0, end_mask = var_39214_end_mask_0, x = value_51_cast_fp16)[name = tensor("op_39214_cast_fp16")]; + tensor var_39218_begin_0 = const()[name = tensor("op_39218_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_39218_end_0 = const()[name = tensor("op_39218_end_0"), val = tensor([1, 1152, 1, 1500])]; + tensor var_39218_end_mask_0 = const()[name = tensor("op_39218_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_39218_cast_fp16 = slice_by_index(begin = var_39218_begin_0, end = var_39218_end_0, end_mask = var_39218_end_mask_0, x = value_51_cast_fp16)[name = tensor("op_39218_cast_fp16")]; + tensor var_39222_begin_0 = const()[name = tensor("op_39222_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_39222_end_0 = const()[name = tensor("op_39222_end_0"), val = tensor([1, 1216, 1, 1500])]; + tensor var_39222_end_mask_0 = const()[name = tensor("op_39222_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_39222_cast_fp16 = slice_by_index(begin = var_39222_begin_0, end = var_39222_end_0, end_mask = var_39222_end_mask_0, x = value_51_cast_fp16)[name = tensor("op_39222_cast_fp16")]; + tensor var_39226_begin_0 = const()[name = tensor("op_39226_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_39226_end_0 = const()[name = tensor("op_39226_end_0"), val = tensor([1, 1280, 1, 1500])]; + tensor var_39226_end_mask_0 = const()[name = tensor("op_39226_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_39226_cast_fp16 = slice_by_index(begin = var_39226_begin_0, end = var_39226_end_0, end_mask = var_39226_end_mask_0, x = value_51_cast_fp16)[name = tensor("op_39226_cast_fp16")]; + tensor var_39230_equation_0 = const()[name = tensor("op_39230_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39230_cast_fp16 = einsum(equation = var_39230_equation_0, values = (var_39072_cast_fp16, var_38514_cast_fp16))[name = tensor("op_39230_cast_fp16")]; + tensor var_39231_to_fp16 = const()[name = tensor("op_39231_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4001_cast_fp16 = mul(x = var_39230_cast_fp16, y = var_39231_to_fp16)[name = tensor("aw_chunk_4001_cast_fp16")]; + tensor var_39234_equation_0 = const()[name = tensor("op_39234_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39234_cast_fp16 = einsum(equation = var_39234_equation_0, values = (var_39072_cast_fp16, var_38521_cast_fp16))[name = tensor("op_39234_cast_fp16")]; + tensor var_39235_to_fp16 = const()[name = tensor("op_39235_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4003_cast_fp16 = mul(x = var_39234_cast_fp16, y = var_39235_to_fp16)[name = tensor("aw_chunk_4003_cast_fp16")]; + tensor var_39238_equation_0 = const()[name = tensor("op_39238_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39238_cast_fp16 = einsum(equation = var_39238_equation_0, values = (var_39072_cast_fp16, var_38528_cast_fp16))[name = tensor("op_39238_cast_fp16")]; + tensor var_39239_to_fp16 = const()[name = tensor("op_39239_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4005_cast_fp16 = mul(x = var_39238_cast_fp16, y = var_39239_to_fp16)[name = tensor("aw_chunk_4005_cast_fp16")]; + tensor var_39242_equation_0 = const()[name = tensor("op_39242_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39242_cast_fp16 = einsum(equation = var_39242_equation_0, values = (var_39072_cast_fp16, var_38535_cast_fp16))[name = tensor("op_39242_cast_fp16")]; + tensor var_39243_to_fp16 = const()[name = tensor("op_39243_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4007_cast_fp16 = mul(x = var_39242_cast_fp16, y = var_39243_to_fp16)[name = tensor("aw_chunk_4007_cast_fp16")]; + tensor var_39246_equation_0 = const()[name = tensor("op_39246_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39246_cast_fp16 = einsum(equation = var_39246_equation_0, values = (var_39076_cast_fp16, var_38542_cast_fp16))[name = tensor("op_39246_cast_fp16")]; + tensor var_39247_to_fp16 = const()[name = tensor("op_39247_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4009_cast_fp16 = mul(x = var_39246_cast_fp16, y = var_39247_to_fp16)[name = tensor("aw_chunk_4009_cast_fp16")]; + tensor var_39250_equation_0 = const()[name = tensor("op_39250_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39250_cast_fp16 = einsum(equation = var_39250_equation_0, values = (var_39076_cast_fp16, var_38549_cast_fp16))[name = tensor("op_39250_cast_fp16")]; + tensor var_39251_to_fp16 = const()[name = tensor("op_39251_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4011_cast_fp16 = mul(x = var_39250_cast_fp16, y = var_39251_to_fp16)[name = tensor("aw_chunk_4011_cast_fp16")]; + tensor var_39254_equation_0 = const()[name = tensor("op_39254_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39254_cast_fp16 = einsum(equation = var_39254_equation_0, values = (var_39076_cast_fp16, var_38556_cast_fp16))[name = tensor("op_39254_cast_fp16")]; + tensor var_39255_to_fp16 = const()[name = tensor("op_39255_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4013_cast_fp16 = mul(x = var_39254_cast_fp16, y = var_39255_to_fp16)[name = tensor("aw_chunk_4013_cast_fp16")]; + tensor var_39258_equation_0 = const()[name = tensor("op_39258_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39258_cast_fp16 = einsum(equation = var_39258_equation_0, values = (var_39076_cast_fp16, var_38563_cast_fp16))[name = tensor("op_39258_cast_fp16")]; + tensor var_39259_to_fp16 = const()[name = tensor("op_39259_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4015_cast_fp16 = mul(x = var_39258_cast_fp16, y = var_39259_to_fp16)[name = tensor("aw_chunk_4015_cast_fp16")]; + tensor var_39262_equation_0 = const()[name = tensor("op_39262_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39262_cast_fp16 = einsum(equation = var_39262_equation_0, values = (var_39080_cast_fp16, var_38570_cast_fp16))[name = tensor("op_39262_cast_fp16")]; + tensor var_39263_to_fp16 = const()[name = tensor("op_39263_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4017_cast_fp16 = mul(x = var_39262_cast_fp16, y = var_39263_to_fp16)[name = tensor("aw_chunk_4017_cast_fp16")]; + tensor var_39266_equation_0 = const()[name = tensor("op_39266_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39266_cast_fp16 = einsum(equation = var_39266_equation_0, values = (var_39080_cast_fp16, var_38577_cast_fp16))[name = tensor("op_39266_cast_fp16")]; + tensor var_39267_to_fp16 = const()[name = tensor("op_39267_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4019_cast_fp16 = mul(x = var_39266_cast_fp16, y = var_39267_to_fp16)[name = tensor("aw_chunk_4019_cast_fp16")]; + tensor var_39270_equation_0 = const()[name = tensor("op_39270_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39270_cast_fp16 = einsum(equation = var_39270_equation_0, values = (var_39080_cast_fp16, var_38584_cast_fp16))[name = tensor("op_39270_cast_fp16")]; + tensor var_39271_to_fp16 = const()[name = tensor("op_39271_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4021_cast_fp16 = mul(x = var_39270_cast_fp16, y = var_39271_to_fp16)[name = tensor("aw_chunk_4021_cast_fp16")]; + tensor var_39274_equation_0 = const()[name = tensor("op_39274_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39274_cast_fp16 = einsum(equation = var_39274_equation_0, values = (var_39080_cast_fp16, var_38591_cast_fp16))[name = tensor("op_39274_cast_fp16")]; + tensor var_39275_to_fp16 = const()[name = tensor("op_39275_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4023_cast_fp16 = mul(x = var_39274_cast_fp16, y = var_39275_to_fp16)[name = tensor("aw_chunk_4023_cast_fp16")]; + tensor var_39278_equation_0 = const()[name = tensor("op_39278_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39278_cast_fp16 = einsum(equation = var_39278_equation_0, values = (var_39084_cast_fp16, var_38598_cast_fp16))[name = tensor("op_39278_cast_fp16")]; + tensor var_39279_to_fp16 = const()[name = tensor("op_39279_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4025_cast_fp16 = mul(x = var_39278_cast_fp16, y = var_39279_to_fp16)[name = tensor("aw_chunk_4025_cast_fp16")]; + tensor var_39282_equation_0 = const()[name = tensor("op_39282_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39282_cast_fp16 = einsum(equation = var_39282_equation_0, values = (var_39084_cast_fp16, var_38605_cast_fp16))[name = tensor("op_39282_cast_fp16")]; + tensor var_39283_to_fp16 = const()[name = tensor("op_39283_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4027_cast_fp16 = mul(x = var_39282_cast_fp16, y = var_39283_to_fp16)[name = tensor("aw_chunk_4027_cast_fp16")]; + tensor var_39286_equation_0 = const()[name = tensor("op_39286_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39286_cast_fp16 = einsum(equation = var_39286_equation_0, values = (var_39084_cast_fp16, var_38612_cast_fp16))[name = tensor("op_39286_cast_fp16")]; + tensor var_39287_to_fp16 = const()[name = tensor("op_39287_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4029_cast_fp16 = mul(x = var_39286_cast_fp16, y = var_39287_to_fp16)[name = tensor("aw_chunk_4029_cast_fp16")]; + tensor var_39290_equation_0 = const()[name = tensor("op_39290_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39290_cast_fp16 = einsum(equation = var_39290_equation_0, values = (var_39084_cast_fp16, var_38619_cast_fp16))[name = tensor("op_39290_cast_fp16")]; + tensor var_39291_to_fp16 = const()[name = tensor("op_39291_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4031_cast_fp16 = mul(x = var_39290_cast_fp16, y = var_39291_to_fp16)[name = tensor("aw_chunk_4031_cast_fp16")]; + tensor var_39294_equation_0 = const()[name = tensor("op_39294_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39294_cast_fp16 = einsum(equation = var_39294_equation_0, values = (var_39088_cast_fp16, var_38626_cast_fp16))[name = tensor("op_39294_cast_fp16")]; + tensor var_39295_to_fp16 = const()[name = tensor("op_39295_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4033_cast_fp16 = mul(x = var_39294_cast_fp16, y = var_39295_to_fp16)[name = tensor("aw_chunk_4033_cast_fp16")]; + tensor var_39298_equation_0 = const()[name = tensor("op_39298_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39298_cast_fp16 = einsum(equation = var_39298_equation_0, values = (var_39088_cast_fp16, var_38633_cast_fp16))[name = tensor("op_39298_cast_fp16")]; + tensor var_39299_to_fp16 = const()[name = tensor("op_39299_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4035_cast_fp16 = mul(x = var_39298_cast_fp16, y = var_39299_to_fp16)[name = tensor("aw_chunk_4035_cast_fp16")]; + tensor var_39302_equation_0 = const()[name = tensor("op_39302_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39302_cast_fp16 = einsum(equation = var_39302_equation_0, values = (var_39088_cast_fp16, var_38640_cast_fp16))[name = tensor("op_39302_cast_fp16")]; + tensor var_39303_to_fp16 = const()[name = tensor("op_39303_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4037_cast_fp16 = mul(x = var_39302_cast_fp16, y = var_39303_to_fp16)[name = tensor("aw_chunk_4037_cast_fp16")]; + tensor var_39306_equation_0 = const()[name = tensor("op_39306_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39306_cast_fp16 = einsum(equation = var_39306_equation_0, values = (var_39088_cast_fp16, var_38647_cast_fp16))[name = tensor("op_39306_cast_fp16")]; + tensor var_39307_to_fp16 = const()[name = tensor("op_39307_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4039_cast_fp16 = mul(x = var_39306_cast_fp16, y = var_39307_to_fp16)[name = tensor("aw_chunk_4039_cast_fp16")]; + tensor var_39310_equation_0 = const()[name = tensor("op_39310_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39310_cast_fp16 = einsum(equation = var_39310_equation_0, values = (var_39092_cast_fp16, var_38654_cast_fp16))[name = tensor("op_39310_cast_fp16")]; + tensor var_39311_to_fp16 = const()[name = tensor("op_39311_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4041_cast_fp16 = mul(x = var_39310_cast_fp16, y = var_39311_to_fp16)[name = tensor("aw_chunk_4041_cast_fp16")]; + tensor var_39314_equation_0 = const()[name = tensor("op_39314_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39314_cast_fp16 = einsum(equation = var_39314_equation_0, values = (var_39092_cast_fp16, var_38661_cast_fp16))[name = tensor("op_39314_cast_fp16")]; + tensor var_39315_to_fp16 = const()[name = tensor("op_39315_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4043_cast_fp16 = mul(x = var_39314_cast_fp16, y = var_39315_to_fp16)[name = tensor("aw_chunk_4043_cast_fp16")]; + tensor var_39318_equation_0 = const()[name = tensor("op_39318_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39318_cast_fp16 = einsum(equation = var_39318_equation_0, values = (var_39092_cast_fp16, var_38668_cast_fp16))[name = tensor("op_39318_cast_fp16")]; + tensor var_39319_to_fp16 = const()[name = tensor("op_39319_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4045_cast_fp16 = mul(x = var_39318_cast_fp16, y = var_39319_to_fp16)[name = tensor("aw_chunk_4045_cast_fp16")]; + tensor var_39322_equation_0 = const()[name = tensor("op_39322_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39322_cast_fp16 = einsum(equation = var_39322_equation_0, values = (var_39092_cast_fp16, var_38675_cast_fp16))[name = tensor("op_39322_cast_fp16")]; + tensor var_39323_to_fp16 = const()[name = tensor("op_39323_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4047_cast_fp16 = mul(x = var_39322_cast_fp16, y = var_39323_to_fp16)[name = tensor("aw_chunk_4047_cast_fp16")]; + tensor var_39326_equation_0 = const()[name = tensor("op_39326_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39326_cast_fp16 = einsum(equation = var_39326_equation_0, values = (var_39096_cast_fp16, var_38682_cast_fp16))[name = tensor("op_39326_cast_fp16")]; + tensor var_39327_to_fp16 = const()[name = tensor("op_39327_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4049_cast_fp16 = mul(x = var_39326_cast_fp16, y = var_39327_to_fp16)[name = tensor("aw_chunk_4049_cast_fp16")]; + tensor var_39330_equation_0 = const()[name = tensor("op_39330_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39330_cast_fp16 = einsum(equation = var_39330_equation_0, values = (var_39096_cast_fp16, var_38689_cast_fp16))[name = tensor("op_39330_cast_fp16")]; + tensor var_39331_to_fp16 = const()[name = tensor("op_39331_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4051_cast_fp16 = mul(x = var_39330_cast_fp16, y = var_39331_to_fp16)[name = tensor("aw_chunk_4051_cast_fp16")]; + tensor var_39334_equation_0 = const()[name = tensor("op_39334_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39334_cast_fp16 = einsum(equation = var_39334_equation_0, values = (var_39096_cast_fp16, var_38696_cast_fp16))[name = tensor("op_39334_cast_fp16")]; + tensor var_39335_to_fp16 = const()[name = tensor("op_39335_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4053_cast_fp16 = mul(x = var_39334_cast_fp16, y = var_39335_to_fp16)[name = tensor("aw_chunk_4053_cast_fp16")]; + tensor var_39338_equation_0 = const()[name = tensor("op_39338_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39338_cast_fp16 = einsum(equation = var_39338_equation_0, values = (var_39096_cast_fp16, var_38703_cast_fp16))[name = tensor("op_39338_cast_fp16")]; + tensor var_39339_to_fp16 = const()[name = tensor("op_39339_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4055_cast_fp16 = mul(x = var_39338_cast_fp16, y = var_39339_to_fp16)[name = tensor("aw_chunk_4055_cast_fp16")]; + tensor var_39342_equation_0 = const()[name = tensor("op_39342_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39342_cast_fp16 = einsum(equation = var_39342_equation_0, values = (var_39100_cast_fp16, var_38710_cast_fp16))[name = tensor("op_39342_cast_fp16")]; + tensor var_39343_to_fp16 = const()[name = tensor("op_39343_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4057_cast_fp16 = mul(x = var_39342_cast_fp16, y = var_39343_to_fp16)[name = tensor("aw_chunk_4057_cast_fp16")]; + tensor var_39346_equation_0 = const()[name = tensor("op_39346_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39346_cast_fp16 = einsum(equation = var_39346_equation_0, values = (var_39100_cast_fp16, var_38717_cast_fp16))[name = tensor("op_39346_cast_fp16")]; + tensor var_39347_to_fp16 = const()[name = tensor("op_39347_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4059_cast_fp16 = mul(x = var_39346_cast_fp16, y = var_39347_to_fp16)[name = tensor("aw_chunk_4059_cast_fp16")]; + tensor var_39350_equation_0 = const()[name = tensor("op_39350_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39350_cast_fp16 = einsum(equation = var_39350_equation_0, values = (var_39100_cast_fp16, var_38724_cast_fp16))[name = tensor("op_39350_cast_fp16")]; + tensor var_39351_to_fp16 = const()[name = tensor("op_39351_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4061_cast_fp16 = mul(x = var_39350_cast_fp16, y = var_39351_to_fp16)[name = tensor("aw_chunk_4061_cast_fp16")]; + tensor var_39354_equation_0 = const()[name = tensor("op_39354_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39354_cast_fp16 = einsum(equation = var_39354_equation_0, values = (var_39100_cast_fp16, var_38731_cast_fp16))[name = tensor("op_39354_cast_fp16")]; + tensor var_39355_to_fp16 = const()[name = tensor("op_39355_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4063_cast_fp16 = mul(x = var_39354_cast_fp16, y = var_39355_to_fp16)[name = tensor("aw_chunk_4063_cast_fp16")]; + tensor var_39358_equation_0 = const()[name = tensor("op_39358_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39358_cast_fp16 = einsum(equation = var_39358_equation_0, values = (var_39104_cast_fp16, var_38738_cast_fp16))[name = tensor("op_39358_cast_fp16")]; + tensor var_39359_to_fp16 = const()[name = tensor("op_39359_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4065_cast_fp16 = mul(x = var_39358_cast_fp16, y = var_39359_to_fp16)[name = tensor("aw_chunk_4065_cast_fp16")]; + tensor var_39362_equation_0 = const()[name = tensor("op_39362_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39362_cast_fp16 = einsum(equation = var_39362_equation_0, values = (var_39104_cast_fp16, var_38745_cast_fp16))[name = tensor("op_39362_cast_fp16")]; + tensor var_39363_to_fp16 = const()[name = tensor("op_39363_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4067_cast_fp16 = mul(x = var_39362_cast_fp16, y = var_39363_to_fp16)[name = tensor("aw_chunk_4067_cast_fp16")]; + tensor var_39366_equation_0 = const()[name = tensor("op_39366_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39366_cast_fp16 = einsum(equation = var_39366_equation_0, values = (var_39104_cast_fp16, var_38752_cast_fp16))[name = tensor("op_39366_cast_fp16")]; + tensor var_39367_to_fp16 = const()[name = tensor("op_39367_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4069_cast_fp16 = mul(x = var_39366_cast_fp16, y = var_39367_to_fp16)[name = tensor("aw_chunk_4069_cast_fp16")]; + tensor var_39370_equation_0 = const()[name = tensor("op_39370_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39370_cast_fp16 = einsum(equation = var_39370_equation_0, values = (var_39104_cast_fp16, var_38759_cast_fp16))[name = tensor("op_39370_cast_fp16")]; + tensor var_39371_to_fp16 = const()[name = tensor("op_39371_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4071_cast_fp16 = mul(x = var_39370_cast_fp16, y = var_39371_to_fp16)[name = tensor("aw_chunk_4071_cast_fp16")]; + tensor var_39374_equation_0 = const()[name = tensor("op_39374_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39374_cast_fp16 = einsum(equation = var_39374_equation_0, values = (var_39108_cast_fp16, var_38766_cast_fp16))[name = tensor("op_39374_cast_fp16")]; + tensor var_39375_to_fp16 = const()[name = tensor("op_39375_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4073_cast_fp16 = mul(x = var_39374_cast_fp16, y = var_39375_to_fp16)[name = tensor("aw_chunk_4073_cast_fp16")]; + tensor var_39378_equation_0 = const()[name = tensor("op_39378_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39378_cast_fp16 = einsum(equation = var_39378_equation_0, values = (var_39108_cast_fp16, var_38773_cast_fp16))[name = tensor("op_39378_cast_fp16")]; + tensor var_39379_to_fp16 = const()[name = tensor("op_39379_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4075_cast_fp16 = mul(x = var_39378_cast_fp16, y = var_39379_to_fp16)[name = tensor("aw_chunk_4075_cast_fp16")]; + tensor var_39382_equation_0 = const()[name = tensor("op_39382_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39382_cast_fp16 = einsum(equation = var_39382_equation_0, values = (var_39108_cast_fp16, var_38780_cast_fp16))[name = tensor("op_39382_cast_fp16")]; + tensor var_39383_to_fp16 = const()[name = tensor("op_39383_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4077_cast_fp16 = mul(x = var_39382_cast_fp16, y = var_39383_to_fp16)[name = tensor("aw_chunk_4077_cast_fp16")]; + tensor var_39386_equation_0 = const()[name = tensor("op_39386_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39386_cast_fp16 = einsum(equation = var_39386_equation_0, values = (var_39108_cast_fp16, var_38787_cast_fp16))[name = tensor("op_39386_cast_fp16")]; + tensor var_39387_to_fp16 = const()[name = tensor("op_39387_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4079_cast_fp16 = mul(x = var_39386_cast_fp16, y = var_39387_to_fp16)[name = tensor("aw_chunk_4079_cast_fp16")]; + tensor var_39390_equation_0 = const()[name = tensor("op_39390_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39390_cast_fp16 = einsum(equation = var_39390_equation_0, values = (var_39112_cast_fp16, var_38794_cast_fp16))[name = tensor("op_39390_cast_fp16")]; + tensor var_39391_to_fp16 = const()[name = tensor("op_39391_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4081_cast_fp16 = mul(x = var_39390_cast_fp16, y = var_39391_to_fp16)[name = tensor("aw_chunk_4081_cast_fp16")]; + tensor var_39394_equation_0 = const()[name = tensor("op_39394_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39394_cast_fp16 = einsum(equation = var_39394_equation_0, values = (var_39112_cast_fp16, var_38801_cast_fp16))[name = tensor("op_39394_cast_fp16")]; + tensor var_39395_to_fp16 = const()[name = tensor("op_39395_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4083_cast_fp16 = mul(x = var_39394_cast_fp16, y = var_39395_to_fp16)[name = tensor("aw_chunk_4083_cast_fp16")]; + tensor var_39398_equation_0 = const()[name = tensor("op_39398_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39398_cast_fp16 = einsum(equation = var_39398_equation_0, values = (var_39112_cast_fp16, var_38808_cast_fp16))[name = tensor("op_39398_cast_fp16")]; + tensor var_39399_to_fp16 = const()[name = tensor("op_39399_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4085_cast_fp16 = mul(x = var_39398_cast_fp16, y = var_39399_to_fp16)[name = tensor("aw_chunk_4085_cast_fp16")]; + tensor var_39402_equation_0 = const()[name = tensor("op_39402_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39402_cast_fp16 = einsum(equation = var_39402_equation_0, values = (var_39112_cast_fp16, var_38815_cast_fp16))[name = tensor("op_39402_cast_fp16")]; + tensor var_39403_to_fp16 = const()[name = tensor("op_39403_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4087_cast_fp16 = mul(x = var_39402_cast_fp16, y = var_39403_to_fp16)[name = tensor("aw_chunk_4087_cast_fp16")]; + tensor var_39406_equation_0 = const()[name = tensor("op_39406_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39406_cast_fp16 = einsum(equation = var_39406_equation_0, values = (var_39116_cast_fp16, var_38822_cast_fp16))[name = tensor("op_39406_cast_fp16")]; + tensor var_39407_to_fp16 = const()[name = tensor("op_39407_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4089_cast_fp16 = mul(x = var_39406_cast_fp16, y = var_39407_to_fp16)[name = tensor("aw_chunk_4089_cast_fp16")]; + tensor var_39410_equation_0 = const()[name = tensor("op_39410_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39410_cast_fp16 = einsum(equation = var_39410_equation_0, values = (var_39116_cast_fp16, var_38829_cast_fp16))[name = tensor("op_39410_cast_fp16")]; + tensor var_39411_to_fp16 = const()[name = tensor("op_39411_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4091_cast_fp16 = mul(x = var_39410_cast_fp16, y = var_39411_to_fp16)[name = tensor("aw_chunk_4091_cast_fp16")]; + tensor var_39414_equation_0 = const()[name = tensor("op_39414_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39414_cast_fp16 = einsum(equation = var_39414_equation_0, values = (var_39116_cast_fp16, var_38836_cast_fp16))[name = tensor("op_39414_cast_fp16")]; + tensor var_39415_to_fp16 = const()[name = tensor("op_39415_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4093_cast_fp16 = mul(x = var_39414_cast_fp16, y = var_39415_to_fp16)[name = tensor("aw_chunk_4093_cast_fp16")]; + tensor var_39418_equation_0 = const()[name = tensor("op_39418_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39418_cast_fp16 = einsum(equation = var_39418_equation_0, values = (var_39116_cast_fp16, var_38843_cast_fp16))[name = tensor("op_39418_cast_fp16")]; + tensor var_39419_to_fp16 = const()[name = tensor("op_39419_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4095_cast_fp16 = mul(x = var_39418_cast_fp16, y = var_39419_to_fp16)[name = tensor("aw_chunk_4095_cast_fp16")]; + tensor var_39422_equation_0 = const()[name = tensor("op_39422_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39422_cast_fp16 = einsum(equation = var_39422_equation_0, values = (var_39120_cast_fp16, var_38850_cast_fp16))[name = tensor("op_39422_cast_fp16")]; + tensor var_39423_to_fp16 = const()[name = tensor("op_39423_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4097_cast_fp16 = mul(x = var_39422_cast_fp16, y = var_39423_to_fp16)[name = tensor("aw_chunk_4097_cast_fp16")]; + tensor var_39426_equation_0 = const()[name = tensor("op_39426_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39426_cast_fp16 = einsum(equation = var_39426_equation_0, values = (var_39120_cast_fp16, var_38857_cast_fp16))[name = tensor("op_39426_cast_fp16")]; + tensor var_39427_to_fp16 = const()[name = tensor("op_39427_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4099_cast_fp16 = mul(x = var_39426_cast_fp16, y = var_39427_to_fp16)[name = tensor("aw_chunk_4099_cast_fp16")]; + tensor var_39430_equation_0 = const()[name = tensor("op_39430_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39430_cast_fp16 = einsum(equation = var_39430_equation_0, values = (var_39120_cast_fp16, var_38864_cast_fp16))[name = tensor("op_39430_cast_fp16")]; + tensor var_39431_to_fp16 = const()[name = tensor("op_39431_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4101_cast_fp16 = mul(x = var_39430_cast_fp16, y = var_39431_to_fp16)[name = tensor("aw_chunk_4101_cast_fp16")]; + tensor var_39434_equation_0 = const()[name = tensor("op_39434_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39434_cast_fp16 = einsum(equation = var_39434_equation_0, values = (var_39120_cast_fp16, var_38871_cast_fp16))[name = tensor("op_39434_cast_fp16")]; + tensor var_39435_to_fp16 = const()[name = tensor("op_39435_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4103_cast_fp16 = mul(x = var_39434_cast_fp16, y = var_39435_to_fp16)[name = tensor("aw_chunk_4103_cast_fp16")]; + tensor var_39438_equation_0 = const()[name = tensor("op_39438_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39438_cast_fp16 = einsum(equation = var_39438_equation_0, values = (var_39124_cast_fp16, var_38878_cast_fp16))[name = tensor("op_39438_cast_fp16")]; + tensor var_39439_to_fp16 = const()[name = tensor("op_39439_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4105_cast_fp16 = mul(x = var_39438_cast_fp16, y = var_39439_to_fp16)[name = tensor("aw_chunk_4105_cast_fp16")]; + tensor var_39442_equation_0 = const()[name = tensor("op_39442_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39442_cast_fp16 = einsum(equation = var_39442_equation_0, values = (var_39124_cast_fp16, var_38885_cast_fp16))[name = tensor("op_39442_cast_fp16")]; + tensor var_39443_to_fp16 = const()[name = tensor("op_39443_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4107_cast_fp16 = mul(x = var_39442_cast_fp16, y = var_39443_to_fp16)[name = tensor("aw_chunk_4107_cast_fp16")]; + tensor var_39446_equation_0 = const()[name = tensor("op_39446_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39446_cast_fp16 = einsum(equation = var_39446_equation_0, values = (var_39124_cast_fp16, var_38892_cast_fp16))[name = tensor("op_39446_cast_fp16")]; + tensor var_39447_to_fp16 = const()[name = tensor("op_39447_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4109_cast_fp16 = mul(x = var_39446_cast_fp16, y = var_39447_to_fp16)[name = tensor("aw_chunk_4109_cast_fp16")]; + tensor var_39450_equation_0 = const()[name = tensor("op_39450_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39450_cast_fp16 = einsum(equation = var_39450_equation_0, values = (var_39124_cast_fp16, var_38899_cast_fp16))[name = tensor("op_39450_cast_fp16")]; + tensor var_39451_to_fp16 = const()[name = tensor("op_39451_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4111_cast_fp16 = mul(x = var_39450_cast_fp16, y = var_39451_to_fp16)[name = tensor("aw_chunk_4111_cast_fp16")]; + tensor var_39454_equation_0 = const()[name = tensor("op_39454_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39454_cast_fp16 = einsum(equation = var_39454_equation_0, values = (var_39128_cast_fp16, var_38906_cast_fp16))[name = tensor("op_39454_cast_fp16")]; + tensor var_39455_to_fp16 = const()[name = tensor("op_39455_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4113_cast_fp16 = mul(x = var_39454_cast_fp16, y = var_39455_to_fp16)[name = tensor("aw_chunk_4113_cast_fp16")]; + tensor var_39458_equation_0 = const()[name = tensor("op_39458_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39458_cast_fp16 = einsum(equation = var_39458_equation_0, values = (var_39128_cast_fp16, var_38913_cast_fp16))[name = tensor("op_39458_cast_fp16")]; + tensor var_39459_to_fp16 = const()[name = tensor("op_39459_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4115_cast_fp16 = mul(x = var_39458_cast_fp16, y = var_39459_to_fp16)[name = tensor("aw_chunk_4115_cast_fp16")]; + tensor var_39462_equation_0 = const()[name = tensor("op_39462_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39462_cast_fp16 = einsum(equation = var_39462_equation_0, values = (var_39128_cast_fp16, var_38920_cast_fp16))[name = tensor("op_39462_cast_fp16")]; + tensor var_39463_to_fp16 = const()[name = tensor("op_39463_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4117_cast_fp16 = mul(x = var_39462_cast_fp16, y = var_39463_to_fp16)[name = tensor("aw_chunk_4117_cast_fp16")]; + tensor var_39466_equation_0 = const()[name = tensor("op_39466_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39466_cast_fp16 = einsum(equation = var_39466_equation_0, values = (var_39128_cast_fp16, var_38927_cast_fp16))[name = tensor("op_39466_cast_fp16")]; + tensor var_39467_to_fp16 = const()[name = tensor("op_39467_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4119_cast_fp16 = mul(x = var_39466_cast_fp16, y = var_39467_to_fp16)[name = tensor("aw_chunk_4119_cast_fp16")]; + tensor var_39470_equation_0 = const()[name = tensor("op_39470_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39470_cast_fp16 = einsum(equation = var_39470_equation_0, values = (var_39132_cast_fp16, var_38934_cast_fp16))[name = tensor("op_39470_cast_fp16")]; + tensor var_39471_to_fp16 = const()[name = tensor("op_39471_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4121_cast_fp16 = mul(x = var_39470_cast_fp16, y = var_39471_to_fp16)[name = tensor("aw_chunk_4121_cast_fp16")]; + tensor var_39474_equation_0 = const()[name = tensor("op_39474_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39474_cast_fp16 = einsum(equation = var_39474_equation_0, values = (var_39132_cast_fp16, var_38941_cast_fp16))[name = tensor("op_39474_cast_fp16")]; + tensor var_39475_to_fp16 = const()[name = tensor("op_39475_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4123_cast_fp16 = mul(x = var_39474_cast_fp16, y = var_39475_to_fp16)[name = tensor("aw_chunk_4123_cast_fp16")]; + tensor var_39478_equation_0 = const()[name = tensor("op_39478_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39478_cast_fp16 = einsum(equation = var_39478_equation_0, values = (var_39132_cast_fp16, var_38948_cast_fp16))[name = tensor("op_39478_cast_fp16")]; + tensor var_39479_to_fp16 = const()[name = tensor("op_39479_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4125_cast_fp16 = mul(x = var_39478_cast_fp16, y = var_39479_to_fp16)[name = tensor("aw_chunk_4125_cast_fp16")]; + tensor var_39482_equation_0 = const()[name = tensor("op_39482_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39482_cast_fp16 = einsum(equation = var_39482_equation_0, values = (var_39132_cast_fp16, var_38955_cast_fp16))[name = tensor("op_39482_cast_fp16")]; + tensor var_39483_to_fp16 = const()[name = tensor("op_39483_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4127_cast_fp16 = mul(x = var_39482_cast_fp16, y = var_39483_to_fp16)[name = tensor("aw_chunk_4127_cast_fp16")]; + tensor var_39486_equation_0 = const()[name = tensor("op_39486_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39486_cast_fp16 = einsum(equation = var_39486_equation_0, values = (var_39136_cast_fp16, var_38962_cast_fp16))[name = tensor("op_39486_cast_fp16")]; + tensor var_39487_to_fp16 = const()[name = tensor("op_39487_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4129_cast_fp16 = mul(x = var_39486_cast_fp16, y = var_39487_to_fp16)[name = tensor("aw_chunk_4129_cast_fp16")]; + tensor var_39490_equation_0 = const()[name = tensor("op_39490_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39490_cast_fp16 = einsum(equation = var_39490_equation_0, values = (var_39136_cast_fp16, var_38969_cast_fp16))[name = tensor("op_39490_cast_fp16")]; + tensor var_39491_to_fp16 = const()[name = tensor("op_39491_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4131_cast_fp16 = mul(x = var_39490_cast_fp16, y = var_39491_to_fp16)[name = tensor("aw_chunk_4131_cast_fp16")]; + tensor var_39494_equation_0 = const()[name = tensor("op_39494_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39494_cast_fp16 = einsum(equation = var_39494_equation_0, values = (var_39136_cast_fp16, var_38976_cast_fp16))[name = tensor("op_39494_cast_fp16")]; + tensor var_39495_to_fp16 = const()[name = tensor("op_39495_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4133_cast_fp16 = mul(x = var_39494_cast_fp16, y = var_39495_to_fp16)[name = tensor("aw_chunk_4133_cast_fp16")]; + tensor var_39498_equation_0 = const()[name = tensor("op_39498_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39498_cast_fp16 = einsum(equation = var_39498_equation_0, values = (var_39136_cast_fp16, var_38983_cast_fp16))[name = tensor("op_39498_cast_fp16")]; + tensor var_39499_to_fp16 = const()[name = tensor("op_39499_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4135_cast_fp16 = mul(x = var_39498_cast_fp16, y = var_39499_to_fp16)[name = tensor("aw_chunk_4135_cast_fp16")]; + tensor var_39502_equation_0 = const()[name = tensor("op_39502_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39502_cast_fp16 = einsum(equation = var_39502_equation_0, values = (var_39140_cast_fp16, var_38990_cast_fp16))[name = tensor("op_39502_cast_fp16")]; + tensor var_39503_to_fp16 = const()[name = tensor("op_39503_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4137_cast_fp16 = mul(x = var_39502_cast_fp16, y = var_39503_to_fp16)[name = tensor("aw_chunk_4137_cast_fp16")]; + tensor var_39506_equation_0 = const()[name = tensor("op_39506_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39506_cast_fp16 = einsum(equation = var_39506_equation_0, values = (var_39140_cast_fp16, var_38997_cast_fp16))[name = tensor("op_39506_cast_fp16")]; + tensor var_39507_to_fp16 = const()[name = tensor("op_39507_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4139_cast_fp16 = mul(x = var_39506_cast_fp16, y = var_39507_to_fp16)[name = tensor("aw_chunk_4139_cast_fp16")]; + tensor var_39510_equation_0 = const()[name = tensor("op_39510_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39510_cast_fp16 = einsum(equation = var_39510_equation_0, values = (var_39140_cast_fp16, var_39004_cast_fp16))[name = tensor("op_39510_cast_fp16")]; + tensor var_39511_to_fp16 = const()[name = tensor("op_39511_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4141_cast_fp16 = mul(x = var_39510_cast_fp16, y = var_39511_to_fp16)[name = tensor("aw_chunk_4141_cast_fp16")]; + tensor var_39514_equation_0 = const()[name = tensor("op_39514_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39514_cast_fp16 = einsum(equation = var_39514_equation_0, values = (var_39140_cast_fp16, var_39011_cast_fp16))[name = tensor("op_39514_cast_fp16")]; + tensor var_39515_to_fp16 = const()[name = tensor("op_39515_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4143_cast_fp16 = mul(x = var_39514_cast_fp16, y = var_39515_to_fp16)[name = tensor("aw_chunk_4143_cast_fp16")]; + tensor var_39518_equation_0 = const()[name = tensor("op_39518_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39518_cast_fp16 = einsum(equation = var_39518_equation_0, values = (var_39144_cast_fp16, var_39018_cast_fp16))[name = tensor("op_39518_cast_fp16")]; + tensor var_39519_to_fp16 = const()[name = tensor("op_39519_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4145_cast_fp16 = mul(x = var_39518_cast_fp16, y = var_39519_to_fp16)[name = tensor("aw_chunk_4145_cast_fp16")]; + tensor var_39522_equation_0 = const()[name = tensor("op_39522_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39522_cast_fp16 = einsum(equation = var_39522_equation_0, values = (var_39144_cast_fp16, var_39025_cast_fp16))[name = tensor("op_39522_cast_fp16")]; + tensor var_39523_to_fp16 = const()[name = tensor("op_39523_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4147_cast_fp16 = mul(x = var_39522_cast_fp16, y = var_39523_to_fp16)[name = tensor("aw_chunk_4147_cast_fp16")]; + tensor var_39526_equation_0 = const()[name = tensor("op_39526_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39526_cast_fp16 = einsum(equation = var_39526_equation_0, values = (var_39144_cast_fp16, var_39032_cast_fp16))[name = tensor("op_39526_cast_fp16")]; + tensor var_39527_to_fp16 = const()[name = tensor("op_39527_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4149_cast_fp16 = mul(x = var_39526_cast_fp16, y = var_39527_to_fp16)[name = tensor("aw_chunk_4149_cast_fp16")]; + tensor var_39530_equation_0 = const()[name = tensor("op_39530_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39530_cast_fp16 = einsum(equation = var_39530_equation_0, values = (var_39144_cast_fp16, var_39039_cast_fp16))[name = tensor("op_39530_cast_fp16")]; + tensor var_39531_to_fp16 = const()[name = tensor("op_39531_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4151_cast_fp16 = mul(x = var_39530_cast_fp16, y = var_39531_to_fp16)[name = tensor("aw_chunk_4151_cast_fp16")]; + tensor var_39534_equation_0 = const()[name = tensor("op_39534_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39534_cast_fp16 = einsum(equation = var_39534_equation_0, values = (var_39148_cast_fp16, var_39046_cast_fp16))[name = tensor("op_39534_cast_fp16")]; + tensor var_39535_to_fp16 = const()[name = tensor("op_39535_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4153_cast_fp16 = mul(x = var_39534_cast_fp16, y = var_39535_to_fp16)[name = tensor("aw_chunk_4153_cast_fp16")]; + tensor var_39538_equation_0 = const()[name = tensor("op_39538_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39538_cast_fp16 = einsum(equation = var_39538_equation_0, values = (var_39148_cast_fp16, var_39053_cast_fp16))[name = tensor("op_39538_cast_fp16")]; + tensor var_39539_to_fp16 = const()[name = tensor("op_39539_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4155_cast_fp16 = mul(x = var_39538_cast_fp16, y = var_39539_to_fp16)[name = tensor("aw_chunk_4155_cast_fp16")]; + tensor var_39542_equation_0 = const()[name = tensor("op_39542_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39542_cast_fp16 = einsum(equation = var_39542_equation_0, values = (var_39148_cast_fp16, var_39060_cast_fp16))[name = tensor("op_39542_cast_fp16")]; + tensor var_39543_to_fp16 = const()[name = tensor("op_39543_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4157_cast_fp16 = mul(x = var_39542_cast_fp16, y = var_39543_to_fp16)[name = tensor("aw_chunk_4157_cast_fp16")]; + tensor var_39546_equation_0 = const()[name = tensor("op_39546_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_39546_cast_fp16 = einsum(equation = var_39546_equation_0, values = (var_39148_cast_fp16, var_39067_cast_fp16))[name = tensor("op_39546_cast_fp16")]; + tensor var_39547_to_fp16 = const()[name = tensor("op_39547_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4159_cast_fp16 = mul(x = var_39546_cast_fp16, y = var_39547_to_fp16)[name = tensor("aw_chunk_4159_cast_fp16")]; + tensor var_39549_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4001_cast_fp16)[name = tensor("op_39549_cast_fp16")]; + tensor var_39550_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4003_cast_fp16)[name = tensor("op_39550_cast_fp16")]; + tensor var_39551_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4005_cast_fp16)[name = tensor("op_39551_cast_fp16")]; + tensor var_39552_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4007_cast_fp16)[name = tensor("op_39552_cast_fp16")]; + tensor var_39553_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4009_cast_fp16)[name = tensor("op_39553_cast_fp16")]; + tensor var_39554_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4011_cast_fp16)[name = tensor("op_39554_cast_fp16")]; + tensor var_39555_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4013_cast_fp16)[name = tensor("op_39555_cast_fp16")]; + tensor var_39556_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4015_cast_fp16)[name = tensor("op_39556_cast_fp16")]; + tensor var_39557_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4017_cast_fp16)[name = tensor("op_39557_cast_fp16")]; + tensor var_39558_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4019_cast_fp16)[name = tensor("op_39558_cast_fp16")]; + tensor var_39559_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4021_cast_fp16)[name = tensor("op_39559_cast_fp16")]; + tensor var_39560_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4023_cast_fp16)[name = tensor("op_39560_cast_fp16")]; + tensor var_39561_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4025_cast_fp16)[name = tensor("op_39561_cast_fp16")]; + tensor var_39562_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4027_cast_fp16)[name = tensor("op_39562_cast_fp16")]; + tensor var_39563_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4029_cast_fp16)[name = tensor("op_39563_cast_fp16")]; + tensor var_39564_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4031_cast_fp16)[name = tensor("op_39564_cast_fp16")]; + tensor var_39565_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4033_cast_fp16)[name = tensor("op_39565_cast_fp16")]; + tensor var_39566_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4035_cast_fp16)[name = tensor("op_39566_cast_fp16")]; + tensor var_39567_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4037_cast_fp16)[name = tensor("op_39567_cast_fp16")]; + tensor var_39568_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4039_cast_fp16)[name = tensor("op_39568_cast_fp16")]; + tensor var_39569_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4041_cast_fp16)[name = tensor("op_39569_cast_fp16")]; + tensor var_39570_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4043_cast_fp16)[name = tensor("op_39570_cast_fp16")]; + tensor var_39571_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4045_cast_fp16)[name = tensor("op_39571_cast_fp16")]; + tensor var_39572_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4047_cast_fp16)[name = tensor("op_39572_cast_fp16")]; + tensor var_39573_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4049_cast_fp16)[name = tensor("op_39573_cast_fp16")]; + tensor var_39574_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4051_cast_fp16)[name = tensor("op_39574_cast_fp16")]; + tensor var_39575_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4053_cast_fp16)[name = tensor("op_39575_cast_fp16")]; + tensor var_39576_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4055_cast_fp16)[name = tensor("op_39576_cast_fp16")]; + tensor var_39577_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4057_cast_fp16)[name = tensor("op_39577_cast_fp16")]; + tensor var_39578_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4059_cast_fp16)[name = tensor("op_39578_cast_fp16")]; + tensor var_39579_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4061_cast_fp16)[name = tensor("op_39579_cast_fp16")]; + tensor var_39580_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4063_cast_fp16)[name = tensor("op_39580_cast_fp16")]; + tensor var_39581_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4065_cast_fp16)[name = tensor("op_39581_cast_fp16")]; + tensor var_39582_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4067_cast_fp16)[name = tensor("op_39582_cast_fp16")]; + tensor var_39583_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4069_cast_fp16)[name = tensor("op_39583_cast_fp16")]; + tensor var_39584_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4071_cast_fp16)[name = tensor("op_39584_cast_fp16")]; + tensor var_39585_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4073_cast_fp16)[name = tensor("op_39585_cast_fp16")]; + tensor var_39586_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4075_cast_fp16)[name = tensor("op_39586_cast_fp16")]; + tensor var_39587_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4077_cast_fp16)[name = tensor("op_39587_cast_fp16")]; + tensor var_39588_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4079_cast_fp16)[name = tensor("op_39588_cast_fp16")]; + tensor var_39589_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4081_cast_fp16)[name = tensor("op_39589_cast_fp16")]; + tensor var_39590_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4083_cast_fp16)[name = tensor("op_39590_cast_fp16")]; + tensor var_39591_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4085_cast_fp16)[name = tensor("op_39591_cast_fp16")]; + tensor var_39592_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4087_cast_fp16)[name = tensor("op_39592_cast_fp16")]; + tensor var_39593_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4089_cast_fp16)[name = tensor("op_39593_cast_fp16")]; + tensor var_39594_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4091_cast_fp16)[name = tensor("op_39594_cast_fp16")]; + tensor var_39595_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4093_cast_fp16)[name = tensor("op_39595_cast_fp16")]; + tensor var_39596_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4095_cast_fp16)[name = tensor("op_39596_cast_fp16")]; + tensor var_39597_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4097_cast_fp16)[name = tensor("op_39597_cast_fp16")]; + tensor var_39598_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4099_cast_fp16)[name = tensor("op_39598_cast_fp16")]; + tensor var_39599_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4101_cast_fp16)[name = tensor("op_39599_cast_fp16")]; + tensor var_39600_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4103_cast_fp16)[name = tensor("op_39600_cast_fp16")]; + tensor var_39601_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4105_cast_fp16)[name = tensor("op_39601_cast_fp16")]; + tensor var_39602_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4107_cast_fp16)[name = tensor("op_39602_cast_fp16")]; + tensor var_39603_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4109_cast_fp16)[name = tensor("op_39603_cast_fp16")]; + tensor var_39604_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4111_cast_fp16)[name = tensor("op_39604_cast_fp16")]; + tensor var_39605_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4113_cast_fp16)[name = tensor("op_39605_cast_fp16")]; + tensor var_39606_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4115_cast_fp16)[name = tensor("op_39606_cast_fp16")]; + tensor var_39607_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4117_cast_fp16)[name = tensor("op_39607_cast_fp16")]; + tensor var_39608_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4119_cast_fp16)[name = tensor("op_39608_cast_fp16")]; + tensor var_39609_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4121_cast_fp16)[name = tensor("op_39609_cast_fp16")]; + tensor var_39610_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4123_cast_fp16)[name = tensor("op_39610_cast_fp16")]; + tensor var_39611_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4125_cast_fp16)[name = tensor("op_39611_cast_fp16")]; + tensor var_39612_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4127_cast_fp16)[name = tensor("op_39612_cast_fp16")]; + tensor var_39613_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4129_cast_fp16)[name = tensor("op_39613_cast_fp16")]; + tensor var_39614_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4131_cast_fp16)[name = tensor("op_39614_cast_fp16")]; + tensor var_39615_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4133_cast_fp16)[name = tensor("op_39615_cast_fp16")]; + tensor var_39616_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4135_cast_fp16)[name = tensor("op_39616_cast_fp16")]; + tensor var_39617_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4137_cast_fp16)[name = tensor("op_39617_cast_fp16")]; + tensor var_39618_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4139_cast_fp16)[name = tensor("op_39618_cast_fp16")]; + tensor var_39619_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4141_cast_fp16)[name = tensor("op_39619_cast_fp16")]; + tensor var_39620_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4143_cast_fp16)[name = tensor("op_39620_cast_fp16")]; + tensor var_39621_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4145_cast_fp16)[name = tensor("op_39621_cast_fp16")]; + tensor var_39622_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4147_cast_fp16)[name = tensor("op_39622_cast_fp16")]; + tensor var_39623_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4149_cast_fp16)[name = tensor("op_39623_cast_fp16")]; + tensor var_39624_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4151_cast_fp16)[name = tensor("op_39624_cast_fp16")]; + tensor var_39625_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4153_cast_fp16)[name = tensor("op_39625_cast_fp16")]; + tensor var_39626_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4155_cast_fp16)[name = tensor("op_39626_cast_fp16")]; + tensor var_39627_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4157_cast_fp16)[name = tensor("op_39627_cast_fp16")]; + tensor var_39628_cast_fp16 = softmax(axis = var_38374, x = aw_chunk_4159_cast_fp16)[name = tensor("op_39628_cast_fp16")]; + tensor var_39630_equation_0 = const()[name = tensor("op_39630_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39630_cast_fp16 = einsum(equation = var_39630_equation_0, values = (var_39150_cast_fp16, var_39549_cast_fp16))[name = tensor("op_39630_cast_fp16")]; + tensor var_39632_equation_0 = const()[name = tensor("op_39632_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39632_cast_fp16 = einsum(equation = var_39632_equation_0, values = (var_39150_cast_fp16, var_39550_cast_fp16))[name = tensor("op_39632_cast_fp16")]; + tensor var_39634_equation_0 = const()[name = tensor("op_39634_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39634_cast_fp16 = einsum(equation = var_39634_equation_0, values = (var_39150_cast_fp16, var_39551_cast_fp16))[name = tensor("op_39634_cast_fp16")]; + tensor var_39636_equation_0 = const()[name = tensor("op_39636_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39636_cast_fp16 = einsum(equation = var_39636_equation_0, values = (var_39150_cast_fp16, var_39552_cast_fp16))[name = tensor("op_39636_cast_fp16")]; + tensor var_39638_equation_0 = const()[name = tensor("op_39638_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39638_cast_fp16 = einsum(equation = var_39638_equation_0, values = (var_39154_cast_fp16, var_39553_cast_fp16))[name = tensor("op_39638_cast_fp16")]; + tensor var_39640_equation_0 = const()[name = tensor("op_39640_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39640_cast_fp16 = einsum(equation = var_39640_equation_0, values = (var_39154_cast_fp16, var_39554_cast_fp16))[name = tensor("op_39640_cast_fp16")]; + tensor var_39642_equation_0 = const()[name = tensor("op_39642_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39642_cast_fp16 = einsum(equation = var_39642_equation_0, values = (var_39154_cast_fp16, var_39555_cast_fp16))[name = tensor("op_39642_cast_fp16")]; + tensor var_39644_equation_0 = const()[name = tensor("op_39644_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39644_cast_fp16 = einsum(equation = var_39644_equation_0, values = (var_39154_cast_fp16, var_39556_cast_fp16))[name = tensor("op_39644_cast_fp16")]; + tensor var_39646_equation_0 = const()[name = tensor("op_39646_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39646_cast_fp16 = einsum(equation = var_39646_equation_0, values = (var_39158_cast_fp16, var_39557_cast_fp16))[name = tensor("op_39646_cast_fp16")]; + tensor var_39648_equation_0 = const()[name = tensor("op_39648_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39648_cast_fp16 = einsum(equation = var_39648_equation_0, values = (var_39158_cast_fp16, var_39558_cast_fp16))[name = tensor("op_39648_cast_fp16")]; + tensor var_39650_equation_0 = const()[name = tensor("op_39650_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39650_cast_fp16 = einsum(equation = var_39650_equation_0, values = (var_39158_cast_fp16, var_39559_cast_fp16))[name = tensor("op_39650_cast_fp16")]; + tensor var_39652_equation_0 = const()[name = tensor("op_39652_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39652_cast_fp16 = einsum(equation = var_39652_equation_0, values = (var_39158_cast_fp16, var_39560_cast_fp16))[name = tensor("op_39652_cast_fp16")]; + tensor var_39654_equation_0 = const()[name = tensor("op_39654_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39654_cast_fp16 = einsum(equation = var_39654_equation_0, values = (var_39162_cast_fp16, var_39561_cast_fp16))[name = tensor("op_39654_cast_fp16")]; + tensor var_39656_equation_0 = const()[name = tensor("op_39656_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39656_cast_fp16 = einsum(equation = var_39656_equation_0, values = (var_39162_cast_fp16, var_39562_cast_fp16))[name = tensor("op_39656_cast_fp16")]; + tensor var_39658_equation_0 = const()[name = tensor("op_39658_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39658_cast_fp16 = einsum(equation = var_39658_equation_0, values = (var_39162_cast_fp16, var_39563_cast_fp16))[name = tensor("op_39658_cast_fp16")]; + tensor var_39660_equation_0 = const()[name = tensor("op_39660_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39660_cast_fp16 = einsum(equation = var_39660_equation_0, values = (var_39162_cast_fp16, var_39564_cast_fp16))[name = tensor("op_39660_cast_fp16")]; + tensor var_39662_equation_0 = const()[name = tensor("op_39662_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39662_cast_fp16 = einsum(equation = var_39662_equation_0, values = (var_39166_cast_fp16, var_39565_cast_fp16))[name = tensor("op_39662_cast_fp16")]; + tensor var_39664_equation_0 = const()[name = tensor("op_39664_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39664_cast_fp16 = einsum(equation = var_39664_equation_0, values = (var_39166_cast_fp16, var_39566_cast_fp16))[name = tensor("op_39664_cast_fp16")]; + tensor var_39666_equation_0 = const()[name = tensor("op_39666_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39666_cast_fp16 = einsum(equation = var_39666_equation_0, values = (var_39166_cast_fp16, var_39567_cast_fp16))[name = tensor("op_39666_cast_fp16")]; + tensor var_39668_equation_0 = const()[name = tensor("op_39668_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39668_cast_fp16 = einsum(equation = var_39668_equation_0, values = (var_39166_cast_fp16, var_39568_cast_fp16))[name = tensor("op_39668_cast_fp16")]; + tensor var_39670_equation_0 = const()[name = tensor("op_39670_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39670_cast_fp16 = einsum(equation = var_39670_equation_0, values = (var_39170_cast_fp16, var_39569_cast_fp16))[name = tensor("op_39670_cast_fp16")]; + tensor var_39672_equation_0 = const()[name = tensor("op_39672_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39672_cast_fp16 = einsum(equation = var_39672_equation_0, values = (var_39170_cast_fp16, var_39570_cast_fp16))[name = tensor("op_39672_cast_fp16")]; + tensor var_39674_equation_0 = const()[name = tensor("op_39674_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39674_cast_fp16 = einsum(equation = var_39674_equation_0, values = (var_39170_cast_fp16, var_39571_cast_fp16))[name = tensor("op_39674_cast_fp16")]; + tensor var_39676_equation_0 = const()[name = tensor("op_39676_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39676_cast_fp16 = einsum(equation = var_39676_equation_0, values = (var_39170_cast_fp16, var_39572_cast_fp16))[name = tensor("op_39676_cast_fp16")]; + tensor var_39678_equation_0 = const()[name = tensor("op_39678_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39678_cast_fp16 = einsum(equation = var_39678_equation_0, values = (var_39174_cast_fp16, var_39573_cast_fp16))[name = tensor("op_39678_cast_fp16")]; + tensor var_39680_equation_0 = const()[name = tensor("op_39680_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39680_cast_fp16 = einsum(equation = var_39680_equation_0, values = (var_39174_cast_fp16, var_39574_cast_fp16))[name = tensor("op_39680_cast_fp16")]; + tensor var_39682_equation_0 = const()[name = tensor("op_39682_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39682_cast_fp16 = einsum(equation = var_39682_equation_0, values = (var_39174_cast_fp16, var_39575_cast_fp16))[name = tensor("op_39682_cast_fp16")]; + tensor var_39684_equation_0 = const()[name = tensor("op_39684_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39684_cast_fp16 = einsum(equation = var_39684_equation_0, values = (var_39174_cast_fp16, var_39576_cast_fp16))[name = tensor("op_39684_cast_fp16")]; + tensor var_39686_equation_0 = const()[name = tensor("op_39686_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39686_cast_fp16 = einsum(equation = var_39686_equation_0, values = (var_39178_cast_fp16, var_39577_cast_fp16))[name = tensor("op_39686_cast_fp16")]; + tensor var_39688_equation_0 = const()[name = tensor("op_39688_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39688_cast_fp16 = einsum(equation = var_39688_equation_0, values = (var_39178_cast_fp16, var_39578_cast_fp16))[name = tensor("op_39688_cast_fp16")]; + tensor var_39690_equation_0 = const()[name = tensor("op_39690_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39690_cast_fp16 = einsum(equation = var_39690_equation_0, values = (var_39178_cast_fp16, var_39579_cast_fp16))[name = tensor("op_39690_cast_fp16")]; + tensor var_39692_equation_0 = const()[name = tensor("op_39692_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39692_cast_fp16 = einsum(equation = var_39692_equation_0, values = (var_39178_cast_fp16, var_39580_cast_fp16))[name = tensor("op_39692_cast_fp16")]; + tensor var_39694_equation_0 = const()[name = tensor("op_39694_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39694_cast_fp16 = einsum(equation = var_39694_equation_0, values = (var_39182_cast_fp16, var_39581_cast_fp16))[name = tensor("op_39694_cast_fp16")]; + tensor var_39696_equation_0 = const()[name = tensor("op_39696_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39696_cast_fp16 = einsum(equation = var_39696_equation_0, values = (var_39182_cast_fp16, var_39582_cast_fp16))[name = tensor("op_39696_cast_fp16")]; + tensor var_39698_equation_0 = const()[name = tensor("op_39698_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39698_cast_fp16 = einsum(equation = var_39698_equation_0, values = (var_39182_cast_fp16, var_39583_cast_fp16))[name = tensor("op_39698_cast_fp16")]; + tensor var_39700_equation_0 = const()[name = tensor("op_39700_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39700_cast_fp16 = einsum(equation = var_39700_equation_0, values = (var_39182_cast_fp16, var_39584_cast_fp16))[name = tensor("op_39700_cast_fp16")]; + tensor var_39702_equation_0 = const()[name = tensor("op_39702_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39702_cast_fp16 = einsum(equation = var_39702_equation_0, values = (var_39186_cast_fp16, var_39585_cast_fp16))[name = tensor("op_39702_cast_fp16")]; + tensor var_39704_equation_0 = const()[name = tensor("op_39704_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39704_cast_fp16 = einsum(equation = var_39704_equation_0, values = (var_39186_cast_fp16, var_39586_cast_fp16))[name = tensor("op_39704_cast_fp16")]; + tensor var_39706_equation_0 = const()[name = tensor("op_39706_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39706_cast_fp16 = einsum(equation = var_39706_equation_0, values = (var_39186_cast_fp16, var_39587_cast_fp16))[name = tensor("op_39706_cast_fp16")]; + tensor var_39708_equation_0 = const()[name = tensor("op_39708_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39708_cast_fp16 = einsum(equation = var_39708_equation_0, values = (var_39186_cast_fp16, var_39588_cast_fp16))[name = tensor("op_39708_cast_fp16")]; + tensor var_39710_equation_0 = const()[name = tensor("op_39710_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39710_cast_fp16 = einsum(equation = var_39710_equation_0, values = (var_39190_cast_fp16, var_39589_cast_fp16))[name = tensor("op_39710_cast_fp16")]; + tensor var_39712_equation_0 = const()[name = tensor("op_39712_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39712_cast_fp16 = einsum(equation = var_39712_equation_0, values = (var_39190_cast_fp16, var_39590_cast_fp16))[name = tensor("op_39712_cast_fp16")]; + tensor var_39714_equation_0 = const()[name = tensor("op_39714_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39714_cast_fp16 = einsum(equation = var_39714_equation_0, values = (var_39190_cast_fp16, var_39591_cast_fp16))[name = tensor("op_39714_cast_fp16")]; + tensor var_39716_equation_0 = const()[name = tensor("op_39716_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39716_cast_fp16 = einsum(equation = var_39716_equation_0, values = (var_39190_cast_fp16, var_39592_cast_fp16))[name = tensor("op_39716_cast_fp16")]; + tensor var_39718_equation_0 = const()[name = tensor("op_39718_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39718_cast_fp16 = einsum(equation = var_39718_equation_0, values = (var_39194_cast_fp16, var_39593_cast_fp16))[name = tensor("op_39718_cast_fp16")]; + tensor var_39720_equation_0 = const()[name = tensor("op_39720_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39720_cast_fp16 = einsum(equation = var_39720_equation_0, values = (var_39194_cast_fp16, var_39594_cast_fp16))[name = tensor("op_39720_cast_fp16")]; + tensor var_39722_equation_0 = const()[name = tensor("op_39722_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39722_cast_fp16 = einsum(equation = var_39722_equation_0, values = (var_39194_cast_fp16, var_39595_cast_fp16))[name = tensor("op_39722_cast_fp16")]; + tensor var_39724_equation_0 = const()[name = tensor("op_39724_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39724_cast_fp16 = einsum(equation = var_39724_equation_0, values = (var_39194_cast_fp16, var_39596_cast_fp16))[name = tensor("op_39724_cast_fp16")]; + tensor var_39726_equation_0 = const()[name = tensor("op_39726_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39726_cast_fp16 = einsum(equation = var_39726_equation_0, values = (var_39198_cast_fp16, var_39597_cast_fp16))[name = tensor("op_39726_cast_fp16")]; + tensor var_39728_equation_0 = const()[name = tensor("op_39728_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39728_cast_fp16 = einsum(equation = var_39728_equation_0, values = (var_39198_cast_fp16, var_39598_cast_fp16))[name = tensor("op_39728_cast_fp16")]; + tensor var_39730_equation_0 = const()[name = tensor("op_39730_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39730_cast_fp16 = einsum(equation = var_39730_equation_0, values = (var_39198_cast_fp16, var_39599_cast_fp16))[name = tensor("op_39730_cast_fp16")]; + tensor var_39732_equation_0 = const()[name = tensor("op_39732_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39732_cast_fp16 = einsum(equation = var_39732_equation_0, values = (var_39198_cast_fp16, var_39600_cast_fp16))[name = tensor("op_39732_cast_fp16")]; + tensor var_39734_equation_0 = const()[name = tensor("op_39734_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39734_cast_fp16 = einsum(equation = var_39734_equation_0, values = (var_39202_cast_fp16, var_39601_cast_fp16))[name = tensor("op_39734_cast_fp16")]; + tensor var_39736_equation_0 = const()[name = tensor("op_39736_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39736_cast_fp16 = einsum(equation = var_39736_equation_0, values = (var_39202_cast_fp16, var_39602_cast_fp16))[name = tensor("op_39736_cast_fp16")]; + tensor var_39738_equation_0 = const()[name = tensor("op_39738_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39738_cast_fp16 = einsum(equation = var_39738_equation_0, values = (var_39202_cast_fp16, var_39603_cast_fp16))[name = tensor("op_39738_cast_fp16")]; + tensor var_39740_equation_0 = const()[name = tensor("op_39740_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39740_cast_fp16 = einsum(equation = var_39740_equation_0, values = (var_39202_cast_fp16, var_39604_cast_fp16))[name = tensor("op_39740_cast_fp16")]; + tensor var_39742_equation_0 = const()[name = tensor("op_39742_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39742_cast_fp16 = einsum(equation = var_39742_equation_0, values = (var_39206_cast_fp16, var_39605_cast_fp16))[name = tensor("op_39742_cast_fp16")]; + tensor var_39744_equation_0 = const()[name = tensor("op_39744_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39744_cast_fp16 = einsum(equation = var_39744_equation_0, values = (var_39206_cast_fp16, var_39606_cast_fp16))[name = tensor("op_39744_cast_fp16")]; + tensor var_39746_equation_0 = const()[name = tensor("op_39746_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39746_cast_fp16 = einsum(equation = var_39746_equation_0, values = (var_39206_cast_fp16, var_39607_cast_fp16))[name = tensor("op_39746_cast_fp16")]; + tensor var_39748_equation_0 = const()[name = tensor("op_39748_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39748_cast_fp16 = einsum(equation = var_39748_equation_0, values = (var_39206_cast_fp16, var_39608_cast_fp16))[name = tensor("op_39748_cast_fp16")]; + tensor var_39750_equation_0 = const()[name = tensor("op_39750_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39750_cast_fp16 = einsum(equation = var_39750_equation_0, values = (var_39210_cast_fp16, var_39609_cast_fp16))[name = tensor("op_39750_cast_fp16")]; + tensor var_39752_equation_0 = const()[name = tensor("op_39752_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39752_cast_fp16 = einsum(equation = var_39752_equation_0, values = (var_39210_cast_fp16, var_39610_cast_fp16))[name = tensor("op_39752_cast_fp16")]; + tensor var_39754_equation_0 = const()[name = tensor("op_39754_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39754_cast_fp16 = einsum(equation = var_39754_equation_0, values = (var_39210_cast_fp16, var_39611_cast_fp16))[name = tensor("op_39754_cast_fp16")]; + tensor var_39756_equation_0 = const()[name = tensor("op_39756_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39756_cast_fp16 = einsum(equation = var_39756_equation_0, values = (var_39210_cast_fp16, var_39612_cast_fp16))[name = tensor("op_39756_cast_fp16")]; + tensor var_39758_equation_0 = const()[name = tensor("op_39758_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39758_cast_fp16 = einsum(equation = var_39758_equation_0, values = (var_39214_cast_fp16, var_39613_cast_fp16))[name = tensor("op_39758_cast_fp16")]; + tensor var_39760_equation_0 = const()[name = tensor("op_39760_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39760_cast_fp16 = einsum(equation = var_39760_equation_0, values = (var_39214_cast_fp16, var_39614_cast_fp16))[name = tensor("op_39760_cast_fp16")]; + tensor var_39762_equation_0 = const()[name = tensor("op_39762_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39762_cast_fp16 = einsum(equation = var_39762_equation_0, values = (var_39214_cast_fp16, var_39615_cast_fp16))[name = tensor("op_39762_cast_fp16")]; + tensor var_39764_equation_0 = const()[name = tensor("op_39764_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39764_cast_fp16 = einsum(equation = var_39764_equation_0, values = (var_39214_cast_fp16, var_39616_cast_fp16))[name = tensor("op_39764_cast_fp16")]; + tensor var_39766_equation_0 = const()[name = tensor("op_39766_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39766_cast_fp16 = einsum(equation = var_39766_equation_0, values = (var_39218_cast_fp16, var_39617_cast_fp16))[name = tensor("op_39766_cast_fp16")]; + tensor var_39768_equation_0 = const()[name = tensor("op_39768_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39768_cast_fp16 = einsum(equation = var_39768_equation_0, values = (var_39218_cast_fp16, var_39618_cast_fp16))[name = tensor("op_39768_cast_fp16")]; + tensor var_39770_equation_0 = const()[name = tensor("op_39770_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39770_cast_fp16 = einsum(equation = var_39770_equation_0, values = (var_39218_cast_fp16, var_39619_cast_fp16))[name = tensor("op_39770_cast_fp16")]; + tensor var_39772_equation_0 = const()[name = tensor("op_39772_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39772_cast_fp16 = einsum(equation = var_39772_equation_0, values = (var_39218_cast_fp16, var_39620_cast_fp16))[name = tensor("op_39772_cast_fp16")]; + tensor var_39774_equation_0 = const()[name = tensor("op_39774_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39774_cast_fp16 = einsum(equation = var_39774_equation_0, values = (var_39222_cast_fp16, var_39621_cast_fp16))[name = tensor("op_39774_cast_fp16")]; + tensor var_39776_equation_0 = const()[name = tensor("op_39776_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39776_cast_fp16 = einsum(equation = var_39776_equation_0, values = (var_39222_cast_fp16, var_39622_cast_fp16))[name = tensor("op_39776_cast_fp16")]; + tensor var_39778_equation_0 = const()[name = tensor("op_39778_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39778_cast_fp16 = einsum(equation = var_39778_equation_0, values = (var_39222_cast_fp16, var_39623_cast_fp16))[name = tensor("op_39778_cast_fp16")]; + tensor var_39780_equation_0 = const()[name = tensor("op_39780_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39780_cast_fp16 = einsum(equation = var_39780_equation_0, values = (var_39222_cast_fp16, var_39624_cast_fp16))[name = tensor("op_39780_cast_fp16")]; + tensor var_39782_equation_0 = const()[name = tensor("op_39782_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39782_cast_fp16 = einsum(equation = var_39782_equation_0, values = (var_39226_cast_fp16, var_39625_cast_fp16))[name = tensor("op_39782_cast_fp16")]; + tensor var_39784_equation_0 = const()[name = tensor("op_39784_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39784_cast_fp16 = einsum(equation = var_39784_equation_0, values = (var_39226_cast_fp16, var_39626_cast_fp16))[name = tensor("op_39784_cast_fp16")]; + tensor var_39786_equation_0 = const()[name = tensor("op_39786_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39786_cast_fp16 = einsum(equation = var_39786_equation_0, values = (var_39226_cast_fp16, var_39627_cast_fp16))[name = tensor("op_39786_cast_fp16")]; + tensor var_39788_equation_0 = const()[name = tensor("op_39788_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_39788_cast_fp16 = einsum(equation = var_39788_equation_0, values = (var_39226_cast_fp16, var_39628_cast_fp16))[name = tensor("op_39788_cast_fp16")]; + tensor var_39790_interleave_0 = const()[name = tensor("op_39790_interleave_0"), val = tensor(false)]; + tensor var_39790_cast_fp16 = concat(axis = var_38349, interleave = var_39790_interleave_0, values = (var_39630_cast_fp16, var_39632_cast_fp16, var_39634_cast_fp16, var_39636_cast_fp16))[name = tensor("op_39790_cast_fp16")]; + tensor var_39792_interleave_0 = const()[name = tensor("op_39792_interleave_0"), val = tensor(false)]; + tensor var_39792_cast_fp16 = concat(axis = var_38349, interleave = var_39792_interleave_0, values = (var_39638_cast_fp16, var_39640_cast_fp16, var_39642_cast_fp16, var_39644_cast_fp16))[name = tensor("op_39792_cast_fp16")]; + tensor var_39794_interleave_0 = const()[name = tensor("op_39794_interleave_0"), val = tensor(false)]; + tensor var_39794_cast_fp16 = concat(axis = var_38349, interleave = var_39794_interleave_0, values = (var_39646_cast_fp16, var_39648_cast_fp16, var_39650_cast_fp16, var_39652_cast_fp16))[name = tensor("op_39794_cast_fp16")]; + tensor var_39796_interleave_0 = const()[name = tensor("op_39796_interleave_0"), val = tensor(false)]; + tensor var_39796_cast_fp16 = concat(axis = var_38349, interleave = var_39796_interleave_0, values = (var_39654_cast_fp16, var_39656_cast_fp16, var_39658_cast_fp16, var_39660_cast_fp16))[name = tensor("op_39796_cast_fp16")]; + tensor var_39798_interleave_0 = const()[name = tensor("op_39798_interleave_0"), val = tensor(false)]; + tensor var_39798_cast_fp16 = concat(axis = var_38349, interleave = var_39798_interleave_0, values = (var_39662_cast_fp16, var_39664_cast_fp16, var_39666_cast_fp16, var_39668_cast_fp16))[name = tensor("op_39798_cast_fp16")]; + tensor var_39800_interleave_0 = const()[name = tensor("op_39800_interleave_0"), val = tensor(false)]; + tensor var_39800_cast_fp16 = concat(axis = var_38349, interleave = var_39800_interleave_0, values = (var_39670_cast_fp16, var_39672_cast_fp16, var_39674_cast_fp16, var_39676_cast_fp16))[name = tensor("op_39800_cast_fp16")]; + tensor var_39802_interleave_0 = const()[name = tensor("op_39802_interleave_0"), val = tensor(false)]; + tensor var_39802_cast_fp16 = concat(axis = var_38349, interleave = var_39802_interleave_0, values = (var_39678_cast_fp16, var_39680_cast_fp16, var_39682_cast_fp16, var_39684_cast_fp16))[name = tensor("op_39802_cast_fp16")]; + tensor var_39804_interleave_0 = const()[name = tensor("op_39804_interleave_0"), val = tensor(false)]; + tensor var_39804_cast_fp16 = concat(axis = var_38349, interleave = var_39804_interleave_0, values = (var_39686_cast_fp16, var_39688_cast_fp16, var_39690_cast_fp16, var_39692_cast_fp16))[name = tensor("op_39804_cast_fp16")]; + tensor var_39806_interleave_0 = const()[name = tensor("op_39806_interleave_0"), val = tensor(false)]; + tensor var_39806_cast_fp16 = concat(axis = var_38349, interleave = var_39806_interleave_0, values = (var_39694_cast_fp16, var_39696_cast_fp16, var_39698_cast_fp16, var_39700_cast_fp16))[name = tensor("op_39806_cast_fp16")]; + tensor var_39808_interleave_0 = const()[name = tensor("op_39808_interleave_0"), val = tensor(false)]; + tensor var_39808_cast_fp16 = concat(axis = var_38349, interleave = var_39808_interleave_0, values = (var_39702_cast_fp16, var_39704_cast_fp16, var_39706_cast_fp16, var_39708_cast_fp16))[name = tensor("op_39808_cast_fp16")]; + tensor var_39810_interleave_0 = const()[name = tensor("op_39810_interleave_0"), val = tensor(false)]; + tensor var_39810_cast_fp16 = concat(axis = var_38349, interleave = var_39810_interleave_0, values = (var_39710_cast_fp16, var_39712_cast_fp16, var_39714_cast_fp16, var_39716_cast_fp16))[name = tensor("op_39810_cast_fp16")]; + tensor var_39812_interleave_0 = const()[name = tensor("op_39812_interleave_0"), val = tensor(false)]; + tensor var_39812_cast_fp16 = concat(axis = var_38349, interleave = var_39812_interleave_0, values = (var_39718_cast_fp16, var_39720_cast_fp16, var_39722_cast_fp16, var_39724_cast_fp16))[name = tensor("op_39812_cast_fp16")]; + tensor var_39814_interleave_0 = const()[name = tensor("op_39814_interleave_0"), val = tensor(false)]; + tensor var_39814_cast_fp16 = concat(axis = var_38349, interleave = var_39814_interleave_0, values = (var_39726_cast_fp16, var_39728_cast_fp16, var_39730_cast_fp16, var_39732_cast_fp16))[name = tensor("op_39814_cast_fp16")]; + tensor var_39816_interleave_0 = const()[name = tensor("op_39816_interleave_0"), val = tensor(false)]; + tensor var_39816_cast_fp16 = concat(axis = var_38349, interleave = var_39816_interleave_0, values = (var_39734_cast_fp16, var_39736_cast_fp16, var_39738_cast_fp16, var_39740_cast_fp16))[name = tensor("op_39816_cast_fp16")]; + tensor var_39818_interleave_0 = const()[name = tensor("op_39818_interleave_0"), val = tensor(false)]; + tensor var_39818_cast_fp16 = concat(axis = var_38349, interleave = var_39818_interleave_0, values = (var_39742_cast_fp16, var_39744_cast_fp16, var_39746_cast_fp16, var_39748_cast_fp16))[name = tensor("op_39818_cast_fp16")]; + tensor var_39820_interleave_0 = const()[name = tensor("op_39820_interleave_0"), val = tensor(false)]; + tensor var_39820_cast_fp16 = concat(axis = var_38349, interleave = var_39820_interleave_0, values = (var_39750_cast_fp16, var_39752_cast_fp16, var_39754_cast_fp16, var_39756_cast_fp16))[name = tensor("op_39820_cast_fp16")]; + tensor var_39822_interleave_0 = const()[name = tensor("op_39822_interleave_0"), val = tensor(false)]; + tensor var_39822_cast_fp16 = concat(axis = var_38349, interleave = var_39822_interleave_0, values = (var_39758_cast_fp16, var_39760_cast_fp16, var_39762_cast_fp16, var_39764_cast_fp16))[name = tensor("op_39822_cast_fp16")]; + tensor var_39824_interleave_0 = const()[name = tensor("op_39824_interleave_0"), val = tensor(false)]; + tensor var_39824_cast_fp16 = concat(axis = var_38349, interleave = var_39824_interleave_0, values = (var_39766_cast_fp16, var_39768_cast_fp16, var_39770_cast_fp16, var_39772_cast_fp16))[name = tensor("op_39824_cast_fp16")]; + tensor var_39826_interleave_0 = const()[name = tensor("op_39826_interleave_0"), val = tensor(false)]; + tensor var_39826_cast_fp16 = concat(axis = var_38349, interleave = var_39826_interleave_0, values = (var_39774_cast_fp16, var_39776_cast_fp16, var_39778_cast_fp16, var_39780_cast_fp16))[name = tensor("op_39826_cast_fp16")]; + tensor var_39828_interleave_0 = const()[name = tensor("op_39828_interleave_0"), val = tensor(false)]; + tensor var_39828_cast_fp16 = concat(axis = var_38349, interleave = var_39828_interleave_0, values = (var_39782_cast_fp16, var_39784_cast_fp16, var_39786_cast_fp16, var_39788_cast_fp16))[name = tensor("op_39828_cast_fp16")]; + tensor input_201_interleave_0 = const()[name = tensor("input_201_interleave_0"), val = tensor(false)]; + tensor input_201_cast_fp16 = concat(axis = var_38374, interleave = input_201_interleave_0, values = (var_39790_cast_fp16, var_39792_cast_fp16, var_39794_cast_fp16, var_39796_cast_fp16, var_39798_cast_fp16, var_39800_cast_fp16, var_39802_cast_fp16, var_39804_cast_fp16, var_39806_cast_fp16, var_39808_cast_fp16, var_39810_cast_fp16, var_39812_cast_fp16, var_39814_cast_fp16, var_39816_cast_fp16, var_39818_cast_fp16, var_39820_cast_fp16, var_39822_cast_fp16, var_39824_cast_fp16, var_39826_cast_fp16, var_39828_cast_fp16))[name = tensor("input_201_cast_fp16")]; + tensor var_39833 = const()[name = tensor("op_39833"), val = tensor([1, 1])]; + tensor var_39835 = const()[name = tensor("op_39835"), val = tensor([1, 1])]; + tensor obj_103_pad_type_0 = const()[name = tensor("obj_103_pad_type_0"), val = tensor("custom")]; + tensor obj_103_pad_0 = const()[name = tensor("obj_103_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_25_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_25_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1007968640)))]; + tensor layers_25_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_25_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1011245504)))]; + tensor obj_103_cast_fp16 = conv(bias = layers_25_self_attn_o_proj_bias_to_fp16, dilations = var_39835, groups = var_38374, pad = obj_103_pad_0, pad_type = obj_103_pad_type_0, strides = var_39833, weight = layers_25_self_attn_o_proj_weight_to_fp16, x = input_201_cast_fp16)[name = tensor("obj_103_cast_fp16")]; + tensor inputs_103_cast_fp16 = add(x = inputs_101_cast_fp16, y = obj_103_cast_fp16)[name = tensor("inputs_103_cast_fp16")]; + tensor var_39841 = const()[name = tensor("op_39841"), val = tensor([1])]; + tensor channels_mean_103_cast_fp16 = reduce_mean(axes = var_39841, keep_dims = var_38375, x = inputs_103_cast_fp16)[name = tensor("channels_mean_103_cast_fp16")]; + tensor zero_mean_103_cast_fp16 = sub(x = inputs_103_cast_fp16, y = channels_mean_103_cast_fp16)[name = tensor("zero_mean_103_cast_fp16")]; + tensor zero_mean_sq_103_cast_fp16 = mul(x = zero_mean_103_cast_fp16, y = zero_mean_103_cast_fp16)[name = tensor("zero_mean_sq_103_cast_fp16")]; + tensor var_39845 = const()[name = tensor("op_39845"), val = tensor([1])]; + tensor var_39846_cast_fp16 = reduce_mean(axes = var_39845, keep_dims = var_38375, x = zero_mean_sq_103_cast_fp16)[name = tensor("op_39846_cast_fp16")]; + tensor var_39847_to_fp16 = const()[name = tensor("op_39847_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_39848_cast_fp16 = add(x = var_39846_cast_fp16, y = var_39847_to_fp16)[name = tensor("op_39848_cast_fp16")]; + tensor denom_103_epsilon_0_to_fp16 = const()[name = tensor("denom_103_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_103_cast_fp16 = rsqrt(epsilon = denom_103_epsilon_0_to_fp16, x = var_39848_cast_fp16)[name = tensor("denom_103_cast_fp16")]; + tensor out_103_cast_fp16 = mul(x = zero_mean_103_cast_fp16, y = denom_103_cast_fp16)[name = tensor("out_103_cast_fp16")]; + tensor input_203_gamma_0_to_fp16 = const()[name = tensor("input_203_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1011248128)))]; + tensor input_203_beta_0_to_fp16 = const()[name = tensor("input_203_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1011250752)))]; + tensor input_203_epsilon_0_to_fp16 = const()[name = tensor("input_203_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_203_cast_fp16 = batch_norm(beta = input_203_beta_0_to_fp16, epsilon = input_203_epsilon_0_to_fp16, gamma = input_203_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_103_cast_fp16)[name = tensor("input_203_cast_fp16")]; + tensor var_39859 = const()[name = tensor("op_39859"), val = tensor([1, 1])]; + tensor var_39861 = const()[name = tensor("op_39861"), val = tensor([1, 1])]; + tensor input_205_pad_type_0 = const()[name = tensor("input_205_pad_type_0"), val = tensor("custom")]; + tensor input_205_pad_0 = const()[name = tensor("input_205_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_25_fc1_weight_to_fp16 = const()[name = tensor("layers_25_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1011253376)))]; + tensor layers_25_fc1_bias_to_fp16 = const()[name = tensor("layers_25_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1024360640)))]; + tensor input_205_cast_fp16 = conv(bias = layers_25_fc1_bias_to_fp16, dilations = var_39861, groups = var_38374, pad = input_205_pad_0, pad_type = input_205_pad_type_0, strides = var_39859, weight = layers_25_fc1_weight_to_fp16, x = input_203_cast_fp16)[name = tensor("input_205_cast_fp16")]; + tensor input_207_mode_0 = const()[name = tensor("input_207_mode_0"), val = tensor("EXACT")]; + tensor input_207_cast_fp16 = gelu(mode = input_207_mode_0, x = input_205_cast_fp16)[name = tensor("input_207_cast_fp16")]; + tensor var_39867 = const()[name = tensor("op_39867"), val = tensor([1, 1])]; + tensor var_39869 = const()[name = tensor("op_39869"), val = tensor([1, 1])]; + tensor hidden_states_55_pad_type_0 = const()[name = tensor("hidden_states_55_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_55_pad_0 = const()[name = tensor("hidden_states_55_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_25_fc2_weight_to_fp16 = const()[name = tensor("layers_25_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1024370944)))]; + tensor layers_25_fc2_bias_to_fp16 = const()[name = tensor("layers_25_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1037478208)))]; + tensor hidden_states_55_cast_fp16 = conv(bias = layers_25_fc2_bias_to_fp16, dilations = var_39869, groups = var_38374, pad = hidden_states_55_pad_0, pad_type = hidden_states_55_pad_type_0, strides = var_39867, weight = layers_25_fc2_weight_to_fp16, x = input_207_cast_fp16)[name = tensor("hidden_states_55_cast_fp16")]; + tensor inputs_105_cast_fp16 = add(x = inputs_103_cast_fp16, y = hidden_states_55_cast_fp16)[name = tensor("inputs_105_cast_fp16")]; + tensor var_39876 = const()[name = tensor("op_39876"), val = tensor(3)]; + tensor var_39901 = const()[name = tensor("op_39901"), val = tensor(1)]; + tensor var_39902 = const()[name = tensor("op_39902"), val = tensor(true)]; + tensor var_39912 = const()[name = tensor("op_39912"), val = tensor([1])]; + tensor channels_mean_105_cast_fp16 = reduce_mean(axes = var_39912, keep_dims = var_39902, x = inputs_105_cast_fp16)[name = tensor("channels_mean_105_cast_fp16")]; + tensor zero_mean_105_cast_fp16 = sub(x = inputs_105_cast_fp16, y = channels_mean_105_cast_fp16)[name = tensor("zero_mean_105_cast_fp16")]; + tensor zero_mean_sq_105_cast_fp16 = mul(x = zero_mean_105_cast_fp16, y = zero_mean_105_cast_fp16)[name = tensor("zero_mean_sq_105_cast_fp16")]; + tensor var_39916 = const()[name = tensor("op_39916"), val = tensor([1])]; + tensor var_39917_cast_fp16 = reduce_mean(axes = var_39916, keep_dims = var_39902, x = zero_mean_sq_105_cast_fp16)[name = tensor("op_39917_cast_fp16")]; + tensor var_39918_to_fp16 = const()[name = tensor("op_39918_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_39919_cast_fp16 = add(x = var_39917_cast_fp16, y = var_39918_to_fp16)[name = tensor("op_39919_cast_fp16")]; + tensor denom_105_epsilon_0_to_fp16 = const()[name = tensor("denom_105_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_105_cast_fp16 = rsqrt(epsilon = denom_105_epsilon_0_to_fp16, x = var_39919_cast_fp16)[name = tensor("denom_105_cast_fp16")]; + tensor out_105_cast_fp16 = mul(x = zero_mean_105_cast_fp16, y = denom_105_cast_fp16)[name = tensor("out_105_cast_fp16")]; + tensor obj_105_gamma_0_to_fp16 = const()[name = tensor("obj_105_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1037480832)))]; + tensor obj_105_beta_0_to_fp16 = const()[name = tensor("obj_105_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1037483456)))]; + tensor obj_105_epsilon_0_to_fp16 = const()[name = tensor("obj_105_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_105_cast_fp16 = batch_norm(beta = obj_105_beta_0_to_fp16, epsilon = obj_105_epsilon_0_to_fp16, gamma = obj_105_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_105_cast_fp16)[name = tensor("obj_105_cast_fp16")]; + tensor var_39934 = const()[name = tensor("op_39934"), val = tensor([1, 1])]; + tensor var_39936 = const()[name = tensor("op_39936"), val = tensor([1, 1])]; + tensor query_53_pad_type_0 = const()[name = tensor("query_53_pad_type_0"), val = tensor("custom")]; + tensor query_53_pad_0 = const()[name = tensor("query_53_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_26_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_26_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1037486080)))]; + tensor layers_26_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_26_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1040762944)))]; + tensor query_53_cast_fp16 = conv(bias = layers_26_self_attn_q_proj_bias_to_fp16, dilations = var_39936, groups = var_39901, pad = query_53_pad_0, pad_type = query_53_pad_type_0, strides = var_39934, weight = layers_26_self_attn_q_proj_weight_to_fp16, x = obj_105_cast_fp16)[name = tensor("query_53_cast_fp16")]; + tensor var_39940 = const()[name = tensor("op_39940"), val = tensor([1, 1])]; + tensor var_39942 = const()[name = tensor("op_39942"), val = tensor([1, 1])]; + tensor key_53_pad_type_0 = const()[name = tensor("key_53_pad_type_0"), val = tensor("custom")]; + tensor key_53_pad_0 = const()[name = tensor("key_53_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_26_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_26_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1040765568)))]; + tensor key_53_cast_fp16 = conv(dilations = var_39942, groups = var_39901, pad = key_53_pad_0, pad_type = key_53_pad_type_0, strides = var_39940, weight = layers_26_self_attn_k_proj_weight_to_fp16, x = obj_105_cast_fp16)[name = tensor("key_53_cast_fp16")]; + tensor var_39947 = const()[name = tensor("op_39947"), val = tensor([1, 1])]; + tensor var_39949 = const()[name = tensor("op_39949"), val = tensor([1, 1])]; + tensor value_53_pad_type_0 = const()[name = tensor("value_53_pad_type_0"), val = tensor("custom")]; + tensor value_53_pad_0 = const()[name = tensor("value_53_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_26_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_26_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1044042432)))]; + tensor layers_26_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_26_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1047319296)))]; + tensor value_53_cast_fp16 = conv(bias = layers_26_self_attn_v_proj_bias_to_fp16, dilations = var_39949, groups = var_39901, pad = value_53_pad_0, pad_type = value_53_pad_type_0, strides = var_39947, weight = layers_26_self_attn_v_proj_weight_to_fp16, x = obj_105_cast_fp16)[name = tensor("value_53_cast_fp16")]; + tensor var_39956_begin_0 = const()[name = tensor("op_39956_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_39956_end_0 = const()[name = tensor("op_39956_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_39956_end_mask_0 = const()[name = tensor("op_39956_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_39956_cast_fp16 = slice_by_index(begin = var_39956_begin_0, end = var_39956_end_0, end_mask = var_39956_end_mask_0, x = query_53_cast_fp16)[name = tensor("op_39956_cast_fp16")]; + tensor var_39960_begin_0 = const()[name = tensor("op_39960_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_39960_end_0 = const()[name = tensor("op_39960_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_39960_end_mask_0 = const()[name = tensor("op_39960_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_39960_cast_fp16 = slice_by_index(begin = var_39960_begin_0, end = var_39960_end_0, end_mask = var_39960_end_mask_0, x = query_53_cast_fp16)[name = tensor("op_39960_cast_fp16")]; + tensor var_39964_begin_0 = const()[name = tensor("op_39964_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_39964_end_0 = const()[name = tensor("op_39964_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_39964_end_mask_0 = const()[name = tensor("op_39964_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_39964_cast_fp16 = slice_by_index(begin = var_39964_begin_0, end = var_39964_end_0, end_mask = var_39964_end_mask_0, x = query_53_cast_fp16)[name = tensor("op_39964_cast_fp16")]; + tensor var_39968_begin_0 = const()[name = tensor("op_39968_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_39968_end_0 = const()[name = tensor("op_39968_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_39968_end_mask_0 = const()[name = tensor("op_39968_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_39968_cast_fp16 = slice_by_index(begin = var_39968_begin_0, end = var_39968_end_0, end_mask = var_39968_end_mask_0, x = query_53_cast_fp16)[name = tensor("op_39968_cast_fp16")]; + tensor var_39972_begin_0 = const()[name = tensor("op_39972_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_39972_end_0 = const()[name = tensor("op_39972_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_39972_end_mask_0 = const()[name = tensor("op_39972_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_39972_cast_fp16 = slice_by_index(begin = var_39972_begin_0, end = var_39972_end_0, end_mask = var_39972_end_mask_0, x = query_53_cast_fp16)[name = tensor("op_39972_cast_fp16")]; + tensor var_39976_begin_0 = const()[name = tensor("op_39976_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_39976_end_0 = const()[name = tensor("op_39976_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_39976_end_mask_0 = const()[name = tensor("op_39976_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_39976_cast_fp16 = slice_by_index(begin = var_39976_begin_0, end = var_39976_end_0, end_mask = var_39976_end_mask_0, x = query_53_cast_fp16)[name = tensor("op_39976_cast_fp16")]; + tensor var_39980_begin_0 = const()[name = tensor("op_39980_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_39980_end_0 = const()[name = tensor("op_39980_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_39980_end_mask_0 = const()[name = tensor("op_39980_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_39980_cast_fp16 = slice_by_index(begin = var_39980_begin_0, end = var_39980_end_0, end_mask = var_39980_end_mask_0, x = query_53_cast_fp16)[name = tensor("op_39980_cast_fp16")]; + tensor var_39984_begin_0 = const()[name = tensor("op_39984_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_39984_end_0 = const()[name = tensor("op_39984_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_39984_end_mask_0 = const()[name = tensor("op_39984_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_39984_cast_fp16 = slice_by_index(begin = var_39984_begin_0, end = var_39984_end_0, end_mask = var_39984_end_mask_0, x = query_53_cast_fp16)[name = tensor("op_39984_cast_fp16")]; + tensor var_39988_begin_0 = const()[name = tensor("op_39988_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_39988_end_0 = const()[name = tensor("op_39988_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_39988_end_mask_0 = const()[name = tensor("op_39988_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_39988_cast_fp16 = slice_by_index(begin = var_39988_begin_0, end = var_39988_end_0, end_mask = var_39988_end_mask_0, x = query_53_cast_fp16)[name = tensor("op_39988_cast_fp16")]; + tensor var_39992_begin_0 = const()[name = tensor("op_39992_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_39992_end_0 = const()[name = tensor("op_39992_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_39992_end_mask_0 = const()[name = tensor("op_39992_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_39992_cast_fp16 = slice_by_index(begin = var_39992_begin_0, end = var_39992_end_0, end_mask = var_39992_end_mask_0, x = query_53_cast_fp16)[name = tensor("op_39992_cast_fp16")]; + tensor var_39996_begin_0 = const()[name = tensor("op_39996_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_39996_end_0 = const()[name = tensor("op_39996_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_39996_end_mask_0 = const()[name = tensor("op_39996_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_39996_cast_fp16 = slice_by_index(begin = var_39996_begin_0, end = var_39996_end_0, end_mask = var_39996_end_mask_0, x = query_53_cast_fp16)[name = tensor("op_39996_cast_fp16")]; + tensor var_40000_begin_0 = const()[name = tensor("op_40000_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_40000_end_0 = const()[name = tensor("op_40000_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_40000_end_mask_0 = const()[name = tensor("op_40000_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_40000_cast_fp16 = slice_by_index(begin = var_40000_begin_0, end = var_40000_end_0, end_mask = var_40000_end_mask_0, x = query_53_cast_fp16)[name = tensor("op_40000_cast_fp16")]; + tensor var_40004_begin_0 = const()[name = tensor("op_40004_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_40004_end_0 = const()[name = tensor("op_40004_end_0"), val = tensor([1, 832, 1, 1500])]; + tensor var_40004_end_mask_0 = const()[name = tensor("op_40004_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_40004_cast_fp16 = slice_by_index(begin = var_40004_begin_0, end = var_40004_end_0, end_mask = var_40004_end_mask_0, x = query_53_cast_fp16)[name = tensor("op_40004_cast_fp16")]; + tensor var_40008_begin_0 = const()[name = tensor("op_40008_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_40008_end_0 = const()[name = tensor("op_40008_end_0"), val = tensor([1, 896, 1, 1500])]; + tensor var_40008_end_mask_0 = const()[name = tensor("op_40008_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_40008_cast_fp16 = slice_by_index(begin = var_40008_begin_0, end = var_40008_end_0, end_mask = var_40008_end_mask_0, x = query_53_cast_fp16)[name = tensor("op_40008_cast_fp16")]; + tensor var_40012_begin_0 = const()[name = tensor("op_40012_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_40012_end_0 = const()[name = tensor("op_40012_end_0"), val = tensor([1, 960, 1, 1500])]; + tensor var_40012_end_mask_0 = const()[name = tensor("op_40012_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_40012_cast_fp16 = slice_by_index(begin = var_40012_begin_0, end = var_40012_end_0, end_mask = var_40012_end_mask_0, x = query_53_cast_fp16)[name = tensor("op_40012_cast_fp16")]; + tensor var_40016_begin_0 = const()[name = tensor("op_40016_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_40016_end_0 = const()[name = tensor("op_40016_end_0"), val = tensor([1, 1024, 1, 1500])]; + tensor var_40016_end_mask_0 = const()[name = tensor("op_40016_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_40016_cast_fp16 = slice_by_index(begin = var_40016_begin_0, end = var_40016_end_0, end_mask = var_40016_end_mask_0, x = query_53_cast_fp16)[name = tensor("op_40016_cast_fp16")]; + tensor var_40020_begin_0 = const()[name = tensor("op_40020_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_40020_end_0 = const()[name = tensor("op_40020_end_0"), val = tensor([1, 1088, 1, 1500])]; + tensor var_40020_end_mask_0 = const()[name = tensor("op_40020_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_40020_cast_fp16 = slice_by_index(begin = var_40020_begin_0, end = var_40020_end_0, end_mask = var_40020_end_mask_0, x = query_53_cast_fp16)[name = tensor("op_40020_cast_fp16")]; + tensor var_40024_begin_0 = const()[name = tensor("op_40024_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_40024_end_0 = const()[name = tensor("op_40024_end_0"), val = tensor([1, 1152, 1, 1500])]; + tensor var_40024_end_mask_0 = const()[name = tensor("op_40024_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_40024_cast_fp16 = slice_by_index(begin = var_40024_begin_0, end = var_40024_end_0, end_mask = var_40024_end_mask_0, x = query_53_cast_fp16)[name = tensor("op_40024_cast_fp16")]; + tensor var_40028_begin_0 = const()[name = tensor("op_40028_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_40028_end_0 = const()[name = tensor("op_40028_end_0"), val = tensor([1, 1216, 1, 1500])]; + tensor var_40028_end_mask_0 = const()[name = tensor("op_40028_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_40028_cast_fp16 = slice_by_index(begin = var_40028_begin_0, end = var_40028_end_0, end_mask = var_40028_end_mask_0, x = query_53_cast_fp16)[name = tensor("op_40028_cast_fp16")]; + tensor var_40032_begin_0 = const()[name = tensor("op_40032_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_40032_end_0 = const()[name = tensor("op_40032_end_0"), val = tensor([1, 1280, 1, 1500])]; + tensor var_40032_end_mask_0 = const()[name = tensor("op_40032_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_40032_cast_fp16 = slice_by_index(begin = var_40032_begin_0, end = var_40032_end_0, end_mask = var_40032_end_mask_0, x = query_53_cast_fp16)[name = tensor("op_40032_cast_fp16")]; + tensor var_40041_begin_0 = const()[name = tensor("op_40041_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_40041_end_0 = const()[name = tensor("op_40041_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_40041_end_mask_0 = const()[name = tensor("op_40041_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40041_cast_fp16 = slice_by_index(begin = var_40041_begin_0, end = var_40041_end_0, end_mask = var_40041_end_mask_0, x = var_39956_cast_fp16)[name = tensor("op_40041_cast_fp16")]; + tensor var_40048_begin_0 = const()[name = tensor("op_40048_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_40048_end_0 = const()[name = tensor("op_40048_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_40048_end_mask_0 = const()[name = tensor("op_40048_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40048_cast_fp16 = slice_by_index(begin = var_40048_begin_0, end = var_40048_end_0, end_mask = var_40048_end_mask_0, x = var_39956_cast_fp16)[name = tensor("op_40048_cast_fp16")]; + tensor var_40055_begin_0 = const()[name = tensor("op_40055_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_40055_end_0 = const()[name = tensor("op_40055_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_40055_end_mask_0 = const()[name = tensor("op_40055_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40055_cast_fp16 = slice_by_index(begin = var_40055_begin_0, end = var_40055_end_0, end_mask = var_40055_end_mask_0, x = var_39956_cast_fp16)[name = tensor("op_40055_cast_fp16")]; + tensor var_40062_begin_0 = const()[name = tensor("op_40062_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_40062_end_0 = const()[name = tensor("op_40062_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_40062_end_mask_0 = const()[name = tensor("op_40062_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40062_cast_fp16 = slice_by_index(begin = var_40062_begin_0, end = var_40062_end_0, end_mask = var_40062_end_mask_0, x = var_39956_cast_fp16)[name = tensor("op_40062_cast_fp16")]; + tensor var_40069_begin_0 = const()[name = tensor("op_40069_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_40069_end_0 = const()[name = tensor("op_40069_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_40069_end_mask_0 = const()[name = tensor("op_40069_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40069_cast_fp16 = slice_by_index(begin = var_40069_begin_0, end = var_40069_end_0, end_mask = var_40069_end_mask_0, x = var_39960_cast_fp16)[name = tensor("op_40069_cast_fp16")]; + tensor var_40076_begin_0 = const()[name = tensor("op_40076_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_40076_end_0 = const()[name = tensor("op_40076_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_40076_end_mask_0 = const()[name = tensor("op_40076_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40076_cast_fp16 = slice_by_index(begin = var_40076_begin_0, end = var_40076_end_0, end_mask = var_40076_end_mask_0, x = var_39960_cast_fp16)[name = tensor("op_40076_cast_fp16")]; + tensor var_40083_begin_0 = const()[name = tensor("op_40083_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_40083_end_0 = const()[name = tensor("op_40083_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_40083_end_mask_0 = const()[name = tensor("op_40083_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40083_cast_fp16 = slice_by_index(begin = var_40083_begin_0, end = var_40083_end_0, end_mask = var_40083_end_mask_0, x = var_39960_cast_fp16)[name = tensor("op_40083_cast_fp16")]; + tensor var_40090_begin_0 = const()[name = tensor("op_40090_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_40090_end_0 = const()[name = tensor("op_40090_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_40090_end_mask_0 = const()[name = tensor("op_40090_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40090_cast_fp16 = slice_by_index(begin = var_40090_begin_0, end = var_40090_end_0, end_mask = var_40090_end_mask_0, x = var_39960_cast_fp16)[name = tensor("op_40090_cast_fp16")]; + tensor var_40097_begin_0 = const()[name = tensor("op_40097_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_40097_end_0 = const()[name = tensor("op_40097_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_40097_end_mask_0 = const()[name = tensor("op_40097_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40097_cast_fp16 = slice_by_index(begin = var_40097_begin_0, end = var_40097_end_0, end_mask = var_40097_end_mask_0, x = var_39964_cast_fp16)[name = tensor("op_40097_cast_fp16")]; + tensor var_40104_begin_0 = const()[name = tensor("op_40104_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_40104_end_0 = const()[name = tensor("op_40104_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_40104_end_mask_0 = const()[name = tensor("op_40104_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40104_cast_fp16 = slice_by_index(begin = var_40104_begin_0, end = var_40104_end_0, end_mask = var_40104_end_mask_0, x = var_39964_cast_fp16)[name = tensor("op_40104_cast_fp16")]; + tensor var_40111_begin_0 = const()[name = tensor("op_40111_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_40111_end_0 = const()[name = tensor("op_40111_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_40111_end_mask_0 = const()[name = tensor("op_40111_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40111_cast_fp16 = slice_by_index(begin = var_40111_begin_0, end = var_40111_end_0, end_mask = var_40111_end_mask_0, x = var_39964_cast_fp16)[name = tensor("op_40111_cast_fp16")]; + tensor var_40118_begin_0 = const()[name = tensor("op_40118_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_40118_end_0 = const()[name = tensor("op_40118_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_40118_end_mask_0 = const()[name = tensor("op_40118_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40118_cast_fp16 = slice_by_index(begin = var_40118_begin_0, end = var_40118_end_0, end_mask = var_40118_end_mask_0, x = var_39964_cast_fp16)[name = tensor("op_40118_cast_fp16")]; + tensor var_40125_begin_0 = const()[name = tensor("op_40125_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_40125_end_0 = const()[name = tensor("op_40125_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_40125_end_mask_0 = const()[name = tensor("op_40125_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40125_cast_fp16 = slice_by_index(begin = var_40125_begin_0, end = var_40125_end_0, end_mask = var_40125_end_mask_0, x = var_39968_cast_fp16)[name = tensor("op_40125_cast_fp16")]; + tensor var_40132_begin_0 = const()[name = tensor("op_40132_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_40132_end_0 = const()[name = tensor("op_40132_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_40132_end_mask_0 = const()[name = tensor("op_40132_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40132_cast_fp16 = slice_by_index(begin = var_40132_begin_0, end = var_40132_end_0, end_mask = var_40132_end_mask_0, x = var_39968_cast_fp16)[name = tensor("op_40132_cast_fp16")]; + tensor var_40139_begin_0 = const()[name = tensor("op_40139_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_40139_end_0 = const()[name = tensor("op_40139_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_40139_end_mask_0 = const()[name = tensor("op_40139_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40139_cast_fp16 = slice_by_index(begin = var_40139_begin_0, end = var_40139_end_0, end_mask = var_40139_end_mask_0, x = var_39968_cast_fp16)[name = tensor("op_40139_cast_fp16")]; + tensor var_40146_begin_0 = const()[name = tensor("op_40146_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_40146_end_0 = const()[name = tensor("op_40146_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_40146_end_mask_0 = const()[name = tensor("op_40146_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40146_cast_fp16 = slice_by_index(begin = var_40146_begin_0, end = var_40146_end_0, end_mask = var_40146_end_mask_0, x = var_39968_cast_fp16)[name = tensor("op_40146_cast_fp16")]; + tensor var_40153_begin_0 = const()[name = tensor("op_40153_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_40153_end_0 = const()[name = tensor("op_40153_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_40153_end_mask_0 = const()[name = tensor("op_40153_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40153_cast_fp16 = slice_by_index(begin = var_40153_begin_0, end = var_40153_end_0, end_mask = var_40153_end_mask_0, x = var_39972_cast_fp16)[name = tensor("op_40153_cast_fp16")]; + tensor var_40160_begin_0 = const()[name = tensor("op_40160_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_40160_end_0 = const()[name = tensor("op_40160_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_40160_end_mask_0 = const()[name = tensor("op_40160_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40160_cast_fp16 = slice_by_index(begin = var_40160_begin_0, end = var_40160_end_0, end_mask = var_40160_end_mask_0, x = var_39972_cast_fp16)[name = tensor("op_40160_cast_fp16")]; + tensor var_40167_begin_0 = const()[name = tensor("op_40167_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_40167_end_0 = const()[name = tensor("op_40167_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_40167_end_mask_0 = const()[name = tensor("op_40167_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40167_cast_fp16 = slice_by_index(begin = var_40167_begin_0, end = var_40167_end_0, end_mask = var_40167_end_mask_0, x = var_39972_cast_fp16)[name = tensor("op_40167_cast_fp16")]; + tensor var_40174_begin_0 = const()[name = tensor("op_40174_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_40174_end_0 = const()[name = tensor("op_40174_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_40174_end_mask_0 = const()[name = tensor("op_40174_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40174_cast_fp16 = slice_by_index(begin = var_40174_begin_0, end = var_40174_end_0, end_mask = var_40174_end_mask_0, x = var_39972_cast_fp16)[name = tensor("op_40174_cast_fp16")]; + tensor var_40181_begin_0 = const()[name = tensor("op_40181_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_40181_end_0 = const()[name = tensor("op_40181_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_40181_end_mask_0 = const()[name = tensor("op_40181_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40181_cast_fp16 = slice_by_index(begin = var_40181_begin_0, end = var_40181_end_0, end_mask = var_40181_end_mask_0, x = var_39976_cast_fp16)[name = tensor("op_40181_cast_fp16")]; + tensor var_40188_begin_0 = const()[name = tensor("op_40188_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_40188_end_0 = const()[name = tensor("op_40188_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_40188_end_mask_0 = const()[name = tensor("op_40188_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40188_cast_fp16 = slice_by_index(begin = var_40188_begin_0, end = var_40188_end_0, end_mask = var_40188_end_mask_0, x = var_39976_cast_fp16)[name = tensor("op_40188_cast_fp16")]; + tensor var_40195_begin_0 = const()[name = tensor("op_40195_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_40195_end_0 = const()[name = tensor("op_40195_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_40195_end_mask_0 = const()[name = tensor("op_40195_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40195_cast_fp16 = slice_by_index(begin = var_40195_begin_0, end = var_40195_end_0, end_mask = var_40195_end_mask_0, x = var_39976_cast_fp16)[name = tensor("op_40195_cast_fp16")]; + tensor var_40202_begin_0 = const()[name = tensor("op_40202_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_40202_end_0 = const()[name = tensor("op_40202_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_40202_end_mask_0 = const()[name = tensor("op_40202_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40202_cast_fp16 = slice_by_index(begin = var_40202_begin_0, end = var_40202_end_0, end_mask = var_40202_end_mask_0, x = var_39976_cast_fp16)[name = tensor("op_40202_cast_fp16")]; + tensor var_40209_begin_0 = const()[name = tensor("op_40209_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_40209_end_0 = const()[name = tensor("op_40209_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_40209_end_mask_0 = const()[name = tensor("op_40209_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40209_cast_fp16 = slice_by_index(begin = var_40209_begin_0, end = var_40209_end_0, end_mask = var_40209_end_mask_0, x = var_39980_cast_fp16)[name = tensor("op_40209_cast_fp16")]; + tensor var_40216_begin_0 = const()[name = tensor("op_40216_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_40216_end_0 = const()[name = tensor("op_40216_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_40216_end_mask_0 = const()[name = tensor("op_40216_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40216_cast_fp16 = slice_by_index(begin = var_40216_begin_0, end = var_40216_end_0, end_mask = var_40216_end_mask_0, x = var_39980_cast_fp16)[name = tensor("op_40216_cast_fp16")]; + tensor var_40223_begin_0 = const()[name = tensor("op_40223_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_40223_end_0 = const()[name = tensor("op_40223_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_40223_end_mask_0 = const()[name = tensor("op_40223_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40223_cast_fp16 = slice_by_index(begin = var_40223_begin_0, end = var_40223_end_0, end_mask = var_40223_end_mask_0, x = var_39980_cast_fp16)[name = tensor("op_40223_cast_fp16")]; + tensor var_40230_begin_0 = const()[name = tensor("op_40230_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_40230_end_0 = const()[name = tensor("op_40230_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_40230_end_mask_0 = const()[name = tensor("op_40230_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40230_cast_fp16 = slice_by_index(begin = var_40230_begin_0, end = var_40230_end_0, end_mask = var_40230_end_mask_0, x = var_39980_cast_fp16)[name = tensor("op_40230_cast_fp16")]; + tensor var_40237_begin_0 = const()[name = tensor("op_40237_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_40237_end_0 = const()[name = tensor("op_40237_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_40237_end_mask_0 = const()[name = tensor("op_40237_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40237_cast_fp16 = slice_by_index(begin = var_40237_begin_0, end = var_40237_end_0, end_mask = var_40237_end_mask_0, x = var_39984_cast_fp16)[name = tensor("op_40237_cast_fp16")]; + tensor var_40244_begin_0 = const()[name = tensor("op_40244_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_40244_end_0 = const()[name = tensor("op_40244_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_40244_end_mask_0 = const()[name = tensor("op_40244_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40244_cast_fp16 = slice_by_index(begin = var_40244_begin_0, end = var_40244_end_0, end_mask = var_40244_end_mask_0, x = var_39984_cast_fp16)[name = tensor("op_40244_cast_fp16")]; + tensor var_40251_begin_0 = const()[name = tensor("op_40251_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_40251_end_0 = const()[name = tensor("op_40251_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_40251_end_mask_0 = const()[name = tensor("op_40251_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40251_cast_fp16 = slice_by_index(begin = var_40251_begin_0, end = var_40251_end_0, end_mask = var_40251_end_mask_0, x = var_39984_cast_fp16)[name = tensor("op_40251_cast_fp16")]; + tensor var_40258_begin_0 = const()[name = tensor("op_40258_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_40258_end_0 = const()[name = tensor("op_40258_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_40258_end_mask_0 = const()[name = tensor("op_40258_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40258_cast_fp16 = slice_by_index(begin = var_40258_begin_0, end = var_40258_end_0, end_mask = var_40258_end_mask_0, x = var_39984_cast_fp16)[name = tensor("op_40258_cast_fp16")]; + tensor var_40265_begin_0 = const()[name = tensor("op_40265_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_40265_end_0 = const()[name = tensor("op_40265_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_40265_end_mask_0 = const()[name = tensor("op_40265_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40265_cast_fp16 = slice_by_index(begin = var_40265_begin_0, end = var_40265_end_0, end_mask = var_40265_end_mask_0, x = var_39988_cast_fp16)[name = tensor("op_40265_cast_fp16")]; + tensor var_40272_begin_0 = const()[name = tensor("op_40272_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_40272_end_0 = const()[name = tensor("op_40272_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_40272_end_mask_0 = const()[name = tensor("op_40272_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40272_cast_fp16 = slice_by_index(begin = var_40272_begin_0, end = var_40272_end_0, end_mask = var_40272_end_mask_0, x = var_39988_cast_fp16)[name = tensor("op_40272_cast_fp16")]; + tensor var_40279_begin_0 = const()[name = tensor("op_40279_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_40279_end_0 = const()[name = tensor("op_40279_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_40279_end_mask_0 = const()[name = tensor("op_40279_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40279_cast_fp16 = slice_by_index(begin = var_40279_begin_0, end = var_40279_end_0, end_mask = var_40279_end_mask_0, x = var_39988_cast_fp16)[name = tensor("op_40279_cast_fp16")]; + tensor var_40286_begin_0 = const()[name = tensor("op_40286_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_40286_end_0 = const()[name = tensor("op_40286_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_40286_end_mask_0 = const()[name = tensor("op_40286_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40286_cast_fp16 = slice_by_index(begin = var_40286_begin_0, end = var_40286_end_0, end_mask = var_40286_end_mask_0, x = var_39988_cast_fp16)[name = tensor("op_40286_cast_fp16")]; + tensor var_40293_begin_0 = const()[name = tensor("op_40293_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_40293_end_0 = const()[name = tensor("op_40293_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_40293_end_mask_0 = const()[name = tensor("op_40293_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40293_cast_fp16 = slice_by_index(begin = var_40293_begin_0, end = var_40293_end_0, end_mask = var_40293_end_mask_0, x = var_39992_cast_fp16)[name = tensor("op_40293_cast_fp16")]; + tensor var_40300_begin_0 = const()[name = tensor("op_40300_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_40300_end_0 = const()[name = tensor("op_40300_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_40300_end_mask_0 = const()[name = tensor("op_40300_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40300_cast_fp16 = slice_by_index(begin = var_40300_begin_0, end = var_40300_end_0, end_mask = var_40300_end_mask_0, x = var_39992_cast_fp16)[name = tensor("op_40300_cast_fp16")]; + tensor var_40307_begin_0 = const()[name = tensor("op_40307_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_40307_end_0 = const()[name = tensor("op_40307_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_40307_end_mask_0 = const()[name = tensor("op_40307_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40307_cast_fp16 = slice_by_index(begin = var_40307_begin_0, end = var_40307_end_0, end_mask = var_40307_end_mask_0, x = var_39992_cast_fp16)[name = tensor("op_40307_cast_fp16")]; + tensor var_40314_begin_0 = const()[name = tensor("op_40314_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_40314_end_0 = const()[name = tensor("op_40314_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_40314_end_mask_0 = const()[name = tensor("op_40314_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40314_cast_fp16 = slice_by_index(begin = var_40314_begin_0, end = var_40314_end_0, end_mask = var_40314_end_mask_0, x = var_39992_cast_fp16)[name = tensor("op_40314_cast_fp16")]; + tensor var_40321_begin_0 = const()[name = tensor("op_40321_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_40321_end_0 = const()[name = tensor("op_40321_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_40321_end_mask_0 = const()[name = tensor("op_40321_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40321_cast_fp16 = slice_by_index(begin = var_40321_begin_0, end = var_40321_end_0, end_mask = var_40321_end_mask_0, x = var_39996_cast_fp16)[name = tensor("op_40321_cast_fp16")]; + tensor var_40328_begin_0 = const()[name = tensor("op_40328_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_40328_end_0 = const()[name = tensor("op_40328_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_40328_end_mask_0 = const()[name = tensor("op_40328_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40328_cast_fp16 = slice_by_index(begin = var_40328_begin_0, end = var_40328_end_0, end_mask = var_40328_end_mask_0, x = var_39996_cast_fp16)[name = tensor("op_40328_cast_fp16")]; + tensor var_40335_begin_0 = const()[name = tensor("op_40335_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_40335_end_0 = const()[name = tensor("op_40335_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_40335_end_mask_0 = const()[name = tensor("op_40335_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40335_cast_fp16 = slice_by_index(begin = var_40335_begin_0, end = var_40335_end_0, end_mask = var_40335_end_mask_0, x = var_39996_cast_fp16)[name = tensor("op_40335_cast_fp16")]; + tensor var_40342_begin_0 = const()[name = tensor("op_40342_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_40342_end_0 = const()[name = tensor("op_40342_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_40342_end_mask_0 = const()[name = tensor("op_40342_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40342_cast_fp16 = slice_by_index(begin = var_40342_begin_0, end = var_40342_end_0, end_mask = var_40342_end_mask_0, x = var_39996_cast_fp16)[name = tensor("op_40342_cast_fp16")]; + tensor var_40349_begin_0 = const()[name = tensor("op_40349_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_40349_end_0 = const()[name = tensor("op_40349_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_40349_end_mask_0 = const()[name = tensor("op_40349_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40349_cast_fp16 = slice_by_index(begin = var_40349_begin_0, end = var_40349_end_0, end_mask = var_40349_end_mask_0, x = var_40000_cast_fp16)[name = tensor("op_40349_cast_fp16")]; + tensor var_40356_begin_0 = const()[name = tensor("op_40356_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_40356_end_0 = const()[name = tensor("op_40356_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_40356_end_mask_0 = const()[name = tensor("op_40356_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40356_cast_fp16 = slice_by_index(begin = var_40356_begin_0, end = var_40356_end_0, end_mask = var_40356_end_mask_0, x = var_40000_cast_fp16)[name = tensor("op_40356_cast_fp16")]; + tensor var_40363_begin_0 = const()[name = tensor("op_40363_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_40363_end_0 = const()[name = tensor("op_40363_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_40363_end_mask_0 = const()[name = tensor("op_40363_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40363_cast_fp16 = slice_by_index(begin = var_40363_begin_0, end = var_40363_end_0, end_mask = var_40363_end_mask_0, x = var_40000_cast_fp16)[name = tensor("op_40363_cast_fp16")]; + tensor var_40370_begin_0 = const()[name = tensor("op_40370_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_40370_end_0 = const()[name = tensor("op_40370_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_40370_end_mask_0 = const()[name = tensor("op_40370_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40370_cast_fp16 = slice_by_index(begin = var_40370_begin_0, end = var_40370_end_0, end_mask = var_40370_end_mask_0, x = var_40000_cast_fp16)[name = tensor("op_40370_cast_fp16")]; + tensor var_40377_begin_0 = const()[name = tensor("op_40377_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_40377_end_0 = const()[name = tensor("op_40377_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_40377_end_mask_0 = const()[name = tensor("op_40377_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40377_cast_fp16 = slice_by_index(begin = var_40377_begin_0, end = var_40377_end_0, end_mask = var_40377_end_mask_0, x = var_40004_cast_fp16)[name = tensor("op_40377_cast_fp16")]; + tensor var_40384_begin_0 = const()[name = tensor("op_40384_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_40384_end_0 = const()[name = tensor("op_40384_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_40384_end_mask_0 = const()[name = tensor("op_40384_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40384_cast_fp16 = slice_by_index(begin = var_40384_begin_0, end = var_40384_end_0, end_mask = var_40384_end_mask_0, x = var_40004_cast_fp16)[name = tensor("op_40384_cast_fp16")]; + tensor var_40391_begin_0 = const()[name = tensor("op_40391_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_40391_end_0 = const()[name = tensor("op_40391_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_40391_end_mask_0 = const()[name = tensor("op_40391_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40391_cast_fp16 = slice_by_index(begin = var_40391_begin_0, end = var_40391_end_0, end_mask = var_40391_end_mask_0, x = var_40004_cast_fp16)[name = tensor("op_40391_cast_fp16")]; + tensor var_40398_begin_0 = const()[name = tensor("op_40398_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_40398_end_0 = const()[name = tensor("op_40398_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_40398_end_mask_0 = const()[name = tensor("op_40398_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40398_cast_fp16 = slice_by_index(begin = var_40398_begin_0, end = var_40398_end_0, end_mask = var_40398_end_mask_0, x = var_40004_cast_fp16)[name = tensor("op_40398_cast_fp16")]; + tensor var_40405_begin_0 = const()[name = tensor("op_40405_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_40405_end_0 = const()[name = tensor("op_40405_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_40405_end_mask_0 = const()[name = tensor("op_40405_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40405_cast_fp16 = slice_by_index(begin = var_40405_begin_0, end = var_40405_end_0, end_mask = var_40405_end_mask_0, x = var_40008_cast_fp16)[name = tensor("op_40405_cast_fp16")]; + tensor var_40412_begin_0 = const()[name = tensor("op_40412_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_40412_end_0 = const()[name = tensor("op_40412_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_40412_end_mask_0 = const()[name = tensor("op_40412_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40412_cast_fp16 = slice_by_index(begin = var_40412_begin_0, end = var_40412_end_0, end_mask = var_40412_end_mask_0, x = var_40008_cast_fp16)[name = tensor("op_40412_cast_fp16")]; + tensor var_40419_begin_0 = const()[name = tensor("op_40419_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_40419_end_0 = const()[name = tensor("op_40419_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_40419_end_mask_0 = const()[name = tensor("op_40419_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40419_cast_fp16 = slice_by_index(begin = var_40419_begin_0, end = var_40419_end_0, end_mask = var_40419_end_mask_0, x = var_40008_cast_fp16)[name = tensor("op_40419_cast_fp16")]; + tensor var_40426_begin_0 = const()[name = tensor("op_40426_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_40426_end_0 = const()[name = tensor("op_40426_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_40426_end_mask_0 = const()[name = tensor("op_40426_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40426_cast_fp16 = slice_by_index(begin = var_40426_begin_0, end = var_40426_end_0, end_mask = var_40426_end_mask_0, x = var_40008_cast_fp16)[name = tensor("op_40426_cast_fp16")]; + tensor var_40433_begin_0 = const()[name = tensor("op_40433_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_40433_end_0 = const()[name = tensor("op_40433_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_40433_end_mask_0 = const()[name = tensor("op_40433_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40433_cast_fp16 = slice_by_index(begin = var_40433_begin_0, end = var_40433_end_0, end_mask = var_40433_end_mask_0, x = var_40012_cast_fp16)[name = tensor("op_40433_cast_fp16")]; + tensor var_40440_begin_0 = const()[name = tensor("op_40440_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_40440_end_0 = const()[name = tensor("op_40440_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_40440_end_mask_0 = const()[name = tensor("op_40440_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40440_cast_fp16 = slice_by_index(begin = var_40440_begin_0, end = var_40440_end_0, end_mask = var_40440_end_mask_0, x = var_40012_cast_fp16)[name = tensor("op_40440_cast_fp16")]; + tensor var_40447_begin_0 = const()[name = tensor("op_40447_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_40447_end_0 = const()[name = tensor("op_40447_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_40447_end_mask_0 = const()[name = tensor("op_40447_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40447_cast_fp16 = slice_by_index(begin = var_40447_begin_0, end = var_40447_end_0, end_mask = var_40447_end_mask_0, x = var_40012_cast_fp16)[name = tensor("op_40447_cast_fp16")]; + tensor var_40454_begin_0 = const()[name = tensor("op_40454_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_40454_end_0 = const()[name = tensor("op_40454_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_40454_end_mask_0 = const()[name = tensor("op_40454_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40454_cast_fp16 = slice_by_index(begin = var_40454_begin_0, end = var_40454_end_0, end_mask = var_40454_end_mask_0, x = var_40012_cast_fp16)[name = tensor("op_40454_cast_fp16")]; + tensor var_40461_begin_0 = const()[name = tensor("op_40461_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_40461_end_0 = const()[name = tensor("op_40461_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_40461_end_mask_0 = const()[name = tensor("op_40461_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40461_cast_fp16 = slice_by_index(begin = var_40461_begin_0, end = var_40461_end_0, end_mask = var_40461_end_mask_0, x = var_40016_cast_fp16)[name = tensor("op_40461_cast_fp16")]; + tensor var_40468_begin_0 = const()[name = tensor("op_40468_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_40468_end_0 = const()[name = tensor("op_40468_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_40468_end_mask_0 = const()[name = tensor("op_40468_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40468_cast_fp16 = slice_by_index(begin = var_40468_begin_0, end = var_40468_end_0, end_mask = var_40468_end_mask_0, x = var_40016_cast_fp16)[name = tensor("op_40468_cast_fp16")]; + tensor var_40475_begin_0 = const()[name = tensor("op_40475_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_40475_end_0 = const()[name = tensor("op_40475_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_40475_end_mask_0 = const()[name = tensor("op_40475_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40475_cast_fp16 = slice_by_index(begin = var_40475_begin_0, end = var_40475_end_0, end_mask = var_40475_end_mask_0, x = var_40016_cast_fp16)[name = tensor("op_40475_cast_fp16")]; + tensor var_40482_begin_0 = const()[name = tensor("op_40482_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_40482_end_0 = const()[name = tensor("op_40482_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_40482_end_mask_0 = const()[name = tensor("op_40482_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40482_cast_fp16 = slice_by_index(begin = var_40482_begin_0, end = var_40482_end_0, end_mask = var_40482_end_mask_0, x = var_40016_cast_fp16)[name = tensor("op_40482_cast_fp16")]; + tensor var_40489_begin_0 = const()[name = tensor("op_40489_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_40489_end_0 = const()[name = tensor("op_40489_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_40489_end_mask_0 = const()[name = tensor("op_40489_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40489_cast_fp16 = slice_by_index(begin = var_40489_begin_0, end = var_40489_end_0, end_mask = var_40489_end_mask_0, x = var_40020_cast_fp16)[name = tensor("op_40489_cast_fp16")]; + tensor var_40496_begin_0 = const()[name = tensor("op_40496_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_40496_end_0 = const()[name = tensor("op_40496_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_40496_end_mask_0 = const()[name = tensor("op_40496_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40496_cast_fp16 = slice_by_index(begin = var_40496_begin_0, end = var_40496_end_0, end_mask = var_40496_end_mask_0, x = var_40020_cast_fp16)[name = tensor("op_40496_cast_fp16")]; + tensor var_40503_begin_0 = const()[name = tensor("op_40503_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_40503_end_0 = const()[name = tensor("op_40503_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_40503_end_mask_0 = const()[name = tensor("op_40503_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40503_cast_fp16 = slice_by_index(begin = var_40503_begin_0, end = var_40503_end_0, end_mask = var_40503_end_mask_0, x = var_40020_cast_fp16)[name = tensor("op_40503_cast_fp16")]; + tensor var_40510_begin_0 = const()[name = tensor("op_40510_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_40510_end_0 = const()[name = tensor("op_40510_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_40510_end_mask_0 = const()[name = tensor("op_40510_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40510_cast_fp16 = slice_by_index(begin = var_40510_begin_0, end = var_40510_end_0, end_mask = var_40510_end_mask_0, x = var_40020_cast_fp16)[name = tensor("op_40510_cast_fp16")]; + tensor var_40517_begin_0 = const()[name = tensor("op_40517_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_40517_end_0 = const()[name = tensor("op_40517_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_40517_end_mask_0 = const()[name = tensor("op_40517_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40517_cast_fp16 = slice_by_index(begin = var_40517_begin_0, end = var_40517_end_0, end_mask = var_40517_end_mask_0, x = var_40024_cast_fp16)[name = tensor("op_40517_cast_fp16")]; + tensor var_40524_begin_0 = const()[name = tensor("op_40524_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_40524_end_0 = const()[name = tensor("op_40524_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_40524_end_mask_0 = const()[name = tensor("op_40524_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40524_cast_fp16 = slice_by_index(begin = var_40524_begin_0, end = var_40524_end_0, end_mask = var_40524_end_mask_0, x = var_40024_cast_fp16)[name = tensor("op_40524_cast_fp16")]; + tensor var_40531_begin_0 = const()[name = tensor("op_40531_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_40531_end_0 = const()[name = tensor("op_40531_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_40531_end_mask_0 = const()[name = tensor("op_40531_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40531_cast_fp16 = slice_by_index(begin = var_40531_begin_0, end = var_40531_end_0, end_mask = var_40531_end_mask_0, x = var_40024_cast_fp16)[name = tensor("op_40531_cast_fp16")]; + tensor var_40538_begin_0 = const()[name = tensor("op_40538_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_40538_end_0 = const()[name = tensor("op_40538_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_40538_end_mask_0 = const()[name = tensor("op_40538_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40538_cast_fp16 = slice_by_index(begin = var_40538_begin_0, end = var_40538_end_0, end_mask = var_40538_end_mask_0, x = var_40024_cast_fp16)[name = tensor("op_40538_cast_fp16")]; + tensor var_40545_begin_0 = const()[name = tensor("op_40545_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_40545_end_0 = const()[name = tensor("op_40545_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_40545_end_mask_0 = const()[name = tensor("op_40545_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40545_cast_fp16 = slice_by_index(begin = var_40545_begin_0, end = var_40545_end_0, end_mask = var_40545_end_mask_0, x = var_40028_cast_fp16)[name = tensor("op_40545_cast_fp16")]; + tensor var_40552_begin_0 = const()[name = tensor("op_40552_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_40552_end_0 = const()[name = tensor("op_40552_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_40552_end_mask_0 = const()[name = tensor("op_40552_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40552_cast_fp16 = slice_by_index(begin = var_40552_begin_0, end = var_40552_end_0, end_mask = var_40552_end_mask_0, x = var_40028_cast_fp16)[name = tensor("op_40552_cast_fp16")]; + tensor var_40559_begin_0 = const()[name = tensor("op_40559_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_40559_end_0 = const()[name = tensor("op_40559_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_40559_end_mask_0 = const()[name = tensor("op_40559_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40559_cast_fp16 = slice_by_index(begin = var_40559_begin_0, end = var_40559_end_0, end_mask = var_40559_end_mask_0, x = var_40028_cast_fp16)[name = tensor("op_40559_cast_fp16")]; + tensor var_40566_begin_0 = const()[name = tensor("op_40566_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_40566_end_0 = const()[name = tensor("op_40566_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_40566_end_mask_0 = const()[name = tensor("op_40566_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40566_cast_fp16 = slice_by_index(begin = var_40566_begin_0, end = var_40566_end_0, end_mask = var_40566_end_mask_0, x = var_40028_cast_fp16)[name = tensor("op_40566_cast_fp16")]; + tensor var_40573_begin_0 = const()[name = tensor("op_40573_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_40573_end_0 = const()[name = tensor("op_40573_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_40573_end_mask_0 = const()[name = tensor("op_40573_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40573_cast_fp16 = slice_by_index(begin = var_40573_begin_0, end = var_40573_end_0, end_mask = var_40573_end_mask_0, x = var_40032_cast_fp16)[name = tensor("op_40573_cast_fp16")]; + tensor var_40580_begin_0 = const()[name = tensor("op_40580_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_40580_end_0 = const()[name = tensor("op_40580_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_40580_end_mask_0 = const()[name = tensor("op_40580_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40580_cast_fp16 = slice_by_index(begin = var_40580_begin_0, end = var_40580_end_0, end_mask = var_40580_end_mask_0, x = var_40032_cast_fp16)[name = tensor("op_40580_cast_fp16")]; + tensor var_40587_begin_0 = const()[name = tensor("op_40587_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_40587_end_0 = const()[name = tensor("op_40587_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_40587_end_mask_0 = const()[name = tensor("op_40587_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40587_cast_fp16 = slice_by_index(begin = var_40587_begin_0, end = var_40587_end_0, end_mask = var_40587_end_mask_0, x = var_40032_cast_fp16)[name = tensor("op_40587_cast_fp16")]; + tensor var_40594_begin_0 = const()[name = tensor("op_40594_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_40594_end_0 = const()[name = tensor("op_40594_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_40594_end_mask_0 = const()[name = tensor("op_40594_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40594_cast_fp16 = slice_by_index(begin = var_40594_begin_0, end = var_40594_end_0, end_mask = var_40594_end_mask_0, x = var_40032_cast_fp16)[name = tensor("op_40594_cast_fp16")]; + tensor k_53_perm_0 = const()[name = tensor("k_53_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_40599_begin_0 = const()[name = tensor("op_40599_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_40599_end_0 = const()[name = tensor("op_40599_end_0"), val = tensor([1, 1500, 1, 64])]; + tensor var_40599_end_mask_0 = const()[name = tensor("op_40599_end_mask_0"), val = tensor([true, true, true, false])]; + tensor transpose_5 = transpose(perm = k_53_perm_0, x = key_53_cast_fp16)[name = tensor("transpose_5")]; + tensor var_40599_cast_fp16 = slice_by_index(begin = var_40599_begin_0, end = var_40599_end_0, end_mask = var_40599_end_mask_0, x = transpose_5)[name = tensor("op_40599_cast_fp16")]; + tensor var_40603_begin_0 = const()[name = tensor("op_40603_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor var_40603_end_0 = const()[name = tensor("op_40603_end_0"), val = tensor([1, 1500, 1, 128])]; + tensor var_40603_end_mask_0 = const()[name = tensor("op_40603_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40603_cast_fp16 = slice_by_index(begin = var_40603_begin_0, end = var_40603_end_0, end_mask = var_40603_end_mask_0, x = transpose_5)[name = tensor("op_40603_cast_fp16")]; + tensor var_40607_begin_0 = const()[name = tensor("op_40607_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_40607_end_0 = const()[name = tensor("op_40607_end_0"), val = tensor([1, 1500, 1, 192])]; + tensor var_40607_end_mask_0 = const()[name = tensor("op_40607_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40607_cast_fp16 = slice_by_index(begin = var_40607_begin_0, end = var_40607_end_0, end_mask = var_40607_end_mask_0, x = transpose_5)[name = tensor("op_40607_cast_fp16")]; + tensor var_40611_begin_0 = const()[name = tensor("op_40611_begin_0"), val = tensor([0, 0, 0, 192])]; + tensor var_40611_end_0 = const()[name = tensor("op_40611_end_0"), val = tensor([1, 1500, 1, 256])]; + tensor var_40611_end_mask_0 = const()[name = tensor("op_40611_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40611_cast_fp16 = slice_by_index(begin = var_40611_begin_0, end = var_40611_end_0, end_mask = var_40611_end_mask_0, x = transpose_5)[name = tensor("op_40611_cast_fp16")]; + tensor var_40615_begin_0 = const()[name = tensor("op_40615_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_40615_end_0 = const()[name = tensor("op_40615_end_0"), val = tensor([1, 1500, 1, 320])]; + tensor var_40615_end_mask_0 = const()[name = tensor("op_40615_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40615_cast_fp16 = slice_by_index(begin = var_40615_begin_0, end = var_40615_end_0, end_mask = var_40615_end_mask_0, x = transpose_5)[name = tensor("op_40615_cast_fp16")]; + tensor var_40619_begin_0 = const()[name = tensor("op_40619_begin_0"), val = tensor([0, 0, 0, 320])]; + tensor var_40619_end_0 = const()[name = tensor("op_40619_end_0"), val = tensor([1, 1500, 1, 384])]; + tensor var_40619_end_mask_0 = const()[name = tensor("op_40619_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40619_cast_fp16 = slice_by_index(begin = var_40619_begin_0, end = var_40619_end_0, end_mask = var_40619_end_mask_0, x = transpose_5)[name = tensor("op_40619_cast_fp16")]; + tensor var_40623_begin_0 = const()[name = tensor("op_40623_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_40623_end_0 = const()[name = tensor("op_40623_end_0"), val = tensor([1, 1500, 1, 448])]; + tensor var_40623_end_mask_0 = const()[name = tensor("op_40623_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40623_cast_fp16 = slice_by_index(begin = var_40623_begin_0, end = var_40623_end_0, end_mask = var_40623_end_mask_0, x = transpose_5)[name = tensor("op_40623_cast_fp16")]; + tensor var_40627_begin_0 = const()[name = tensor("op_40627_begin_0"), val = tensor([0, 0, 0, 448])]; + tensor var_40627_end_0 = const()[name = tensor("op_40627_end_0"), val = tensor([1, 1500, 1, 512])]; + tensor var_40627_end_mask_0 = const()[name = tensor("op_40627_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40627_cast_fp16 = slice_by_index(begin = var_40627_begin_0, end = var_40627_end_0, end_mask = var_40627_end_mask_0, x = transpose_5)[name = tensor("op_40627_cast_fp16")]; + tensor var_40631_begin_0 = const()[name = tensor("op_40631_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_40631_end_0 = const()[name = tensor("op_40631_end_0"), val = tensor([1, 1500, 1, 576])]; + tensor var_40631_end_mask_0 = const()[name = tensor("op_40631_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40631_cast_fp16 = slice_by_index(begin = var_40631_begin_0, end = var_40631_end_0, end_mask = var_40631_end_mask_0, x = transpose_5)[name = tensor("op_40631_cast_fp16")]; + tensor var_40635_begin_0 = const()[name = tensor("op_40635_begin_0"), val = tensor([0, 0, 0, 576])]; + tensor var_40635_end_0 = const()[name = tensor("op_40635_end_0"), val = tensor([1, 1500, 1, 640])]; + tensor var_40635_end_mask_0 = const()[name = tensor("op_40635_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40635_cast_fp16 = slice_by_index(begin = var_40635_begin_0, end = var_40635_end_0, end_mask = var_40635_end_mask_0, x = transpose_5)[name = tensor("op_40635_cast_fp16")]; + tensor var_40639_begin_0 = const()[name = tensor("op_40639_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_40639_end_0 = const()[name = tensor("op_40639_end_0"), val = tensor([1, 1500, 1, 704])]; + tensor var_40639_end_mask_0 = const()[name = tensor("op_40639_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40639_cast_fp16 = slice_by_index(begin = var_40639_begin_0, end = var_40639_end_0, end_mask = var_40639_end_mask_0, x = transpose_5)[name = tensor("op_40639_cast_fp16")]; + tensor var_40643_begin_0 = const()[name = tensor("op_40643_begin_0"), val = tensor([0, 0, 0, 704])]; + tensor var_40643_end_0 = const()[name = tensor("op_40643_end_0"), val = tensor([1, 1500, 1, 768])]; + tensor var_40643_end_mask_0 = const()[name = tensor("op_40643_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40643_cast_fp16 = slice_by_index(begin = var_40643_begin_0, end = var_40643_end_0, end_mask = var_40643_end_mask_0, x = transpose_5)[name = tensor("op_40643_cast_fp16")]; + tensor var_40647_begin_0 = const()[name = tensor("op_40647_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_40647_end_0 = const()[name = tensor("op_40647_end_0"), val = tensor([1, 1500, 1, 832])]; + tensor var_40647_end_mask_0 = const()[name = tensor("op_40647_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40647_cast_fp16 = slice_by_index(begin = var_40647_begin_0, end = var_40647_end_0, end_mask = var_40647_end_mask_0, x = transpose_5)[name = tensor("op_40647_cast_fp16")]; + tensor var_40651_begin_0 = const()[name = tensor("op_40651_begin_0"), val = tensor([0, 0, 0, 832])]; + tensor var_40651_end_0 = const()[name = tensor("op_40651_end_0"), val = tensor([1, 1500, 1, 896])]; + tensor var_40651_end_mask_0 = const()[name = tensor("op_40651_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40651_cast_fp16 = slice_by_index(begin = var_40651_begin_0, end = var_40651_end_0, end_mask = var_40651_end_mask_0, x = transpose_5)[name = tensor("op_40651_cast_fp16")]; + tensor var_40655_begin_0 = const()[name = tensor("op_40655_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_40655_end_0 = const()[name = tensor("op_40655_end_0"), val = tensor([1, 1500, 1, 960])]; + tensor var_40655_end_mask_0 = const()[name = tensor("op_40655_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40655_cast_fp16 = slice_by_index(begin = var_40655_begin_0, end = var_40655_end_0, end_mask = var_40655_end_mask_0, x = transpose_5)[name = tensor("op_40655_cast_fp16")]; + tensor var_40659_begin_0 = const()[name = tensor("op_40659_begin_0"), val = tensor([0, 0, 0, 960])]; + tensor var_40659_end_0 = const()[name = tensor("op_40659_end_0"), val = tensor([1, 1500, 1, 1024])]; + tensor var_40659_end_mask_0 = const()[name = tensor("op_40659_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40659_cast_fp16 = slice_by_index(begin = var_40659_begin_0, end = var_40659_end_0, end_mask = var_40659_end_mask_0, x = transpose_5)[name = tensor("op_40659_cast_fp16")]; + tensor var_40663_begin_0 = const()[name = tensor("op_40663_begin_0"), val = tensor([0, 0, 0, 1024])]; + tensor var_40663_end_0 = const()[name = tensor("op_40663_end_0"), val = tensor([1, 1500, 1, 1088])]; + tensor var_40663_end_mask_0 = const()[name = tensor("op_40663_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40663_cast_fp16 = slice_by_index(begin = var_40663_begin_0, end = var_40663_end_0, end_mask = var_40663_end_mask_0, x = transpose_5)[name = tensor("op_40663_cast_fp16")]; + tensor var_40667_begin_0 = const()[name = tensor("op_40667_begin_0"), val = tensor([0, 0, 0, 1088])]; + tensor var_40667_end_0 = const()[name = tensor("op_40667_end_0"), val = tensor([1, 1500, 1, 1152])]; + tensor var_40667_end_mask_0 = const()[name = tensor("op_40667_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40667_cast_fp16 = slice_by_index(begin = var_40667_begin_0, end = var_40667_end_0, end_mask = var_40667_end_mask_0, x = transpose_5)[name = tensor("op_40667_cast_fp16")]; + tensor var_40671_begin_0 = const()[name = tensor("op_40671_begin_0"), val = tensor([0, 0, 0, 1152])]; + tensor var_40671_end_0 = const()[name = tensor("op_40671_end_0"), val = tensor([1, 1500, 1, 1216])]; + tensor var_40671_end_mask_0 = const()[name = tensor("op_40671_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40671_cast_fp16 = slice_by_index(begin = var_40671_begin_0, end = var_40671_end_0, end_mask = var_40671_end_mask_0, x = transpose_5)[name = tensor("op_40671_cast_fp16")]; + tensor var_40675_begin_0 = const()[name = tensor("op_40675_begin_0"), val = tensor([0, 0, 0, 1216])]; + tensor var_40675_end_0 = const()[name = tensor("op_40675_end_0"), val = tensor([1, 1500, 1, 1280])]; + tensor var_40675_end_mask_0 = const()[name = tensor("op_40675_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_40675_cast_fp16 = slice_by_index(begin = var_40675_begin_0, end = var_40675_end_0, end_mask = var_40675_end_mask_0, x = transpose_5)[name = tensor("op_40675_cast_fp16")]; + tensor var_40677_begin_0 = const()[name = tensor("op_40677_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_40677_end_0 = const()[name = tensor("op_40677_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_40677_end_mask_0 = const()[name = tensor("op_40677_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_40677_cast_fp16 = slice_by_index(begin = var_40677_begin_0, end = var_40677_end_0, end_mask = var_40677_end_mask_0, x = value_53_cast_fp16)[name = tensor("op_40677_cast_fp16")]; + tensor var_40681_begin_0 = const()[name = tensor("op_40681_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_40681_end_0 = const()[name = tensor("op_40681_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_40681_end_mask_0 = const()[name = tensor("op_40681_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_40681_cast_fp16 = slice_by_index(begin = var_40681_begin_0, end = var_40681_end_0, end_mask = var_40681_end_mask_0, x = value_53_cast_fp16)[name = tensor("op_40681_cast_fp16")]; + tensor var_40685_begin_0 = const()[name = tensor("op_40685_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_40685_end_0 = const()[name = tensor("op_40685_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_40685_end_mask_0 = const()[name = tensor("op_40685_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_40685_cast_fp16 = slice_by_index(begin = var_40685_begin_0, end = var_40685_end_0, end_mask = var_40685_end_mask_0, x = value_53_cast_fp16)[name = tensor("op_40685_cast_fp16")]; + tensor var_40689_begin_0 = const()[name = tensor("op_40689_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_40689_end_0 = const()[name = tensor("op_40689_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_40689_end_mask_0 = const()[name = tensor("op_40689_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_40689_cast_fp16 = slice_by_index(begin = var_40689_begin_0, end = var_40689_end_0, end_mask = var_40689_end_mask_0, x = value_53_cast_fp16)[name = tensor("op_40689_cast_fp16")]; + tensor var_40693_begin_0 = const()[name = tensor("op_40693_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_40693_end_0 = const()[name = tensor("op_40693_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_40693_end_mask_0 = const()[name = tensor("op_40693_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_40693_cast_fp16 = slice_by_index(begin = var_40693_begin_0, end = var_40693_end_0, end_mask = var_40693_end_mask_0, x = value_53_cast_fp16)[name = tensor("op_40693_cast_fp16")]; + tensor var_40697_begin_0 = const()[name = tensor("op_40697_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_40697_end_0 = const()[name = tensor("op_40697_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_40697_end_mask_0 = const()[name = tensor("op_40697_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_40697_cast_fp16 = slice_by_index(begin = var_40697_begin_0, end = var_40697_end_0, end_mask = var_40697_end_mask_0, x = value_53_cast_fp16)[name = tensor("op_40697_cast_fp16")]; + tensor var_40701_begin_0 = const()[name = tensor("op_40701_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_40701_end_0 = const()[name = tensor("op_40701_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_40701_end_mask_0 = const()[name = tensor("op_40701_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_40701_cast_fp16 = slice_by_index(begin = var_40701_begin_0, end = var_40701_end_0, end_mask = var_40701_end_mask_0, x = value_53_cast_fp16)[name = tensor("op_40701_cast_fp16")]; + tensor var_40705_begin_0 = const()[name = tensor("op_40705_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_40705_end_0 = const()[name = tensor("op_40705_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_40705_end_mask_0 = const()[name = tensor("op_40705_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_40705_cast_fp16 = slice_by_index(begin = var_40705_begin_0, end = var_40705_end_0, end_mask = var_40705_end_mask_0, x = value_53_cast_fp16)[name = tensor("op_40705_cast_fp16")]; + tensor var_40709_begin_0 = const()[name = tensor("op_40709_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_40709_end_0 = const()[name = tensor("op_40709_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_40709_end_mask_0 = const()[name = tensor("op_40709_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_40709_cast_fp16 = slice_by_index(begin = var_40709_begin_0, end = var_40709_end_0, end_mask = var_40709_end_mask_0, x = value_53_cast_fp16)[name = tensor("op_40709_cast_fp16")]; + tensor var_40713_begin_0 = const()[name = tensor("op_40713_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_40713_end_0 = const()[name = tensor("op_40713_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_40713_end_mask_0 = const()[name = tensor("op_40713_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_40713_cast_fp16 = slice_by_index(begin = var_40713_begin_0, end = var_40713_end_0, end_mask = var_40713_end_mask_0, x = value_53_cast_fp16)[name = tensor("op_40713_cast_fp16")]; + tensor var_40717_begin_0 = const()[name = tensor("op_40717_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_40717_end_0 = const()[name = tensor("op_40717_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_40717_end_mask_0 = const()[name = tensor("op_40717_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_40717_cast_fp16 = slice_by_index(begin = var_40717_begin_0, end = var_40717_end_0, end_mask = var_40717_end_mask_0, x = value_53_cast_fp16)[name = tensor("op_40717_cast_fp16")]; + tensor var_40721_begin_0 = const()[name = tensor("op_40721_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_40721_end_0 = const()[name = tensor("op_40721_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_40721_end_mask_0 = const()[name = tensor("op_40721_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_40721_cast_fp16 = slice_by_index(begin = var_40721_begin_0, end = var_40721_end_0, end_mask = var_40721_end_mask_0, x = value_53_cast_fp16)[name = tensor("op_40721_cast_fp16")]; + tensor var_40725_begin_0 = const()[name = tensor("op_40725_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_40725_end_0 = const()[name = tensor("op_40725_end_0"), val = tensor([1, 832, 1, 1500])]; + tensor var_40725_end_mask_0 = const()[name = tensor("op_40725_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_40725_cast_fp16 = slice_by_index(begin = var_40725_begin_0, end = var_40725_end_0, end_mask = var_40725_end_mask_0, x = value_53_cast_fp16)[name = tensor("op_40725_cast_fp16")]; + tensor var_40729_begin_0 = const()[name = tensor("op_40729_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_40729_end_0 = const()[name = tensor("op_40729_end_0"), val = tensor([1, 896, 1, 1500])]; + tensor var_40729_end_mask_0 = const()[name = tensor("op_40729_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_40729_cast_fp16 = slice_by_index(begin = var_40729_begin_0, end = var_40729_end_0, end_mask = var_40729_end_mask_0, x = value_53_cast_fp16)[name = tensor("op_40729_cast_fp16")]; + tensor var_40733_begin_0 = const()[name = tensor("op_40733_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_40733_end_0 = const()[name = tensor("op_40733_end_0"), val = tensor([1, 960, 1, 1500])]; + tensor var_40733_end_mask_0 = const()[name = tensor("op_40733_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_40733_cast_fp16 = slice_by_index(begin = var_40733_begin_0, end = var_40733_end_0, end_mask = var_40733_end_mask_0, x = value_53_cast_fp16)[name = tensor("op_40733_cast_fp16")]; + tensor var_40737_begin_0 = const()[name = tensor("op_40737_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_40737_end_0 = const()[name = tensor("op_40737_end_0"), val = tensor([1, 1024, 1, 1500])]; + tensor var_40737_end_mask_0 = const()[name = tensor("op_40737_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_40737_cast_fp16 = slice_by_index(begin = var_40737_begin_0, end = var_40737_end_0, end_mask = var_40737_end_mask_0, x = value_53_cast_fp16)[name = tensor("op_40737_cast_fp16")]; + tensor var_40741_begin_0 = const()[name = tensor("op_40741_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_40741_end_0 = const()[name = tensor("op_40741_end_0"), val = tensor([1, 1088, 1, 1500])]; + tensor var_40741_end_mask_0 = const()[name = tensor("op_40741_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_40741_cast_fp16 = slice_by_index(begin = var_40741_begin_0, end = var_40741_end_0, end_mask = var_40741_end_mask_0, x = value_53_cast_fp16)[name = tensor("op_40741_cast_fp16")]; + tensor var_40745_begin_0 = const()[name = tensor("op_40745_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_40745_end_0 = const()[name = tensor("op_40745_end_0"), val = tensor([1, 1152, 1, 1500])]; + tensor var_40745_end_mask_0 = const()[name = tensor("op_40745_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_40745_cast_fp16 = slice_by_index(begin = var_40745_begin_0, end = var_40745_end_0, end_mask = var_40745_end_mask_0, x = value_53_cast_fp16)[name = tensor("op_40745_cast_fp16")]; + tensor var_40749_begin_0 = const()[name = tensor("op_40749_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_40749_end_0 = const()[name = tensor("op_40749_end_0"), val = tensor([1, 1216, 1, 1500])]; + tensor var_40749_end_mask_0 = const()[name = tensor("op_40749_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_40749_cast_fp16 = slice_by_index(begin = var_40749_begin_0, end = var_40749_end_0, end_mask = var_40749_end_mask_0, x = value_53_cast_fp16)[name = tensor("op_40749_cast_fp16")]; + tensor var_40753_begin_0 = const()[name = tensor("op_40753_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_40753_end_0 = const()[name = tensor("op_40753_end_0"), val = tensor([1, 1280, 1, 1500])]; + tensor var_40753_end_mask_0 = const()[name = tensor("op_40753_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_40753_cast_fp16 = slice_by_index(begin = var_40753_begin_0, end = var_40753_end_0, end_mask = var_40753_end_mask_0, x = value_53_cast_fp16)[name = tensor("op_40753_cast_fp16")]; + tensor var_40757_equation_0 = const()[name = tensor("op_40757_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_40757_cast_fp16 = einsum(equation = var_40757_equation_0, values = (var_40599_cast_fp16, var_40041_cast_fp16))[name = tensor("op_40757_cast_fp16")]; + tensor var_40758_to_fp16 = const()[name = tensor("op_40758_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4161_cast_fp16 = mul(x = var_40757_cast_fp16, y = var_40758_to_fp16)[name = tensor("aw_chunk_4161_cast_fp16")]; + tensor var_40761_equation_0 = const()[name = tensor("op_40761_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_40761_cast_fp16 = einsum(equation = var_40761_equation_0, values = (var_40599_cast_fp16, var_40048_cast_fp16))[name = tensor("op_40761_cast_fp16")]; + tensor var_40762_to_fp16 = const()[name = tensor("op_40762_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4163_cast_fp16 = mul(x = var_40761_cast_fp16, y = var_40762_to_fp16)[name = tensor("aw_chunk_4163_cast_fp16")]; + tensor var_40765_equation_0 = const()[name = tensor("op_40765_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_40765_cast_fp16 = einsum(equation = var_40765_equation_0, values = (var_40599_cast_fp16, var_40055_cast_fp16))[name = tensor("op_40765_cast_fp16")]; + tensor var_40766_to_fp16 = const()[name = tensor("op_40766_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4165_cast_fp16 = mul(x = var_40765_cast_fp16, y = var_40766_to_fp16)[name = tensor("aw_chunk_4165_cast_fp16")]; + tensor var_40769_equation_0 = const()[name = tensor("op_40769_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_40769_cast_fp16 = einsum(equation = var_40769_equation_0, values = (var_40599_cast_fp16, var_40062_cast_fp16))[name = tensor("op_40769_cast_fp16")]; + tensor var_40770_to_fp16 = const()[name = tensor("op_40770_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4167_cast_fp16 = mul(x = var_40769_cast_fp16, y = var_40770_to_fp16)[name = tensor("aw_chunk_4167_cast_fp16")]; + tensor var_40773_equation_0 = const()[name = tensor("op_40773_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_40773_cast_fp16 = einsum(equation = var_40773_equation_0, values = (var_40603_cast_fp16, var_40069_cast_fp16))[name = tensor("op_40773_cast_fp16")]; + tensor var_40774_to_fp16 = const()[name = tensor("op_40774_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4169_cast_fp16 = mul(x = var_40773_cast_fp16, y = var_40774_to_fp16)[name = tensor("aw_chunk_4169_cast_fp16")]; + tensor var_40777_equation_0 = const()[name = tensor("op_40777_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_40777_cast_fp16 = einsum(equation = var_40777_equation_0, values = (var_40603_cast_fp16, var_40076_cast_fp16))[name = tensor("op_40777_cast_fp16")]; + tensor var_40778_to_fp16 = const()[name = tensor("op_40778_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4171_cast_fp16 = mul(x = var_40777_cast_fp16, y = var_40778_to_fp16)[name = tensor("aw_chunk_4171_cast_fp16")]; + tensor var_40781_equation_0 = const()[name = tensor("op_40781_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_40781_cast_fp16 = einsum(equation = var_40781_equation_0, values = (var_40603_cast_fp16, var_40083_cast_fp16))[name = tensor("op_40781_cast_fp16")]; + tensor var_40782_to_fp16 = const()[name = tensor("op_40782_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4173_cast_fp16 = mul(x = var_40781_cast_fp16, y = var_40782_to_fp16)[name = tensor("aw_chunk_4173_cast_fp16")]; + tensor var_40785_equation_0 = const()[name = tensor("op_40785_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_40785_cast_fp16 = einsum(equation = var_40785_equation_0, values = (var_40603_cast_fp16, var_40090_cast_fp16))[name = tensor("op_40785_cast_fp16")]; + tensor var_40786_to_fp16 = const()[name = tensor("op_40786_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4175_cast_fp16 = mul(x = var_40785_cast_fp16, y = var_40786_to_fp16)[name = tensor("aw_chunk_4175_cast_fp16")]; + tensor var_40789_equation_0 = const()[name = tensor("op_40789_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_40789_cast_fp16 = einsum(equation = var_40789_equation_0, values = (var_40607_cast_fp16, var_40097_cast_fp16))[name = tensor("op_40789_cast_fp16")]; + tensor var_40790_to_fp16 = const()[name = tensor("op_40790_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4177_cast_fp16 = mul(x = var_40789_cast_fp16, y = var_40790_to_fp16)[name = tensor("aw_chunk_4177_cast_fp16")]; + tensor var_40793_equation_0 = const()[name = tensor("op_40793_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_40793_cast_fp16 = einsum(equation = var_40793_equation_0, values = (var_40607_cast_fp16, var_40104_cast_fp16))[name = tensor("op_40793_cast_fp16")]; + tensor var_40794_to_fp16 = const()[name = tensor("op_40794_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4179_cast_fp16 = mul(x = var_40793_cast_fp16, y = var_40794_to_fp16)[name = tensor("aw_chunk_4179_cast_fp16")]; + tensor var_40797_equation_0 = const()[name = tensor("op_40797_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_40797_cast_fp16 = einsum(equation = var_40797_equation_0, values = (var_40607_cast_fp16, var_40111_cast_fp16))[name = tensor("op_40797_cast_fp16")]; + tensor var_40798_to_fp16 = const()[name = tensor("op_40798_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4181_cast_fp16 = mul(x = var_40797_cast_fp16, y = var_40798_to_fp16)[name = tensor("aw_chunk_4181_cast_fp16")]; + tensor var_40801_equation_0 = const()[name = tensor("op_40801_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_40801_cast_fp16 = einsum(equation = var_40801_equation_0, values = (var_40607_cast_fp16, var_40118_cast_fp16))[name = tensor("op_40801_cast_fp16")]; + tensor var_40802_to_fp16 = const()[name = tensor("op_40802_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4183_cast_fp16 = mul(x = var_40801_cast_fp16, y = var_40802_to_fp16)[name = tensor("aw_chunk_4183_cast_fp16")]; + tensor var_40805_equation_0 = const()[name = tensor("op_40805_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_40805_cast_fp16 = einsum(equation = var_40805_equation_0, values = (var_40611_cast_fp16, var_40125_cast_fp16))[name = tensor("op_40805_cast_fp16")]; + tensor var_40806_to_fp16 = const()[name = tensor("op_40806_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4185_cast_fp16 = mul(x = var_40805_cast_fp16, y = var_40806_to_fp16)[name = tensor("aw_chunk_4185_cast_fp16")]; + tensor var_40809_equation_0 = const()[name = tensor("op_40809_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_40809_cast_fp16 = einsum(equation = var_40809_equation_0, values = (var_40611_cast_fp16, var_40132_cast_fp16))[name = tensor("op_40809_cast_fp16")]; + tensor var_40810_to_fp16 = const()[name = tensor("op_40810_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4187_cast_fp16 = mul(x = var_40809_cast_fp16, y = var_40810_to_fp16)[name = tensor("aw_chunk_4187_cast_fp16")]; + tensor var_40813_equation_0 = const()[name = tensor("op_40813_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_40813_cast_fp16 = einsum(equation = var_40813_equation_0, values = (var_40611_cast_fp16, var_40139_cast_fp16))[name = tensor("op_40813_cast_fp16")]; + tensor var_40814_to_fp16 = const()[name = tensor("op_40814_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4189_cast_fp16 = mul(x = var_40813_cast_fp16, y = var_40814_to_fp16)[name = tensor("aw_chunk_4189_cast_fp16")]; + tensor var_40817_equation_0 = const()[name = tensor("op_40817_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_40817_cast_fp16 = einsum(equation = var_40817_equation_0, values = (var_40611_cast_fp16, var_40146_cast_fp16))[name = tensor("op_40817_cast_fp16")]; + tensor var_40818_to_fp16 = const()[name = tensor("op_40818_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4191_cast_fp16 = mul(x = var_40817_cast_fp16, y = var_40818_to_fp16)[name = tensor("aw_chunk_4191_cast_fp16")]; + tensor var_40821_equation_0 = const()[name = tensor("op_40821_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_40821_cast_fp16 = einsum(equation = var_40821_equation_0, values = (var_40615_cast_fp16, var_40153_cast_fp16))[name = tensor("op_40821_cast_fp16")]; + tensor var_40822_to_fp16 = const()[name = tensor("op_40822_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4193_cast_fp16 = mul(x = var_40821_cast_fp16, y = var_40822_to_fp16)[name = tensor("aw_chunk_4193_cast_fp16")]; + tensor var_40825_equation_0 = const()[name = tensor("op_40825_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_40825_cast_fp16 = einsum(equation = var_40825_equation_0, values = (var_40615_cast_fp16, var_40160_cast_fp16))[name = tensor("op_40825_cast_fp16")]; + tensor var_40826_to_fp16 = const()[name = tensor("op_40826_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4195_cast_fp16 = mul(x = var_40825_cast_fp16, y = var_40826_to_fp16)[name = tensor("aw_chunk_4195_cast_fp16")]; + tensor var_40829_equation_0 = const()[name = tensor("op_40829_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_40829_cast_fp16 = einsum(equation = var_40829_equation_0, values = (var_40615_cast_fp16, var_40167_cast_fp16))[name = tensor("op_40829_cast_fp16")]; + tensor var_40830_to_fp16 = const()[name = tensor("op_40830_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4197_cast_fp16 = mul(x = var_40829_cast_fp16, y = var_40830_to_fp16)[name = tensor("aw_chunk_4197_cast_fp16")]; + tensor var_40833_equation_0 = const()[name = tensor("op_40833_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_40833_cast_fp16 = einsum(equation = var_40833_equation_0, values = (var_40615_cast_fp16, var_40174_cast_fp16))[name = tensor("op_40833_cast_fp16")]; + tensor var_40834_to_fp16 = const()[name = tensor("op_40834_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4199_cast_fp16 = mul(x = var_40833_cast_fp16, y = var_40834_to_fp16)[name = tensor("aw_chunk_4199_cast_fp16")]; + tensor var_40837_equation_0 = const()[name = tensor("op_40837_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_40837_cast_fp16 = einsum(equation = var_40837_equation_0, values = (var_40619_cast_fp16, var_40181_cast_fp16))[name = tensor("op_40837_cast_fp16")]; + tensor var_40838_to_fp16 = const()[name = tensor("op_40838_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4201_cast_fp16 = mul(x = var_40837_cast_fp16, y = var_40838_to_fp16)[name = tensor("aw_chunk_4201_cast_fp16")]; + tensor var_40841_equation_0 = const()[name = tensor("op_40841_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_40841_cast_fp16 = einsum(equation = var_40841_equation_0, values = (var_40619_cast_fp16, var_40188_cast_fp16))[name = tensor("op_40841_cast_fp16")]; + tensor var_40842_to_fp16 = const()[name = tensor("op_40842_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4203_cast_fp16 = mul(x = var_40841_cast_fp16, y = var_40842_to_fp16)[name = tensor("aw_chunk_4203_cast_fp16")]; + tensor var_40845_equation_0 = const()[name = tensor("op_40845_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_40845_cast_fp16 = einsum(equation = var_40845_equation_0, values = (var_40619_cast_fp16, var_40195_cast_fp16))[name = tensor("op_40845_cast_fp16")]; + tensor var_40846_to_fp16 = const()[name = tensor("op_40846_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4205_cast_fp16 = mul(x = var_40845_cast_fp16, y = var_40846_to_fp16)[name = tensor("aw_chunk_4205_cast_fp16")]; + tensor var_40849_equation_0 = const()[name = tensor("op_40849_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_40849_cast_fp16 = einsum(equation = var_40849_equation_0, values = (var_40619_cast_fp16, var_40202_cast_fp16))[name = tensor("op_40849_cast_fp16")]; + tensor var_40850_to_fp16 = const()[name = tensor("op_40850_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4207_cast_fp16 = mul(x = var_40849_cast_fp16, y = var_40850_to_fp16)[name = tensor("aw_chunk_4207_cast_fp16")]; + tensor var_40853_equation_0 = const()[name = tensor("op_40853_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_40853_cast_fp16 = einsum(equation = var_40853_equation_0, values = (var_40623_cast_fp16, var_40209_cast_fp16))[name = tensor("op_40853_cast_fp16")]; + tensor var_40854_to_fp16 = const()[name = tensor("op_40854_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4209_cast_fp16 = mul(x = var_40853_cast_fp16, y = var_40854_to_fp16)[name = tensor("aw_chunk_4209_cast_fp16")]; + tensor var_40857_equation_0 = const()[name = tensor("op_40857_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_40857_cast_fp16 = einsum(equation = var_40857_equation_0, values = (var_40623_cast_fp16, var_40216_cast_fp16))[name = tensor("op_40857_cast_fp16")]; + tensor var_40858_to_fp16 = const()[name = tensor("op_40858_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4211_cast_fp16 = mul(x = var_40857_cast_fp16, y = var_40858_to_fp16)[name = tensor("aw_chunk_4211_cast_fp16")]; + tensor var_40861_equation_0 = const()[name = tensor("op_40861_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_40861_cast_fp16 = einsum(equation = var_40861_equation_0, values = (var_40623_cast_fp16, var_40223_cast_fp16))[name = tensor("op_40861_cast_fp16")]; + tensor var_40862_to_fp16 = const()[name = tensor("op_40862_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4213_cast_fp16 = mul(x = var_40861_cast_fp16, y = var_40862_to_fp16)[name = tensor("aw_chunk_4213_cast_fp16")]; + tensor var_40865_equation_0 = const()[name = tensor("op_40865_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_40865_cast_fp16 = einsum(equation = var_40865_equation_0, values = (var_40623_cast_fp16, var_40230_cast_fp16))[name = tensor("op_40865_cast_fp16")]; + tensor var_40866_to_fp16 = const()[name = tensor("op_40866_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4215_cast_fp16 = mul(x = var_40865_cast_fp16, y = var_40866_to_fp16)[name = tensor("aw_chunk_4215_cast_fp16")]; + tensor var_40869_equation_0 = const()[name = tensor("op_40869_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_40869_cast_fp16 = einsum(equation = var_40869_equation_0, values = (var_40627_cast_fp16, var_40237_cast_fp16))[name = tensor("op_40869_cast_fp16")]; + tensor var_40870_to_fp16 = const()[name = tensor("op_40870_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4217_cast_fp16 = mul(x = var_40869_cast_fp16, y = var_40870_to_fp16)[name = tensor("aw_chunk_4217_cast_fp16")]; + tensor var_40873_equation_0 = const()[name = tensor("op_40873_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_40873_cast_fp16 = einsum(equation = var_40873_equation_0, values = (var_40627_cast_fp16, var_40244_cast_fp16))[name = tensor("op_40873_cast_fp16")]; + tensor var_40874_to_fp16 = const()[name = tensor("op_40874_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4219_cast_fp16 = mul(x = var_40873_cast_fp16, y = var_40874_to_fp16)[name = tensor("aw_chunk_4219_cast_fp16")]; + tensor var_40877_equation_0 = const()[name = tensor("op_40877_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_40877_cast_fp16 = einsum(equation = var_40877_equation_0, values = (var_40627_cast_fp16, var_40251_cast_fp16))[name = tensor("op_40877_cast_fp16")]; + tensor var_40878_to_fp16 = const()[name = tensor("op_40878_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4221_cast_fp16 = mul(x = var_40877_cast_fp16, y = var_40878_to_fp16)[name = tensor("aw_chunk_4221_cast_fp16")]; + tensor var_40881_equation_0 = const()[name = tensor("op_40881_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_40881_cast_fp16 = einsum(equation = var_40881_equation_0, values = (var_40627_cast_fp16, var_40258_cast_fp16))[name = tensor("op_40881_cast_fp16")]; + tensor var_40882_to_fp16 = const()[name = tensor("op_40882_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4223_cast_fp16 = mul(x = var_40881_cast_fp16, y = var_40882_to_fp16)[name = tensor("aw_chunk_4223_cast_fp16")]; + tensor var_40885_equation_0 = const()[name = tensor("op_40885_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_40885_cast_fp16 = einsum(equation = var_40885_equation_0, values = (var_40631_cast_fp16, var_40265_cast_fp16))[name = tensor("op_40885_cast_fp16")]; + tensor var_40886_to_fp16 = const()[name = tensor("op_40886_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4225_cast_fp16 = mul(x = var_40885_cast_fp16, y = var_40886_to_fp16)[name = tensor("aw_chunk_4225_cast_fp16")]; + tensor var_40889_equation_0 = const()[name = tensor("op_40889_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_40889_cast_fp16 = einsum(equation = var_40889_equation_0, values = (var_40631_cast_fp16, var_40272_cast_fp16))[name = tensor("op_40889_cast_fp16")]; + tensor var_40890_to_fp16 = const()[name = tensor("op_40890_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4227_cast_fp16 = mul(x = var_40889_cast_fp16, y = var_40890_to_fp16)[name = tensor("aw_chunk_4227_cast_fp16")]; + tensor var_40893_equation_0 = const()[name = tensor("op_40893_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_40893_cast_fp16 = einsum(equation = var_40893_equation_0, values = (var_40631_cast_fp16, var_40279_cast_fp16))[name = tensor("op_40893_cast_fp16")]; + tensor var_40894_to_fp16 = const()[name = tensor("op_40894_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4229_cast_fp16 = mul(x = var_40893_cast_fp16, y = var_40894_to_fp16)[name = tensor("aw_chunk_4229_cast_fp16")]; + tensor var_40897_equation_0 = const()[name = tensor("op_40897_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_40897_cast_fp16 = einsum(equation = var_40897_equation_0, values = (var_40631_cast_fp16, var_40286_cast_fp16))[name = tensor("op_40897_cast_fp16")]; + tensor var_40898_to_fp16 = const()[name = tensor("op_40898_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4231_cast_fp16 = mul(x = var_40897_cast_fp16, y = var_40898_to_fp16)[name = tensor("aw_chunk_4231_cast_fp16")]; + tensor var_40901_equation_0 = const()[name = tensor("op_40901_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_40901_cast_fp16 = einsum(equation = var_40901_equation_0, values = (var_40635_cast_fp16, var_40293_cast_fp16))[name = tensor("op_40901_cast_fp16")]; + tensor var_40902_to_fp16 = const()[name = tensor("op_40902_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4233_cast_fp16 = mul(x = var_40901_cast_fp16, y = var_40902_to_fp16)[name = tensor("aw_chunk_4233_cast_fp16")]; + tensor var_40905_equation_0 = const()[name = tensor("op_40905_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_40905_cast_fp16 = einsum(equation = var_40905_equation_0, values = (var_40635_cast_fp16, var_40300_cast_fp16))[name = tensor("op_40905_cast_fp16")]; + tensor var_40906_to_fp16 = const()[name = tensor("op_40906_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4235_cast_fp16 = mul(x = var_40905_cast_fp16, y = var_40906_to_fp16)[name = tensor("aw_chunk_4235_cast_fp16")]; + tensor var_40909_equation_0 = const()[name = tensor("op_40909_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_40909_cast_fp16 = einsum(equation = var_40909_equation_0, values = (var_40635_cast_fp16, var_40307_cast_fp16))[name = tensor("op_40909_cast_fp16")]; + tensor var_40910_to_fp16 = const()[name = tensor("op_40910_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4237_cast_fp16 = mul(x = var_40909_cast_fp16, y = var_40910_to_fp16)[name = tensor("aw_chunk_4237_cast_fp16")]; + tensor var_40913_equation_0 = const()[name = tensor("op_40913_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_40913_cast_fp16 = einsum(equation = var_40913_equation_0, values = (var_40635_cast_fp16, var_40314_cast_fp16))[name = tensor("op_40913_cast_fp16")]; + tensor var_40914_to_fp16 = const()[name = tensor("op_40914_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4239_cast_fp16 = mul(x = var_40913_cast_fp16, y = var_40914_to_fp16)[name = tensor("aw_chunk_4239_cast_fp16")]; + tensor var_40917_equation_0 = const()[name = tensor("op_40917_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_40917_cast_fp16 = einsum(equation = var_40917_equation_0, values = (var_40639_cast_fp16, var_40321_cast_fp16))[name = tensor("op_40917_cast_fp16")]; + tensor var_40918_to_fp16 = const()[name = tensor("op_40918_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4241_cast_fp16 = mul(x = var_40917_cast_fp16, y = var_40918_to_fp16)[name = tensor("aw_chunk_4241_cast_fp16")]; + tensor var_40921_equation_0 = const()[name = tensor("op_40921_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_40921_cast_fp16 = einsum(equation = var_40921_equation_0, values = (var_40639_cast_fp16, var_40328_cast_fp16))[name = tensor("op_40921_cast_fp16")]; + tensor var_40922_to_fp16 = const()[name = tensor("op_40922_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4243_cast_fp16 = mul(x = var_40921_cast_fp16, y = var_40922_to_fp16)[name = tensor("aw_chunk_4243_cast_fp16")]; + tensor var_40925_equation_0 = const()[name = tensor("op_40925_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_40925_cast_fp16 = einsum(equation = var_40925_equation_0, values = (var_40639_cast_fp16, var_40335_cast_fp16))[name = tensor("op_40925_cast_fp16")]; + tensor var_40926_to_fp16 = const()[name = tensor("op_40926_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4245_cast_fp16 = mul(x = var_40925_cast_fp16, y = var_40926_to_fp16)[name = tensor("aw_chunk_4245_cast_fp16")]; + tensor var_40929_equation_0 = const()[name = tensor("op_40929_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_40929_cast_fp16 = einsum(equation = var_40929_equation_0, values = (var_40639_cast_fp16, var_40342_cast_fp16))[name = tensor("op_40929_cast_fp16")]; + tensor var_40930_to_fp16 = const()[name = tensor("op_40930_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4247_cast_fp16 = mul(x = var_40929_cast_fp16, y = var_40930_to_fp16)[name = tensor("aw_chunk_4247_cast_fp16")]; + tensor var_40933_equation_0 = const()[name = tensor("op_40933_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_40933_cast_fp16 = einsum(equation = var_40933_equation_0, values = (var_40643_cast_fp16, var_40349_cast_fp16))[name = tensor("op_40933_cast_fp16")]; + tensor var_40934_to_fp16 = const()[name = tensor("op_40934_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4249_cast_fp16 = mul(x = var_40933_cast_fp16, y = var_40934_to_fp16)[name = tensor("aw_chunk_4249_cast_fp16")]; + tensor var_40937_equation_0 = const()[name = tensor("op_40937_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_40937_cast_fp16 = einsum(equation = var_40937_equation_0, values = (var_40643_cast_fp16, var_40356_cast_fp16))[name = tensor("op_40937_cast_fp16")]; + tensor var_40938_to_fp16 = const()[name = tensor("op_40938_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4251_cast_fp16 = mul(x = var_40937_cast_fp16, y = var_40938_to_fp16)[name = tensor("aw_chunk_4251_cast_fp16")]; + tensor var_40941_equation_0 = const()[name = tensor("op_40941_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_40941_cast_fp16 = einsum(equation = var_40941_equation_0, values = (var_40643_cast_fp16, var_40363_cast_fp16))[name = tensor("op_40941_cast_fp16")]; + tensor var_40942_to_fp16 = const()[name = tensor("op_40942_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4253_cast_fp16 = mul(x = var_40941_cast_fp16, y = var_40942_to_fp16)[name = tensor("aw_chunk_4253_cast_fp16")]; + tensor var_40945_equation_0 = const()[name = tensor("op_40945_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_40945_cast_fp16 = einsum(equation = var_40945_equation_0, values = (var_40643_cast_fp16, var_40370_cast_fp16))[name = tensor("op_40945_cast_fp16")]; + tensor var_40946_to_fp16 = const()[name = tensor("op_40946_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4255_cast_fp16 = mul(x = var_40945_cast_fp16, y = var_40946_to_fp16)[name = tensor("aw_chunk_4255_cast_fp16")]; + tensor var_40949_equation_0 = const()[name = tensor("op_40949_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_40949_cast_fp16 = einsum(equation = var_40949_equation_0, values = (var_40647_cast_fp16, var_40377_cast_fp16))[name = tensor("op_40949_cast_fp16")]; + tensor var_40950_to_fp16 = const()[name = tensor("op_40950_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4257_cast_fp16 = mul(x = var_40949_cast_fp16, y = var_40950_to_fp16)[name = tensor("aw_chunk_4257_cast_fp16")]; + tensor var_40953_equation_0 = const()[name = tensor("op_40953_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_40953_cast_fp16 = einsum(equation = var_40953_equation_0, values = (var_40647_cast_fp16, var_40384_cast_fp16))[name = tensor("op_40953_cast_fp16")]; + tensor var_40954_to_fp16 = const()[name = tensor("op_40954_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4259_cast_fp16 = mul(x = var_40953_cast_fp16, y = var_40954_to_fp16)[name = tensor("aw_chunk_4259_cast_fp16")]; + tensor var_40957_equation_0 = const()[name = tensor("op_40957_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_40957_cast_fp16 = einsum(equation = var_40957_equation_0, values = (var_40647_cast_fp16, var_40391_cast_fp16))[name = tensor("op_40957_cast_fp16")]; + tensor var_40958_to_fp16 = const()[name = tensor("op_40958_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4261_cast_fp16 = mul(x = var_40957_cast_fp16, y = var_40958_to_fp16)[name = tensor("aw_chunk_4261_cast_fp16")]; + tensor var_40961_equation_0 = const()[name = tensor("op_40961_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_40961_cast_fp16 = einsum(equation = var_40961_equation_0, values = (var_40647_cast_fp16, var_40398_cast_fp16))[name = tensor("op_40961_cast_fp16")]; + tensor var_40962_to_fp16 = const()[name = tensor("op_40962_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4263_cast_fp16 = mul(x = var_40961_cast_fp16, y = var_40962_to_fp16)[name = tensor("aw_chunk_4263_cast_fp16")]; + tensor var_40965_equation_0 = const()[name = tensor("op_40965_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_40965_cast_fp16 = einsum(equation = var_40965_equation_0, values = (var_40651_cast_fp16, var_40405_cast_fp16))[name = tensor("op_40965_cast_fp16")]; + tensor var_40966_to_fp16 = const()[name = tensor("op_40966_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4265_cast_fp16 = mul(x = var_40965_cast_fp16, y = var_40966_to_fp16)[name = tensor("aw_chunk_4265_cast_fp16")]; + tensor var_40969_equation_0 = const()[name = tensor("op_40969_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_40969_cast_fp16 = einsum(equation = var_40969_equation_0, values = (var_40651_cast_fp16, var_40412_cast_fp16))[name = tensor("op_40969_cast_fp16")]; + tensor var_40970_to_fp16 = const()[name = tensor("op_40970_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4267_cast_fp16 = mul(x = var_40969_cast_fp16, y = var_40970_to_fp16)[name = tensor("aw_chunk_4267_cast_fp16")]; + tensor var_40973_equation_0 = const()[name = tensor("op_40973_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_40973_cast_fp16 = einsum(equation = var_40973_equation_0, values = (var_40651_cast_fp16, var_40419_cast_fp16))[name = tensor("op_40973_cast_fp16")]; + tensor var_40974_to_fp16 = const()[name = tensor("op_40974_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4269_cast_fp16 = mul(x = var_40973_cast_fp16, y = var_40974_to_fp16)[name = tensor("aw_chunk_4269_cast_fp16")]; + tensor var_40977_equation_0 = const()[name = tensor("op_40977_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_40977_cast_fp16 = einsum(equation = var_40977_equation_0, values = (var_40651_cast_fp16, var_40426_cast_fp16))[name = tensor("op_40977_cast_fp16")]; + tensor var_40978_to_fp16 = const()[name = tensor("op_40978_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4271_cast_fp16 = mul(x = var_40977_cast_fp16, y = var_40978_to_fp16)[name = tensor("aw_chunk_4271_cast_fp16")]; + tensor var_40981_equation_0 = const()[name = tensor("op_40981_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_40981_cast_fp16 = einsum(equation = var_40981_equation_0, values = (var_40655_cast_fp16, var_40433_cast_fp16))[name = tensor("op_40981_cast_fp16")]; + tensor var_40982_to_fp16 = const()[name = tensor("op_40982_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4273_cast_fp16 = mul(x = var_40981_cast_fp16, y = var_40982_to_fp16)[name = tensor("aw_chunk_4273_cast_fp16")]; + tensor var_40985_equation_0 = const()[name = tensor("op_40985_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_40985_cast_fp16 = einsum(equation = var_40985_equation_0, values = (var_40655_cast_fp16, var_40440_cast_fp16))[name = tensor("op_40985_cast_fp16")]; + tensor var_40986_to_fp16 = const()[name = tensor("op_40986_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4275_cast_fp16 = mul(x = var_40985_cast_fp16, y = var_40986_to_fp16)[name = tensor("aw_chunk_4275_cast_fp16")]; + tensor var_40989_equation_0 = const()[name = tensor("op_40989_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_40989_cast_fp16 = einsum(equation = var_40989_equation_0, values = (var_40655_cast_fp16, var_40447_cast_fp16))[name = tensor("op_40989_cast_fp16")]; + tensor var_40990_to_fp16 = const()[name = tensor("op_40990_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4277_cast_fp16 = mul(x = var_40989_cast_fp16, y = var_40990_to_fp16)[name = tensor("aw_chunk_4277_cast_fp16")]; + tensor var_40993_equation_0 = const()[name = tensor("op_40993_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_40993_cast_fp16 = einsum(equation = var_40993_equation_0, values = (var_40655_cast_fp16, var_40454_cast_fp16))[name = tensor("op_40993_cast_fp16")]; + tensor var_40994_to_fp16 = const()[name = tensor("op_40994_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4279_cast_fp16 = mul(x = var_40993_cast_fp16, y = var_40994_to_fp16)[name = tensor("aw_chunk_4279_cast_fp16")]; + tensor var_40997_equation_0 = const()[name = tensor("op_40997_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_40997_cast_fp16 = einsum(equation = var_40997_equation_0, values = (var_40659_cast_fp16, var_40461_cast_fp16))[name = tensor("op_40997_cast_fp16")]; + tensor var_40998_to_fp16 = const()[name = tensor("op_40998_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4281_cast_fp16 = mul(x = var_40997_cast_fp16, y = var_40998_to_fp16)[name = tensor("aw_chunk_4281_cast_fp16")]; + tensor var_41001_equation_0 = const()[name = tensor("op_41001_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_41001_cast_fp16 = einsum(equation = var_41001_equation_0, values = (var_40659_cast_fp16, var_40468_cast_fp16))[name = tensor("op_41001_cast_fp16")]; + tensor var_41002_to_fp16 = const()[name = tensor("op_41002_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4283_cast_fp16 = mul(x = var_41001_cast_fp16, y = var_41002_to_fp16)[name = tensor("aw_chunk_4283_cast_fp16")]; + tensor var_41005_equation_0 = const()[name = tensor("op_41005_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_41005_cast_fp16 = einsum(equation = var_41005_equation_0, values = (var_40659_cast_fp16, var_40475_cast_fp16))[name = tensor("op_41005_cast_fp16")]; + tensor var_41006_to_fp16 = const()[name = tensor("op_41006_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4285_cast_fp16 = mul(x = var_41005_cast_fp16, y = var_41006_to_fp16)[name = tensor("aw_chunk_4285_cast_fp16")]; + tensor var_41009_equation_0 = const()[name = tensor("op_41009_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_41009_cast_fp16 = einsum(equation = var_41009_equation_0, values = (var_40659_cast_fp16, var_40482_cast_fp16))[name = tensor("op_41009_cast_fp16")]; + tensor var_41010_to_fp16 = const()[name = tensor("op_41010_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4287_cast_fp16 = mul(x = var_41009_cast_fp16, y = var_41010_to_fp16)[name = tensor("aw_chunk_4287_cast_fp16")]; + tensor var_41013_equation_0 = const()[name = tensor("op_41013_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_41013_cast_fp16 = einsum(equation = var_41013_equation_0, values = (var_40663_cast_fp16, var_40489_cast_fp16))[name = tensor("op_41013_cast_fp16")]; + tensor var_41014_to_fp16 = const()[name = tensor("op_41014_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4289_cast_fp16 = mul(x = var_41013_cast_fp16, y = var_41014_to_fp16)[name = tensor("aw_chunk_4289_cast_fp16")]; + tensor var_41017_equation_0 = const()[name = tensor("op_41017_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_41017_cast_fp16 = einsum(equation = var_41017_equation_0, values = (var_40663_cast_fp16, var_40496_cast_fp16))[name = tensor("op_41017_cast_fp16")]; + tensor var_41018_to_fp16 = const()[name = tensor("op_41018_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4291_cast_fp16 = mul(x = var_41017_cast_fp16, y = var_41018_to_fp16)[name = tensor("aw_chunk_4291_cast_fp16")]; + tensor var_41021_equation_0 = const()[name = tensor("op_41021_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_41021_cast_fp16 = einsum(equation = var_41021_equation_0, values = (var_40663_cast_fp16, var_40503_cast_fp16))[name = tensor("op_41021_cast_fp16")]; + tensor var_41022_to_fp16 = const()[name = tensor("op_41022_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4293_cast_fp16 = mul(x = var_41021_cast_fp16, y = var_41022_to_fp16)[name = tensor("aw_chunk_4293_cast_fp16")]; + tensor var_41025_equation_0 = const()[name = tensor("op_41025_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_41025_cast_fp16 = einsum(equation = var_41025_equation_0, values = (var_40663_cast_fp16, var_40510_cast_fp16))[name = tensor("op_41025_cast_fp16")]; + tensor var_41026_to_fp16 = const()[name = tensor("op_41026_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4295_cast_fp16 = mul(x = var_41025_cast_fp16, y = var_41026_to_fp16)[name = tensor("aw_chunk_4295_cast_fp16")]; + tensor var_41029_equation_0 = const()[name = tensor("op_41029_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_41029_cast_fp16 = einsum(equation = var_41029_equation_0, values = (var_40667_cast_fp16, var_40517_cast_fp16))[name = tensor("op_41029_cast_fp16")]; + tensor var_41030_to_fp16 = const()[name = tensor("op_41030_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4297_cast_fp16 = mul(x = var_41029_cast_fp16, y = var_41030_to_fp16)[name = tensor("aw_chunk_4297_cast_fp16")]; + tensor var_41033_equation_0 = const()[name = tensor("op_41033_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_41033_cast_fp16 = einsum(equation = var_41033_equation_0, values = (var_40667_cast_fp16, var_40524_cast_fp16))[name = tensor("op_41033_cast_fp16")]; + tensor var_41034_to_fp16 = const()[name = tensor("op_41034_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4299_cast_fp16 = mul(x = var_41033_cast_fp16, y = var_41034_to_fp16)[name = tensor("aw_chunk_4299_cast_fp16")]; + tensor var_41037_equation_0 = const()[name = tensor("op_41037_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_41037_cast_fp16 = einsum(equation = var_41037_equation_0, values = (var_40667_cast_fp16, var_40531_cast_fp16))[name = tensor("op_41037_cast_fp16")]; + tensor var_41038_to_fp16 = const()[name = tensor("op_41038_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4301_cast_fp16 = mul(x = var_41037_cast_fp16, y = var_41038_to_fp16)[name = tensor("aw_chunk_4301_cast_fp16")]; + tensor var_41041_equation_0 = const()[name = tensor("op_41041_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_41041_cast_fp16 = einsum(equation = var_41041_equation_0, values = (var_40667_cast_fp16, var_40538_cast_fp16))[name = tensor("op_41041_cast_fp16")]; + tensor var_41042_to_fp16 = const()[name = tensor("op_41042_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4303_cast_fp16 = mul(x = var_41041_cast_fp16, y = var_41042_to_fp16)[name = tensor("aw_chunk_4303_cast_fp16")]; + tensor var_41045_equation_0 = const()[name = tensor("op_41045_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_41045_cast_fp16 = einsum(equation = var_41045_equation_0, values = (var_40671_cast_fp16, var_40545_cast_fp16))[name = tensor("op_41045_cast_fp16")]; + tensor var_41046_to_fp16 = const()[name = tensor("op_41046_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4305_cast_fp16 = mul(x = var_41045_cast_fp16, y = var_41046_to_fp16)[name = tensor("aw_chunk_4305_cast_fp16")]; + tensor var_41049_equation_0 = const()[name = tensor("op_41049_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_41049_cast_fp16 = einsum(equation = var_41049_equation_0, values = (var_40671_cast_fp16, var_40552_cast_fp16))[name = tensor("op_41049_cast_fp16")]; + tensor var_41050_to_fp16 = const()[name = tensor("op_41050_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4307_cast_fp16 = mul(x = var_41049_cast_fp16, y = var_41050_to_fp16)[name = tensor("aw_chunk_4307_cast_fp16")]; + tensor var_41053_equation_0 = const()[name = tensor("op_41053_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_41053_cast_fp16 = einsum(equation = var_41053_equation_0, values = (var_40671_cast_fp16, var_40559_cast_fp16))[name = tensor("op_41053_cast_fp16")]; + tensor var_41054_to_fp16 = const()[name = tensor("op_41054_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4309_cast_fp16 = mul(x = var_41053_cast_fp16, y = var_41054_to_fp16)[name = tensor("aw_chunk_4309_cast_fp16")]; + tensor var_41057_equation_0 = const()[name = tensor("op_41057_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_41057_cast_fp16 = einsum(equation = var_41057_equation_0, values = (var_40671_cast_fp16, var_40566_cast_fp16))[name = tensor("op_41057_cast_fp16")]; + tensor var_41058_to_fp16 = const()[name = tensor("op_41058_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4311_cast_fp16 = mul(x = var_41057_cast_fp16, y = var_41058_to_fp16)[name = tensor("aw_chunk_4311_cast_fp16")]; + tensor var_41061_equation_0 = const()[name = tensor("op_41061_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_41061_cast_fp16 = einsum(equation = var_41061_equation_0, values = (var_40675_cast_fp16, var_40573_cast_fp16))[name = tensor("op_41061_cast_fp16")]; + tensor var_41062_to_fp16 = const()[name = tensor("op_41062_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4313_cast_fp16 = mul(x = var_41061_cast_fp16, y = var_41062_to_fp16)[name = tensor("aw_chunk_4313_cast_fp16")]; + tensor var_41065_equation_0 = const()[name = tensor("op_41065_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_41065_cast_fp16 = einsum(equation = var_41065_equation_0, values = (var_40675_cast_fp16, var_40580_cast_fp16))[name = tensor("op_41065_cast_fp16")]; + tensor var_41066_to_fp16 = const()[name = tensor("op_41066_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4315_cast_fp16 = mul(x = var_41065_cast_fp16, y = var_41066_to_fp16)[name = tensor("aw_chunk_4315_cast_fp16")]; + tensor var_41069_equation_0 = const()[name = tensor("op_41069_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_41069_cast_fp16 = einsum(equation = var_41069_equation_0, values = (var_40675_cast_fp16, var_40587_cast_fp16))[name = tensor("op_41069_cast_fp16")]; + tensor var_41070_to_fp16 = const()[name = tensor("op_41070_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4317_cast_fp16 = mul(x = var_41069_cast_fp16, y = var_41070_to_fp16)[name = tensor("aw_chunk_4317_cast_fp16")]; + tensor var_41073_equation_0 = const()[name = tensor("op_41073_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_41073_cast_fp16 = einsum(equation = var_41073_equation_0, values = (var_40675_cast_fp16, var_40594_cast_fp16))[name = tensor("op_41073_cast_fp16")]; + tensor var_41074_to_fp16 = const()[name = tensor("op_41074_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4319_cast_fp16 = mul(x = var_41073_cast_fp16, y = var_41074_to_fp16)[name = tensor("aw_chunk_4319_cast_fp16")]; + tensor var_41076_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4161_cast_fp16)[name = tensor("op_41076_cast_fp16")]; + tensor var_41077_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4163_cast_fp16)[name = tensor("op_41077_cast_fp16")]; + tensor var_41078_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4165_cast_fp16)[name = tensor("op_41078_cast_fp16")]; + tensor var_41079_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4167_cast_fp16)[name = tensor("op_41079_cast_fp16")]; + tensor var_41080_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4169_cast_fp16)[name = tensor("op_41080_cast_fp16")]; + tensor var_41081_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4171_cast_fp16)[name = tensor("op_41081_cast_fp16")]; + tensor var_41082_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4173_cast_fp16)[name = tensor("op_41082_cast_fp16")]; + tensor var_41083_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4175_cast_fp16)[name = tensor("op_41083_cast_fp16")]; + tensor var_41084_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4177_cast_fp16)[name = tensor("op_41084_cast_fp16")]; + tensor var_41085_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4179_cast_fp16)[name = tensor("op_41085_cast_fp16")]; + tensor var_41086_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4181_cast_fp16)[name = tensor("op_41086_cast_fp16")]; + tensor var_41087_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4183_cast_fp16)[name = tensor("op_41087_cast_fp16")]; + tensor var_41088_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4185_cast_fp16)[name = tensor("op_41088_cast_fp16")]; + tensor var_41089_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4187_cast_fp16)[name = tensor("op_41089_cast_fp16")]; + tensor var_41090_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4189_cast_fp16)[name = tensor("op_41090_cast_fp16")]; + tensor var_41091_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4191_cast_fp16)[name = tensor("op_41091_cast_fp16")]; + tensor var_41092_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4193_cast_fp16)[name = tensor("op_41092_cast_fp16")]; + tensor var_41093_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4195_cast_fp16)[name = tensor("op_41093_cast_fp16")]; + tensor var_41094_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4197_cast_fp16)[name = tensor("op_41094_cast_fp16")]; + tensor var_41095_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4199_cast_fp16)[name = tensor("op_41095_cast_fp16")]; + tensor var_41096_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4201_cast_fp16)[name = tensor("op_41096_cast_fp16")]; + tensor var_41097_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4203_cast_fp16)[name = tensor("op_41097_cast_fp16")]; + tensor var_41098_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4205_cast_fp16)[name = tensor("op_41098_cast_fp16")]; + tensor var_41099_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4207_cast_fp16)[name = tensor("op_41099_cast_fp16")]; + tensor var_41100_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4209_cast_fp16)[name = tensor("op_41100_cast_fp16")]; + tensor var_41101_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4211_cast_fp16)[name = tensor("op_41101_cast_fp16")]; + tensor var_41102_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4213_cast_fp16)[name = tensor("op_41102_cast_fp16")]; + tensor var_41103_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4215_cast_fp16)[name = tensor("op_41103_cast_fp16")]; + tensor var_41104_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4217_cast_fp16)[name = tensor("op_41104_cast_fp16")]; + tensor var_41105_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4219_cast_fp16)[name = tensor("op_41105_cast_fp16")]; + tensor var_41106_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4221_cast_fp16)[name = tensor("op_41106_cast_fp16")]; + tensor var_41107_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4223_cast_fp16)[name = tensor("op_41107_cast_fp16")]; + tensor var_41108_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4225_cast_fp16)[name = tensor("op_41108_cast_fp16")]; + tensor var_41109_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4227_cast_fp16)[name = tensor("op_41109_cast_fp16")]; + tensor var_41110_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4229_cast_fp16)[name = tensor("op_41110_cast_fp16")]; + tensor var_41111_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4231_cast_fp16)[name = tensor("op_41111_cast_fp16")]; + tensor var_41112_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4233_cast_fp16)[name = tensor("op_41112_cast_fp16")]; + tensor var_41113_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4235_cast_fp16)[name = tensor("op_41113_cast_fp16")]; + tensor var_41114_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4237_cast_fp16)[name = tensor("op_41114_cast_fp16")]; + tensor var_41115_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4239_cast_fp16)[name = tensor("op_41115_cast_fp16")]; + tensor var_41116_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4241_cast_fp16)[name = tensor("op_41116_cast_fp16")]; + tensor var_41117_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4243_cast_fp16)[name = tensor("op_41117_cast_fp16")]; + tensor var_41118_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4245_cast_fp16)[name = tensor("op_41118_cast_fp16")]; + tensor var_41119_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4247_cast_fp16)[name = tensor("op_41119_cast_fp16")]; + tensor var_41120_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4249_cast_fp16)[name = tensor("op_41120_cast_fp16")]; + tensor var_41121_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4251_cast_fp16)[name = tensor("op_41121_cast_fp16")]; + tensor var_41122_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4253_cast_fp16)[name = tensor("op_41122_cast_fp16")]; + tensor var_41123_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4255_cast_fp16)[name = tensor("op_41123_cast_fp16")]; + tensor var_41124_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4257_cast_fp16)[name = tensor("op_41124_cast_fp16")]; + tensor var_41125_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4259_cast_fp16)[name = tensor("op_41125_cast_fp16")]; + tensor var_41126_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4261_cast_fp16)[name = tensor("op_41126_cast_fp16")]; + tensor var_41127_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4263_cast_fp16)[name = tensor("op_41127_cast_fp16")]; + tensor var_41128_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4265_cast_fp16)[name = tensor("op_41128_cast_fp16")]; + tensor var_41129_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4267_cast_fp16)[name = tensor("op_41129_cast_fp16")]; + tensor var_41130_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4269_cast_fp16)[name = tensor("op_41130_cast_fp16")]; + tensor var_41131_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4271_cast_fp16)[name = tensor("op_41131_cast_fp16")]; + tensor var_41132_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4273_cast_fp16)[name = tensor("op_41132_cast_fp16")]; + tensor var_41133_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4275_cast_fp16)[name = tensor("op_41133_cast_fp16")]; + tensor var_41134_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4277_cast_fp16)[name = tensor("op_41134_cast_fp16")]; + tensor var_41135_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4279_cast_fp16)[name = tensor("op_41135_cast_fp16")]; + tensor var_41136_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4281_cast_fp16)[name = tensor("op_41136_cast_fp16")]; + tensor var_41137_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4283_cast_fp16)[name = tensor("op_41137_cast_fp16")]; + tensor var_41138_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4285_cast_fp16)[name = tensor("op_41138_cast_fp16")]; + tensor var_41139_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4287_cast_fp16)[name = tensor("op_41139_cast_fp16")]; + tensor var_41140_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4289_cast_fp16)[name = tensor("op_41140_cast_fp16")]; + tensor var_41141_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4291_cast_fp16)[name = tensor("op_41141_cast_fp16")]; + tensor var_41142_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4293_cast_fp16)[name = tensor("op_41142_cast_fp16")]; + tensor var_41143_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4295_cast_fp16)[name = tensor("op_41143_cast_fp16")]; + tensor var_41144_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4297_cast_fp16)[name = tensor("op_41144_cast_fp16")]; + tensor var_41145_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4299_cast_fp16)[name = tensor("op_41145_cast_fp16")]; + tensor var_41146_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4301_cast_fp16)[name = tensor("op_41146_cast_fp16")]; + tensor var_41147_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4303_cast_fp16)[name = tensor("op_41147_cast_fp16")]; + tensor var_41148_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4305_cast_fp16)[name = tensor("op_41148_cast_fp16")]; + tensor var_41149_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4307_cast_fp16)[name = tensor("op_41149_cast_fp16")]; + tensor var_41150_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4309_cast_fp16)[name = tensor("op_41150_cast_fp16")]; + tensor var_41151_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4311_cast_fp16)[name = tensor("op_41151_cast_fp16")]; + tensor var_41152_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4313_cast_fp16)[name = tensor("op_41152_cast_fp16")]; + tensor var_41153_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4315_cast_fp16)[name = tensor("op_41153_cast_fp16")]; + tensor var_41154_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4317_cast_fp16)[name = tensor("op_41154_cast_fp16")]; + tensor var_41155_cast_fp16 = softmax(axis = var_39901, x = aw_chunk_4319_cast_fp16)[name = tensor("op_41155_cast_fp16")]; + tensor var_41157_equation_0 = const()[name = tensor("op_41157_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41157_cast_fp16 = einsum(equation = var_41157_equation_0, values = (var_40677_cast_fp16, var_41076_cast_fp16))[name = tensor("op_41157_cast_fp16")]; + tensor var_41159_equation_0 = const()[name = tensor("op_41159_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41159_cast_fp16 = einsum(equation = var_41159_equation_0, values = (var_40677_cast_fp16, var_41077_cast_fp16))[name = tensor("op_41159_cast_fp16")]; + tensor var_41161_equation_0 = const()[name = tensor("op_41161_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41161_cast_fp16 = einsum(equation = var_41161_equation_0, values = (var_40677_cast_fp16, var_41078_cast_fp16))[name = tensor("op_41161_cast_fp16")]; + tensor var_41163_equation_0 = const()[name = tensor("op_41163_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41163_cast_fp16 = einsum(equation = var_41163_equation_0, values = (var_40677_cast_fp16, var_41079_cast_fp16))[name = tensor("op_41163_cast_fp16")]; + tensor var_41165_equation_0 = const()[name = tensor("op_41165_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41165_cast_fp16 = einsum(equation = var_41165_equation_0, values = (var_40681_cast_fp16, var_41080_cast_fp16))[name = tensor("op_41165_cast_fp16")]; + tensor var_41167_equation_0 = const()[name = tensor("op_41167_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41167_cast_fp16 = einsum(equation = var_41167_equation_0, values = (var_40681_cast_fp16, var_41081_cast_fp16))[name = tensor("op_41167_cast_fp16")]; + tensor var_41169_equation_0 = const()[name = tensor("op_41169_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41169_cast_fp16 = einsum(equation = var_41169_equation_0, values = (var_40681_cast_fp16, var_41082_cast_fp16))[name = tensor("op_41169_cast_fp16")]; + tensor var_41171_equation_0 = const()[name = tensor("op_41171_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41171_cast_fp16 = einsum(equation = var_41171_equation_0, values = (var_40681_cast_fp16, var_41083_cast_fp16))[name = tensor("op_41171_cast_fp16")]; + tensor var_41173_equation_0 = const()[name = tensor("op_41173_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41173_cast_fp16 = einsum(equation = var_41173_equation_0, values = (var_40685_cast_fp16, var_41084_cast_fp16))[name = tensor("op_41173_cast_fp16")]; + tensor var_41175_equation_0 = const()[name = tensor("op_41175_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41175_cast_fp16 = einsum(equation = var_41175_equation_0, values = (var_40685_cast_fp16, var_41085_cast_fp16))[name = tensor("op_41175_cast_fp16")]; + tensor var_41177_equation_0 = const()[name = tensor("op_41177_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41177_cast_fp16 = einsum(equation = var_41177_equation_0, values = (var_40685_cast_fp16, var_41086_cast_fp16))[name = tensor("op_41177_cast_fp16")]; + tensor var_41179_equation_0 = const()[name = tensor("op_41179_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41179_cast_fp16 = einsum(equation = var_41179_equation_0, values = (var_40685_cast_fp16, var_41087_cast_fp16))[name = tensor("op_41179_cast_fp16")]; + tensor var_41181_equation_0 = const()[name = tensor("op_41181_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41181_cast_fp16 = einsum(equation = var_41181_equation_0, values = (var_40689_cast_fp16, var_41088_cast_fp16))[name = tensor("op_41181_cast_fp16")]; + tensor var_41183_equation_0 = const()[name = tensor("op_41183_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41183_cast_fp16 = einsum(equation = var_41183_equation_0, values = (var_40689_cast_fp16, var_41089_cast_fp16))[name = tensor("op_41183_cast_fp16")]; + tensor var_41185_equation_0 = const()[name = tensor("op_41185_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41185_cast_fp16 = einsum(equation = var_41185_equation_0, values = (var_40689_cast_fp16, var_41090_cast_fp16))[name = tensor("op_41185_cast_fp16")]; + tensor var_41187_equation_0 = const()[name = tensor("op_41187_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41187_cast_fp16 = einsum(equation = var_41187_equation_0, values = (var_40689_cast_fp16, var_41091_cast_fp16))[name = tensor("op_41187_cast_fp16")]; + tensor var_41189_equation_0 = const()[name = tensor("op_41189_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41189_cast_fp16 = einsum(equation = var_41189_equation_0, values = (var_40693_cast_fp16, var_41092_cast_fp16))[name = tensor("op_41189_cast_fp16")]; + tensor var_41191_equation_0 = const()[name = tensor("op_41191_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41191_cast_fp16 = einsum(equation = var_41191_equation_0, values = (var_40693_cast_fp16, var_41093_cast_fp16))[name = tensor("op_41191_cast_fp16")]; + tensor var_41193_equation_0 = const()[name = tensor("op_41193_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41193_cast_fp16 = einsum(equation = var_41193_equation_0, values = (var_40693_cast_fp16, var_41094_cast_fp16))[name = tensor("op_41193_cast_fp16")]; + tensor var_41195_equation_0 = const()[name = tensor("op_41195_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41195_cast_fp16 = einsum(equation = var_41195_equation_0, values = (var_40693_cast_fp16, var_41095_cast_fp16))[name = tensor("op_41195_cast_fp16")]; + tensor var_41197_equation_0 = const()[name = tensor("op_41197_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41197_cast_fp16 = einsum(equation = var_41197_equation_0, values = (var_40697_cast_fp16, var_41096_cast_fp16))[name = tensor("op_41197_cast_fp16")]; + tensor var_41199_equation_0 = const()[name = tensor("op_41199_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41199_cast_fp16 = einsum(equation = var_41199_equation_0, values = (var_40697_cast_fp16, var_41097_cast_fp16))[name = tensor("op_41199_cast_fp16")]; + tensor var_41201_equation_0 = const()[name = tensor("op_41201_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41201_cast_fp16 = einsum(equation = var_41201_equation_0, values = (var_40697_cast_fp16, var_41098_cast_fp16))[name = tensor("op_41201_cast_fp16")]; + tensor var_41203_equation_0 = const()[name = tensor("op_41203_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41203_cast_fp16 = einsum(equation = var_41203_equation_0, values = (var_40697_cast_fp16, var_41099_cast_fp16))[name = tensor("op_41203_cast_fp16")]; + tensor var_41205_equation_0 = const()[name = tensor("op_41205_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41205_cast_fp16 = einsum(equation = var_41205_equation_0, values = (var_40701_cast_fp16, var_41100_cast_fp16))[name = tensor("op_41205_cast_fp16")]; + tensor var_41207_equation_0 = const()[name = tensor("op_41207_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41207_cast_fp16 = einsum(equation = var_41207_equation_0, values = (var_40701_cast_fp16, var_41101_cast_fp16))[name = tensor("op_41207_cast_fp16")]; + tensor var_41209_equation_0 = const()[name = tensor("op_41209_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41209_cast_fp16 = einsum(equation = var_41209_equation_0, values = (var_40701_cast_fp16, var_41102_cast_fp16))[name = tensor("op_41209_cast_fp16")]; + tensor var_41211_equation_0 = const()[name = tensor("op_41211_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41211_cast_fp16 = einsum(equation = var_41211_equation_0, values = (var_40701_cast_fp16, var_41103_cast_fp16))[name = tensor("op_41211_cast_fp16")]; + tensor var_41213_equation_0 = const()[name = tensor("op_41213_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41213_cast_fp16 = einsum(equation = var_41213_equation_0, values = (var_40705_cast_fp16, var_41104_cast_fp16))[name = tensor("op_41213_cast_fp16")]; + tensor var_41215_equation_0 = const()[name = tensor("op_41215_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41215_cast_fp16 = einsum(equation = var_41215_equation_0, values = (var_40705_cast_fp16, var_41105_cast_fp16))[name = tensor("op_41215_cast_fp16")]; + tensor var_41217_equation_0 = const()[name = tensor("op_41217_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41217_cast_fp16 = einsum(equation = var_41217_equation_0, values = (var_40705_cast_fp16, var_41106_cast_fp16))[name = tensor("op_41217_cast_fp16")]; + tensor var_41219_equation_0 = const()[name = tensor("op_41219_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41219_cast_fp16 = einsum(equation = var_41219_equation_0, values = (var_40705_cast_fp16, var_41107_cast_fp16))[name = tensor("op_41219_cast_fp16")]; + tensor var_41221_equation_0 = const()[name = tensor("op_41221_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41221_cast_fp16 = einsum(equation = var_41221_equation_0, values = (var_40709_cast_fp16, var_41108_cast_fp16))[name = tensor("op_41221_cast_fp16")]; + tensor var_41223_equation_0 = const()[name = tensor("op_41223_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41223_cast_fp16 = einsum(equation = var_41223_equation_0, values = (var_40709_cast_fp16, var_41109_cast_fp16))[name = tensor("op_41223_cast_fp16")]; + tensor var_41225_equation_0 = const()[name = tensor("op_41225_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41225_cast_fp16 = einsum(equation = var_41225_equation_0, values = (var_40709_cast_fp16, var_41110_cast_fp16))[name = tensor("op_41225_cast_fp16")]; + tensor var_41227_equation_0 = const()[name = tensor("op_41227_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41227_cast_fp16 = einsum(equation = var_41227_equation_0, values = (var_40709_cast_fp16, var_41111_cast_fp16))[name = tensor("op_41227_cast_fp16")]; + tensor var_41229_equation_0 = const()[name = tensor("op_41229_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41229_cast_fp16 = einsum(equation = var_41229_equation_0, values = (var_40713_cast_fp16, var_41112_cast_fp16))[name = tensor("op_41229_cast_fp16")]; + tensor var_41231_equation_0 = const()[name = tensor("op_41231_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41231_cast_fp16 = einsum(equation = var_41231_equation_0, values = (var_40713_cast_fp16, var_41113_cast_fp16))[name = tensor("op_41231_cast_fp16")]; + tensor var_41233_equation_0 = const()[name = tensor("op_41233_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41233_cast_fp16 = einsum(equation = var_41233_equation_0, values = (var_40713_cast_fp16, var_41114_cast_fp16))[name = tensor("op_41233_cast_fp16")]; + tensor var_41235_equation_0 = const()[name = tensor("op_41235_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41235_cast_fp16 = einsum(equation = var_41235_equation_0, values = (var_40713_cast_fp16, var_41115_cast_fp16))[name = tensor("op_41235_cast_fp16")]; + tensor var_41237_equation_0 = const()[name = tensor("op_41237_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41237_cast_fp16 = einsum(equation = var_41237_equation_0, values = (var_40717_cast_fp16, var_41116_cast_fp16))[name = tensor("op_41237_cast_fp16")]; + tensor var_41239_equation_0 = const()[name = tensor("op_41239_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41239_cast_fp16 = einsum(equation = var_41239_equation_0, values = (var_40717_cast_fp16, var_41117_cast_fp16))[name = tensor("op_41239_cast_fp16")]; + tensor var_41241_equation_0 = const()[name = tensor("op_41241_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41241_cast_fp16 = einsum(equation = var_41241_equation_0, values = (var_40717_cast_fp16, var_41118_cast_fp16))[name = tensor("op_41241_cast_fp16")]; + tensor var_41243_equation_0 = const()[name = tensor("op_41243_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41243_cast_fp16 = einsum(equation = var_41243_equation_0, values = (var_40717_cast_fp16, var_41119_cast_fp16))[name = tensor("op_41243_cast_fp16")]; + tensor var_41245_equation_0 = const()[name = tensor("op_41245_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41245_cast_fp16 = einsum(equation = var_41245_equation_0, values = (var_40721_cast_fp16, var_41120_cast_fp16))[name = tensor("op_41245_cast_fp16")]; + tensor var_41247_equation_0 = const()[name = tensor("op_41247_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41247_cast_fp16 = einsum(equation = var_41247_equation_0, values = (var_40721_cast_fp16, var_41121_cast_fp16))[name = tensor("op_41247_cast_fp16")]; + tensor var_41249_equation_0 = const()[name = tensor("op_41249_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41249_cast_fp16 = einsum(equation = var_41249_equation_0, values = (var_40721_cast_fp16, var_41122_cast_fp16))[name = tensor("op_41249_cast_fp16")]; + tensor var_41251_equation_0 = const()[name = tensor("op_41251_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41251_cast_fp16 = einsum(equation = var_41251_equation_0, values = (var_40721_cast_fp16, var_41123_cast_fp16))[name = tensor("op_41251_cast_fp16")]; + tensor var_41253_equation_0 = const()[name = tensor("op_41253_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41253_cast_fp16 = einsum(equation = var_41253_equation_0, values = (var_40725_cast_fp16, var_41124_cast_fp16))[name = tensor("op_41253_cast_fp16")]; + tensor var_41255_equation_0 = const()[name = tensor("op_41255_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41255_cast_fp16 = einsum(equation = var_41255_equation_0, values = (var_40725_cast_fp16, var_41125_cast_fp16))[name = tensor("op_41255_cast_fp16")]; + tensor var_41257_equation_0 = const()[name = tensor("op_41257_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41257_cast_fp16 = einsum(equation = var_41257_equation_0, values = (var_40725_cast_fp16, var_41126_cast_fp16))[name = tensor("op_41257_cast_fp16")]; + tensor var_41259_equation_0 = const()[name = tensor("op_41259_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41259_cast_fp16 = einsum(equation = var_41259_equation_0, values = (var_40725_cast_fp16, var_41127_cast_fp16))[name = tensor("op_41259_cast_fp16")]; + tensor var_41261_equation_0 = const()[name = tensor("op_41261_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41261_cast_fp16 = einsum(equation = var_41261_equation_0, values = (var_40729_cast_fp16, var_41128_cast_fp16))[name = tensor("op_41261_cast_fp16")]; + tensor var_41263_equation_0 = const()[name = tensor("op_41263_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41263_cast_fp16 = einsum(equation = var_41263_equation_0, values = (var_40729_cast_fp16, var_41129_cast_fp16))[name = tensor("op_41263_cast_fp16")]; + tensor var_41265_equation_0 = const()[name = tensor("op_41265_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41265_cast_fp16 = einsum(equation = var_41265_equation_0, values = (var_40729_cast_fp16, var_41130_cast_fp16))[name = tensor("op_41265_cast_fp16")]; + tensor var_41267_equation_0 = const()[name = tensor("op_41267_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41267_cast_fp16 = einsum(equation = var_41267_equation_0, values = (var_40729_cast_fp16, var_41131_cast_fp16))[name = tensor("op_41267_cast_fp16")]; + tensor var_41269_equation_0 = const()[name = tensor("op_41269_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41269_cast_fp16 = einsum(equation = var_41269_equation_0, values = (var_40733_cast_fp16, var_41132_cast_fp16))[name = tensor("op_41269_cast_fp16")]; + tensor var_41271_equation_0 = const()[name = tensor("op_41271_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41271_cast_fp16 = einsum(equation = var_41271_equation_0, values = (var_40733_cast_fp16, var_41133_cast_fp16))[name = tensor("op_41271_cast_fp16")]; + tensor var_41273_equation_0 = const()[name = tensor("op_41273_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41273_cast_fp16 = einsum(equation = var_41273_equation_0, values = (var_40733_cast_fp16, var_41134_cast_fp16))[name = tensor("op_41273_cast_fp16")]; + tensor var_41275_equation_0 = const()[name = tensor("op_41275_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41275_cast_fp16 = einsum(equation = var_41275_equation_0, values = (var_40733_cast_fp16, var_41135_cast_fp16))[name = tensor("op_41275_cast_fp16")]; + tensor var_41277_equation_0 = const()[name = tensor("op_41277_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41277_cast_fp16 = einsum(equation = var_41277_equation_0, values = (var_40737_cast_fp16, var_41136_cast_fp16))[name = tensor("op_41277_cast_fp16")]; + tensor var_41279_equation_0 = const()[name = tensor("op_41279_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41279_cast_fp16 = einsum(equation = var_41279_equation_0, values = (var_40737_cast_fp16, var_41137_cast_fp16))[name = tensor("op_41279_cast_fp16")]; + tensor var_41281_equation_0 = const()[name = tensor("op_41281_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41281_cast_fp16 = einsum(equation = var_41281_equation_0, values = (var_40737_cast_fp16, var_41138_cast_fp16))[name = tensor("op_41281_cast_fp16")]; + tensor var_41283_equation_0 = const()[name = tensor("op_41283_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41283_cast_fp16 = einsum(equation = var_41283_equation_0, values = (var_40737_cast_fp16, var_41139_cast_fp16))[name = tensor("op_41283_cast_fp16")]; + tensor var_41285_equation_0 = const()[name = tensor("op_41285_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41285_cast_fp16 = einsum(equation = var_41285_equation_0, values = (var_40741_cast_fp16, var_41140_cast_fp16))[name = tensor("op_41285_cast_fp16")]; + tensor var_41287_equation_0 = const()[name = tensor("op_41287_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41287_cast_fp16 = einsum(equation = var_41287_equation_0, values = (var_40741_cast_fp16, var_41141_cast_fp16))[name = tensor("op_41287_cast_fp16")]; + tensor var_41289_equation_0 = const()[name = tensor("op_41289_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41289_cast_fp16 = einsum(equation = var_41289_equation_0, values = (var_40741_cast_fp16, var_41142_cast_fp16))[name = tensor("op_41289_cast_fp16")]; + tensor var_41291_equation_0 = const()[name = tensor("op_41291_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41291_cast_fp16 = einsum(equation = var_41291_equation_0, values = (var_40741_cast_fp16, var_41143_cast_fp16))[name = tensor("op_41291_cast_fp16")]; + tensor var_41293_equation_0 = const()[name = tensor("op_41293_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41293_cast_fp16 = einsum(equation = var_41293_equation_0, values = (var_40745_cast_fp16, var_41144_cast_fp16))[name = tensor("op_41293_cast_fp16")]; + tensor var_41295_equation_0 = const()[name = tensor("op_41295_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41295_cast_fp16 = einsum(equation = var_41295_equation_0, values = (var_40745_cast_fp16, var_41145_cast_fp16))[name = tensor("op_41295_cast_fp16")]; + tensor var_41297_equation_0 = const()[name = tensor("op_41297_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41297_cast_fp16 = einsum(equation = var_41297_equation_0, values = (var_40745_cast_fp16, var_41146_cast_fp16))[name = tensor("op_41297_cast_fp16")]; + tensor var_41299_equation_0 = const()[name = tensor("op_41299_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41299_cast_fp16 = einsum(equation = var_41299_equation_0, values = (var_40745_cast_fp16, var_41147_cast_fp16))[name = tensor("op_41299_cast_fp16")]; + tensor var_41301_equation_0 = const()[name = tensor("op_41301_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41301_cast_fp16 = einsum(equation = var_41301_equation_0, values = (var_40749_cast_fp16, var_41148_cast_fp16))[name = tensor("op_41301_cast_fp16")]; + tensor var_41303_equation_0 = const()[name = tensor("op_41303_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41303_cast_fp16 = einsum(equation = var_41303_equation_0, values = (var_40749_cast_fp16, var_41149_cast_fp16))[name = tensor("op_41303_cast_fp16")]; + tensor var_41305_equation_0 = const()[name = tensor("op_41305_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41305_cast_fp16 = einsum(equation = var_41305_equation_0, values = (var_40749_cast_fp16, var_41150_cast_fp16))[name = tensor("op_41305_cast_fp16")]; + tensor var_41307_equation_0 = const()[name = tensor("op_41307_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41307_cast_fp16 = einsum(equation = var_41307_equation_0, values = (var_40749_cast_fp16, var_41151_cast_fp16))[name = tensor("op_41307_cast_fp16")]; + tensor var_41309_equation_0 = const()[name = tensor("op_41309_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41309_cast_fp16 = einsum(equation = var_41309_equation_0, values = (var_40753_cast_fp16, var_41152_cast_fp16))[name = tensor("op_41309_cast_fp16")]; + tensor var_41311_equation_0 = const()[name = tensor("op_41311_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41311_cast_fp16 = einsum(equation = var_41311_equation_0, values = (var_40753_cast_fp16, var_41153_cast_fp16))[name = tensor("op_41311_cast_fp16")]; + tensor var_41313_equation_0 = const()[name = tensor("op_41313_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41313_cast_fp16 = einsum(equation = var_41313_equation_0, values = (var_40753_cast_fp16, var_41154_cast_fp16))[name = tensor("op_41313_cast_fp16")]; + tensor var_41315_equation_0 = const()[name = tensor("op_41315_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_41315_cast_fp16 = einsum(equation = var_41315_equation_0, values = (var_40753_cast_fp16, var_41155_cast_fp16))[name = tensor("op_41315_cast_fp16")]; + tensor var_41317_interleave_0 = const()[name = tensor("op_41317_interleave_0"), val = tensor(false)]; + tensor var_41317_cast_fp16 = concat(axis = var_39876, interleave = var_41317_interleave_0, values = (var_41157_cast_fp16, var_41159_cast_fp16, var_41161_cast_fp16, var_41163_cast_fp16))[name = tensor("op_41317_cast_fp16")]; + tensor var_41319_interleave_0 = const()[name = tensor("op_41319_interleave_0"), val = tensor(false)]; + tensor var_41319_cast_fp16 = concat(axis = var_39876, interleave = var_41319_interleave_0, values = (var_41165_cast_fp16, var_41167_cast_fp16, var_41169_cast_fp16, var_41171_cast_fp16))[name = tensor("op_41319_cast_fp16")]; + tensor var_41321_interleave_0 = const()[name = tensor("op_41321_interleave_0"), val = tensor(false)]; + tensor var_41321_cast_fp16 = concat(axis = var_39876, interleave = var_41321_interleave_0, values = (var_41173_cast_fp16, var_41175_cast_fp16, var_41177_cast_fp16, var_41179_cast_fp16))[name = tensor("op_41321_cast_fp16")]; + tensor var_41323_interleave_0 = const()[name = tensor("op_41323_interleave_0"), val = tensor(false)]; + tensor var_41323_cast_fp16 = concat(axis = var_39876, interleave = var_41323_interleave_0, values = (var_41181_cast_fp16, var_41183_cast_fp16, var_41185_cast_fp16, var_41187_cast_fp16))[name = tensor("op_41323_cast_fp16")]; + tensor var_41325_interleave_0 = const()[name = tensor("op_41325_interleave_0"), val = tensor(false)]; + tensor var_41325_cast_fp16 = concat(axis = var_39876, interleave = var_41325_interleave_0, values = (var_41189_cast_fp16, var_41191_cast_fp16, var_41193_cast_fp16, var_41195_cast_fp16))[name = tensor("op_41325_cast_fp16")]; + tensor var_41327_interleave_0 = const()[name = tensor("op_41327_interleave_0"), val = tensor(false)]; + tensor var_41327_cast_fp16 = concat(axis = var_39876, interleave = var_41327_interleave_0, values = (var_41197_cast_fp16, var_41199_cast_fp16, var_41201_cast_fp16, var_41203_cast_fp16))[name = tensor("op_41327_cast_fp16")]; + tensor var_41329_interleave_0 = const()[name = tensor("op_41329_interleave_0"), val = tensor(false)]; + tensor var_41329_cast_fp16 = concat(axis = var_39876, interleave = var_41329_interleave_0, values = (var_41205_cast_fp16, var_41207_cast_fp16, var_41209_cast_fp16, var_41211_cast_fp16))[name = tensor("op_41329_cast_fp16")]; + tensor var_41331_interleave_0 = const()[name = tensor("op_41331_interleave_0"), val = tensor(false)]; + tensor var_41331_cast_fp16 = concat(axis = var_39876, interleave = var_41331_interleave_0, values = (var_41213_cast_fp16, var_41215_cast_fp16, var_41217_cast_fp16, var_41219_cast_fp16))[name = tensor("op_41331_cast_fp16")]; + tensor var_41333_interleave_0 = const()[name = tensor("op_41333_interleave_0"), val = tensor(false)]; + tensor var_41333_cast_fp16 = concat(axis = var_39876, interleave = var_41333_interleave_0, values = (var_41221_cast_fp16, var_41223_cast_fp16, var_41225_cast_fp16, var_41227_cast_fp16))[name = tensor("op_41333_cast_fp16")]; + tensor var_41335_interleave_0 = const()[name = tensor("op_41335_interleave_0"), val = tensor(false)]; + tensor var_41335_cast_fp16 = concat(axis = var_39876, interleave = var_41335_interleave_0, values = (var_41229_cast_fp16, var_41231_cast_fp16, var_41233_cast_fp16, var_41235_cast_fp16))[name = tensor("op_41335_cast_fp16")]; + tensor var_41337_interleave_0 = const()[name = tensor("op_41337_interleave_0"), val = tensor(false)]; + tensor var_41337_cast_fp16 = concat(axis = var_39876, interleave = var_41337_interleave_0, values = (var_41237_cast_fp16, var_41239_cast_fp16, var_41241_cast_fp16, var_41243_cast_fp16))[name = tensor("op_41337_cast_fp16")]; + tensor var_41339_interleave_0 = const()[name = tensor("op_41339_interleave_0"), val = tensor(false)]; + tensor var_41339_cast_fp16 = concat(axis = var_39876, interleave = var_41339_interleave_0, values = (var_41245_cast_fp16, var_41247_cast_fp16, var_41249_cast_fp16, var_41251_cast_fp16))[name = tensor("op_41339_cast_fp16")]; + tensor var_41341_interleave_0 = const()[name = tensor("op_41341_interleave_0"), val = tensor(false)]; + tensor var_41341_cast_fp16 = concat(axis = var_39876, interleave = var_41341_interleave_0, values = (var_41253_cast_fp16, var_41255_cast_fp16, var_41257_cast_fp16, var_41259_cast_fp16))[name = tensor("op_41341_cast_fp16")]; + tensor var_41343_interleave_0 = const()[name = tensor("op_41343_interleave_0"), val = tensor(false)]; + tensor var_41343_cast_fp16 = concat(axis = var_39876, interleave = var_41343_interleave_0, values = (var_41261_cast_fp16, var_41263_cast_fp16, var_41265_cast_fp16, var_41267_cast_fp16))[name = tensor("op_41343_cast_fp16")]; + tensor var_41345_interleave_0 = const()[name = tensor("op_41345_interleave_0"), val = tensor(false)]; + tensor var_41345_cast_fp16 = concat(axis = var_39876, interleave = var_41345_interleave_0, values = (var_41269_cast_fp16, var_41271_cast_fp16, var_41273_cast_fp16, var_41275_cast_fp16))[name = tensor("op_41345_cast_fp16")]; + tensor var_41347_interleave_0 = const()[name = tensor("op_41347_interleave_0"), val = tensor(false)]; + tensor var_41347_cast_fp16 = concat(axis = var_39876, interleave = var_41347_interleave_0, values = (var_41277_cast_fp16, var_41279_cast_fp16, var_41281_cast_fp16, var_41283_cast_fp16))[name = tensor("op_41347_cast_fp16")]; + tensor var_41349_interleave_0 = const()[name = tensor("op_41349_interleave_0"), val = tensor(false)]; + tensor var_41349_cast_fp16 = concat(axis = var_39876, interleave = var_41349_interleave_0, values = (var_41285_cast_fp16, var_41287_cast_fp16, var_41289_cast_fp16, var_41291_cast_fp16))[name = tensor("op_41349_cast_fp16")]; + tensor var_41351_interleave_0 = const()[name = tensor("op_41351_interleave_0"), val = tensor(false)]; + tensor var_41351_cast_fp16 = concat(axis = var_39876, interleave = var_41351_interleave_0, values = (var_41293_cast_fp16, var_41295_cast_fp16, var_41297_cast_fp16, var_41299_cast_fp16))[name = tensor("op_41351_cast_fp16")]; + tensor var_41353_interleave_0 = const()[name = tensor("op_41353_interleave_0"), val = tensor(false)]; + tensor var_41353_cast_fp16 = concat(axis = var_39876, interleave = var_41353_interleave_0, values = (var_41301_cast_fp16, var_41303_cast_fp16, var_41305_cast_fp16, var_41307_cast_fp16))[name = tensor("op_41353_cast_fp16")]; + tensor var_41355_interleave_0 = const()[name = tensor("op_41355_interleave_0"), val = tensor(false)]; + tensor var_41355_cast_fp16 = concat(axis = var_39876, interleave = var_41355_interleave_0, values = (var_41309_cast_fp16, var_41311_cast_fp16, var_41313_cast_fp16, var_41315_cast_fp16))[name = tensor("op_41355_cast_fp16")]; + tensor input_209_interleave_0 = const()[name = tensor("input_209_interleave_0"), val = tensor(false)]; + tensor input_209_cast_fp16 = concat(axis = var_39901, interleave = input_209_interleave_0, values = (var_41317_cast_fp16, var_41319_cast_fp16, var_41321_cast_fp16, var_41323_cast_fp16, var_41325_cast_fp16, var_41327_cast_fp16, var_41329_cast_fp16, var_41331_cast_fp16, var_41333_cast_fp16, var_41335_cast_fp16, var_41337_cast_fp16, var_41339_cast_fp16, var_41341_cast_fp16, var_41343_cast_fp16, var_41345_cast_fp16, var_41347_cast_fp16, var_41349_cast_fp16, var_41351_cast_fp16, var_41353_cast_fp16, var_41355_cast_fp16))[name = tensor("input_209_cast_fp16")]; + tensor var_41360 = const()[name = tensor("op_41360"), val = tensor([1, 1])]; + tensor var_41362 = const()[name = tensor("op_41362"), val = tensor([1, 1])]; + tensor obj_107_pad_type_0 = const()[name = tensor("obj_107_pad_type_0"), val = tensor("custom")]; + tensor obj_107_pad_0 = const()[name = tensor("obj_107_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_26_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_26_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1047321920)))]; + tensor layers_26_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_26_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1050598784)))]; + tensor obj_107_cast_fp16 = conv(bias = layers_26_self_attn_o_proj_bias_to_fp16, dilations = var_41362, groups = var_39901, pad = obj_107_pad_0, pad_type = obj_107_pad_type_0, strides = var_41360, weight = layers_26_self_attn_o_proj_weight_to_fp16, x = input_209_cast_fp16)[name = tensor("obj_107_cast_fp16")]; + tensor inputs_107_cast_fp16 = add(x = inputs_105_cast_fp16, y = obj_107_cast_fp16)[name = tensor("inputs_107_cast_fp16")]; + tensor var_41368 = const()[name = tensor("op_41368"), val = tensor([1])]; + tensor channels_mean_107_cast_fp16 = reduce_mean(axes = var_41368, keep_dims = var_39902, x = inputs_107_cast_fp16)[name = tensor("channels_mean_107_cast_fp16")]; + tensor zero_mean_107_cast_fp16 = sub(x = inputs_107_cast_fp16, y = channels_mean_107_cast_fp16)[name = tensor("zero_mean_107_cast_fp16")]; + tensor zero_mean_sq_107_cast_fp16 = mul(x = zero_mean_107_cast_fp16, y = zero_mean_107_cast_fp16)[name = tensor("zero_mean_sq_107_cast_fp16")]; + tensor var_41372 = const()[name = tensor("op_41372"), val = tensor([1])]; + tensor var_41373_cast_fp16 = reduce_mean(axes = var_41372, keep_dims = var_39902, x = zero_mean_sq_107_cast_fp16)[name = tensor("op_41373_cast_fp16")]; + tensor var_41374_to_fp16 = const()[name = tensor("op_41374_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_41375_cast_fp16 = add(x = var_41373_cast_fp16, y = var_41374_to_fp16)[name = tensor("op_41375_cast_fp16")]; + tensor denom_107_epsilon_0_to_fp16 = const()[name = tensor("denom_107_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_107_cast_fp16 = rsqrt(epsilon = denom_107_epsilon_0_to_fp16, x = var_41375_cast_fp16)[name = tensor("denom_107_cast_fp16")]; + tensor out_107_cast_fp16 = mul(x = zero_mean_107_cast_fp16, y = denom_107_cast_fp16)[name = tensor("out_107_cast_fp16")]; + tensor input_211_gamma_0_to_fp16 = const()[name = tensor("input_211_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1050601408)))]; + tensor input_211_beta_0_to_fp16 = const()[name = tensor("input_211_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1050604032)))]; + tensor input_211_epsilon_0_to_fp16 = const()[name = tensor("input_211_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_211_cast_fp16 = batch_norm(beta = input_211_beta_0_to_fp16, epsilon = input_211_epsilon_0_to_fp16, gamma = input_211_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_107_cast_fp16)[name = tensor("input_211_cast_fp16")]; + tensor var_41386 = const()[name = tensor("op_41386"), val = tensor([1, 1])]; + tensor var_41388 = const()[name = tensor("op_41388"), val = tensor([1, 1])]; + tensor input_213_pad_type_0 = const()[name = tensor("input_213_pad_type_0"), val = tensor("custom")]; + tensor input_213_pad_0 = const()[name = tensor("input_213_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_26_fc1_weight_to_fp16 = const()[name = tensor("layers_26_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1050606656)))]; + tensor layers_26_fc1_bias_to_fp16 = const()[name = tensor("layers_26_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1063713920)))]; + tensor input_213_cast_fp16 = conv(bias = layers_26_fc1_bias_to_fp16, dilations = var_41388, groups = var_39901, pad = input_213_pad_0, pad_type = input_213_pad_type_0, strides = var_41386, weight = layers_26_fc1_weight_to_fp16, x = input_211_cast_fp16)[name = tensor("input_213_cast_fp16")]; + tensor input_215_mode_0 = const()[name = tensor("input_215_mode_0"), val = tensor("EXACT")]; + tensor input_215_cast_fp16 = gelu(mode = input_215_mode_0, x = input_213_cast_fp16)[name = tensor("input_215_cast_fp16")]; + tensor var_41394 = const()[name = tensor("op_41394"), val = tensor([1, 1])]; + tensor var_41396 = const()[name = tensor("op_41396"), val = tensor([1, 1])]; + tensor hidden_states_57_pad_type_0 = const()[name = tensor("hidden_states_57_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_57_pad_0 = const()[name = tensor("hidden_states_57_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_26_fc2_weight_to_fp16 = const()[name = tensor("layers_26_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1063724224)))]; + tensor layers_26_fc2_bias_to_fp16 = const()[name = tensor("layers_26_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1076831488)))]; + tensor hidden_states_57_cast_fp16 = conv(bias = layers_26_fc2_bias_to_fp16, dilations = var_41396, groups = var_39901, pad = hidden_states_57_pad_0, pad_type = hidden_states_57_pad_type_0, strides = var_41394, weight = layers_26_fc2_weight_to_fp16, x = input_215_cast_fp16)[name = tensor("hidden_states_57_cast_fp16")]; + tensor inputs_109_cast_fp16 = add(x = inputs_107_cast_fp16, y = hidden_states_57_cast_fp16)[name = tensor("inputs_109_cast_fp16")]; + tensor var_41403 = const()[name = tensor("op_41403"), val = tensor(3)]; + tensor var_41428 = const()[name = tensor("op_41428"), val = tensor(1)]; + tensor var_41429 = const()[name = tensor("op_41429"), val = tensor(true)]; + tensor var_41439 = const()[name = tensor("op_41439"), val = tensor([1])]; + tensor channels_mean_109_cast_fp16 = reduce_mean(axes = var_41439, keep_dims = var_41429, x = inputs_109_cast_fp16)[name = tensor("channels_mean_109_cast_fp16")]; + tensor zero_mean_109_cast_fp16 = sub(x = inputs_109_cast_fp16, y = channels_mean_109_cast_fp16)[name = tensor("zero_mean_109_cast_fp16")]; + tensor zero_mean_sq_109_cast_fp16 = mul(x = zero_mean_109_cast_fp16, y = zero_mean_109_cast_fp16)[name = tensor("zero_mean_sq_109_cast_fp16")]; + tensor var_41443 = const()[name = tensor("op_41443"), val = tensor([1])]; + tensor var_41444_cast_fp16 = reduce_mean(axes = var_41443, keep_dims = var_41429, x = zero_mean_sq_109_cast_fp16)[name = tensor("op_41444_cast_fp16")]; + tensor var_41445_to_fp16 = const()[name = tensor("op_41445_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_41446_cast_fp16 = add(x = var_41444_cast_fp16, y = var_41445_to_fp16)[name = tensor("op_41446_cast_fp16")]; + tensor denom_109_epsilon_0_to_fp16 = const()[name = tensor("denom_109_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_109_cast_fp16 = rsqrt(epsilon = denom_109_epsilon_0_to_fp16, x = var_41446_cast_fp16)[name = tensor("denom_109_cast_fp16")]; + tensor out_109_cast_fp16 = mul(x = zero_mean_109_cast_fp16, y = denom_109_cast_fp16)[name = tensor("out_109_cast_fp16")]; + tensor obj_109_gamma_0_to_fp16 = const()[name = tensor("obj_109_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1076834112)))]; + tensor obj_109_beta_0_to_fp16 = const()[name = tensor("obj_109_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1076836736)))]; + tensor obj_109_epsilon_0_to_fp16 = const()[name = tensor("obj_109_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_109_cast_fp16 = batch_norm(beta = obj_109_beta_0_to_fp16, epsilon = obj_109_epsilon_0_to_fp16, gamma = obj_109_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_109_cast_fp16)[name = tensor("obj_109_cast_fp16")]; + tensor var_41461 = const()[name = tensor("op_41461"), val = tensor([1, 1])]; + tensor var_41463 = const()[name = tensor("op_41463"), val = tensor([1, 1])]; + tensor query_55_pad_type_0 = const()[name = tensor("query_55_pad_type_0"), val = tensor("custom")]; + tensor query_55_pad_0 = const()[name = tensor("query_55_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_27_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_27_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1076839360)))]; + tensor layers_27_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_27_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1080116224)))]; + tensor query_55_cast_fp16 = conv(bias = layers_27_self_attn_q_proj_bias_to_fp16, dilations = var_41463, groups = var_41428, pad = query_55_pad_0, pad_type = query_55_pad_type_0, strides = var_41461, weight = layers_27_self_attn_q_proj_weight_to_fp16, x = obj_109_cast_fp16)[name = tensor("query_55_cast_fp16")]; + tensor var_41467 = const()[name = tensor("op_41467"), val = tensor([1, 1])]; + tensor var_41469 = const()[name = tensor("op_41469"), val = tensor([1, 1])]; + tensor key_55_pad_type_0 = const()[name = tensor("key_55_pad_type_0"), val = tensor("custom")]; + tensor key_55_pad_0 = const()[name = tensor("key_55_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_27_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_27_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1080118848)))]; + tensor key_55_cast_fp16 = conv(dilations = var_41469, groups = var_41428, pad = key_55_pad_0, pad_type = key_55_pad_type_0, strides = var_41467, weight = layers_27_self_attn_k_proj_weight_to_fp16, x = obj_109_cast_fp16)[name = tensor("key_55_cast_fp16")]; + tensor var_41474 = const()[name = tensor("op_41474"), val = tensor([1, 1])]; + tensor var_41476 = const()[name = tensor("op_41476"), val = tensor([1, 1])]; + tensor value_55_pad_type_0 = const()[name = tensor("value_55_pad_type_0"), val = tensor("custom")]; + tensor value_55_pad_0 = const()[name = tensor("value_55_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_27_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_27_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1083395712)))]; + tensor layers_27_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_27_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1086672576)))]; + tensor value_55_cast_fp16 = conv(bias = layers_27_self_attn_v_proj_bias_to_fp16, dilations = var_41476, groups = var_41428, pad = value_55_pad_0, pad_type = value_55_pad_type_0, strides = var_41474, weight = layers_27_self_attn_v_proj_weight_to_fp16, x = obj_109_cast_fp16)[name = tensor("value_55_cast_fp16")]; + tensor var_41483_begin_0 = const()[name = tensor("op_41483_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_41483_end_0 = const()[name = tensor("op_41483_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_41483_end_mask_0 = const()[name = tensor("op_41483_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_41483_cast_fp16 = slice_by_index(begin = var_41483_begin_0, end = var_41483_end_0, end_mask = var_41483_end_mask_0, x = query_55_cast_fp16)[name = tensor("op_41483_cast_fp16")]; + tensor var_41487_begin_0 = const()[name = tensor("op_41487_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_41487_end_0 = const()[name = tensor("op_41487_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_41487_end_mask_0 = const()[name = tensor("op_41487_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_41487_cast_fp16 = slice_by_index(begin = var_41487_begin_0, end = var_41487_end_0, end_mask = var_41487_end_mask_0, x = query_55_cast_fp16)[name = tensor("op_41487_cast_fp16")]; + tensor var_41491_begin_0 = const()[name = tensor("op_41491_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_41491_end_0 = const()[name = tensor("op_41491_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_41491_end_mask_0 = const()[name = tensor("op_41491_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_41491_cast_fp16 = slice_by_index(begin = var_41491_begin_0, end = var_41491_end_0, end_mask = var_41491_end_mask_0, x = query_55_cast_fp16)[name = tensor("op_41491_cast_fp16")]; + tensor var_41495_begin_0 = const()[name = tensor("op_41495_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_41495_end_0 = const()[name = tensor("op_41495_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_41495_end_mask_0 = const()[name = tensor("op_41495_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_41495_cast_fp16 = slice_by_index(begin = var_41495_begin_0, end = var_41495_end_0, end_mask = var_41495_end_mask_0, x = query_55_cast_fp16)[name = tensor("op_41495_cast_fp16")]; + tensor var_41499_begin_0 = const()[name = tensor("op_41499_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_41499_end_0 = const()[name = tensor("op_41499_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_41499_end_mask_0 = const()[name = tensor("op_41499_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_41499_cast_fp16 = slice_by_index(begin = var_41499_begin_0, end = var_41499_end_0, end_mask = var_41499_end_mask_0, x = query_55_cast_fp16)[name = tensor("op_41499_cast_fp16")]; + tensor var_41503_begin_0 = const()[name = tensor("op_41503_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_41503_end_0 = const()[name = tensor("op_41503_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_41503_end_mask_0 = const()[name = tensor("op_41503_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_41503_cast_fp16 = slice_by_index(begin = var_41503_begin_0, end = var_41503_end_0, end_mask = var_41503_end_mask_0, x = query_55_cast_fp16)[name = tensor("op_41503_cast_fp16")]; + tensor var_41507_begin_0 = const()[name = tensor("op_41507_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_41507_end_0 = const()[name = tensor("op_41507_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_41507_end_mask_0 = const()[name = tensor("op_41507_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_41507_cast_fp16 = slice_by_index(begin = var_41507_begin_0, end = var_41507_end_0, end_mask = var_41507_end_mask_0, x = query_55_cast_fp16)[name = tensor("op_41507_cast_fp16")]; + tensor var_41511_begin_0 = const()[name = tensor("op_41511_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_41511_end_0 = const()[name = tensor("op_41511_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_41511_end_mask_0 = const()[name = tensor("op_41511_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_41511_cast_fp16 = slice_by_index(begin = var_41511_begin_0, end = var_41511_end_0, end_mask = var_41511_end_mask_0, x = query_55_cast_fp16)[name = tensor("op_41511_cast_fp16")]; + tensor var_41515_begin_0 = const()[name = tensor("op_41515_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_41515_end_0 = const()[name = tensor("op_41515_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_41515_end_mask_0 = const()[name = tensor("op_41515_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_41515_cast_fp16 = slice_by_index(begin = var_41515_begin_0, end = var_41515_end_0, end_mask = var_41515_end_mask_0, x = query_55_cast_fp16)[name = tensor("op_41515_cast_fp16")]; + tensor var_41519_begin_0 = const()[name = tensor("op_41519_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_41519_end_0 = const()[name = tensor("op_41519_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_41519_end_mask_0 = const()[name = tensor("op_41519_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_41519_cast_fp16 = slice_by_index(begin = var_41519_begin_0, end = var_41519_end_0, end_mask = var_41519_end_mask_0, x = query_55_cast_fp16)[name = tensor("op_41519_cast_fp16")]; + tensor var_41523_begin_0 = const()[name = tensor("op_41523_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_41523_end_0 = const()[name = tensor("op_41523_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_41523_end_mask_0 = const()[name = tensor("op_41523_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_41523_cast_fp16 = slice_by_index(begin = var_41523_begin_0, end = var_41523_end_0, end_mask = var_41523_end_mask_0, x = query_55_cast_fp16)[name = tensor("op_41523_cast_fp16")]; + tensor var_41527_begin_0 = const()[name = tensor("op_41527_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_41527_end_0 = const()[name = tensor("op_41527_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_41527_end_mask_0 = const()[name = tensor("op_41527_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_41527_cast_fp16 = slice_by_index(begin = var_41527_begin_0, end = var_41527_end_0, end_mask = var_41527_end_mask_0, x = query_55_cast_fp16)[name = tensor("op_41527_cast_fp16")]; + tensor var_41531_begin_0 = const()[name = tensor("op_41531_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_41531_end_0 = const()[name = tensor("op_41531_end_0"), val = tensor([1, 832, 1, 1500])]; + tensor var_41531_end_mask_0 = const()[name = tensor("op_41531_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_41531_cast_fp16 = slice_by_index(begin = var_41531_begin_0, end = var_41531_end_0, end_mask = var_41531_end_mask_0, x = query_55_cast_fp16)[name = tensor("op_41531_cast_fp16")]; + tensor var_41535_begin_0 = const()[name = tensor("op_41535_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_41535_end_0 = const()[name = tensor("op_41535_end_0"), val = tensor([1, 896, 1, 1500])]; + tensor var_41535_end_mask_0 = const()[name = tensor("op_41535_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_41535_cast_fp16 = slice_by_index(begin = var_41535_begin_0, end = var_41535_end_0, end_mask = var_41535_end_mask_0, x = query_55_cast_fp16)[name = tensor("op_41535_cast_fp16")]; + tensor var_41539_begin_0 = const()[name = tensor("op_41539_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_41539_end_0 = const()[name = tensor("op_41539_end_0"), val = tensor([1, 960, 1, 1500])]; + tensor var_41539_end_mask_0 = const()[name = tensor("op_41539_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_41539_cast_fp16 = slice_by_index(begin = var_41539_begin_0, end = var_41539_end_0, end_mask = var_41539_end_mask_0, x = query_55_cast_fp16)[name = tensor("op_41539_cast_fp16")]; + tensor var_41543_begin_0 = const()[name = tensor("op_41543_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_41543_end_0 = const()[name = tensor("op_41543_end_0"), val = tensor([1, 1024, 1, 1500])]; + tensor var_41543_end_mask_0 = const()[name = tensor("op_41543_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_41543_cast_fp16 = slice_by_index(begin = var_41543_begin_0, end = var_41543_end_0, end_mask = var_41543_end_mask_0, x = query_55_cast_fp16)[name = tensor("op_41543_cast_fp16")]; + tensor var_41547_begin_0 = const()[name = tensor("op_41547_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_41547_end_0 = const()[name = tensor("op_41547_end_0"), val = tensor([1, 1088, 1, 1500])]; + tensor var_41547_end_mask_0 = const()[name = tensor("op_41547_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_41547_cast_fp16 = slice_by_index(begin = var_41547_begin_0, end = var_41547_end_0, end_mask = var_41547_end_mask_0, x = query_55_cast_fp16)[name = tensor("op_41547_cast_fp16")]; + tensor var_41551_begin_0 = const()[name = tensor("op_41551_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_41551_end_0 = const()[name = tensor("op_41551_end_0"), val = tensor([1, 1152, 1, 1500])]; + tensor var_41551_end_mask_0 = const()[name = tensor("op_41551_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_41551_cast_fp16 = slice_by_index(begin = var_41551_begin_0, end = var_41551_end_0, end_mask = var_41551_end_mask_0, x = query_55_cast_fp16)[name = tensor("op_41551_cast_fp16")]; + tensor var_41555_begin_0 = const()[name = tensor("op_41555_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_41555_end_0 = const()[name = tensor("op_41555_end_0"), val = tensor([1, 1216, 1, 1500])]; + tensor var_41555_end_mask_0 = const()[name = tensor("op_41555_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_41555_cast_fp16 = slice_by_index(begin = var_41555_begin_0, end = var_41555_end_0, end_mask = var_41555_end_mask_0, x = query_55_cast_fp16)[name = tensor("op_41555_cast_fp16")]; + tensor var_41559_begin_0 = const()[name = tensor("op_41559_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_41559_end_0 = const()[name = tensor("op_41559_end_0"), val = tensor([1, 1280, 1, 1500])]; + tensor var_41559_end_mask_0 = const()[name = tensor("op_41559_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_41559_cast_fp16 = slice_by_index(begin = var_41559_begin_0, end = var_41559_end_0, end_mask = var_41559_end_mask_0, x = query_55_cast_fp16)[name = tensor("op_41559_cast_fp16")]; + tensor var_41568_begin_0 = const()[name = tensor("op_41568_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_41568_end_0 = const()[name = tensor("op_41568_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_41568_end_mask_0 = const()[name = tensor("op_41568_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_41568_cast_fp16 = slice_by_index(begin = var_41568_begin_0, end = var_41568_end_0, end_mask = var_41568_end_mask_0, x = var_41483_cast_fp16)[name = tensor("op_41568_cast_fp16")]; + tensor var_41575_begin_0 = const()[name = tensor("op_41575_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_41575_end_0 = const()[name = tensor("op_41575_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_41575_end_mask_0 = const()[name = tensor("op_41575_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_41575_cast_fp16 = slice_by_index(begin = var_41575_begin_0, end = var_41575_end_0, end_mask = var_41575_end_mask_0, x = var_41483_cast_fp16)[name = tensor("op_41575_cast_fp16")]; + tensor var_41582_begin_0 = const()[name = tensor("op_41582_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_41582_end_0 = const()[name = tensor("op_41582_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_41582_end_mask_0 = const()[name = tensor("op_41582_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_41582_cast_fp16 = slice_by_index(begin = var_41582_begin_0, end = var_41582_end_0, end_mask = var_41582_end_mask_0, x = var_41483_cast_fp16)[name = tensor("op_41582_cast_fp16")]; + tensor var_41589_begin_0 = const()[name = tensor("op_41589_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_41589_end_0 = const()[name = tensor("op_41589_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_41589_end_mask_0 = const()[name = tensor("op_41589_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_41589_cast_fp16 = slice_by_index(begin = var_41589_begin_0, end = var_41589_end_0, end_mask = var_41589_end_mask_0, x = var_41483_cast_fp16)[name = tensor("op_41589_cast_fp16")]; + tensor var_41596_begin_0 = const()[name = tensor("op_41596_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_41596_end_0 = const()[name = tensor("op_41596_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_41596_end_mask_0 = const()[name = tensor("op_41596_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_41596_cast_fp16 = slice_by_index(begin = var_41596_begin_0, end = var_41596_end_0, end_mask = var_41596_end_mask_0, x = var_41487_cast_fp16)[name = tensor("op_41596_cast_fp16")]; + tensor var_41603_begin_0 = const()[name = tensor("op_41603_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_41603_end_0 = const()[name = tensor("op_41603_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_41603_end_mask_0 = const()[name = tensor("op_41603_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_41603_cast_fp16 = slice_by_index(begin = var_41603_begin_0, end = var_41603_end_0, end_mask = var_41603_end_mask_0, x = var_41487_cast_fp16)[name = tensor("op_41603_cast_fp16")]; + tensor var_41610_begin_0 = const()[name = tensor("op_41610_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_41610_end_0 = const()[name = tensor("op_41610_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_41610_end_mask_0 = const()[name = tensor("op_41610_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_41610_cast_fp16 = slice_by_index(begin = var_41610_begin_0, end = var_41610_end_0, end_mask = var_41610_end_mask_0, x = var_41487_cast_fp16)[name = tensor("op_41610_cast_fp16")]; + tensor var_41617_begin_0 = const()[name = tensor("op_41617_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_41617_end_0 = const()[name = tensor("op_41617_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_41617_end_mask_0 = const()[name = tensor("op_41617_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_41617_cast_fp16 = slice_by_index(begin = var_41617_begin_0, end = var_41617_end_0, end_mask = var_41617_end_mask_0, x = var_41487_cast_fp16)[name = tensor("op_41617_cast_fp16")]; + tensor var_41624_begin_0 = const()[name = tensor("op_41624_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_41624_end_0 = const()[name = tensor("op_41624_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_41624_end_mask_0 = const()[name = tensor("op_41624_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_41624_cast_fp16 = slice_by_index(begin = var_41624_begin_0, end = var_41624_end_0, end_mask = var_41624_end_mask_0, x = var_41491_cast_fp16)[name = tensor("op_41624_cast_fp16")]; + tensor var_41631_begin_0 = const()[name = tensor("op_41631_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_41631_end_0 = const()[name = tensor("op_41631_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_41631_end_mask_0 = const()[name = tensor("op_41631_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_41631_cast_fp16 = slice_by_index(begin = var_41631_begin_0, end = var_41631_end_0, end_mask = var_41631_end_mask_0, x = var_41491_cast_fp16)[name = tensor("op_41631_cast_fp16")]; + tensor var_41638_begin_0 = const()[name = tensor("op_41638_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_41638_end_0 = const()[name = tensor("op_41638_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_41638_end_mask_0 = const()[name = tensor("op_41638_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_41638_cast_fp16 = slice_by_index(begin = var_41638_begin_0, end = var_41638_end_0, end_mask = var_41638_end_mask_0, x = var_41491_cast_fp16)[name = tensor("op_41638_cast_fp16")]; + tensor var_41645_begin_0 = const()[name = tensor("op_41645_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_41645_end_0 = const()[name = tensor("op_41645_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_41645_end_mask_0 = const()[name = tensor("op_41645_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_41645_cast_fp16 = slice_by_index(begin = var_41645_begin_0, end = var_41645_end_0, end_mask = var_41645_end_mask_0, x = var_41491_cast_fp16)[name = tensor("op_41645_cast_fp16")]; + tensor var_41652_begin_0 = const()[name = tensor("op_41652_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_41652_end_0 = const()[name = tensor("op_41652_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_41652_end_mask_0 = const()[name = tensor("op_41652_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_41652_cast_fp16 = slice_by_index(begin = var_41652_begin_0, end = var_41652_end_0, end_mask = var_41652_end_mask_0, x = var_41495_cast_fp16)[name = tensor("op_41652_cast_fp16")]; + tensor var_41659_begin_0 = const()[name = tensor("op_41659_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_41659_end_0 = const()[name = tensor("op_41659_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_41659_end_mask_0 = const()[name = tensor("op_41659_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_41659_cast_fp16 = slice_by_index(begin = var_41659_begin_0, end = var_41659_end_0, end_mask = var_41659_end_mask_0, x = var_41495_cast_fp16)[name = tensor("op_41659_cast_fp16")]; + tensor var_41666_begin_0 = const()[name = tensor("op_41666_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_41666_end_0 = const()[name = tensor("op_41666_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_41666_end_mask_0 = const()[name = tensor("op_41666_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_41666_cast_fp16 = slice_by_index(begin = var_41666_begin_0, end = var_41666_end_0, end_mask = var_41666_end_mask_0, x = var_41495_cast_fp16)[name = tensor("op_41666_cast_fp16")]; + tensor var_41673_begin_0 = const()[name = tensor("op_41673_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_41673_end_0 = const()[name = tensor("op_41673_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_41673_end_mask_0 = const()[name = tensor("op_41673_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_41673_cast_fp16 = slice_by_index(begin = var_41673_begin_0, end = var_41673_end_0, end_mask = var_41673_end_mask_0, x = var_41495_cast_fp16)[name = tensor("op_41673_cast_fp16")]; + tensor var_41680_begin_0 = const()[name = tensor("op_41680_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_41680_end_0 = const()[name = tensor("op_41680_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_41680_end_mask_0 = const()[name = tensor("op_41680_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_41680_cast_fp16 = slice_by_index(begin = var_41680_begin_0, end = var_41680_end_0, end_mask = var_41680_end_mask_0, x = var_41499_cast_fp16)[name = tensor("op_41680_cast_fp16")]; + tensor var_41687_begin_0 = const()[name = tensor("op_41687_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_41687_end_0 = const()[name = tensor("op_41687_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_41687_end_mask_0 = const()[name = tensor("op_41687_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_41687_cast_fp16 = slice_by_index(begin = var_41687_begin_0, end = var_41687_end_0, end_mask = var_41687_end_mask_0, x = var_41499_cast_fp16)[name = tensor("op_41687_cast_fp16")]; + tensor var_41694_begin_0 = const()[name = tensor("op_41694_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_41694_end_0 = const()[name = tensor("op_41694_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_41694_end_mask_0 = const()[name = tensor("op_41694_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_41694_cast_fp16 = slice_by_index(begin = var_41694_begin_0, end = var_41694_end_0, end_mask = var_41694_end_mask_0, x = var_41499_cast_fp16)[name = tensor("op_41694_cast_fp16")]; + tensor var_41701_begin_0 = const()[name = tensor("op_41701_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_41701_end_0 = const()[name = tensor("op_41701_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_41701_end_mask_0 = const()[name = tensor("op_41701_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_41701_cast_fp16 = slice_by_index(begin = var_41701_begin_0, end = var_41701_end_0, end_mask = var_41701_end_mask_0, x = var_41499_cast_fp16)[name = tensor("op_41701_cast_fp16")]; + tensor var_41708_begin_0 = const()[name = tensor("op_41708_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_41708_end_0 = const()[name = tensor("op_41708_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_41708_end_mask_0 = const()[name = tensor("op_41708_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_41708_cast_fp16 = slice_by_index(begin = var_41708_begin_0, end = var_41708_end_0, end_mask = var_41708_end_mask_0, x = var_41503_cast_fp16)[name = tensor("op_41708_cast_fp16")]; + tensor var_41715_begin_0 = const()[name = tensor("op_41715_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_41715_end_0 = const()[name = tensor("op_41715_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_41715_end_mask_0 = const()[name = tensor("op_41715_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_41715_cast_fp16 = slice_by_index(begin = var_41715_begin_0, end = var_41715_end_0, end_mask = var_41715_end_mask_0, x = var_41503_cast_fp16)[name = tensor("op_41715_cast_fp16")]; + tensor var_41722_begin_0 = const()[name = tensor("op_41722_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_41722_end_0 = const()[name = tensor("op_41722_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_41722_end_mask_0 = const()[name = tensor("op_41722_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_41722_cast_fp16 = slice_by_index(begin = var_41722_begin_0, end = var_41722_end_0, end_mask = var_41722_end_mask_0, x = var_41503_cast_fp16)[name = tensor("op_41722_cast_fp16")]; + tensor var_41729_begin_0 = const()[name = tensor("op_41729_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_41729_end_0 = const()[name = tensor("op_41729_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_41729_end_mask_0 = const()[name = tensor("op_41729_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_41729_cast_fp16 = slice_by_index(begin = var_41729_begin_0, end = var_41729_end_0, end_mask = var_41729_end_mask_0, x = var_41503_cast_fp16)[name = tensor("op_41729_cast_fp16")]; + tensor var_41736_begin_0 = const()[name = tensor("op_41736_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_41736_end_0 = const()[name = tensor("op_41736_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_41736_end_mask_0 = const()[name = tensor("op_41736_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_41736_cast_fp16 = slice_by_index(begin = var_41736_begin_0, end = var_41736_end_0, end_mask = var_41736_end_mask_0, x = var_41507_cast_fp16)[name = tensor("op_41736_cast_fp16")]; + tensor var_41743_begin_0 = const()[name = tensor("op_41743_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_41743_end_0 = const()[name = tensor("op_41743_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_41743_end_mask_0 = const()[name = tensor("op_41743_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_41743_cast_fp16 = slice_by_index(begin = var_41743_begin_0, end = var_41743_end_0, end_mask = var_41743_end_mask_0, x = var_41507_cast_fp16)[name = tensor("op_41743_cast_fp16")]; + tensor var_41750_begin_0 = const()[name = tensor("op_41750_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_41750_end_0 = const()[name = tensor("op_41750_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_41750_end_mask_0 = const()[name = tensor("op_41750_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_41750_cast_fp16 = slice_by_index(begin = var_41750_begin_0, end = var_41750_end_0, end_mask = var_41750_end_mask_0, x = var_41507_cast_fp16)[name = tensor("op_41750_cast_fp16")]; + tensor var_41757_begin_0 = const()[name = tensor("op_41757_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_41757_end_0 = const()[name = tensor("op_41757_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_41757_end_mask_0 = const()[name = tensor("op_41757_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_41757_cast_fp16 = slice_by_index(begin = var_41757_begin_0, end = var_41757_end_0, end_mask = var_41757_end_mask_0, x = var_41507_cast_fp16)[name = tensor("op_41757_cast_fp16")]; + tensor var_41764_begin_0 = const()[name = tensor("op_41764_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_41764_end_0 = const()[name = tensor("op_41764_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_41764_end_mask_0 = const()[name = tensor("op_41764_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_41764_cast_fp16 = slice_by_index(begin = var_41764_begin_0, end = var_41764_end_0, end_mask = var_41764_end_mask_0, x = var_41511_cast_fp16)[name = tensor("op_41764_cast_fp16")]; + tensor var_41771_begin_0 = const()[name = tensor("op_41771_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_41771_end_0 = const()[name = tensor("op_41771_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_41771_end_mask_0 = const()[name = tensor("op_41771_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_41771_cast_fp16 = slice_by_index(begin = var_41771_begin_0, end = var_41771_end_0, end_mask = var_41771_end_mask_0, x = var_41511_cast_fp16)[name = tensor("op_41771_cast_fp16")]; + tensor var_41778_begin_0 = const()[name = tensor("op_41778_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_41778_end_0 = const()[name = tensor("op_41778_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_41778_end_mask_0 = const()[name = tensor("op_41778_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_41778_cast_fp16 = slice_by_index(begin = var_41778_begin_0, end = var_41778_end_0, end_mask = var_41778_end_mask_0, x = var_41511_cast_fp16)[name = tensor("op_41778_cast_fp16")]; + tensor var_41785_begin_0 = const()[name = tensor("op_41785_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_41785_end_0 = const()[name = tensor("op_41785_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_41785_end_mask_0 = const()[name = tensor("op_41785_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_41785_cast_fp16 = slice_by_index(begin = var_41785_begin_0, end = var_41785_end_0, end_mask = var_41785_end_mask_0, x = var_41511_cast_fp16)[name = tensor("op_41785_cast_fp16")]; + tensor var_41792_begin_0 = const()[name = tensor("op_41792_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_41792_end_0 = const()[name = tensor("op_41792_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_41792_end_mask_0 = const()[name = tensor("op_41792_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_41792_cast_fp16 = slice_by_index(begin = var_41792_begin_0, end = var_41792_end_0, end_mask = var_41792_end_mask_0, x = var_41515_cast_fp16)[name = tensor("op_41792_cast_fp16")]; + tensor var_41799_begin_0 = const()[name = tensor("op_41799_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_41799_end_0 = const()[name = tensor("op_41799_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_41799_end_mask_0 = const()[name = tensor("op_41799_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_41799_cast_fp16 = slice_by_index(begin = var_41799_begin_0, end = var_41799_end_0, end_mask = var_41799_end_mask_0, x = var_41515_cast_fp16)[name = tensor("op_41799_cast_fp16")]; + tensor var_41806_begin_0 = const()[name = tensor("op_41806_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_41806_end_0 = const()[name = tensor("op_41806_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_41806_end_mask_0 = const()[name = tensor("op_41806_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_41806_cast_fp16 = slice_by_index(begin = var_41806_begin_0, end = var_41806_end_0, end_mask = var_41806_end_mask_0, x = var_41515_cast_fp16)[name = tensor("op_41806_cast_fp16")]; + tensor var_41813_begin_0 = const()[name = tensor("op_41813_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_41813_end_0 = const()[name = tensor("op_41813_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_41813_end_mask_0 = const()[name = tensor("op_41813_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_41813_cast_fp16 = slice_by_index(begin = var_41813_begin_0, end = var_41813_end_0, end_mask = var_41813_end_mask_0, x = var_41515_cast_fp16)[name = tensor("op_41813_cast_fp16")]; + tensor var_41820_begin_0 = const()[name = tensor("op_41820_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_41820_end_0 = const()[name = tensor("op_41820_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_41820_end_mask_0 = const()[name = tensor("op_41820_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_41820_cast_fp16 = slice_by_index(begin = var_41820_begin_0, end = var_41820_end_0, end_mask = var_41820_end_mask_0, x = var_41519_cast_fp16)[name = tensor("op_41820_cast_fp16")]; + tensor var_41827_begin_0 = const()[name = tensor("op_41827_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_41827_end_0 = const()[name = tensor("op_41827_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_41827_end_mask_0 = const()[name = tensor("op_41827_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_41827_cast_fp16 = slice_by_index(begin = var_41827_begin_0, end = var_41827_end_0, end_mask = var_41827_end_mask_0, x = var_41519_cast_fp16)[name = tensor("op_41827_cast_fp16")]; + tensor var_41834_begin_0 = const()[name = tensor("op_41834_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_41834_end_0 = const()[name = tensor("op_41834_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_41834_end_mask_0 = const()[name = tensor("op_41834_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_41834_cast_fp16 = slice_by_index(begin = var_41834_begin_0, end = var_41834_end_0, end_mask = var_41834_end_mask_0, x = var_41519_cast_fp16)[name = tensor("op_41834_cast_fp16")]; + tensor var_41841_begin_0 = const()[name = tensor("op_41841_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_41841_end_0 = const()[name = tensor("op_41841_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_41841_end_mask_0 = const()[name = tensor("op_41841_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_41841_cast_fp16 = slice_by_index(begin = var_41841_begin_0, end = var_41841_end_0, end_mask = var_41841_end_mask_0, x = var_41519_cast_fp16)[name = tensor("op_41841_cast_fp16")]; + tensor var_41848_begin_0 = const()[name = tensor("op_41848_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_41848_end_0 = const()[name = tensor("op_41848_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_41848_end_mask_0 = const()[name = tensor("op_41848_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_41848_cast_fp16 = slice_by_index(begin = var_41848_begin_0, end = var_41848_end_0, end_mask = var_41848_end_mask_0, x = var_41523_cast_fp16)[name = tensor("op_41848_cast_fp16")]; + tensor var_41855_begin_0 = const()[name = tensor("op_41855_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_41855_end_0 = const()[name = tensor("op_41855_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_41855_end_mask_0 = const()[name = tensor("op_41855_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_41855_cast_fp16 = slice_by_index(begin = var_41855_begin_0, end = var_41855_end_0, end_mask = var_41855_end_mask_0, x = var_41523_cast_fp16)[name = tensor("op_41855_cast_fp16")]; + tensor var_41862_begin_0 = const()[name = tensor("op_41862_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_41862_end_0 = const()[name = tensor("op_41862_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_41862_end_mask_0 = const()[name = tensor("op_41862_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_41862_cast_fp16 = slice_by_index(begin = var_41862_begin_0, end = var_41862_end_0, end_mask = var_41862_end_mask_0, x = var_41523_cast_fp16)[name = tensor("op_41862_cast_fp16")]; + tensor var_41869_begin_0 = const()[name = tensor("op_41869_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_41869_end_0 = const()[name = tensor("op_41869_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_41869_end_mask_0 = const()[name = tensor("op_41869_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_41869_cast_fp16 = slice_by_index(begin = var_41869_begin_0, end = var_41869_end_0, end_mask = var_41869_end_mask_0, x = var_41523_cast_fp16)[name = tensor("op_41869_cast_fp16")]; + tensor var_41876_begin_0 = const()[name = tensor("op_41876_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_41876_end_0 = const()[name = tensor("op_41876_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_41876_end_mask_0 = const()[name = tensor("op_41876_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_41876_cast_fp16 = slice_by_index(begin = var_41876_begin_0, end = var_41876_end_0, end_mask = var_41876_end_mask_0, x = var_41527_cast_fp16)[name = tensor("op_41876_cast_fp16")]; + tensor var_41883_begin_0 = const()[name = tensor("op_41883_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_41883_end_0 = const()[name = tensor("op_41883_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_41883_end_mask_0 = const()[name = tensor("op_41883_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_41883_cast_fp16 = slice_by_index(begin = var_41883_begin_0, end = var_41883_end_0, end_mask = var_41883_end_mask_0, x = var_41527_cast_fp16)[name = tensor("op_41883_cast_fp16")]; + tensor var_41890_begin_0 = const()[name = tensor("op_41890_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_41890_end_0 = const()[name = tensor("op_41890_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_41890_end_mask_0 = const()[name = tensor("op_41890_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_41890_cast_fp16 = slice_by_index(begin = var_41890_begin_0, end = var_41890_end_0, end_mask = var_41890_end_mask_0, x = var_41527_cast_fp16)[name = tensor("op_41890_cast_fp16")]; + tensor var_41897_begin_0 = const()[name = tensor("op_41897_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_41897_end_0 = const()[name = tensor("op_41897_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_41897_end_mask_0 = const()[name = tensor("op_41897_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_41897_cast_fp16 = slice_by_index(begin = var_41897_begin_0, end = var_41897_end_0, end_mask = var_41897_end_mask_0, x = var_41527_cast_fp16)[name = tensor("op_41897_cast_fp16")]; + tensor var_41904_begin_0 = const()[name = tensor("op_41904_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_41904_end_0 = const()[name = tensor("op_41904_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_41904_end_mask_0 = const()[name = tensor("op_41904_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_41904_cast_fp16 = slice_by_index(begin = var_41904_begin_0, end = var_41904_end_0, end_mask = var_41904_end_mask_0, x = var_41531_cast_fp16)[name = tensor("op_41904_cast_fp16")]; + tensor var_41911_begin_0 = const()[name = tensor("op_41911_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_41911_end_0 = const()[name = tensor("op_41911_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_41911_end_mask_0 = const()[name = tensor("op_41911_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_41911_cast_fp16 = slice_by_index(begin = var_41911_begin_0, end = var_41911_end_0, end_mask = var_41911_end_mask_0, x = var_41531_cast_fp16)[name = tensor("op_41911_cast_fp16")]; + tensor var_41918_begin_0 = const()[name = tensor("op_41918_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_41918_end_0 = const()[name = tensor("op_41918_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_41918_end_mask_0 = const()[name = tensor("op_41918_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_41918_cast_fp16 = slice_by_index(begin = var_41918_begin_0, end = var_41918_end_0, end_mask = var_41918_end_mask_0, x = var_41531_cast_fp16)[name = tensor("op_41918_cast_fp16")]; + tensor var_41925_begin_0 = const()[name = tensor("op_41925_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_41925_end_0 = const()[name = tensor("op_41925_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_41925_end_mask_0 = const()[name = tensor("op_41925_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_41925_cast_fp16 = slice_by_index(begin = var_41925_begin_0, end = var_41925_end_0, end_mask = var_41925_end_mask_0, x = var_41531_cast_fp16)[name = tensor("op_41925_cast_fp16")]; + tensor var_41932_begin_0 = const()[name = tensor("op_41932_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_41932_end_0 = const()[name = tensor("op_41932_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_41932_end_mask_0 = const()[name = tensor("op_41932_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_41932_cast_fp16 = slice_by_index(begin = var_41932_begin_0, end = var_41932_end_0, end_mask = var_41932_end_mask_0, x = var_41535_cast_fp16)[name = tensor("op_41932_cast_fp16")]; + tensor var_41939_begin_0 = const()[name = tensor("op_41939_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_41939_end_0 = const()[name = tensor("op_41939_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_41939_end_mask_0 = const()[name = tensor("op_41939_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_41939_cast_fp16 = slice_by_index(begin = var_41939_begin_0, end = var_41939_end_0, end_mask = var_41939_end_mask_0, x = var_41535_cast_fp16)[name = tensor("op_41939_cast_fp16")]; + tensor var_41946_begin_0 = const()[name = tensor("op_41946_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_41946_end_0 = const()[name = tensor("op_41946_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_41946_end_mask_0 = const()[name = tensor("op_41946_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_41946_cast_fp16 = slice_by_index(begin = var_41946_begin_0, end = var_41946_end_0, end_mask = var_41946_end_mask_0, x = var_41535_cast_fp16)[name = tensor("op_41946_cast_fp16")]; + tensor var_41953_begin_0 = const()[name = tensor("op_41953_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_41953_end_0 = const()[name = tensor("op_41953_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_41953_end_mask_0 = const()[name = tensor("op_41953_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_41953_cast_fp16 = slice_by_index(begin = var_41953_begin_0, end = var_41953_end_0, end_mask = var_41953_end_mask_0, x = var_41535_cast_fp16)[name = tensor("op_41953_cast_fp16")]; + tensor var_41960_begin_0 = const()[name = tensor("op_41960_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_41960_end_0 = const()[name = tensor("op_41960_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_41960_end_mask_0 = const()[name = tensor("op_41960_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_41960_cast_fp16 = slice_by_index(begin = var_41960_begin_0, end = var_41960_end_0, end_mask = var_41960_end_mask_0, x = var_41539_cast_fp16)[name = tensor("op_41960_cast_fp16")]; + tensor var_41967_begin_0 = const()[name = tensor("op_41967_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_41967_end_0 = const()[name = tensor("op_41967_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_41967_end_mask_0 = const()[name = tensor("op_41967_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_41967_cast_fp16 = slice_by_index(begin = var_41967_begin_0, end = var_41967_end_0, end_mask = var_41967_end_mask_0, x = var_41539_cast_fp16)[name = tensor("op_41967_cast_fp16")]; + tensor var_41974_begin_0 = const()[name = tensor("op_41974_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_41974_end_0 = const()[name = tensor("op_41974_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_41974_end_mask_0 = const()[name = tensor("op_41974_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_41974_cast_fp16 = slice_by_index(begin = var_41974_begin_0, end = var_41974_end_0, end_mask = var_41974_end_mask_0, x = var_41539_cast_fp16)[name = tensor("op_41974_cast_fp16")]; + tensor var_41981_begin_0 = const()[name = tensor("op_41981_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_41981_end_0 = const()[name = tensor("op_41981_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_41981_end_mask_0 = const()[name = tensor("op_41981_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_41981_cast_fp16 = slice_by_index(begin = var_41981_begin_0, end = var_41981_end_0, end_mask = var_41981_end_mask_0, x = var_41539_cast_fp16)[name = tensor("op_41981_cast_fp16")]; + tensor var_41988_begin_0 = const()[name = tensor("op_41988_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_41988_end_0 = const()[name = tensor("op_41988_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_41988_end_mask_0 = const()[name = tensor("op_41988_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_41988_cast_fp16 = slice_by_index(begin = var_41988_begin_0, end = var_41988_end_0, end_mask = var_41988_end_mask_0, x = var_41543_cast_fp16)[name = tensor("op_41988_cast_fp16")]; + tensor var_41995_begin_0 = const()[name = tensor("op_41995_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_41995_end_0 = const()[name = tensor("op_41995_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_41995_end_mask_0 = const()[name = tensor("op_41995_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_41995_cast_fp16 = slice_by_index(begin = var_41995_begin_0, end = var_41995_end_0, end_mask = var_41995_end_mask_0, x = var_41543_cast_fp16)[name = tensor("op_41995_cast_fp16")]; + tensor var_42002_begin_0 = const()[name = tensor("op_42002_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_42002_end_0 = const()[name = tensor("op_42002_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_42002_end_mask_0 = const()[name = tensor("op_42002_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_42002_cast_fp16 = slice_by_index(begin = var_42002_begin_0, end = var_42002_end_0, end_mask = var_42002_end_mask_0, x = var_41543_cast_fp16)[name = tensor("op_42002_cast_fp16")]; + tensor var_42009_begin_0 = const()[name = tensor("op_42009_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_42009_end_0 = const()[name = tensor("op_42009_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_42009_end_mask_0 = const()[name = tensor("op_42009_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_42009_cast_fp16 = slice_by_index(begin = var_42009_begin_0, end = var_42009_end_0, end_mask = var_42009_end_mask_0, x = var_41543_cast_fp16)[name = tensor("op_42009_cast_fp16")]; + tensor var_42016_begin_0 = const()[name = tensor("op_42016_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_42016_end_0 = const()[name = tensor("op_42016_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_42016_end_mask_0 = const()[name = tensor("op_42016_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_42016_cast_fp16 = slice_by_index(begin = var_42016_begin_0, end = var_42016_end_0, end_mask = var_42016_end_mask_0, x = var_41547_cast_fp16)[name = tensor("op_42016_cast_fp16")]; + tensor var_42023_begin_0 = const()[name = tensor("op_42023_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_42023_end_0 = const()[name = tensor("op_42023_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_42023_end_mask_0 = const()[name = tensor("op_42023_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_42023_cast_fp16 = slice_by_index(begin = var_42023_begin_0, end = var_42023_end_0, end_mask = var_42023_end_mask_0, x = var_41547_cast_fp16)[name = tensor("op_42023_cast_fp16")]; + tensor var_42030_begin_0 = const()[name = tensor("op_42030_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_42030_end_0 = const()[name = tensor("op_42030_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_42030_end_mask_0 = const()[name = tensor("op_42030_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_42030_cast_fp16 = slice_by_index(begin = var_42030_begin_0, end = var_42030_end_0, end_mask = var_42030_end_mask_0, x = var_41547_cast_fp16)[name = tensor("op_42030_cast_fp16")]; + tensor var_42037_begin_0 = const()[name = tensor("op_42037_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_42037_end_0 = const()[name = tensor("op_42037_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_42037_end_mask_0 = const()[name = tensor("op_42037_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_42037_cast_fp16 = slice_by_index(begin = var_42037_begin_0, end = var_42037_end_0, end_mask = var_42037_end_mask_0, x = var_41547_cast_fp16)[name = tensor("op_42037_cast_fp16")]; + tensor var_42044_begin_0 = const()[name = tensor("op_42044_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_42044_end_0 = const()[name = tensor("op_42044_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_42044_end_mask_0 = const()[name = tensor("op_42044_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_42044_cast_fp16 = slice_by_index(begin = var_42044_begin_0, end = var_42044_end_0, end_mask = var_42044_end_mask_0, x = var_41551_cast_fp16)[name = tensor("op_42044_cast_fp16")]; + tensor var_42051_begin_0 = const()[name = tensor("op_42051_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_42051_end_0 = const()[name = tensor("op_42051_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_42051_end_mask_0 = const()[name = tensor("op_42051_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_42051_cast_fp16 = slice_by_index(begin = var_42051_begin_0, end = var_42051_end_0, end_mask = var_42051_end_mask_0, x = var_41551_cast_fp16)[name = tensor("op_42051_cast_fp16")]; + tensor var_42058_begin_0 = const()[name = tensor("op_42058_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_42058_end_0 = const()[name = tensor("op_42058_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_42058_end_mask_0 = const()[name = tensor("op_42058_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_42058_cast_fp16 = slice_by_index(begin = var_42058_begin_0, end = var_42058_end_0, end_mask = var_42058_end_mask_0, x = var_41551_cast_fp16)[name = tensor("op_42058_cast_fp16")]; + tensor var_42065_begin_0 = const()[name = tensor("op_42065_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_42065_end_0 = const()[name = tensor("op_42065_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_42065_end_mask_0 = const()[name = tensor("op_42065_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_42065_cast_fp16 = slice_by_index(begin = var_42065_begin_0, end = var_42065_end_0, end_mask = var_42065_end_mask_0, x = var_41551_cast_fp16)[name = tensor("op_42065_cast_fp16")]; + tensor var_42072_begin_0 = const()[name = tensor("op_42072_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_42072_end_0 = const()[name = tensor("op_42072_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_42072_end_mask_0 = const()[name = tensor("op_42072_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_42072_cast_fp16 = slice_by_index(begin = var_42072_begin_0, end = var_42072_end_0, end_mask = var_42072_end_mask_0, x = var_41555_cast_fp16)[name = tensor("op_42072_cast_fp16")]; + tensor var_42079_begin_0 = const()[name = tensor("op_42079_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_42079_end_0 = const()[name = tensor("op_42079_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_42079_end_mask_0 = const()[name = tensor("op_42079_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_42079_cast_fp16 = slice_by_index(begin = var_42079_begin_0, end = var_42079_end_0, end_mask = var_42079_end_mask_0, x = var_41555_cast_fp16)[name = tensor("op_42079_cast_fp16")]; + tensor var_42086_begin_0 = const()[name = tensor("op_42086_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_42086_end_0 = const()[name = tensor("op_42086_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_42086_end_mask_0 = const()[name = tensor("op_42086_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_42086_cast_fp16 = slice_by_index(begin = var_42086_begin_0, end = var_42086_end_0, end_mask = var_42086_end_mask_0, x = var_41555_cast_fp16)[name = tensor("op_42086_cast_fp16")]; + tensor var_42093_begin_0 = const()[name = tensor("op_42093_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_42093_end_0 = const()[name = tensor("op_42093_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_42093_end_mask_0 = const()[name = tensor("op_42093_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_42093_cast_fp16 = slice_by_index(begin = var_42093_begin_0, end = var_42093_end_0, end_mask = var_42093_end_mask_0, x = var_41555_cast_fp16)[name = tensor("op_42093_cast_fp16")]; + tensor var_42100_begin_0 = const()[name = tensor("op_42100_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_42100_end_0 = const()[name = tensor("op_42100_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_42100_end_mask_0 = const()[name = tensor("op_42100_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_42100_cast_fp16 = slice_by_index(begin = var_42100_begin_0, end = var_42100_end_0, end_mask = var_42100_end_mask_0, x = var_41559_cast_fp16)[name = tensor("op_42100_cast_fp16")]; + tensor var_42107_begin_0 = const()[name = tensor("op_42107_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_42107_end_0 = const()[name = tensor("op_42107_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_42107_end_mask_0 = const()[name = tensor("op_42107_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_42107_cast_fp16 = slice_by_index(begin = var_42107_begin_0, end = var_42107_end_0, end_mask = var_42107_end_mask_0, x = var_41559_cast_fp16)[name = tensor("op_42107_cast_fp16")]; + tensor var_42114_begin_0 = const()[name = tensor("op_42114_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_42114_end_0 = const()[name = tensor("op_42114_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_42114_end_mask_0 = const()[name = tensor("op_42114_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_42114_cast_fp16 = slice_by_index(begin = var_42114_begin_0, end = var_42114_end_0, end_mask = var_42114_end_mask_0, x = var_41559_cast_fp16)[name = tensor("op_42114_cast_fp16")]; + tensor var_42121_begin_0 = const()[name = tensor("op_42121_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_42121_end_0 = const()[name = tensor("op_42121_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_42121_end_mask_0 = const()[name = tensor("op_42121_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_42121_cast_fp16 = slice_by_index(begin = var_42121_begin_0, end = var_42121_end_0, end_mask = var_42121_end_mask_0, x = var_41559_cast_fp16)[name = tensor("op_42121_cast_fp16")]; + tensor k_55_perm_0 = const()[name = tensor("k_55_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_42126_begin_0 = const()[name = tensor("op_42126_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_42126_end_0 = const()[name = tensor("op_42126_end_0"), val = tensor([1, 1500, 1, 64])]; + tensor var_42126_end_mask_0 = const()[name = tensor("op_42126_end_mask_0"), val = tensor([true, true, true, false])]; + tensor transpose_4 = transpose(perm = k_55_perm_0, x = key_55_cast_fp16)[name = tensor("transpose_4")]; + tensor var_42126_cast_fp16 = slice_by_index(begin = var_42126_begin_0, end = var_42126_end_0, end_mask = var_42126_end_mask_0, x = transpose_4)[name = tensor("op_42126_cast_fp16")]; + tensor var_42130_begin_0 = const()[name = tensor("op_42130_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor var_42130_end_0 = const()[name = tensor("op_42130_end_0"), val = tensor([1, 1500, 1, 128])]; + tensor var_42130_end_mask_0 = const()[name = tensor("op_42130_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_42130_cast_fp16 = slice_by_index(begin = var_42130_begin_0, end = var_42130_end_0, end_mask = var_42130_end_mask_0, x = transpose_4)[name = tensor("op_42130_cast_fp16")]; + tensor var_42134_begin_0 = const()[name = tensor("op_42134_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_42134_end_0 = const()[name = tensor("op_42134_end_0"), val = tensor([1, 1500, 1, 192])]; + tensor var_42134_end_mask_0 = const()[name = tensor("op_42134_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_42134_cast_fp16 = slice_by_index(begin = var_42134_begin_0, end = var_42134_end_0, end_mask = var_42134_end_mask_0, x = transpose_4)[name = tensor("op_42134_cast_fp16")]; + tensor var_42138_begin_0 = const()[name = tensor("op_42138_begin_0"), val = tensor([0, 0, 0, 192])]; + tensor var_42138_end_0 = const()[name = tensor("op_42138_end_0"), val = tensor([1, 1500, 1, 256])]; + tensor var_42138_end_mask_0 = const()[name = tensor("op_42138_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_42138_cast_fp16 = slice_by_index(begin = var_42138_begin_0, end = var_42138_end_0, end_mask = var_42138_end_mask_0, x = transpose_4)[name = tensor("op_42138_cast_fp16")]; + tensor var_42142_begin_0 = const()[name = tensor("op_42142_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_42142_end_0 = const()[name = tensor("op_42142_end_0"), val = tensor([1, 1500, 1, 320])]; + tensor var_42142_end_mask_0 = const()[name = tensor("op_42142_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_42142_cast_fp16 = slice_by_index(begin = var_42142_begin_0, end = var_42142_end_0, end_mask = var_42142_end_mask_0, x = transpose_4)[name = tensor("op_42142_cast_fp16")]; + tensor var_42146_begin_0 = const()[name = tensor("op_42146_begin_0"), val = tensor([0, 0, 0, 320])]; + tensor var_42146_end_0 = const()[name = tensor("op_42146_end_0"), val = tensor([1, 1500, 1, 384])]; + tensor var_42146_end_mask_0 = const()[name = tensor("op_42146_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_42146_cast_fp16 = slice_by_index(begin = var_42146_begin_0, end = var_42146_end_0, end_mask = var_42146_end_mask_0, x = transpose_4)[name = tensor("op_42146_cast_fp16")]; + tensor var_42150_begin_0 = const()[name = tensor("op_42150_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_42150_end_0 = const()[name = tensor("op_42150_end_0"), val = tensor([1, 1500, 1, 448])]; + tensor var_42150_end_mask_0 = const()[name = tensor("op_42150_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_42150_cast_fp16 = slice_by_index(begin = var_42150_begin_0, end = var_42150_end_0, end_mask = var_42150_end_mask_0, x = transpose_4)[name = tensor("op_42150_cast_fp16")]; + tensor var_42154_begin_0 = const()[name = tensor("op_42154_begin_0"), val = tensor([0, 0, 0, 448])]; + tensor var_42154_end_0 = const()[name = tensor("op_42154_end_0"), val = tensor([1, 1500, 1, 512])]; + tensor var_42154_end_mask_0 = const()[name = tensor("op_42154_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_42154_cast_fp16 = slice_by_index(begin = var_42154_begin_0, end = var_42154_end_0, end_mask = var_42154_end_mask_0, x = transpose_4)[name = tensor("op_42154_cast_fp16")]; + tensor var_42158_begin_0 = const()[name = tensor("op_42158_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_42158_end_0 = const()[name = tensor("op_42158_end_0"), val = tensor([1, 1500, 1, 576])]; + tensor var_42158_end_mask_0 = const()[name = tensor("op_42158_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_42158_cast_fp16 = slice_by_index(begin = var_42158_begin_0, end = var_42158_end_0, end_mask = var_42158_end_mask_0, x = transpose_4)[name = tensor("op_42158_cast_fp16")]; + tensor var_42162_begin_0 = const()[name = tensor("op_42162_begin_0"), val = tensor([0, 0, 0, 576])]; + tensor var_42162_end_0 = const()[name = tensor("op_42162_end_0"), val = tensor([1, 1500, 1, 640])]; + tensor var_42162_end_mask_0 = const()[name = tensor("op_42162_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_42162_cast_fp16 = slice_by_index(begin = var_42162_begin_0, end = var_42162_end_0, end_mask = var_42162_end_mask_0, x = transpose_4)[name = tensor("op_42162_cast_fp16")]; + tensor var_42166_begin_0 = const()[name = tensor("op_42166_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_42166_end_0 = const()[name = tensor("op_42166_end_0"), val = tensor([1, 1500, 1, 704])]; + tensor var_42166_end_mask_0 = const()[name = tensor("op_42166_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_42166_cast_fp16 = slice_by_index(begin = var_42166_begin_0, end = var_42166_end_0, end_mask = var_42166_end_mask_0, x = transpose_4)[name = tensor("op_42166_cast_fp16")]; + tensor var_42170_begin_0 = const()[name = tensor("op_42170_begin_0"), val = tensor([0, 0, 0, 704])]; + tensor var_42170_end_0 = const()[name = tensor("op_42170_end_0"), val = tensor([1, 1500, 1, 768])]; + tensor var_42170_end_mask_0 = const()[name = tensor("op_42170_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_42170_cast_fp16 = slice_by_index(begin = var_42170_begin_0, end = var_42170_end_0, end_mask = var_42170_end_mask_0, x = transpose_4)[name = tensor("op_42170_cast_fp16")]; + tensor var_42174_begin_0 = const()[name = tensor("op_42174_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_42174_end_0 = const()[name = tensor("op_42174_end_0"), val = tensor([1, 1500, 1, 832])]; + tensor var_42174_end_mask_0 = const()[name = tensor("op_42174_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_42174_cast_fp16 = slice_by_index(begin = var_42174_begin_0, end = var_42174_end_0, end_mask = var_42174_end_mask_0, x = transpose_4)[name = tensor("op_42174_cast_fp16")]; + tensor var_42178_begin_0 = const()[name = tensor("op_42178_begin_0"), val = tensor([0, 0, 0, 832])]; + tensor var_42178_end_0 = const()[name = tensor("op_42178_end_0"), val = tensor([1, 1500, 1, 896])]; + tensor var_42178_end_mask_0 = const()[name = tensor("op_42178_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_42178_cast_fp16 = slice_by_index(begin = var_42178_begin_0, end = var_42178_end_0, end_mask = var_42178_end_mask_0, x = transpose_4)[name = tensor("op_42178_cast_fp16")]; + tensor var_42182_begin_0 = const()[name = tensor("op_42182_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_42182_end_0 = const()[name = tensor("op_42182_end_0"), val = tensor([1, 1500, 1, 960])]; + tensor var_42182_end_mask_0 = const()[name = tensor("op_42182_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_42182_cast_fp16 = slice_by_index(begin = var_42182_begin_0, end = var_42182_end_0, end_mask = var_42182_end_mask_0, x = transpose_4)[name = tensor("op_42182_cast_fp16")]; + tensor var_42186_begin_0 = const()[name = tensor("op_42186_begin_0"), val = tensor([0, 0, 0, 960])]; + tensor var_42186_end_0 = const()[name = tensor("op_42186_end_0"), val = tensor([1, 1500, 1, 1024])]; + tensor var_42186_end_mask_0 = const()[name = tensor("op_42186_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_42186_cast_fp16 = slice_by_index(begin = var_42186_begin_0, end = var_42186_end_0, end_mask = var_42186_end_mask_0, x = transpose_4)[name = tensor("op_42186_cast_fp16")]; + tensor var_42190_begin_0 = const()[name = tensor("op_42190_begin_0"), val = tensor([0, 0, 0, 1024])]; + tensor var_42190_end_0 = const()[name = tensor("op_42190_end_0"), val = tensor([1, 1500, 1, 1088])]; + tensor var_42190_end_mask_0 = const()[name = tensor("op_42190_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_42190_cast_fp16 = slice_by_index(begin = var_42190_begin_0, end = var_42190_end_0, end_mask = var_42190_end_mask_0, x = transpose_4)[name = tensor("op_42190_cast_fp16")]; + tensor var_42194_begin_0 = const()[name = tensor("op_42194_begin_0"), val = tensor([0, 0, 0, 1088])]; + tensor var_42194_end_0 = const()[name = tensor("op_42194_end_0"), val = tensor([1, 1500, 1, 1152])]; + tensor var_42194_end_mask_0 = const()[name = tensor("op_42194_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_42194_cast_fp16 = slice_by_index(begin = var_42194_begin_0, end = var_42194_end_0, end_mask = var_42194_end_mask_0, x = transpose_4)[name = tensor("op_42194_cast_fp16")]; + tensor var_42198_begin_0 = const()[name = tensor("op_42198_begin_0"), val = tensor([0, 0, 0, 1152])]; + tensor var_42198_end_0 = const()[name = tensor("op_42198_end_0"), val = tensor([1, 1500, 1, 1216])]; + tensor var_42198_end_mask_0 = const()[name = tensor("op_42198_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_42198_cast_fp16 = slice_by_index(begin = var_42198_begin_0, end = var_42198_end_0, end_mask = var_42198_end_mask_0, x = transpose_4)[name = tensor("op_42198_cast_fp16")]; + tensor var_42202_begin_0 = const()[name = tensor("op_42202_begin_0"), val = tensor([0, 0, 0, 1216])]; + tensor var_42202_end_0 = const()[name = tensor("op_42202_end_0"), val = tensor([1, 1500, 1, 1280])]; + tensor var_42202_end_mask_0 = const()[name = tensor("op_42202_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_42202_cast_fp16 = slice_by_index(begin = var_42202_begin_0, end = var_42202_end_0, end_mask = var_42202_end_mask_0, x = transpose_4)[name = tensor("op_42202_cast_fp16")]; + tensor var_42204_begin_0 = const()[name = tensor("op_42204_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_42204_end_0 = const()[name = tensor("op_42204_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_42204_end_mask_0 = const()[name = tensor("op_42204_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_42204_cast_fp16 = slice_by_index(begin = var_42204_begin_0, end = var_42204_end_0, end_mask = var_42204_end_mask_0, x = value_55_cast_fp16)[name = tensor("op_42204_cast_fp16")]; + tensor var_42208_begin_0 = const()[name = tensor("op_42208_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_42208_end_0 = const()[name = tensor("op_42208_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_42208_end_mask_0 = const()[name = tensor("op_42208_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_42208_cast_fp16 = slice_by_index(begin = var_42208_begin_0, end = var_42208_end_0, end_mask = var_42208_end_mask_0, x = value_55_cast_fp16)[name = tensor("op_42208_cast_fp16")]; + tensor var_42212_begin_0 = const()[name = tensor("op_42212_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_42212_end_0 = const()[name = tensor("op_42212_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_42212_end_mask_0 = const()[name = tensor("op_42212_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_42212_cast_fp16 = slice_by_index(begin = var_42212_begin_0, end = var_42212_end_0, end_mask = var_42212_end_mask_0, x = value_55_cast_fp16)[name = tensor("op_42212_cast_fp16")]; + tensor var_42216_begin_0 = const()[name = tensor("op_42216_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_42216_end_0 = const()[name = tensor("op_42216_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_42216_end_mask_0 = const()[name = tensor("op_42216_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_42216_cast_fp16 = slice_by_index(begin = var_42216_begin_0, end = var_42216_end_0, end_mask = var_42216_end_mask_0, x = value_55_cast_fp16)[name = tensor("op_42216_cast_fp16")]; + tensor var_42220_begin_0 = const()[name = tensor("op_42220_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_42220_end_0 = const()[name = tensor("op_42220_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_42220_end_mask_0 = const()[name = tensor("op_42220_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_42220_cast_fp16 = slice_by_index(begin = var_42220_begin_0, end = var_42220_end_0, end_mask = var_42220_end_mask_0, x = value_55_cast_fp16)[name = tensor("op_42220_cast_fp16")]; + tensor var_42224_begin_0 = const()[name = tensor("op_42224_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_42224_end_0 = const()[name = tensor("op_42224_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_42224_end_mask_0 = const()[name = tensor("op_42224_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_42224_cast_fp16 = slice_by_index(begin = var_42224_begin_0, end = var_42224_end_0, end_mask = var_42224_end_mask_0, x = value_55_cast_fp16)[name = tensor("op_42224_cast_fp16")]; + tensor var_42228_begin_0 = const()[name = tensor("op_42228_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_42228_end_0 = const()[name = tensor("op_42228_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_42228_end_mask_0 = const()[name = tensor("op_42228_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_42228_cast_fp16 = slice_by_index(begin = var_42228_begin_0, end = var_42228_end_0, end_mask = var_42228_end_mask_0, x = value_55_cast_fp16)[name = tensor("op_42228_cast_fp16")]; + tensor var_42232_begin_0 = const()[name = tensor("op_42232_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_42232_end_0 = const()[name = tensor("op_42232_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_42232_end_mask_0 = const()[name = tensor("op_42232_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_42232_cast_fp16 = slice_by_index(begin = var_42232_begin_0, end = var_42232_end_0, end_mask = var_42232_end_mask_0, x = value_55_cast_fp16)[name = tensor("op_42232_cast_fp16")]; + tensor var_42236_begin_0 = const()[name = tensor("op_42236_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_42236_end_0 = const()[name = tensor("op_42236_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_42236_end_mask_0 = const()[name = tensor("op_42236_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_42236_cast_fp16 = slice_by_index(begin = var_42236_begin_0, end = var_42236_end_0, end_mask = var_42236_end_mask_0, x = value_55_cast_fp16)[name = tensor("op_42236_cast_fp16")]; + tensor var_42240_begin_0 = const()[name = tensor("op_42240_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_42240_end_0 = const()[name = tensor("op_42240_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_42240_end_mask_0 = const()[name = tensor("op_42240_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_42240_cast_fp16 = slice_by_index(begin = var_42240_begin_0, end = var_42240_end_0, end_mask = var_42240_end_mask_0, x = value_55_cast_fp16)[name = tensor("op_42240_cast_fp16")]; + tensor var_42244_begin_0 = const()[name = tensor("op_42244_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_42244_end_0 = const()[name = tensor("op_42244_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_42244_end_mask_0 = const()[name = tensor("op_42244_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_42244_cast_fp16 = slice_by_index(begin = var_42244_begin_0, end = var_42244_end_0, end_mask = var_42244_end_mask_0, x = value_55_cast_fp16)[name = tensor("op_42244_cast_fp16")]; + tensor var_42248_begin_0 = const()[name = tensor("op_42248_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_42248_end_0 = const()[name = tensor("op_42248_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_42248_end_mask_0 = const()[name = tensor("op_42248_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_42248_cast_fp16 = slice_by_index(begin = var_42248_begin_0, end = var_42248_end_0, end_mask = var_42248_end_mask_0, x = value_55_cast_fp16)[name = tensor("op_42248_cast_fp16")]; + tensor var_42252_begin_0 = const()[name = tensor("op_42252_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_42252_end_0 = const()[name = tensor("op_42252_end_0"), val = tensor([1, 832, 1, 1500])]; + tensor var_42252_end_mask_0 = const()[name = tensor("op_42252_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_42252_cast_fp16 = slice_by_index(begin = var_42252_begin_0, end = var_42252_end_0, end_mask = var_42252_end_mask_0, x = value_55_cast_fp16)[name = tensor("op_42252_cast_fp16")]; + tensor var_42256_begin_0 = const()[name = tensor("op_42256_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_42256_end_0 = const()[name = tensor("op_42256_end_0"), val = tensor([1, 896, 1, 1500])]; + tensor var_42256_end_mask_0 = const()[name = tensor("op_42256_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_42256_cast_fp16 = slice_by_index(begin = var_42256_begin_0, end = var_42256_end_0, end_mask = var_42256_end_mask_0, x = value_55_cast_fp16)[name = tensor("op_42256_cast_fp16")]; + tensor var_42260_begin_0 = const()[name = tensor("op_42260_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_42260_end_0 = const()[name = tensor("op_42260_end_0"), val = tensor([1, 960, 1, 1500])]; + tensor var_42260_end_mask_0 = const()[name = tensor("op_42260_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_42260_cast_fp16 = slice_by_index(begin = var_42260_begin_0, end = var_42260_end_0, end_mask = var_42260_end_mask_0, x = value_55_cast_fp16)[name = tensor("op_42260_cast_fp16")]; + tensor var_42264_begin_0 = const()[name = tensor("op_42264_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_42264_end_0 = const()[name = tensor("op_42264_end_0"), val = tensor([1, 1024, 1, 1500])]; + tensor var_42264_end_mask_0 = const()[name = tensor("op_42264_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_42264_cast_fp16 = slice_by_index(begin = var_42264_begin_0, end = var_42264_end_0, end_mask = var_42264_end_mask_0, x = value_55_cast_fp16)[name = tensor("op_42264_cast_fp16")]; + tensor var_42268_begin_0 = const()[name = tensor("op_42268_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_42268_end_0 = const()[name = tensor("op_42268_end_0"), val = tensor([1, 1088, 1, 1500])]; + tensor var_42268_end_mask_0 = const()[name = tensor("op_42268_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_42268_cast_fp16 = slice_by_index(begin = var_42268_begin_0, end = var_42268_end_0, end_mask = var_42268_end_mask_0, x = value_55_cast_fp16)[name = tensor("op_42268_cast_fp16")]; + tensor var_42272_begin_0 = const()[name = tensor("op_42272_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_42272_end_0 = const()[name = tensor("op_42272_end_0"), val = tensor([1, 1152, 1, 1500])]; + tensor var_42272_end_mask_0 = const()[name = tensor("op_42272_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_42272_cast_fp16 = slice_by_index(begin = var_42272_begin_0, end = var_42272_end_0, end_mask = var_42272_end_mask_0, x = value_55_cast_fp16)[name = tensor("op_42272_cast_fp16")]; + tensor var_42276_begin_0 = const()[name = tensor("op_42276_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_42276_end_0 = const()[name = tensor("op_42276_end_0"), val = tensor([1, 1216, 1, 1500])]; + tensor var_42276_end_mask_0 = const()[name = tensor("op_42276_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_42276_cast_fp16 = slice_by_index(begin = var_42276_begin_0, end = var_42276_end_0, end_mask = var_42276_end_mask_0, x = value_55_cast_fp16)[name = tensor("op_42276_cast_fp16")]; + tensor var_42280_begin_0 = const()[name = tensor("op_42280_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_42280_end_0 = const()[name = tensor("op_42280_end_0"), val = tensor([1, 1280, 1, 1500])]; + tensor var_42280_end_mask_0 = const()[name = tensor("op_42280_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_42280_cast_fp16 = slice_by_index(begin = var_42280_begin_0, end = var_42280_end_0, end_mask = var_42280_end_mask_0, x = value_55_cast_fp16)[name = tensor("op_42280_cast_fp16")]; + tensor var_42284_equation_0 = const()[name = tensor("op_42284_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42284_cast_fp16 = einsum(equation = var_42284_equation_0, values = (var_42126_cast_fp16, var_41568_cast_fp16))[name = tensor("op_42284_cast_fp16")]; + tensor var_42285_to_fp16 = const()[name = tensor("op_42285_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4321_cast_fp16 = mul(x = var_42284_cast_fp16, y = var_42285_to_fp16)[name = tensor("aw_chunk_4321_cast_fp16")]; + tensor var_42288_equation_0 = const()[name = tensor("op_42288_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42288_cast_fp16 = einsum(equation = var_42288_equation_0, values = (var_42126_cast_fp16, var_41575_cast_fp16))[name = tensor("op_42288_cast_fp16")]; + tensor var_42289_to_fp16 = const()[name = tensor("op_42289_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4323_cast_fp16 = mul(x = var_42288_cast_fp16, y = var_42289_to_fp16)[name = tensor("aw_chunk_4323_cast_fp16")]; + tensor var_42292_equation_0 = const()[name = tensor("op_42292_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42292_cast_fp16 = einsum(equation = var_42292_equation_0, values = (var_42126_cast_fp16, var_41582_cast_fp16))[name = tensor("op_42292_cast_fp16")]; + tensor var_42293_to_fp16 = const()[name = tensor("op_42293_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4325_cast_fp16 = mul(x = var_42292_cast_fp16, y = var_42293_to_fp16)[name = tensor("aw_chunk_4325_cast_fp16")]; + tensor var_42296_equation_0 = const()[name = tensor("op_42296_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42296_cast_fp16 = einsum(equation = var_42296_equation_0, values = (var_42126_cast_fp16, var_41589_cast_fp16))[name = tensor("op_42296_cast_fp16")]; + tensor var_42297_to_fp16 = const()[name = tensor("op_42297_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4327_cast_fp16 = mul(x = var_42296_cast_fp16, y = var_42297_to_fp16)[name = tensor("aw_chunk_4327_cast_fp16")]; + tensor var_42300_equation_0 = const()[name = tensor("op_42300_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42300_cast_fp16 = einsum(equation = var_42300_equation_0, values = (var_42130_cast_fp16, var_41596_cast_fp16))[name = tensor("op_42300_cast_fp16")]; + tensor var_42301_to_fp16 = const()[name = tensor("op_42301_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4329_cast_fp16 = mul(x = var_42300_cast_fp16, y = var_42301_to_fp16)[name = tensor("aw_chunk_4329_cast_fp16")]; + tensor var_42304_equation_0 = const()[name = tensor("op_42304_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42304_cast_fp16 = einsum(equation = var_42304_equation_0, values = (var_42130_cast_fp16, var_41603_cast_fp16))[name = tensor("op_42304_cast_fp16")]; + tensor var_42305_to_fp16 = const()[name = tensor("op_42305_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4331_cast_fp16 = mul(x = var_42304_cast_fp16, y = var_42305_to_fp16)[name = tensor("aw_chunk_4331_cast_fp16")]; + tensor var_42308_equation_0 = const()[name = tensor("op_42308_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42308_cast_fp16 = einsum(equation = var_42308_equation_0, values = (var_42130_cast_fp16, var_41610_cast_fp16))[name = tensor("op_42308_cast_fp16")]; + tensor var_42309_to_fp16 = const()[name = tensor("op_42309_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4333_cast_fp16 = mul(x = var_42308_cast_fp16, y = var_42309_to_fp16)[name = tensor("aw_chunk_4333_cast_fp16")]; + tensor var_42312_equation_0 = const()[name = tensor("op_42312_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42312_cast_fp16 = einsum(equation = var_42312_equation_0, values = (var_42130_cast_fp16, var_41617_cast_fp16))[name = tensor("op_42312_cast_fp16")]; + tensor var_42313_to_fp16 = const()[name = tensor("op_42313_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4335_cast_fp16 = mul(x = var_42312_cast_fp16, y = var_42313_to_fp16)[name = tensor("aw_chunk_4335_cast_fp16")]; + tensor var_42316_equation_0 = const()[name = tensor("op_42316_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42316_cast_fp16 = einsum(equation = var_42316_equation_0, values = (var_42134_cast_fp16, var_41624_cast_fp16))[name = tensor("op_42316_cast_fp16")]; + tensor var_42317_to_fp16 = const()[name = tensor("op_42317_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4337_cast_fp16 = mul(x = var_42316_cast_fp16, y = var_42317_to_fp16)[name = tensor("aw_chunk_4337_cast_fp16")]; + tensor var_42320_equation_0 = const()[name = tensor("op_42320_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42320_cast_fp16 = einsum(equation = var_42320_equation_0, values = (var_42134_cast_fp16, var_41631_cast_fp16))[name = tensor("op_42320_cast_fp16")]; + tensor var_42321_to_fp16 = const()[name = tensor("op_42321_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4339_cast_fp16 = mul(x = var_42320_cast_fp16, y = var_42321_to_fp16)[name = tensor("aw_chunk_4339_cast_fp16")]; + tensor var_42324_equation_0 = const()[name = tensor("op_42324_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42324_cast_fp16 = einsum(equation = var_42324_equation_0, values = (var_42134_cast_fp16, var_41638_cast_fp16))[name = tensor("op_42324_cast_fp16")]; + tensor var_42325_to_fp16 = const()[name = tensor("op_42325_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4341_cast_fp16 = mul(x = var_42324_cast_fp16, y = var_42325_to_fp16)[name = tensor("aw_chunk_4341_cast_fp16")]; + tensor var_42328_equation_0 = const()[name = tensor("op_42328_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42328_cast_fp16 = einsum(equation = var_42328_equation_0, values = (var_42134_cast_fp16, var_41645_cast_fp16))[name = tensor("op_42328_cast_fp16")]; + tensor var_42329_to_fp16 = const()[name = tensor("op_42329_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4343_cast_fp16 = mul(x = var_42328_cast_fp16, y = var_42329_to_fp16)[name = tensor("aw_chunk_4343_cast_fp16")]; + tensor var_42332_equation_0 = const()[name = tensor("op_42332_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42332_cast_fp16 = einsum(equation = var_42332_equation_0, values = (var_42138_cast_fp16, var_41652_cast_fp16))[name = tensor("op_42332_cast_fp16")]; + tensor var_42333_to_fp16 = const()[name = tensor("op_42333_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4345_cast_fp16 = mul(x = var_42332_cast_fp16, y = var_42333_to_fp16)[name = tensor("aw_chunk_4345_cast_fp16")]; + tensor var_42336_equation_0 = const()[name = tensor("op_42336_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42336_cast_fp16 = einsum(equation = var_42336_equation_0, values = (var_42138_cast_fp16, var_41659_cast_fp16))[name = tensor("op_42336_cast_fp16")]; + tensor var_42337_to_fp16 = const()[name = tensor("op_42337_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4347_cast_fp16 = mul(x = var_42336_cast_fp16, y = var_42337_to_fp16)[name = tensor("aw_chunk_4347_cast_fp16")]; + tensor var_42340_equation_0 = const()[name = tensor("op_42340_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42340_cast_fp16 = einsum(equation = var_42340_equation_0, values = (var_42138_cast_fp16, var_41666_cast_fp16))[name = tensor("op_42340_cast_fp16")]; + tensor var_42341_to_fp16 = const()[name = tensor("op_42341_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4349_cast_fp16 = mul(x = var_42340_cast_fp16, y = var_42341_to_fp16)[name = tensor("aw_chunk_4349_cast_fp16")]; + tensor var_42344_equation_0 = const()[name = tensor("op_42344_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42344_cast_fp16 = einsum(equation = var_42344_equation_0, values = (var_42138_cast_fp16, var_41673_cast_fp16))[name = tensor("op_42344_cast_fp16")]; + tensor var_42345_to_fp16 = const()[name = tensor("op_42345_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4351_cast_fp16 = mul(x = var_42344_cast_fp16, y = var_42345_to_fp16)[name = tensor("aw_chunk_4351_cast_fp16")]; + tensor var_42348_equation_0 = const()[name = tensor("op_42348_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42348_cast_fp16 = einsum(equation = var_42348_equation_0, values = (var_42142_cast_fp16, var_41680_cast_fp16))[name = tensor("op_42348_cast_fp16")]; + tensor var_42349_to_fp16 = const()[name = tensor("op_42349_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4353_cast_fp16 = mul(x = var_42348_cast_fp16, y = var_42349_to_fp16)[name = tensor("aw_chunk_4353_cast_fp16")]; + tensor var_42352_equation_0 = const()[name = tensor("op_42352_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42352_cast_fp16 = einsum(equation = var_42352_equation_0, values = (var_42142_cast_fp16, var_41687_cast_fp16))[name = tensor("op_42352_cast_fp16")]; + tensor var_42353_to_fp16 = const()[name = tensor("op_42353_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4355_cast_fp16 = mul(x = var_42352_cast_fp16, y = var_42353_to_fp16)[name = tensor("aw_chunk_4355_cast_fp16")]; + tensor var_42356_equation_0 = const()[name = tensor("op_42356_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42356_cast_fp16 = einsum(equation = var_42356_equation_0, values = (var_42142_cast_fp16, var_41694_cast_fp16))[name = tensor("op_42356_cast_fp16")]; + tensor var_42357_to_fp16 = const()[name = tensor("op_42357_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4357_cast_fp16 = mul(x = var_42356_cast_fp16, y = var_42357_to_fp16)[name = tensor("aw_chunk_4357_cast_fp16")]; + tensor var_42360_equation_0 = const()[name = tensor("op_42360_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42360_cast_fp16 = einsum(equation = var_42360_equation_0, values = (var_42142_cast_fp16, var_41701_cast_fp16))[name = tensor("op_42360_cast_fp16")]; + tensor var_42361_to_fp16 = const()[name = tensor("op_42361_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4359_cast_fp16 = mul(x = var_42360_cast_fp16, y = var_42361_to_fp16)[name = tensor("aw_chunk_4359_cast_fp16")]; + tensor var_42364_equation_0 = const()[name = tensor("op_42364_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42364_cast_fp16 = einsum(equation = var_42364_equation_0, values = (var_42146_cast_fp16, var_41708_cast_fp16))[name = tensor("op_42364_cast_fp16")]; + tensor var_42365_to_fp16 = const()[name = tensor("op_42365_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4361_cast_fp16 = mul(x = var_42364_cast_fp16, y = var_42365_to_fp16)[name = tensor("aw_chunk_4361_cast_fp16")]; + tensor var_42368_equation_0 = const()[name = tensor("op_42368_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42368_cast_fp16 = einsum(equation = var_42368_equation_0, values = (var_42146_cast_fp16, var_41715_cast_fp16))[name = tensor("op_42368_cast_fp16")]; + tensor var_42369_to_fp16 = const()[name = tensor("op_42369_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4363_cast_fp16 = mul(x = var_42368_cast_fp16, y = var_42369_to_fp16)[name = tensor("aw_chunk_4363_cast_fp16")]; + tensor var_42372_equation_0 = const()[name = tensor("op_42372_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42372_cast_fp16 = einsum(equation = var_42372_equation_0, values = (var_42146_cast_fp16, var_41722_cast_fp16))[name = tensor("op_42372_cast_fp16")]; + tensor var_42373_to_fp16 = const()[name = tensor("op_42373_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4365_cast_fp16 = mul(x = var_42372_cast_fp16, y = var_42373_to_fp16)[name = tensor("aw_chunk_4365_cast_fp16")]; + tensor var_42376_equation_0 = const()[name = tensor("op_42376_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42376_cast_fp16 = einsum(equation = var_42376_equation_0, values = (var_42146_cast_fp16, var_41729_cast_fp16))[name = tensor("op_42376_cast_fp16")]; + tensor var_42377_to_fp16 = const()[name = tensor("op_42377_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4367_cast_fp16 = mul(x = var_42376_cast_fp16, y = var_42377_to_fp16)[name = tensor("aw_chunk_4367_cast_fp16")]; + tensor var_42380_equation_0 = const()[name = tensor("op_42380_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42380_cast_fp16 = einsum(equation = var_42380_equation_0, values = (var_42150_cast_fp16, var_41736_cast_fp16))[name = tensor("op_42380_cast_fp16")]; + tensor var_42381_to_fp16 = const()[name = tensor("op_42381_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4369_cast_fp16 = mul(x = var_42380_cast_fp16, y = var_42381_to_fp16)[name = tensor("aw_chunk_4369_cast_fp16")]; + tensor var_42384_equation_0 = const()[name = tensor("op_42384_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42384_cast_fp16 = einsum(equation = var_42384_equation_0, values = (var_42150_cast_fp16, var_41743_cast_fp16))[name = tensor("op_42384_cast_fp16")]; + tensor var_42385_to_fp16 = const()[name = tensor("op_42385_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4371_cast_fp16 = mul(x = var_42384_cast_fp16, y = var_42385_to_fp16)[name = tensor("aw_chunk_4371_cast_fp16")]; + tensor var_42388_equation_0 = const()[name = tensor("op_42388_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42388_cast_fp16 = einsum(equation = var_42388_equation_0, values = (var_42150_cast_fp16, var_41750_cast_fp16))[name = tensor("op_42388_cast_fp16")]; + tensor var_42389_to_fp16 = const()[name = tensor("op_42389_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4373_cast_fp16 = mul(x = var_42388_cast_fp16, y = var_42389_to_fp16)[name = tensor("aw_chunk_4373_cast_fp16")]; + tensor var_42392_equation_0 = const()[name = tensor("op_42392_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42392_cast_fp16 = einsum(equation = var_42392_equation_0, values = (var_42150_cast_fp16, var_41757_cast_fp16))[name = tensor("op_42392_cast_fp16")]; + tensor var_42393_to_fp16 = const()[name = tensor("op_42393_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4375_cast_fp16 = mul(x = var_42392_cast_fp16, y = var_42393_to_fp16)[name = tensor("aw_chunk_4375_cast_fp16")]; + tensor var_42396_equation_0 = const()[name = tensor("op_42396_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42396_cast_fp16 = einsum(equation = var_42396_equation_0, values = (var_42154_cast_fp16, var_41764_cast_fp16))[name = tensor("op_42396_cast_fp16")]; + tensor var_42397_to_fp16 = const()[name = tensor("op_42397_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4377_cast_fp16 = mul(x = var_42396_cast_fp16, y = var_42397_to_fp16)[name = tensor("aw_chunk_4377_cast_fp16")]; + tensor var_42400_equation_0 = const()[name = tensor("op_42400_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42400_cast_fp16 = einsum(equation = var_42400_equation_0, values = (var_42154_cast_fp16, var_41771_cast_fp16))[name = tensor("op_42400_cast_fp16")]; + tensor var_42401_to_fp16 = const()[name = tensor("op_42401_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4379_cast_fp16 = mul(x = var_42400_cast_fp16, y = var_42401_to_fp16)[name = tensor("aw_chunk_4379_cast_fp16")]; + tensor var_42404_equation_0 = const()[name = tensor("op_42404_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42404_cast_fp16 = einsum(equation = var_42404_equation_0, values = (var_42154_cast_fp16, var_41778_cast_fp16))[name = tensor("op_42404_cast_fp16")]; + tensor var_42405_to_fp16 = const()[name = tensor("op_42405_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4381_cast_fp16 = mul(x = var_42404_cast_fp16, y = var_42405_to_fp16)[name = tensor("aw_chunk_4381_cast_fp16")]; + tensor var_42408_equation_0 = const()[name = tensor("op_42408_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42408_cast_fp16 = einsum(equation = var_42408_equation_0, values = (var_42154_cast_fp16, var_41785_cast_fp16))[name = tensor("op_42408_cast_fp16")]; + tensor var_42409_to_fp16 = const()[name = tensor("op_42409_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4383_cast_fp16 = mul(x = var_42408_cast_fp16, y = var_42409_to_fp16)[name = tensor("aw_chunk_4383_cast_fp16")]; + tensor var_42412_equation_0 = const()[name = tensor("op_42412_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42412_cast_fp16 = einsum(equation = var_42412_equation_0, values = (var_42158_cast_fp16, var_41792_cast_fp16))[name = tensor("op_42412_cast_fp16")]; + tensor var_42413_to_fp16 = const()[name = tensor("op_42413_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4385_cast_fp16 = mul(x = var_42412_cast_fp16, y = var_42413_to_fp16)[name = tensor("aw_chunk_4385_cast_fp16")]; + tensor var_42416_equation_0 = const()[name = tensor("op_42416_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42416_cast_fp16 = einsum(equation = var_42416_equation_0, values = (var_42158_cast_fp16, var_41799_cast_fp16))[name = tensor("op_42416_cast_fp16")]; + tensor var_42417_to_fp16 = const()[name = tensor("op_42417_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4387_cast_fp16 = mul(x = var_42416_cast_fp16, y = var_42417_to_fp16)[name = tensor("aw_chunk_4387_cast_fp16")]; + tensor var_42420_equation_0 = const()[name = tensor("op_42420_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42420_cast_fp16 = einsum(equation = var_42420_equation_0, values = (var_42158_cast_fp16, var_41806_cast_fp16))[name = tensor("op_42420_cast_fp16")]; + tensor var_42421_to_fp16 = const()[name = tensor("op_42421_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4389_cast_fp16 = mul(x = var_42420_cast_fp16, y = var_42421_to_fp16)[name = tensor("aw_chunk_4389_cast_fp16")]; + tensor var_42424_equation_0 = const()[name = tensor("op_42424_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42424_cast_fp16 = einsum(equation = var_42424_equation_0, values = (var_42158_cast_fp16, var_41813_cast_fp16))[name = tensor("op_42424_cast_fp16")]; + tensor var_42425_to_fp16 = const()[name = tensor("op_42425_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4391_cast_fp16 = mul(x = var_42424_cast_fp16, y = var_42425_to_fp16)[name = tensor("aw_chunk_4391_cast_fp16")]; + tensor var_42428_equation_0 = const()[name = tensor("op_42428_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42428_cast_fp16 = einsum(equation = var_42428_equation_0, values = (var_42162_cast_fp16, var_41820_cast_fp16))[name = tensor("op_42428_cast_fp16")]; + tensor var_42429_to_fp16 = const()[name = tensor("op_42429_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4393_cast_fp16 = mul(x = var_42428_cast_fp16, y = var_42429_to_fp16)[name = tensor("aw_chunk_4393_cast_fp16")]; + tensor var_42432_equation_0 = const()[name = tensor("op_42432_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42432_cast_fp16 = einsum(equation = var_42432_equation_0, values = (var_42162_cast_fp16, var_41827_cast_fp16))[name = tensor("op_42432_cast_fp16")]; + tensor var_42433_to_fp16 = const()[name = tensor("op_42433_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4395_cast_fp16 = mul(x = var_42432_cast_fp16, y = var_42433_to_fp16)[name = tensor("aw_chunk_4395_cast_fp16")]; + tensor var_42436_equation_0 = const()[name = tensor("op_42436_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42436_cast_fp16 = einsum(equation = var_42436_equation_0, values = (var_42162_cast_fp16, var_41834_cast_fp16))[name = tensor("op_42436_cast_fp16")]; + tensor var_42437_to_fp16 = const()[name = tensor("op_42437_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4397_cast_fp16 = mul(x = var_42436_cast_fp16, y = var_42437_to_fp16)[name = tensor("aw_chunk_4397_cast_fp16")]; + tensor var_42440_equation_0 = const()[name = tensor("op_42440_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42440_cast_fp16 = einsum(equation = var_42440_equation_0, values = (var_42162_cast_fp16, var_41841_cast_fp16))[name = tensor("op_42440_cast_fp16")]; + tensor var_42441_to_fp16 = const()[name = tensor("op_42441_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4399_cast_fp16 = mul(x = var_42440_cast_fp16, y = var_42441_to_fp16)[name = tensor("aw_chunk_4399_cast_fp16")]; + tensor var_42444_equation_0 = const()[name = tensor("op_42444_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42444_cast_fp16 = einsum(equation = var_42444_equation_0, values = (var_42166_cast_fp16, var_41848_cast_fp16))[name = tensor("op_42444_cast_fp16")]; + tensor var_42445_to_fp16 = const()[name = tensor("op_42445_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4401_cast_fp16 = mul(x = var_42444_cast_fp16, y = var_42445_to_fp16)[name = tensor("aw_chunk_4401_cast_fp16")]; + tensor var_42448_equation_0 = const()[name = tensor("op_42448_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42448_cast_fp16 = einsum(equation = var_42448_equation_0, values = (var_42166_cast_fp16, var_41855_cast_fp16))[name = tensor("op_42448_cast_fp16")]; + tensor var_42449_to_fp16 = const()[name = tensor("op_42449_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4403_cast_fp16 = mul(x = var_42448_cast_fp16, y = var_42449_to_fp16)[name = tensor("aw_chunk_4403_cast_fp16")]; + tensor var_42452_equation_0 = const()[name = tensor("op_42452_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42452_cast_fp16 = einsum(equation = var_42452_equation_0, values = (var_42166_cast_fp16, var_41862_cast_fp16))[name = tensor("op_42452_cast_fp16")]; + tensor var_42453_to_fp16 = const()[name = tensor("op_42453_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4405_cast_fp16 = mul(x = var_42452_cast_fp16, y = var_42453_to_fp16)[name = tensor("aw_chunk_4405_cast_fp16")]; + tensor var_42456_equation_0 = const()[name = tensor("op_42456_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42456_cast_fp16 = einsum(equation = var_42456_equation_0, values = (var_42166_cast_fp16, var_41869_cast_fp16))[name = tensor("op_42456_cast_fp16")]; + tensor var_42457_to_fp16 = const()[name = tensor("op_42457_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4407_cast_fp16 = mul(x = var_42456_cast_fp16, y = var_42457_to_fp16)[name = tensor("aw_chunk_4407_cast_fp16")]; + tensor var_42460_equation_0 = const()[name = tensor("op_42460_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42460_cast_fp16 = einsum(equation = var_42460_equation_0, values = (var_42170_cast_fp16, var_41876_cast_fp16))[name = tensor("op_42460_cast_fp16")]; + tensor var_42461_to_fp16 = const()[name = tensor("op_42461_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4409_cast_fp16 = mul(x = var_42460_cast_fp16, y = var_42461_to_fp16)[name = tensor("aw_chunk_4409_cast_fp16")]; + tensor var_42464_equation_0 = const()[name = tensor("op_42464_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42464_cast_fp16 = einsum(equation = var_42464_equation_0, values = (var_42170_cast_fp16, var_41883_cast_fp16))[name = tensor("op_42464_cast_fp16")]; + tensor var_42465_to_fp16 = const()[name = tensor("op_42465_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4411_cast_fp16 = mul(x = var_42464_cast_fp16, y = var_42465_to_fp16)[name = tensor("aw_chunk_4411_cast_fp16")]; + tensor var_42468_equation_0 = const()[name = tensor("op_42468_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42468_cast_fp16 = einsum(equation = var_42468_equation_0, values = (var_42170_cast_fp16, var_41890_cast_fp16))[name = tensor("op_42468_cast_fp16")]; + tensor var_42469_to_fp16 = const()[name = tensor("op_42469_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4413_cast_fp16 = mul(x = var_42468_cast_fp16, y = var_42469_to_fp16)[name = tensor("aw_chunk_4413_cast_fp16")]; + tensor var_42472_equation_0 = const()[name = tensor("op_42472_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42472_cast_fp16 = einsum(equation = var_42472_equation_0, values = (var_42170_cast_fp16, var_41897_cast_fp16))[name = tensor("op_42472_cast_fp16")]; + tensor var_42473_to_fp16 = const()[name = tensor("op_42473_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4415_cast_fp16 = mul(x = var_42472_cast_fp16, y = var_42473_to_fp16)[name = tensor("aw_chunk_4415_cast_fp16")]; + tensor var_42476_equation_0 = const()[name = tensor("op_42476_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42476_cast_fp16 = einsum(equation = var_42476_equation_0, values = (var_42174_cast_fp16, var_41904_cast_fp16))[name = tensor("op_42476_cast_fp16")]; + tensor var_42477_to_fp16 = const()[name = tensor("op_42477_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4417_cast_fp16 = mul(x = var_42476_cast_fp16, y = var_42477_to_fp16)[name = tensor("aw_chunk_4417_cast_fp16")]; + tensor var_42480_equation_0 = const()[name = tensor("op_42480_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42480_cast_fp16 = einsum(equation = var_42480_equation_0, values = (var_42174_cast_fp16, var_41911_cast_fp16))[name = tensor("op_42480_cast_fp16")]; + tensor var_42481_to_fp16 = const()[name = tensor("op_42481_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4419_cast_fp16 = mul(x = var_42480_cast_fp16, y = var_42481_to_fp16)[name = tensor("aw_chunk_4419_cast_fp16")]; + tensor var_42484_equation_0 = const()[name = tensor("op_42484_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42484_cast_fp16 = einsum(equation = var_42484_equation_0, values = (var_42174_cast_fp16, var_41918_cast_fp16))[name = tensor("op_42484_cast_fp16")]; + tensor var_42485_to_fp16 = const()[name = tensor("op_42485_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4421_cast_fp16 = mul(x = var_42484_cast_fp16, y = var_42485_to_fp16)[name = tensor("aw_chunk_4421_cast_fp16")]; + tensor var_42488_equation_0 = const()[name = tensor("op_42488_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42488_cast_fp16 = einsum(equation = var_42488_equation_0, values = (var_42174_cast_fp16, var_41925_cast_fp16))[name = tensor("op_42488_cast_fp16")]; + tensor var_42489_to_fp16 = const()[name = tensor("op_42489_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4423_cast_fp16 = mul(x = var_42488_cast_fp16, y = var_42489_to_fp16)[name = tensor("aw_chunk_4423_cast_fp16")]; + tensor var_42492_equation_0 = const()[name = tensor("op_42492_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42492_cast_fp16 = einsum(equation = var_42492_equation_0, values = (var_42178_cast_fp16, var_41932_cast_fp16))[name = tensor("op_42492_cast_fp16")]; + tensor var_42493_to_fp16 = const()[name = tensor("op_42493_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4425_cast_fp16 = mul(x = var_42492_cast_fp16, y = var_42493_to_fp16)[name = tensor("aw_chunk_4425_cast_fp16")]; + tensor var_42496_equation_0 = const()[name = tensor("op_42496_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42496_cast_fp16 = einsum(equation = var_42496_equation_0, values = (var_42178_cast_fp16, var_41939_cast_fp16))[name = tensor("op_42496_cast_fp16")]; + tensor var_42497_to_fp16 = const()[name = tensor("op_42497_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4427_cast_fp16 = mul(x = var_42496_cast_fp16, y = var_42497_to_fp16)[name = tensor("aw_chunk_4427_cast_fp16")]; + tensor var_42500_equation_0 = const()[name = tensor("op_42500_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42500_cast_fp16 = einsum(equation = var_42500_equation_0, values = (var_42178_cast_fp16, var_41946_cast_fp16))[name = tensor("op_42500_cast_fp16")]; + tensor var_42501_to_fp16 = const()[name = tensor("op_42501_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4429_cast_fp16 = mul(x = var_42500_cast_fp16, y = var_42501_to_fp16)[name = tensor("aw_chunk_4429_cast_fp16")]; + tensor var_42504_equation_0 = const()[name = tensor("op_42504_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42504_cast_fp16 = einsum(equation = var_42504_equation_0, values = (var_42178_cast_fp16, var_41953_cast_fp16))[name = tensor("op_42504_cast_fp16")]; + tensor var_42505_to_fp16 = const()[name = tensor("op_42505_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4431_cast_fp16 = mul(x = var_42504_cast_fp16, y = var_42505_to_fp16)[name = tensor("aw_chunk_4431_cast_fp16")]; + tensor var_42508_equation_0 = const()[name = tensor("op_42508_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42508_cast_fp16 = einsum(equation = var_42508_equation_0, values = (var_42182_cast_fp16, var_41960_cast_fp16))[name = tensor("op_42508_cast_fp16")]; + tensor var_42509_to_fp16 = const()[name = tensor("op_42509_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4433_cast_fp16 = mul(x = var_42508_cast_fp16, y = var_42509_to_fp16)[name = tensor("aw_chunk_4433_cast_fp16")]; + tensor var_42512_equation_0 = const()[name = tensor("op_42512_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42512_cast_fp16 = einsum(equation = var_42512_equation_0, values = (var_42182_cast_fp16, var_41967_cast_fp16))[name = tensor("op_42512_cast_fp16")]; + tensor var_42513_to_fp16 = const()[name = tensor("op_42513_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4435_cast_fp16 = mul(x = var_42512_cast_fp16, y = var_42513_to_fp16)[name = tensor("aw_chunk_4435_cast_fp16")]; + tensor var_42516_equation_0 = const()[name = tensor("op_42516_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42516_cast_fp16 = einsum(equation = var_42516_equation_0, values = (var_42182_cast_fp16, var_41974_cast_fp16))[name = tensor("op_42516_cast_fp16")]; + tensor var_42517_to_fp16 = const()[name = tensor("op_42517_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4437_cast_fp16 = mul(x = var_42516_cast_fp16, y = var_42517_to_fp16)[name = tensor("aw_chunk_4437_cast_fp16")]; + tensor var_42520_equation_0 = const()[name = tensor("op_42520_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42520_cast_fp16 = einsum(equation = var_42520_equation_0, values = (var_42182_cast_fp16, var_41981_cast_fp16))[name = tensor("op_42520_cast_fp16")]; + tensor var_42521_to_fp16 = const()[name = tensor("op_42521_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4439_cast_fp16 = mul(x = var_42520_cast_fp16, y = var_42521_to_fp16)[name = tensor("aw_chunk_4439_cast_fp16")]; + tensor var_42524_equation_0 = const()[name = tensor("op_42524_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42524_cast_fp16 = einsum(equation = var_42524_equation_0, values = (var_42186_cast_fp16, var_41988_cast_fp16))[name = tensor("op_42524_cast_fp16")]; + tensor var_42525_to_fp16 = const()[name = tensor("op_42525_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4441_cast_fp16 = mul(x = var_42524_cast_fp16, y = var_42525_to_fp16)[name = tensor("aw_chunk_4441_cast_fp16")]; + tensor var_42528_equation_0 = const()[name = tensor("op_42528_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42528_cast_fp16 = einsum(equation = var_42528_equation_0, values = (var_42186_cast_fp16, var_41995_cast_fp16))[name = tensor("op_42528_cast_fp16")]; + tensor var_42529_to_fp16 = const()[name = tensor("op_42529_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4443_cast_fp16 = mul(x = var_42528_cast_fp16, y = var_42529_to_fp16)[name = tensor("aw_chunk_4443_cast_fp16")]; + tensor var_42532_equation_0 = const()[name = tensor("op_42532_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42532_cast_fp16 = einsum(equation = var_42532_equation_0, values = (var_42186_cast_fp16, var_42002_cast_fp16))[name = tensor("op_42532_cast_fp16")]; + tensor var_42533_to_fp16 = const()[name = tensor("op_42533_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4445_cast_fp16 = mul(x = var_42532_cast_fp16, y = var_42533_to_fp16)[name = tensor("aw_chunk_4445_cast_fp16")]; + tensor var_42536_equation_0 = const()[name = tensor("op_42536_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42536_cast_fp16 = einsum(equation = var_42536_equation_0, values = (var_42186_cast_fp16, var_42009_cast_fp16))[name = tensor("op_42536_cast_fp16")]; + tensor var_42537_to_fp16 = const()[name = tensor("op_42537_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4447_cast_fp16 = mul(x = var_42536_cast_fp16, y = var_42537_to_fp16)[name = tensor("aw_chunk_4447_cast_fp16")]; + tensor var_42540_equation_0 = const()[name = tensor("op_42540_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42540_cast_fp16 = einsum(equation = var_42540_equation_0, values = (var_42190_cast_fp16, var_42016_cast_fp16))[name = tensor("op_42540_cast_fp16")]; + tensor var_42541_to_fp16 = const()[name = tensor("op_42541_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4449_cast_fp16 = mul(x = var_42540_cast_fp16, y = var_42541_to_fp16)[name = tensor("aw_chunk_4449_cast_fp16")]; + tensor var_42544_equation_0 = const()[name = tensor("op_42544_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42544_cast_fp16 = einsum(equation = var_42544_equation_0, values = (var_42190_cast_fp16, var_42023_cast_fp16))[name = tensor("op_42544_cast_fp16")]; + tensor var_42545_to_fp16 = const()[name = tensor("op_42545_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4451_cast_fp16 = mul(x = var_42544_cast_fp16, y = var_42545_to_fp16)[name = tensor("aw_chunk_4451_cast_fp16")]; + tensor var_42548_equation_0 = const()[name = tensor("op_42548_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42548_cast_fp16 = einsum(equation = var_42548_equation_0, values = (var_42190_cast_fp16, var_42030_cast_fp16))[name = tensor("op_42548_cast_fp16")]; + tensor var_42549_to_fp16 = const()[name = tensor("op_42549_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4453_cast_fp16 = mul(x = var_42548_cast_fp16, y = var_42549_to_fp16)[name = tensor("aw_chunk_4453_cast_fp16")]; + tensor var_42552_equation_0 = const()[name = tensor("op_42552_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42552_cast_fp16 = einsum(equation = var_42552_equation_0, values = (var_42190_cast_fp16, var_42037_cast_fp16))[name = tensor("op_42552_cast_fp16")]; + tensor var_42553_to_fp16 = const()[name = tensor("op_42553_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4455_cast_fp16 = mul(x = var_42552_cast_fp16, y = var_42553_to_fp16)[name = tensor("aw_chunk_4455_cast_fp16")]; + tensor var_42556_equation_0 = const()[name = tensor("op_42556_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42556_cast_fp16 = einsum(equation = var_42556_equation_0, values = (var_42194_cast_fp16, var_42044_cast_fp16))[name = tensor("op_42556_cast_fp16")]; + tensor var_42557_to_fp16 = const()[name = tensor("op_42557_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4457_cast_fp16 = mul(x = var_42556_cast_fp16, y = var_42557_to_fp16)[name = tensor("aw_chunk_4457_cast_fp16")]; + tensor var_42560_equation_0 = const()[name = tensor("op_42560_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42560_cast_fp16 = einsum(equation = var_42560_equation_0, values = (var_42194_cast_fp16, var_42051_cast_fp16))[name = tensor("op_42560_cast_fp16")]; + tensor var_42561_to_fp16 = const()[name = tensor("op_42561_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4459_cast_fp16 = mul(x = var_42560_cast_fp16, y = var_42561_to_fp16)[name = tensor("aw_chunk_4459_cast_fp16")]; + tensor var_42564_equation_0 = const()[name = tensor("op_42564_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42564_cast_fp16 = einsum(equation = var_42564_equation_0, values = (var_42194_cast_fp16, var_42058_cast_fp16))[name = tensor("op_42564_cast_fp16")]; + tensor var_42565_to_fp16 = const()[name = tensor("op_42565_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4461_cast_fp16 = mul(x = var_42564_cast_fp16, y = var_42565_to_fp16)[name = tensor("aw_chunk_4461_cast_fp16")]; + tensor var_42568_equation_0 = const()[name = tensor("op_42568_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42568_cast_fp16 = einsum(equation = var_42568_equation_0, values = (var_42194_cast_fp16, var_42065_cast_fp16))[name = tensor("op_42568_cast_fp16")]; + tensor var_42569_to_fp16 = const()[name = tensor("op_42569_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4463_cast_fp16 = mul(x = var_42568_cast_fp16, y = var_42569_to_fp16)[name = tensor("aw_chunk_4463_cast_fp16")]; + tensor var_42572_equation_0 = const()[name = tensor("op_42572_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42572_cast_fp16 = einsum(equation = var_42572_equation_0, values = (var_42198_cast_fp16, var_42072_cast_fp16))[name = tensor("op_42572_cast_fp16")]; + tensor var_42573_to_fp16 = const()[name = tensor("op_42573_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4465_cast_fp16 = mul(x = var_42572_cast_fp16, y = var_42573_to_fp16)[name = tensor("aw_chunk_4465_cast_fp16")]; + tensor var_42576_equation_0 = const()[name = tensor("op_42576_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42576_cast_fp16 = einsum(equation = var_42576_equation_0, values = (var_42198_cast_fp16, var_42079_cast_fp16))[name = tensor("op_42576_cast_fp16")]; + tensor var_42577_to_fp16 = const()[name = tensor("op_42577_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4467_cast_fp16 = mul(x = var_42576_cast_fp16, y = var_42577_to_fp16)[name = tensor("aw_chunk_4467_cast_fp16")]; + tensor var_42580_equation_0 = const()[name = tensor("op_42580_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42580_cast_fp16 = einsum(equation = var_42580_equation_0, values = (var_42198_cast_fp16, var_42086_cast_fp16))[name = tensor("op_42580_cast_fp16")]; + tensor var_42581_to_fp16 = const()[name = tensor("op_42581_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4469_cast_fp16 = mul(x = var_42580_cast_fp16, y = var_42581_to_fp16)[name = tensor("aw_chunk_4469_cast_fp16")]; + tensor var_42584_equation_0 = const()[name = tensor("op_42584_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42584_cast_fp16 = einsum(equation = var_42584_equation_0, values = (var_42198_cast_fp16, var_42093_cast_fp16))[name = tensor("op_42584_cast_fp16")]; + tensor var_42585_to_fp16 = const()[name = tensor("op_42585_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4471_cast_fp16 = mul(x = var_42584_cast_fp16, y = var_42585_to_fp16)[name = tensor("aw_chunk_4471_cast_fp16")]; + tensor var_42588_equation_0 = const()[name = tensor("op_42588_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42588_cast_fp16 = einsum(equation = var_42588_equation_0, values = (var_42202_cast_fp16, var_42100_cast_fp16))[name = tensor("op_42588_cast_fp16")]; + tensor var_42589_to_fp16 = const()[name = tensor("op_42589_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4473_cast_fp16 = mul(x = var_42588_cast_fp16, y = var_42589_to_fp16)[name = tensor("aw_chunk_4473_cast_fp16")]; + tensor var_42592_equation_0 = const()[name = tensor("op_42592_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42592_cast_fp16 = einsum(equation = var_42592_equation_0, values = (var_42202_cast_fp16, var_42107_cast_fp16))[name = tensor("op_42592_cast_fp16")]; + tensor var_42593_to_fp16 = const()[name = tensor("op_42593_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4475_cast_fp16 = mul(x = var_42592_cast_fp16, y = var_42593_to_fp16)[name = tensor("aw_chunk_4475_cast_fp16")]; + tensor var_42596_equation_0 = const()[name = tensor("op_42596_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42596_cast_fp16 = einsum(equation = var_42596_equation_0, values = (var_42202_cast_fp16, var_42114_cast_fp16))[name = tensor("op_42596_cast_fp16")]; + tensor var_42597_to_fp16 = const()[name = tensor("op_42597_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4477_cast_fp16 = mul(x = var_42596_cast_fp16, y = var_42597_to_fp16)[name = tensor("aw_chunk_4477_cast_fp16")]; + tensor var_42600_equation_0 = const()[name = tensor("op_42600_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_42600_cast_fp16 = einsum(equation = var_42600_equation_0, values = (var_42202_cast_fp16, var_42121_cast_fp16))[name = tensor("op_42600_cast_fp16")]; + tensor var_42601_to_fp16 = const()[name = tensor("op_42601_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4479_cast_fp16 = mul(x = var_42600_cast_fp16, y = var_42601_to_fp16)[name = tensor("aw_chunk_4479_cast_fp16")]; + tensor var_42603_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4321_cast_fp16)[name = tensor("op_42603_cast_fp16")]; + tensor var_42604_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4323_cast_fp16)[name = tensor("op_42604_cast_fp16")]; + tensor var_42605_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4325_cast_fp16)[name = tensor("op_42605_cast_fp16")]; + tensor var_42606_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4327_cast_fp16)[name = tensor("op_42606_cast_fp16")]; + tensor var_42607_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4329_cast_fp16)[name = tensor("op_42607_cast_fp16")]; + tensor var_42608_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4331_cast_fp16)[name = tensor("op_42608_cast_fp16")]; + tensor var_42609_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4333_cast_fp16)[name = tensor("op_42609_cast_fp16")]; + tensor var_42610_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4335_cast_fp16)[name = tensor("op_42610_cast_fp16")]; + tensor var_42611_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4337_cast_fp16)[name = tensor("op_42611_cast_fp16")]; + tensor var_42612_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4339_cast_fp16)[name = tensor("op_42612_cast_fp16")]; + tensor var_42613_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4341_cast_fp16)[name = tensor("op_42613_cast_fp16")]; + tensor var_42614_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4343_cast_fp16)[name = tensor("op_42614_cast_fp16")]; + tensor var_42615_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4345_cast_fp16)[name = tensor("op_42615_cast_fp16")]; + tensor var_42616_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4347_cast_fp16)[name = tensor("op_42616_cast_fp16")]; + tensor var_42617_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4349_cast_fp16)[name = tensor("op_42617_cast_fp16")]; + tensor var_42618_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4351_cast_fp16)[name = tensor("op_42618_cast_fp16")]; + tensor var_42619_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4353_cast_fp16)[name = tensor("op_42619_cast_fp16")]; + tensor var_42620_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4355_cast_fp16)[name = tensor("op_42620_cast_fp16")]; + tensor var_42621_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4357_cast_fp16)[name = tensor("op_42621_cast_fp16")]; + tensor var_42622_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4359_cast_fp16)[name = tensor("op_42622_cast_fp16")]; + tensor var_42623_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4361_cast_fp16)[name = tensor("op_42623_cast_fp16")]; + tensor var_42624_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4363_cast_fp16)[name = tensor("op_42624_cast_fp16")]; + tensor var_42625_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4365_cast_fp16)[name = tensor("op_42625_cast_fp16")]; + tensor var_42626_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4367_cast_fp16)[name = tensor("op_42626_cast_fp16")]; + tensor var_42627_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4369_cast_fp16)[name = tensor("op_42627_cast_fp16")]; + tensor var_42628_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4371_cast_fp16)[name = tensor("op_42628_cast_fp16")]; + tensor var_42629_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4373_cast_fp16)[name = tensor("op_42629_cast_fp16")]; + tensor var_42630_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4375_cast_fp16)[name = tensor("op_42630_cast_fp16")]; + tensor var_42631_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4377_cast_fp16)[name = tensor("op_42631_cast_fp16")]; + tensor var_42632_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4379_cast_fp16)[name = tensor("op_42632_cast_fp16")]; + tensor var_42633_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4381_cast_fp16)[name = tensor("op_42633_cast_fp16")]; + tensor var_42634_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4383_cast_fp16)[name = tensor("op_42634_cast_fp16")]; + tensor var_42635_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4385_cast_fp16)[name = tensor("op_42635_cast_fp16")]; + tensor var_42636_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4387_cast_fp16)[name = tensor("op_42636_cast_fp16")]; + tensor var_42637_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4389_cast_fp16)[name = tensor("op_42637_cast_fp16")]; + tensor var_42638_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4391_cast_fp16)[name = tensor("op_42638_cast_fp16")]; + tensor var_42639_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4393_cast_fp16)[name = tensor("op_42639_cast_fp16")]; + tensor var_42640_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4395_cast_fp16)[name = tensor("op_42640_cast_fp16")]; + tensor var_42641_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4397_cast_fp16)[name = tensor("op_42641_cast_fp16")]; + tensor var_42642_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4399_cast_fp16)[name = tensor("op_42642_cast_fp16")]; + tensor var_42643_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4401_cast_fp16)[name = tensor("op_42643_cast_fp16")]; + tensor var_42644_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4403_cast_fp16)[name = tensor("op_42644_cast_fp16")]; + tensor var_42645_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4405_cast_fp16)[name = tensor("op_42645_cast_fp16")]; + tensor var_42646_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4407_cast_fp16)[name = tensor("op_42646_cast_fp16")]; + tensor var_42647_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4409_cast_fp16)[name = tensor("op_42647_cast_fp16")]; + tensor var_42648_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4411_cast_fp16)[name = tensor("op_42648_cast_fp16")]; + tensor var_42649_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4413_cast_fp16)[name = tensor("op_42649_cast_fp16")]; + tensor var_42650_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4415_cast_fp16)[name = tensor("op_42650_cast_fp16")]; + tensor var_42651_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4417_cast_fp16)[name = tensor("op_42651_cast_fp16")]; + tensor var_42652_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4419_cast_fp16)[name = tensor("op_42652_cast_fp16")]; + tensor var_42653_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4421_cast_fp16)[name = tensor("op_42653_cast_fp16")]; + tensor var_42654_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4423_cast_fp16)[name = tensor("op_42654_cast_fp16")]; + tensor var_42655_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4425_cast_fp16)[name = tensor("op_42655_cast_fp16")]; + tensor var_42656_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4427_cast_fp16)[name = tensor("op_42656_cast_fp16")]; + tensor var_42657_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4429_cast_fp16)[name = tensor("op_42657_cast_fp16")]; + tensor var_42658_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4431_cast_fp16)[name = tensor("op_42658_cast_fp16")]; + tensor var_42659_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4433_cast_fp16)[name = tensor("op_42659_cast_fp16")]; + tensor var_42660_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4435_cast_fp16)[name = tensor("op_42660_cast_fp16")]; + tensor var_42661_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4437_cast_fp16)[name = tensor("op_42661_cast_fp16")]; + tensor var_42662_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4439_cast_fp16)[name = tensor("op_42662_cast_fp16")]; + tensor var_42663_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4441_cast_fp16)[name = tensor("op_42663_cast_fp16")]; + tensor var_42664_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4443_cast_fp16)[name = tensor("op_42664_cast_fp16")]; + tensor var_42665_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4445_cast_fp16)[name = tensor("op_42665_cast_fp16")]; + tensor var_42666_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4447_cast_fp16)[name = tensor("op_42666_cast_fp16")]; + tensor var_42667_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4449_cast_fp16)[name = tensor("op_42667_cast_fp16")]; + tensor var_42668_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4451_cast_fp16)[name = tensor("op_42668_cast_fp16")]; + tensor var_42669_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4453_cast_fp16)[name = tensor("op_42669_cast_fp16")]; + tensor var_42670_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4455_cast_fp16)[name = tensor("op_42670_cast_fp16")]; + tensor var_42671_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4457_cast_fp16)[name = tensor("op_42671_cast_fp16")]; + tensor var_42672_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4459_cast_fp16)[name = tensor("op_42672_cast_fp16")]; + tensor var_42673_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4461_cast_fp16)[name = tensor("op_42673_cast_fp16")]; + tensor var_42674_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4463_cast_fp16)[name = tensor("op_42674_cast_fp16")]; + tensor var_42675_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4465_cast_fp16)[name = tensor("op_42675_cast_fp16")]; + tensor var_42676_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4467_cast_fp16)[name = tensor("op_42676_cast_fp16")]; + tensor var_42677_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4469_cast_fp16)[name = tensor("op_42677_cast_fp16")]; + tensor var_42678_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4471_cast_fp16)[name = tensor("op_42678_cast_fp16")]; + tensor var_42679_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4473_cast_fp16)[name = tensor("op_42679_cast_fp16")]; + tensor var_42680_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4475_cast_fp16)[name = tensor("op_42680_cast_fp16")]; + tensor var_42681_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4477_cast_fp16)[name = tensor("op_42681_cast_fp16")]; + tensor var_42682_cast_fp16 = softmax(axis = var_41428, x = aw_chunk_4479_cast_fp16)[name = tensor("op_42682_cast_fp16")]; + tensor var_42684_equation_0 = const()[name = tensor("op_42684_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42684_cast_fp16 = einsum(equation = var_42684_equation_0, values = (var_42204_cast_fp16, var_42603_cast_fp16))[name = tensor("op_42684_cast_fp16")]; + tensor var_42686_equation_0 = const()[name = tensor("op_42686_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42686_cast_fp16 = einsum(equation = var_42686_equation_0, values = (var_42204_cast_fp16, var_42604_cast_fp16))[name = tensor("op_42686_cast_fp16")]; + tensor var_42688_equation_0 = const()[name = tensor("op_42688_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42688_cast_fp16 = einsum(equation = var_42688_equation_0, values = (var_42204_cast_fp16, var_42605_cast_fp16))[name = tensor("op_42688_cast_fp16")]; + tensor var_42690_equation_0 = const()[name = tensor("op_42690_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42690_cast_fp16 = einsum(equation = var_42690_equation_0, values = (var_42204_cast_fp16, var_42606_cast_fp16))[name = tensor("op_42690_cast_fp16")]; + tensor var_42692_equation_0 = const()[name = tensor("op_42692_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42692_cast_fp16 = einsum(equation = var_42692_equation_0, values = (var_42208_cast_fp16, var_42607_cast_fp16))[name = tensor("op_42692_cast_fp16")]; + tensor var_42694_equation_0 = const()[name = tensor("op_42694_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42694_cast_fp16 = einsum(equation = var_42694_equation_0, values = (var_42208_cast_fp16, var_42608_cast_fp16))[name = tensor("op_42694_cast_fp16")]; + tensor var_42696_equation_0 = const()[name = tensor("op_42696_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42696_cast_fp16 = einsum(equation = var_42696_equation_0, values = (var_42208_cast_fp16, var_42609_cast_fp16))[name = tensor("op_42696_cast_fp16")]; + tensor var_42698_equation_0 = const()[name = tensor("op_42698_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42698_cast_fp16 = einsum(equation = var_42698_equation_0, values = (var_42208_cast_fp16, var_42610_cast_fp16))[name = tensor("op_42698_cast_fp16")]; + tensor var_42700_equation_0 = const()[name = tensor("op_42700_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42700_cast_fp16 = einsum(equation = var_42700_equation_0, values = (var_42212_cast_fp16, var_42611_cast_fp16))[name = tensor("op_42700_cast_fp16")]; + tensor var_42702_equation_0 = const()[name = tensor("op_42702_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42702_cast_fp16 = einsum(equation = var_42702_equation_0, values = (var_42212_cast_fp16, var_42612_cast_fp16))[name = tensor("op_42702_cast_fp16")]; + tensor var_42704_equation_0 = const()[name = tensor("op_42704_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42704_cast_fp16 = einsum(equation = var_42704_equation_0, values = (var_42212_cast_fp16, var_42613_cast_fp16))[name = tensor("op_42704_cast_fp16")]; + tensor var_42706_equation_0 = const()[name = tensor("op_42706_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42706_cast_fp16 = einsum(equation = var_42706_equation_0, values = (var_42212_cast_fp16, var_42614_cast_fp16))[name = tensor("op_42706_cast_fp16")]; + tensor var_42708_equation_0 = const()[name = tensor("op_42708_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42708_cast_fp16 = einsum(equation = var_42708_equation_0, values = (var_42216_cast_fp16, var_42615_cast_fp16))[name = tensor("op_42708_cast_fp16")]; + tensor var_42710_equation_0 = const()[name = tensor("op_42710_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42710_cast_fp16 = einsum(equation = var_42710_equation_0, values = (var_42216_cast_fp16, var_42616_cast_fp16))[name = tensor("op_42710_cast_fp16")]; + tensor var_42712_equation_0 = const()[name = tensor("op_42712_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42712_cast_fp16 = einsum(equation = var_42712_equation_0, values = (var_42216_cast_fp16, var_42617_cast_fp16))[name = tensor("op_42712_cast_fp16")]; + tensor var_42714_equation_0 = const()[name = tensor("op_42714_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42714_cast_fp16 = einsum(equation = var_42714_equation_0, values = (var_42216_cast_fp16, var_42618_cast_fp16))[name = tensor("op_42714_cast_fp16")]; + tensor var_42716_equation_0 = const()[name = tensor("op_42716_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42716_cast_fp16 = einsum(equation = var_42716_equation_0, values = (var_42220_cast_fp16, var_42619_cast_fp16))[name = tensor("op_42716_cast_fp16")]; + tensor var_42718_equation_0 = const()[name = tensor("op_42718_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42718_cast_fp16 = einsum(equation = var_42718_equation_0, values = (var_42220_cast_fp16, var_42620_cast_fp16))[name = tensor("op_42718_cast_fp16")]; + tensor var_42720_equation_0 = const()[name = tensor("op_42720_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42720_cast_fp16 = einsum(equation = var_42720_equation_0, values = (var_42220_cast_fp16, var_42621_cast_fp16))[name = tensor("op_42720_cast_fp16")]; + tensor var_42722_equation_0 = const()[name = tensor("op_42722_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42722_cast_fp16 = einsum(equation = var_42722_equation_0, values = (var_42220_cast_fp16, var_42622_cast_fp16))[name = tensor("op_42722_cast_fp16")]; + tensor var_42724_equation_0 = const()[name = tensor("op_42724_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42724_cast_fp16 = einsum(equation = var_42724_equation_0, values = (var_42224_cast_fp16, var_42623_cast_fp16))[name = tensor("op_42724_cast_fp16")]; + tensor var_42726_equation_0 = const()[name = tensor("op_42726_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42726_cast_fp16 = einsum(equation = var_42726_equation_0, values = (var_42224_cast_fp16, var_42624_cast_fp16))[name = tensor("op_42726_cast_fp16")]; + tensor var_42728_equation_0 = const()[name = tensor("op_42728_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42728_cast_fp16 = einsum(equation = var_42728_equation_0, values = (var_42224_cast_fp16, var_42625_cast_fp16))[name = tensor("op_42728_cast_fp16")]; + tensor var_42730_equation_0 = const()[name = tensor("op_42730_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42730_cast_fp16 = einsum(equation = var_42730_equation_0, values = (var_42224_cast_fp16, var_42626_cast_fp16))[name = tensor("op_42730_cast_fp16")]; + tensor var_42732_equation_0 = const()[name = tensor("op_42732_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42732_cast_fp16 = einsum(equation = var_42732_equation_0, values = (var_42228_cast_fp16, var_42627_cast_fp16))[name = tensor("op_42732_cast_fp16")]; + tensor var_42734_equation_0 = const()[name = tensor("op_42734_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42734_cast_fp16 = einsum(equation = var_42734_equation_0, values = (var_42228_cast_fp16, var_42628_cast_fp16))[name = tensor("op_42734_cast_fp16")]; + tensor var_42736_equation_0 = const()[name = tensor("op_42736_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42736_cast_fp16 = einsum(equation = var_42736_equation_0, values = (var_42228_cast_fp16, var_42629_cast_fp16))[name = tensor("op_42736_cast_fp16")]; + tensor var_42738_equation_0 = const()[name = tensor("op_42738_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42738_cast_fp16 = einsum(equation = var_42738_equation_0, values = (var_42228_cast_fp16, var_42630_cast_fp16))[name = tensor("op_42738_cast_fp16")]; + tensor var_42740_equation_0 = const()[name = tensor("op_42740_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42740_cast_fp16 = einsum(equation = var_42740_equation_0, values = (var_42232_cast_fp16, var_42631_cast_fp16))[name = tensor("op_42740_cast_fp16")]; + tensor var_42742_equation_0 = const()[name = tensor("op_42742_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42742_cast_fp16 = einsum(equation = var_42742_equation_0, values = (var_42232_cast_fp16, var_42632_cast_fp16))[name = tensor("op_42742_cast_fp16")]; + tensor var_42744_equation_0 = const()[name = tensor("op_42744_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42744_cast_fp16 = einsum(equation = var_42744_equation_0, values = (var_42232_cast_fp16, var_42633_cast_fp16))[name = tensor("op_42744_cast_fp16")]; + tensor var_42746_equation_0 = const()[name = tensor("op_42746_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42746_cast_fp16 = einsum(equation = var_42746_equation_0, values = (var_42232_cast_fp16, var_42634_cast_fp16))[name = tensor("op_42746_cast_fp16")]; + tensor var_42748_equation_0 = const()[name = tensor("op_42748_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42748_cast_fp16 = einsum(equation = var_42748_equation_0, values = (var_42236_cast_fp16, var_42635_cast_fp16))[name = tensor("op_42748_cast_fp16")]; + tensor var_42750_equation_0 = const()[name = tensor("op_42750_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42750_cast_fp16 = einsum(equation = var_42750_equation_0, values = (var_42236_cast_fp16, var_42636_cast_fp16))[name = tensor("op_42750_cast_fp16")]; + tensor var_42752_equation_0 = const()[name = tensor("op_42752_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42752_cast_fp16 = einsum(equation = var_42752_equation_0, values = (var_42236_cast_fp16, var_42637_cast_fp16))[name = tensor("op_42752_cast_fp16")]; + tensor var_42754_equation_0 = const()[name = tensor("op_42754_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42754_cast_fp16 = einsum(equation = var_42754_equation_0, values = (var_42236_cast_fp16, var_42638_cast_fp16))[name = tensor("op_42754_cast_fp16")]; + tensor var_42756_equation_0 = const()[name = tensor("op_42756_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42756_cast_fp16 = einsum(equation = var_42756_equation_0, values = (var_42240_cast_fp16, var_42639_cast_fp16))[name = tensor("op_42756_cast_fp16")]; + tensor var_42758_equation_0 = const()[name = tensor("op_42758_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42758_cast_fp16 = einsum(equation = var_42758_equation_0, values = (var_42240_cast_fp16, var_42640_cast_fp16))[name = tensor("op_42758_cast_fp16")]; + tensor var_42760_equation_0 = const()[name = tensor("op_42760_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42760_cast_fp16 = einsum(equation = var_42760_equation_0, values = (var_42240_cast_fp16, var_42641_cast_fp16))[name = tensor("op_42760_cast_fp16")]; + tensor var_42762_equation_0 = const()[name = tensor("op_42762_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42762_cast_fp16 = einsum(equation = var_42762_equation_0, values = (var_42240_cast_fp16, var_42642_cast_fp16))[name = tensor("op_42762_cast_fp16")]; + tensor var_42764_equation_0 = const()[name = tensor("op_42764_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42764_cast_fp16 = einsum(equation = var_42764_equation_0, values = (var_42244_cast_fp16, var_42643_cast_fp16))[name = tensor("op_42764_cast_fp16")]; + tensor var_42766_equation_0 = const()[name = tensor("op_42766_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42766_cast_fp16 = einsum(equation = var_42766_equation_0, values = (var_42244_cast_fp16, var_42644_cast_fp16))[name = tensor("op_42766_cast_fp16")]; + tensor var_42768_equation_0 = const()[name = tensor("op_42768_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42768_cast_fp16 = einsum(equation = var_42768_equation_0, values = (var_42244_cast_fp16, var_42645_cast_fp16))[name = tensor("op_42768_cast_fp16")]; + tensor var_42770_equation_0 = const()[name = tensor("op_42770_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42770_cast_fp16 = einsum(equation = var_42770_equation_0, values = (var_42244_cast_fp16, var_42646_cast_fp16))[name = tensor("op_42770_cast_fp16")]; + tensor var_42772_equation_0 = const()[name = tensor("op_42772_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42772_cast_fp16 = einsum(equation = var_42772_equation_0, values = (var_42248_cast_fp16, var_42647_cast_fp16))[name = tensor("op_42772_cast_fp16")]; + tensor var_42774_equation_0 = const()[name = tensor("op_42774_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42774_cast_fp16 = einsum(equation = var_42774_equation_0, values = (var_42248_cast_fp16, var_42648_cast_fp16))[name = tensor("op_42774_cast_fp16")]; + tensor var_42776_equation_0 = const()[name = tensor("op_42776_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42776_cast_fp16 = einsum(equation = var_42776_equation_0, values = (var_42248_cast_fp16, var_42649_cast_fp16))[name = tensor("op_42776_cast_fp16")]; + tensor var_42778_equation_0 = const()[name = tensor("op_42778_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42778_cast_fp16 = einsum(equation = var_42778_equation_0, values = (var_42248_cast_fp16, var_42650_cast_fp16))[name = tensor("op_42778_cast_fp16")]; + tensor var_42780_equation_0 = const()[name = tensor("op_42780_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42780_cast_fp16 = einsum(equation = var_42780_equation_0, values = (var_42252_cast_fp16, var_42651_cast_fp16))[name = tensor("op_42780_cast_fp16")]; + tensor var_42782_equation_0 = const()[name = tensor("op_42782_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42782_cast_fp16 = einsum(equation = var_42782_equation_0, values = (var_42252_cast_fp16, var_42652_cast_fp16))[name = tensor("op_42782_cast_fp16")]; + tensor var_42784_equation_0 = const()[name = tensor("op_42784_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42784_cast_fp16 = einsum(equation = var_42784_equation_0, values = (var_42252_cast_fp16, var_42653_cast_fp16))[name = tensor("op_42784_cast_fp16")]; + tensor var_42786_equation_0 = const()[name = tensor("op_42786_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42786_cast_fp16 = einsum(equation = var_42786_equation_0, values = (var_42252_cast_fp16, var_42654_cast_fp16))[name = tensor("op_42786_cast_fp16")]; + tensor var_42788_equation_0 = const()[name = tensor("op_42788_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42788_cast_fp16 = einsum(equation = var_42788_equation_0, values = (var_42256_cast_fp16, var_42655_cast_fp16))[name = tensor("op_42788_cast_fp16")]; + tensor var_42790_equation_0 = const()[name = tensor("op_42790_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42790_cast_fp16 = einsum(equation = var_42790_equation_0, values = (var_42256_cast_fp16, var_42656_cast_fp16))[name = tensor("op_42790_cast_fp16")]; + tensor var_42792_equation_0 = const()[name = tensor("op_42792_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42792_cast_fp16 = einsum(equation = var_42792_equation_0, values = (var_42256_cast_fp16, var_42657_cast_fp16))[name = tensor("op_42792_cast_fp16")]; + tensor var_42794_equation_0 = const()[name = tensor("op_42794_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42794_cast_fp16 = einsum(equation = var_42794_equation_0, values = (var_42256_cast_fp16, var_42658_cast_fp16))[name = tensor("op_42794_cast_fp16")]; + tensor var_42796_equation_0 = const()[name = tensor("op_42796_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42796_cast_fp16 = einsum(equation = var_42796_equation_0, values = (var_42260_cast_fp16, var_42659_cast_fp16))[name = tensor("op_42796_cast_fp16")]; + tensor var_42798_equation_0 = const()[name = tensor("op_42798_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42798_cast_fp16 = einsum(equation = var_42798_equation_0, values = (var_42260_cast_fp16, var_42660_cast_fp16))[name = tensor("op_42798_cast_fp16")]; + tensor var_42800_equation_0 = const()[name = tensor("op_42800_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42800_cast_fp16 = einsum(equation = var_42800_equation_0, values = (var_42260_cast_fp16, var_42661_cast_fp16))[name = tensor("op_42800_cast_fp16")]; + tensor var_42802_equation_0 = const()[name = tensor("op_42802_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42802_cast_fp16 = einsum(equation = var_42802_equation_0, values = (var_42260_cast_fp16, var_42662_cast_fp16))[name = tensor("op_42802_cast_fp16")]; + tensor var_42804_equation_0 = const()[name = tensor("op_42804_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42804_cast_fp16 = einsum(equation = var_42804_equation_0, values = (var_42264_cast_fp16, var_42663_cast_fp16))[name = tensor("op_42804_cast_fp16")]; + tensor var_42806_equation_0 = const()[name = tensor("op_42806_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42806_cast_fp16 = einsum(equation = var_42806_equation_0, values = (var_42264_cast_fp16, var_42664_cast_fp16))[name = tensor("op_42806_cast_fp16")]; + tensor var_42808_equation_0 = const()[name = tensor("op_42808_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42808_cast_fp16 = einsum(equation = var_42808_equation_0, values = (var_42264_cast_fp16, var_42665_cast_fp16))[name = tensor("op_42808_cast_fp16")]; + tensor var_42810_equation_0 = const()[name = tensor("op_42810_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42810_cast_fp16 = einsum(equation = var_42810_equation_0, values = (var_42264_cast_fp16, var_42666_cast_fp16))[name = tensor("op_42810_cast_fp16")]; + tensor var_42812_equation_0 = const()[name = tensor("op_42812_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42812_cast_fp16 = einsum(equation = var_42812_equation_0, values = (var_42268_cast_fp16, var_42667_cast_fp16))[name = tensor("op_42812_cast_fp16")]; + tensor var_42814_equation_0 = const()[name = tensor("op_42814_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42814_cast_fp16 = einsum(equation = var_42814_equation_0, values = (var_42268_cast_fp16, var_42668_cast_fp16))[name = tensor("op_42814_cast_fp16")]; + tensor var_42816_equation_0 = const()[name = tensor("op_42816_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42816_cast_fp16 = einsum(equation = var_42816_equation_0, values = (var_42268_cast_fp16, var_42669_cast_fp16))[name = tensor("op_42816_cast_fp16")]; + tensor var_42818_equation_0 = const()[name = tensor("op_42818_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42818_cast_fp16 = einsum(equation = var_42818_equation_0, values = (var_42268_cast_fp16, var_42670_cast_fp16))[name = tensor("op_42818_cast_fp16")]; + tensor var_42820_equation_0 = const()[name = tensor("op_42820_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42820_cast_fp16 = einsum(equation = var_42820_equation_0, values = (var_42272_cast_fp16, var_42671_cast_fp16))[name = tensor("op_42820_cast_fp16")]; + tensor var_42822_equation_0 = const()[name = tensor("op_42822_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42822_cast_fp16 = einsum(equation = var_42822_equation_0, values = (var_42272_cast_fp16, var_42672_cast_fp16))[name = tensor("op_42822_cast_fp16")]; + tensor var_42824_equation_0 = const()[name = tensor("op_42824_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42824_cast_fp16 = einsum(equation = var_42824_equation_0, values = (var_42272_cast_fp16, var_42673_cast_fp16))[name = tensor("op_42824_cast_fp16")]; + tensor var_42826_equation_0 = const()[name = tensor("op_42826_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42826_cast_fp16 = einsum(equation = var_42826_equation_0, values = (var_42272_cast_fp16, var_42674_cast_fp16))[name = tensor("op_42826_cast_fp16")]; + tensor var_42828_equation_0 = const()[name = tensor("op_42828_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42828_cast_fp16 = einsum(equation = var_42828_equation_0, values = (var_42276_cast_fp16, var_42675_cast_fp16))[name = tensor("op_42828_cast_fp16")]; + tensor var_42830_equation_0 = const()[name = tensor("op_42830_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42830_cast_fp16 = einsum(equation = var_42830_equation_0, values = (var_42276_cast_fp16, var_42676_cast_fp16))[name = tensor("op_42830_cast_fp16")]; + tensor var_42832_equation_0 = const()[name = tensor("op_42832_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42832_cast_fp16 = einsum(equation = var_42832_equation_0, values = (var_42276_cast_fp16, var_42677_cast_fp16))[name = tensor("op_42832_cast_fp16")]; + tensor var_42834_equation_0 = const()[name = tensor("op_42834_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42834_cast_fp16 = einsum(equation = var_42834_equation_0, values = (var_42276_cast_fp16, var_42678_cast_fp16))[name = tensor("op_42834_cast_fp16")]; + tensor var_42836_equation_0 = const()[name = tensor("op_42836_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42836_cast_fp16 = einsum(equation = var_42836_equation_0, values = (var_42280_cast_fp16, var_42679_cast_fp16))[name = tensor("op_42836_cast_fp16")]; + tensor var_42838_equation_0 = const()[name = tensor("op_42838_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42838_cast_fp16 = einsum(equation = var_42838_equation_0, values = (var_42280_cast_fp16, var_42680_cast_fp16))[name = tensor("op_42838_cast_fp16")]; + tensor var_42840_equation_0 = const()[name = tensor("op_42840_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42840_cast_fp16 = einsum(equation = var_42840_equation_0, values = (var_42280_cast_fp16, var_42681_cast_fp16))[name = tensor("op_42840_cast_fp16")]; + tensor var_42842_equation_0 = const()[name = tensor("op_42842_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_42842_cast_fp16 = einsum(equation = var_42842_equation_0, values = (var_42280_cast_fp16, var_42682_cast_fp16))[name = tensor("op_42842_cast_fp16")]; + tensor var_42844_interleave_0 = const()[name = tensor("op_42844_interleave_0"), val = tensor(false)]; + tensor var_42844_cast_fp16 = concat(axis = var_41403, interleave = var_42844_interleave_0, values = (var_42684_cast_fp16, var_42686_cast_fp16, var_42688_cast_fp16, var_42690_cast_fp16))[name = tensor("op_42844_cast_fp16")]; + tensor var_42846_interleave_0 = const()[name = tensor("op_42846_interleave_0"), val = tensor(false)]; + tensor var_42846_cast_fp16 = concat(axis = var_41403, interleave = var_42846_interleave_0, values = (var_42692_cast_fp16, var_42694_cast_fp16, var_42696_cast_fp16, var_42698_cast_fp16))[name = tensor("op_42846_cast_fp16")]; + tensor var_42848_interleave_0 = const()[name = tensor("op_42848_interleave_0"), val = tensor(false)]; + tensor var_42848_cast_fp16 = concat(axis = var_41403, interleave = var_42848_interleave_0, values = (var_42700_cast_fp16, var_42702_cast_fp16, var_42704_cast_fp16, var_42706_cast_fp16))[name = tensor("op_42848_cast_fp16")]; + tensor var_42850_interleave_0 = const()[name = tensor("op_42850_interleave_0"), val = tensor(false)]; + tensor var_42850_cast_fp16 = concat(axis = var_41403, interleave = var_42850_interleave_0, values = (var_42708_cast_fp16, var_42710_cast_fp16, var_42712_cast_fp16, var_42714_cast_fp16))[name = tensor("op_42850_cast_fp16")]; + tensor var_42852_interleave_0 = const()[name = tensor("op_42852_interleave_0"), val = tensor(false)]; + tensor var_42852_cast_fp16 = concat(axis = var_41403, interleave = var_42852_interleave_0, values = (var_42716_cast_fp16, var_42718_cast_fp16, var_42720_cast_fp16, var_42722_cast_fp16))[name = tensor("op_42852_cast_fp16")]; + tensor var_42854_interleave_0 = const()[name = tensor("op_42854_interleave_0"), val = tensor(false)]; + tensor var_42854_cast_fp16 = concat(axis = var_41403, interleave = var_42854_interleave_0, values = (var_42724_cast_fp16, var_42726_cast_fp16, var_42728_cast_fp16, var_42730_cast_fp16))[name = tensor("op_42854_cast_fp16")]; + tensor var_42856_interleave_0 = const()[name = tensor("op_42856_interleave_0"), val = tensor(false)]; + tensor var_42856_cast_fp16 = concat(axis = var_41403, interleave = var_42856_interleave_0, values = (var_42732_cast_fp16, var_42734_cast_fp16, var_42736_cast_fp16, var_42738_cast_fp16))[name = tensor("op_42856_cast_fp16")]; + tensor var_42858_interleave_0 = const()[name = tensor("op_42858_interleave_0"), val = tensor(false)]; + tensor var_42858_cast_fp16 = concat(axis = var_41403, interleave = var_42858_interleave_0, values = (var_42740_cast_fp16, var_42742_cast_fp16, var_42744_cast_fp16, var_42746_cast_fp16))[name = tensor("op_42858_cast_fp16")]; + tensor var_42860_interleave_0 = const()[name = tensor("op_42860_interleave_0"), val = tensor(false)]; + tensor var_42860_cast_fp16 = concat(axis = var_41403, interleave = var_42860_interleave_0, values = (var_42748_cast_fp16, var_42750_cast_fp16, var_42752_cast_fp16, var_42754_cast_fp16))[name = tensor("op_42860_cast_fp16")]; + tensor var_42862_interleave_0 = const()[name = tensor("op_42862_interleave_0"), val = tensor(false)]; + tensor var_42862_cast_fp16 = concat(axis = var_41403, interleave = var_42862_interleave_0, values = (var_42756_cast_fp16, var_42758_cast_fp16, var_42760_cast_fp16, var_42762_cast_fp16))[name = tensor("op_42862_cast_fp16")]; + tensor var_42864_interleave_0 = const()[name = tensor("op_42864_interleave_0"), val = tensor(false)]; + tensor var_42864_cast_fp16 = concat(axis = var_41403, interleave = var_42864_interleave_0, values = (var_42764_cast_fp16, var_42766_cast_fp16, var_42768_cast_fp16, var_42770_cast_fp16))[name = tensor("op_42864_cast_fp16")]; + tensor var_42866_interleave_0 = const()[name = tensor("op_42866_interleave_0"), val = tensor(false)]; + tensor var_42866_cast_fp16 = concat(axis = var_41403, interleave = var_42866_interleave_0, values = (var_42772_cast_fp16, var_42774_cast_fp16, var_42776_cast_fp16, var_42778_cast_fp16))[name = tensor("op_42866_cast_fp16")]; + tensor var_42868_interleave_0 = const()[name = tensor("op_42868_interleave_0"), val = tensor(false)]; + tensor var_42868_cast_fp16 = concat(axis = var_41403, interleave = var_42868_interleave_0, values = (var_42780_cast_fp16, var_42782_cast_fp16, var_42784_cast_fp16, var_42786_cast_fp16))[name = tensor("op_42868_cast_fp16")]; + tensor var_42870_interleave_0 = const()[name = tensor("op_42870_interleave_0"), val = tensor(false)]; + tensor var_42870_cast_fp16 = concat(axis = var_41403, interleave = var_42870_interleave_0, values = (var_42788_cast_fp16, var_42790_cast_fp16, var_42792_cast_fp16, var_42794_cast_fp16))[name = tensor("op_42870_cast_fp16")]; + tensor var_42872_interleave_0 = const()[name = tensor("op_42872_interleave_0"), val = tensor(false)]; + tensor var_42872_cast_fp16 = concat(axis = var_41403, interleave = var_42872_interleave_0, values = (var_42796_cast_fp16, var_42798_cast_fp16, var_42800_cast_fp16, var_42802_cast_fp16))[name = tensor("op_42872_cast_fp16")]; + tensor var_42874_interleave_0 = const()[name = tensor("op_42874_interleave_0"), val = tensor(false)]; + tensor var_42874_cast_fp16 = concat(axis = var_41403, interleave = var_42874_interleave_0, values = (var_42804_cast_fp16, var_42806_cast_fp16, var_42808_cast_fp16, var_42810_cast_fp16))[name = tensor("op_42874_cast_fp16")]; + tensor var_42876_interleave_0 = const()[name = tensor("op_42876_interleave_0"), val = tensor(false)]; + tensor var_42876_cast_fp16 = concat(axis = var_41403, interleave = var_42876_interleave_0, values = (var_42812_cast_fp16, var_42814_cast_fp16, var_42816_cast_fp16, var_42818_cast_fp16))[name = tensor("op_42876_cast_fp16")]; + tensor var_42878_interleave_0 = const()[name = tensor("op_42878_interleave_0"), val = tensor(false)]; + tensor var_42878_cast_fp16 = concat(axis = var_41403, interleave = var_42878_interleave_0, values = (var_42820_cast_fp16, var_42822_cast_fp16, var_42824_cast_fp16, var_42826_cast_fp16))[name = tensor("op_42878_cast_fp16")]; + tensor var_42880_interleave_0 = const()[name = tensor("op_42880_interleave_0"), val = tensor(false)]; + tensor var_42880_cast_fp16 = concat(axis = var_41403, interleave = var_42880_interleave_0, values = (var_42828_cast_fp16, var_42830_cast_fp16, var_42832_cast_fp16, var_42834_cast_fp16))[name = tensor("op_42880_cast_fp16")]; + tensor var_42882_interleave_0 = const()[name = tensor("op_42882_interleave_0"), val = tensor(false)]; + tensor var_42882_cast_fp16 = concat(axis = var_41403, interleave = var_42882_interleave_0, values = (var_42836_cast_fp16, var_42838_cast_fp16, var_42840_cast_fp16, var_42842_cast_fp16))[name = tensor("op_42882_cast_fp16")]; + tensor input_217_interleave_0 = const()[name = tensor("input_217_interleave_0"), val = tensor(false)]; + tensor input_217_cast_fp16 = concat(axis = var_41428, interleave = input_217_interleave_0, values = (var_42844_cast_fp16, var_42846_cast_fp16, var_42848_cast_fp16, var_42850_cast_fp16, var_42852_cast_fp16, var_42854_cast_fp16, var_42856_cast_fp16, var_42858_cast_fp16, var_42860_cast_fp16, var_42862_cast_fp16, var_42864_cast_fp16, var_42866_cast_fp16, var_42868_cast_fp16, var_42870_cast_fp16, var_42872_cast_fp16, var_42874_cast_fp16, var_42876_cast_fp16, var_42878_cast_fp16, var_42880_cast_fp16, var_42882_cast_fp16))[name = tensor("input_217_cast_fp16")]; + tensor var_42887 = const()[name = tensor("op_42887"), val = tensor([1, 1])]; + tensor var_42889 = const()[name = tensor("op_42889"), val = tensor([1, 1])]; + tensor obj_111_pad_type_0 = const()[name = tensor("obj_111_pad_type_0"), val = tensor("custom")]; + tensor obj_111_pad_0 = const()[name = tensor("obj_111_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_27_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_27_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1086675200)))]; + tensor layers_27_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_27_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1089952064)))]; + tensor obj_111_cast_fp16 = conv(bias = layers_27_self_attn_o_proj_bias_to_fp16, dilations = var_42889, groups = var_41428, pad = obj_111_pad_0, pad_type = obj_111_pad_type_0, strides = var_42887, weight = layers_27_self_attn_o_proj_weight_to_fp16, x = input_217_cast_fp16)[name = tensor("obj_111_cast_fp16")]; + tensor inputs_111_cast_fp16 = add(x = inputs_109_cast_fp16, y = obj_111_cast_fp16)[name = tensor("inputs_111_cast_fp16")]; + tensor var_42895 = const()[name = tensor("op_42895"), val = tensor([1])]; + tensor channels_mean_111_cast_fp16 = reduce_mean(axes = var_42895, keep_dims = var_41429, x = inputs_111_cast_fp16)[name = tensor("channels_mean_111_cast_fp16")]; + tensor zero_mean_111_cast_fp16 = sub(x = inputs_111_cast_fp16, y = channels_mean_111_cast_fp16)[name = tensor("zero_mean_111_cast_fp16")]; + tensor zero_mean_sq_111_cast_fp16 = mul(x = zero_mean_111_cast_fp16, y = zero_mean_111_cast_fp16)[name = tensor("zero_mean_sq_111_cast_fp16")]; + tensor var_42899 = const()[name = tensor("op_42899"), val = tensor([1])]; + tensor var_42900_cast_fp16 = reduce_mean(axes = var_42899, keep_dims = var_41429, x = zero_mean_sq_111_cast_fp16)[name = tensor("op_42900_cast_fp16")]; + tensor var_42901_to_fp16 = const()[name = tensor("op_42901_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_42902_cast_fp16 = add(x = var_42900_cast_fp16, y = var_42901_to_fp16)[name = tensor("op_42902_cast_fp16")]; + tensor denom_111_epsilon_0_to_fp16 = const()[name = tensor("denom_111_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_111_cast_fp16 = rsqrt(epsilon = denom_111_epsilon_0_to_fp16, x = var_42902_cast_fp16)[name = tensor("denom_111_cast_fp16")]; + tensor out_111_cast_fp16 = mul(x = zero_mean_111_cast_fp16, y = denom_111_cast_fp16)[name = tensor("out_111_cast_fp16")]; + tensor input_219_gamma_0_to_fp16 = const()[name = tensor("input_219_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1089954688)))]; + tensor input_219_beta_0_to_fp16 = const()[name = tensor("input_219_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1089957312)))]; + tensor input_219_epsilon_0_to_fp16 = const()[name = tensor("input_219_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_219_cast_fp16 = batch_norm(beta = input_219_beta_0_to_fp16, epsilon = input_219_epsilon_0_to_fp16, gamma = input_219_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_111_cast_fp16)[name = tensor("input_219_cast_fp16")]; + tensor var_42913 = const()[name = tensor("op_42913"), val = tensor([1, 1])]; + tensor var_42915 = const()[name = tensor("op_42915"), val = tensor([1, 1])]; + tensor input_221_pad_type_0 = const()[name = tensor("input_221_pad_type_0"), val = tensor("custom")]; + tensor input_221_pad_0 = const()[name = tensor("input_221_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_27_fc1_weight_to_fp16 = const()[name = tensor("layers_27_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1089959936)))]; + tensor layers_27_fc1_bias_to_fp16 = const()[name = tensor("layers_27_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1103067200)))]; + tensor input_221_cast_fp16 = conv(bias = layers_27_fc1_bias_to_fp16, dilations = var_42915, groups = var_41428, pad = input_221_pad_0, pad_type = input_221_pad_type_0, strides = var_42913, weight = layers_27_fc1_weight_to_fp16, x = input_219_cast_fp16)[name = tensor("input_221_cast_fp16")]; + tensor input_223_mode_0 = const()[name = tensor("input_223_mode_0"), val = tensor("EXACT")]; + tensor input_223_cast_fp16 = gelu(mode = input_223_mode_0, x = input_221_cast_fp16)[name = tensor("input_223_cast_fp16")]; + tensor var_42921 = const()[name = tensor("op_42921"), val = tensor([1, 1])]; + tensor var_42923 = const()[name = tensor("op_42923"), val = tensor([1, 1])]; + tensor hidden_states_59_pad_type_0 = const()[name = tensor("hidden_states_59_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_59_pad_0 = const()[name = tensor("hidden_states_59_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_27_fc2_weight_to_fp16 = const()[name = tensor("layers_27_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1103077504)))]; + tensor layers_27_fc2_bias_to_fp16 = const()[name = tensor("layers_27_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1116184768)))]; + tensor hidden_states_59_cast_fp16 = conv(bias = layers_27_fc2_bias_to_fp16, dilations = var_42923, groups = var_41428, pad = hidden_states_59_pad_0, pad_type = hidden_states_59_pad_type_0, strides = var_42921, weight = layers_27_fc2_weight_to_fp16, x = input_223_cast_fp16)[name = tensor("hidden_states_59_cast_fp16")]; + tensor inputs_113_cast_fp16 = add(x = inputs_111_cast_fp16, y = hidden_states_59_cast_fp16)[name = tensor("inputs_113_cast_fp16")]; + tensor var_42930 = const()[name = tensor("op_42930"), val = tensor(3)]; + tensor var_42955 = const()[name = tensor("op_42955"), val = tensor(1)]; + tensor var_42956 = const()[name = tensor("op_42956"), val = tensor(true)]; + tensor var_42966 = const()[name = tensor("op_42966"), val = tensor([1])]; + tensor channels_mean_113_cast_fp16 = reduce_mean(axes = var_42966, keep_dims = var_42956, x = inputs_113_cast_fp16)[name = tensor("channels_mean_113_cast_fp16")]; + tensor zero_mean_113_cast_fp16 = sub(x = inputs_113_cast_fp16, y = channels_mean_113_cast_fp16)[name = tensor("zero_mean_113_cast_fp16")]; + tensor zero_mean_sq_113_cast_fp16 = mul(x = zero_mean_113_cast_fp16, y = zero_mean_113_cast_fp16)[name = tensor("zero_mean_sq_113_cast_fp16")]; + tensor var_42970 = const()[name = tensor("op_42970"), val = tensor([1])]; + tensor var_42971_cast_fp16 = reduce_mean(axes = var_42970, keep_dims = var_42956, x = zero_mean_sq_113_cast_fp16)[name = tensor("op_42971_cast_fp16")]; + tensor var_42972_to_fp16 = const()[name = tensor("op_42972_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_42973_cast_fp16 = add(x = var_42971_cast_fp16, y = var_42972_to_fp16)[name = tensor("op_42973_cast_fp16")]; + tensor denom_113_epsilon_0_to_fp16 = const()[name = tensor("denom_113_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_113_cast_fp16 = rsqrt(epsilon = denom_113_epsilon_0_to_fp16, x = var_42973_cast_fp16)[name = tensor("denom_113_cast_fp16")]; + tensor out_113_cast_fp16 = mul(x = zero_mean_113_cast_fp16, y = denom_113_cast_fp16)[name = tensor("out_113_cast_fp16")]; + tensor obj_113_gamma_0_to_fp16 = const()[name = tensor("obj_113_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1116187392)))]; + tensor obj_113_beta_0_to_fp16 = const()[name = tensor("obj_113_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1116190016)))]; + tensor obj_113_epsilon_0_to_fp16 = const()[name = tensor("obj_113_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_113_cast_fp16 = batch_norm(beta = obj_113_beta_0_to_fp16, epsilon = obj_113_epsilon_0_to_fp16, gamma = obj_113_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_113_cast_fp16)[name = tensor("obj_113_cast_fp16")]; + tensor var_42988 = const()[name = tensor("op_42988"), val = tensor([1, 1])]; + tensor var_42990 = const()[name = tensor("op_42990"), val = tensor([1, 1])]; + tensor query_57_pad_type_0 = const()[name = tensor("query_57_pad_type_0"), val = tensor("custom")]; + tensor query_57_pad_0 = const()[name = tensor("query_57_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_28_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_28_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1116192640)))]; + tensor layers_28_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_28_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1119469504)))]; + tensor query_57_cast_fp16 = conv(bias = layers_28_self_attn_q_proj_bias_to_fp16, dilations = var_42990, groups = var_42955, pad = query_57_pad_0, pad_type = query_57_pad_type_0, strides = var_42988, weight = layers_28_self_attn_q_proj_weight_to_fp16, x = obj_113_cast_fp16)[name = tensor("query_57_cast_fp16")]; + tensor var_42994 = const()[name = tensor("op_42994"), val = tensor([1, 1])]; + tensor var_42996 = const()[name = tensor("op_42996"), val = tensor([1, 1])]; + tensor key_57_pad_type_0 = const()[name = tensor("key_57_pad_type_0"), val = tensor("custom")]; + tensor key_57_pad_0 = const()[name = tensor("key_57_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_28_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_28_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1119472128)))]; + tensor key_57_cast_fp16 = conv(dilations = var_42996, groups = var_42955, pad = key_57_pad_0, pad_type = key_57_pad_type_0, strides = var_42994, weight = layers_28_self_attn_k_proj_weight_to_fp16, x = obj_113_cast_fp16)[name = tensor("key_57_cast_fp16")]; + tensor var_43001 = const()[name = tensor("op_43001"), val = tensor([1, 1])]; + tensor var_43003 = const()[name = tensor("op_43003"), val = tensor([1, 1])]; + tensor value_57_pad_type_0 = const()[name = tensor("value_57_pad_type_0"), val = tensor("custom")]; + tensor value_57_pad_0 = const()[name = tensor("value_57_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_28_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_28_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1122748992)))]; + tensor layers_28_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_28_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1126025856)))]; + tensor value_57_cast_fp16 = conv(bias = layers_28_self_attn_v_proj_bias_to_fp16, dilations = var_43003, groups = var_42955, pad = value_57_pad_0, pad_type = value_57_pad_type_0, strides = var_43001, weight = layers_28_self_attn_v_proj_weight_to_fp16, x = obj_113_cast_fp16)[name = tensor("value_57_cast_fp16")]; + tensor var_43010_begin_0 = const()[name = tensor("op_43010_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_43010_end_0 = const()[name = tensor("op_43010_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_43010_end_mask_0 = const()[name = tensor("op_43010_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_43010_cast_fp16 = slice_by_index(begin = var_43010_begin_0, end = var_43010_end_0, end_mask = var_43010_end_mask_0, x = query_57_cast_fp16)[name = tensor("op_43010_cast_fp16")]; + tensor var_43014_begin_0 = const()[name = tensor("op_43014_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_43014_end_0 = const()[name = tensor("op_43014_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_43014_end_mask_0 = const()[name = tensor("op_43014_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_43014_cast_fp16 = slice_by_index(begin = var_43014_begin_0, end = var_43014_end_0, end_mask = var_43014_end_mask_0, x = query_57_cast_fp16)[name = tensor("op_43014_cast_fp16")]; + tensor var_43018_begin_0 = const()[name = tensor("op_43018_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_43018_end_0 = const()[name = tensor("op_43018_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_43018_end_mask_0 = const()[name = tensor("op_43018_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_43018_cast_fp16 = slice_by_index(begin = var_43018_begin_0, end = var_43018_end_0, end_mask = var_43018_end_mask_0, x = query_57_cast_fp16)[name = tensor("op_43018_cast_fp16")]; + tensor var_43022_begin_0 = const()[name = tensor("op_43022_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_43022_end_0 = const()[name = tensor("op_43022_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_43022_end_mask_0 = const()[name = tensor("op_43022_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_43022_cast_fp16 = slice_by_index(begin = var_43022_begin_0, end = var_43022_end_0, end_mask = var_43022_end_mask_0, x = query_57_cast_fp16)[name = tensor("op_43022_cast_fp16")]; + tensor var_43026_begin_0 = const()[name = tensor("op_43026_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_43026_end_0 = const()[name = tensor("op_43026_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_43026_end_mask_0 = const()[name = tensor("op_43026_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_43026_cast_fp16 = slice_by_index(begin = var_43026_begin_0, end = var_43026_end_0, end_mask = var_43026_end_mask_0, x = query_57_cast_fp16)[name = tensor("op_43026_cast_fp16")]; + tensor var_43030_begin_0 = const()[name = tensor("op_43030_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_43030_end_0 = const()[name = tensor("op_43030_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_43030_end_mask_0 = const()[name = tensor("op_43030_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_43030_cast_fp16 = slice_by_index(begin = var_43030_begin_0, end = var_43030_end_0, end_mask = var_43030_end_mask_0, x = query_57_cast_fp16)[name = tensor("op_43030_cast_fp16")]; + tensor var_43034_begin_0 = const()[name = tensor("op_43034_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_43034_end_0 = const()[name = tensor("op_43034_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_43034_end_mask_0 = const()[name = tensor("op_43034_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_43034_cast_fp16 = slice_by_index(begin = var_43034_begin_0, end = var_43034_end_0, end_mask = var_43034_end_mask_0, x = query_57_cast_fp16)[name = tensor("op_43034_cast_fp16")]; + tensor var_43038_begin_0 = const()[name = tensor("op_43038_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_43038_end_0 = const()[name = tensor("op_43038_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_43038_end_mask_0 = const()[name = tensor("op_43038_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_43038_cast_fp16 = slice_by_index(begin = var_43038_begin_0, end = var_43038_end_0, end_mask = var_43038_end_mask_0, x = query_57_cast_fp16)[name = tensor("op_43038_cast_fp16")]; + tensor var_43042_begin_0 = const()[name = tensor("op_43042_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_43042_end_0 = const()[name = tensor("op_43042_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_43042_end_mask_0 = const()[name = tensor("op_43042_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_43042_cast_fp16 = slice_by_index(begin = var_43042_begin_0, end = var_43042_end_0, end_mask = var_43042_end_mask_0, x = query_57_cast_fp16)[name = tensor("op_43042_cast_fp16")]; + tensor var_43046_begin_0 = const()[name = tensor("op_43046_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_43046_end_0 = const()[name = tensor("op_43046_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_43046_end_mask_0 = const()[name = tensor("op_43046_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_43046_cast_fp16 = slice_by_index(begin = var_43046_begin_0, end = var_43046_end_0, end_mask = var_43046_end_mask_0, x = query_57_cast_fp16)[name = tensor("op_43046_cast_fp16")]; + tensor var_43050_begin_0 = const()[name = tensor("op_43050_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_43050_end_0 = const()[name = tensor("op_43050_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_43050_end_mask_0 = const()[name = tensor("op_43050_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_43050_cast_fp16 = slice_by_index(begin = var_43050_begin_0, end = var_43050_end_0, end_mask = var_43050_end_mask_0, x = query_57_cast_fp16)[name = tensor("op_43050_cast_fp16")]; + tensor var_43054_begin_0 = const()[name = tensor("op_43054_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_43054_end_0 = const()[name = tensor("op_43054_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_43054_end_mask_0 = const()[name = tensor("op_43054_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_43054_cast_fp16 = slice_by_index(begin = var_43054_begin_0, end = var_43054_end_0, end_mask = var_43054_end_mask_0, x = query_57_cast_fp16)[name = tensor("op_43054_cast_fp16")]; + tensor var_43058_begin_0 = const()[name = tensor("op_43058_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_43058_end_0 = const()[name = tensor("op_43058_end_0"), val = tensor([1, 832, 1, 1500])]; + tensor var_43058_end_mask_0 = const()[name = tensor("op_43058_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_43058_cast_fp16 = slice_by_index(begin = var_43058_begin_0, end = var_43058_end_0, end_mask = var_43058_end_mask_0, x = query_57_cast_fp16)[name = tensor("op_43058_cast_fp16")]; + tensor var_43062_begin_0 = const()[name = tensor("op_43062_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_43062_end_0 = const()[name = tensor("op_43062_end_0"), val = tensor([1, 896, 1, 1500])]; + tensor var_43062_end_mask_0 = const()[name = tensor("op_43062_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_43062_cast_fp16 = slice_by_index(begin = var_43062_begin_0, end = var_43062_end_0, end_mask = var_43062_end_mask_0, x = query_57_cast_fp16)[name = tensor("op_43062_cast_fp16")]; + tensor var_43066_begin_0 = const()[name = tensor("op_43066_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_43066_end_0 = const()[name = tensor("op_43066_end_0"), val = tensor([1, 960, 1, 1500])]; + tensor var_43066_end_mask_0 = const()[name = tensor("op_43066_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_43066_cast_fp16 = slice_by_index(begin = var_43066_begin_0, end = var_43066_end_0, end_mask = var_43066_end_mask_0, x = query_57_cast_fp16)[name = tensor("op_43066_cast_fp16")]; + tensor var_43070_begin_0 = const()[name = tensor("op_43070_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_43070_end_0 = const()[name = tensor("op_43070_end_0"), val = tensor([1, 1024, 1, 1500])]; + tensor var_43070_end_mask_0 = const()[name = tensor("op_43070_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_43070_cast_fp16 = slice_by_index(begin = var_43070_begin_0, end = var_43070_end_0, end_mask = var_43070_end_mask_0, x = query_57_cast_fp16)[name = tensor("op_43070_cast_fp16")]; + tensor var_43074_begin_0 = const()[name = tensor("op_43074_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_43074_end_0 = const()[name = tensor("op_43074_end_0"), val = tensor([1, 1088, 1, 1500])]; + tensor var_43074_end_mask_0 = const()[name = tensor("op_43074_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_43074_cast_fp16 = slice_by_index(begin = var_43074_begin_0, end = var_43074_end_0, end_mask = var_43074_end_mask_0, x = query_57_cast_fp16)[name = tensor("op_43074_cast_fp16")]; + tensor var_43078_begin_0 = const()[name = tensor("op_43078_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_43078_end_0 = const()[name = tensor("op_43078_end_0"), val = tensor([1, 1152, 1, 1500])]; + tensor var_43078_end_mask_0 = const()[name = tensor("op_43078_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_43078_cast_fp16 = slice_by_index(begin = var_43078_begin_0, end = var_43078_end_0, end_mask = var_43078_end_mask_0, x = query_57_cast_fp16)[name = tensor("op_43078_cast_fp16")]; + tensor var_43082_begin_0 = const()[name = tensor("op_43082_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_43082_end_0 = const()[name = tensor("op_43082_end_0"), val = tensor([1, 1216, 1, 1500])]; + tensor var_43082_end_mask_0 = const()[name = tensor("op_43082_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_43082_cast_fp16 = slice_by_index(begin = var_43082_begin_0, end = var_43082_end_0, end_mask = var_43082_end_mask_0, x = query_57_cast_fp16)[name = tensor("op_43082_cast_fp16")]; + tensor var_43086_begin_0 = const()[name = tensor("op_43086_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_43086_end_0 = const()[name = tensor("op_43086_end_0"), val = tensor([1, 1280, 1, 1500])]; + tensor var_43086_end_mask_0 = const()[name = tensor("op_43086_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_43086_cast_fp16 = slice_by_index(begin = var_43086_begin_0, end = var_43086_end_0, end_mask = var_43086_end_mask_0, x = query_57_cast_fp16)[name = tensor("op_43086_cast_fp16")]; + tensor var_43095_begin_0 = const()[name = tensor("op_43095_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_43095_end_0 = const()[name = tensor("op_43095_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_43095_end_mask_0 = const()[name = tensor("op_43095_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43095_cast_fp16 = slice_by_index(begin = var_43095_begin_0, end = var_43095_end_0, end_mask = var_43095_end_mask_0, x = var_43010_cast_fp16)[name = tensor("op_43095_cast_fp16")]; + tensor var_43102_begin_0 = const()[name = tensor("op_43102_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_43102_end_0 = const()[name = tensor("op_43102_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_43102_end_mask_0 = const()[name = tensor("op_43102_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43102_cast_fp16 = slice_by_index(begin = var_43102_begin_0, end = var_43102_end_0, end_mask = var_43102_end_mask_0, x = var_43010_cast_fp16)[name = tensor("op_43102_cast_fp16")]; + tensor var_43109_begin_0 = const()[name = tensor("op_43109_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_43109_end_0 = const()[name = tensor("op_43109_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_43109_end_mask_0 = const()[name = tensor("op_43109_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43109_cast_fp16 = slice_by_index(begin = var_43109_begin_0, end = var_43109_end_0, end_mask = var_43109_end_mask_0, x = var_43010_cast_fp16)[name = tensor("op_43109_cast_fp16")]; + tensor var_43116_begin_0 = const()[name = tensor("op_43116_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_43116_end_0 = const()[name = tensor("op_43116_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_43116_end_mask_0 = const()[name = tensor("op_43116_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43116_cast_fp16 = slice_by_index(begin = var_43116_begin_0, end = var_43116_end_0, end_mask = var_43116_end_mask_0, x = var_43010_cast_fp16)[name = tensor("op_43116_cast_fp16")]; + tensor var_43123_begin_0 = const()[name = tensor("op_43123_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_43123_end_0 = const()[name = tensor("op_43123_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_43123_end_mask_0 = const()[name = tensor("op_43123_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43123_cast_fp16 = slice_by_index(begin = var_43123_begin_0, end = var_43123_end_0, end_mask = var_43123_end_mask_0, x = var_43014_cast_fp16)[name = tensor("op_43123_cast_fp16")]; + tensor var_43130_begin_0 = const()[name = tensor("op_43130_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_43130_end_0 = const()[name = tensor("op_43130_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_43130_end_mask_0 = const()[name = tensor("op_43130_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43130_cast_fp16 = slice_by_index(begin = var_43130_begin_0, end = var_43130_end_0, end_mask = var_43130_end_mask_0, x = var_43014_cast_fp16)[name = tensor("op_43130_cast_fp16")]; + tensor var_43137_begin_0 = const()[name = tensor("op_43137_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_43137_end_0 = const()[name = tensor("op_43137_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_43137_end_mask_0 = const()[name = tensor("op_43137_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43137_cast_fp16 = slice_by_index(begin = var_43137_begin_0, end = var_43137_end_0, end_mask = var_43137_end_mask_0, x = var_43014_cast_fp16)[name = tensor("op_43137_cast_fp16")]; + tensor var_43144_begin_0 = const()[name = tensor("op_43144_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_43144_end_0 = const()[name = tensor("op_43144_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_43144_end_mask_0 = const()[name = tensor("op_43144_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43144_cast_fp16 = slice_by_index(begin = var_43144_begin_0, end = var_43144_end_0, end_mask = var_43144_end_mask_0, x = var_43014_cast_fp16)[name = tensor("op_43144_cast_fp16")]; + tensor var_43151_begin_0 = const()[name = tensor("op_43151_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_43151_end_0 = const()[name = tensor("op_43151_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_43151_end_mask_0 = const()[name = tensor("op_43151_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43151_cast_fp16 = slice_by_index(begin = var_43151_begin_0, end = var_43151_end_0, end_mask = var_43151_end_mask_0, x = var_43018_cast_fp16)[name = tensor("op_43151_cast_fp16")]; + tensor var_43158_begin_0 = const()[name = tensor("op_43158_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_43158_end_0 = const()[name = tensor("op_43158_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_43158_end_mask_0 = const()[name = tensor("op_43158_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43158_cast_fp16 = slice_by_index(begin = var_43158_begin_0, end = var_43158_end_0, end_mask = var_43158_end_mask_0, x = var_43018_cast_fp16)[name = tensor("op_43158_cast_fp16")]; + tensor var_43165_begin_0 = const()[name = tensor("op_43165_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_43165_end_0 = const()[name = tensor("op_43165_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_43165_end_mask_0 = const()[name = tensor("op_43165_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43165_cast_fp16 = slice_by_index(begin = var_43165_begin_0, end = var_43165_end_0, end_mask = var_43165_end_mask_0, x = var_43018_cast_fp16)[name = tensor("op_43165_cast_fp16")]; + tensor var_43172_begin_0 = const()[name = tensor("op_43172_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_43172_end_0 = const()[name = tensor("op_43172_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_43172_end_mask_0 = const()[name = tensor("op_43172_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43172_cast_fp16 = slice_by_index(begin = var_43172_begin_0, end = var_43172_end_0, end_mask = var_43172_end_mask_0, x = var_43018_cast_fp16)[name = tensor("op_43172_cast_fp16")]; + tensor var_43179_begin_0 = const()[name = tensor("op_43179_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_43179_end_0 = const()[name = tensor("op_43179_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_43179_end_mask_0 = const()[name = tensor("op_43179_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43179_cast_fp16 = slice_by_index(begin = var_43179_begin_0, end = var_43179_end_0, end_mask = var_43179_end_mask_0, x = var_43022_cast_fp16)[name = tensor("op_43179_cast_fp16")]; + tensor var_43186_begin_0 = const()[name = tensor("op_43186_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_43186_end_0 = const()[name = tensor("op_43186_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_43186_end_mask_0 = const()[name = tensor("op_43186_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43186_cast_fp16 = slice_by_index(begin = var_43186_begin_0, end = var_43186_end_0, end_mask = var_43186_end_mask_0, x = var_43022_cast_fp16)[name = tensor("op_43186_cast_fp16")]; + tensor var_43193_begin_0 = const()[name = tensor("op_43193_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_43193_end_0 = const()[name = tensor("op_43193_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_43193_end_mask_0 = const()[name = tensor("op_43193_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43193_cast_fp16 = slice_by_index(begin = var_43193_begin_0, end = var_43193_end_0, end_mask = var_43193_end_mask_0, x = var_43022_cast_fp16)[name = tensor("op_43193_cast_fp16")]; + tensor var_43200_begin_0 = const()[name = tensor("op_43200_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_43200_end_0 = const()[name = tensor("op_43200_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_43200_end_mask_0 = const()[name = tensor("op_43200_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43200_cast_fp16 = slice_by_index(begin = var_43200_begin_0, end = var_43200_end_0, end_mask = var_43200_end_mask_0, x = var_43022_cast_fp16)[name = tensor("op_43200_cast_fp16")]; + tensor var_43207_begin_0 = const()[name = tensor("op_43207_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_43207_end_0 = const()[name = tensor("op_43207_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_43207_end_mask_0 = const()[name = tensor("op_43207_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43207_cast_fp16 = slice_by_index(begin = var_43207_begin_0, end = var_43207_end_0, end_mask = var_43207_end_mask_0, x = var_43026_cast_fp16)[name = tensor("op_43207_cast_fp16")]; + tensor var_43214_begin_0 = const()[name = tensor("op_43214_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_43214_end_0 = const()[name = tensor("op_43214_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_43214_end_mask_0 = const()[name = tensor("op_43214_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43214_cast_fp16 = slice_by_index(begin = var_43214_begin_0, end = var_43214_end_0, end_mask = var_43214_end_mask_0, x = var_43026_cast_fp16)[name = tensor("op_43214_cast_fp16")]; + tensor var_43221_begin_0 = const()[name = tensor("op_43221_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_43221_end_0 = const()[name = tensor("op_43221_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_43221_end_mask_0 = const()[name = tensor("op_43221_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43221_cast_fp16 = slice_by_index(begin = var_43221_begin_0, end = var_43221_end_0, end_mask = var_43221_end_mask_0, x = var_43026_cast_fp16)[name = tensor("op_43221_cast_fp16")]; + tensor var_43228_begin_0 = const()[name = tensor("op_43228_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_43228_end_0 = const()[name = tensor("op_43228_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_43228_end_mask_0 = const()[name = tensor("op_43228_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43228_cast_fp16 = slice_by_index(begin = var_43228_begin_0, end = var_43228_end_0, end_mask = var_43228_end_mask_0, x = var_43026_cast_fp16)[name = tensor("op_43228_cast_fp16")]; + tensor var_43235_begin_0 = const()[name = tensor("op_43235_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_43235_end_0 = const()[name = tensor("op_43235_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_43235_end_mask_0 = const()[name = tensor("op_43235_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43235_cast_fp16 = slice_by_index(begin = var_43235_begin_0, end = var_43235_end_0, end_mask = var_43235_end_mask_0, x = var_43030_cast_fp16)[name = tensor("op_43235_cast_fp16")]; + tensor var_43242_begin_0 = const()[name = tensor("op_43242_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_43242_end_0 = const()[name = tensor("op_43242_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_43242_end_mask_0 = const()[name = tensor("op_43242_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43242_cast_fp16 = slice_by_index(begin = var_43242_begin_0, end = var_43242_end_0, end_mask = var_43242_end_mask_0, x = var_43030_cast_fp16)[name = tensor("op_43242_cast_fp16")]; + tensor var_43249_begin_0 = const()[name = tensor("op_43249_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_43249_end_0 = const()[name = tensor("op_43249_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_43249_end_mask_0 = const()[name = tensor("op_43249_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43249_cast_fp16 = slice_by_index(begin = var_43249_begin_0, end = var_43249_end_0, end_mask = var_43249_end_mask_0, x = var_43030_cast_fp16)[name = tensor("op_43249_cast_fp16")]; + tensor var_43256_begin_0 = const()[name = tensor("op_43256_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_43256_end_0 = const()[name = tensor("op_43256_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_43256_end_mask_0 = const()[name = tensor("op_43256_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43256_cast_fp16 = slice_by_index(begin = var_43256_begin_0, end = var_43256_end_0, end_mask = var_43256_end_mask_0, x = var_43030_cast_fp16)[name = tensor("op_43256_cast_fp16")]; + tensor var_43263_begin_0 = const()[name = tensor("op_43263_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_43263_end_0 = const()[name = tensor("op_43263_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_43263_end_mask_0 = const()[name = tensor("op_43263_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43263_cast_fp16 = slice_by_index(begin = var_43263_begin_0, end = var_43263_end_0, end_mask = var_43263_end_mask_0, x = var_43034_cast_fp16)[name = tensor("op_43263_cast_fp16")]; + tensor var_43270_begin_0 = const()[name = tensor("op_43270_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_43270_end_0 = const()[name = tensor("op_43270_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_43270_end_mask_0 = const()[name = tensor("op_43270_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43270_cast_fp16 = slice_by_index(begin = var_43270_begin_0, end = var_43270_end_0, end_mask = var_43270_end_mask_0, x = var_43034_cast_fp16)[name = tensor("op_43270_cast_fp16")]; + tensor var_43277_begin_0 = const()[name = tensor("op_43277_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_43277_end_0 = const()[name = tensor("op_43277_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_43277_end_mask_0 = const()[name = tensor("op_43277_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43277_cast_fp16 = slice_by_index(begin = var_43277_begin_0, end = var_43277_end_0, end_mask = var_43277_end_mask_0, x = var_43034_cast_fp16)[name = tensor("op_43277_cast_fp16")]; + tensor var_43284_begin_0 = const()[name = tensor("op_43284_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_43284_end_0 = const()[name = tensor("op_43284_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_43284_end_mask_0 = const()[name = tensor("op_43284_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43284_cast_fp16 = slice_by_index(begin = var_43284_begin_0, end = var_43284_end_0, end_mask = var_43284_end_mask_0, x = var_43034_cast_fp16)[name = tensor("op_43284_cast_fp16")]; + tensor var_43291_begin_0 = const()[name = tensor("op_43291_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_43291_end_0 = const()[name = tensor("op_43291_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_43291_end_mask_0 = const()[name = tensor("op_43291_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43291_cast_fp16 = slice_by_index(begin = var_43291_begin_0, end = var_43291_end_0, end_mask = var_43291_end_mask_0, x = var_43038_cast_fp16)[name = tensor("op_43291_cast_fp16")]; + tensor var_43298_begin_0 = const()[name = tensor("op_43298_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_43298_end_0 = const()[name = tensor("op_43298_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_43298_end_mask_0 = const()[name = tensor("op_43298_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43298_cast_fp16 = slice_by_index(begin = var_43298_begin_0, end = var_43298_end_0, end_mask = var_43298_end_mask_0, x = var_43038_cast_fp16)[name = tensor("op_43298_cast_fp16")]; + tensor var_43305_begin_0 = const()[name = tensor("op_43305_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_43305_end_0 = const()[name = tensor("op_43305_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_43305_end_mask_0 = const()[name = tensor("op_43305_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43305_cast_fp16 = slice_by_index(begin = var_43305_begin_0, end = var_43305_end_0, end_mask = var_43305_end_mask_0, x = var_43038_cast_fp16)[name = tensor("op_43305_cast_fp16")]; + tensor var_43312_begin_0 = const()[name = tensor("op_43312_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_43312_end_0 = const()[name = tensor("op_43312_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_43312_end_mask_0 = const()[name = tensor("op_43312_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43312_cast_fp16 = slice_by_index(begin = var_43312_begin_0, end = var_43312_end_0, end_mask = var_43312_end_mask_0, x = var_43038_cast_fp16)[name = tensor("op_43312_cast_fp16")]; + tensor var_43319_begin_0 = const()[name = tensor("op_43319_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_43319_end_0 = const()[name = tensor("op_43319_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_43319_end_mask_0 = const()[name = tensor("op_43319_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43319_cast_fp16 = slice_by_index(begin = var_43319_begin_0, end = var_43319_end_0, end_mask = var_43319_end_mask_0, x = var_43042_cast_fp16)[name = tensor("op_43319_cast_fp16")]; + tensor var_43326_begin_0 = const()[name = tensor("op_43326_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_43326_end_0 = const()[name = tensor("op_43326_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_43326_end_mask_0 = const()[name = tensor("op_43326_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43326_cast_fp16 = slice_by_index(begin = var_43326_begin_0, end = var_43326_end_0, end_mask = var_43326_end_mask_0, x = var_43042_cast_fp16)[name = tensor("op_43326_cast_fp16")]; + tensor var_43333_begin_0 = const()[name = tensor("op_43333_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_43333_end_0 = const()[name = tensor("op_43333_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_43333_end_mask_0 = const()[name = tensor("op_43333_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43333_cast_fp16 = slice_by_index(begin = var_43333_begin_0, end = var_43333_end_0, end_mask = var_43333_end_mask_0, x = var_43042_cast_fp16)[name = tensor("op_43333_cast_fp16")]; + tensor var_43340_begin_0 = const()[name = tensor("op_43340_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_43340_end_0 = const()[name = tensor("op_43340_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_43340_end_mask_0 = const()[name = tensor("op_43340_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43340_cast_fp16 = slice_by_index(begin = var_43340_begin_0, end = var_43340_end_0, end_mask = var_43340_end_mask_0, x = var_43042_cast_fp16)[name = tensor("op_43340_cast_fp16")]; + tensor var_43347_begin_0 = const()[name = tensor("op_43347_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_43347_end_0 = const()[name = tensor("op_43347_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_43347_end_mask_0 = const()[name = tensor("op_43347_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43347_cast_fp16 = slice_by_index(begin = var_43347_begin_0, end = var_43347_end_0, end_mask = var_43347_end_mask_0, x = var_43046_cast_fp16)[name = tensor("op_43347_cast_fp16")]; + tensor var_43354_begin_0 = const()[name = tensor("op_43354_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_43354_end_0 = const()[name = tensor("op_43354_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_43354_end_mask_0 = const()[name = tensor("op_43354_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43354_cast_fp16 = slice_by_index(begin = var_43354_begin_0, end = var_43354_end_0, end_mask = var_43354_end_mask_0, x = var_43046_cast_fp16)[name = tensor("op_43354_cast_fp16")]; + tensor var_43361_begin_0 = const()[name = tensor("op_43361_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_43361_end_0 = const()[name = tensor("op_43361_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_43361_end_mask_0 = const()[name = tensor("op_43361_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43361_cast_fp16 = slice_by_index(begin = var_43361_begin_0, end = var_43361_end_0, end_mask = var_43361_end_mask_0, x = var_43046_cast_fp16)[name = tensor("op_43361_cast_fp16")]; + tensor var_43368_begin_0 = const()[name = tensor("op_43368_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_43368_end_0 = const()[name = tensor("op_43368_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_43368_end_mask_0 = const()[name = tensor("op_43368_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43368_cast_fp16 = slice_by_index(begin = var_43368_begin_0, end = var_43368_end_0, end_mask = var_43368_end_mask_0, x = var_43046_cast_fp16)[name = tensor("op_43368_cast_fp16")]; + tensor var_43375_begin_0 = const()[name = tensor("op_43375_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_43375_end_0 = const()[name = tensor("op_43375_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_43375_end_mask_0 = const()[name = tensor("op_43375_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43375_cast_fp16 = slice_by_index(begin = var_43375_begin_0, end = var_43375_end_0, end_mask = var_43375_end_mask_0, x = var_43050_cast_fp16)[name = tensor("op_43375_cast_fp16")]; + tensor var_43382_begin_0 = const()[name = tensor("op_43382_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_43382_end_0 = const()[name = tensor("op_43382_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_43382_end_mask_0 = const()[name = tensor("op_43382_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43382_cast_fp16 = slice_by_index(begin = var_43382_begin_0, end = var_43382_end_0, end_mask = var_43382_end_mask_0, x = var_43050_cast_fp16)[name = tensor("op_43382_cast_fp16")]; + tensor var_43389_begin_0 = const()[name = tensor("op_43389_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_43389_end_0 = const()[name = tensor("op_43389_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_43389_end_mask_0 = const()[name = tensor("op_43389_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43389_cast_fp16 = slice_by_index(begin = var_43389_begin_0, end = var_43389_end_0, end_mask = var_43389_end_mask_0, x = var_43050_cast_fp16)[name = tensor("op_43389_cast_fp16")]; + tensor var_43396_begin_0 = const()[name = tensor("op_43396_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_43396_end_0 = const()[name = tensor("op_43396_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_43396_end_mask_0 = const()[name = tensor("op_43396_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43396_cast_fp16 = slice_by_index(begin = var_43396_begin_0, end = var_43396_end_0, end_mask = var_43396_end_mask_0, x = var_43050_cast_fp16)[name = tensor("op_43396_cast_fp16")]; + tensor var_43403_begin_0 = const()[name = tensor("op_43403_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_43403_end_0 = const()[name = tensor("op_43403_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_43403_end_mask_0 = const()[name = tensor("op_43403_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43403_cast_fp16 = slice_by_index(begin = var_43403_begin_0, end = var_43403_end_0, end_mask = var_43403_end_mask_0, x = var_43054_cast_fp16)[name = tensor("op_43403_cast_fp16")]; + tensor var_43410_begin_0 = const()[name = tensor("op_43410_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_43410_end_0 = const()[name = tensor("op_43410_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_43410_end_mask_0 = const()[name = tensor("op_43410_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43410_cast_fp16 = slice_by_index(begin = var_43410_begin_0, end = var_43410_end_0, end_mask = var_43410_end_mask_0, x = var_43054_cast_fp16)[name = tensor("op_43410_cast_fp16")]; + tensor var_43417_begin_0 = const()[name = tensor("op_43417_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_43417_end_0 = const()[name = tensor("op_43417_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_43417_end_mask_0 = const()[name = tensor("op_43417_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43417_cast_fp16 = slice_by_index(begin = var_43417_begin_0, end = var_43417_end_0, end_mask = var_43417_end_mask_0, x = var_43054_cast_fp16)[name = tensor("op_43417_cast_fp16")]; + tensor var_43424_begin_0 = const()[name = tensor("op_43424_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_43424_end_0 = const()[name = tensor("op_43424_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_43424_end_mask_0 = const()[name = tensor("op_43424_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43424_cast_fp16 = slice_by_index(begin = var_43424_begin_0, end = var_43424_end_0, end_mask = var_43424_end_mask_0, x = var_43054_cast_fp16)[name = tensor("op_43424_cast_fp16")]; + tensor var_43431_begin_0 = const()[name = tensor("op_43431_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_43431_end_0 = const()[name = tensor("op_43431_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_43431_end_mask_0 = const()[name = tensor("op_43431_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43431_cast_fp16 = slice_by_index(begin = var_43431_begin_0, end = var_43431_end_0, end_mask = var_43431_end_mask_0, x = var_43058_cast_fp16)[name = tensor("op_43431_cast_fp16")]; + tensor var_43438_begin_0 = const()[name = tensor("op_43438_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_43438_end_0 = const()[name = tensor("op_43438_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_43438_end_mask_0 = const()[name = tensor("op_43438_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43438_cast_fp16 = slice_by_index(begin = var_43438_begin_0, end = var_43438_end_0, end_mask = var_43438_end_mask_0, x = var_43058_cast_fp16)[name = tensor("op_43438_cast_fp16")]; + tensor var_43445_begin_0 = const()[name = tensor("op_43445_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_43445_end_0 = const()[name = tensor("op_43445_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_43445_end_mask_0 = const()[name = tensor("op_43445_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43445_cast_fp16 = slice_by_index(begin = var_43445_begin_0, end = var_43445_end_0, end_mask = var_43445_end_mask_0, x = var_43058_cast_fp16)[name = tensor("op_43445_cast_fp16")]; + tensor var_43452_begin_0 = const()[name = tensor("op_43452_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_43452_end_0 = const()[name = tensor("op_43452_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_43452_end_mask_0 = const()[name = tensor("op_43452_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43452_cast_fp16 = slice_by_index(begin = var_43452_begin_0, end = var_43452_end_0, end_mask = var_43452_end_mask_0, x = var_43058_cast_fp16)[name = tensor("op_43452_cast_fp16")]; + tensor var_43459_begin_0 = const()[name = tensor("op_43459_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_43459_end_0 = const()[name = tensor("op_43459_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_43459_end_mask_0 = const()[name = tensor("op_43459_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43459_cast_fp16 = slice_by_index(begin = var_43459_begin_0, end = var_43459_end_0, end_mask = var_43459_end_mask_0, x = var_43062_cast_fp16)[name = tensor("op_43459_cast_fp16")]; + tensor var_43466_begin_0 = const()[name = tensor("op_43466_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_43466_end_0 = const()[name = tensor("op_43466_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_43466_end_mask_0 = const()[name = tensor("op_43466_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43466_cast_fp16 = slice_by_index(begin = var_43466_begin_0, end = var_43466_end_0, end_mask = var_43466_end_mask_0, x = var_43062_cast_fp16)[name = tensor("op_43466_cast_fp16")]; + tensor var_43473_begin_0 = const()[name = tensor("op_43473_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_43473_end_0 = const()[name = tensor("op_43473_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_43473_end_mask_0 = const()[name = tensor("op_43473_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43473_cast_fp16 = slice_by_index(begin = var_43473_begin_0, end = var_43473_end_0, end_mask = var_43473_end_mask_0, x = var_43062_cast_fp16)[name = tensor("op_43473_cast_fp16")]; + tensor var_43480_begin_0 = const()[name = tensor("op_43480_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_43480_end_0 = const()[name = tensor("op_43480_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_43480_end_mask_0 = const()[name = tensor("op_43480_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43480_cast_fp16 = slice_by_index(begin = var_43480_begin_0, end = var_43480_end_0, end_mask = var_43480_end_mask_0, x = var_43062_cast_fp16)[name = tensor("op_43480_cast_fp16")]; + tensor var_43487_begin_0 = const()[name = tensor("op_43487_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_43487_end_0 = const()[name = tensor("op_43487_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_43487_end_mask_0 = const()[name = tensor("op_43487_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43487_cast_fp16 = slice_by_index(begin = var_43487_begin_0, end = var_43487_end_0, end_mask = var_43487_end_mask_0, x = var_43066_cast_fp16)[name = tensor("op_43487_cast_fp16")]; + tensor var_43494_begin_0 = const()[name = tensor("op_43494_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_43494_end_0 = const()[name = tensor("op_43494_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_43494_end_mask_0 = const()[name = tensor("op_43494_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43494_cast_fp16 = slice_by_index(begin = var_43494_begin_0, end = var_43494_end_0, end_mask = var_43494_end_mask_0, x = var_43066_cast_fp16)[name = tensor("op_43494_cast_fp16")]; + tensor var_43501_begin_0 = const()[name = tensor("op_43501_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_43501_end_0 = const()[name = tensor("op_43501_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_43501_end_mask_0 = const()[name = tensor("op_43501_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43501_cast_fp16 = slice_by_index(begin = var_43501_begin_0, end = var_43501_end_0, end_mask = var_43501_end_mask_0, x = var_43066_cast_fp16)[name = tensor("op_43501_cast_fp16")]; + tensor var_43508_begin_0 = const()[name = tensor("op_43508_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_43508_end_0 = const()[name = tensor("op_43508_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_43508_end_mask_0 = const()[name = tensor("op_43508_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43508_cast_fp16 = slice_by_index(begin = var_43508_begin_0, end = var_43508_end_0, end_mask = var_43508_end_mask_0, x = var_43066_cast_fp16)[name = tensor("op_43508_cast_fp16")]; + tensor var_43515_begin_0 = const()[name = tensor("op_43515_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_43515_end_0 = const()[name = tensor("op_43515_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_43515_end_mask_0 = const()[name = tensor("op_43515_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43515_cast_fp16 = slice_by_index(begin = var_43515_begin_0, end = var_43515_end_0, end_mask = var_43515_end_mask_0, x = var_43070_cast_fp16)[name = tensor("op_43515_cast_fp16")]; + tensor var_43522_begin_0 = const()[name = tensor("op_43522_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_43522_end_0 = const()[name = tensor("op_43522_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_43522_end_mask_0 = const()[name = tensor("op_43522_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43522_cast_fp16 = slice_by_index(begin = var_43522_begin_0, end = var_43522_end_0, end_mask = var_43522_end_mask_0, x = var_43070_cast_fp16)[name = tensor("op_43522_cast_fp16")]; + tensor var_43529_begin_0 = const()[name = tensor("op_43529_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_43529_end_0 = const()[name = tensor("op_43529_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_43529_end_mask_0 = const()[name = tensor("op_43529_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43529_cast_fp16 = slice_by_index(begin = var_43529_begin_0, end = var_43529_end_0, end_mask = var_43529_end_mask_0, x = var_43070_cast_fp16)[name = tensor("op_43529_cast_fp16")]; + tensor var_43536_begin_0 = const()[name = tensor("op_43536_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_43536_end_0 = const()[name = tensor("op_43536_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_43536_end_mask_0 = const()[name = tensor("op_43536_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43536_cast_fp16 = slice_by_index(begin = var_43536_begin_0, end = var_43536_end_0, end_mask = var_43536_end_mask_0, x = var_43070_cast_fp16)[name = tensor("op_43536_cast_fp16")]; + tensor var_43543_begin_0 = const()[name = tensor("op_43543_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_43543_end_0 = const()[name = tensor("op_43543_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_43543_end_mask_0 = const()[name = tensor("op_43543_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43543_cast_fp16 = slice_by_index(begin = var_43543_begin_0, end = var_43543_end_0, end_mask = var_43543_end_mask_0, x = var_43074_cast_fp16)[name = tensor("op_43543_cast_fp16")]; + tensor var_43550_begin_0 = const()[name = tensor("op_43550_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_43550_end_0 = const()[name = tensor("op_43550_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_43550_end_mask_0 = const()[name = tensor("op_43550_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43550_cast_fp16 = slice_by_index(begin = var_43550_begin_0, end = var_43550_end_0, end_mask = var_43550_end_mask_0, x = var_43074_cast_fp16)[name = tensor("op_43550_cast_fp16")]; + tensor var_43557_begin_0 = const()[name = tensor("op_43557_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_43557_end_0 = const()[name = tensor("op_43557_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_43557_end_mask_0 = const()[name = tensor("op_43557_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43557_cast_fp16 = slice_by_index(begin = var_43557_begin_0, end = var_43557_end_0, end_mask = var_43557_end_mask_0, x = var_43074_cast_fp16)[name = tensor("op_43557_cast_fp16")]; + tensor var_43564_begin_0 = const()[name = tensor("op_43564_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_43564_end_0 = const()[name = tensor("op_43564_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_43564_end_mask_0 = const()[name = tensor("op_43564_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43564_cast_fp16 = slice_by_index(begin = var_43564_begin_0, end = var_43564_end_0, end_mask = var_43564_end_mask_0, x = var_43074_cast_fp16)[name = tensor("op_43564_cast_fp16")]; + tensor var_43571_begin_0 = const()[name = tensor("op_43571_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_43571_end_0 = const()[name = tensor("op_43571_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_43571_end_mask_0 = const()[name = tensor("op_43571_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43571_cast_fp16 = slice_by_index(begin = var_43571_begin_0, end = var_43571_end_0, end_mask = var_43571_end_mask_0, x = var_43078_cast_fp16)[name = tensor("op_43571_cast_fp16")]; + tensor var_43578_begin_0 = const()[name = tensor("op_43578_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_43578_end_0 = const()[name = tensor("op_43578_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_43578_end_mask_0 = const()[name = tensor("op_43578_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43578_cast_fp16 = slice_by_index(begin = var_43578_begin_0, end = var_43578_end_0, end_mask = var_43578_end_mask_0, x = var_43078_cast_fp16)[name = tensor("op_43578_cast_fp16")]; + tensor var_43585_begin_0 = const()[name = tensor("op_43585_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_43585_end_0 = const()[name = tensor("op_43585_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_43585_end_mask_0 = const()[name = tensor("op_43585_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43585_cast_fp16 = slice_by_index(begin = var_43585_begin_0, end = var_43585_end_0, end_mask = var_43585_end_mask_0, x = var_43078_cast_fp16)[name = tensor("op_43585_cast_fp16")]; + tensor var_43592_begin_0 = const()[name = tensor("op_43592_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_43592_end_0 = const()[name = tensor("op_43592_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_43592_end_mask_0 = const()[name = tensor("op_43592_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43592_cast_fp16 = slice_by_index(begin = var_43592_begin_0, end = var_43592_end_0, end_mask = var_43592_end_mask_0, x = var_43078_cast_fp16)[name = tensor("op_43592_cast_fp16")]; + tensor var_43599_begin_0 = const()[name = tensor("op_43599_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_43599_end_0 = const()[name = tensor("op_43599_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_43599_end_mask_0 = const()[name = tensor("op_43599_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43599_cast_fp16 = slice_by_index(begin = var_43599_begin_0, end = var_43599_end_0, end_mask = var_43599_end_mask_0, x = var_43082_cast_fp16)[name = tensor("op_43599_cast_fp16")]; + tensor var_43606_begin_0 = const()[name = tensor("op_43606_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_43606_end_0 = const()[name = tensor("op_43606_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_43606_end_mask_0 = const()[name = tensor("op_43606_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43606_cast_fp16 = slice_by_index(begin = var_43606_begin_0, end = var_43606_end_0, end_mask = var_43606_end_mask_0, x = var_43082_cast_fp16)[name = tensor("op_43606_cast_fp16")]; + tensor var_43613_begin_0 = const()[name = tensor("op_43613_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_43613_end_0 = const()[name = tensor("op_43613_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_43613_end_mask_0 = const()[name = tensor("op_43613_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43613_cast_fp16 = slice_by_index(begin = var_43613_begin_0, end = var_43613_end_0, end_mask = var_43613_end_mask_0, x = var_43082_cast_fp16)[name = tensor("op_43613_cast_fp16")]; + tensor var_43620_begin_0 = const()[name = tensor("op_43620_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_43620_end_0 = const()[name = tensor("op_43620_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_43620_end_mask_0 = const()[name = tensor("op_43620_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43620_cast_fp16 = slice_by_index(begin = var_43620_begin_0, end = var_43620_end_0, end_mask = var_43620_end_mask_0, x = var_43082_cast_fp16)[name = tensor("op_43620_cast_fp16")]; + tensor var_43627_begin_0 = const()[name = tensor("op_43627_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_43627_end_0 = const()[name = tensor("op_43627_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_43627_end_mask_0 = const()[name = tensor("op_43627_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43627_cast_fp16 = slice_by_index(begin = var_43627_begin_0, end = var_43627_end_0, end_mask = var_43627_end_mask_0, x = var_43086_cast_fp16)[name = tensor("op_43627_cast_fp16")]; + tensor var_43634_begin_0 = const()[name = tensor("op_43634_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_43634_end_0 = const()[name = tensor("op_43634_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_43634_end_mask_0 = const()[name = tensor("op_43634_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43634_cast_fp16 = slice_by_index(begin = var_43634_begin_0, end = var_43634_end_0, end_mask = var_43634_end_mask_0, x = var_43086_cast_fp16)[name = tensor("op_43634_cast_fp16")]; + tensor var_43641_begin_0 = const()[name = tensor("op_43641_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_43641_end_0 = const()[name = tensor("op_43641_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_43641_end_mask_0 = const()[name = tensor("op_43641_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43641_cast_fp16 = slice_by_index(begin = var_43641_begin_0, end = var_43641_end_0, end_mask = var_43641_end_mask_0, x = var_43086_cast_fp16)[name = tensor("op_43641_cast_fp16")]; + tensor var_43648_begin_0 = const()[name = tensor("op_43648_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_43648_end_0 = const()[name = tensor("op_43648_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_43648_end_mask_0 = const()[name = tensor("op_43648_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43648_cast_fp16 = slice_by_index(begin = var_43648_begin_0, end = var_43648_end_0, end_mask = var_43648_end_mask_0, x = var_43086_cast_fp16)[name = tensor("op_43648_cast_fp16")]; + tensor k_57_perm_0 = const()[name = tensor("k_57_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_43653_begin_0 = const()[name = tensor("op_43653_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_43653_end_0 = const()[name = tensor("op_43653_end_0"), val = tensor([1, 1500, 1, 64])]; + tensor var_43653_end_mask_0 = const()[name = tensor("op_43653_end_mask_0"), val = tensor([true, true, true, false])]; + tensor transpose_3 = transpose(perm = k_57_perm_0, x = key_57_cast_fp16)[name = tensor("transpose_3")]; + tensor var_43653_cast_fp16 = slice_by_index(begin = var_43653_begin_0, end = var_43653_end_0, end_mask = var_43653_end_mask_0, x = transpose_3)[name = tensor("op_43653_cast_fp16")]; + tensor var_43657_begin_0 = const()[name = tensor("op_43657_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor var_43657_end_0 = const()[name = tensor("op_43657_end_0"), val = tensor([1, 1500, 1, 128])]; + tensor var_43657_end_mask_0 = const()[name = tensor("op_43657_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43657_cast_fp16 = slice_by_index(begin = var_43657_begin_0, end = var_43657_end_0, end_mask = var_43657_end_mask_0, x = transpose_3)[name = tensor("op_43657_cast_fp16")]; + tensor var_43661_begin_0 = const()[name = tensor("op_43661_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_43661_end_0 = const()[name = tensor("op_43661_end_0"), val = tensor([1, 1500, 1, 192])]; + tensor var_43661_end_mask_0 = const()[name = tensor("op_43661_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43661_cast_fp16 = slice_by_index(begin = var_43661_begin_0, end = var_43661_end_0, end_mask = var_43661_end_mask_0, x = transpose_3)[name = tensor("op_43661_cast_fp16")]; + tensor var_43665_begin_0 = const()[name = tensor("op_43665_begin_0"), val = tensor([0, 0, 0, 192])]; + tensor var_43665_end_0 = const()[name = tensor("op_43665_end_0"), val = tensor([1, 1500, 1, 256])]; + tensor var_43665_end_mask_0 = const()[name = tensor("op_43665_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43665_cast_fp16 = slice_by_index(begin = var_43665_begin_0, end = var_43665_end_0, end_mask = var_43665_end_mask_0, x = transpose_3)[name = tensor("op_43665_cast_fp16")]; + tensor var_43669_begin_0 = const()[name = tensor("op_43669_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_43669_end_0 = const()[name = tensor("op_43669_end_0"), val = tensor([1, 1500, 1, 320])]; + tensor var_43669_end_mask_0 = const()[name = tensor("op_43669_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43669_cast_fp16 = slice_by_index(begin = var_43669_begin_0, end = var_43669_end_0, end_mask = var_43669_end_mask_0, x = transpose_3)[name = tensor("op_43669_cast_fp16")]; + tensor var_43673_begin_0 = const()[name = tensor("op_43673_begin_0"), val = tensor([0, 0, 0, 320])]; + tensor var_43673_end_0 = const()[name = tensor("op_43673_end_0"), val = tensor([1, 1500, 1, 384])]; + tensor var_43673_end_mask_0 = const()[name = tensor("op_43673_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43673_cast_fp16 = slice_by_index(begin = var_43673_begin_0, end = var_43673_end_0, end_mask = var_43673_end_mask_0, x = transpose_3)[name = tensor("op_43673_cast_fp16")]; + tensor var_43677_begin_0 = const()[name = tensor("op_43677_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_43677_end_0 = const()[name = tensor("op_43677_end_0"), val = tensor([1, 1500, 1, 448])]; + tensor var_43677_end_mask_0 = const()[name = tensor("op_43677_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43677_cast_fp16 = slice_by_index(begin = var_43677_begin_0, end = var_43677_end_0, end_mask = var_43677_end_mask_0, x = transpose_3)[name = tensor("op_43677_cast_fp16")]; + tensor var_43681_begin_0 = const()[name = tensor("op_43681_begin_0"), val = tensor([0, 0, 0, 448])]; + tensor var_43681_end_0 = const()[name = tensor("op_43681_end_0"), val = tensor([1, 1500, 1, 512])]; + tensor var_43681_end_mask_0 = const()[name = tensor("op_43681_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43681_cast_fp16 = slice_by_index(begin = var_43681_begin_0, end = var_43681_end_0, end_mask = var_43681_end_mask_0, x = transpose_3)[name = tensor("op_43681_cast_fp16")]; + tensor var_43685_begin_0 = const()[name = tensor("op_43685_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_43685_end_0 = const()[name = tensor("op_43685_end_0"), val = tensor([1, 1500, 1, 576])]; + tensor var_43685_end_mask_0 = const()[name = tensor("op_43685_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43685_cast_fp16 = slice_by_index(begin = var_43685_begin_0, end = var_43685_end_0, end_mask = var_43685_end_mask_0, x = transpose_3)[name = tensor("op_43685_cast_fp16")]; + tensor var_43689_begin_0 = const()[name = tensor("op_43689_begin_0"), val = tensor([0, 0, 0, 576])]; + tensor var_43689_end_0 = const()[name = tensor("op_43689_end_0"), val = tensor([1, 1500, 1, 640])]; + tensor var_43689_end_mask_0 = const()[name = tensor("op_43689_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43689_cast_fp16 = slice_by_index(begin = var_43689_begin_0, end = var_43689_end_0, end_mask = var_43689_end_mask_0, x = transpose_3)[name = tensor("op_43689_cast_fp16")]; + tensor var_43693_begin_0 = const()[name = tensor("op_43693_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_43693_end_0 = const()[name = tensor("op_43693_end_0"), val = tensor([1, 1500, 1, 704])]; + tensor var_43693_end_mask_0 = const()[name = tensor("op_43693_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43693_cast_fp16 = slice_by_index(begin = var_43693_begin_0, end = var_43693_end_0, end_mask = var_43693_end_mask_0, x = transpose_3)[name = tensor("op_43693_cast_fp16")]; + tensor var_43697_begin_0 = const()[name = tensor("op_43697_begin_0"), val = tensor([0, 0, 0, 704])]; + tensor var_43697_end_0 = const()[name = tensor("op_43697_end_0"), val = tensor([1, 1500, 1, 768])]; + tensor var_43697_end_mask_0 = const()[name = tensor("op_43697_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43697_cast_fp16 = slice_by_index(begin = var_43697_begin_0, end = var_43697_end_0, end_mask = var_43697_end_mask_0, x = transpose_3)[name = tensor("op_43697_cast_fp16")]; + tensor var_43701_begin_0 = const()[name = tensor("op_43701_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_43701_end_0 = const()[name = tensor("op_43701_end_0"), val = tensor([1, 1500, 1, 832])]; + tensor var_43701_end_mask_0 = const()[name = tensor("op_43701_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43701_cast_fp16 = slice_by_index(begin = var_43701_begin_0, end = var_43701_end_0, end_mask = var_43701_end_mask_0, x = transpose_3)[name = tensor("op_43701_cast_fp16")]; + tensor var_43705_begin_0 = const()[name = tensor("op_43705_begin_0"), val = tensor([0, 0, 0, 832])]; + tensor var_43705_end_0 = const()[name = tensor("op_43705_end_0"), val = tensor([1, 1500, 1, 896])]; + tensor var_43705_end_mask_0 = const()[name = tensor("op_43705_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43705_cast_fp16 = slice_by_index(begin = var_43705_begin_0, end = var_43705_end_0, end_mask = var_43705_end_mask_0, x = transpose_3)[name = tensor("op_43705_cast_fp16")]; + tensor var_43709_begin_0 = const()[name = tensor("op_43709_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_43709_end_0 = const()[name = tensor("op_43709_end_0"), val = tensor([1, 1500, 1, 960])]; + tensor var_43709_end_mask_0 = const()[name = tensor("op_43709_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43709_cast_fp16 = slice_by_index(begin = var_43709_begin_0, end = var_43709_end_0, end_mask = var_43709_end_mask_0, x = transpose_3)[name = tensor("op_43709_cast_fp16")]; + tensor var_43713_begin_0 = const()[name = tensor("op_43713_begin_0"), val = tensor([0, 0, 0, 960])]; + tensor var_43713_end_0 = const()[name = tensor("op_43713_end_0"), val = tensor([1, 1500, 1, 1024])]; + tensor var_43713_end_mask_0 = const()[name = tensor("op_43713_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43713_cast_fp16 = slice_by_index(begin = var_43713_begin_0, end = var_43713_end_0, end_mask = var_43713_end_mask_0, x = transpose_3)[name = tensor("op_43713_cast_fp16")]; + tensor var_43717_begin_0 = const()[name = tensor("op_43717_begin_0"), val = tensor([0, 0, 0, 1024])]; + tensor var_43717_end_0 = const()[name = tensor("op_43717_end_0"), val = tensor([1, 1500, 1, 1088])]; + tensor var_43717_end_mask_0 = const()[name = tensor("op_43717_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43717_cast_fp16 = slice_by_index(begin = var_43717_begin_0, end = var_43717_end_0, end_mask = var_43717_end_mask_0, x = transpose_3)[name = tensor("op_43717_cast_fp16")]; + tensor var_43721_begin_0 = const()[name = tensor("op_43721_begin_0"), val = tensor([0, 0, 0, 1088])]; + tensor var_43721_end_0 = const()[name = tensor("op_43721_end_0"), val = tensor([1, 1500, 1, 1152])]; + tensor var_43721_end_mask_0 = const()[name = tensor("op_43721_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43721_cast_fp16 = slice_by_index(begin = var_43721_begin_0, end = var_43721_end_0, end_mask = var_43721_end_mask_0, x = transpose_3)[name = tensor("op_43721_cast_fp16")]; + tensor var_43725_begin_0 = const()[name = tensor("op_43725_begin_0"), val = tensor([0, 0, 0, 1152])]; + tensor var_43725_end_0 = const()[name = tensor("op_43725_end_0"), val = tensor([1, 1500, 1, 1216])]; + tensor var_43725_end_mask_0 = const()[name = tensor("op_43725_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43725_cast_fp16 = slice_by_index(begin = var_43725_begin_0, end = var_43725_end_0, end_mask = var_43725_end_mask_0, x = transpose_3)[name = tensor("op_43725_cast_fp16")]; + tensor var_43729_begin_0 = const()[name = tensor("op_43729_begin_0"), val = tensor([0, 0, 0, 1216])]; + tensor var_43729_end_0 = const()[name = tensor("op_43729_end_0"), val = tensor([1, 1500, 1, 1280])]; + tensor var_43729_end_mask_0 = const()[name = tensor("op_43729_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_43729_cast_fp16 = slice_by_index(begin = var_43729_begin_0, end = var_43729_end_0, end_mask = var_43729_end_mask_0, x = transpose_3)[name = tensor("op_43729_cast_fp16")]; + tensor var_43731_begin_0 = const()[name = tensor("op_43731_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_43731_end_0 = const()[name = tensor("op_43731_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_43731_end_mask_0 = const()[name = tensor("op_43731_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_43731_cast_fp16 = slice_by_index(begin = var_43731_begin_0, end = var_43731_end_0, end_mask = var_43731_end_mask_0, x = value_57_cast_fp16)[name = tensor("op_43731_cast_fp16")]; + tensor var_43735_begin_0 = const()[name = tensor("op_43735_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_43735_end_0 = const()[name = tensor("op_43735_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_43735_end_mask_0 = const()[name = tensor("op_43735_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_43735_cast_fp16 = slice_by_index(begin = var_43735_begin_0, end = var_43735_end_0, end_mask = var_43735_end_mask_0, x = value_57_cast_fp16)[name = tensor("op_43735_cast_fp16")]; + tensor var_43739_begin_0 = const()[name = tensor("op_43739_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_43739_end_0 = const()[name = tensor("op_43739_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_43739_end_mask_0 = const()[name = tensor("op_43739_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_43739_cast_fp16 = slice_by_index(begin = var_43739_begin_0, end = var_43739_end_0, end_mask = var_43739_end_mask_0, x = value_57_cast_fp16)[name = tensor("op_43739_cast_fp16")]; + tensor var_43743_begin_0 = const()[name = tensor("op_43743_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_43743_end_0 = const()[name = tensor("op_43743_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_43743_end_mask_0 = const()[name = tensor("op_43743_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_43743_cast_fp16 = slice_by_index(begin = var_43743_begin_0, end = var_43743_end_0, end_mask = var_43743_end_mask_0, x = value_57_cast_fp16)[name = tensor("op_43743_cast_fp16")]; + tensor var_43747_begin_0 = const()[name = tensor("op_43747_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_43747_end_0 = const()[name = tensor("op_43747_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_43747_end_mask_0 = const()[name = tensor("op_43747_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_43747_cast_fp16 = slice_by_index(begin = var_43747_begin_0, end = var_43747_end_0, end_mask = var_43747_end_mask_0, x = value_57_cast_fp16)[name = tensor("op_43747_cast_fp16")]; + tensor var_43751_begin_0 = const()[name = tensor("op_43751_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_43751_end_0 = const()[name = tensor("op_43751_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_43751_end_mask_0 = const()[name = tensor("op_43751_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_43751_cast_fp16 = slice_by_index(begin = var_43751_begin_0, end = var_43751_end_0, end_mask = var_43751_end_mask_0, x = value_57_cast_fp16)[name = tensor("op_43751_cast_fp16")]; + tensor var_43755_begin_0 = const()[name = tensor("op_43755_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_43755_end_0 = const()[name = tensor("op_43755_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_43755_end_mask_0 = const()[name = tensor("op_43755_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_43755_cast_fp16 = slice_by_index(begin = var_43755_begin_0, end = var_43755_end_0, end_mask = var_43755_end_mask_0, x = value_57_cast_fp16)[name = tensor("op_43755_cast_fp16")]; + tensor var_43759_begin_0 = const()[name = tensor("op_43759_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_43759_end_0 = const()[name = tensor("op_43759_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_43759_end_mask_0 = const()[name = tensor("op_43759_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_43759_cast_fp16 = slice_by_index(begin = var_43759_begin_0, end = var_43759_end_0, end_mask = var_43759_end_mask_0, x = value_57_cast_fp16)[name = tensor("op_43759_cast_fp16")]; + tensor var_43763_begin_0 = const()[name = tensor("op_43763_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_43763_end_0 = const()[name = tensor("op_43763_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_43763_end_mask_0 = const()[name = tensor("op_43763_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_43763_cast_fp16 = slice_by_index(begin = var_43763_begin_0, end = var_43763_end_0, end_mask = var_43763_end_mask_0, x = value_57_cast_fp16)[name = tensor("op_43763_cast_fp16")]; + tensor var_43767_begin_0 = const()[name = tensor("op_43767_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_43767_end_0 = const()[name = tensor("op_43767_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_43767_end_mask_0 = const()[name = tensor("op_43767_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_43767_cast_fp16 = slice_by_index(begin = var_43767_begin_0, end = var_43767_end_0, end_mask = var_43767_end_mask_0, x = value_57_cast_fp16)[name = tensor("op_43767_cast_fp16")]; + tensor var_43771_begin_0 = const()[name = tensor("op_43771_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_43771_end_0 = const()[name = tensor("op_43771_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_43771_end_mask_0 = const()[name = tensor("op_43771_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_43771_cast_fp16 = slice_by_index(begin = var_43771_begin_0, end = var_43771_end_0, end_mask = var_43771_end_mask_0, x = value_57_cast_fp16)[name = tensor("op_43771_cast_fp16")]; + tensor var_43775_begin_0 = const()[name = tensor("op_43775_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_43775_end_0 = const()[name = tensor("op_43775_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_43775_end_mask_0 = const()[name = tensor("op_43775_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_43775_cast_fp16 = slice_by_index(begin = var_43775_begin_0, end = var_43775_end_0, end_mask = var_43775_end_mask_0, x = value_57_cast_fp16)[name = tensor("op_43775_cast_fp16")]; + tensor var_43779_begin_0 = const()[name = tensor("op_43779_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_43779_end_0 = const()[name = tensor("op_43779_end_0"), val = tensor([1, 832, 1, 1500])]; + tensor var_43779_end_mask_0 = const()[name = tensor("op_43779_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_43779_cast_fp16 = slice_by_index(begin = var_43779_begin_0, end = var_43779_end_0, end_mask = var_43779_end_mask_0, x = value_57_cast_fp16)[name = tensor("op_43779_cast_fp16")]; + tensor var_43783_begin_0 = const()[name = tensor("op_43783_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_43783_end_0 = const()[name = tensor("op_43783_end_0"), val = tensor([1, 896, 1, 1500])]; + tensor var_43783_end_mask_0 = const()[name = tensor("op_43783_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_43783_cast_fp16 = slice_by_index(begin = var_43783_begin_0, end = var_43783_end_0, end_mask = var_43783_end_mask_0, x = value_57_cast_fp16)[name = tensor("op_43783_cast_fp16")]; + tensor var_43787_begin_0 = const()[name = tensor("op_43787_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_43787_end_0 = const()[name = tensor("op_43787_end_0"), val = tensor([1, 960, 1, 1500])]; + tensor var_43787_end_mask_0 = const()[name = tensor("op_43787_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_43787_cast_fp16 = slice_by_index(begin = var_43787_begin_0, end = var_43787_end_0, end_mask = var_43787_end_mask_0, x = value_57_cast_fp16)[name = tensor("op_43787_cast_fp16")]; + tensor var_43791_begin_0 = const()[name = tensor("op_43791_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_43791_end_0 = const()[name = tensor("op_43791_end_0"), val = tensor([1, 1024, 1, 1500])]; + tensor var_43791_end_mask_0 = const()[name = tensor("op_43791_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_43791_cast_fp16 = slice_by_index(begin = var_43791_begin_0, end = var_43791_end_0, end_mask = var_43791_end_mask_0, x = value_57_cast_fp16)[name = tensor("op_43791_cast_fp16")]; + tensor var_43795_begin_0 = const()[name = tensor("op_43795_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_43795_end_0 = const()[name = tensor("op_43795_end_0"), val = tensor([1, 1088, 1, 1500])]; + tensor var_43795_end_mask_0 = const()[name = tensor("op_43795_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_43795_cast_fp16 = slice_by_index(begin = var_43795_begin_0, end = var_43795_end_0, end_mask = var_43795_end_mask_0, x = value_57_cast_fp16)[name = tensor("op_43795_cast_fp16")]; + tensor var_43799_begin_0 = const()[name = tensor("op_43799_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_43799_end_0 = const()[name = tensor("op_43799_end_0"), val = tensor([1, 1152, 1, 1500])]; + tensor var_43799_end_mask_0 = const()[name = tensor("op_43799_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_43799_cast_fp16 = slice_by_index(begin = var_43799_begin_0, end = var_43799_end_0, end_mask = var_43799_end_mask_0, x = value_57_cast_fp16)[name = tensor("op_43799_cast_fp16")]; + tensor var_43803_begin_0 = const()[name = tensor("op_43803_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_43803_end_0 = const()[name = tensor("op_43803_end_0"), val = tensor([1, 1216, 1, 1500])]; + tensor var_43803_end_mask_0 = const()[name = tensor("op_43803_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_43803_cast_fp16 = slice_by_index(begin = var_43803_begin_0, end = var_43803_end_0, end_mask = var_43803_end_mask_0, x = value_57_cast_fp16)[name = tensor("op_43803_cast_fp16")]; + tensor var_43807_begin_0 = const()[name = tensor("op_43807_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_43807_end_0 = const()[name = tensor("op_43807_end_0"), val = tensor([1, 1280, 1, 1500])]; + tensor var_43807_end_mask_0 = const()[name = tensor("op_43807_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_43807_cast_fp16 = slice_by_index(begin = var_43807_begin_0, end = var_43807_end_0, end_mask = var_43807_end_mask_0, x = value_57_cast_fp16)[name = tensor("op_43807_cast_fp16")]; + tensor var_43811_equation_0 = const()[name = tensor("op_43811_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_43811_cast_fp16 = einsum(equation = var_43811_equation_0, values = (var_43653_cast_fp16, var_43095_cast_fp16))[name = tensor("op_43811_cast_fp16")]; + tensor var_43812_to_fp16 = const()[name = tensor("op_43812_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4481_cast_fp16 = mul(x = var_43811_cast_fp16, y = var_43812_to_fp16)[name = tensor("aw_chunk_4481_cast_fp16")]; + tensor var_43815_equation_0 = const()[name = tensor("op_43815_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_43815_cast_fp16 = einsum(equation = var_43815_equation_0, values = (var_43653_cast_fp16, var_43102_cast_fp16))[name = tensor("op_43815_cast_fp16")]; + tensor var_43816_to_fp16 = const()[name = tensor("op_43816_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4483_cast_fp16 = mul(x = var_43815_cast_fp16, y = var_43816_to_fp16)[name = tensor("aw_chunk_4483_cast_fp16")]; + tensor var_43819_equation_0 = const()[name = tensor("op_43819_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_43819_cast_fp16 = einsum(equation = var_43819_equation_0, values = (var_43653_cast_fp16, var_43109_cast_fp16))[name = tensor("op_43819_cast_fp16")]; + tensor var_43820_to_fp16 = const()[name = tensor("op_43820_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4485_cast_fp16 = mul(x = var_43819_cast_fp16, y = var_43820_to_fp16)[name = tensor("aw_chunk_4485_cast_fp16")]; + tensor var_43823_equation_0 = const()[name = tensor("op_43823_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_43823_cast_fp16 = einsum(equation = var_43823_equation_0, values = (var_43653_cast_fp16, var_43116_cast_fp16))[name = tensor("op_43823_cast_fp16")]; + tensor var_43824_to_fp16 = const()[name = tensor("op_43824_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4487_cast_fp16 = mul(x = var_43823_cast_fp16, y = var_43824_to_fp16)[name = tensor("aw_chunk_4487_cast_fp16")]; + tensor var_43827_equation_0 = const()[name = tensor("op_43827_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_43827_cast_fp16 = einsum(equation = var_43827_equation_0, values = (var_43657_cast_fp16, var_43123_cast_fp16))[name = tensor("op_43827_cast_fp16")]; + tensor var_43828_to_fp16 = const()[name = tensor("op_43828_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4489_cast_fp16 = mul(x = var_43827_cast_fp16, y = var_43828_to_fp16)[name = tensor("aw_chunk_4489_cast_fp16")]; + tensor var_43831_equation_0 = const()[name = tensor("op_43831_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_43831_cast_fp16 = einsum(equation = var_43831_equation_0, values = (var_43657_cast_fp16, var_43130_cast_fp16))[name = tensor("op_43831_cast_fp16")]; + tensor var_43832_to_fp16 = const()[name = tensor("op_43832_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4491_cast_fp16 = mul(x = var_43831_cast_fp16, y = var_43832_to_fp16)[name = tensor("aw_chunk_4491_cast_fp16")]; + tensor var_43835_equation_0 = const()[name = tensor("op_43835_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_43835_cast_fp16 = einsum(equation = var_43835_equation_0, values = (var_43657_cast_fp16, var_43137_cast_fp16))[name = tensor("op_43835_cast_fp16")]; + tensor var_43836_to_fp16 = const()[name = tensor("op_43836_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4493_cast_fp16 = mul(x = var_43835_cast_fp16, y = var_43836_to_fp16)[name = tensor("aw_chunk_4493_cast_fp16")]; + tensor var_43839_equation_0 = const()[name = tensor("op_43839_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_43839_cast_fp16 = einsum(equation = var_43839_equation_0, values = (var_43657_cast_fp16, var_43144_cast_fp16))[name = tensor("op_43839_cast_fp16")]; + tensor var_43840_to_fp16 = const()[name = tensor("op_43840_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4495_cast_fp16 = mul(x = var_43839_cast_fp16, y = var_43840_to_fp16)[name = tensor("aw_chunk_4495_cast_fp16")]; + tensor var_43843_equation_0 = const()[name = tensor("op_43843_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_43843_cast_fp16 = einsum(equation = var_43843_equation_0, values = (var_43661_cast_fp16, var_43151_cast_fp16))[name = tensor("op_43843_cast_fp16")]; + tensor var_43844_to_fp16 = const()[name = tensor("op_43844_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4497_cast_fp16 = mul(x = var_43843_cast_fp16, y = var_43844_to_fp16)[name = tensor("aw_chunk_4497_cast_fp16")]; + tensor var_43847_equation_0 = const()[name = tensor("op_43847_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_43847_cast_fp16 = einsum(equation = var_43847_equation_0, values = (var_43661_cast_fp16, var_43158_cast_fp16))[name = tensor("op_43847_cast_fp16")]; + tensor var_43848_to_fp16 = const()[name = tensor("op_43848_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4499_cast_fp16 = mul(x = var_43847_cast_fp16, y = var_43848_to_fp16)[name = tensor("aw_chunk_4499_cast_fp16")]; + tensor var_43851_equation_0 = const()[name = tensor("op_43851_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_43851_cast_fp16 = einsum(equation = var_43851_equation_0, values = (var_43661_cast_fp16, var_43165_cast_fp16))[name = tensor("op_43851_cast_fp16")]; + tensor var_43852_to_fp16 = const()[name = tensor("op_43852_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4501_cast_fp16 = mul(x = var_43851_cast_fp16, y = var_43852_to_fp16)[name = tensor("aw_chunk_4501_cast_fp16")]; + tensor var_43855_equation_0 = const()[name = tensor("op_43855_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_43855_cast_fp16 = einsum(equation = var_43855_equation_0, values = (var_43661_cast_fp16, var_43172_cast_fp16))[name = tensor("op_43855_cast_fp16")]; + tensor var_43856_to_fp16 = const()[name = tensor("op_43856_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4503_cast_fp16 = mul(x = var_43855_cast_fp16, y = var_43856_to_fp16)[name = tensor("aw_chunk_4503_cast_fp16")]; + tensor var_43859_equation_0 = const()[name = tensor("op_43859_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_43859_cast_fp16 = einsum(equation = var_43859_equation_0, values = (var_43665_cast_fp16, var_43179_cast_fp16))[name = tensor("op_43859_cast_fp16")]; + tensor var_43860_to_fp16 = const()[name = tensor("op_43860_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4505_cast_fp16 = mul(x = var_43859_cast_fp16, y = var_43860_to_fp16)[name = tensor("aw_chunk_4505_cast_fp16")]; + tensor var_43863_equation_0 = const()[name = tensor("op_43863_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_43863_cast_fp16 = einsum(equation = var_43863_equation_0, values = (var_43665_cast_fp16, var_43186_cast_fp16))[name = tensor("op_43863_cast_fp16")]; + tensor var_43864_to_fp16 = const()[name = tensor("op_43864_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4507_cast_fp16 = mul(x = var_43863_cast_fp16, y = var_43864_to_fp16)[name = tensor("aw_chunk_4507_cast_fp16")]; + tensor var_43867_equation_0 = const()[name = tensor("op_43867_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_43867_cast_fp16 = einsum(equation = var_43867_equation_0, values = (var_43665_cast_fp16, var_43193_cast_fp16))[name = tensor("op_43867_cast_fp16")]; + tensor var_43868_to_fp16 = const()[name = tensor("op_43868_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4509_cast_fp16 = mul(x = var_43867_cast_fp16, y = var_43868_to_fp16)[name = tensor("aw_chunk_4509_cast_fp16")]; + tensor var_43871_equation_0 = const()[name = tensor("op_43871_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_43871_cast_fp16 = einsum(equation = var_43871_equation_0, values = (var_43665_cast_fp16, var_43200_cast_fp16))[name = tensor("op_43871_cast_fp16")]; + tensor var_43872_to_fp16 = const()[name = tensor("op_43872_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4511_cast_fp16 = mul(x = var_43871_cast_fp16, y = var_43872_to_fp16)[name = tensor("aw_chunk_4511_cast_fp16")]; + tensor var_43875_equation_0 = const()[name = tensor("op_43875_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_43875_cast_fp16 = einsum(equation = var_43875_equation_0, values = (var_43669_cast_fp16, var_43207_cast_fp16))[name = tensor("op_43875_cast_fp16")]; + tensor var_43876_to_fp16 = const()[name = tensor("op_43876_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4513_cast_fp16 = mul(x = var_43875_cast_fp16, y = var_43876_to_fp16)[name = tensor("aw_chunk_4513_cast_fp16")]; + tensor var_43879_equation_0 = const()[name = tensor("op_43879_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_43879_cast_fp16 = einsum(equation = var_43879_equation_0, values = (var_43669_cast_fp16, var_43214_cast_fp16))[name = tensor("op_43879_cast_fp16")]; + tensor var_43880_to_fp16 = const()[name = tensor("op_43880_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4515_cast_fp16 = mul(x = var_43879_cast_fp16, y = var_43880_to_fp16)[name = tensor("aw_chunk_4515_cast_fp16")]; + tensor var_43883_equation_0 = const()[name = tensor("op_43883_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_43883_cast_fp16 = einsum(equation = var_43883_equation_0, values = (var_43669_cast_fp16, var_43221_cast_fp16))[name = tensor("op_43883_cast_fp16")]; + tensor var_43884_to_fp16 = const()[name = tensor("op_43884_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4517_cast_fp16 = mul(x = var_43883_cast_fp16, y = var_43884_to_fp16)[name = tensor("aw_chunk_4517_cast_fp16")]; + tensor var_43887_equation_0 = const()[name = tensor("op_43887_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_43887_cast_fp16 = einsum(equation = var_43887_equation_0, values = (var_43669_cast_fp16, var_43228_cast_fp16))[name = tensor("op_43887_cast_fp16")]; + tensor var_43888_to_fp16 = const()[name = tensor("op_43888_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4519_cast_fp16 = mul(x = var_43887_cast_fp16, y = var_43888_to_fp16)[name = tensor("aw_chunk_4519_cast_fp16")]; + tensor var_43891_equation_0 = const()[name = tensor("op_43891_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_43891_cast_fp16 = einsum(equation = var_43891_equation_0, values = (var_43673_cast_fp16, var_43235_cast_fp16))[name = tensor("op_43891_cast_fp16")]; + tensor var_43892_to_fp16 = const()[name = tensor("op_43892_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4521_cast_fp16 = mul(x = var_43891_cast_fp16, y = var_43892_to_fp16)[name = tensor("aw_chunk_4521_cast_fp16")]; + tensor var_43895_equation_0 = const()[name = tensor("op_43895_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_43895_cast_fp16 = einsum(equation = var_43895_equation_0, values = (var_43673_cast_fp16, var_43242_cast_fp16))[name = tensor("op_43895_cast_fp16")]; + tensor var_43896_to_fp16 = const()[name = tensor("op_43896_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4523_cast_fp16 = mul(x = var_43895_cast_fp16, y = var_43896_to_fp16)[name = tensor("aw_chunk_4523_cast_fp16")]; + tensor var_43899_equation_0 = const()[name = tensor("op_43899_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_43899_cast_fp16 = einsum(equation = var_43899_equation_0, values = (var_43673_cast_fp16, var_43249_cast_fp16))[name = tensor("op_43899_cast_fp16")]; + tensor var_43900_to_fp16 = const()[name = tensor("op_43900_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4525_cast_fp16 = mul(x = var_43899_cast_fp16, y = var_43900_to_fp16)[name = tensor("aw_chunk_4525_cast_fp16")]; + tensor var_43903_equation_0 = const()[name = tensor("op_43903_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_43903_cast_fp16 = einsum(equation = var_43903_equation_0, values = (var_43673_cast_fp16, var_43256_cast_fp16))[name = tensor("op_43903_cast_fp16")]; + tensor var_43904_to_fp16 = const()[name = tensor("op_43904_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4527_cast_fp16 = mul(x = var_43903_cast_fp16, y = var_43904_to_fp16)[name = tensor("aw_chunk_4527_cast_fp16")]; + tensor var_43907_equation_0 = const()[name = tensor("op_43907_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_43907_cast_fp16 = einsum(equation = var_43907_equation_0, values = (var_43677_cast_fp16, var_43263_cast_fp16))[name = tensor("op_43907_cast_fp16")]; + tensor var_43908_to_fp16 = const()[name = tensor("op_43908_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4529_cast_fp16 = mul(x = var_43907_cast_fp16, y = var_43908_to_fp16)[name = tensor("aw_chunk_4529_cast_fp16")]; + tensor var_43911_equation_0 = const()[name = tensor("op_43911_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_43911_cast_fp16 = einsum(equation = var_43911_equation_0, values = (var_43677_cast_fp16, var_43270_cast_fp16))[name = tensor("op_43911_cast_fp16")]; + tensor var_43912_to_fp16 = const()[name = tensor("op_43912_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4531_cast_fp16 = mul(x = var_43911_cast_fp16, y = var_43912_to_fp16)[name = tensor("aw_chunk_4531_cast_fp16")]; + tensor var_43915_equation_0 = const()[name = tensor("op_43915_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_43915_cast_fp16 = einsum(equation = var_43915_equation_0, values = (var_43677_cast_fp16, var_43277_cast_fp16))[name = tensor("op_43915_cast_fp16")]; + tensor var_43916_to_fp16 = const()[name = tensor("op_43916_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4533_cast_fp16 = mul(x = var_43915_cast_fp16, y = var_43916_to_fp16)[name = tensor("aw_chunk_4533_cast_fp16")]; + tensor var_43919_equation_0 = const()[name = tensor("op_43919_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_43919_cast_fp16 = einsum(equation = var_43919_equation_0, values = (var_43677_cast_fp16, var_43284_cast_fp16))[name = tensor("op_43919_cast_fp16")]; + tensor var_43920_to_fp16 = const()[name = tensor("op_43920_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4535_cast_fp16 = mul(x = var_43919_cast_fp16, y = var_43920_to_fp16)[name = tensor("aw_chunk_4535_cast_fp16")]; + tensor var_43923_equation_0 = const()[name = tensor("op_43923_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_43923_cast_fp16 = einsum(equation = var_43923_equation_0, values = (var_43681_cast_fp16, var_43291_cast_fp16))[name = tensor("op_43923_cast_fp16")]; + tensor var_43924_to_fp16 = const()[name = tensor("op_43924_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4537_cast_fp16 = mul(x = var_43923_cast_fp16, y = var_43924_to_fp16)[name = tensor("aw_chunk_4537_cast_fp16")]; + tensor var_43927_equation_0 = const()[name = tensor("op_43927_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_43927_cast_fp16 = einsum(equation = var_43927_equation_0, values = (var_43681_cast_fp16, var_43298_cast_fp16))[name = tensor("op_43927_cast_fp16")]; + tensor var_43928_to_fp16 = const()[name = tensor("op_43928_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4539_cast_fp16 = mul(x = var_43927_cast_fp16, y = var_43928_to_fp16)[name = tensor("aw_chunk_4539_cast_fp16")]; + tensor var_43931_equation_0 = const()[name = tensor("op_43931_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_43931_cast_fp16 = einsum(equation = var_43931_equation_0, values = (var_43681_cast_fp16, var_43305_cast_fp16))[name = tensor("op_43931_cast_fp16")]; + tensor var_43932_to_fp16 = const()[name = tensor("op_43932_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4541_cast_fp16 = mul(x = var_43931_cast_fp16, y = var_43932_to_fp16)[name = tensor("aw_chunk_4541_cast_fp16")]; + tensor var_43935_equation_0 = const()[name = tensor("op_43935_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_43935_cast_fp16 = einsum(equation = var_43935_equation_0, values = (var_43681_cast_fp16, var_43312_cast_fp16))[name = tensor("op_43935_cast_fp16")]; + tensor var_43936_to_fp16 = const()[name = tensor("op_43936_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4543_cast_fp16 = mul(x = var_43935_cast_fp16, y = var_43936_to_fp16)[name = tensor("aw_chunk_4543_cast_fp16")]; + tensor var_43939_equation_0 = const()[name = tensor("op_43939_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_43939_cast_fp16 = einsum(equation = var_43939_equation_0, values = (var_43685_cast_fp16, var_43319_cast_fp16))[name = tensor("op_43939_cast_fp16")]; + tensor var_43940_to_fp16 = const()[name = tensor("op_43940_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4545_cast_fp16 = mul(x = var_43939_cast_fp16, y = var_43940_to_fp16)[name = tensor("aw_chunk_4545_cast_fp16")]; + tensor var_43943_equation_0 = const()[name = tensor("op_43943_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_43943_cast_fp16 = einsum(equation = var_43943_equation_0, values = (var_43685_cast_fp16, var_43326_cast_fp16))[name = tensor("op_43943_cast_fp16")]; + tensor var_43944_to_fp16 = const()[name = tensor("op_43944_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4547_cast_fp16 = mul(x = var_43943_cast_fp16, y = var_43944_to_fp16)[name = tensor("aw_chunk_4547_cast_fp16")]; + tensor var_43947_equation_0 = const()[name = tensor("op_43947_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_43947_cast_fp16 = einsum(equation = var_43947_equation_0, values = (var_43685_cast_fp16, var_43333_cast_fp16))[name = tensor("op_43947_cast_fp16")]; + tensor var_43948_to_fp16 = const()[name = tensor("op_43948_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4549_cast_fp16 = mul(x = var_43947_cast_fp16, y = var_43948_to_fp16)[name = tensor("aw_chunk_4549_cast_fp16")]; + tensor var_43951_equation_0 = const()[name = tensor("op_43951_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_43951_cast_fp16 = einsum(equation = var_43951_equation_0, values = (var_43685_cast_fp16, var_43340_cast_fp16))[name = tensor("op_43951_cast_fp16")]; + tensor var_43952_to_fp16 = const()[name = tensor("op_43952_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4551_cast_fp16 = mul(x = var_43951_cast_fp16, y = var_43952_to_fp16)[name = tensor("aw_chunk_4551_cast_fp16")]; + tensor var_43955_equation_0 = const()[name = tensor("op_43955_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_43955_cast_fp16 = einsum(equation = var_43955_equation_0, values = (var_43689_cast_fp16, var_43347_cast_fp16))[name = tensor("op_43955_cast_fp16")]; + tensor var_43956_to_fp16 = const()[name = tensor("op_43956_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4553_cast_fp16 = mul(x = var_43955_cast_fp16, y = var_43956_to_fp16)[name = tensor("aw_chunk_4553_cast_fp16")]; + tensor var_43959_equation_0 = const()[name = tensor("op_43959_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_43959_cast_fp16 = einsum(equation = var_43959_equation_0, values = (var_43689_cast_fp16, var_43354_cast_fp16))[name = tensor("op_43959_cast_fp16")]; + tensor var_43960_to_fp16 = const()[name = tensor("op_43960_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4555_cast_fp16 = mul(x = var_43959_cast_fp16, y = var_43960_to_fp16)[name = tensor("aw_chunk_4555_cast_fp16")]; + tensor var_43963_equation_0 = const()[name = tensor("op_43963_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_43963_cast_fp16 = einsum(equation = var_43963_equation_0, values = (var_43689_cast_fp16, var_43361_cast_fp16))[name = tensor("op_43963_cast_fp16")]; + tensor var_43964_to_fp16 = const()[name = tensor("op_43964_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4557_cast_fp16 = mul(x = var_43963_cast_fp16, y = var_43964_to_fp16)[name = tensor("aw_chunk_4557_cast_fp16")]; + tensor var_43967_equation_0 = const()[name = tensor("op_43967_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_43967_cast_fp16 = einsum(equation = var_43967_equation_0, values = (var_43689_cast_fp16, var_43368_cast_fp16))[name = tensor("op_43967_cast_fp16")]; + tensor var_43968_to_fp16 = const()[name = tensor("op_43968_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4559_cast_fp16 = mul(x = var_43967_cast_fp16, y = var_43968_to_fp16)[name = tensor("aw_chunk_4559_cast_fp16")]; + tensor var_43971_equation_0 = const()[name = tensor("op_43971_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_43971_cast_fp16 = einsum(equation = var_43971_equation_0, values = (var_43693_cast_fp16, var_43375_cast_fp16))[name = tensor("op_43971_cast_fp16")]; + tensor var_43972_to_fp16 = const()[name = tensor("op_43972_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4561_cast_fp16 = mul(x = var_43971_cast_fp16, y = var_43972_to_fp16)[name = tensor("aw_chunk_4561_cast_fp16")]; + tensor var_43975_equation_0 = const()[name = tensor("op_43975_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_43975_cast_fp16 = einsum(equation = var_43975_equation_0, values = (var_43693_cast_fp16, var_43382_cast_fp16))[name = tensor("op_43975_cast_fp16")]; + tensor var_43976_to_fp16 = const()[name = tensor("op_43976_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4563_cast_fp16 = mul(x = var_43975_cast_fp16, y = var_43976_to_fp16)[name = tensor("aw_chunk_4563_cast_fp16")]; + tensor var_43979_equation_0 = const()[name = tensor("op_43979_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_43979_cast_fp16 = einsum(equation = var_43979_equation_0, values = (var_43693_cast_fp16, var_43389_cast_fp16))[name = tensor("op_43979_cast_fp16")]; + tensor var_43980_to_fp16 = const()[name = tensor("op_43980_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4565_cast_fp16 = mul(x = var_43979_cast_fp16, y = var_43980_to_fp16)[name = tensor("aw_chunk_4565_cast_fp16")]; + tensor var_43983_equation_0 = const()[name = tensor("op_43983_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_43983_cast_fp16 = einsum(equation = var_43983_equation_0, values = (var_43693_cast_fp16, var_43396_cast_fp16))[name = tensor("op_43983_cast_fp16")]; + tensor var_43984_to_fp16 = const()[name = tensor("op_43984_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4567_cast_fp16 = mul(x = var_43983_cast_fp16, y = var_43984_to_fp16)[name = tensor("aw_chunk_4567_cast_fp16")]; + tensor var_43987_equation_0 = const()[name = tensor("op_43987_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_43987_cast_fp16 = einsum(equation = var_43987_equation_0, values = (var_43697_cast_fp16, var_43403_cast_fp16))[name = tensor("op_43987_cast_fp16")]; + tensor var_43988_to_fp16 = const()[name = tensor("op_43988_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4569_cast_fp16 = mul(x = var_43987_cast_fp16, y = var_43988_to_fp16)[name = tensor("aw_chunk_4569_cast_fp16")]; + tensor var_43991_equation_0 = const()[name = tensor("op_43991_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_43991_cast_fp16 = einsum(equation = var_43991_equation_0, values = (var_43697_cast_fp16, var_43410_cast_fp16))[name = tensor("op_43991_cast_fp16")]; + tensor var_43992_to_fp16 = const()[name = tensor("op_43992_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4571_cast_fp16 = mul(x = var_43991_cast_fp16, y = var_43992_to_fp16)[name = tensor("aw_chunk_4571_cast_fp16")]; + tensor var_43995_equation_0 = const()[name = tensor("op_43995_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_43995_cast_fp16 = einsum(equation = var_43995_equation_0, values = (var_43697_cast_fp16, var_43417_cast_fp16))[name = tensor("op_43995_cast_fp16")]; + tensor var_43996_to_fp16 = const()[name = tensor("op_43996_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4573_cast_fp16 = mul(x = var_43995_cast_fp16, y = var_43996_to_fp16)[name = tensor("aw_chunk_4573_cast_fp16")]; + tensor var_43999_equation_0 = const()[name = tensor("op_43999_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_43999_cast_fp16 = einsum(equation = var_43999_equation_0, values = (var_43697_cast_fp16, var_43424_cast_fp16))[name = tensor("op_43999_cast_fp16")]; + tensor var_44000_to_fp16 = const()[name = tensor("op_44000_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4575_cast_fp16 = mul(x = var_43999_cast_fp16, y = var_44000_to_fp16)[name = tensor("aw_chunk_4575_cast_fp16")]; + tensor var_44003_equation_0 = const()[name = tensor("op_44003_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_44003_cast_fp16 = einsum(equation = var_44003_equation_0, values = (var_43701_cast_fp16, var_43431_cast_fp16))[name = tensor("op_44003_cast_fp16")]; + tensor var_44004_to_fp16 = const()[name = tensor("op_44004_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4577_cast_fp16 = mul(x = var_44003_cast_fp16, y = var_44004_to_fp16)[name = tensor("aw_chunk_4577_cast_fp16")]; + tensor var_44007_equation_0 = const()[name = tensor("op_44007_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_44007_cast_fp16 = einsum(equation = var_44007_equation_0, values = (var_43701_cast_fp16, var_43438_cast_fp16))[name = tensor("op_44007_cast_fp16")]; + tensor var_44008_to_fp16 = const()[name = tensor("op_44008_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4579_cast_fp16 = mul(x = var_44007_cast_fp16, y = var_44008_to_fp16)[name = tensor("aw_chunk_4579_cast_fp16")]; + tensor var_44011_equation_0 = const()[name = tensor("op_44011_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_44011_cast_fp16 = einsum(equation = var_44011_equation_0, values = (var_43701_cast_fp16, var_43445_cast_fp16))[name = tensor("op_44011_cast_fp16")]; + tensor var_44012_to_fp16 = const()[name = tensor("op_44012_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4581_cast_fp16 = mul(x = var_44011_cast_fp16, y = var_44012_to_fp16)[name = tensor("aw_chunk_4581_cast_fp16")]; + tensor var_44015_equation_0 = const()[name = tensor("op_44015_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_44015_cast_fp16 = einsum(equation = var_44015_equation_0, values = (var_43701_cast_fp16, var_43452_cast_fp16))[name = tensor("op_44015_cast_fp16")]; + tensor var_44016_to_fp16 = const()[name = tensor("op_44016_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4583_cast_fp16 = mul(x = var_44015_cast_fp16, y = var_44016_to_fp16)[name = tensor("aw_chunk_4583_cast_fp16")]; + tensor var_44019_equation_0 = const()[name = tensor("op_44019_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_44019_cast_fp16 = einsum(equation = var_44019_equation_0, values = (var_43705_cast_fp16, var_43459_cast_fp16))[name = tensor("op_44019_cast_fp16")]; + tensor var_44020_to_fp16 = const()[name = tensor("op_44020_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4585_cast_fp16 = mul(x = var_44019_cast_fp16, y = var_44020_to_fp16)[name = tensor("aw_chunk_4585_cast_fp16")]; + tensor var_44023_equation_0 = const()[name = tensor("op_44023_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_44023_cast_fp16 = einsum(equation = var_44023_equation_0, values = (var_43705_cast_fp16, var_43466_cast_fp16))[name = tensor("op_44023_cast_fp16")]; + tensor var_44024_to_fp16 = const()[name = tensor("op_44024_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4587_cast_fp16 = mul(x = var_44023_cast_fp16, y = var_44024_to_fp16)[name = tensor("aw_chunk_4587_cast_fp16")]; + tensor var_44027_equation_0 = const()[name = tensor("op_44027_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_44027_cast_fp16 = einsum(equation = var_44027_equation_0, values = (var_43705_cast_fp16, var_43473_cast_fp16))[name = tensor("op_44027_cast_fp16")]; + tensor var_44028_to_fp16 = const()[name = tensor("op_44028_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4589_cast_fp16 = mul(x = var_44027_cast_fp16, y = var_44028_to_fp16)[name = tensor("aw_chunk_4589_cast_fp16")]; + tensor var_44031_equation_0 = const()[name = tensor("op_44031_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_44031_cast_fp16 = einsum(equation = var_44031_equation_0, values = (var_43705_cast_fp16, var_43480_cast_fp16))[name = tensor("op_44031_cast_fp16")]; + tensor var_44032_to_fp16 = const()[name = tensor("op_44032_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4591_cast_fp16 = mul(x = var_44031_cast_fp16, y = var_44032_to_fp16)[name = tensor("aw_chunk_4591_cast_fp16")]; + tensor var_44035_equation_0 = const()[name = tensor("op_44035_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_44035_cast_fp16 = einsum(equation = var_44035_equation_0, values = (var_43709_cast_fp16, var_43487_cast_fp16))[name = tensor("op_44035_cast_fp16")]; + tensor var_44036_to_fp16 = const()[name = tensor("op_44036_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4593_cast_fp16 = mul(x = var_44035_cast_fp16, y = var_44036_to_fp16)[name = tensor("aw_chunk_4593_cast_fp16")]; + tensor var_44039_equation_0 = const()[name = tensor("op_44039_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_44039_cast_fp16 = einsum(equation = var_44039_equation_0, values = (var_43709_cast_fp16, var_43494_cast_fp16))[name = tensor("op_44039_cast_fp16")]; + tensor var_44040_to_fp16 = const()[name = tensor("op_44040_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4595_cast_fp16 = mul(x = var_44039_cast_fp16, y = var_44040_to_fp16)[name = tensor("aw_chunk_4595_cast_fp16")]; + tensor var_44043_equation_0 = const()[name = tensor("op_44043_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_44043_cast_fp16 = einsum(equation = var_44043_equation_0, values = (var_43709_cast_fp16, var_43501_cast_fp16))[name = tensor("op_44043_cast_fp16")]; + tensor var_44044_to_fp16 = const()[name = tensor("op_44044_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4597_cast_fp16 = mul(x = var_44043_cast_fp16, y = var_44044_to_fp16)[name = tensor("aw_chunk_4597_cast_fp16")]; + tensor var_44047_equation_0 = const()[name = tensor("op_44047_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_44047_cast_fp16 = einsum(equation = var_44047_equation_0, values = (var_43709_cast_fp16, var_43508_cast_fp16))[name = tensor("op_44047_cast_fp16")]; + tensor var_44048_to_fp16 = const()[name = tensor("op_44048_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4599_cast_fp16 = mul(x = var_44047_cast_fp16, y = var_44048_to_fp16)[name = tensor("aw_chunk_4599_cast_fp16")]; + tensor var_44051_equation_0 = const()[name = tensor("op_44051_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_44051_cast_fp16 = einsum(equation = var_44051_equation_0, values = (var_43713_cast_fp16, var_43515_cast_fp16))[name = tensor("op_44051_cast_fp16")]; + tensor var_44052_to_fp16 = const()[name = tensor("op_44052_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4601_cast_fp16 = mul(x = var_44051_cast_fp16, y = var_44052_to_fp16)[name = tensor("aw_chunk_4601_cast_fp16")]; + tensor var_44055_equation_0 = const()[name = tensor("op_44055_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_44055_cast_fp16 = einsum(equation = var_44055_equation_0, values = (var_43713_cast_fp16, var_43522_cast_fp16))[name = tensor("op_44055_cast_fp16")]; + tensor var_44056_to_fp16 = const()[name = tensor("op_44056_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4603_cast_fp16 = mul(x = var_44055_cast_fp16, y = var_44056_to_fp16)[name = tensor("aw_chunk_4603_cast_fp16")]; + tensor var_44059_equation_0 = const()[name = tensor("op_44059_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_44059_cast_fp16 = einsum(equation = var_44059_equation_0, values = (var_43713_cast_fp16, var_43529_cast_fp16))[name = tensor("op_44059_cast_fp16")]; + tensor var_44060_to_fp16 = const()[name = tensor("op_44060_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4605_cast_fp16 = mul(x = var_44059_cast_fp16, y = var_44060_to_fp16)[name = tensor("aw_chunk_4605_cast_fp16")]; + tensor var_44063_equation_0 = const()[name = tensor("op_44063_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_44063_cast_fp16 = einsum(equation = var_44063_equation_0, values = (var_43713_cast_fp16, var_43536_cast_fp16))[name = tensor("op_44063_cast_fp16")]; + tensor var_44064_to_fp16 = const()[name = tensor("op_44064_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4607_cast_fp16 = mul(x = var_44063_cast_fp16, y = var_44064_to_fp16)[name = tensor("aw_chunk_4607_cast_fp16")]; + tensor var_44067_equation_0 = const()[name = tensor("op_44067_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_44067_cast_fp16 = einsum(equation = var_44067_equation_0, values = (var_43717_cast_fp16, var_43543_cast_fp16))[name = tensor("op_44067_cast_fp16")]; + tensor var_44068_to_fp16 = const()[name = tensor("op_44068_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4609_cast_fp16 = mul(x = var_44067_cast_fp16, y = var_44068_to_fp16)[name = tensor("aw_chunk_4609_cast_fp16")]; + tensor var_44071_equation_0 = const()[name = tensor("op_44071_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_44071_cast_fp16 = einsum(equation = var_44071_equation_0, values = (var_43717_cast_fp16, var_43550_cast_fp16))[name = tensor("op_44071_cast_fp16")]; + tensor var_44072_to_fp16 = const()[name = tensor("op_44072_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4611_cast_fp16 = mul(x = var_44071_cast_fp16, y = var_44072_to_fp16)[name = tensor("aw_chunk_4611_cast_fp16")]; + tensor var_44075_equation_0 = const()[name = tensor("op_44075_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_44075_cast_fp16 = einsum(equation = var_44075_equation_0, values = (var_43717_cast_fp16, var_43557_cast_fp16))[name = tensor("op_44075_cast_fp16")]; + tensor var_44076_to_fp16 = const()[name = tensor("op_44076_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4613_cast_fp16 = mul(x = var_44075_cast_fp16, y = var_44076_to_fp16)[name = tensor("aw_chunk_4613_cast_fp16")]; + tensor var_44079_equation_0 = const()[name = tensor("op_44079_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_44079_cast_fp16 = einsum(equation = var_44079_equation_0, values = (var_43717_cast_fp16, var_43564_cast_fp16))[name = tensor("op_44079_cast_fp16")]; + tensor var_44080_to_fp16 = const()[name = tensor("op_44080_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4615_cast_fp16 = mul(x = var_44079_cast_fp16, y = var_44080_to_fp16)[name = tensor("aw_chunk_4615_cast_fp16")]; + tensor var_44083_equation_0 = const()[name = tensor("op_44083_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_44083_cast_fp16 = einsum(equation = var_44083_equation_0, values = (var_43721_cast_fp16, var_43571_cast_fp16))[name = tensor("op_44083_cast_fp16")]; + tensor var_44084_to_fp16 = const()[name = tensor("op_44084_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4617_cast_fp16 = mul(x = var_44083_cast_fp16, y = var_44084_to_fp16)[name = tensor("aw_chunk_4617_cast_fp16")]; + tensor var_44087_equation_0 = const()[name = tensor("op_44087_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_44087_cast_fp16 = einsum(equation = var_44087_equation_0, values = (var_43721_cast_fp16, var_43578_cast_fp16))[name = tensor("op_44087_cast_fp16")]; + tensor var_44088_to_fp16 = const()[name = tensor("op_44088_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4619_cast_fp16 = mul(x = var_44087_cast_fp16, y = var_44088_to_fp16)[name = tensor("aw_chunk_4619_cast_fp16")]; + tensor var_44091_equation_0 = const()[name = tensor("op_44091_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_44091_cast_fp16 = einsum(equation = var_44091_equation_0, values = (var_43721_cast_fp16, var_43585_cast_fp16))[name = tensor("op_44091_cast_fp16")]; + tensor var_44092_to_fp16 = const()[name = tensor("op_44092_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4621_cast_fp16 = mul(x = var_44091_cast_fp16, y = var_44092_to_fp16)[name = tensor("aw_chunk_4621_cast_fp16")]; + tensor var_44095_equation_0 = const()[name = tensor("op_44095_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_44095_cast_fp16 = einsum(equation = var_44095_equation_0, values = (var_43721_cast_fp16, var_43592_cast_fp16))[name = tensor("op_44095_cast_fp16")]; + tensor var_44096_to_fp16 = const()[name = tensor("op_44096_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4623_cast_fp16 = mul(x = var_44095_cast_fp16, y = var_44096_to_fp16)[name = tensor("aw_chunk_4623_cast_fp16")]; + tensor var_44099_equation_0 = const()[name = tensor("op_44099_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_44099_cast_fp16 = einsum(equation = var_44099_equation_0, values = (var_43725_cast_fp16, var_43599_cast_fp16))[name = tensor("op_44099_cast_fp16")]; + tensor var_44100_to_fp16 = const()[name = tensor("op_44100_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4625_cast_fp16 = mul(x = var_44099_cast_fp16, y = var_44100_to_fp16)[name = tensor("aw_chunk_4625_cast_fp16")]; + tensor var_44103_equation_0 = const()[name = tensor("op_44103_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_44103_cast_fp16 = einsum(equation = var_44103_equation_0, values = (var_43725_cast_fp16, var_43606_cast_fp16))[name = tensor("op_44103_cast_fp16")]; + tensor var_44104_to_fp16 = const()[name = tensor("op_44104_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4627_cast_fp16 = mul(x = var_44103_cast_fp16, y = var_44104_to_fp16)[name = tensor("aw_chunk_4627_cast_fp16")]; + tensor var_44107_equation_0 = const()[name = tensor("op_44107_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_44107_cast_fp16 = einsum(equation = var_44107_equation_0, values = (var_43725_cast_fp16, var_43613_cast_fp16))[name = tensor("op_44107_cast_fp16")]; + tensor var_44108_to_fp16 = const()[name = tensor("op_44108_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4629_cast_fp16 = mul(x = var_44107_cast_fp16, y = var_44108_to_fp16)[name = tensor("aw_chunk_4629_cast_fp16")]; + tensor var_44111_equation_0 = const()[name = tensor("op_44111_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_44111_cast_fp16 = einsum(equation = var_44111_equation_0, values = (var_43725_cast_fp16, var_43620_cast_fp16))[name = tensor("op_44111_cast_fp16")]; + tensor var_44112_to_fp16 = const()[name = tensor("op_44112_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4631_cast_fp16 = mul(x = var_44111_cast_fp16, y = var_44112_to_fp16)[name = tensor("aw_chunk_4631_cast_fp16")]; + tensor var_44115_equation_0 = const()[name = tensor("op_44115_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_44115_cast_fp16 = einsum(equation = var_44115_equation_0, values = (var_43729_cast_fp16, var_43627_cast_fp16))[name = tensor("op_44115_cast_fp16")]; + tensor var_44116_to_fp16 = const()[name = tensor("op_44116_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4633_cast_fp16 = mul(x = var_44115_cast_fp16, y = var_44116_to_fp16)[name = tensor("aw_chunk_4633_cast_fp16")]; + tensor var_44119_equation_0 = const()[name = tensor("op_44119_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_44119_cast_fp16 = einsum(equation = var_44119_equation_0, values = (var_43729_cast_fp16, var_43634_cast_fp16))[name = tensor("op_44119_cast_fp16")]; + tensor var_44120_to_fp16 = const()[name = tensor("op_44120_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4635_cast_fp16 = mul(x = var_44119_cast_fp16, y = var_44120_to_fp16)[name = tensor("aw_chunk_4635_cast_fp16")]; + tensor var_44123_equation_0 = const()[name = tensor("op_44123_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_44123_cast_fp16 = einsum(equation = var_44123_equation_0, values = (var_43729_cast_fp16, var_43641_cast_fp16))[name = tensor("op_44123_cast_fp16")]; + tensor var_44124_to_fp16 = const()[name = tensor("op_44124_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4637_cast_fp16 = mul(x = var_44123_cast_fp16, y = var_44124_to_fp16)[name = tensor("aw_chunk_4637_cast_fp16")]; + tensor var_44127_equation_0 = const()[name = tensor("op_44127_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_44127_cast_fp16 = einsum(equation = var_44127_equation_0, values = (var_43729_cast_fp16, var_43648_cast_fp16))[name = tensor("op_44127_cast_fp16")]; + tensor var_44128_to_fp16 = const()[name = tensor("op_44128_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4639_cast_fp16 = mul(x = var_44127_cast_fp16, y = var_44128_to_fp16)[name = tensor("aw_chunk_4639_cast_fp16")]; + tensor var_44130_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4481_cast_fp16)[name = tensor("op_44130_cast_fp16")]; + tensor var_44131_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4483_cast_fp16)[name = tensor("op_44131_cast_fp16")]; + tensor var_44132_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4485_cast_fp16)[name = tensor("op_44132_cast_fp16")]; + tensor var_44133_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4487_cast_fp16)[name = tensor("op_44133_cast_fp16")]; + tensor var_44134_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4489_cast_fp16)[name = tensor("op_44134_cast_fp16")]; + tensor var_44135_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4491_cast_fp16)[name = tensor("op_44135_cast_fp16")]; + tensor var_44136_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4493_cast_fp16)[name = tensor("op_44136_cast_fp16")]; + tensor var_44137_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4495_cast_fp16)[name = tensor("op_44137_cast_fp16")]; + tensor var_44138_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4497_cast_fp16)[name = tensor("op_44138_cast_fp16")]; + tensor var_44139_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4499_cast_fp16)[name = tensor("op_44139_cast_fp16")]; + tensor var_44140_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4501_cast_fp16)[name = tensor("op_44140_cast_fp16")]; + tensor var_44141_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4503_cast_fp16)[name = tensor("op_44141_cast_fp16")]; + tensor var_44142_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4505_cast_fp16)[name = tensor("op_44142_cast_fp16")]; + tensor var_44143_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4507_cast_fp16)[name = tensor("op_44143_cast_fp16")]; + tensor var_44144_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4509_cast_fp16)[name = tensor("op_44144_cast_fp16")]; + tensor var_44145_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4511_cast_fp16)[name = tensor("op_44145_cast_fp16")]; + tensor var_44146_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4513_cast_fp16)[name = tensor("op_44146_cast_fp16")]; + tensor var_44147_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4515_cast_fp16)[name = tensor("op_44147_cast_fp16")]; + tensor var_44148_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4517_cast_fp16)[name = tensor("op_44148_cast_fp16")]; + tensor var_44149_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4519_cast_fp16)[name = tensor("op_44149_cast_fp16")]; + tensor var_44150_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4521_cast_fp16)[name = tensor("op_44150_cast_fp16")]; + tensor var_44151_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4523_cast_fp16)[name = tensor("op_44151_cast_fp16")]; + tensor var_44152_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4525_cast_fp16)[name = tensor("op_44152_cast_fp16")]; + tensor var_44153_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4527_cast_fp16)[name = tensor("op_44153_cast_fp16")]; + tensor var_44154_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4529_cast_fp16)[name = tensor("op_44154_cast_fp16")]; + tensor var_44155_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4531_cast_fp16)[name = tensor("op_44155_cast_fp16")]; + tensor var_44156_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4533_cast_fp16)[name = tensor("op_44156_cast_fp16")]; + tensor var_44157_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4535_cast_fp16)[name = tensor("op_44157_cast_fp16")]; + tensor var_44158_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4537_cast_fp16)[name = tensor("op_44158_cast_fp16")]; + tensor var_44159_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4539_cast_fp16)[name = tensor("op_44159_cast_fp16")]; + tensor var_44160_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4541_cast_fp16)[name = tensor("op_44160_cast_fp16")]; + tensor var_44161_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4543_cast_fp16)[name = tensor("op_44161_cast_fp16")]; + tensor var_44162_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4545_cast_fp16)[name = tensor("op_44162_cast_fp16")]; + tensor var_44163_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4547_cast_fp16)[name = tensor("op_44163_cast_fp16")]; + tensor var_44164_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4549_cast_fp16)[name = tensor("op_44164_cast_fp16")]; + tensor var_44165_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4551_cast_fp16)[name = tensor("op_44165_cast_fp16")]; + tensor var_44166_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4553_cast_fp16)[name = tensor("op_44166_cast_fp16")]; + tensor var_44167_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4555_cast_fp16)[name = tensor("op_44167_cast_fp16")]; + tensor var_44168_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4557_cast_fp16)[name = tensor("op_44168_cast_fp16")]; + tensor var_44169_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4559_cast_fp16)[name = tensor("op_44169_cast_fp16")]; + tensor var_44170_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4561_cast_fp16)[name = tensor("op_44170_cast_fp16")]; + tensor var_44171_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4563_cast_fp16)[name = tensor("op_44171_cast_fp16")]; + tensor var_44172_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4565_cast_fp16)[name = tensor("op_44172_cast_fp16")]; + tensor var_44173_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4567_cast_fp16)[name = tensor("op_44173_cast_fp16")]; + tensor var_44174_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4569_cast_fp16)[name = tensor("op_44174_cast_fp16")]; + tensor var_44175_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4571_cast_fp16)[name = tensor("op_44175_cast_fp16")]; + tensor var_44176_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4573_cast_fp16)[name = tensor("op_44176_cast_fp16")]; + tensor var_44177_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4575_cast_fp16)[name = tensor("op_44177_cast_fp16")]; + tensor var_44178_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4577_cast_fp16)[name = tensor("op_44178_cast_fp16")]; + tensor var_44179_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4579_cast_fp16)[name = tensor("op_44179_cast_fp16")]; + tensor var_44180_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4581_cast_fp16)[name = tensor("op_44180_cast_fp16")]; + tensor var_44181_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4583_cast_fp16)[name = tensor("op_44181_cast_fp16")]; + tensor var_44182_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4585_cast_fp16)[name = tensor("op_44182_cast_fp16")]; + tensor var_44183_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4587_cast_fp16)[name = tensor("op_44183_cast_fp16")]; + tensor var_44184_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4589_cast_fp16)[name = tensor("op_44184_cast_fp16")]; + tensor var_44185_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4591_cast_fp16)[name = tensor("op_44185_cast_fp16")]; + tensor var_44186_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4593_cast_fp16)[name = tensor("op_44186_cast_fp16")]; + tensor var_44187_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4595_cast_fp16)[name = tensor("op_44187_cast_fp16")]; + tensor var_44188_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4597_cast_fp16)[name = tensor("op_44188_cast_fp16")]; + tensor var_44189_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4599_cast_fp16)[name = tensor("op_44189_cast_fp16")]; + tensor var_44190_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4601_cast_fp16)[name = tensor("op_44190_cast_fp16")]; + tensor var_44191_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4603_cast_fp16)[name = tensor("op_44191_cast_fp16")]; + tensor var_44192_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4605_cast_fp16)[name = tensor("op_44192_cast_fp16")]; + tensor var_44193_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4607_cast_fp16)[name = tensor("op_44193_cast_fp16")]; + tensor var_44194_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4609_cast_fp16)[name = tensor("op_44194_cast_fp16")]; + tensor var_44195_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4611_cast_fp16)[name = tensor("op_44195_cast_fp16")]; + tensor var_44196_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4613_cast_fp16)[name = tensor("op_44196_cast_fp16")]; + tensor var_44197_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4615_cast_fp16)[name = tensor("op_44197_cast_fp16")]; + tensor var_44198_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4617_cast_fp16)[name = tensor("op_44198_cast_fp16")]; + tensor var_44199_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4619_cast_fp16)[name = tensor("op_44199_cast_fp16")]; + tensor var_44200_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4621_cast_fp16)[name = tensor("op_44200_cast_fp16")]; + tensor var_44201_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4623_cast_fp16)[name = tensor("op_44201_cast_fp16")]; + tensor var_44202_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4625_cast_fp16)[name = tensor("op_44202_cast_fp16")]; + tensor var_44203_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4627_cast_fp16)[name = tensor("op_44203_cast_fp16")]; + tensor var_44204_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4629_cast_fp16)[name = tensor("op_44204_cast_fp16")]; + tensor var_44205_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4631_cast_fp16)[name = tensor("op_44205_cast_fp16")]; + tensor var_44206_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4633_cast_fp16)[name = tensor("op_44206_cast_fp16")]; + tensor var_44207_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4635_cast_fp16)[name = tensor("op_44207_cast_fp16")]; + tensor var_44208_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4637_cast_fp16)[name = tensor("op_44208_cast_fp16")]; + tensor var_44209_cast_fp16 = softmax(axis = var_42955, x = aw_chunk_4639_cast_fp16)[name = tensor("op_44209_cast_fp16")]; + tensor var_44211_equation_0 = const()[name = tensor("op_44211_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44211_cast_fp16 = einsum(equation = var_44211_equation_0, values = (var_43731_cast_fp16, var_44130_cast_fp16))[name = tensor("op_44211_cast_fp16")]; + tensor var_44213_equation_0 = const()[name = tensor("op_44213_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44213_cast_fp16 = einsum(equation = var_44213_equation_0, values = (var_43731_cast_fp16, var_44131_cast_fp16))[name = tensor("op_44213_cast_fp16")]; + tensor var_44215_equation_0 = const()[name = tensor("op_44215_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44215_cast_fp16 = einsum(equation = var_44215_equation_0, values = (var_43731_cast_fp16, var_44132_cast_fp16))[name = tensor("op_44215_cast_fp16")]; + tensor var_44217_equation_0 = const()[name = tensor("op_44217_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44217_cast_fp16 = einsum(equation = var_44217_equation_0, values = (var_43731_cast_fp16, var_44133_cast_fp16))[name = tensor("op_44217_cast_fp16")]; + tensor var_44219_equation_0 = const()[name = tensor("op_44219_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44219_cast_fp16 = einsum(equation = var_44219_equation_0, values = (var_43735_cast_fp16, var_44134_cast_fp16))[name = tensor("op_44219_cast_fp16")]; + tensor var_44221_equation_0 = const()[name = tensor("op_44221_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44221_cast_fp16 = einsum(equation = var_44221_equation_0, values = (var_43735_cast_fp16, var_44135_cast_fp16))[name = tensor("op_44221_cast_fp16")]; + tensor var_44223_equation_0 = const()[name = tensor("op_44223_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44223_cast_fp16 = einsum(equation = var_44223_equation_0, values = (var_43735_cast_fp16, var_44136_cast_fp16))[name = tensor("op_44223_cast_fp16")]; + tensor var_44225_equation_0 = const()[name = tensor("op_44225_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44225_cast_fp16 = einsum(equation = var_44225_equation_0, values = (var_43735_cast_fp16, var_44137_cast_fp16))[name = tensor("op_44225_cast_fp16")]; + tensor var_44227_equation_0 = const()[name = tensor("op_44227_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44227_cast_fp16 = einsum(equation = var_44227_equation_0, values = (var_43739_cast_fp16, var_44138_cast_fp16))[name = tensor("op_44227_cast_fp16")]; + tensor var_44229_equation_0 = const()[name = tensor("op_44229_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44229_cast_fp16 = einsum(equation = var_44229_equation_0, values = (var_43739_cast_fp16, var_44139_cast_fp16))[name = tensor("op_44229_cast_fp16")]; + tensor var_44231_equation_0 = const()[name = tensor("op_44231_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44231_cast_fp16 = einsum(equation = var_44231_equation_0, values = (var_43739_cast_fp16, var_44140_cast_fp16))[name = tensor("op_44231_cast_fp16")]; + tensor var_44233_equation_0 = const()[name = tensor("op_44233_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44233_cast_fp16 = einsum(equation = var_44233_equation_0, values = (var_43739_cast_fp16, var_44141_cast_fp16))[name = tensor("op_44233_cast_fp16")]; + tensor var_44235_equation_0 = const()[name = tensor("op_44235_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44235_cast_fp16 = einsum(equation = var_44235_equation_0, values = (var_43743_cast_fp16, var_44142_cast_fp16))[name = tensor("op_44235_cast_fp16")]; + tensor var_44237_equation_0 = const()[name = tensor("op_44237_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44237_cast_fp16 = einsum(equation = var_44237_equation_0, values = (var_43743_cast_fp16, var_44143_cast_fp16))[name = tensor("op_44237_cast_fp16")]; + tensor var_44239_equation_0 = const()[name = tensor("op_44239_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44239_cast_fp16 = einsum(equation = var_44239_equation_0, values = (var_43743_cast_fp16, var_44144_cast_fp16))[name = tensor("op_44239_cast_fp16")]; + tensor var_44241_equation_0 = const()[name = tensor("op_44241_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44241_cast_fp16 = einsum(equation = var_44241_equation_0, values = (var_43743_cast_fp16, var_44145_cast_fp16))[name = tensor("op_44241_cast_fp16")]; + tensor var_44243_equation_0 = const()[name = tensor("op_44243_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44243_cast_fp16 = einsum(equation = var_44243_equation_0, values = (var_43747_cast_fp16, var_44146_cast_fp16))[name = tensor("op_44243_cast_fp16")]; + tensor var_44245_equation_0 = const()[name = tensor("op_44245_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44245_cast_fp16 = einsum(equation = var_44245_equation_0, values = (var_43747_cast_fp16, var_44147_cast_fp16))[name = tensor("op_44245_cast_fp16")]; + tensor var_44247_equation_0 = const()[name = tensor("op_44247_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44247_cast_fp16 = einsum(equation = var_44247_equation_0, values = (var_43747_cast_fp16, var_44148_cast_fp16))[name = tensor("op_44247_cast_fp16")]; + tensor var_44249_equation_0 = const()[name = tensor("op_44249_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44249_cast_fp16 = einsum(equation = var_44249_equation_0, values = (var_43747_cast_fp16, var_44149_cast_fp16))[name = tensor("op_44249_cast_fp16")]; + tensor var_44251_equation_0 = const()[name = tensor("op_44251_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44251_cast_fp16 = einsum(equation = var_44251_equation_0, values = (var_43751_cast_fp16, var_44150_cast_fp16))[name = tensor("op_44251_cast_fp16")]; + tensor var_44253_equation_0 = const()[name = tensor("op_44253_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44253_cast_fp16 = einsum(equation = var_44253_equation_0, values = (var_43751_cast_fp16, var_44151_cast_fp16))[name = tensor("op_44253_cast_fp16")]; + tensor var_44255_equation_0 = const()[name = tensor("op_44255_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44255_cast_fp16 = einsum(equation = var_44255_equation_0, values = (var_43751_cast_fp16, var_44152_cast_fp16))[name = tensor("op_44255_cast_fp16")]; + tensor var_44257_equation_0 = const()[name = tensor("op_44257_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44257_cast_fp16 = einsum(equation = var_44257_equation_0, values = (var_43751_cast_fp16, var_44153_cast_fp16))[name = tensor("op_44257_cast_fp16")]; + tensor var_44259_equation_0 = const()[name = tensor("op_44259_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44259_cast_fp16 = einsum(equation = var_44259_equation_0, values = (var_43755_cast_fp16, var_44154_cast_fp16))[name = tensor("op_44259_cast_fp16")]; + tensor var_44261_equation_0 = const()[name = tensor("op_44261_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44261_cast_fp16 = einsum(equation = var_44261_equation_0, values = (var_43755_cast_fp16, var_44155_cast_fp16))[name = tensor("op_44261_cast_fp16")]; + tensor var_44263_equation_0 = const()[name = tensor("op_44263_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44263_cast_fp16 = einsum(equation = var_44263_equation_0, values = (var_43755_cast_fp16, var_44156_cast_fp16))[name = tensor("op_44263_cast_fp16")]; + tensor var_44265_equation_0 = const()[name = tensor("op_44265_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44265_cast_fp16 = einsum(equation = var_44265_equation_0, values = (var_43755_cast_fp16, var_44157_cast_fp16))[name = tensor("op_44265_cast_fp16")]; + tensor var_44267_equation_0 = const()[name = tensor("op_44267_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44267_cast_fp16 = einsum(equation = var_44267_equation_0, values = (var_43759_cast_fp16, var_44158_cast_fp16))[name = tensor("op_44267_cast_fp16")]; + tensor var_44269_equation_0 = const()[name = tensor("op_44269_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44269_cast_fp16 = einsum(equation = var_44269_equation_0, values = (var_43759_cast_fp16, var_44159_cast_fp16))[name = tensor("op_44269_cast_fp16")]; + tensor var_44271_equation_0 = const()[name = tensor("op_44271_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44271_cast_fp16 = einsum(equation = var_44271_equation_0, values = (var_43759_cast_fp16, var_44160_cast_fp16))[name = tensor("op_44271_cast_fp16")]; + tensor var_44273_equation_0 = const()[name = tensor("op_44273_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44273_cast_fp16 = einsum(equation = var_44273_equation_0, values = (var_43759_cast_fp16, var_44161_cast_fp16))[name = tensor("op_44273_cast_fp16")]; + tensor var_44275_equation_0 = const()[name = tensor("op_44275_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44275_cast_fp16 = einsum(equation = var_44275_equation_0, values = (var_43763_cast_fp16, var_44162_cast_fp16))[name = tensor("op_44275_cast_fp16")]; + tensor var_44277_equation_0 = const()[name = tensor("op_44277_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44277_cast_fp16 = einsum(equation = var_44277_equation_0, values = (var_43763_cast_fp16, var_44163_cast_fp16))[name = tensor("op_44277_cast_fp16")]; + tensor var_44279_equation_0 = const()[name = tensor("op_44279_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44279_cast_fp16 = einsum(equation = var_44279_equation_0, values = (var_43763_cast_fp16, var_44164_cast_fp16))[name = tensor("op_44279_cast_fp16")]; + tensor var_44281_equation_0 = const()[name = tensor("op_44281_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44281_cast_fp16 = einsum(equation = var_44281_equation_0, values = (var_43763_cast_fp16, var_44165_cast_fp16))[name = tensor("op_44281_cast_fp16")]; + tensor var_44283_equation_0 = const()[name = tensor("op_44283_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44283_cast_fp16 = einsum(equation = var_44283_equation_0, values = (var_43767_cast_fp16, var_44166_cast_fp16))[name = tensor("op_44283_cast_fp16")]; + tensor var_44285_equation_0 = const()[name = tensor("op_44285_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44285_cast_fp16 = einsum(equation = var_44285_equation_0, values = (var_43767_cast_fp16, var_44167_cast_fp16))[name = tensor("op_44285_cast_fp16")]; + tensor var_44287_equation_0 = const()[name = tensor("op_44287_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44287_cast_fp16 = einsum(equation = var_44287_equation_0, values = (var_43767_cast_fp16, var_44168_cast_fp16))[name = tensor("op_44287_cast_fp16")]; + tensor var_44289_equation_0 = const()[name = tensor("op_44289_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44289_cast_fp16 = einsum(equation = var_44289_equation_0, values = (var_43767_cast_fp16, var_44169_cast_fp16))[name = tensor("op_44289_cast_fp16")]; + tensor var_44291_equation_0 = const()[name = tensor("op_44291_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44291_cast_fp16 = einsum(equation = var_44291_equation_0, values = (var_43771_cast_fp16, var_44170_cast_fp16))[name = tensor("op_44291_cast_fp16")]; + tensor var_44293_equation_0 = const()[name = tensor("op_44293_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44293_cast_fp16 = einsum(equation = var_44293_equation_0, values = (var_43771_cast_fp16, var_44171_cast_fp16))[name = tensor("op_44293_cast_fp16")]; + tensor var_44295_equation_0 = const()[name = tensor("op_44295_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44295_cast_fp16 = einsum(equation = var_44295_equation_0, values = (var_43771_cast_fp16, var_44172_cast_fp16))[name = tensor("op_44295_cast_fp16")]; + tensor var_44297_equation_0 = const()[name = tensor("op_44297_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44297_cast_fp16 = einsum(equation = var_44297_equation_0, values = (var_43771_cast_fp16, var_44173_cast_fp16))[name = tensor("op_44297_cast_fp16")]; + tensor var_44299_equation_0 = const()[name = tensor("op_44299_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44299_cast_fp16 = einsum(equation = var_44299_equation_0, values = (var_43775_cast_fp16, var_44174_cast_fp16))[name = tensor("op_44299_cast_fp16")]; + tensor var_44301_equation_0 = const()[name = tensor("op_44301_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44301_cast_fp16 = einsum(equation = var_44301_equation_0, values = (var_43775_cast_fp16, var_44175_cast_fp16))[name = tensor("op_44301_cast_fp16")]; + tensor var_44303_equation_0 = const()[name = tensor("op_44303_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44303_cast_fp16 = einsum(equation = var_44303_equation_0, values = (var_43775_cast_fp16, var_44176_cast_fp16))[name = tensor("op_44303_cast_fp16")]; + tensor var_44305_equation_0 = const()[name = tensor("op_44305_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44305_cast_fp16 = einsum(equation = var_44305_equation_0, values = (var_43775_cast_fp16, var_44177_cast_fp16))[name = tensor("op_44305_cast_fp16")]; + tensor var_44307_equation_0 = const()[name = tensor("op_44307_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44307_cast_fp16 = einsum(equation = var_44307_equation_0, values = (var_43779_cast_fp16, var_44178_cast_fp16))[name = tensor("op_44307_cast_fp16")]; + tensor var_44309_equation_0 = const()[name = tensor("op_44309_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44309_cast_fp16 = einsum(equation = var_44309_equation_0, values = (var_43779_cast_fp16, var_44179_cast_fp16))[name = tensor("op_44309_cast_fp16")]; + tensor var_44311_equation_0 = const()[name = tensor("op_44311_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44311_cast_fp16 = einsum(equation = var_44311_equation_0, values = (var_43779_cast_fp16, var_44180_cast_fp16))[name = tensor("op_44311_cast_fp16")]; + tensor var_44313_equation_0 = const()[name = tensor("op_44313_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44313_cast_fp16 = einsum(equation = var_44313_equation_0, values = (var_43779_cast_fp16, var_44181_cast_fp16))[name = tensor("op_44313_cast_fp16")]; + tensor var_44315_equation_0 = const()[name = tensor("op_44315_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44315_cast_fp16 = einsum(equation = var_44315_equation_0, values = (var_43783_cast_fp16, var_44182_cast_fp16))[name = tensor("op_44315_cast_fp16")]; + tensor var_44317_equation_0 = const()[name = tensor("op_44317_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44317_cast_fp16 = einsum(equation = var_44317_equation_0, values = (var_43783_cast_fp16, var_44183_cast_fp16))[name = tensor("op_44317_cast_fp16")]; + tensor var_44319_equation_0 = const()[name = tensor("op_44319_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44319_cast_fp16 = einsum(equation = var_44319_equation_0, values = (var_43783_cast_fp16, var_44184_cast_fp16))[name = tensor("op_44319_cast_fp16")]; + tensor var_44321_equation_0 = const()[name = tensor("op_44321_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44321_cast_fp16 = einsum(equation = var_44321_equation_0, values = (var_43783_cast_fp16, var_44185_cast_fp16))[name = tensor("op_44321_cast_fp16")]; + tensor var_44323_equation_0 = const()[name = tensor("op_44323_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44323_cast_fp16 = einsum(equation = var_44323_equation_0, values = (var_43787_cast_fp16, var_44186_cast_fp16))[name = tensor("op_44323_cast_fp16")]; + tensor var_44325_equation_0 = const()[name = tensor("op_44325_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44325_cast_fp16 = einsum(equation = var_44325_equation_0, values = (var_43787_cast_fp16, var_44187_cast_fp16))[name = tensor("op_44325_cast_fp16")]; + tensor var_44327_equation_0 = const()[name = tensor("op_44327_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44327_cast_fp16 = einsum(equation = var_44327_equation_0, values = (var_43787_cast_fp16, var_44188_cast_fp16))[name = tensor("op_44327_cast_fp16")]; + tensor var_44329_equation_0 = const()[name = tensor("op_44329_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44329_cast_fp16 = einsum(equation = var_44329_equation_0, values = (var_43787_cast_fp16, var_44189_cast_fp16))[name = tensor("op_44329_cast_fp16")]; + tensor var_44331_equation_0 = const()[name = tensor("op_44331_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44331_cast_fp16 = einsum(equation = var_44331_equation_0, values = (var_43791_cast_fp16, var_44190_cast_fp16))[name = tensor("op_44331_cast_fp16")]; + tensor var_44333_equation_0 = const()[name = tensor("op_44333_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44333_cast_fp16 = einsum(equation = var_44333_equation_0, values = (var_43791_cast_fp16, var_44191_cast_fp16))[name = tensor("op_44333_cast_fp16")]; + tensor var_44335_equation_0 = const()[name = tensor("op_44335_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44335_cast_fp16 = einsum(equation = var_44335_equation_0, values = (var_43791_cast_fp16, var_44192_cast_fp16))[name = tensor("op_44335_cast_fp16")]; + tensor var_44337_equation_0 = const()[name = tensor("op_44337_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44337_cast_fp16 = einsum(equation = var_44337_equation_0, values = (var_43791_cast_fp16, var_44193_cast_fp16))[name = tensor("op_44337_cast_fp16")]; + tensor var_44339_equation_0 = const()[name = tensor("op_44339_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44339_cast_fp16 = einsum(equation = var_44339_equation_0, values = (var_43795_cast_fp16, var_44194_cast_fp16))[name = tensor("op_44339_cast_fp16")]; + tensor var_44341_equation_0 = const()[name = tensor("op_44341_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44341_cast_fp16 = einsum(equation = var_44341_equation_0, values = (var_43795_cast_fp16, var_44195_cast_fp16))[name = tensor("op_44341_cast_fp16")]; + tensor var_44343_equation_0 = const()[name = tensor("op_44343_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44343_cast_fp16 = einsum(equation = var_44343_equation_0, values = (var_43795_cast_fp16, var_44196_cast_fp16))[name = tensor("op_44343_cast_fp16")]; + tensor var_44345_equation_0 = const()[name = tensor("op_44345_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44345_cast_fp16 = einsum(equation = var_44345_equation_0, values = (var_43795_cast_fp16, var_44197_cast_fp16))[name = tensor("op_44345_cast_fp16")]; + tensor var_44347_equation_0 = const()[name = tensor("op_44347_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44347_cast_fp16 = einsum(equation = var_44347_equation_0, values = (var_43799_cast_fp16, var_44198_cast_fp16))[name = tensor("op_44347_cast_fp16")]; + tensor var_44349_equation_0 = const()[name = tensor("op_44349_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44349_cast_fp16 = einsum(equation = var_44349_equation_0, values = (var_43799_cast_fp16, var_44199_cast_fp16))[name = tensor("op_44349_cast_fp16")]; + tensor var_44351_equation_0 = const()[name = tensor("op_44351_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44351_cast_fp16 = einsum(equation = var_44351_equation_0, values = (var_43799_cast_fp16, var_44200_cast_fp16))[name = tensor("op_44351_cast_fp16")]; + tensor var_44353_equation_0 = const()[name = tensor("op_44353_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44353_cast_fp16 = einsum(equation = var_44353_equation_0, values = (var_43799_cast_fp16, var_44201_cast_fp16))[name = tensor("op_44353_cast_fp16")]; + tensor var_44355_equation_0 = const()[name = tensor("op_44355_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44355_cast_fp16 = einsum(equation = var_44355_equation_0, values = (var_43803_cast_fp16, var_44202_cast_fp16))[name = tensor("op_44355_cast_fp16")]; + tensor var_44357_equation_0 = const()[name = tensor("op_44357_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44357_cast_fp16 = einsum(equation = var_44357_equation_0, values = (var_43803_cast_fp16, var_44203_cast_fp16))[name = tensor("op_44357_cast_fp16")]; + tensor var_44359_equation_0 = const()[name = tensor("op_44359_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44359_cast_fp16 = einsum(equation = var_44359_equation_0, values = (var_43803_cast_fp16, var_44204_cast_fp16))[name = tensor("op_44359_cast_fp16")]; + tensor var_44361_equation_0 = const()[name = tensor("op_44361_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44361_cast_fp16 = einsum(equation = var_44361_equation_0, values = (var_43803_cast_fp16, var_44205_cast_fp16))[name = tensor("op_44361_cast_fp16")]; + tensor var_44363_equation_0 = const()[name = tensor("op_44363_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44363_cast_fp16 = einsum(equation = var_44363_equation_0, values = (var_43807_cast_fp16, var_44206_cast_fp16))[name = tensor("op_44363_cast_fp16")]; + tensor var_44365_equation_0 = const()[name = tensor("op_44365_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44365_cast_fp16 = einsum(equation = var_44365_equation_0, values = (var_43807_cast_fp16, var_44207_cast_fp16))[name = tensor("op_44365_cast_fp16")]; + tensor var_44367_equation_0 = const()[name = tensor("op_44367_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44367_cast_fp16 = einsum(equation = var_44367_equation_0, values = (var_43807_cast_fp16, var_44208_cast_fp16))[name = tensor("op_44367_cast_fp16")]; + tensor var_44369_equation_0 = const()[name = tensor("op_44369_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_44369_cast_fp16 = einsum(equation = var_44369_equation_0, values = (var_43807_cast_fp16, var_44209_cast_fp16))[name = tensor("op_44369_cast_fp16")]; + tensor var_44371_interleave_0 = const()[name = tensor("op_44371_interleave_0"), val = tensor(false)]; + tensor var_44371_cast_fp16 = concat(axis = var_42930, interleave = var_44371_interleave_0, values = (var_44211_cast_fp16, var_44213_cast_fp16, var_44215_cast_fp16, var_44217_cast_fp16))[name = tensor("op_44371_cast_fp16")]; + tensor var_44373_interleave_0 = const()[name = tensor("op_44373_interleave_0"), val = tensor(false)]; + tensor var_44373_cast_fp16 = concat(axis = var_42930, interleave = var_44373_interleave_0, values = (var_44219_cast_fp16, var_44221_cast_fp16, var_44223_cast_fp16, var_44225_cast_fp16))[name = tensor("op_44373_cast_fp16")]; + tensor var_44375_interleave_0 = const()[name = tensor("op_44375_interleave_0"), val = tensor(false)]; + tensor var_44375_cast_fp16 = concat(axis = var_42930, interleave = var_44375_interleave_0, values = (var_44227_cast_fp16, var_44229_cast_fp16, var_44231_cast_fp16, var_44233_cast_fp16))[name = tensor("op_44375_cast_fp16")]; + tensor var_44377_interleave_0 = const()[name = tensor("op_44377_interleave_0"), val = tensor(false)]; + tensor var_44377_cast_fp16 = concat(axis = var_42930, interleave = var_44377_interleave_0, values = (var_44235_cast_fp16, var_44237_cast_fp16, var_44239_cast_fp16, var_44241_cast_fp16))[name = tensor("op_44377_cast_fp16")]; + tensor var_44379_interleave_0 = const()[name = tensor("op_44379_interleave_0"), val = tensor(false)]; + tensor var_44379_cast_fp16 = concat(axis = var_42930, interleave = var_44379_interleave_0, values = (var_44243_cast_fp16, var_44245_cast_fp16, var_44247_cast_fp16, var_44249_cast_fp16))[name = tensor("op_44379_cast_fp16")]; + tensor var_44381_interleave_0 = const()[name = tensor("op_44381_interleave_0"), val = tensor(false)]; + tensor var_44381_cast_fp16 = concat(axis = var_42930, interleave = var_44381_interleave_0, values = (var_44251_cast_fp16, var_44253_cast_fp16, var_44255_cast_fp16, var_44257_cast_fp16))[name = tensor("op_44381_cast_fp16")]; + tensor var_44383_interleave_0 = const()[name = tensor("op_44383_interleave_0"), val = tensor(false)]; + tensor var_44383_cast_fp16 = concat(axis = var_42930, interleave = var_44383_interleave_0, values = (var_44259_cast_fp16, var_44261_cast_fp16, var_44263_cast_fp16, var_44265_cast_fp16))[name = tensor("op_44383_cast_fp16")]; + tensor var_44385_interleave_0 = const()[name = tensor("op_44385_interleave_0"), val = tensor(false)]; + tensor var_44385_cast_fp16 = concat(axis = var_42930, interleave = var_44385_interleave_0, values = (var_44267_cast_fp16, var_44269_cast_fp16, var_44271_cast_fp16, var_44273_cast_fp16))[name = tensor("op_44385_cast_fp16")]; + tensor var_44387_interleave_0 = const()[name = tensor("op_44387_interleave_0"), val = tensor(false)]; + tensor var_44387_cast_fp16 = concat(axis = var_42930, interleave = var_44387_interleave_0, values = (var_44275_cast_fp16, var_44277_cast_fp16, var_44279_cast_fp16, var_44281_cast_fp16))[name = tensor("op_44387_cast_fp16")]; + tensor var_44389_interleave_0 = const()[name = tensor("op_44389_interleave_0"), val = tensor(false)]; + tensor var_44389_cast_fp16 = concat(axis = var_42930, interleave = var_44389_interleave_0, values = (var_44283_cast_fp16, var_44285_cast_fp16, var_44287_cast_fp16, var_44289_cast_fp16))[name = tensor("op_44389_cast_fp16")]; + tensor var_44391_interleave_0 = const()[name = tensor("op_44391_interleave_0"), val = tensor(false)]; + tensor var_44391_cast_fp16 = concat(axis = var_42930, interleave = var_44391_interleave_0, values = (var_44291_cast_fp16, var_44293_cast_fp16, var_44295_cast_fp16, var_44297_cast_fp16))[name = tensor("op_44391_cast_fp16")]; + tensor var_44393_interleave_0 = const()[name = tensor("op_44393_interleave_0"), val = tensor(false)]; + tensor var_44393_cast_fp16 = concat(axis = var_42930, interleave = var_44393_interleave_0, values = (var_44299_cast_fp16, var_44301_cast_fp16, var_44303_cast_fp16, var_44305_cast_fp16))[name = tensor("op_44393_cast_fp16")]; + tensor var_44395_interleave_0 = const()[name = tensor("op_44395_interleave_0"), val = tensor(false)]; + tensor var_44395_cast_fp16 = concat(axis = var_42930, interleave = var_44395_interleave_0, values = (var_44307_cast_fp16, var_44309_cast_fp16, var_44311_cast_fp16, var_44313_cast_fp16))[name = tensor("op_44395_cast_fp16")]; + tensor var_44397_interleave_0 = const()[name = tensor("op_44397_interleave_0"), val = tensor(false)]; + tensor var_44397_cast_fp16 = concat(axis = var_42930, interleave = var_44397_interleave_0, values = (var_44315_cast_fp16, var_44317_cast_fp16, var_44319_cast_fp16, var_44321_cast_fp16))[name = tensor("op_44397_cast_fp16")]; + tensor var_44399_interleave_0 = const()[name = tensor("op_44399_interleave_0"), val = tensor(false)]; + tensor var_44399_cast_fp16 = concat(axis = var_42930, interleave = var_44399_interleave_0, values = (var_44323_cast_fp16, var_44325_cast_fp16, var_44327_cast_fp16, var_44329_cast_fp16))[name = tensor("op_44399_cast_fp16")]; + tensor var_44401_interleave_0 = const()[name = tensor("op_44401_interleave_0"), val = tensor(false)]; + tensor var_44401_cast_fp16 = concat(axis = var_42930, interleave = var_44401_interleave_0, values = (var_44331_cast_fp16, var_44333_cast_fp16, var_44335_cast_fp16, var_44337_cast_fp16))[name = tensor("op_44401_cast_fp16")]; + tensor var_44403_interleave_0 = const()[name = tensor("op_44403_interleave_0"), val = tensor(false)]; + tensor var_44403_cast_fp16 = concat(axis = var_42930, interleave = var_44403_interleave_0, values = (var_44339_cast_fp16, var_44341_cast_fp16, var_44343_cast_fp16, var_44345_cast_fp16))[name = tensor("op_44403_cast_fp16")]; + tensor var_44405_interleave_0 = const()[name = tensor("op_44405_interleave_0"), val = tensor(false)]; + tensor var_44405_cast_fp16 = concat(axis = var_42930, interleave = var_44405_interleave_0, values = (var_44347_cast_fp16, var_44349_cast_fp16, var_44351_cast_fp16, var_44353_cast_fp16))[name = tensor("op_44405_cast_fp16")]; + tensor var_44407_interleave_0 = const()[name = tensor("op_44407_interleave_0"), val = tensor(false)]; + tensor var_44407_cast_fp16 = concat(axis = var_42930, interleave = var_44407_interleave_0, values = (var_44355_cast_fp16, var_44357_cast_fp16, var_44359_cast_fp16, var_44361_cast_fp16))[name = tensor("op_44407_cast_fp16")]; + tensor var_44409_interleave_0 = const()[name = tensor("op_44409_interleave_0"), val = tensor(false)]; + tensor var_44409_cast_fp16 = concat(axis = var_42930, interleave = var_44409_interleave_0, values = (var_44363_cast_fp16, var_44365_cast_fp16, var_44367_cast_fp16, var_44369_cast_fp16))[name = tensor("op_44409_cast_fp16")]; + tensor input_225_interleave_0 = const()[name = tensor("input_225_interleave_0"), val = tensor(false)]; + tensor input_225_cast_fp16 = concat(axis = var_42955, interleave = input_225_interleave_0, values = (var_44371_cast_fp16, var_44373_cast_fp16, var_44375_cast_fp16, var_44377_cast_fp16, var_44379_cast_fp16, var_44381_cast_fp16, var_44383_cast_fp16, var_44385_cast_fp16, var_44387_cast_fp16, var_44389_cast_fp16, var_44391_cast_fp16, var_44393_cast_fp16, var_44395_cast_fp16, var_44397_cast_fp16, var_44399_cast_fp16, var_44401_cast_fp16, var_44403_cast_fp16, var_44405_cast_fp16, var_44407_cast_fp16, var_44409_cast_fp16))[name = tensor("input_225_cast_fp16")]; + tensor var_44414 = const()[name = tensor("op_44414"), val = tensor([1, 1])]; + tensor var_44416 = const()[name = tensor("op_44416"), val = tensor([1, 1])]; + tensor obj_115_pad_type_0 = const()[name = tensor("obj_115_pad_type_0"), val = tensor("custom")]; + tensor obj_115_pad_0 = const()[name = tensor("obj_115_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_28_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_28_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1126028480)))]; + tensor layers_28_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_28_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1129305344)))]; + tensor obj_115_cast_fp16 = conv(bias = layers_28_self_attn_o_proj_bias_to_fp16, dilations = var_44416, groups = var_42955, pad = obj_115_pad_0, pad_type = obj_115_pad_type_0, strides = var_44414, weight = layers_28_self_attn_o_proj_weight_to_fp16, x = input_225_cast_fp16)[name = tensor("obj_115_cast_fp16")]; + tensor inputs_115_cast_fp16 = add(x = inputs_113_cast_fp16, y = obj_115_cast_fp16)[name = tensor("inputs_115_cast_fp16")]; + tensor var_44422 = const()[name = tensor("op_44422"), val = tensor([1])]; + tensor channels_mean_115_cast_fp16 = reduce_mean(axes = var_44422, keep_dims = var_42956, x = inputs_115_cast_fp16)[name = tensor("channels_mean_115_cast_fp16")]; + tensor zero_mean_115_cast_fp16 = sub(x = inputs_115_cast_fp16, y = channels_mean_115_cast_fp16)[name = tensor("zero_mean_115_cast_fp16")]; + tensor zero_mean_sq_115_cast_fp16 = mul(x = zero_mean_115_cast_fp16, y = zero_mean_115_cast_fp16)[name = tensor("zero_mean_sq_115_cast_fp16")]; + tensor var_44426 = const()[name = tensor("op_44426"), val = tensor([1])]; + tensor var_44427_cast_fp16 = reduce_mean(axes = var_44426, keep_dims = var_42956, x = zero_mean_sq_115_cast_fp16)[name = tensor("op_44427_cast_fp16")]; + tensor var_44428_to_fp16 = const()[name = tensor("op_44428_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_44429_cast_fp16 = add(x = var_44427_cast_fp16, y = var_44428_to_fp16)[name = tensor("op_44429_cast_fp16")]; + tensor denom_115_epsilon_0_to_fp16 = const()[name = tensor("denom_115_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_115_cast_fp16 = rsqrt(epsilon = denom_115_epsilon_0_to_fp16, x = var_44429_cast_fp16)[name = tensor("denom_115_cast_fp16")]; + tensor out_115_cast_fp16 = mul(x = zero_mean_115_cast_fp16, y = denom_115_cast_fp16)[name = tensor("out_115_cast_fp16")]; + tensor input_227_gamma_0_to_fp16 = const()[name = tensor("input_227_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1129307968)))]; + tensor input_227_beta_0_to_fp16 = const()[name = tensor("input_227_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1129310592)))]; + tensor input_227_epsilon_0_to_fp16 = const()[name = tensor("input_227_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_227_cast_fp16 = batch_norm(beta = input_227_beta_0_to_fp16, epsilon = input_227_epsilon_0_to_fp16, gamma = input_227_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_115_cast_fp16)[name = tensor("input_227_cast_fp16")]; + tensor var_44440 = const()[name = tensor("op_44440"), val = tensor([1, 1])]; + tensor var_44442 = const()[name = tensor("op_44442"), val = tensor([1, 1])]; + tensor input_229_pad_type_0 = const()[name = tensor("input_229_pad_type_0"), val = tensor("custom")]; + tensor input_229_pad_0 = const()[name = tensor("input_229_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_28_fc1_weight_to_fp16 = const()[name = tensor("layers_28_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1129313216)))]; + tensor layers_28_fc1_bias_to_fp16 = const()[name = tensor("layers_28_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1142420480)))]; + tensor input_229_cast_fp16 = conv(bias = layers_28_fc1_bias_to_fp16, dilations = var_44442, groups = var_42955, pad = input_229_pad_0, pad_type = input_229_pad_type_0, strides = var_44440, weight = layers_28_fc1_weight_to_fp16, x = input_227_cast_fp16)[name = tensor("input_229_cast_fp16")]; + tensor input_231_mode_0 = const()[name = tensor("input_231_mode_0"), val = tensor("EXACT")]; + tensor input_231_cast_fp16 = gelu(mode = input_231_mode_0, x = input_229_cast_fp16)[name = tensor("input_231_cast_fp16")]; + tensor var_44448 = const()[name = tensor("op_44448"), val = tensor([1, 1])]; + tensor var_44450 = const()[name = tensor("op_44450"), val = tensor([1, 1])]; + tensor hidden_states_61_pad_type_0 = const()[name = tensor("hidden_states_61_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_61_pad_0 = const()[name = tensor("hidden_states_61_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_28_fc2_weight_to_fp16 = const()[name = tensor("layers_28_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1142430784)))]; + tensor layers_28_fc2_bias_to_fp16 = const()[name = tensor("layers_28_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1155538048)))]; + tensor hidden_states_61_cast_fp16 = conv(bias = layers_28_fc2_bias_to_fp16, dilations = var_44450, groups = var_42955, pad = hidden_states_61_pad_0, pad_type = hidden_states_61_pad_type_0, strides = var_44448, weight = layers_28_fc2_weight_to_fp16, x = input_231_cast_fp16)[name = tensor("hidden_states_61_cast_fp16")]; + tensor inputs_117_cast_fp16 = add(x = inputs_115_cast_fp16, y = hidden_states_61_cast_fp16)[name = tensor("inputs_117_cast_fp16")]; + tensor var_44457 = const()[name = tensor("op_44457"), val = tensor(3)]; + tensor var_44482 = const()[name = tensor("op_44482"), val = tensor(1)]; + tensor var_44483 = const()[name = tensor("op_44483"), val = tensor(true)]; + tensor var_44493 = const()[name = tensor("op_44493"), val = tensor([1])]; + tensor channels_mean_117_cast_fp16 = reduce_mean(axes = var_44493, keep_dims = var_44483, x = inputs_117_cast_fp16)[name = tensor("channels_mean_117_cast_fp16")]; + tensor zero_mean_117_cast_fp16 = sub(x = inputs_117_cast_fp16, y = channels_mean_117_cast_fp16)[name = tensor("zero_mean_117_cast_fp16")]; + tensor zero_mean_sq_117_cast_fp16 = mul(x = zero_mean_117_cast_fp16, y = zero_mean_117_cast_fp16)[name = tensor("zero_mean_sq_117_cast_fp16")]; + tensor var_44497 = const()[name = tensor("op_44497"), val = tensor([1])]; + tensor var_44498_cast_fp16 = reduce_mean(axes = var_44497, keep_dims = var_44483, x = zero_mean_sq_117_cast_fp16)[name = tensor("op_44498_cast_fp16")]; + tensor var_44499_to_fp16 = const()[name = tensor("op_44499_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_44500_cast_fp16 = add(x = var_44498_cast_fp16, y = var_44499_to_fp16)[name = tensor("op_44500_cast_fp16")]; + tensor denom_117_epsilon_0_to_fp16 = const()[name = tensor("denom_117_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_117_cast_fp16 = rsqrt(epsilon = denom_117_epsilon_0_to_fp16, x = var_44500_cast_fp16)[name = tensor("denom_117_cast_fp16")]; + tensor out_117_cast_fp16 = mul(x = zero_mean_117_cast_fp16, y = denom_117_cast_fp16)[name = tensor("out_117_cast_fp16")]; + tensor obj_117_gamma_0_to_fp16 = const()[name = tensor("obj_117_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1155540672)))]; + tensor obj_117_beta_0_to_fp16 = const()[name = tensor("obj_117_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1155543296)))]; + tensor obj_117_epsilon_0_to_fp16 = const()[name = tensor("obj_117_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_117_cast_fp16 = batch_norm(beta = obj_117_beta_0_to_fp16, epsilon = obj_117_epsilon_0_to_fp16, gamma = obj_117_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_117_cast_fp16)[name = tensor("obj_117_cast_fp16")]; + tensor var_44515 = const()[name = tensor("op_44515"), val = tensor([1, 1])]; + tensor var_44517 = const()[name = tensor("op_44517"), val = tensor([1, 1])]; + tensor query_59_pad_type_0 = const()[name = tensor("query_59_pad_type_0"), val = tensor("custom")]; + tensor query_59_pad_0 = const()[name = tensor("query_59_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_29_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_29_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1155545920)))]; + tensor layers_29_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_29_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1158822784)))]; + tensor query_59_cast_fp16 = conv(bias = layers_29_self_attn_q_proj_bias_to_fp16, dilations = var_44517, groups = var_44482, pad = query_59_pad_0, pad_type = query_59_pad_type_0, strides = var_44515, weight = layers_29_self_attn_q_proj_weight_to_fp16, x = obj_117_cast_fp16)[name = tensor("query_59_cast_fp16")]; + tensor var_44521 = const()[name = tensor("op_44521"), val = tensor([1, 1])]; + tensor var_44523 = const()[name = tensor("op_44523"), val = tensor([1, 1])]; + tensor key_59_pad_type_0 = const()[name = tensor("key_59_pad_type_0"), val = tensor("custom")]; + tensor key_59_pad_0 = const()[name = tensor("key_59_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_29_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_29_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1158825408)))]; + tensor key_59_cast_fp16 = conv(dilations = var_44523, groups = var_44482, pad = key_59_pad_0, pad_type = key_59_pad_type_0, strides = var_44521, weight = layers_29_self_attn_k_proj_weight_to_fp16, x = obj_117_cast_fp16)[name = tensor("key_59_cast_fp16")]; + tensor var_44528 = const()[name = tensor("op_44528"), val = tensor([1, 1])]; + tensor var_44530 = const()[name = tensor("op_44530"), val = tensor([1, 1])]; + tensor value_59_pad_type_0 = const()[name = tensor("value_59_pad_type_0"), val = tensor("custom")]; + tensor value_59_pad_0 = const()[name = tensor("value_59_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_29_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_29_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1162102272)))]; + tensor layers_29_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_29_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1165379136)))]; + tensor value_59_cast_fp16 = conv(bias = layers_29_self_attn_v_proj_bias_to_fp16, dilations = var_44530, groups = var_44482, pad = value_59_pad_0, pad_type = value_59_pad_type_0, strides = var_44528, weight = layers_29_self_attn_v_proj_weight_to_fp16, x = obj_117_cast_fp16)[name = tensor("value_59_cast_fp16")]; + tensor var_44537_begin_0 = const()[name = tensor("op_44537_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_44537_end_0 = const()[name = tensor("op_44537_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_44537_end_mask_0 = const()[name = tensor("op_44537_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_44537_cast_fp16 = slice_by_index(begin = var_44537_begin_0, end = var_44537_end_0, end_mask = var_44537_end_mask_0, x = query_59_cast_fp16)[name = tensor("op_44537_cast_fp16")]; + tensor var_44541_begin_0 = const()[name = tensor("op_44541_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_44541_end_0 = const()[name = tensor("op_44541_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_44541_end_mask_0 = const()[name = tensor("op_44541_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_44541_cast_fp16 = slice_by_index(begin = var_44541_begin_0, end = var_44541_end_0, end_mask = var_44541_end_mask_0, x = query_59_cast_fp16)[name = tensor("op_44541_cast_fp16")]; + tensor var_44545_begin_0 = const()[name = tensor("op_44545_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_44545_end_0 = const()[name = tensor("op_44545_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_44545_end_mask_0 = const()[name = tensor("op_44545_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_44545_cast_fp16 = slice_by_index(begin = var_44545_begin_0, end = var_44545_end_0, end_mask = var_44545_end_mask_0, x = query_59_cast_fp16)[name = tensor("op_44545_cast_fp16")]; + tensor var_44549_begin_0 = const()[name = tensor("op_44549_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_44549_end_0 = const()[name = tensor("op_44549_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_44549_end_mask_0 = const()[name = tensor("op_44549_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_44549_cast_fp16 = slice_by_index(begin = var_44549_begin_0, end = var_44549_end_0, end_mask = var_44549_end_mask_0, x = query_59_cast_fp16)[name = tensor("op_44549_cast_fp16")]; + tensor var_44553_begin_0 = const()[name = tensor("op_44553_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_44553_end_0 = const()[name = tensor("op_44553_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_44553_end_mask_0 = const()[name = tensor("op_44553_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_44553_cast_fp16 = slice_by_index(begin = var_44553_begin_0, end = var_44553_end_0, end_mask = var_44553_end_mask_0, x = query_59_cast_fp16)[name = tensor("op_44553_cast_fp16")]; + tensor var_44557_begin_0 = const()[name = tensor("op_44557_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_44557_end_0 = const()[name = tensor("op_44557_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_44557_end_mask_0 = const()[name = tensor("op_44557_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_44557_cast_fp16 = slice_by_index(begin = var_44557_begin_0, end = var_44557_end_0, end_mask = var_44557_end_mask_0, x = query_59_cast_fp16)[name = tensor("op_44557_cast_fp16")]; + tensor var_44561_begin_0 = const()[name = tensor("op_44561_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_44561_end_0 = const()[name = tensor("op_44561_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_44561_end_mask_0 = const()[name = tensor("op_44561_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_44561_cast_fp16 = slice_by_index(begin = var_44561_begin_0, end = var_44561_end_0, end_mask = var_44561_end_mask_0, x = query_59_cast_fp16)[name = tensor("op_44561_cast_fp16")]; + tensor var_44565_begin_0 = const()[name = tensor("op_44565_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_44565_end_0 = const()[name = tensor("op_44565_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_44565_end_mask_0 = const()[name = tensor("op_44565_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_44565_cast_fp16 = slice_by_index(begin = var_44565_begin_0, end = var_44565_end_0, end_mask = var_44565_end_mask_0, x = query_59_cast_fp16)[name = tensor("op_44565_cast_fp16")]; + tensor var_44569_begin_0 = const()[name = tensor("op_44569_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_44569_end_0 = const()[name = tensor("op_44569_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_44569_end_mask_0 = const()[name = tensor("op_44569_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_44569_cast_fp16 = slice_by_index(begin = var_44569_begin_0, end = var_44569_end_0, end_mask = var_44569_end_mask_0, x = query_59_cast_fp16)[name = tensor("op_44569_cast_fp16")]; + tensor var_44573_begin_0 = const()[name = tensor("op_44573_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_44573_end_0 = const()[name = tensor("op_44573_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_44573_end_mask_0 = const()[name = tensor("op_44573_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_44573_cast_fp16 = slice_by_index(begin = var_44573_begin_0, end = var_44573_end_0, end_mask = var_44573_end_mask_0, x = query_59_cast_fp16)[name = tensor("op_44573_cast_fp16")]; + tensor var_44577_begin_0 = const()[name = tensor("op_44577_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_44577_end_0 = const()[name = tensor("op_44577_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_44577_end_mask_0 = const()[name = tensor("op_44577_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_44577_cast_fp16 = slice_by_index(begin = var_44577_begin_0, end = var_44577_end_0, end_mask = var_44577_end_mask_0, x = query_59_cast_fp16)[name = tensor("op_44577_cast_fp16")]; + tensor var_44581_begin_0 = const()[name = tensor("op_44581_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_44581_end_0 = const()[name = tensor("op_44581_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_44581_end_mask_0 = const()[name = tensor("op_44581_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_44581_cast_fp16 = slice_by_index(begin = var_44581_begin_0, end = var_44581_end_0, end_mask = var_44581_end_mask_0, x = query_59_cast_fp16)[name = tensor("op_44581_cast_fp16")]; + tensor var_44585_begin_0 = const()[name = tensor("op_44585_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_44585_end_0 = const()[name = tensor("op_44585_end_0"), val = tensor([1, 832, 1, 1500])]; + tensor var_44585_end_mask_0 = const()[name = tensor("op_44585_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_44585_cast_fp16 = slice_by_index(begin = var_44585_begin_0, end = var_44585_end_0, end_mask = var_44585_end_mask_0, x = query_59_cast_fp16)[name = tensor("op_44585_cast_fp16")]; + tensor var_44589_begin_0 = const()[name = tensor("op_44589_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_44589_end_0 = const()[name = tensor("op_44589_end_0"), val = tensor([1, 896, 1, 1500])]; + tensor var_44589_end_mask_0 = const()[name = tensor("op_44589_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_44589_cast_fp16 = slice_by_index(begin = var_44589_begin_0, end = var_44589_end_0, end_mask = var_44589_end_mask_0, x = query_59_cast_fp16)[name = tensor("op_44589_cast_fp16")]; + tensor var_44593_begin_0 = const()[name = tensor("op_44593_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_44593_end_0 = const()[name = tensor("op_44593_end_0"), val = tensor([1, 960, 1, 1500])]; + tensor var_44593_end_mask_0 = const()[name = tensor("op_44593_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_44593_cast_fp16 = slice_by_index(begin = var_44593_begin_0, end = var_44593_end_0, end_mask = var_44593_end_mask_0, x = query_59_cast_fp16)[name = tensor("op_44593_cast_fp16")]; + tensor var_44597_begin_0 = const()[name = tensor("op_44597_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_44597_end_0 = const()[name = tensor("op_44597_end_0"), val = tensor([1, 1024, 1, 1500])]; + tensor var_44597_end_mask_0 = const()[name = tensor("op_44597_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_44597_cast_fp16 = slice_by_index(begin = var_44597_begin_0, end = var_44597_end_0, end_mask = var_44597_end_mask_0, x = query_59_cast_fp16)[name = tensor("op_44597_cast_fp16")]; + tensor var_44601_begin_0 = const()[name = tensor("op_44601_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_44601_end_0 = const()[name = tensor("op_44601_end_0"), val = tensor([1, 1088, 1, 1500])]; + tensor var_44601_end_mask_0 = const()[name = tensor("op_44601_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_44601_cast_fp16 = slice_by_index(begin = var_44601_begin_0, end = var_44601_end_0, end_mask = var_44601_end_mask_0, x = query_59_cast_fp16)[name = tensor("op_44601_cast_fp16")]; + tensor var_44605_begin_0 = const()[name = tensor("op_44605_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_44605_end_0 = const()[name = tensor("op_44605_end_0"), val = tensor([1, 1152, 1, 1500])]; + tensor var_44605_end_mask_0 = const()[name = tensor("op_44605_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_44605_cast_fp16 = slice_by_index(begin = var_44605_begin_0, end = var_44605_end_0, end_mask = var_44605_end_mask_0, x = query_59_cast_fp16)[name = tensor("op_44605_cast_fp16")]; + tensor var_44609_begin_0 = const()[name = tensor("op_44609_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_44609_end_0 = const()[name = tensor("op_44609_end_0"), val = tensor([1, 1216, 1, 1500])]; + tensor var_44609_end_mask_0 = const()[name = tensor("op_44609_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_44609_cast_fp16 = slice_by_index(begin = var_44609_begin_0, end = var_44609_end_0, end_mask = var_44609_end_mask_0, x = query_59_cast_fp16)[name = tensor("op_44609_cast_fp16")]; + tensor var_44613_begin_0 = const()[name = tensor("op_44613_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_44613_end_0 = const()[name = tensor("op_44613_end_0"), val = tensor([1, 1280, 1, 1500])]; + tensor var_44613_end_mask_0 = const()[name = tensor("op_44613_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_44613_cast_fp16 = slice_by_index(begin = var_44613_begin_0, end = var_44613_end_0, end_mask = var_44613_end_mask_0, x = query_59_cast_fp16)[name = tensor("op_44613_cast_fp16")]; + tensor var_44622_begin_0 = const()[name = tensor("op_44622_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_44622_end_0 = const()[name = tensor("op_44622_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_44622_end_mask_0 = const()[name = tensor("op_44622_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_44622_cast_fp16 = slice_by_index(begin = var_44622_begin_0, end = var_44622_end_0, end_mask = var_44622_end_mask_0, x = var_44537_cast_fp16)[name = tensor("op_44622_cast_fp16")]; + tensor var_44629_begin_0 = const()[name = tensor("op_44629_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_44629_end_0 = const()[name = tensor("op_44629_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_44629_end_mask_0 = const()[name = tensor("op_44629_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_44629_cast_fp16 = slice_by_index(begin = var_44629_begin_0, end = var_44629_end_0, end_mask = var_44629_end_mask_0, x = var_44537_cast_fp16)[name = tensor("op_44629_cast_fp16")]; + tensor var_44636_begin_0 = const()[name = tensor("op_44636_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_44636_end_0 = const()[name = tensor("op_44636_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_44636_end_mask_0 = const()[name = tensor("op_44636_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_44636_cast_fp16 = slice_by_index(begin = var_44636_begin_0, end = var_44636_end_0, end_mask = var_44636_end_mask_0, x = var_44537_cast_fp16)[name = tensor("op_44636_cast_fp16")]; + tensor var_44643_begin_0 = const()[name = tensor("op_44643_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_44643_end_0 = const()[name = tensor("op_44643_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_44643_end_mask_0 = const()[name = tensor("op_44643_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_44643_cast_fp16 = slice_by_index(begin = var_44643_begin_0, end = var_44643_end_0, end_mask = var_44643_end_mask_0, x = var_44537_cast_fp16)[name = tensor("op_44643_cast_fp16")]; + tensor var_44650_begin_0 = const()[name = tensor("op_44650_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_44650_end_0 = const()[name = tensor("op_44650_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_44650_end_mask_0 = const()[name = tensor("op_44650_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_44650_cast_fp16 = slice_by_index(begin = var_44650_begin_0, end = var_44650_end_0, end_mask = var_44650_end_mask_0, x = var_44541_cast_fp16)[name = tensor("op_44650_cast_fp16")]; + tensor var_44657_begin_0 = const()[name = tensor("op_44657_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_44657_end_0 = const()[name = tensor("op_44657_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_44657_end_mask_0 = const()[name = tensor("op_44657_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_44657_cast_fp16 = slice_by_index(begin = var_44657_begin_0, end = var_44657_end_0, end_mask = var_44657_end_mask_0, x = var_44541_cast_fp16)[name = tensor("op_44657_cast_fp16")]; + tensor var_44664_begin_0 = const()[name = tensor("op_44664_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_44664_end_0 = const()[name = tensor("op_44664_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_44664_end_mask_0 = const()[name = tensor("op_44664_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_44664_cast_fp16 = slice_by_index(begin = var_44664_begin_0, end = var_44664_end_0, end_mask = var_44664_end_mask_0, x = var_44541_cast_fp16)[name = tensor("op_44664_cast_fp16")]; + tensor var_44671_begin_0 = const()[name = tensor("op_44671_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_44671_end_0 = const()[name = tensor("op_44671_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_44671_end_mask_0 = const()[name = tensor("op_44671_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_44671_cast_fp16 = slice_by_index(begin = var_44671_begin_0, end = var_44671_end_0, end_mask = var_44671_end_mask_0, x = var_44541_cast_fp16)[name = tensor("op_44671_cast_fp16")]; + tensor var_44678_begin_0 = const()[name = tensor("op_44678_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_44678_end_0 = const()[name = tensor("op_44678_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_44678_end_mask_0 = const()[name = tensor("op_44678_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_44678_cast_fp16 = slice_by_index(begin = var_44678_begin_0, end = var_44678_end_0, end_mask = var_44678_end_mask_0, x = var_44545_cast_fp16)[name = tensor("op_44678_cast_fp16")]; + tensor var_44685_begin_0 = const()[name = tensor("op_44685_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_44685_end_0 = const()[name = tensor("op_44685_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_44685_end_mask_0 = const()[name = tensor("op_44685_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_44685_cast_fp16 = slice_by_index(begin = var_44685_begin_0, end = var_44685_end_0, end_mask = var_44685_end_mask_0, x = var_44545_cast_fp16)[name = tensor("op_44685_cast_fp16")]; + tensor var_44692_begin_0 = const()[name = tensor("op_44692_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_44692_end_0 = const()[name = tensor("op_44692_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_44692_end_mask_0 = const()[name = tensor("op_44692_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_44692_cast_fp16 = slice_by_index(begin = var_44692_begin_0, end = var_44692_end_0, end_mask = var_44692_end_mask_0, x = var_44545_cast_fp16)[name = tensor("op_44692_cast_fp16")]; + tensor var_44699_begin_0 = const()[name = tensor("op_44699_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_44699_end_0 = const()[name = tensor("op_44699_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_44699_end_mask_0 = const()[name = tensor("op_44699_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_44699_cast_fp16 = slice_by_index(begin = var_44699_begin_0, end = var_44699_end_0, end_mask = var_44699_end_mask_0, x = var_44545_cast_fp16)[name = tensor("op_44699_cast_fp16")]; + tensor var_44706_begin_0 = const()[name = tensor("op_44706_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_44706_end_0 = const()[name = tensor("op_44706_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_44706_end_mask_0 = const()[name = tensor("op_44706_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_44706_cast_fp16 = slice_by_index(begin = var_44706_begin_0, end = var_44706_end_0, end_mask = var_44706_end_mask_0, x = var_44549_cast_fp16)[name = tensor("op_44706_cast_fp16")]; + tensor var_44713_begin_0 = const()[name = tensor("op_44713_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_44713_end_0 = const()[name = tensor("op_44713_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_44713_end_mask_0 = const()[name = tensor("op_44713_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_44713_cast_fp16 = slice_by_index(begin = var_44713_begin_0, end = var_44713_end_0, end_mask = var_44713_end_mask_0, x = var_44549_cast_fp16)[name = tensor("op_44713_cast_fp16")]; + tensor var_44720_begin_0 = const()[name = tensor("op_44720_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_44720_end_0 = const()[name = tensor("op_44720_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_44720_end_mask_0 = const()[name = tensor("op_44720_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_44720_cast_fp16 = slice_by_index(begin = var_44720_begin_0, end = var_44720_end_0, end_mask = var_44720_end_mask_0, x = var_44549_cast_fp16)[name = tensor("op_44720_cast_fp16")]; + tensor var_44727_begin_0 = const()[name = tensor("op_44727_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_44727_end_0 = const()[name = tensor("op_44727_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_44727_end_mask_0 = const()[name = tensor("op_44727_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_44727_cast_fp16 = slice_by_index(begin = var_44727_begin_0, end = var_44727_end_0, end_mask = var_44727_end_mask_0, x = var_44549_cast_fp16)[name = tensor("op_44727_cast_fp16")]; + tensor var_44734_begin_0 = const()[name = tensor("op_44734_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_44734_end_0 = const()[name = tensor("op_44734_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_44734_end_mask_0 = const()[name = tensor("op_44734_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_44734_cast_fp16 = slice_by_index(begin = var_44734_begin_0, end = var_44734_end_0, end_mask = var_44734_end_mask_0, x = var_44553_cast_fp16)[name = tensor("op_44734_cast_fp16")]; + tensor var_44741_begin_0 = const()[name = tensor("op_44741_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_44741_end_0 = const()[name = tensor("op_44741_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_44741_end_mask_0 = const()[name = tensor("op_44741_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_44741_cast_fp16 = slice_by_index(begin = var_44741_begin_0, end = var_44741_end_0, end_mask = var_44741_end_mask_0, x = var_44553_cast_fp16)[name = tensor("op_44741_cast_fp16")]; + tensor var_44748_begin_0 = const()[name = tensor("op_44748_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_44748_end_0 = const()[name = tensor("op_44748_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_44748_end_mask_0 = const()[name = tensor("op_44748_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_44748_cast_fp16 = slice_by_index(begin = var_44748_begin_0, end = var_44748_end_0, end_mask = var_44748_end_mask_0, x = var_44553_cast_fp16)[name = tensor("op_44748_cast_fp16")]; + tensor var_44755_begin_0 = const()[name = tensor("op_44755_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_44755_end_0 = const()[name = tensor("op_44755_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_44755_end_mask_0 = const()[name = tensor("op_44755_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_44755_cast_fp16 = slice_by_index(begin = var_44755_begin_0, end = var_44755_end_0, end_mask = var_44755_end_mask_0, x = var_44553_cast_fp16)[name = tensor("op_44755_cast_fp16")]; + tensor var_44762_begin_0 = const()[name = tensor("op_44762_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_44762_end_0 = const()[name = tensor("op_44762_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_44762_end_mask_0 = const()[name = tensor("op_44762_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_44762_cast_fp16 = slice_by_index(begin = var_44762_begin_0, end = var_44762_end_0, end_mask = var_44762_end_mask_0, x = var_44557_cast_fp16)[name = tensor("op_44762_cast_fp16")]; + tensor var_44769_begin_0 = const()[name = tensor("op_44769_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_44769_end_0 = const()[name = tensor("op_44769_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_44769_end_mask_0 = const()[name = tensor("op_44769_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_44769_cast_fp16 = slice_by_index(begin = var_44769_begin_0, end = var_44769_end_0, end_mask = var_44769_end_mask_0, x = var_44557_cast_fp16)[name = tensor("op_44769_cast_fp16")]; + tensor var_44776_begin_0 = const()[name = tensor("op_44776_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_44776_end_0 = const()[name = tensor("op_44776_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_44776_end_mask_0 = const()[name = tensor("op_44776_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_44776_cast_fp16 = slice_by_index(begin = var_44776_begin_0, end = var_44776_end_0, end_mask = var_44776_end_mask_0, x = var_44557_cast_fp16)[name = tensor("op_44776_cast_fp16")]; + tensor var_44783_begin_0 = const()[name = tensor("op_44783_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_44783_end_0 = const()[name = tensor("op_44783_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_44783_end_mask_0 = const()[name = tensor("op_44783_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_44783_cast_fp16 = slice_by_index(begin = var_44783_begin_0, end = var_44783_end_0, end_mask = var_44783_end_mask_0, x = var_44557_cast_fp16)[name = tensor("op_44783_cast_fp16")]; + tensor var_44790_begin_0 = const()[name = tensor("op_44790_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_44790_end_0 = const()[name = tensor("op_44790_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_44790_end_mask_0 = const()[name = tensor("op_44790_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_44790_cast_fp16 = slice_by_index(begin = var_44790_begin_0, end = var_44790_end_0, end_mask = var_44790_end_mask_0, x = var_44561_cast_fp16)[name = tensor("op_44790_cast_fp16")]; + tensor var_44797_begin_0 = const()[name = tensor("op_44797_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_44797_end_0 = const()[name = tensor("op_44797_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_44797_end_mask_0 = const()[name = tensor("op_44797_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_44797_cast_fp16 = slice_by_index(begin = var_44797_begin_0, end = var_44797_end_0, end_mask = var_44797_end_mask_0, x = var_44561_cast_fp16)[name = tensor("op_44797_cast_fp16")]; + tensor var_44804_begin_0 = const()[name = tensor("op_44804_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_44804_end_0 = const()[name = tensor("op_44804_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_44804_end_mask_0 = const()[name = tensor("op_44804_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_44804_cast_fp16 = slice_by_index(begin = var_44804_begin_0, end = var_44804_end_0, end_mask = var_44804_end_mask_0, x = var_44561_cast_fp16)[name = tensor("op_44804_cast_fp16")]; + tensor var_44811_begin_0 = const()[name = tensor("op_44811_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_44811_end_0 = const()[name = tensor("op_44811_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_44811_end_mask_0 = const()[name = tensor("op_44811_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_44811_cast_fp16 = slice_by_index(begin = var_44811_begin_0, end = var_44811_end_0, end_mask = var_44811_end_mask_0, x = var_44561_cast_fp16)[name = tensor("op_44811_cast_fp16")]; + tensor var_44818_begin_0 = const()[name = tensor("op_44818_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_44818_end_0 = const()[name = tensor("op_44818_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_44818_end_mask_0 = const()[name = tensor("op_44818_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_44818_cast_fp16 = slice_by_index(begin = var_44818_begin_0, end = var_44818_end_0, end_mask = var_44818_end_mask_0, x = var_44565_cast_fp16)[name = tensor("op_44818_cast_fp16")]; + tensor var_44825_begin_0 = const()[name = tensor("op_44825_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_44825_end_0 = const()[name = tensor("op_44825_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_44825_end_mask_0 = const()[name = tensor("op_44825_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_44825_cast_fp16 = slice_by_index(begin = var_44825_begin_0, end = var_44825_end_0, end_mask = var_44825_end_mask_0, x = var_44565_cast_fp16)[name = tensor("op_44825_cast_fp16")]; + tensor var_44832_begin_0 = const()[name = tensor("op_44832_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_44832_end_0 = const()[name = tensor("op_44832_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_44832_end_mask_0 = const()[name = tensor("op_44832_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_44832_cast_fp16 = slice_by_index(begin = var_44832_begin_0, end = var_44832_end_0, end_mask = var_44832_end_mask_0, x = var_44565_cast_fp16)[name = tensor("op_44832_cast_fp16")]; + tensor var_44839_begin_0 = const()[name = tensor("op_44839_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_44839_end_0 = const()[name = tensor("op_44839_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_44839_end_mask_0 = const()[name = tensor("op_44839_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_44839_cast_fp16 = slice_by_index(begin = var_44839_begin_0, end = var_44839_end_0, end_mask = var_44839_end_mask_0, x = var_44565_cast_fp16)[name = tensor("op_44839_cast_fp16")]; + tensor var_44846_begin_0 = const()[name = tensor("op_44846_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_44846_end_0 = const()[name = tensor("op_44846_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_44846_end_mask_0 = const()[name = tensor("op_44846_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_44846_cast_fp16 = slice_by_index(begin = var_44846_begin_0, end = var_44846_end_0, end_mask = var_44846_end_mask_0, x = var_44569_cast_fp16)[name = tensor("op_44846_cast_fp16")]; + tensor var_44853_begin_0 = const()[name = tensor("op_44853_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_44853_end_0 = const()[name = tensor("op_44853_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_44853_end_mask_0 = const()[name = tensor("op_44853_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_44853_cast_fp16 = slice_by_index(begin = var_44853_begin_0, end = var_44853_end_0, end_mask = var_44853_end_mask_0, x = var_44569_cast_fp16)[name = tensor("op_44853_cast_fp16")]; + tensor var_44860_begin_0 = const()[name = tensor("op_44860_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_44860_end_0 = const()[name = tensor("op_44860_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_44860_end_mask_0 = const()[name = tensor("op_44860_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_44860_cast_fp16 = slice_by_index(begin = var_44860_begin_0, end = var_44860_end_0, end_mask = var_44860_end_mask_0, x = var_44569_cast_fp16)[name = tensor("op_44860_cast_fp16")]; + tensor var_44867_begin_0 = const()[name = tensor("op_44867_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_44867_end_0 = const()[name = tensor("op_44867_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_44867_end_mask_0 = const()[name = tensor("op_44867_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_44867_cast_fp16 = slice_by_index(begin = var_44867_begin_0, end = var_44867_end_0, end_mask = var_44867_end_mask_0, x = var_44569_cast_fp16)[name = tensor("op_44867_cast_fp16")]; + tensor var_44874_begin_0 = const()[name = tensor("op_44874_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_44874_end_0 = const()[name = tensor("op_44874_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_44874_end_mask_0 = const()[name = tensor("op_44874_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_44874_cast_fp16 = slice_by_index(begin = var_44874_begin_0, end = var_44874_end_0, end_mask = var_44874_end_mask_0, x = var_44573_cast_fp16)[name = tensor("op_44874_cast_fp16")]; + tensor var_44881_begin_0 = const()[name = tensor("op_44881_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_44881_end_0 = const()[name = tensor("op_44881_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_44881_end_mask_0 = const()[name = tensor("op_44881_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_44881_cast_fp16 = slice_by_index(begin = var_44881_begin_0, end = var_44881_end_0, end_mask = var_44881_end_mask_0, x = var_44573_cast_fp16)[name = tensor("op_44881_cast_fp16")]; + tensor var_44888_begin_0 = const()[name = tensor("op_44888_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_44888_end_0 = const()[name = tensor("op_44888_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_44888_end_mask_0 = const()[name = tensor("op_44888_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_44888_cast_fp16 = slice_by_index(begin = var_44888_begin_0, end = var_44888_end_0, end_mask = var_44888_end_mask_0, x = var_44573_cast_fp16)[name = tensor("op_44888_cast_fp16")]; + tensor var_44895_begin_0 = const()[name = tensor("op_44895_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_44895_end_0 = const()[name = tensor("op_44895_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_44895_end_mask_0 = const()[name = tensor("op_44895_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_44895_cast_fp16 = slice_by_index(begin = var_44895_begin_0, end = var_44895_end_0, end_mask = var_44895_end_mask_0, x = var_44573_cast_fp16)[name = tensor("op_44895_cast_fp16")]; + tensor var_44902_begin_0 = const()[name = tensor("op_44902_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_44902_end_0 = const()[name = tensor("op_44902_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_44902_end_mask_0 = const()[name = tensor("op_44902_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_44902_cast_fp16 = slice_by_index(begin = var_44902_begin_0, end = var_44902_end_0, end_mask = var_44902_end_mask_0, x = var_44577_cast_fp16)[name = tensor("op_44902_cast_fp16")]; + tensor var_44909_begin_0 = const()[name = tensor("op_44909_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_44909_end_0 = const()[name = tensor("op_44909_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_44909_end_mask_0 = const()[name = tensor("op_44909_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_44909_cast_fp16 = slice_by_index(begin = var_44909_begin_0, end = var_44909_end_0, end_mask = var_44909_end_mask_0, x = var_44577_cast_fp16)[name = tensor("op_44909_cast_fp16")]; + tensor var_44916_begin_0 = const()[name = tensor("op_44916_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_44916_end_0 = const()[name = tensor("op_44916_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_44916_end_mask_0 = const()[name = tensor("op_44916_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_44916_cast_fp16 = slice_by_index(begin = var_44916_begin_0, end = var_44916_end_0, end_mask = var_44916_end_mask_0, x = var_44577_cast_fp16)[name = tensor("op_44916_cast_fp16")]; + tensor var_44923_begin_0 = const()[name = tensor("op_44923_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_44923_end_0 = const()[name = tensor("op_44923_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_44923_end_mask_0 = const()[name = tensor("op_44923_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_44923_cast_fp16 = slice_by_index(begin = var_44923_begin_0, end = var_44923_end_0, end_mask = var_44923_end_mask_0, x = var_44577_cast_fp16)[name = tensor("op_44923_cast_fp16")]; + tensor var_44930_begin_0 = const()[name = tensor("op_44930_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_44930_end_0 = const()[name = tensor("op_44930_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_44930_end_mask_0 = const()[name = tensor("op_44930_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_44930_cast_fp16 = slice_by_index(begin = var_44930_begin_0, end = var_44930_end_0, end_mask = var_44930_end_mask_0, x = var_44581_cast_fp16)[name = tensor("op_44930_cast_fp16")]; + tensor var_44937_begin_0 = const()[name = tensor("op_44937_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_44937_end_0 = const()[name = tensor("op_44937_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_44937_end_mask_0 = const()[name = tensor("op_44937_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_44937_cast_fp16 = slice_by_index(begin = var_44937_begin_0, end = var_44937_end_0, end_mask = var_44937_end_mask_0, x = var_44581_cast_fp16)[name = tensor("op_44937_cast_fp16")]; + tensor var_44944_begin_0 = const()[name = tensor("op_44944_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_44944_end_0 = const()[name = tensor("op_44944_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_44944_end_mask_0 = const()[name = tensor("op_44944_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_44944_cast_fp16 = slice_by_index(begin = var_44944_begin_0, end = var_44944_end_0, end_mask = var_44944_end_mask_0, x = var_44581_cast_fp16)[name = tensor("op_44944_cast_fp16")]; + tensor var_44951_begin_0 = const()[name = tensor("op_44951_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_44951_end_0 = const()[name = tensor("op_44951_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_44951_end_mask_0 = const()[name = tensor("op_44951_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_44951_cast_fp16 = slice_by_index(begin = var_44951_begin_0, end = var_44951_end_0, end_mask = var_44951_end_mask_0, x = var_44581_cast_fp16)[name = tensor("op_44951_cast_fp16")]; + tensor var_44958_begin_0 = const()[name = tensor("op_44958_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_44958_end_0 = const()[name = tensor("op_44958_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_44958_end_mask_0 = const()[name = tensor("op_44958_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_44958_cast_fp16 = slice_by_index(begin = var_44958_begin_0, end = var_44958_end_0, end_mask = var_44958_end_mask_0, x = var_44585_cast_fp16)[name = tensor("op_44958_cast_fp16")]; + tensor var_44965_begin_0 = const()[name = tensor("op_44965_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_44965_end_0 = const()[name = tensor("op_44965_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_44965_end_mask_0 = const()[name = tensor("op_44965_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_44965_cast_fp16 = slice_by_index(begin = var_44965_begin_0, end = var_44965_end_0, end_mask = var_44965_end_mask_0, x = var_44585_cast_fp16)[name = tensor("op_44965_cast_fp16")]; + tensor var_44972_begin_0 = const()[name = tensor("op_44972_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_44972_end_0 = const()[name = tensor("op_44972_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_44972_end_mask_0 = const()[name = tensor("op_44972_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_44972_cast_fp16 = slice_by_index(begin = var_44972_begin_0, end = var_44972_end_0, end_mask = var_44972_end_mask_0, x = var_44585_cast_fp16)[name = tensor("op_44972_cast_fp16")]; + tensor var_44979_begin_0 = const()[name = tensor("op_44979_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_44979_end_0 = const()[name = tensor("op_44979_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_44979_end_mask_0 = const()[name = tensor("op_44979_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_44979_cast_fp16 = slice_by_index(begin = var_44979_begin_0, end = var_44979_end_0, end_mask = var_44979_end_mask_0, x = var_44585_cast_fp16)[name = tensor("op_44979_cast_fp16")]; + tensor var_44986_begin_0 = const()[name = tensor("op_44986_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_44986_end_0 = const()[name = tensor("op_44986_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_44986_end_mask_0 = const()[name = tensor("op_44986_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_44986_cast_fp16 = slice_by_index(begin = var_44986_begin_0, end = var_44986_end_0, end_mask = var_44986_end_mask_0, x = var_44589_cast_fp16)[name = tensor("op_44986_cast_fp16")]; + tensor var_44993_begin_0 = const()[name = tensor("op_44993_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_44993_end_0 = const()[name = tensor("op_44993_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_44993_end_mask_0 = const()[name = tensor("op_44993_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_44993_cast_fp16 = slice_by_index(begin = var_44993_begin_0, end = var_44993_end_0, end_mask = var_44993_end_mask_0, x = var_44589_cast_fp16)[name = tensor("op_44993_cast_fp16")]; + tensor var_45000_begin_0 = const()[name = tensor("op_45000_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_45000_end_0 = const()[name = tensor("op_45000_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_45000_end_mask_0 = const()[name = tensor("op_45000_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_45000_cast_fp16 = slice_by_index(begin = var_45000_begin_0, end = var_45000_end_0, end_mask = var_45000_end_mask_0, x = var_44589_cast_fp16)[name = tensor("op_45000_cast_fp16")]; + tensor var_45007_begin_0 = const()[name = tensor("op_45007_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_45007_end_0 = const()[name = tensor("op_45007_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_45007_end_mask_0 = const()[name = tensor("op_45007_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_45007_cast_fp16 = slice_by_index(begin = var_45007_begin_0, end = var_45007_end_0, end_mask = var_45007_end_mask_0, x = var_44589_cast_fp16)[name = tensor("op_45007_cast_fp16")]; + tensor var_45014_begin_0 = const()[name = tensor("op_45014_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_45014_end_0 = const()[name = tensor("op_45014_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_45014_end_mask_0 = const()[name = tensor("op_45014_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_45014_cast_fp16 = slice_by_index(begin = var_45014_begin_0, end = var_45014_end_0, end_mask = var_45014_end_mask_0, x = var_44593_cast_fp16)[name = tensor("op_45014_cast_fp16")]; + tensor var_45021_begin_0 = const()[name = tensor("op_45021_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_45021_end_0 = const()[name = tensor("op_45021_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_45021_end_mask_0 = const()[name = tensor("op_45021_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_45021_cast_fp16 = slice_by_index(begin = var_45021_begin_0, end = var_45021_end_0, end_mask = var_45021_end_mask_0, x = var_44593_cast_fp16)[name = tensor("op_45021_cast_fp16")]; + tensor var_45028_begin_0 = const()[name = tensor("op_45028_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_45028_end_0 = const()[name = tensor("op_45028_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_45028_end_mask_0 = const()[name = tensor("op_45028_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_45028_cast_fp16 = slice_by_index(begin = var_45028_begin_0, end = var_45028_end_0, end_mask = var_45028_end_mask_0, x = var_44593_cast_fp16)[name = tensor("op_45028_cast_fp16")]; + tensor var_45035_begin_0 = const()[name = tensor("op_45035_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_45035_end_0 = const()[name = tensor("op_45035_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_45035_end_mask_0 = const()[name = tensor("op_45035_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_45035_cast_fp16 = slice_by_index(begin = var_45035_begin_0, end = var_45035_end_0, end_mask = var_45035_end_mask_0, x = var_44593_cast_fp16)[name = tensor("op_45035_cast_fp16")]; + tensor var_45042_begin_0 = const()[name = tensor("op_45042_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_45042_end_0 = const()[name = tensor("op_45042_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_45042_end_mask_0 = const()[name = tensor("op_45042_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_45042_cast_fp16 = slice_by_index(begin = var_45042_begin_0, end = var_45042_end_0, end_mask = var_45042_end_mask_0, x = var_44597_cast_fp16)[name = tensor("op_45042_cast_fp16")]; + tensor var_45049_begin_0 = const()[name = tensor("op_45049_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_45049_end_0 = const()[name = tensor("op_45049_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_45049_end_mask_0 = const()[name = tensor("op_45049_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_45049_cast_fp16 = slice_by_index(begin = var_45049_begin_0, end = var_45049_end_0, end_mask = var_45049_end_mask_0, x = var_44597_cast_fp16)[name = tensor("op_45049_cast_fp16")]; + tensor var_45056_begin_0 = const()[name = tensor("op_45056_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_45056_end_0 = const()[name = tensor("op_45056_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_45056_end_mask_0 = const()[name = tensor("op_45056_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_45056_cast_fp16 = slice_by_index(begin = var_45056_begin_0, end = var_45056_end_0, end_mask = var_45056_end_mask_0, x = var_44597_cast_fp16)[name = tensor("op_45056_cast_fp16")]; + tensor var_45063_begin_0 = const()[name = tensor("op_45063_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_45063_end_0 = const()[name = tensor("op_45063_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_45063_end_mask_0 = const()[name = tensor("op_45063_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_45063_cast_fp16 = slice_by_index(begin = var_45063_begin_0, end = var_45063_end_0, end_mask = var_45063_end_mask_0, x = var_44597_cast_fp16)[name = tensor("op_45063_cast_fp16")]; + tensor var_45070_begin_0 = const()[name = tensor("op_45070_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_45070_end_0 = const()[name = tensor("op_45070_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_45070_end_mask_0 = const()[name = tensor("op_45070_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_45070_cast_fp16 = slice_by_index(begin = var_45070_begin_0, end = var_45070_end_0, end_mask = var_45070_end_mask_0, x = var_44601_cast_fp16)[name = tensor("op_45070_cast_fp16")]; + tensor var_45077_begin_0 = const()[name = tensor("op_45077_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_45077_end_0 = const()[name = tensor("op_45077_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_45077_end_mask_0 = const()[name = tensor("op_45077_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_45077_cast_fp16 = slice_by_index(begin = var_45077_begin_0, end = var_45077_end_0, end_mask = var_45077_end_mask_0, x = var_44601_cast_fp16)[name = tensor("op_45077_cast_fp16")]; + tensor var_45084_begin_0 = const()[name = tensor("op_45084_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_45084_end_0 = const()[name = tensor("op_45084_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_45084_end_mask_0 = const()[name = tensor("op_45084_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_45084_cast_fp16 = slice_by_index(begin = var_45084_begin_0, end = var_45084_end_0, end_mask = var_45084_end_mask_0, x = var_44601_cast_fp16)[name = tensor("op_45084_cast_fp16")]; + tensor var_45091_begin_0 = const()[name = tensor("op_45091_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_45091_end_0 = const()[name = tensor("op_45091_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_45091_end_mask_0 = const()[name = tensor("op_45091_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_45091_cast_fp16 = slice_by_index(begin = var_45091_begin_0, end = var_45091_end_0, end_mask = var_45091_end_mask_0, x = var_44601_cast_fp16)[name = tensor("op_45091_cast_fp16")]; + tensor var_45098_begin_0 = const()[name = tensor("op_45098_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_45098_end_0 = const()[name = tensor("op_45098_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_45098_end_mask_0 = const()[name = tensor("op_45098_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_45098_cast_fp16 = slice_by_index(begin = var_45098_begin_0, end = var_45098_end_0, end_mask = var_45098_end_mask_0, x = var_44605_cast_fp16)[name = tensor("op_45098_cast_fp16")]; + tensor var_45105_begin_0 = const()[name = tensor("op_45105_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_45105_end_0 = const()[name = tensor("op_45105_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_45105_end_mask_0 = const()[name = tensor("op_45105_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_45105_cast_fp16 = slice_by_index(begin = var_45105_begin_0, end = var_45105_end_0, end_mask = var_45105_end_mask_0, x = var_44605_cast_fp16)[name = tensor("op_45105_cast_fp16")]; + tensor var_45112_begin_0 = const()[name = tensor("op_45112_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_45112_end_0 = const()[name = tensor("op_45112_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_45112_end_mask_0 = const()[name = tensor("op_45112_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_45112_cast_fp16 = slice_by_index(begin = var_45112_begin_0, end = var_45112_end_0, end_mask = var_45112_end_mask_0, x = var_44605_cast_fp16)[name = tensor("op_45112_cast_fp16")]; + tensor var_45119_begin_0 = const()[name = tensor("op_45119_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_45119_end_0 = const()[name = tensor("op_45119_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_45119_end_mask_0 = const()[name = tensor("op_45119_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_45119_cast_fp16 = slice_by_index(begin = var_45119_begin_0, end = var_45119_end_0, end_mask = var_45119_end_mask_0, x = var_44605_cast_fp16)[name = tensor("op_45119_cast_fp16")]; + tensor var_45126_begin_0 = const()[name = tensor("op_45126_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_45126_end_0 = const()[name = tensor("op_45126_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_45126_end_mask_0 = const()[name = tensor("op_45126_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_45126_cast_fp16 = slice_by_index(begin = var_45126_begin_0, end = var_45126_end_0, end_mask = var_45126_end_mask_0, x = var_44609_cast_fp16)[name = tensor("op_45126_cast_fp16")]; + tensor var_45133_begin_0 = const()[name = tensor("op_45133_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_45133_end_0 = const()[name = tensor("op_45133_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_45133_end_mask_0 = const()[name = tensor("op_45133_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_45133_cast_fp16 = slice_by_index(begin = var_45133_begin_0, end = var_45133_end_0, end_mask = var_45133_end_mask_0, x = var_44609_cast_fp16)[name = tensor("op_45133_cast_fp16")]; + tensor var_45140_begin_0 = const()[name = tensor("op_45140_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_45140_end_0 = const()[name = tensor("op_45140_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_45140_end_mask_0 = const()[name = tensor("op_45140_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_45140_cast_fp16 = slice_by_index(begin = var_45140_begin_0, end = var_45140_end_0, end_mask = var_45140_end_mask_0, x = var_44609_cast_fp16)[name = tensor("op_45140_cast_fp16")]; + tensor var_45147_begin_0 = const()[name = tensor("op_45147_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_45147_end_0 = const()[name = tensor("op_45147_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_45147_end_mask_0 = const()[name = tensor("op_45147_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_45147_cast_fp16 = slice_by_index(begin = var_45147_begin_0, end = var_45147_end_0, end_mask = var_45147_end_mask_0, x = var_44609_cast_fp16)[name = tensor("op_45147_cast_fp16")]; + tensor var_45154_begin_0 = const()[name = tensor("op_45154_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_45154_end_0 = const()[name = tensor("op_45154_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_45154_end_mask_0 = const()[name = tensor("op_45154_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_45154_cast_fp16 = slice_by_index(begin = var_45154_begin_0, end = var_45154_end_0, end_mask = var_45154_end_mask_0, x = var_44613_cast_fp16)[name = tensor("op_45154_cast_fp16")]; + tensor var_45161_begin_0 = const()[name = tensor("op_45161_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_45161_end_0 = const()[name = tensor("op_45161_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_45161_end_mask_0 = const()[name = tensor("op_45161_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_45161_cast_fp16 = slice_by_index(begin = var_45161_begin_0, end = var_45161_end_0, end_mask = var_45161_end_mask_0, x = var_44613_cast_fp16)[name = tensor("op_45161_cast_fp16")]; + tensor var_45168_begin_0 = const()[name = tensor("op_45168_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_45168_end_0 = const()[name = tensor("op_45168_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_45168_end_mask_0 = const()[name = tensor("op_45168_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_45168_cast_fp16 = slice_by_index(begin = var_45168_begin_0, end = var_45168_end_0, end_mask = var_45168_end_mask_0, x = var_44613_cast_fp16)[name = tensor("op_45168_cast_fp16")]; + tensor var_45175_begin_0 = const()[name = tensor("op_45175_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_45175_end_0 = const()[name = tensor("op_45175_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_45175_end_mask_0 = const()[name = tensor("op_45175_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_45175_cast_fp16 = slice_by_index(begin = var_45175_begin_0, end = var_45175_end_0, end_mask = var_45175_end_mask_0, x = var_44613_cast_fp16)[name = tensor("op_45175_cast_fp16")]; + tensor k_59_perm_0 = const()[name = tensor("k_59_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_45180_begin_0 = const()[name = tensor("op_45180_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_45180_end_0 = const()[name = tensor("op_45180_end_0"), val = tensor([1, 1500, 1, 64])]; + tensor var_45180_end_mask_0 = const()[name = tensor("op_45180_end_mask_0"), val = tensor([true, true, true, false])]; + tensor transpose_2 = transpose(perm = k_59_perm_0, x = key_59_cast_fp16)[name = tensor("transpose_2")]; + tensor var_45180_cast_fp16 = slice_by_index(begin = var_45180_begin_0, end = var_45180_end_0, end_mask = var_45180_end_mask_0, x = transpose_2)[name = tensor("op_45180_cast_fp16")]; + tensor var_45184_begin_0 = const()[name = tensor("op_45184_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor var_45184_end_0 = const()[name = tensor("op_45184_end_0"), val = tensor([1, 1500, 1, 128])]; + tensor var_45184_end_mask_0 = const()[name = tensor("op_45184_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_45184_cast_fp16 = slice_by_index(begin = var_45184_begin_0, end = var_45184_end_0, end_mask = var_45184_end_mask_0, x = transpose_2)[name = tensor("op_45184_cast_fp16")]; + tensor var_45188_begin_0 = const()[name = tensor("op_45188_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_45188_end_0 = const()[name = tensor("op_45188_end_0"), val = tensor([1, 1500, 1, 192])]; + tensor var_45188_end_mask_0 = const()[name = tensor("op_45188_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_45188_cast_fp16 = slice_by_index(begin = var_45188_begin_0, end = var_45188_end_0, end_mask = var_45188_end_mask_0, x = transpose_2)[name = tensor("op_45188_cast_fp16")]; + tensor var_45192_begin_0 = const()[name = tensor("op_45192_begin_0"), val = tensor([0, 0, 0, 192])]; + tensor var_45192_end_0 = const()[name = tensor("op_45192_end_0"), val = tensor([1, 1500, 1, 256])]; + tensor var_45192_end_mask_0 = const()[name = tensor("op_45192_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_45192_cast_fp16 = slice_by_index(begin = var_45192_begin_0, end = var_45192_end_0, end_mask = var_45192_end_mask_0, x = transpose_2)[name = tensor("op_45192_cast_fp16")]; + tensor var_45196_begin_0 = const()[name = tensor("op_45196_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_45196_end_0 = const()[name = tensor("op_45196_end_0"), val = tensor([1, 1500, 1, 320])]; + tensor var_45196_end_mask_0 = const()[name = tensor("op_45196_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_45196_cast_fp16 = slice_by_index(begin = var_45196_begin_0, end = var_45196_end_0, end_mask = var_45196_end_mask_0, x = transpose_2)[name = tensor("op_45196_cast_fp16")]; + tensor var_45200_begin_0 = const()[name = tensor("op_45200_begin_0"), val = tensor([0, 0, 0, 320])]; + tensor var_45200_end_0 = const()[name = tensor("op_45200_end_0"), val = tensor([1, 1500, 1, 384])]; + tensor var_45200_end_mask_0 = const()[name = tensor("op_45200_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_45200_cast_fp16 = slice_by_index(begin = var_45200_begin_0, end = var_45200_end_0, end_mask = var_45200_end_mask_0, x = transpose_2)[name = tensor("op_45200_cast_fp16")]; + tensor var_45204_begin_0 = const()[name = tensor("op_45204_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_45204_end_0 = const()[name = tensor("op_45204_end_0"), val = tensor([1, 1500, 1, 448])]; + tensor var_45204_end_mask_0 = const()[name = tensor("op_45204_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_45204_cast_fp16 = slice_by_index(begin = var_45204_begin_0, end = var_45204_end_0, end_mask = var_45204_end_mask_0, x = transpose_2)[name = tensor("op_45204_cast_fp16")]; + tensor var_45208_begin_0 = const()[name = tensor("op_45208_begin_0"), val = tensor([0, 0, 0, 448])]; + tensor var_45208_end_0 = const()[name = tensor("op_45208_end_0"), val = tensor([1, 1500, 1, 512])]; + tensor var_45208_end_mask_0 = const()[name = tensor("op_45208_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_45208_cast_fp16 = slice_by_index(begin = var_45208_begin_0, end = var_45208_end_0, end_mask = var_45208_end_mask_0, x = transpose_2)[name = tensor("op_45208_cast_fp16")]; + tensor var_45212_begin_0 = const()[name = tensor("op_45212_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_45212_end_0 = const()[name = tensor("op_45212_end_0"), val = tensor([1, 1500, 1, 576])]; + tensor var_45212_end_mask_0 = const()[name = tensor("op_45212_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_45212_cast_fp16 = slice_by_index(begin = var_45212_begin_0, end = var_45212_end_0, end_mask = var_45212_end_mask_0, x = transpose_2)[name = tensor("op_45212_cast_fp16")]; + tensor var_45216_begin_0 = const()[name = tensor("op_45216_begin_0"), val = tensor([0, 0, 0, 576])]; + tensor var_45216_end_0 = const()[name = tensor("op_45216_end_0"), val = tensor([1, 1500, 1, 640])]; + tensor var_45216_end_mask_0 = const()[name = tensor("op_45216_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_45216_cast_fp16 = slice_by_index(begin = var_45216_begin_0, end = var_45216_end_0, end_mask = var_45216_end_mask_0, x = transpose_2)[name = tensor("op_45216_cast_fp16")]; + tensor var_45220_begin_0 = const()[name = tensor("op_45220_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_45220_end_0 = const()[name = tensor("op_45220_end_0"), val = tensor([1, 1500, 1, 704])]; + tensor var_45220_end_mask_0 = const()[name = tensor("op_45220_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_45220_cast_fp16 = slice_by_index(begin = var_45220_begin_0, end = var_45220_end_0, end_mask = var_45220_end_mask_0, x = transpose_2)[name = tensor("op_45220_cast_fp16")]; + tensor var_45224_begin_0 = const()[name = tensor("op_45224_begin_0"), val = tensor([0, 0, 0, 704])]; + tensor var_45224_end_0 = const()[name = tensor("op_45224_end_0"), val = tensor([1, 1500, 1, 768])]; + tensor var_45224_end_mask_0 = const()[name = tensor("op_45224_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_45224_cast_fp16 = slice_by_index(begin = var_45224_begin_0, end = var_45224_end_0, end_mask = var_45224_end_mask_0, x = transpose_2)[name = tensor("op_45224_cast_fp16")]; + tensor var_45228_begin_0 = const()[name = tensor("op_45228_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_45228_end_0 = const()[name = tensor("op_45228_end_0"), val = tensor([1, 1500, 1, 832])]; + tensor var_45228_end_mask_0 = const()[name = tensor("op_45228_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_45228_cast_fp16 = slice_by_index(begin = var_45228_begin_0, end = var_45228_end_0, end_mask = var_45228_end_mask_0, x = transpose_2)[name = tensor("op_45228_cast_fp16")]; + tensor var_45232_begin_0 = const()[name = tensor("op_45232_begin_0"), val = tensor([0, 0, 0, 832])]; + tensor var_45232_end_0 = const()[name = tensor("op_45232_end_0"), val = tensor([1, 1500, 1, 896])]; + tensor var_45232_end_mask_0 = const()[name = tensor("op_45232_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_45232_cast_fp16 = slice_by_index(begin = var_45232_begin_0, end = var_45232_end_0, end_mask = var_45232_end_mask_0, x = transpose_2)[name = tensor("op_45232_cast_fp16")]; + tensor var_45236_begin_0 = const()[name = tensor("op_45236_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_45236_end_0 = const()[name = tensor("op_45236_end_0"), val = tensor([1, 1500, 1, 960])]; + tensor var_45236_end_mask_0 = const()[name = tensor("op_45236_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_45236_cast_fp16 = slice_by_index(begin = var_45236_begin_0, end = var_45236_end_0, end_mask = var_45236_end_mask_0, x = transpose_2)[name = tensor("op_45236_cast_fp16")]; + tensor var_45240_begin_0 = const()[name = tensor("op_45240_begin_0"), val = tensor([0, 0, 0, 960])]; + tensor var_45240_end_0 = const()[name = tensor("op_45240_end_0"), val = tensor([1, 1500, 1, 1024])]; + tensor var_45240_end_mask_0 = const()[name = tensor("op_45240_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_45240_cast_fp16 = slice_by_index(begin = var_45240_begin_0, end = var_45240_end_0, end_mask = var_45240_end_mask_0, x = transpose_2)[name = tensor("op_45240_cast_fp16")]; + tensor var_45244_begin_0 = const()[name = tensor("op_45244_begin_0"), val = tensor([0, 0, 0, 1024])]; + tensor var_45244_end_0 = const()[name = tensor("op_45244_end_0"), val = tensor([1, 1500, 1, 1088])]; + tensor var_45244_end_mask_0 = const()[name = tensor("op_45244_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_45244_cast_fp16 = slice_by_index(begin = var_45244_begin_0, end = var_45244_end_0, end_mask = var_45244_end_mask_0, x = transpose_2)[name = tensor("op_45244_cast_fp16")]; + tensor var_45248_begin_0 = const()[name = tensor("op_45248_begin_0"), val = tensor([0, 0, 0, 1088])]; + tensor var_45248_end_0 = const()[name = tensor("op_45248_end_0"), val = tensor([1, 1500, 1, 1152])]; + tensor var_45248_end_mask_0 = const()[name = tensor("op_45248_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_45248_cast_fp16 = slice_by_index(begin = var_45248_begin_0, end = var_45248_end_0, end_mask = var_45248_end_mask_0, x = transpose_2)[name = tensor("op_45248_cast_fp16")]; + tensor var_45252_begin_0 = const()[name = tensor("op_45252_begin_0"), val = tensor([0, 0, 0, 1152])]; + tensor var_45252_end_0 = const()[name = tensor("op_45252_end_0"), val = tensor([1, 1500, 1, 1216])]; + tensor var_45252_end_mask_0 = const()[name = tensor("op_45252_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_45252_cast_fp16 = slice_by_index(begin = var_45252_begin_0, end = var_45252_end_0, end_mask = var_45252_end_mask_0, x = transpose_2)[name = tensor("op_45252_cast_fp16")]; + tensor var_45256_begin_0 = const()[name = tensor("op_45256_begin_0"), val = tensor([0, 0, 0, 1216])]; + tensor var_45256_end_0 = const()[name = tensor("op_45256_end_0"), val = tensor([1, 1500, 1, 1280])]; + tensor var_45256_end_mask_0 = const()[name = tensor("op_45256_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_45256_cast_fp16 = slice_by_index(begin = var_45256_begin_0, end = var_45256_end_0, end_mask = var_45256_end_mask_0, x = transpose_2)[name = tensor("op_45256_cast_fp16")]; + tensor var_45258_begin_0 = const()[name = tensor("op_45258_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_45258_end_0 = const()[name = tensor("op_45258_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_45258_end_mask_0 = const()[name = tensor("op_45258_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_45258_cast_fp16 = slice_by_index(begin = var_45258_begin_0, end = var_45258_end_0, end_mask = var_45258_end_mask_0, x = value_59_cast_fp16)[name = tensor("op_45258_cast_fp16")]; + tensor var_45262_begin_0 = const()[name = tensor("op_45262_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_45262_end_0 = const()[name = tensor("op_45262_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_45262_end_mask_0 = const()[name = tensor("op_45262_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_45262_cast_fp16 = slice_by_index(begin = var_45262_begin_0, end = var_45262_end_0, end_mask = var_45262_end_mask_0, x = value_59_cast_fp16)[name = tensor("op_45262_cast_fp16")]; + tensor var_45266_begin_0 = const()[name = tensor("op_45266_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_45266_end_0 = const()[name = tensor("op_45266_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_45266_end_mask_0 = const()[name = tensor("op_45266_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_45266_cast_fp16 = slice_by_index(begin = var_45266_begin_0, end = var_45266_end_0, end_mask = var_45266_end_mask_0, x = value_59_cast_fp16)[name = tensor("op_45266_cast_fp16")]; + tensor var_45270_begin_0 = const()[name = tensor("op_45270_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_45270_end_0 = const()[name = tensor("op_45270_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_45270_end_mask_0 = const()[name = tensor("op_45270_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_45270_cast_fp16 = slice_by_index(begin = var_45270_begin_0, end = var_45270_end_0, end_mask = var_45270_end_mask_0, x = value_59_cast_fp16)[name = tensor("op_45270_cast_fp16")]; + tensor var_45274_begin_0 = const()[name = tensor("op_45274_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_45274_end_0 = const()[name = tensor("op_45274_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_45274_end_mask_0 = const()[name = tensor("op_45274_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_45274_cast_fp16 = slice_by_index(begin = var_45274_begin_0, end = var_45274_end_0, end_mask = var_45274_end_mask_0, x = value_59_cast_fp16)[name = tensor("op_45274_cast_fp16")]; + tensor var_45278_begin_0 = const()[name = tensor("op_45278_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_45278_end_0 = const()[name = tensor("op_45278_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_45278_end_mask_0 = const()[name = tensor("op_45278_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_45278_cast_fp16 = slice_by_index(begin = var_45278_begin_0, end = var_45278_end_0, end_mask = var_45278_end_mask_0, x = value_59_cast_fp16)[name = tensor("op_45278_cast_fp16")]; + tensor var_45282_begin_0 = const()[name = tensor("op_45282_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_45282_end_0 = const()[name = tensor("op_45282_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_45282_end_mask_0 = const()[name = tensor("op_45282_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_45282_cast_fp16 = slice_by_index(begin = var_45282_begin_0, end = var_45282_end_0, end_mask = var_45282_end_mask_0, x = value_59_cast_fp16)[name = tensor("op_45282_cast_fp16")]; + tensor var_45286_begin_0 = const()[name = tensor("op_45286_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_45286_end_0 = const()[name = tensor("op_45286_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_45286_end_mask_0 = const()[name = tensor("op_45286_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_45286_cast_fp16 = slice_by_index(begin = var_45286_begin_0, end = var_45286_end_0, end_mask = var_45286_end_mask_0, x = value_59_cast_fp16)[name = tensor("op_45286_cast_fp16")]; + tensor var_45290_begin_0 = const()[name = tensor("op_45290_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_45290_end_0 = const()[name = tensor("op_45290_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_45290_end_mask_0 = const()[name = tensor("op_45290_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_45290_cast_fp16 = slice_by_index(begin = var_45290_begin_0, end = var_45290_end_0, end_mask = var_45290_end_mask_0, x = value_59_cast_fp16)[name = tensor("op_45290_cast_fp16")]; + tensor var_45294_begin_0 = const()[name = tensor("op_45294_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_45294_end_0 = const()[name = tensor("op_45294_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_45294_end_mask_0 = const()[name = tensor("op_45294_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_45294_cast_fp16 = slice_by_index(begin = var_45294_begin_0, end = var_45294_end_0, end_mask = var_45294_end_mask_0, x = value_59_cast_fp16)[name = tensor("op_45294_cast_fp16")]; + tensor var_45298_begin_0 = const()[name = tensor("op_45298_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_45298_end_0 = const()[name = tensor("op_45298_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_45298_end_mask_0 = const()[name = tensor("op_45298_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_45298_cast_fp16 = slice_by_index(begin = var_45298_begin_0, end = var_45298_end_0, end_mask = var_45298_end_mask_0, x = value_59_cast_fp16)[name = tensor("op_45298_cast_fp16")]; + tensor var_45302_begin_0 = const()[name = tensor("op_45302_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_45302_end_0 = const()[name = tensor("op_45302_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_45302_end_mask_0 = const()[name = tensor("op_45302_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_45302_cast_fp16 = slice_by_index(begin = var_45302_begin_0, end = var_45302_end_0, end_mask = var_45302_end_mask_0, x = value_59_cast_fp16)[name = tensor("op_45302_cast_fp16")]; + tensor var_45306_begin_0 = const()[name = tensor("op_45306_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_45306_end_0 = const()[name = tensor("op_45306_end_0"), val = tensor([1, 832, 1, 1500])]; + tensor var_45306_end_mask_0 = const()[name = tensor("op_45306_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_45306_cast_fp16 = slice_by_index(begin = var_45306_begin_0, end = var_45306_end_0, end_mask = var_45306_end_mask_0, x = value_59_cast_fp16)[name = tensor("op_45306_cast_fp16")]; + tensor var_45310_begin_0 = const()[name = tensor("op_45310_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_45310_end_0 = const()[name = tensor("op_45310_end_0"), val = tensor([1, 896, 1, 1500])]; + tensor var_45310_end_mask_0 = const()[name = tensor("op_45310_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_45310_cast_fp16 = slice_by_index(begin = var_45310_begin_0, end = var_45310_end_0, end_mask = var_45310_end_mask_0, x = value_59_cast_fp16)[name = tensor("op_45310_cast_fp16")]; + tensor var_45314_begin_0 = const()[name = tensor("op_45314_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_45314_end_0 = const()[name = tensor("op_45314_end_0"), val = tensor([1, 960, 1, 1500])]; + tensor var_45314_end_mask_0 = const()[name = tensor("op_45314_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_45314_cast_fp16 = slice_by_index(begin = var_45314_begin_0, end = var_45314_end_0, end_mask = var_45314_end_mask_0, x = value_59_cast_fp16)[name = tensor("op_45314_cast_fp16")]; + tensor var_45318_begin_0 = const()[name = tensor("op_45318_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_45318_end_0 = const()[name = tensor("op_45318_end_0"), val = tensor([1, 1024, 1, 1500])]; + tensor var_45318_end_mask_0 = const()[name = tensor("op_45318_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_45318_cast_fp16 = slice_by_index(begin = var_45318_begin_0, end = var_45318_end_0, end_mask = var_45318_end_mask_0, x = value_59_cast_fp16)[name = tensor("op_45318_cast_fp16")]; + tensor var_45322_begin_0 = const()[name = tensor("op_45322_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_45322_end_0 = const()[name = tensor("op_45322_end_0"), val = tensor([1, 1088, 1, 1500])]; + tensor var_45322_end_mask_0 = const()[name = tensor("op_45322_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_45322_cast_fp16 = slice_by_index(begin = var_45322_begin_0, end = var_45322_end_0, end_mask = var_45322_end_mask_0, x = value_59_cast_fp16)[name = tensor("op_45322_cast_fp16")]; + tensor var_45326_begin_0 = const()[name = tensor("op_45326_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_45326_end_0 = const()[name = tensor("op_45326_end_0"), val = tensor([1, 1152, 1, 1500])]; + tensor var_45326_end_mask_0 = const()[name = tensor("op_45326_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_45326_cast_fp16 = slice_by_index(begin = var_45326_begin_0, end = var_45326_end_0, end_mask = var_45326_end_mask_0, x = value_59_cast_fp16)[name = tensor("op_45326_cast_fp16")]; + tensor var_45330_begin_0 = const()[name = tensor("op_45330_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_45330_end_0 = const()[name = tensor("op_45330_end_0"), val = tensor([1, 1216, 1, 1500])]; + tensor var_45330_end_mask_0 = const()[name = tensor("op_45330_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_45330_cast_fp16 = slice_by_index(begin = var_45330_begin_0, end = var_45330_end_0, end_mask = var_45330_end_mask_0, x = value_59_cast_fp16)[name = tensor("op_45330_cast_fp16")]; + tensor var_45334_begin_0 = const()[name = tensor("op_45334_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_45334_end_0 = const()[name = tensor("op_45334_end_0"), val = tensor([1, 1280, 1, 1500])]; + tensor var_45334_end_mask_0 = const()[name = tensor("op_45334_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_45334_cast_fp16 = slice_by_index(begin = var_45334_begin_0, end = var_45334_end_0, end_mask = var_45334_end_mask_0, x = value_59_cast_fp16)[name = tensor("op_45334_cast_fp16")]; + tensor var_45338_equation_0 = const()[name = tensor("op_45338_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45338_cast_fp16 = einsum(equation = var_45338_equation_0, values = (var_45180_cast_fp16, var_44622_cast_fp16))[name = tensor("op_45338_cast_fp16")]; + tensor var_45339_to_fp16 = const()[name = tensor("op_45339_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4641_cast_fp16 = mul(x = var_45338_cast_fp16, y = var_45339_to_fp16)[name = tensor("aw_chunk_4641_cast_fp16")]; + tensor var_45342_equation_0 = const()[name = tensor("op_45342_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45342_cast_fp16 = einsum(equation = var_45342_equation_0, values = (var_45180_cast_fp16, var_44629_cast_fp16))[name = tensor("op_45342_cast_fp16")]; + tensor var_45343_to_fp16 = const()[name = tensor("op_45343_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4643_cast_fp16 = mul(x = var_45342_cast_fp16, y = var_45343_to_fp16)[name = tensor("aw_chunk_4643_cast_fp16")]; + tensor var_45346_equation_0 = const()[name = tensor("op_45346_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45346_cast_fp16 = einsum(equation = var_45346_equation_0, values = (var_45180_cast_fp16, var_44636_cast_fp16))[name = tensor("op_45346_cast_fp16")]; + tensor var_45347_to_fp16 = const()[name = tensor("op_45347_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4645_cast_fp16 = mul(x = var_45346_cast_fp16, y = var_45347_to_fp16)[name = tensor("aw_chunk_4645_cast_fp16")]; + tensor var_45350_equation_0 = const()[name = tensor("op_45350_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45350_cast_fp16 = einsum(equation = var_45350_equation_0, values = (var_45180_cast_fp16, var_44643_cast_fp16))[name = tensor("op_45350_cast_fp16")]; + tensor var_45351_to_fp16 = const()[name = tensor("op_45351_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4647_cast_fp16 = mul(x = var_45350_cast_fp16, y = var_45351_to_fp16)[name = tensor("aw_chunk_4647_cast_fp16")]; + tensor var_45354_equation_0 = const()[name = tensor("op_45354_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45354_cast_fp16 = einsum(equation = var_45354_equation_0, values = (var_45184_cast_fp16, var_44650_cast_fp16))[name = tensor("op_45354_cast_fp16")]; + tensor var_45355_to_fp16 = const()[name = tensor("op_45355_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4649_cast_fp16 = mul(x = var_45354_cast_fp16, y = var_45355_to_fp16)[name = tensor("aw_chunk_4649_cast_fp16")]; + tensor var_45358_equation_0 = const()[name = tensor("op_45358_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45358_cast_fp16 = einsum(equation = var_45358_equation_0, values = (var_45184_cast_fp16, var_44657_cast_fp16))[name = tensor("op_45358_cast_fp16")]; + tensor var_45359_to_fp16 = const()[name = tensor("op_45359_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4651_cast_fp16 = mul(x = var_45358_cast_fp16, y = var_45359_to_fp16)[name = tensor("aw_chunk_4651_cast_fp16")]; + tensor var_45362_equation_0 = const()[name = tensor("op_45362_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45362_cast_fp16 = einsum(equation = var_45362_equation_0, values = (var_45184_cast_fp16, var_44664_cast_fp16))[name = tensor("op_45362_cast_fp16")]; + tensor var_45363_to_fp16 = const()[name = tensor("op_45363_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4653_cast_fp16 = mul(x = var_45362_cast_fp16, y = var_45363_to_fp16)[name = tensor("aw_chunk_4653_cast_fp16")]; + tensor var_45366_equation_0 = const()[name = tensor("op_45366_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45366_cast_fp16 = einsum(equation = var_45366_equation_0, values = (var_45184_cast_fp16, var_44671_cast_fp16))[name = tensor("op_45366_cast_fp16")]; + tensor var_45367_to_fp16 = const()[name = tensor("op_45367_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4655_cast_fp16 = mul(x = var_45366_cast_fp16, y = var_45367_to_fp16)[name = tensor("aw_chunk_4655_cast_fp16")]; + tensor var_45370_equation_0 = const()[name = tensor("op_45370_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45370_cast_fp16 = einsum(equation = var_45370_equation_0, values = (var_45188_cast_fp16, var_44678_cast_fp16))[name = tensor("op_45370_cast_fp16")]; + tensor var_45371_to_fp16 = const()[name = tensor("op_45371_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4657_cast_fp16 = mul(x = var_45370_cast_fp16, y = var_45371_to_fp16)[name = tensor("aw_chunk_4657_cast_fp16")]; + tensor var_45374_equation_0 = const()[name = tensor("op_45374_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45374_cast_fp16 = einsum(equation = var_45374_equation_0, values = (var_45188_cast_fp16, var_44685_cast_fp16))[name = tensor("op_45374_cast_fp16")]; + tensor var_45375_to_fp16 = const()[name = tensor("op_45375_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4659_cast_fp16 = mul(x = var_45374_cast_fp16, y = var_45375_to_fp16)[name = tensor("aw_chunk_4659_cast_fp16")]; + tensor var_45378_equation_0 = const()[name = tensor("op_45378_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45378_cast_fp16 = einsum(equation = var_45378_equation_0, values = (var_45188_cast_fp16, var_44692_cast_fp16))[name = tensor("op_45378_cast_fp16")]; + tensor var_45379_to_fp16 = const()[name = tensor("op_45379_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4661_cast_fp16 = mul(x = var_45378_cast_fp16, y = var_45379_to_fp16)[name = tensor("aw_chunk_4661_cast_fp16")]; + tensor var_45382_equation_0 = const()[name = tensor("op_45382_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45382_cast_fp16 = einsum(equation = var_45382_equation_0, values = (var_45188_cast_fp16, var_44699_cast_fp16))[name = tensor("op_45382_cast_fp16")]; + tensor var_45383_to_fp16 = const()[name = tensor("op_45383_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4663_cast_fp16 = mul(x = var_45382_cast_fp16, y = var_45383_to_fp16)[name = tensor("aw_chunk_4663_cast_fp16")]; + tensor var_45386_equation_0 = const()[name = tensor("op_45386_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45386_cast_fp16 = einsum(equation = var_45386_equation_0, values = (var_45192_cast_fp16, var_44706_cast_fp16))[name = tensor("op_45386_cast_fp16")]; + tensor var_45387_to_fp16 = const()[name = tensor("op_45387_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4665_cast_fp16 = mul(x = var_45386_cast_fp16, y = var_45387_to_fp16)[name = tensor("aw_chunk_4665_cast_fp16")]; + tensor var_45390_equation_0 = const()[name = tensor("op_45390_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45390_cast_fp16 = einsum(equation = var_45390_equation_0, values = (var_45192_cast_fp16, var_44713_cast_fp16))[name = tensor("op_45390_cast_fp16")]; + tensor var_45391_to_fp16 = const()[name = tensor("op_45391_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4667_cast_fp16 = mul(x = var_45390_cast_fp16, y = var_45391_to_fp16)[name = tensor("aw_chunk_4667_cast_fp16")]; + tensor var_45394_equation_0 = const()[name = tensor("op_45394_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45394_cast_fp16 = einsum(equation = var_45394_equation_0, values = (var_45192_cast_fp16, var_44720_cast_fp16))[name = tensor("op_45394_cast_fp16")]; + tensor var_45395_to_fp16 = const()[name = tensor("op_45395_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4669_cast_fp16 = mul(x = var_45394_cast_fp16, y = var_45395_to_fp16)[name = tensor("aw_chunk_4669_cast_fp16")]; + tensor var_45398_equation_0 = const()[name = tensor("op_45398_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45398_cast_fp16 = einsum(equation = var_45398_equation_0, values = (var_45192_cast_fp16, var_44727_cast_fp16))[name = tensor("op_45398_cast_fp16")]; + tensor var_45399_to_fp16 = const()[name = tensor("op_45399_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4671_cast_fp16 = mul(x = var_45398_cast_fp16, y = var_45399_to_fp16)[name = tensor("aw_chunk_4671_cast_fp16")]; + tensor var_45402_equation_0 = const()[name = tensor("op_45402_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45402_cast_fp16 = einsum(equation = var_45402_equation_0, values = (var_45196_cast_fp16, var_44734_cast_fp16))[name = tensor("op_45402_cast_fp16")]; + tensor var_45403_to_fp16 = const()[name = tensor("op_45403_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4673_cast_fp16 = mul(x = var_45402_cast_fp16, y = var_45403_to_fp16)[name = tensor("aw_chunk_4673_cast_fp16")]; + tensor var_45406_equation_0 = const()[name = tensor("op_45406_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45406_cast_fp16 = einsum(equation = var_45406_equation_0, values = (var_45196_cast_fp16, var_44741_cast_fp16))[name = tensor("op_45406_cast_fp16")]; + tensor var_45407_to_fp16 = const()[name = tensor("op_45407_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4675_cast_fp16 = mul(x = var_45406_cast_fp16, y = var_45407_to_fp16)[name = tensor("aw_chunk_4675_cast_fp16")]; + tensor var_45410_equation_0 = const()[name = tensor("op_45410_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45410_cast_fp16 = einsum(equation = var_45410_equation_0, values = (var_45196_cast_fp16, var_44748_cast_fp16))[name = tensor("op_45410_cast_fp16")]; + tensor var_45411_to_fp16 = const()[name = tensor("op_45411_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4677_cast_fp16 = mul(x = var_45410_cast_fp16, y = var_45411_to_fp16)[name = tensor("aw_chunk_4677_cast_fp16")]; + tensor var_45414_equation_0 = const()[name = tensor("op_45414_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45414_cast_fp16 = einsum(equation = var_45414_equation_0, values = (var_45196_cast_fp16, var_44755_cast_fp16))[name = tensor("op_45414_cast_fp16")]; + tensor var_45415_to_fp16 = const()[name = tensor("op_45415_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4679_cast_fp16 = mul(x = var_45414_cast_fp16, y = var_45415_to_fp16)[name = tensor("aw_chunk_4679_cast_fp16")]; + tensor var_45418_equation_0 = const()[name = tensor("op_45418_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45418_cast_fp16 = einsum(equation = var_45418_equation_0, values = (var_45200_cast_fp16, var_44762_cast_fp16))[name = tensor("op_45418_cast_fp16")]; + tensor var_45419_to_fp16 = const()[name = tensor("op_45419_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4681_cast_fp16 = mul(x = var_45418_cast_fp16, y = var_45419_to_fp16)[name = tensor("aw_chunk_4681_cast_fp16")]; + tensor var_45422_equation_0 = const()[name = tensor("op_45422_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45422_cast_fp16 = einsum(equation = var_45422_equation_0, values = (var_45200_cast_fp16, var_44769_cast_fp16))[name = tensor("op_45422_cast_fp16")]; + tensor var_45423_to_fp16 = const()[name = tensor("op_45423_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4683_cast_fp16 = mul(x = var_45422_cast_fp16, y = var_45423_to_fp16)[name = tensor("aw_chunk_4683_cast_fp16")]; + tensor var_45426_equation_0 = const()[name = tensor("op_45426_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45426_cast_fp16 = einsum(equation = var_45426_equation_0, values = (var_45200_cast_fp16, var_44776_cast_fp16))[name = tensor("op_45426_cast_fp16")]; + tensor var_45427_to_fp16 = const()[name = tensor("op_45427_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4685_cast_fp16 = mul(x = var_45426_cast_fp16, y = var_45427_to_fp16)[name = tensor("aw_chunk_4685_cast_fp16")]; + tensor var_45430_equation_0 = const()[name = tensor("op_45430_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45430_cast_fp16 = einsum(equation = var_45430_equation_0, values = (var_45200_cast_fp16, var_44783_cast_fp16))[name = tensor("op_45430_cast_fp16")]; + tensor var_45431_to_fp16 = const()[name = tensor("op_45431_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4687_cast_fp16 = mul(x = var_45430_cast_fp16, y = var_45431_to_fp16)[name = tensor("aw_chunk_4687_cast_fp16")]; + tensor var_45434_equation_0 = const()[name = tensor("op_45434_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45434_cast_fp16 = einsum(equation = var_45434_equation_0, values = (var_45204_cast_fp16, var_44790_cast_fp16))[name = tensor("op_45434_cast_fp16")]; + tensor var_45435_to_fp16 = const()[name = tensor("op_45435_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4689_cast_fp16 = mul(x = var_45434_cast_fp16, y = var_45435_to_fp16)[name = tensor("aw_chunk_4689_cast_fp16")]; + tensor var_45438_equation_0 = const()[name = tensor("op_45438_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45438_cast_fp16 = einsum(equation = var_45438_equation_0, values = (var_45204_cast_fp16, var_44797_cast_fp16))[name = tensor("op_45438_cast_fp16")]; + tensor var_45439_to_fp16 = const()[name = tensor("op_45439_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4691_cast_fp16 = mul(x = var_45438_cast_fp16, y = var_45439_to_fp16)[name = tensor("aw_chunk_4691_cast_fp16")]; + tensor var_45442_equation_0 = const()[name = tensor("op_45442_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45442_cast_fp16 = einsum(equation = var_45442_equation_0, values = (var_45204_cast_fp16, var_44804_cast_fp16))[name = tensor("op_45442_cast_fp16")]; + tensor var_45443_to_fp16 = const()[name = tensor("op_45443_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4693_cast_fp16 = mul(x = var_45442_cast_fp16, y = var_45443_to_fp16)[name = tensor("aw_chunk_4693_cast_fp16")]; + tensor var_45446_equation_0 = const()[name = tensor("op_45446_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45446_cast_fp16 = einsum(equation = var_45446_equation_0, values = (var_45204_cast_fp16, var_44811_cast_fp16))[name = tensor("op_45446_cast_fp16")]; + tensor var_45447_to_fp16 = const()[name = tensor("op_45447_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4695_cast_fp16 = mul(x = var_45446_cast_fp16, y = var_45447_to_fp16)[name = tensor("aw_chunk_4695_cast_fp16")]; + tensor var_45450_equation_0 = const()[name = tensor("op_45450_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45450_cast_fp16 = einsum(equation = var_45450_equation_0, values = (var_45208_cast_fp16, var_44818_cast_fp16))[name = tensor("op_45450_cast_fp16")]; + tensor var_45451_to_fp16 = const()[name = tensor("op_45451_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4697_cast_fp16 = mul(x = var_45450_cast_fp16, y = var_45451_to_fp16)[name = tensor("aw_chunk_4697_cast_fp16")]; + tensor var_45454_equation_0 = const()[name = tensor("op_45454_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45454_cast_fp16 = einsum(equation = var_45454_equation_0, values = (var_45208_cast_fp16, var_44825_cast_fp16))[name = tensor("op_45454_cast_fp16")]; + tensor var_45455_to_fp16 = const()[name = tensor("op_45455_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4699_cast_fp16 = mul(x = var_45454_cast_fp16, y = var_45455_to_fp16)[name = tensor("aw_chunk_4699_cast_fp16")]; + tensor var_45458_equation_0 = const()[name = tensor("op_45458_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45458_cast_fp16 = einsum(equation = var_45458_equation_0, values = (var_45208_cast_fp16, var_44832_cast_fp16))[name = tensor("op_45458_cast_fp16")]; + tensor var_45459_to_fp16 = const()[name = tensor("op_45459_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4701_cast_fp16 = mul(x = var_45458_cast_fp16, y = var_45459_to_fp16)[name = tensor("aw_chunk_4701_cast_fp16")]; + tensor var_45462_equation_0 = const()[name = tensor("op_45462_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45462_cast_fp16 = einsum(equation = var_45462_equation_0, values = (var_45208_cast_fp16, var_44839_cast_fp16))[name = tensor("op_45462_cast_fp16")]; + tensor var_45463_to_fp16 = const()[name = tensor("op_45463_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4703_cast_fp16 = mul(x = var_45462_cast_fp16, y = var_45463_to_fp16)[name = tensor("aw_chunk_4703_cast_fp16")]; + tensor var_45466_equation_0 = const()[name = tensor("op_45466_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45466_cast_fp16 = einsum(equation = var_45466_equation_0, values = (var_45212_cast_fp16, var_44846_cast_fp16))[name = tensor("op_45466_cast_fp16")]; + tensor var_45467_to_fp16 = const()[name = tensor("op_45467_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4705_cast_fp16 = mul(x = var_45466_cast_fp16, y = var_45467_to_fp16)[name = tensor("aw_chunk_4705_cast_fp16")]; + tensor var_45470_equation_0 = const()[name = tensor("op_45470_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45470_cast_fp16 = einsum(equation = var_45470_equation_0, values = (var_45212_cast_fp16, var_44853_cast_fp16))[name = tensor("op_45470_cast_fp16")]; + tensor var_45471_to_fp16 = const()[name = tensor("op_45471_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4707_cast_fp16 = mul(x = var_45470_cast_fp16, y = var_45471_to_fp16)[name = tensor("aw_chunk_4707_cast_fp16")]; + tensor var_45474_equation_0 = const()[name = tensor("op_45474_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45474_cast_fp16 = einsum(equation = var_45474_equation_0, values = (var_45212_cast_fp16, var_44860_cast_fp16))[name = tensor("op_45474_cast_fp16")]; + tensor var_45475_to_fp16 = const()[name = tensor("op_45475_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4709_cast_fp16 = mul(x = var_45474_cast_fp16, y = var_45475_to_fp16)[name = tensor("aw_chunk_4709_cast_fp16")]; + tensor var_45478_equation_0 = const()[name = tensor("op_45478_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45478_cast_fp16 = einsum(equation = var_45478_equation_0, values = (var_45212_cast_fp16, var_44867_cast_fp16))[name = tensor("op_45478_cast_fp16")]; + tensor var_45479_to_fp16 = const()[name = tensor("op_45479_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4711_cast_fp16 = mul(x = var_45478_cast_fp16, y = var_45479_to_fp16)[name = tensor("aw_chunk_4711_cast_fp16")]; + tensor var_45482_equation_0 = const()[name = tensor("op_45482_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45482_cast_fp16 = einsum(equation = var_45482_equation_0, values = (var_45216_cast_fp16, var_44874_cast_fp16))[name = tensor("op_45482_cast_fp16")]; + tensor var_45483_to_fp16 = const()[name = tensor("op_45483_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4713_cast_fp16 = mul(x = var_45482_cast_fp16, y = var_45483_to_fp16)[name = tensor("aw_chunk_4713_cast_fp16")]; + tensor var_45486_equation_0 = const()[name = tensor("op_45486_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45486_cast_fp16 = einsum(equation = var_45486_equation_0, values = (var_45216_cast_fp16, var_44881_cast_fp16))[name = tensor("op_45486_cast_fp16")]; + tensor var_45487_to_fp16 = const()[name = tensor("op_45487_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4715_cast_fp16 = mul(x = var_45486_cast_fp16, y = var_45487_to_fp16)[name = tensor("aw_chunk_4715_cast_fp16")]; + tensor var_45490_equation_0 = const()[name = tensor("op_45490_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45490_cast_fp16 = einsum(equation = var_45490_equation_0, values = (var_45216_cast_fp16, var_44888_cast_fp16))[name = tensor("op_45490_cast_fp16")]; + tensor var_45491_to_fp16 = const()[name = tensor("op_45491_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4717_cast_fp16 = mul(x = var_45490_cast_fp16, y = var_45491_to_fp16)[name = tensor("aw_chunk_4717_cast_fp16")]; + tensor var_45494_equation_0 = const()[name = tensor("op_45494_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45494_cast_fp16 = einsum(equation = var_45494_equation_0, values = (var_45216_cast_fp16, var_44895_cast_fp16))[name = tensor("op_45494_cast_fp16")]; + tensor var_45495_to_fp16 = const()[name = tensor("op_45495_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4719_cast_fp16 = mul(x = var_45494_cast_fp16, y = var_45495_to_fp16)[name = tensor("aw_chunk_4719_cast_fp16")]; + tensor var_45498_equation_0 = const()[name = tensor("op_45498_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45498_cast_fp16 = einsum(equation = var_45498_equation_0, values = (var_45220_cast_fp16, var_44902_cast_fp16))[name = tensor("op_45498_cast_fp16")]; + tensor var_45499_to_fp16 = const()[name = tensor("op_45499_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4721_cast_fp16 = mul(x = var_45498_cast_fp16, y = var_45499_to_fp16)[name = tensor("aw_chunk_4721_cast_fp16")]; + tensor var_45502_equation_0 = const()[name = tensor("op_45502_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45502_cast_fp16 = einsum(equation = var_45502_equation_0, values = (var_45220_cast_fp16, var_44909_cast_fp16))[name = tensor("op_45502_cast_fp16")]; + tensor var_45503_to_fp16 = const()[name = tensor("op_45503_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4723_cast_fp16 = mul(x = var_45502_cast_fp16, y = var_45503_to_fp16)[name = tensor("aw_chunk_4723_cast_fp16")]; + tensor var_45506_equation_0 = const()[name = tensor("op_45506_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45506_cast_fp16 = einsum(equation = var_45506_equation_0, values = (var_45220_cast_fp16, var_44916_cast_fp16))[name = tensor("op_45506_cast_fp16")]; + tensor var_45507_to_fp16 = const()[name = tensor("op_45507_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4725_cast_fp16 = mul(x = var_45506_cast_fp16, y = var_45507_to_fp16)[name = tensor("aw_chunk_4725_cast_fp16")]; + tensor var_45510_equation_0 = const()[name = tensor("op_45510_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45510_cast_fp16 = einsum(equation = var_45510_equation_0, values = (var_45220_cast_fp16, var_44923_cast_fp16))[name = tensor("op_45510_cast_fp16")]; + tensor var_45511_to_fp16 = const()[name = tensor("op_45511_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4727_cast_fp16 = mul(x = var_45510_cast_fp16, y = var_45511_to_fp16)[name = tensor("aw_chunk_4727_cast_fp16")]; + tensor var_45514_equation_0 = const()[name = tensor("op_45514_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45514_cast_fp16 = einsum(equation = var_45514_equation_0, values = (var_45224_cast_fp16, var_44930_cast_fp16))[name = tensor("op_45514_cast_fp16")]; + tensor var_45515_to_fp16 = const()[name = tensor("op_45515_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4729_cast_fp16 = mul(x = var_45514_cast_fp16, y = var_45515_to_fp16)[name = tensor("aw_chunk_4729_cast_fp16")]; + tensor var_45518_equation_0 = const()[name = tensor("op_45518_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45518_cast_fp16 = einsum(equation = var_45518_equation_0, values = (var_45224_cast_fp16, var_44937_cast_fp16))[name = tensor("op_45518_cast_fp16")]; + tensor var_45519_to_fp16 = const()[name = tensor("op_45519_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4731_cast_fp16 = mul(x = var_45518_cast_fp16, y = var_45519_to_fp16)[name = tensor("aw_chunk_4731_cast_fp16")]; + tensor var_45522_equation_0 = const()[name = tensor("op_45522_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45522_cast_fp16 = einsum(equation = var_45522_equation_0, values = (var_45224_cast_fp16, var_44944_cast_fp16))[name = tensor("op_45522_cast_fp16")]; + tensor var_45523_to_fp16 = const()[name = tensor("op_45523_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4733_cast_fp16 = mul(x = var_45522_cast_fp16, y = var_45523_to_fp16)[name = tensor("aw_chunk_4733_cast_fp16")]; + tensor var_45526_equation_0 = const()[name = tensor("op_45526_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45526_cast_fp16 = einsum(equation = var_45526_equation_0, values = (var_45224_cast_fp16, var_44951_cast_fp16))[name = tensor("op_45526_cast_fp16")]; + tensor var_45527_to_fp16 = const()[name = tensor("op_45527_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4735_cast_fp16 = mul(x = var_45526_cast_fp16, y = var_45527_to_fp16)[name = tensor("aw_chunk_4735_cast_fp16")]; + tensor var_45530_equation_0 = const()[name = tensor("op_45530_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45530_cast_fp16 = einsum(equation = var_45530_equation_0, values = (var_45228_cast_fp16, var_44958_cast_fp16))[name = tensor("op_45530_cast_fp16")]; + tensor var_45531_to_fp16 = const()[name = tensor("op_45531_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4737_cast_fp16 = mul(x = var_45530_cast_fp16, y = var_45531_to_fp16)[name = tensor("aw_chunk_4737_cast_fp16")]; + tensor var_45534_equation_0 = const()[name = tensor("op_45534_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45534_cast_fp16 = einsum(equation = var_45534_equation_0, values = (var_45228_cast_fp16, var_44965_cast_fp16))[name = tensor("op_45534_cast_fp16")]; + tensor var_45535_to_fp16 = const()[name = tensor("op_45535_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4739_cast_fp16 = mul(x = var_45534_cast_fp16, y = var_45535_to_fp16)[name = tensor("aw_chunk_4739_cast_fp16")]; + tensor var_45538_equation_0 = const()[name = tensor("op_45538_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45538_cast_fp16 = einsum(equation = var_45538_equation_0, values = (var_45228_cast_fp16, var_44972_cast_fp16))[name = tensor("op_45538_cast_fp16")]; + tensor var_45539_to_fp16 = const()[name = tensor("op_45539_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4741_cast_fp16 = mul(x = var_45538_cast_fp16, y = var_45539_to_fp16)[name = tensor("aw_chunk_4741_cast_fp16")]; + tensor var_45542_equation_0 = const()[name = tensor("op_45542_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45542_cast_fp16 = einsum(equation = var_45542_equation_0, values = (var_45228_cast_fp16, var_44979_cast_fp16))[name = tensor("op_45542_cast_fp16")]; + tensor var_45543_to_fp16 = const()[name = tensor("op_45543_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4743_cast_fp16 = mul(x = var_45542_cast_fp16, y = var_45543_to_fp16)[name = tensor("aw_chunk_4743_cast_fp16")]; + tensor var_45546_equation_0 = const()[name = tensor("op_45546_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45546_cast_fp16 = einsum(equation = var_45546_equation_0, values = (var_45232_cast_fp16, var_44986_cast_fp16))[name = tensor("op_45546_cast_fp16")]; + tensor var_45547_to_fp16 = const()[name = tensor("op_45547_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4745_cast_fp16 = mul(x = var_45546_cast_fp16, y = var_45547_to_fp16)[name = tensor("aw_chunk_4745_cast_fp16")]; + tensor var_45550_equation_0 = const()[name = tensor("op_45550_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45550_cast_fp16 = einsum(equation = var_45550_equation_0, values = (var_45232_cast_fp16, var_44993_cast_fp16))[name = tensor("op_45550_cast_fp16")]; + tensor var_45551_to_fp16 = const()[name = tensor("op_45551_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4747_cast_fp16 = mul(x = var_45550_cast_fp16, y = var_45551_to_fp16)[name = tensor("aw_chunk_4747_cast_fp16")]; + tensor var_45554_equation_0 = const()[name = tensor("op_45554_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45554_cast_fp16 = einsum(equation = var_45554_equation_0, values = (var_45232_cast_fp16, var_45000_cast_fp16))[name = tensor("op_45554_cast_fp16")]; + tensor var_45555_to_fp16 = const()[name = tensor("op_45555_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4749_cast_fp16 = mul(x = var_45554_cast_fp16, y = var_45555_to_fp16)[name = tensor("aw_chunk_4749_cast_fp16")]; + tensor var_45558_equation_0 = const()[name = tensor("op_45558_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45558_cast_fp16 = einsum(equation = var_45558_equation_0, values = (var_45232_cast_fp16, var_45007_cast_fp16))[name = tensor("op_45558_cast_fp16")]; + tensor var_45559_to_fp16 = const()[name = tensor("op_45559_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4751_cast_fp16 = mul(x = var_45558_cast_fp16, y = var_45559_to_fp16)[name = tensor("aw_chunk_4751_cast_fp16")]; + tensor var_45562_equation_0 = const()[name = tensor("op_45562_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45562_cast_fp16 = einsum(equation = var_45562_equation_0, values = (var_45236_cast_fp16, var_45014_cast_fp16))[name = tensor("op_45562_cast_fp16")]; + tensor var_45563_to_fp16 = const()[name = tensor("op_45563_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4753_cast_fp16 = mul(x = var_45562_cast_fp16, y = var_45563_to_fp16)[name = tensor("aw_chunk_4753_cast_fp16")]; + tensor var_45566_equation_0 = const()[name = tensor("op_45566_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45566_cast_fp16 = einsum(equation = var_45566_equation_0, values = (var_45236_cast_fp16, var_45021_cast_fp16))[name = tensor("op_45566_cast_fp16")]; + tensor var_45567_to_fp16 = const()[name = tensor("op_45567_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4755_cast_fp16 = mul(x = var_45566_cast_fp16, y = var_45567_to_fp16)[name = tensor("aw_chunk_4755_cast_fp16")]; + tensor var_45570_equation_0 = const()[name = tensor("op_45570_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45570_cast_fp16 = einsum(equation = var_45570_equation_0, values = (var_45236_cast_fp16, var_45028_cast_fp16))[name = tensor("op_45570_cast_fp16")]; + tensor var_45571_to_fp16 = const()[name = tensor("op_45571_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4757_cast_fp16 = mul(x = var_45570_cast_fp16, y = var_45571_to_fp16)[name = tensor("aw_chunk_4757_cast_fp16")]; + tensor var_45574_equation_0 = const()[name = tensor("op_45574_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45574_cast_fp16 = einsum(equation = var_45574_equation_0, values = (var_45236_cast_fp16, var_45035_cast_fp16))[name = tensor("op_45574_cast_fp16")]; + tensor var_45575_to_fp16 = const()[name = tensor("op_45575_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4759_cast_fp16 = mul(x = var_45574_cast_fp16, y = var_45575_to_fp16)[name = tensor("aw_chunk_4759_cast_fp16")]; + tensor var_45578_equation_0 = const()[name = tensor("op_45578_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45578_cast_fp16 = einsum(equation = var_45578_equation_0, values = (var_45240_cast_fp16, var_45042_cast_fp16))[name = tensor("op_45578_cast_fp16")]; + tensor var_45579_to_fp16 = const()[name = tensor("op_45579_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4761_cast_fp16 = mul(x = var_45578_cast_fp16, y = var_45579_to_fp16)[name = tensor("aw_chunk_4761_cast_fp16")]; + tensor var_45582_equation_0 = const()[name = tensor("op_45582_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45582_cast_fp16 = einsum(equation = var_45582_equation_0, values = (var_45240_cast_fp16, var_45049_cast_fp16))[name = tensor("op_45582_cast_fp16")]; + tensor var_45583_to_fp16 = const()[name = tensor("op_45583_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4763_cast_fp16 = mul(x = var_45582_cast_fp16, y = var_45583_to_fp16)[name = tensor("aw_chunk_4763_cast_fp16")]; + tensor var_45586_equation_0 = const()[name = tensor("op_45586_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45586_cast_fp16 = einsum(equation = var_45586_equation_0, values = (var_45240_cast_fp16, var_45056_cast_fp16))[name = tensor("op_45586_cast_fp16")]; + tensor var_45587_to_fp16 = const()[name = tensor("op_45587_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4765_cast_fp16 = mul(x = var_45586_cast_fp16, y = var_45587_to_fp16)[name = tensor("aw_chunk_4765_cast_fp16")]; + tensor var_45590_equation_0 = const()[name = tensor("op_45590_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45590_cast_fp16 = einsum(equation = var_45590_equation_0, values = (var_45240_cast_fp16, var_45063_cast_fp16))[name = tensor("op_45590_cast_fp16")]; + tensor var_45591_to_fp16 = const()[name = tensor("op_45591_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4767_cast_fp16 = mul(x = var_45590_cast_fp16, y = var_45591_to_fp16)[name = tensor("aw_chunk_4767_cast_fp16")]; + tensor var_45594_equation_0 = const()[name = tensor("op_45594_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45594_cast_fp16 = einsum(equation = var_45594_equation_0, values = (var_45244_cast_fp16, var_45070_cast_fp16))[name = tensor("op_45594_cast_fp16")]; + tensor var_45595_to_fp16 = const()[name = tensor("op_45595_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4769_cast_fp16 = mul(x = var_45594_cast_fp16, y = var_45595_to_fp16)[name = tensor("aw_chunk_4769_cast_fp16")]; + tensor var_45598_equation_0 = const()[name = tensor("op_45598_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45598_cast_fp16 = einsum(equation = var_45598_equation_0, values = (var_45244_cast_fp16, var_45077_cast_fp16))[name = tensor("op_45598_cast_fp16")]; + tensor var_45599_to_fp16 = const()[name = tensor("op_45599_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4771_cast_fp16 = mul(x = var_45598_cast_fp16, y = var_45599_to_fp16)[name = tensor("aw_chunk_4771_cast_fp16")]; + tensor var_45602_equation_0 = const()[name = tensor("op_45602_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45602_cast_fp16 = einsum(equation = var_45602_equation_0, values = (var_45244_cast_fp16, var_45084_cast_fp16))[name = tensor("op_45602_cast_fp16")]; + tensor var_45603_to_fp16 = const()[name = tensor("op_45603_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4773_cast_fp16 = mul(x = var_45602_cast_fp16, y = var_45603_to_fp16)[name = tensor("aw_chunk_4773_cast_fp16")]; + tensor var_45606_equation_0 = const()[name = tensor("op_45606_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45606_cast_fp16 = einsum(equation = var_45606_equation_0, values = (var_45244_cast_fp16, var_45091_cast_fp16))[name = tensor("op_45606_cast_fp16")]; + tensor var_45607_to_fp16 = const()[name = tensor("op_45607_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4775_cast_fp16 = mul(x = var_45606_cast_fp16, y = var_45607_to_fp16)[name = tensor("aw_chunk_4775_cast_fp16")]; + tensor var_45610_equation_0 = const()[name = tensor("op_45610_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45610_cast_fp16 = einsum(equation = var_45610_equation_0, values = (var_45248_cast_fp16, var_45098_cast_fp16))[name = tensor("op_45610_cast_fp16")]; + tensor var_45611_to_fp16 = const()[name = tensor("op_45611_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4777_cast_fp16 = mul(x = var_45610_cast_fp16, y = var_45611_to_fp16)[name = tensor("aw_chunk_4777_cast_fp16")]; + tensor var_45614_equation_0 = const()[name = tensor("op_45614_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45614_cast_fp16 = einsum(equation = var_45614_equation_0, values = (var_45248_cast_fp16, var_45105_cast_fp16))[name = tensor("op_45614_cast_fp16")]; + tensor var_45615_to_fp16 = const()[name = tensor("op_45615_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4779_cast_fp16 = mul(x = var_45614_cast_fp16, y = var_45615_to_fp16)[name = tensor("aw_chunk_4779_cast_fp16")]; + tensor var_45618_equation_0 = const()[name = tensor("op_45618_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45618_cast_fp16 = einsum(equation = var_45618_equation_0, values = (var_45248_cast_fp16, var_45112_cast_fp16))[name = tensor("op_45618_cast_fp16")]; + tensor var_45619_to_fp16 = const()[name = tensor("op_45619_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4781_cast_fp16 = mul(x = var_45618_cast_fp16, y = var_45619_to_fp16)[name = tensor("aw_chunk_4781_cast_fp16")]; + tensor var_45622_equation_0 = const()[name = tensor("op_45622_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45622_cast_fp16 = einsum(equation = var_45622_equation_0, values = (var_45248_cast_fp16, var_45119_cast_fp16))[name = tensor("op_45622_cast_fp16")]; + tensor var_45623_to_fp16 = const()[name = tensor("op_45623_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4783_cast_fp16 = mul(x = var_45622_cast_fp16, y = var_45623_to_fp16)[name = tensor("aw_chunk_4783_cast_fp16")]; + tensor var_45626_equation_0 = const()[name = tensor("op_45626_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45626_cast_fp16 = einsum(equation = var_45626_equation_0, values = (var_45252_cast_fp16, var_45126_cast_fp16))[name = tensor("op_45626_cast_fp16")]; + tensor var_45627_to_fp16 = const()[name = tensor("op_45627_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4785_cast_fp16 = mul(x = var_45626_cast_fp16, y = var_45627_to_fp16)[name = tensor("aw_chunk_4785_cast_fp16")]; + tensor var_45630_equation_0 = const()[name = tensor("op_45630_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45630_cast_fp16 = einsum(equation = var_45630_equation_0, values = (var_45252_cast_fp16, var_45133_cast_fp16))[name = tensor("op_45630_cast_fp16")]; + tensor var_45631_to_fp16 = const()[name = tensor("op_45631_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4787_cast_fp16 = mul(x = var_45630_cast_fp16, y = var_45631_to_fp16)[name = tensor("aw_chunk_4787_cast_fp16")]; + tensor var_45634_equation_0 = const()[name = tensor("op_45634_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45634_cast_fp16 = einsum(equation = var_45634_equation_0, values = (var_45252_cast_fp16, var_45140_cast_fp16))[name = tensor("op_45634_cast_fp16")]; + tensor var_45635_to_fp16 = const()[name = tensor("op_45635_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4789_cast_fp16 = mul(x = var_45634_cast_fp16, y = var_45635_to_fp16)[name = tensor("aw_chunk_4789_cast_fp16")]; + tensor var_45638_equation_0 = const()[name = tensor("op_45638_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45638_cast_fp16 = einsum(equation = var_45638_equation_0, values = (var_45252_cast_fp16, var_45147_cast_fp16))[name = tensor("op_45638_cast_fp16")]; + tensor var_45639_to_fp16 = const()[name = tensor("op_45639_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4791_cast_fp16 = mul(x = var_45638_cast_fp16, y = var_45639_to_fp16)[name = tensor("aw_chunk_4791_cast_fp16")]; + tensor var_45642_equation_0 = const()[name = tensor("op_45642_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45642_cast_fp16 = einsum(equation = var_45642_equation_0, values = (var_45256_cast_fp16, var_45154_cast_fp16))[name = tensor("op_45642_cast_fp16")]; + tensor var_45643_to_fp16 = const()[name = tensor("op_45643_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4793_cast_fp16 = mul(x = var_45642_cast_fp16, y = var_45643_to_fp16)[name = tensor("aw_chunk_4793_cast_fp16")]; + tensor var_45646_equation_0 = const()[name = tensor("op_45646_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45646_cast_fp16 = einsum(equation = var_45646_equation_0, values = (var_45256_cast_fp16, var_45161_cast_fp16))[name = tensor("op_45646_cast_fp16")]; + tensor var_45647_to_fp16 = const()[name = tensor("op_45647_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4795_cast_fp16 = mul(x = var_45646_cast_fp16, y = var_45647_to_fp16)[name = tensor("aw_chunk_4795_cast_fp16")]; + tensor var_45650_equation_0 = const()[name = tensor("op_45650_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45650_cast_fp16 = einsum(equation = var_45650_equation_0, values = (var_45256_cast_fp16, var_45168_cast_fp16))[name = tensor("op_45650_cast_fp16")]; + tensor var_45651_to_fp16 = const()[name = tensor("op_45651_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4797_cast_fp16 = mul(x = var_45650_cast_fp16, y = var_45651_to_fp16)[name = tensor("aw_chunk_4797_cast_fp16")]; + tensor var_45654_equation_0 = const()[name = tensor("op_45654_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_45654_cast_fp16 = einsum(equation = var_45654_equation_0, values = (var_45256_cast_fp16, var_45175_cast_fp16))[name = tensor("op_45654_cast_fp16")]; + tensor var_45655_to_fp16 = const()[name = tensor("op_45655_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4799_cast_fp16 = mul(x = var_45654_cast_fp16, y = var_45655_to_fp16)[name = tensor("aw_chunk_4799_cast_fp16")]; + tensor var_45657_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4641_cast_fp16)[name = tensor("op_45657_cast_fp16")]; + tensor var_45658_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4643_cast_fp16)[name = tensor("op_45658_cast_fp16")]; + tensor var_45659_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4645_cast_fp16)[name = tensor("op_45659_cast_fp16")]; + tensor var_45660_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4647_cast_fp16)[name = tensor("op_45660_cast_fp16")]; + tensor var_45661_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4649_cast_fp16)[name = tensor("op_45661_cast_fp16")]; + tensor var_45662_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4651_cast_fp16)[name = tensor("op_45662_cast_fp16")]; + tensor var_45663_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4653_cast_fp16)[name = tensor("op_45663_cast_fp16")]; + tensor var_45664_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4655_cast_fp16)[name = tensor("op_45664_cast_fp16")]; + tensor var_45665_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4657_cast_fp16)[name = tensor("op_45665_cast_fp16")]; + tensor var_45666_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4659_cast_fp16)[name = tensor("op_45666_cast_fp16")]; + tensor var_45667_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4661_cast_fp16)[name = tensor("op_45667_cast_fp16")]; + tensor var_45668_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4663_cast_fp16)[name = tensor("op_45668_cast_fp16")]; + tensor var_45669_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4665_cast_fp16)[name = tensor("op_45669_cast_fp16")]; + tensor var_45670_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4667_cast_fp16)[name = tensor("op_45670_cast_fp16")]; + tensor var_45671_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4669_cast_fp16)[name = tensor("op_45671_cast_fp16")]; + tensor var_45672_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4671_cast_fp16)[name = tensor("op_45672_cast_fp16")]; + tensor var_45673_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4673_cast_fp16)[name = tensor("op_45673_cast_fp16")]; + tensor var_45674_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4675_cast_fp16)[name = tensor("op_45674_cast_fp16")]; + tensor var_45675_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4677_cast_fp16)[name = tensor("op_45675_cast_fp16")]; + tensor var_45676_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4679_cast_fp16)[name = tensor("op_45676_cast_fp16")]; + tensor var_45677_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4681_cast_fp16)[name = tensor("op_45677_cast_fp16")]; + tensor var_45678_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4683_cast_fp16)[name = tensor("op_45678_cast_fp16")]; + tensor var_45679_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4685_cast_fp16)[name = tensor("op_45679_cast_fp16")]; + tensor var_45680_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4687_cast_fp16)[name = tensor("op_45680_cast_fp16")]; + tensor var_45681_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4689_cast_fp16)[name = tensor("op_45681_cast_fp16")]; + tensor var_45682_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4691_cast_fp16)[name = tensor("op_45682_cast_fp16")]; + tensor var_45683_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4693_cast_fp16)[name = tensor("op_45683_cast_fp16")]; + tensor var_45684_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4695_cast_fp16)[name = tensor("op_45684_cast_fp16")]; + tensor var_45685_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4697_cast_fp16)[name = tensor("op_45685_cast_fp16")]; + tensor var_45686_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4699_cast_fp16)[name = tensor("op_45686_cast_fp16")]; + tensor var_45687_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4701_cast_fp16)[name = tensor("op_45687_cast_fp16")]; + tensor var_45688_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4703_cast_fp16)[name = tensor("op_45688_cast_fp16")]; + tensor var_45689_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4705_cast_fp16)[name = tensor("op_45689_cast_fp16")]; + tensor var_45690_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4707_cast_fp16)[name = tensor("op_45690_cast_fp16")]; + tensor var_45691_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4709_cast_fp16)[name = tensor("op_45691_cast_fp16")]; + tensor var_45692_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4711_cast_fp16)[name = tensor("op_45692_cast_fp16")]; + tensor var_45693_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4713_cast_fp16)[name = tensor("op_45693_cast_fp16")]; + tensor var_45694_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4715_cast_fp16)[name = tensor("op_45694_cast_fp16")]; + tensor var_45695_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4717_cast_fp16)[name = tensor("op_45695_cast_fp16")]; + tensor var_45696_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4719_cast_fp16)[name = tensor("op_45696_cast_fp16")]; + tensor var_45697_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4721_cast_fp16)[name = tensor("op_45697_cast_fp16")]; + tensor var_45698_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4723_cast_fp16)[name = tensor("op_45698_cast_fp16")]; + tensor var_45699_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4725_cast_fp16)[name = tensor("op_45699_cast_fp16")]; + tensor var_45700_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4727_cast_fp16)[name = tensor("op_45700_cast_fp16")]; + tensor var_45701_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4729_cast_fp16)[name = tensor("op_45701_cast_fp16")]; + tensor var_45702_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4731_cast_fp16)[name = tensor("op_45702_cast_fp16")]; + tensor var_45703_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4733_cast_fp16)[name = tensor("op_45703_cast_fp16")]; + tensor var_45704_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4735_cast_fp16)[name = tensor("op_45704_cast_fp16")]; + tensor var_45705_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4737_cast_fp16)[name = tensor("op_45705_cast_fp16")]; + tensor var_45706_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4739_cast_fp16)[name = tensor("op_45706_cast_fp16")]; + tensor var_45707_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4741_cast_fp16)[name = tensor("op_45707_cast_fp16")]; + tensor var_45708_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4743_cast_fp16)[name = tensor("op_45708_cast_fp16")]; + tensor var_45709_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4745_cast_fp16)[name = tensor("op_45709_cast_fp16")]; + tensor var_45710_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4747_cast_fp16)[name = tensor("op_45710_cast_fp16")]; + tensor var_45711_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4749_cast_fp16)[name = tensor("op_45711_cast_fp16")]; + tensor var_45712_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4751_cast_fp16)[name = tensor("op_45712_cast_fp16")]; + tensor var_45713_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4753_cast_fp16)[name = tensor("op_45713_cast_fp16")]; + tensor var_45714_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4755_cast_fp16)[name = tensor("op_45714_cast_fp16")]; + tensor var_45715_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4757_cast_fp16)[name = tensor("op_45715_cast_fp16")]; + tensor var_45716_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4759_cast_fp16)[name = tensor("op_45716_cast_fp16")]; + tensor var_45717_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4761_cast_fp16)[name = tensor("op_45717_cast_fp16")]; + tensor var_45718_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4763_cast_fp16)[name = tensor("op_45718_cast_fp16")]; + tensor var_45719_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4765_cast_fp16)[name = tensor("op_45719_cast_fp16")]; + tensor var_45720_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4767_cast_fp16)[name = tensor("op_45720_cast_fp16")]; + tensor var_45721_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4769_cast_fp16)[name = tensor("op_45721_cast_fp16")]; + tensor var_45722_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4771_cast_fp16)[name = tensor("op_45722_cast_fp16")]; + tensor var_45723_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4773_cast_fp16)[name = tensor("op_45723_cast_fp16")]; + tensor var_45724_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4775_cast_fp16)[name = tensor("op_45724_cast_fp16")]; + tensor var_45725_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4777_cast_fp16)[name = tensor("op_45725_cast_fp16")]; + tensor var_45726_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4779_cast_fp16)[name = tensor("op_45726_cast_fp16")]; + tensor var_45727_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4781_cast_fp16)[name = tensor("op_45727_cast_fp16")]; + tensor var_45728_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4783_cast_fp16)[name = tensor("op_45728_cast_fp16")]; + tensor var_45729_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4785_cast_fp16)[name = tensor("op_45729_cast_fp16")]; + tensor var_45730_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4787_cast_fp16)[name = tensor("op_45730_cast_fp16")]; + tensor var_45731_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4789_cast_fp16)[name = tensor("op_45731_cast_fp16")]; + tensor var_45732_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4791_cast_fp16)[name = tensor("op_45732_cast_fp16")]; + tensor var_45733_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4793_cast_fp16)[name = tensor("op_45733_cast_fp16")]; + tensor var_45734_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4795_cast_fp16)[name = tensor("op_45734_cast_fp16")]; + tensor var_45735_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4797_cast_fp16)[name = tensor("op_45735_cast_fp16")]; + tensor var_45736_cast_fp16 = softmax(axis = var_44482, x = aw_chunk_4799_cast_fp16)[name = tensor("op_45736_cast_fp16")]; + tensor var_45738_equation_0 = const()[name = tensor("op_45738_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45738_cast_fp16 = einsum(equation = var_45738_equation_0, values = (var_45258_cast_fp16, var_45657_cast_fp16))[name = tensor("op_45738_cast_fp16")]; + tensor var_45740_equation_0 = const()[name = tensor("op_45740_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45740_cast_fp16 = einsum(equation = var_45740_equation_0, values = (var_45258_cast_fp16, var_45658_cast_fp16))[name = tensor("op_45740_cast_fp16")]; + tensor var_45742_equation_0 = const()[name = tensor("op_45742_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45742_cast_fp16 = einsum(equation = var_45742_equation_0, values = (var_45258_cast_fp16, var_45659_cast_fp16))[name = tensor("op_45742_cast_fp16")]; + tensor var_45744_equation_0 = const()[name = tensor("op_45744_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45744_cast_fp16 = einsum(equation = var_45744_equation_0, values = (var_45258_cast_fp16, var_45660_cast_fp16))[name = tensor("op_45744_cast_fp16")]; + tensor var_45746_equation_0 = const()[name = tensor("op_45746_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45746_cast_fp16 = einsum(equation = var_45746_equation_0, values = (var_45262_cast_fp16, var_45661_cast_fp16))[name = tensor("op_45746_cast_fp16")]; + tensor var_45748_equation_0 = const()[name = tensor("op_45748_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45748_cast_fp16 = einsum(equation = var_45748_equation_0, values = (var_45262_cast_fp16, var_45662_cast_fp16))[name = tensor("op_45748_cast_fp16")]; + tensor var_45750_equation_0 = const()[name = tensor("op_45750_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45750_cast_fp16 = einsum(equation = var_45750_equation_0, values = (var_45262_cast_fp16, var_45663_cast_fp16))[name = tensor("op_45750_cast_fp16")]; + tensor var_45752_equation_0 = const()[name = tensor("op_45752_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45752_cast_fp16 = einsum(equation = var_45752_equation_0, values = (var_45262_cast_fp16, var_45664_cast_fp16))[name = tensor("op_45752_cast_fp16")]; + tensor var_45754_equation_0 = const()[name = tensor("op_45754_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45754_cast_fp16 = einsum(equation = var_45754_equation_0, values = (var_45266_cast_fp16, var_45665_cast_fp16))[name = tensor("op_45754_cast_fp16")]; + tensor var_45756_equation_0 = const()[name = tensor("op_45756_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45756_cast_fp16 = einsum(equation = var_45756_equation_0, values = (var_45266_cast_fp16, var_45666_cast_fp16))[name = tensor("op_45756_cast_fp16")]; + tensor var_45758_equation_0 = const()[name = tensor("op_45758_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45758_cast_fp16 = einsum(equation = var_45758_equation_0, values = (var_45266_cast_fp16, var_45667_cast_fp16))[name = tensor("op_45758_cast_fp16")]; + tensor var_45760_equation_0 = const()[name = tensor("op_45760_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45760_cast_fp16 = einsum(equation = var_45760_equation_0, values = (var_45266_cast_fp16, var_45668_cast_fp16))[name = tensor("op_45760_cast_fp16")]; + tensor var_45762_equation_0 = const()[name = tensor("op_45762_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45762_cast_fp16 = einsum(equation = var_45762_equation_0, values = (var_45270_cast_fp16, var_45669_cast_fp16))[name = tensor("op_45762_cast_fp16")]; + tensor var_45764_equation_0 = const()[name = tensor("op_45764_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45764_cast_fp16 = einsum(equation = var_45764_equation_0, values = (var_45270_cast_fp16, var_45670_cast_fp16))[name = tensor("op_45764_cast_fp16")]; + tensor var_45766_equation_0 = const()[name = tensor("op_45766_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45766_cast_fp16 = einsum(equation = var_45766_equation_0, values = (var_45270_cast_fp16, var_45671_cast_fp16))[name = tensor("op_45766_cast_fp16")]; + tensor var_45768_equation_0 = const()[name = tensor("op_45768_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45768_cast_fp16 = einsum(equation = var_45768_equation_0, values = (var_45270_cast_fp16, var_45672_cast_fp16))[name = tensor("op_45768_cast_fp16")]; + tensor var_45770_equation_0 = const()[name = tensor("op_45770_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45770_cast_fp16 = einsum(equation = var_45770_equation_0, values = (var_45274_cast_fp16, var_45673_cast_fp16))[name = tensor("op_45770_cast_fp16")]; + tensor var_45772_equation_0 = const()[name = tensor("op_45772_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45772_cast_fp16 = einsum(equation = var_45772_equation_0, values = (var_45274_cast_fp16, var_45674_cast_fp16))[name = tensor("op_45772_cast_fp16")]; + tensor var_45774_equation_0 = const()[name = tensor("op_45774_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45774_cast_fp16 = einsum(equation = var_45774_equation_0, values = (var_45274_cast_fp16, var_45675_cast_fp16))[name = tensor("op_45774_cast_fp16")]; + tensor var_45776_equation_0 = const()[name = tensor("op_45776_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45776_cast_fp16 = einsum(equation = var_45776_equation_0, values = (var_45274_cast_fp16, var_45676_cast_fp16))[name = tensor("op_45776_cast_fp16")]; + tensor var_45778_equation_0 = const()[name = tensor("op_45778_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45778_cast_fp16 = einsum(equation = var_45778_equation_0, values = (var_45278_cast_fp16, var_45677_cast_fp16))[name = tensor("op_45778_cast_fp16")]; + tensor var_45780_equation_0 = const()[name = tensor("op_45780_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45780_cast_fp16 = einsum(equation = var_45780_equation_0, values = (var_45278_cast_fp16, var_45678_cast_fp16))[name = tensor("op_45780_cast_fp16")]; + tensor var_45782_equation_0 = const()[name = tensor("op_45782_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45782_cast_fp16 = einsum(equation = var_45782_equation_0, values = (var_45278_cast_fp16, var_45679_cast_fp16))[name = tensor("op_45782_cast_fp16")]; + tensor var_45784_equation_0 = const()[name = tensor("op_45784_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45784_cast_fp16 = einsum(equation = var_45784_equation_0, values = (var_45278_cast_fp16, var_45680_cast_fp16))[name = tensor("op_45784_cast_fp16")]; + tensor var_45786_equation_0 = const()[name = tensor("op_45786_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45786_cast_fp16 = einsum(equation = var_45786_equation_0, values = (var_45282_cast_fp16, var_45681_cast_fp16))[name = tensor("op_45786_cast_fp16")]; + tensor var_45788_equation_0 = const()[name = tensor("op_45788_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45788_cast_fp16 = einsum(equation = var_45788_equation_0, values = (var_45282_cast_fp16, var_45682_cast_fp16))[name = tensor("op_45788_cast_fp16")]; + tensor var_45790_equation_0 = const()[name = tensor("op_45790_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45790_cast_fp16 = einsum(equation = var_45790_equation_0, values = (var_45282_cast_fp16, var_45683_cast_fp16))[name = tensor("op_45790_cast_fp16")]; + tensor var_45792_equation_0 = const()[name = tensor("op_45792_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45792_cast_fp16 = einsum(equation = var_45792_equation_0, values = (var_45282_cast_fp16, var_45684_cast_fp16))[name = tensor("op_45792_cast_fp16")]; + tensor var_45794_equation_0 = const()[name = tensor("op_45794_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45794_cast_fp16 = einsum(equation = var_45794_equation_0, values = (var_45286_cast_fp16, var_45685_cast_fp16))[name = tensor("op_45794_cast_fp16")]; + tensor var_45796_equation_0 = const()[name = tensor("op_45796_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45796_cast_fp16 = einsum(equation = var_45796_equation_0, values = (var_45286_cast_fp16, var_45686_cast_fp16))[name = tensor("op_45796_cast_fp16")]; + tensor var_45798_equation_0 = const()[name = tensor("op_45798_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45798_cast_fp16 = einsum(equation = var_45798_equation_0, values = (var_45286_cast_fp16, var_45687_cast_fp16))[name = tensor("op_45798_cast_fp16")]; + tensor var_45800_equation_0 = const()[name = tensor("op_45800_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45800_cast_fp16 = einsum(equation = var_45800_equation_0, values = (var_45286_cast_fp16, var_45688_cast_fp16))[name = tensor("op_45800_cast_fp16")]; + tensor var_45802_equation_0 = const()[name = tensor("op_45802_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45802_cast_fp16 = einsum(equation = var_45802_equation_0, values = (var_45290_cast_fp16, var_45689_cast_fp16))[name = tensor("op_45802_cast_fp16")]; + tensor var_45804_equation_0 = const()[name = tensor("op_45804_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45804_cast_fp16 = einsum(equation = var_45804_equation_0, values = (var_45290_cast_fp16, var_45690_cast_fp16))[name = tensor("op_45804_cast_fp16")]; + tensor var_45806_equation_0 = const()[name = tensor("op_45806_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45806_cast_fp16 = einsum(equation = var_45806_equation_0, values = (var_45290_cast_fp16, var_45691_cast_fp16))[name = tensor("op_45806_cast_fp16")]; + tensor var_45808_equation_0 = const()[name = tensor("op_45808_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45808_cast_fp16 = einsum(equation = var_45808_equation_0, values = (var_45290_cast_fp16, var_45692_cast_fp16))[name = tensor("op_45808_cast_fp16")]; + tensor var_45810_equation_0 = const()[name = tensor("op_45810_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45810_cast_fp16 = einsum(equation = var_45810_equation_0, values = (var_45294_cast_fp16, var_45693_cast_fp16))[name = tensor("op_45810_cast_fp16")]; + tensor var_45812_equation_0 = const()[name = tensor("op_45812_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45812_cast_fp16 = einsum(equation = var_45812_equation_0, values = (var_45294_cast_fp16, var_45694_cast_fp16))[name = tensor("op_45812_cast_fp16")]; + tensor var_45814_equation_0 = const()[name = tensor("op_45814_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45814_cast_fp16 = einsum(equation = var_45814_equation_0, values = (var_45294_cast_fp16, var_45695_cast_fp16))[name = tensor("op_45814_cast_fp16")]; + tensor var_45816_equation_0 = const()[name = tensor("op_45816_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45816_cast_fp16 = einsum(equation = var_45816_equation_0, values = (var_45294_cast_fp16, var_45696_cast_fp16))[name = tensor("op_45816_cast_fp16")]; + tensor var_45818_equation_0 = const()[name = tensor("op_45818_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45818_cast_fp16 = einsum(equation = var_45818_equation_0, values = (var_45298_cast_fp16, var_45697_cast_fp16))[name = tensor("op_45818_cast_fp16")]; + tensor var_45820_equation_0 = const()[name = tensor("op_45820_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45820_cast_fp16 = einsum(equation = var_45820_equation_0, values = (var_45298_cast_fp16, var_45698_cast_fp16))[name = tensor("op_45820_cast_fp16")]; + tensor var_45822_equation_0 = const()[name = tensor("op_45822_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45822_cast_fp16 = einsum(equation = var_45822_equation_0, values = (var_45298_cast_fp16, var_45699_cast_fp16))[name = tensor("op_45822_cast_fp16")]; + tensor var_45824_equation_0 = const()[name = tensor("op_45824_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45824_cast_fp16 = einsum(equation = var_45824_equation_0, values = (var_45298_cast_fp16, var_45700_cast_fp16))[name = tensor("op_45824_cast_fp16")]; + tensor var_45826_equation_0 = const()[name = tensor("op_45826_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45826_cast_fp16 = einsum(equation = var_45826_equation_0, values = (var_45302_cast_fp16, var_45701_cast_fp16))[name = tensor("op_45826_cast_fp16")]; + tensor var_45828_equation_0 = const()[name = tensor("op_45828_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45828_cast_fp16 = einsum(equation = var_45828_equation_0, values = (var_45302_cast_fp16, var_45702_cast_fp16))[name = tensor("op_45828_cast_fp16")]; + tensor var_45830_equation_0 = const()[name = tensor("op_45830_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45830_cast_fp16 = einsum(equation = var_45830_equation_0, values = (var_45302_cast_fp16, var_45703_cast_fp16))[name = tensor("op_45830_cast_fp16")]; + tensor var_45832_equation_0 = const()[name = tensor("op_45832_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45832_cast_fp16 = einsum(equation = var_45832_equation_0, values = (var_45302_cast_fp16, var_45704_cast_fp16))[name = tensor("op_45832_cast_fp16")]; + tensor var_45834_equation_0 = const()[name = tensor("op_45834_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45834_cast_fp16 = einsum(equation = var_45834_equation_0, values = (var_45306_cast_fp16, var_45705_cast_fp16))[name = tensor("op_45834_cast_fp16")]; + tensor var_45836_equation_0 = const()[name = tensor("op_45836_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45836_cast_fp16 = einsum(equation = var_45836_equation_0, values = (var_45306_cast_fp16, var_45706_cast_fp16))[name = tensor("op_45836_cast_fp16")]; + tensor var_45838_equation_0 = const()[name = tensor("op_45838_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45838_cast_fp16 = einsum(equation = var_45838_equation_0, values = (var_45306_cast_fp16, var_45707_cast_fp16))[name = tensor("op_45838_cast_fp16")]; + tensor var_45840_equation_0 = const()[name = tensor("op_45840_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45840_cast_fp16 = einsum(equation = var_45840_equation_0, values = (var_45306_cast_fp16, var_45708_cast_fp16))[name = tensor("op_45840_cast_fp16")]; + tensor var_45842_equation_0 = const()[name = tensor("op_45842_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45842_cast_fp16 = einsum(equation = var_45842_equation_0, values = (var_45310_cast_fp16, var_45709_cast_fp16))[name = tensor("op_45842_cast_fp16")]; + tensor var_45844_equation_0 = const()[name = tensor("op_45844_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45844_cast_fp16 = einsum(equation = var_45844_equation_0, values = (var_45310_cast_fp16, var_45710_cast_fp16))[name = tensor("op_45844_cast_fp16")]; + tensor var_45846_equation_0 = const()[name = tensor("op_45846_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45846_cast_fp16 = einsum(equation = var_45846_equation_0, values = (var_45310_cast_fp16, var_45711_cast_fp16))[name = tensor("op_45846_cast_fp16")]; + tensor var_45848_equation_0 = const()[name = tensor("op_45848_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45848_cast_fp16 = einsum(equation = var_45848_equation_0, values = (var_45310_cast_fp16, var_45712_cast_fp16))[name = tensor("op_45848_cast_fp16")]; + tensor var_45850_equation_0 = const()[name = tensor("op_45850_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45850_cast_fp16 = einsum(equation = var_45850_equation_0, values = (var_45314_cast_fp16, var_45713_cast_fp16))[name = tensor("op_45850_cast_fp16")]; + tensor var_45852_equation_0 = const()[name = tensor("op_45852_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45852_cast_fp16 = einsum(equation = var_45852_equation_0, values = (var_45314_cast_fp16, var_45714_cast_fp16))[name = tensor("op_45852_cast_fp16")]; + tensor var_45854_equation_0 = const()[name = tensor("op_45854_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45854_cast_fp16 = einsum(equation = var_45854_equation_0, values = (var_45314_cast_fp16, var_45715_cast_fp16))[name = tensor("op_45854_cast_fp16")]; + tensor var_45856_equation_0 = const()[name = tensor("op_45856_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45856_cast_fp16 = einsum(equation = var_45856_equation_0, values = (var_45314_cast_fp16, var_45716_cast_fp16))[name = tensor("op_45856_cast_fp16")]; + tensor var_45858_equation_0 = const()[name = tensor("op_45858_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45858_cast_fp16 = einsum(equation = var_45858_equation_0, values = (var_45318_cast_fp16, var_45717_cast_fp16))[name = tensor("op_45858_cast_fp16")]; + tensor var_45860_equation_0 = const()[name = tensor("op_45860_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45860_cast_fp16 = einsum(equation = var_45860_equation_0, values = (var_45318_cast_fp16, var_45718_cast_fp16))[name = tensor("op_45860_cast_fp16")]; + tensor var_45862_equation_0 = const()[name = tensor("op_45862_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45862_cast_fp16 = einsum(equation = var_45862_equation_0, values = (var_45318_cast_fp16, var_45719_cast_fp16))[name = tensor("op_45862_cast_fp16")]; + tensor var_45864_equation_0 = const()[name = tensor("op_45864_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45864_cast_fp16 = einsum(equation = var_45864_equation_0, values = (var_45318_cast_fp16, var_45720_cast_fp16))[name = tensor("op_45864_cast_fp16")]; + tensor var_45866_equation_0 = const()[name = tensor("op_45866_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45866_cast_fp16 = einsum(equation = var_45866_equation_0, values = (var_45322_cast_fp16, var_45721_cast_fp16))[name = tensor("op_45866_cast_fp16")]; + tensor var_45868_equation_0 = const()[name = tensor("op_45868_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45868_cast_fp16 = einsum(equation = var_45868_equation_0, values = (var_45322_cast_fp16, var_45722_cast_fp16))[name = tensor("op_45868_cast_fp16")]; + tensor var_45870_equation_0 = const()[name = tensor("op_45870_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45870_cast_fp16 = einsum(equation = var_45870_equation_0, values = (var_45322_cast_fp16, var_45723_cast_fp16))[name = tensor("op_45870_cast_fp16")]; + tensor var_45872_equation_0 = const()[name = tensor("op_45872_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45872_cast_fp16 = einsum(equation = var_45872_equation_0, values = (var_45322_cast_fp16, var_45724_cast_fp16))[name = tensor("op_45872_cast_fp16")]; + tensor var_45874_equation_0 = const()[name = tensor("op_45874_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45874_cast_fp16 = einsum(equation = var_45874_equation_0, values = (var_45326_cast_fp16, var_45725_cast_fp16))[name = tensor("op_45874_cast_fp16")]; + tensor var_45876_equation_0 = const()[name = tensor("op_45876_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45876_cast_fp16 = einsum(equation = var_45876_equation_0, values = (var_45326_cast_fp16, var_45726_cast_fp16))[name = tensor("op_45876_cast_fp16")]; + tensor var_45878_equation_0 = const()[name = tensor("op_45878_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45878_cast_fp16 = einsum(equation = var_45878_equation_0, values = (var_45326_cast_fp16, var_45727_cast_fp16))[name = tensor("op_45878_cast_fp16")]; + tensor var_45880_equation_0 = const()[name = tensor("op_45880_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45880_cast_fp16 = einsum(equation = var_45880_equation_0, values = (var_45326_cast_fp16, var_45728_cast_fp16))[name = tensor("op_45880_cast_fp16")]; + tensor var_45882_equation_0 = const()[name = tensor("op_45882_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45882_cast_fp16 = einsum(equation = var_45882_equation_0, values = (var_45330_cast_fp16, var_45729_cast_fp16))[name = tensor("op_45882_cast_fp16")]; + tensor var_45884_equation_0 = const()[name = tensor("op_45884_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45884_cast_fp16 = einsum(equation = var_45884_equation_0, values = (var_45330_cast_fp16, var_45730_cast_fp16))[name = tensor("op_45884_cast_fp16")]; + tensor var_45886_equation_0 = const()[name = tensor("op_45886_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45886_cast_fp16 = einsum(equation = var_45886_equation_0, values = (var_45330_cast_fp16, var_45731_cast_fp16))[name = tensor("op_45886_cast_fp16")]; + tensor var_45888_equation_0 = const()[name = tensor("op_45888_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45888_cast_fp16 = einsum(equation = var_45888_equation_0, values = (var_45330_cast_fp16, var_45732_cast_fp16))[name = tensor("op_45888_cast_fp16")]; + tensor var_45890_equation_0 = const()[name = tensor("op_45890_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45890_cast_fp16 = einsum(equation = var_45890_equation_0, values = (var_45334_cast_fp16, var_45733_cast_fp16))[name = tensor("op_45890_cast_fp16")]; + tensor var_45892_equation_0 = const()[name = tensor("op_45892_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45892_cast_fp16 = einsum(equation = var_45892_equation_0, values = (var_45334_cast_fp16, var_45734_cast_fp16))[name = tensor("op_45892_cast_fp16")]; + tensor var_45894_equation_0 = const()[name = tensor("op_45894_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45894_cast_fp16 = einsum(equation = var_45894_equation_0, values = (var_45334_cast_fp16, var_45735_cast_fp16))[name = tensor("op_45894_cast_fp16")]; + tensor var_45896_equation_0 = const()[name = tensor("op_45896_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_45896_cast_fp16 = einsum(equation = var_45896_equation_0, values = (var_45334_cast_fp16, var_45736_cast_fp16))[name = tensor("op_45896_cast_fp16")]; + tensor var_45898_interleave_0 = const()[name = tensor("op_45898_interleave_0"), val = tensor(false)]; + tensor var_45898_cast_fp16 = concat(axis = var_44457, interleave = var_45898_interleave_0, values = (var_45738_cast_fp16, var_45740_cast_fp16, var_45742_cast_fp16, var_45744_cast_fp16))[name = tensor("op_45898_cast_fp16")]; + tensor var_45900_interleave_0 = const()[name = tensor("op_45900_interleave_0"), val = tensor(false)]; + tensor var_45900_cast_fp16 = concat(axis = var_44457, interleave = var_45900_interleave_0, values = (var_45746_cast_fp16, var_45748_cast_fp16, var_45750_cast_fp16, var_45752_cast_fp16))[name = tensor("op_45900_cast_fp16")]; + tensor var_45902_interleave_0 = const()[name = tensor("op_45902_interleave_0"), val = tensor(false)]; + tensor var_45902_cast_fp16 = concat(axis = var_44457, interleave = var_45902_interleave_0, values = (var_45754_cast_fp16, var_45756_cast_fp16, var_45758_cast_fp16, var_45760_cast_fp16))[name = tensor("op_45902_cast_fp16")]; + tensor var_45904_interleave_0 = const()[name = tensor("op_45904_interleave_0"), val = tensor(false)]; + tensor var_45904_cast_fp16 = concat(axis = var_44457, interleave = var_45904_interleave_0, values = (var_45762_cast_fp16, var_45764_cast_fp16, var_45766_cast_fp16, var_45768_cast_fp16))[name = tensor("op_45904_cast_fp16")]; + tensor var_45906_interleave_0 = const()[name = tensor("op_45906_interleave_0"), val = tensor(false)]; + tensor var_45906_cast_fp16 = concat(axis = var_44457, interleave = var_45906_interleave_0, values = (var_45770_cast_fp16, var_45772_cast_fp16, var_45774_cast_fp16, var_45776_cast_fp16))[name = tensor("op_45906_cast_fp16")]; + tensor var_45908_interleave_0 = const()[name = tensor("op_45908_interleave_0"), val = tensor(false)]; + tensor var_45908_cast_fp16 = concat(axis = var_44457, interleave = var_45908_interleave_0, values = (var_45778_cast_fp16, var_45780_cast_fp16, var_45782_cast_fp16, var_45784_cast_fp16))[name = tensor("op_45908_cast_fp16")]; + tensor var_45910_interleave_0 = const()[name = tensor("op_45910_interleave_0"), val = tensor(false)]; + tensor var_45910_cast_fp16 = concat(axis = var_44457, interleave = var_45910_interleave_0, values = (var_45786_cast_fp16, var_45788_cast_fp16, var_45790_cast_fp16, var_45792_cast_fp16))[name = tensor("op_45910_cast_fp16")]; + tensor var_45912_interleave_0 = const()[name = tensor("op_45912_interleave_0"), val = tensor(false)]; + tensor var_45912_cast_fp16 = concat(axis = var_44457, interleave = var_45912_interleave_0, values = (var_45794_cast_fp16, var_45796_cast_fp16, var_45798_cast_fp16, var_45800_cast_fp16))[name = tensor("op_45912_cast_fp16")]; + tensor var_45914_interleave_0 = const()[name = tensor("op_45914_interleave_0"), val = tensor(false)]; + tensor var_45914_cast_fp16 = concat(axis = var_44457, interleave = var_45914_interleave_0, values = (var_45802_cast_fp16, var_45804_cast_fp16, var_45806_cast_fp16, var_45808_cast_fp16))[name = tensor("op_45914_cast_fp16")]; + tensor var_45916_interleave_0 = const()[name = tensor("op_45916_interleave_0"), val = tensor(false)]; + tensor var_45916_cast_fp16 = concat(axis = var_44457, interleave = var_45916_interleave_0, values = (var_45810_cast_fp16, var_45812_cast_fp16, var_45814_cast_fp16, var_45816_cast_fp16))[name = tensor("op_45916_cast_fp16")]; + tensor var_45918_interleave_0 = const()[name = tensor("op_45918_interleave_0"), val = tensor(false)]; + tensor var_45918_cast_fp16 = concat(axis = var_44457, interleave = var_45918_interleave_0, values = (var_45818_cast_fp16, var_45820_cast_fp16, var_45822_cast_fp16, var_45824_cast_fp16))[name = tensor("op_45918_cast_fp16")]; + tensor var_45920_interleave_0 = const()[name = tensor("op_45920_interleave_0"), val = tensor(false)]; + tensor var_45920_cast_fp16 = concat(axis = var_44457, interleave = var_45920_interleave_0, values = (var_45826_cast_fp16, var_45828_cast_fp16, var_45830_cast_fp16, var_45832_cast_fp16))[name = tensor("op_45920_cast_fp16")]; + tensor var_45922_interleave_0 = const()[name = tensor("op_45922_interleave_0"), val = tensor(false)]; + tensor var_45922_cast_fp16 = concat(axis = var_44457, interleave = var_45922_interleave_0, values = (var_45834_cast_fp16, var_45836_cast_fp16, var_45838_cast_fp16, var_45840_cast_fp16))[name = tensor("op_45922_cast_fp16")]; + tensor var_45924_interleave_0 = const()[name = tensor("op_45924_interleave_0"), val = tensor(false)]; + tensor var_45924_cast_fp16 = concat(axis = var_44457, interleave = var_45924_interleave_0, values = (var_45842_cast_fp16, var_45844_cast_fp16, var_45846_cast_fp16, var_45848_cast_fp16))[name = tensor("op_45924_cast_fp16")]; + tensor var_45926_interleave_0 = const()[name = tensor("op_45926_interleave_0"), val = tensor(false)]; + tensor var_45926_cast_fp16 = concat(axis = var_44457, interleave = var_45926_interleave_0, values = (var_45850_cast_fp16, var_45852_cast_fp16, var_45854_cast_fp16, var_45856_cast_fp16))[name = tensor("op_45926_cast_fp16")]; + tensor var_45928_interleave_0 = const()[name = tensor("op_45928_interleave_0"), val = tensor(false)]; + tensor var_45928_cast_fp16 = concat(axis = var_44457, interleave = var_45928_interleave_0, values = (var_45858_cast_fp16, var_45860_cast_fp16, var_45862_cast_fp16, var_45864_cast_fp16))[name = tensor("op_45928_cast_fp16")]; + tensor var_45930_interleave_0 = const()[name = tensor("op_45930_interleave_0"), val = tensor(false)]; + tensor var_45930_cast_fp16 = concat(axis = var_44457, interleave = var_45930_interleave_0, values = (var_45866_cast_fp16, var_45868_cast_fp16, var_45870_cast_fp16, var_45872_cast_fp16))[name = tensor("op_45930_cast_fp16")]; + tensor var_45932_interleave_0 = const()[name = tensor("op_45932_interleave_0"), val = tensor(false)]; + tensor var_45932_cast_fp16 = concat(axis = var_44457, interleave = var_45932_interleave_0, values = (var_45874_cast_fp16, var_45876_cast_fp16, var_45878_cast_fp16, var_45880_cast_fp16))[name = tensor("op_45932_cast_fp16")]; + tensor var_45934_interleave_0 = const()[name = tensor("op_45934_interleave_0"), val = tensor(false)]; + tensor var_45934_cast_fp16 = concat(axis = var_44457, interleave = var_45934_interleave_0, values = (var_45882_cast_fp16, var_45884_cast_fp16, var_45886_cast_fp16, var_45888_cast_fp16))[name = tensor("op_45934_cast_fp16")]; + tensor var_45936_interleave_0 = const()[name = tensor("op_45936_interleave_0"), val = tensor(false)]; + tensor var_45936_cast_fp16 = concat(axis = var_44457, interleave = var_45936_interleave_0, values = (var_45890_cast_fp16, var_45892_cast_fp16, var_45894_cast_fp16, var_45896_cast_fp16))[name = tensor("op_45936_cast_fp16")]; + tensor input_233_interleave_0 = const()[name = tensor("input_233_interleave_0"), val = tensor(false)]; + tensor input_233_cast_fp16 = concat(axis = var_44482, interleave = input_233_interleave_0, values = (var_45898_cast_fp16, var_45900_cast_fp16, var_45902_cast_fp16, var_45904_cast_fp16, var_45906_cast_fp16, var_45908_cast_fp16, var_45910_cast_fp16, var_45912_cast_fp16, var_45914_cast_fp16, var_45916_cast_fp16, var_45918_cast_fp16, var_45920_cast_fp16, var_45922_cast_fp16, var_45924_cast_fp16, var_45926_cast_fp16, var_45928_cast_fp16, var_45930_cast_fp16, var_45932_cast_fp16, var_45934_cast_fp16, var_45936_cast_fp16))[name = tensor("input_233_cast_fp16")]; + tensor var_45941 = const()[name = tensor("op_45941"), val = tensor([1, 1])]; + tensor var_45943 = const()[name = tensor("op_45943"), val = tensor([1, 1])]; + tensor obj_119_pad_type_0 = const()[name = tensor("obj_119_pad_type_0"), val = tensor("custom")]; + tensor obj_119_pad_0 = const()[name = tensor("obj_119_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_29_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_29_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1165381760)))]; + tensor layers_29_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_29_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1168658624)))]; + tensor obj_119_cast_fp16 = conv(bias = layers_29_self_attn_o_proj_bias_to_fp16, dilations = var_45943, groups = var_44482, pad = obj_119_pad_0, pad_type = obj_119_pad_type_0, strides = var_45941, weight = layers_29_self_attn_o_proj_weight_to_fp16, x = input_233_cast_fp16)[name = tensor("obj_119_cast_fp16")]; + tensor inputs_119_cast_fp16 = add(x = inputs_117_cast_fp16, y = obj_119_cast_fp16)[name = tensor("inputs_119_cast_fp16")]; + tensor var_45949 = const()[name = tensor("op_45949"), val = tensor([1])]; + tensor channels_mean_119_cast_fp16 = reduce_mean(axes = var_45949, keep_dims = var_44483, x = inputs_119_cast_fp16)[name = tensor("channels_mean_119_cast_fp16")]; + tensor zero_mean_119_cast_fp16 = sub(x = inputs_119_cast_fp16, y = channels_mean_119_cast_fp16)[name = tensor("zero_mean_119_cast_fp16")]; + tensor zero_mean_sq_119_cast_fp16 = mul(x = zero_mean_119_cast_fp16, y = zero_mean_119_cast_fp16)[name = tensor("zero_mean_sq_119_cast_fp16")]; + tensor var_45953 = const()[name = tensor("op_45953"), val = tensor([1])]; + tensor var_45954_cast_fp16 = reduce_mean(axes = var_45953, keep_dims = var_44483, x = zero_mean_sq_119_cast_fp16)[name = tensor("op_45954_cast_fp16")]; + tensor var_45955_to_fp16 = const()[name = tensor("op_45955_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_45956_cast_fp16 = add(x = var_45954_cast_fp16, y = var_45955_to_fp16)[name = tensor("op_45956_cast_fp16")]; + tensor denom_119_epsilon_0_to_fp16 = const()[name = tensor("denom_119_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_119_cast_fp16 = rsqrt(epsilon = denom_119_epsilon_0_to_fp16, x = var_45956_cast_fp16)[name = tensor("denom_119_cast_fp16")]; + tensor out_119_cast_fp16 = mul(x = zero_mean_119_cast_fp16, y = denom_119_cast_fp16)[name = tensor("out_119_cast_fp16")]; + tensor input_235_gamma_0_to_fp16 = const()[name = tensor("input_235_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1168661248)))]; + tensor input_235_beta_0_to_fp16 = const()[name = tensor("input_235_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1168663872)))]; + tensor input_235_epsilon_0_to_fp16 = const()[name = tensor("input_235_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_235_cast_fp16 = batch_norm(beta = input_235_beta_0_to_fp16, epsilon = input_235_epsilon_0_to_fp16, gamma = input_235_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_119_cast_fp16)[name = tensor("input_235_cast_fp16")]; + tensor var_45967 = const()[name = tensor("op_45967"), val = tensor([1, 1])]; + tensor var_45969 = const()[name = tensor("op_45969"), val = tensor([1, 1])]; + tensor input_237_pad_type_0 = const()[name = tensor("input_237_pad_type_0"), val = tensor("custom")]; + tensor input_237_pad_0 = const()[name = tensor("input_237_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_29_fc1_weight_to_fp16 = const()[name = tensor("layers_29_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1168666496)))]; + tensor layers_29_fc1_bias_to_fp16 = const()[name = tensor("layers_29_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1181773760)))]; + tensor input_237_cast_fp16 = conv(bias = layers_29_fc1_bias_to_fp16, dilations = var_45969, groups = var_44482, pad = input_237_pad_0, pad_type = input_237_pad_type_0, strides = var_45967, weight = layers_29_fc1_weight_to_fp16, x = input_235_cast_fp16)[name = tensor("input_237_cast_fp16")]; + tensor input_239_mode_0 = const()[name = tensor("input_239_mode_0"), val = tensor("EXACT")]; + tensor input_239_cast_fp16 = gelu(mode = input_239_mode_0, x = input_237_cast_fp16)[name = tensor("input_239_cast_fp16")]; + tensor var_45975 = const()[name = tensor("op_45975"), val = tensor([1, 1])]; + tensor var_45977 = const()[name = tensor("op_45977"), val = tensor([1, 1])]; + tensor hidden_states_63_pad_type_0 = const()[name = tensor("hidden_states_63_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_63_pad_0 = const()[name = tensor("hidden_states_63_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_29_fc2_weight_to_fp16 = const()[name = tensor("layers_29_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1181784064)))]; + tensor layers_29_fc2_bias_to_fp16 = const()[name = tensor("layers_29_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1194891328)))]; + tensor hidden_states_63_cast_fp16 = conv(bias = layers_29_fc2_bias_to_fp16, dilations = var_45977, groups = var_44482, pad = hidden_states_63_pad_0, pad_type = hidden_states_63_pad_type_0, strides = var_45975, weight = layers_29_fc2_weight_to_fp16, x = input_239_cast_fp16)[name = tensor("hidden_states_63_cast_fp16")]; + tensor inputs_121_cast_fp16 = add(x = inputs_119_cast_fp16, y = hidden_states_63_cast_fp16)[name = tensor("inputs_121_cast_fp16")]; + tensor var_45984 = const()[name = tensor("op_45984"), val = tensor(3)]; + tensor var_46009 = const()[name = tensor("op_46009"), val = tensor(1)]; + tensor var_46010 = const()[name = tensor("op_46010"), val = tensor(true)]; + tensor var_46020 = const()[name = tensor("op_46020"), val = tensor([1])]; + tensor channels_mean_121_cast_fp16 = reduce_mean(axes = var_46020, keep_dims = var_46010, x = inputs_121_cast_fp16)[name = tensor("channels_mean_121_cast_fp16")]; + tensor zero_mean_121_cast_fp16 = sub(x = inputs_121_cast_fp16, y = channels_mean_121_cast_fp16)[name = tensor("zero_mean_121_cast_fp16")]; + tensor zero_mean_sq_121_cast_fp16 = mul(x = zero_mean_121_cast_fp16, y = zero_mean_121_cast_fp16)[name = tensor("zero_mean_sq_121_cast_fp16")]; + tensor var_46024 = const()[name = tensor("op_46024"), val = tensor([1])]; + tensor var_46025_cast_fp16 = reduce_mean(axes = var_46024, keep_dims = var_46010, x = zero_mean_sq_121_cast_fp16)[name = tensor("op_46025_cast_fp16")]; + tensor var_46026_to_fp16 = const()[name = tensor("op_46026_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_46027_cast_fp16 = add(x = var_46025_cast_fp16, y = var_46026_to_fp16)[name = tensor("op_46027_cast_fp16")]; + tensor denom_121_epsilon_0_to_fp16 = const()[name = tensor("denom_121_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_121_cast_fp16 = rsqrt(epsilon = denom_121_epsilon_0_to_fp16, x = var_46027_cast_fp16)[name = tensor("denom_121_cast_fp16")]; + tensor out_121_cast_fp16 = mul(x = zero_mean_121_cast_fp16, y = denom_121_cast_fp16)[name = tensor("out_121_cast_fp16")]; + tensor obj_121_gamma_0_to_fp16 = const()[name = tensor("obj_121_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1194893952)))]; + tensor obj_121_beta_0_to_fp16 = const()[name = tensor("obj_121_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1194896576)))]; + tensor obj_121_epsilon_0_to_fp16 = const()[name = tensor("obj_121_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_121_cast_fp16 = batch_norm(beta = obj_121_beta_0_to_fp16, epsilon = obj_121_epsilon_0_to_fp16, gamma = obj_121_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_121_cast_fp16)[name = tensor("obj_121_cast_fp16")]; + tensor var_46042 = const()[name = tensor("op_46042"), val = tensor([1, 1])]; + tensor var_46044 = const()[name = tensor("op_46044"), val = tensor([1, 1])]; + tensor query_61_pad_type_0 = const()[name = tensor("query_61_pad_type_0"), val = tensor("custom")]; + tensor query_61_pad_0 = const()[name = tensor("query_61_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_30_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_30_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1194899200)))]; + tensor layers_30_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_30_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1198176064)))]; + tensor query_61_cast_fp16 = conv(bias = layers_30_self_attn_q_proj_bias_to_fp16, dilations = var_46044, groups = var_46009, pad = query_61_pad_0, pad_type = query_61_pad_type_0, strides = var_46042, weight = layers_30_self_attn_q_proj_weight_to_fp16, x = obj_121_cast_fp16)[name = tensor("query_61_cast_fp16")]; + tensor var_46048 = const()[name = tensor("op_46048"), val = tensor([1, 1])]; + tensor var_46050 = const()[name = tensor("op_46050"), val = tensor([1, 1])]; + tensor key_61_pad_type_0 = const()[name = tensor("key_61_pad_type_0"), val = tensor("custom")]; + tensor key_61_pad_0 = const()[name = tensor("key_61_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_30_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_30_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1198178688)))]; + tensor key_61_cast_fp16 = conv(dilations = var_46050, groups = var_46009, pad = key_61_pad_0, pad_type = key_61_pad_type_0, strides = var_46048, weight = layers_30_self_attn_k_proj_weight_to_fp16, x = obj_121_cast_fp16)[name = tensor("key_61_cast_fp16")]; + tensor var_46055 = const()[name = tensor("op_46055"), val = tensor([1, 1])]; + tensor var_46057 = const()[name = tensor("op_46057"), val = tensor([1, 1])]; + tensor value_61_pad_type_0 = const()[name = tensor("value_61_pad_type_0"), val = tensor("custom")]; + tensor value_61_pad_0 = const()[name = tensor("value_61_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_30_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_30_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1201455552)))]; + tensor layers_30_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_30_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1204732416)))]; + tensor value_61_cast_fp16 = conv(bias = layers_30_self_attn_v_proj_bias_to_fp16, dilations = var_46057, groups = var_46009, pad = value_61_pad_0, pad_type = value_61_pad_type_0, strides = var_46055, weight = layers_30_self_attn_v_proj_weight_to_fp16, x = obj_121_cast_fp16)[name = tensor("value_61_cast_fp16")]; + tensor var_46064_begin_0 = const()[name = tensor("op_46064_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_46064_end_0 = const()[name = tensor("op_46064_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_46064_end_mask_0 = const()[name = tensor("op_46064_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_46064_cast_fp16 = slice_by_index(begin = var_46064_begin_0, end = var_46064_end_0, end_mask = var_46064_end_mask_0, x = query_61_cast_fp16)[name = tensor("op_46064_cast_fp16")]; + tensor var_46068_begin_0 = const()[name = tensor("op_46068_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_46068_end_0 = const()[name = tensor("op_46068_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_46068_end_mask_0 = const()[name = tensor("op_46068_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_46068_cast_fp16 = slice_by_index(begin = var_46068_begin_0, end = var_46068_end_0, end_mask = var_46068_end_mask_0, x = query_61_cast_fp16)[name = tensor("op_46068_cast_fp16")]; + tensor var_46072_begin_0 = const()[name = tensor("op_46072_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_46072_end_0 = const()[name = tensor("op_46072_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_46072_end_mask_0 = const()[name = tensor("op_46072_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_46072_cast_fp16 = slice_by_index(begin = var_46072_begin_0, end = var_46072_end_0, end_mask = var_46072_end_mask_0, x = query_61_cast_fp16)[name = tensor("op_46072_cast_fp16")]; + tensor var_46076_begin_0 = const()[name = tensor("op_46076_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_46076_end_0 = const()[name = tensor("op_46076_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_46076_end_mask_0 = const()[name = tensor("op_46076_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_46076_cast_fp16 = slice_by_index(begin = var_46076_begin_0, end = var_46076_end_0, end_mask = var_46076_end_mask_0, x = query_61_cast_fp16)[name = tensor("op_46076_cast_fp16")]; + tensor var_46080_begin_0 = const()[name = tensor("op_46080_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_46080_end_0 = const()[name = tensor("op_46080_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_46080_end_mask_0 = const()[name = tensor("op_46080_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_46080_cast_fp16 = slice_by_index(begin = var_46080_begin_0, end = var_46080_end_0, end_mask = var_46080_end_mask_0, x = query_61_cast_fp16)[name = tensor("op_46080_cast_fp16")]; + tensor var_46084_begin_0 = const()[name = tensor("op_46084_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_46084_end_0 = const()[name = tensor("op_46084_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_46084_end_mask_0 = const()[name = tensor("op_46084_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_46084_cast_fp16 = slice_by_index(begin = var_46084_begin_0, end = var_46084_end_0, end_mask = var_46084_end_mask_0, x = query_61_cast_fp16)[name = tensor("op_46084_cast_fp16")]; + tensor var_46088_begin_0 = const()[name = tensor("op_46088_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_46088_end_0 = const()[name = tensor("op_46088_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_46088_end_mask_0 = const()[name = tensor("op_46088_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_46088_cast_fp16 = slice_by_index(begin = var_46088_begin_0, end = var_46088_end_0, end_mask = var_46088_end_mask_0, x = query_61_cast_fp16)[name = tensor("op_46088_cast_fp16")]; + tensor var_46092_begin_0 = const()[name = tensor("op_46092_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_46092_end_0 = const()[name = tensor("op_46092_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_46092_end_mask_0 = const()[name = tensor("op_46092_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_46092_cast_fp16 = slice_by_index(begin = var_46092_begin_0, end = var_46092_end_0, end_mask = var_46092_end_mask_0, x = query_61_cast_fp16)[name = tensor("op_46092_cast_fp16")]; + tensor var_46096_begin_0 = const()[name = tensor("op_46096_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_46096_end_0 = const()[name = tensor("op_46096_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_46096_end_mask_0 = const()[name = tensor("op_46096_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_46096_cast_fp16 = slice_by_index(begin = var_46096_begin_0, end = var_46096_end_0, end_mask = var_46096_end_mask_0, x = query_61_cast_fp16)[name = tensor("op_46096_cast_fp16")]; + tensor var_46100_begin_0 = const()[name = tensor("op_46100_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_46100_end_0 = const()[name = tensor("op_46100_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_46100_end_mask_0 = const()[name = tensor("op_46100_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_46100_cast_fp16 = slice_by_index(begin = var_46100_begin_0, end = var_46100_end_0, end_mask = var_46100_end_mask_0, x = query_61_cast_fp16)[name = tensor("op_46100_cast_fp16")]; + tensor var_46104_begin_0 = const()[name = tensor("op_46104_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_46104_end_0 = const()[name = tensor("op_46104_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_46104_end_mask_0 = const()[name = tensor("op_46104_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_46104_cast_fp16 = slice_by_index(begin = var_46104_begin_0, end = var_46104_end_0, end_mask = var_46104_end_mask_0, x = query_61_cast_fp16)[name = tensor("op_46104_cast_fp16")]; + tensor var_46108_begin_0 = const()[name = tensor("op_46108_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_46108_end_0 = const()[name = tensor("op_46108_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_46108_end_mask_0 = const()[name = tensor("op_46108_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_46108_cast_fp16 = slice_by_index(begin = var_46108_begin_0, end = var_46108_end_0, end_mask = var_46108_end_mask_0, x = query_61_cast_fp16)[name = tensor("op_46108_cast_fp16")]; + tensor var_46112_begin_0 = const()[name = tensor("op_46112_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_46112_end_0 = const()[name = tensor("op_46112_end_0"), val = tensor([1, 832, 1, 1500])]; + tensor var_46112_end_mask_0 = const()[name = tensor("op_46112_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_46112_cast_fp16 = slice_by_index(begin = var_46112_begin_0, end = var_46112_end_0, end_mask = var_46112_end_mask_0, x = query_61_cast_fp16)[name = tensor("op_46112_cast_fp16")]; + tensor var_46116_begin_0 = const()[name = tensor("op_46116_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_46116_end_0 = const()[name = tensor("op_46116_end_0"), val = tensor([1, 896, 1, 1500])]; + tensor var_46116_end_mask_0 = const()[name = tensor("op_46116_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_46116_cast_fp16 = slice_by_index(begin = var_46116_begin_0, end = var_46116_end_0, end_mask = var_46116_end_mask_0, x = query_61_cast_fp16)[name = tensor("op_46116_cast_fp16")]; + tensor var_46120_begin_0 = const()[name = tensor("op_46120_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_46120_end_0 = const()[name = tensor("op_46120_end_0"), val = tensor([1, 960, 1, 1500])]; + tensor var_46120_end_mask_0 = const()[name = tensor("op_46120_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_46120_cast_fp16 = slice_by_index(begin = var_46120_begin_0, end = var_46120_end_0, end_mask = var_46120_end_mask_0, x = query_61_cast_fp16)[name = tensor("op_46120_cast_fp16")]; + tensor var_46124_begin_0 = const()[name = tensor("op_46124_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_46124_end_0 = const()[name = tensor("op_46124_end_0"), val = tensor([1, 1024, 1, 1500])]; + tensor var_46124_end_mask_0 = const()[name = tensor("op_46124_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_46124_cast_fp16 = slice_by_index(begin = var_46124_begin_0, end = var_46124_end_0, end_mask = var_46124_end_mask_0, x = query_61_cast_fp16)[name = tensor("op_46124_cast_fp16")]; + tensor var_46128_begin_0 = const()[name = tensor("op_46128_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_46128_end_0 = const()[name = tensor("op_46128_end_0"), val = tensor([1, 1088, 1, 1500])]; + tensor var_46128_end_mask_0 = const()[name = tensor("op_46128_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_46128_cast_fp16 = slice_by_index(begin = var_46128_begin_0, end = var_46128_end_0, end_mask = var_46128_end_mask_0, x = query_61_cast_fp16)[name = tensor("op_46128_cast_fp16")]; + tensor var_46132_begin_0 = const()[name = tensor("op_46132_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_46132_end_0 = const()[name = tensor("op_46132_end_0"), val = tensor([1, 1152, 1, 1500])]; + tensor var_46132_end_mask_0 = const()[name = tensor("op_46132_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_46132_cast_fp16 = slice_by_index(begin = var_46132_begin_0, end = var_46132_end_0, end_mask = var_46132_end_mask_0, x = query_61_cast_fp16)[name = tensor("op_46132_cast_fp16")]; + tensor var_46136_begin_0 = const()[name = tensor("op_46136_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_46136_end_0 = const()[name = tensor("op_46136_end_0"), val = tensor([1, 1216, 1, 1500])]; + tensor var_46136_end_mask_0 = const()[name = tensor("op_46136_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_46136_cast_fp16 = slice_by_index(begin = var_46136_begin_0, end = var_46136_end_0, end_mask = var_46136_end_mask_0, x = query_61_cast_fp16)[name = tensor("op_46136_cast_fp16")]; + tensor var_46140_begin_0 = const()[name = tensor("op_46140_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_46140_end_0 = const()[name = tensor("op_46140_end_0"), val = tensor([1, 1280, 1, 1500])]; + tensor var_46140_end_mask_0 = const()[name = tensor("op_46140_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_46140_cast_fp16 = slice_by_index(begin = var_46140_begin_0, end = var_46140_end_0, end_mask = var_46140_end_mask_0, x = query_61_cast_fp16)[name = tensor("op_46140_cast_fp16")]; + tensor var_46149_begin_0 = const()[name = tensor("op_46149_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_46149_end_0 = const()[name = tensor("op_46149_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_46149_end_mask_0 = const()[name = tensor("op_46149_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46149_cast_fp16 = slice_by_index(begin = var_46149_begin_0, end = var_46149_end_0, end_mask = var_46149_end_mask_0, x = var_46064_cast_fp16)[name = tensor("op_46149_cast_fp16")]; + tensor var_46156_begin_0 = const()[name = tensor("op_46156_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_46156_end_0 = const()[name = tensor("op_46156_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_46156_end_mask_0 = const()[name = tensor("op_46156_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46156_cast_fp16 = slice_by_index(begin = var_46156_begin_0, end = var_46156_end_0, end_mask = var_46156_end_mask_0, x = var_46064_cast_fp16)[name = tensor("op_46156_cast_fp16")]; + tensor var_46163_begin_0 = const()[name = tensor("op_46163_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_46163_end_0 = const()[name = tensor("op_46163_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_46163_end_mask_0 = const()[name = tensor("op_46163_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46163_cast_fp16 = slice_by_index(begin = var_46163_begin_0, end = var_46163_end_0, end_mask = var_46163_end_mask_0, x = var_46064_cast_fp16)[name = tensor("op_46163_cast_fp16")]; + tensor var_46170_begin_0 = const()[name = tensor("op_46170_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_46170_end_0 = const()[name = tensor("op_46170_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_46170_end_mask_0 = const()[name = tensor("op_46170_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46170_cast_fp16 = slice_by_index(begin = var_46170_begin_0, end = var_46170_end_0, end_mask = var_46170_end_mask_0, x = var_46064_cast_fp16)[name = tensor("op_46170_cast_fp16")]; + tensor var_46177_begin_0 = const()[name = tensor("op_46177_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_46177_end_0 = const()[name = tensor("op_46177_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_46177_end_mask_0 = const()[name = tensor("op_46177_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46177_cast_fp16 = slice_by_index(begin = var_46177_begin_0, end = var_46177_end_0, end_mask = var_46177_end_mask_0, x = var_46068_cast_fp16)[name = tensor("op_46177_cast_fp16")]; + tensor var_46184_begin_0 = const()[name = tensor("op_46184_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_46184_end_0 = const()[name = tensor("op_46184_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_46184_end_mask_0 = const()[name = tensor("op_46184_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46184_cast_fp16 = slice_by_index(begin = var_46184_begin_0, end = var_46184_end_0, end_mask = var_46184_end_mask_0, x = var_46068_cast_fp16)[name = tensor("op_46184_cast_fp16")]; + tensor var_46191_begin_0 = const()[name = tensor("op_46191_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_46191_end_0 = const()[name = tensor("op_46191_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_46191_end_mask_0 = const()[name = tensor("op_46191_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46191_cast_fp16 = slice_by_index(begin = var_46191_begin_0, end = var_46191_end_0, end_mask = var_46191_end_mask_0, x = var_46068_cast_fp16)[name = tensor("op_46191_cast_fp16")]; + tensor var_46198_begin_0 = const()[name = tensor("op_46198_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_46198_end_0 = const()[name = tensor("op_46198_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_46198_end_mask_0 = const()[name = tensor("op_46198_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46198_cast_fp16 = slice_by_index(begin = var_46198_begin_0, end = var_46198_end_0, end_mask = var_46198_end_mask_0, x = var_46068_cast_fp16)[name = tensor("op_46198_cast_fp16")]; + tensor var_46205_begin_0 = const()[name = tensor("op_46205_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_46205_end_0 = const()[name = tensor("op_46205_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_46205_end_mask_0 = const()[name = tensor("op_46205_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46205_cast_fp16 = slice_by_index(begin = var_46205_begin_0, end = var_46205_end_0, end_mask = var_46205_end_mask_0, x = var_46072_cast_fp16)[name = tensor("op_46205_cast_fp16")]; + tensor var_46212_begin_0 = const()[name = tensor("op_46212_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_46212_end_0 = const()[name = tensor("op_46212_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_46212_end_mask_0 = const()[name = tensor("op_46212_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46212_cast_fp16 = slice_by_index(begin = var_46212_begin_0, end = var_46212_end_0, end_mask = var_46212_end_mask_0, x = var_46072_cast_fp16)[name = tensor("op_46212_cast_fp16")]; + tensor var_46219_begin_0 = const()[name = tensor("op_46219_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_46219_end_0 = const()[name = tensor("op_46219_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_46219_end_mask_0 = const()[name = tensor("op_46219_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46219_cast_fp16 = slice_by_index(begin = var_46219_begin_0, end = var_46219_end_0, end_mask = var_46219_end_mask_0, x = var_46072_cast_fp16)[name = tensor("op_46219_cast_fp16")]; + tensor var_46226_begin_0 = const()[name = tensor("op_46226_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_46226_end_0 = const()[name = tensor("op_46226_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_46226_end_mask_0 = const()[name = tensor("op_46226_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46226_cast_fp16 = slice_by_index(begin = var_46226_begin_0, end = var_46226_end_0, end_mask = var_46226_end_mask_0, x = var_46072_cast_fp16)[name = tensor("op_46226_cast_fp16")]; + tensor var_46233_begin_0 = const()[name = tensor("op_46233_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_46233_end_0 = const()[name = tensor("op_46233_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_46233_end_mask_0 = const()[name = tensor("op_46233_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46233_cast_fp16 = slice_by_index(begin = var_46233_begin_0, end = var_46233_end_0, end_mask = var_46233_end_mask_0, x = var_46076_cast_fp16)[name = tensor("op_46233_cast_fp16")]; + tensor var_46240_begin_0 = const()[name = tensor("op_46240_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_46240_end_0 = const()[name = tensor("op_46240_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_46240_end_mask_0 = const()[name = tensor("op_46240_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46240_cast_fp16 = slice_by_index(begin = var_46240_begin_0, end = var_46240_end_0, end_mask = var_46240_end_mask_0, x = var_46076_cast_fp16)[name = tensor("op_46240_cast_fp16")]; + tensor var_46247_begin_0 = const()[name = tensor("op_46247_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_46247_end_0 = const()[name = tensor("op_46247_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_46247_end_mask_0 = const()[name = tensor("op_46247_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46247_cast_fp16 = slice_by_index(begin = var_46247_begin_0, end = var_46247_end_0, end_mask = var_46247_end_mask_0, x = var_46076_cast_fp16)[name = tensor("op_46247_cast_fp16")]; + tensor var_46254_begin_0 = const()[name = tensor("op_46254_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_46254_end_0 = const()[name = tensor("op_46254_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_46254_end_mask_0 = const()[name = tensor("op_46254_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46254_cast_fp16 = slice_by_index(begin = var_46254_begin_0, end = var_46254_end_0, end_mask = var_46254_end_mask_0, x = var_46076_cast_fp16)[name = tensor("op_46254_cast_fp16")]; + tensor var_46261_begin_0 = const()[name = tensor("op_46261_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_46261_end_0 = const()[name = tensor("op_46261_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_46261_end_mask_0 = const()[name = tensor("op_46261_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46261_cast_fp16 = slice_by_index(begin = var_46261_begin_0, end = var_46261_end_0, end_mask = var_46261_end_mask_0, x = var_46080_cast_fp16)[name = tensor("op_46261_cast_fp16")]; + tensor var_46268_begin_0 = const()[name = tensor("op_46268_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_46268_end_0 = const()[name = tensor("op_46268_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_46268_end_mask_0 = const()[name = tensor("op_46268_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46268_cast_fp16 = slice_by_index(begin = var_46268_begin_0, end = var_46268_end_0, end_mask = var_46268_end_mask_0, x = var_46080_cast_fp16)[name = tensor("op_46268_cast_fp16")]; + tensor var_46275_begin_0 = const()[name = tensor("op_46275_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_46275_end_0 = const()[name = tensor("op_46275_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_46275_end_mask_0 = const()[name = tensor("op_46275_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46275_cast_fp16 = slice_by_index(begin = var_46275_begin_0, end = var_46275_end_0, end_mask = var_46275_end_mask_0, x = var_46080_cast_fp16)[name = tensor("op_46275_cast_fp16")]; + tensor var_46282_begin_0 = const()[name = tensor("op_46282_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_46282_end_0 = const()[name = tensor("op_46282_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_46282_end_mask_0 = const()[name = tensor("op_46282_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46282_cast_fp16 = slice_by_index(begin = var_46282_begin_0, end = var_46282_end_0, end_mask = var_46282_end_mask_0, x = var_46080_cast_fp16)[name = tensor("op_46282_cast_fp16")]; + tensor var_46289_begin_0 = const()[name = tensor("op_46289_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_46289_end_0 = const()[name = tensor("op_46289_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_46289_end_mask_0 = const()[name = tensor("op_46289_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46289_cast_fp16 = slice_by_index(begin = var_46289_begin_0, end = var_46289_end_0, end_mask = var_46289_end_mask_0, x = var_46084_cast_fp16)[name = tensor("op_46289_cast_fp16")]; + tensor var_46296_begin_0 = const()[name = tensor("op_46296_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_46296_end_0 = const()[name = tensor("op_46296_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_46296_end_mask_0 = const()[name = tensor("op_46296_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46296_cast_fp16 = slice_by_index(begin = var_46296_begin_0, end = var_46296_end_0, end_mask = var_46296_end_mask_0, x = var_46084_cast_fp16)[name = tensor("op_46296_cast_fp16")]; + tensor var_46303_begin_0 = const()[name = tensor("op_46303_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_46303_end_0 = const()[name = tensor("op_46303_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_46303_end_mask_0 = const()[name = tensor("op_46303_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46303_cast_fp16 = slice_by_index(begin = var_46303_begin_0, end = var_46303_end_0, end_mask = var_46303_end_mask_0, x = var_46084_cast_fp16)[name = tensor("op_46303_cast_fp16")]; + tensor var_46310_begin_0 = const()[name = tensor("op_46310_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_46310_end_0 = const()[name = tensor("op_46310_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_46310_end_mask_0 = const()[name = tensor("op_46310_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46310_cast_fp16 = slice_by_index(begin = var_46310_begin_0, end = var_46310_end_0, end_mask = var_46310_end_mask_0, x = var_46084_cast_fp16)[name = tensor("op_46310_cast_fp16")]; + tensor var_46317_begin_0 = const()[name = tensor("op_46317_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_46317_end_0 = const()[name = tensor("op_46317_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_46317_end_mask_0 = const()[name = tensor("op_46317_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46317_cast_fp16 = slice_by_index(begin = var_46317_begin_0, end = var_46317_end_0, end_mask = var_46317_end_mask_0, x = var_46088_cast_fp16)[name = tensor("op_46317_cast_fp16")]; + tensor var_46324_begin_0 = const()[name = tensor("op_46324_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_46324_end_0 = const()[name = tensor("op_46324_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_46324_end_mask_0 = const()[name = tensor("op_46324_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46324_cast_fp16 = slice_by_index(begin = var_46324_begin_0, end = var_46324_end_0, end_mask = var_46324_end_mask_0, x = var_46088_cast_fp16)[name = tensor("op_46324_cast_fp16")]; + tensor var_46331_begin_0 = const()[name = tensor("op_46331_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_46331_end_0 = const()[name = tensor("op_46331_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_46331_end_mask_0 = const()[name = tensor("op_46331_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46331_cast_fp16 = slice_by_index(begin = var_46331_begin_0, end = var_46331_end_0, end_mask = var_46331_end_mask_0, x = var_46088_cast_fp16)[name = tensor("op_46331_cast_fp16")]; + tensor var_46338_begin_0 = const()[name = tensor("op_46338_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_46338_end_0 = const()[name = tensor("op_46338_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_46338_end_mask_0 = const()[name = tensor("op_46338_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46338_cast_fp16 = slice_by_index(begin = var_46338_begin_0, end = var_46338_end_0, end_mask = var_46338_end_mask_0, x = var_46088_cast_fp16)[name = tensor("op_46338_cast_fp16")]; + tensor var_46345_begin_0 = const()[name = tensor("op_46345_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_46345_end_0 = const()[name = tensor("op_46345_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_46345_end_mask_0 = const()[name = tensor("op_46345_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46345_cast_fp16 = slice_by_index(begin = var_46345_begin_0, end = var_46345_end_0, end_mask = var_46345_end_mask_0, x = var_46092_cast_fp16)[name = tensor("op_46345_cast_fp16")]; + tensor var_46352_begin_0 = const()[name = tensor("op_46352_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_46352_end_0 = const()[name = tensor("op_46352_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_46352_end_mask_0 = const()[name = tensor("op_46352_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46352_cast_fp16 = slice_by_index(begin = var_46352_begin_0, end = var_46352_end_0, end_mask = var_46352_end_mask_0, x = var_46092_cast_fp16)[name = tensor("op_46352_cast_fp16")]; + tensor var_46359_begin_0 = const()[name = tensor("op_46359_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_46359_end_0 = const()[name = tensor("op_46359_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_46359_end_mask_0 = const()[name = tensor("op_46359_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46359_cast_fp16 = slice_by_index(begin = var_46359_begin_0, end = var_46359_end_0, end_mask = var_46359_end_mask_0, x = var_46092_cast_fp16)[name = tensor("op_46359_cast_fp16")]; + tensor var_46366_begin_0 = const()[name = tensor("op_46366_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_46366_end_0 = const()[name = tensor("op_46366_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_46366_end_mask_0 = const()[name = tensor("op_46366_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46366_cast_fp16 = slice_by_index(begin = var_46366_begin_0, end = var_46366_end_0, end_mask = var_46366_end_mask_0, x = var_46092_cast_fp16)[name = tensor("op_46366_cast_fp16")]; + tensor var_46373_begin_0 = const()[name = tensor("op_46373_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_46373_end_0 = const()[name = tensor("op_46373_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_46373_end_mask_0 = const()[name = tensor("op_46373_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46373_cast_fp16 = slice_by_index(begin = var_46373_begin_0, end = var_46373_end_0, end_mask = var_46373_end_mask_0, x = var_46096_cast_fp16)[name = tensor("op_46373_cast_fp16")]; + tensor var_46380_begin_0 = const()[name = tensor("op_46380_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_46380_end_0 = const()[name = tensor("op_46380_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_46380_end_mask_0 = const()[name = tensor("op_46380_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46380_cast_fp16 = slice_by_index(begin = var_46380_begin_0, end = var_46380_end_0, end_mask = var_46380_end_mask_0, x = var_46096_cast_fp16)[name = tensor("op_46380_cast_fp16")]; + tensor var_46387_begin_0 = const()[name = tensor("op_46387_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_46387_end_0 = const()[name = tensor("op_46387_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_46387_end_mask_0 = const()[name = tensor("op_46387_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46387_cast_fp16 = slice_by_index(begin = var_46387_begin_0, end = var_46387_end_0, end_mask = var_46387_end_mask_0, x = var_46096_cast_fp16)[name = tensor("op_46387_cast_fp16")]; + tensor var_46394_begin_0 = const()[name = tensor("op_46394_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_46394_end_0 = const()[name = tensor("op_46394_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_46394_end_mask_0 = const()[name = tensor("op_46394_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46394_cast_fp16 = slice_by_index(begin = var_46394_begin_0, end = var_46394_end_0, end_mask = var_46394_end_mask_0, x = var_46096_cast_fp16)[name = tensor("op_46394_cast_fp16")]; + tensor var_46401_begin_0 = const()[name = tensor("op_46401_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_46401_end_0 = const()[name = tensor("op_46401_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_46401_end_mask_0 = const()[name = tensor("op_46401_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46401_cast_fp16 = slice_by_index(begin = var_46401_begin_0, end = var_46401_end_0, end_mask = var_46401_end_mask_0, x = var_46100_cast_fp16)[name = tensor("op_46401_cast_fp16")]; + tensor var_46408_begin_0 = const()[name = tensor("op_46408_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_46408_end_0 = const()[name = tensor("op_46408_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_46408_end_mask_0 = const()[name = tensor("op_46408_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46408_cast_fp16 = slice_by_index(begin = var_46408_begin_0, end = var_46408_end_0, end_mask = var_46408_end_mask_0, x = var_46100_cast_fp16)[name = tensor("op_46408_cast_fp16")]; + tensor var_46415_begin_0 = const()[name = tensor("op_46415_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_46415_end_0 = const()[name = tensor("op_46415_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_46415_end_mask_0 = const()[name = tensor("op_46415_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46415_cast_fp16 = slice_by_index(begin = var_46415_begin_0, end = var_46415_end_0, end_mask = var_46415_end_mask_0, x = var_46100_cast_fp16)[name = tensor("op_46415_cast_fp16")]; + tensor var_46422_begin_0 = const()[name = tensor("op_46422_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_46422_end_0 = const()[name = tensor("op_46422_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_46422_end_mask_0 = const()[name = tensor("op_46422_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46422_cast_fp16 = slice_by_index(begin = var_46422_begin_0, end = var_46422_end_0, end_mask = var_46422_end_mask_0, x = var_46100_cast_fp16)[name = tensor("op_46422_cast_fp16")]; + tensor var_46429_begin_0 = const()[name = tensor("op_46429_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_46429_end_0 = const()[name = tensor("op_46429_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_46429_end_mask_0 = const()[name = tensor("op_46429_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46429_cast_fp16 = slice_by_index(begin = var_46429_begin_0, end = var_46429_end_0, end_mask = var_46429_end_mask_0, x = var_46104_cast_fp16)[name = tensor("op_46429_cast_fp16")]; + tensor var_46436_begin_0 = const()[name = tensor("op_46436_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_46436_end_0 = const()[name = tensor("op_46436_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_46436_end_mask_0 = const()[name = tensor("op_46436_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46436_cast_fp16 = slice_by_index(begin = var_46436_begin_0, end = var_46436_end_0, end_mask = var_46436_end_mask_0, x = var_46104_cast_fp16)[name = tensor("op_46436_cast_fp16")]; + tensor var_46443_begin_0 = const()[name = tensor("op_46443_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_46443_end_0 = const()[name = tensor("op_46443_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_46443_end_mask_0 = const()[name = tensor("op_46443_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46443_cast_fp16 = slice_by_index(begin = var_46443_begin_0, end = var_46443_end_0, end_mask = var_46443_end_mask_0, x = var_46104_cast_fp16)[name = tensor("op_46443_cast_fp16")]; + tensor var_46450_begin_0 = const()[name = tensor("op_46450_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_46450_end_0 = const()[name = tensor("op_46450_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_46450_end_mask_0 = const()[name = tensor("op_46450_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46450_cast_fp16 = slice_by_index(begin = var_46450_begin_0, end = var_46450_end_0, end_mask = var_46450_end_mask_0, x = var_46104_cast_fp16)[name = tensor("op_46450_cast_fp16")]; + tensor var_46457_begin_0 = const()[name = tensor("op_46457_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_46457_end_0 = const()[name = tensor("op_46457_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_46457_end_mask_0 = const()[name = tensor("op_46457_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46457_cast_fp16 = slice_by_index(begin = var_46457_begin_0, end = var_46457_end_0, end_mask = var_46457_end_mask_0, x = var_46108_cast_fp16)[name = tensor("op_46457_cast_fp16")]; + tensor var_46464_begin_0 = const()[name = tensor("op_46464_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_46464_end_0 = const()[name = tensor("op_46464_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_46464_end_mask_0 = const()[name = tensor("op_46464_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46464_cast_fp16 = slice_by_index(begin = var_46464_begin_0, end = var_46464_end_0, end_mask = var_46464_end_mask_0, x = var_46108_cast_fp16)[name = tensor("op_46464_cast_fp16")]; + tensor var_46471_begin_0 = const()[name = tensor("op_46471_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_46471_end_0 = const()[name = tensor("op_46471_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_46471_end_mask_0 = const()[name = tensor("op_46471_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46471_cast_fp16 = slice_by_index(begin = var_46471_begin_0, end = var_46471_end_0, end_mask = var_46471_end_mask_0, x = var_46108_cast_fp16)[name = tensor("op_46471_cast_fp16")]; + tensor var_46478_begin_0 = const()[name = tensor("op_46478_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_46478_end_0 = const()[name = tensor("op_46478_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_46478_end_mask_0 = const()[name = tensor("op_46478_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46478_cast_fp16 = slice_by_index(begin = var_46478_begin_0, end = var_46478_end_0, end_mask = var_46478_end_mask_0, x = var_46108_cast_fp16)[name = tensor("op_46478_cast_fp16")]; + tensor var_46485_begin_0 = const()[name = tensor("op_46485_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_46485_end_0 = const()[name = tensor("op_46485_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_46485_end_mask_0 = const()[name = tensor("op_46485_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46485_cast_fp16 = slice_by_index(begin = var_46485_begin_0, end = var_46485_end_0, end_mask = var_46485_end_mask_0, x = var_46112_cast_fp16)[name = tensor("op_46485_cast_fp16")]; + tensor var_46492_begin_0 = const()[name = tensor("op_46492_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_46492_end_0 = const()[name = tensor("op_46492_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_46492_end_mask_0 = const()[name = tensor("op_46492_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46492_cast_fp16 = slice_by_index(begin = var_46492_begin_0, end = var_46492_end_0, end_mask = var_46492_end_mask_0, x = var_46112_cast_fp16)[name = tensor("op_46492_cast_fp16")]; + tensor var_46499_begin_0 = const()[name = tensor("op_46499_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_46499_end_0 = const()[name = tensor("op_46499_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_46499_end_mask_0 = const()[name = tensor("op_46499_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46499_cast_fp16 = slice_by_index(begin = var_46499_begin_0, end = var_46499_end_0, end_mask = var_46499_end_mask_0, x = var_46112_cast_fp16)[name = tensor("op_46499_cast_fp16")]; + tensor var_46506_begin_0 = const()[name = tensor("op_46506_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_46506_end_0 = const()[name = tensor("op_46506_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_46506_end_mask_0 = const()[name = tensor("op_46506_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46506_cast_fp16 = slice_by_index(begin = var_46506_begin_0, end = var_46506_end_0, end_mask = var_46506_end_mask_0, x = var_46112_cast_fp16)[name = tensor("op_46506_cast_fp16")]; + tensor var_46513_begin_0 = const()[name = tensor("op_46513_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_46513_end_0 = const()[name = tensor("op_46513_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_46513_end_mask_0 = const()[name = tensor("op_46513_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46513_cast_fp16 = slice_by_index(begin = var_46513_begin_0, end = var_46513_end_0, end_mask = var_46513_end_mask_0, x = var_46116_cast_fp16)[name = tensor("op_46513_cast_fp16")]; + tensor var_46520_begin_0 = const()[name = tensor("op_46520_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_46520_end_0 = const()[name = tensor("op_46520_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_46520_end_mask_0 = const()[name = tensor("op_46520_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46520_cast_fp16 = slice_by_index(begin = var_46520_begin_0, end = var_46520_end_0, end_mask = var_46520_end_mask_0, x = var_46116_cast_fp16)[name = tensor("op_46520_cast_fp16")]; + tensor var_46527_begin_0 = const()[name = tensor("op_46527_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_46527_end_0 = const()[name = tensor("op_46527_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_46527_end_mask_0 = const()[name = tensor("op_46527_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46527_cast_fp16 = slice_by_index(begin = var_46527_begin_0, end = var_46527_end_0, end_mask = var_46527_end_mask_0, x = var_46116_cast_fp16)[name = tensor("op_46527_cast_fp16")]; + tensor var_46534_begin_0 = const()[name = tensor("op_46534_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_46534_end_0 = const()[name = tensor("op_46534_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_46534_end_mask_0 = const()[name = tensor("op_46534_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46534_cast_fp16 = slice_by_index(begin = var_46534_begin_0, end = var_46534_end_0, end_mask = var_46534_end_mask_0, x = var_46116_cast_fp16)[name = tensor("op_46534_cast_fp16")]; + tensor var_46541_begin_0 = const()[name = tensor("op_46541_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_46541_end_0 = const()[name = tensor("op_46541_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_46541_end_mask_0 = const()[name = tensor("op_46541_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46541_cast_fp16 = slice_by_index(begin = var_46541_begin_0, end = var_46541_end_0, end_mask = var_46541_end_mask_0, x = var_46120_cast_fp16)[name = tensor("op_46541_cast_fp16")]; + tensor var_46548_begin_0 = const()[name = tensor("op_46548_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_46548_end_0 = const()[name = tensor("op_46548_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_46548_end_mask_0 = const()[name = tensor("op_46548_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46548_cast_fp16 = slice_by_index(begin = var_46548_begin_0, end = var_46548_end_0, end_mask = var_46548_end_mask_0, x = var_46120_cast_fp16)[name = tensor("op_46548_cast_fp16")]; + tensor var_46555_begin_0 = const()[name = tensor("op_46555_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_46555_end_0 = const()[name = tensor("op_46555_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_46555_end_mask_0 = const()[name = tensor("op_46555_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46555_cast_fp16 = slice_by_index(begin = var_46555_begin_0, end = var_46555_end_0, end_mask = var_46555_end_mask_0, x = var_46120_cast_fp16)[name = tensor("op_46555_cast_fp16")]; + tensor var_46562_begin_0 = const()[name = tensor("op_46562_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_46562_end_0 = const()[name = tensor("op_46562_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_46562_end_mask_0 = const()[name = tensor("op_46562_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46562_cast_fp16 = slice_by_index(begin = var_46562_begin_0, end = var_46562_end_0, end_mask = var_46562_end_mask_0, x = var_46120_cast_fp16)[name = tensor("op_46562_cast_fp16")]; + tensor var_46569_begin_0 = const()[name = tensor("op_46569_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_46569_end_0 = const()[name = tensor("op_46569_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_46569_end_mask_0 = const()[name = tensor("op_46569_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46569_cast_fp16 = slice_by_index(begin = var_46569_begin_0, end = var_46569_end_0, end_mask = var_46569_end_mask_0, x = var_46124_cast_fp16)[name = tensor("op_46569_cast_fp16")]; + tensor var_46576_begin_0 = const()[name = tensor("op_46576_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_46576_end_0 = const()[name = tensor("op_46576_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_46576_end_mask_0 = const()[name = tensor("op_46576_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46576_cast_fp16 = slice_by_index(begin = var_46576_begin_0, end = var_46576_end_0, end_mask = var_46576_end_mask_0, x = var_46124_cast_fp16)[name = tensor("op_46576_cast_fp16")]; + tensor var_46583_begin_0 = const()[name = tensor("op_46583_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_46583_end_0 = const()[name = tensor("op_46583_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_46583_end_mask_0 = const()[name = tensor("op_46583_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46583_cast_fp16 = slice_by_index(begin = var_46583_begin_0, end = var_46583_end_0, end_mask = var_46583_end_mask_0, x = var_46124_cast_fp16)[name = tensor("op_46583_cast_fp16")]; + tensor var_46590_begin_0 = const()[name = tensor("op_46590_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_46590_end_0 = const()[name = tensor("op_46590_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_46590_end_mask_0 = const()[name = tensor("op_46590_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46590_cast_fp16 = slice_by_index(begin = var_46590_begin_0, end = var_46590_end_0, end_mask = var_46590_end_mask_0, x = var_46124_cast_fp16)[name = tensor("op_46590_cast_fp16")]; + tensor var_46597_begin_0 = const()[name = tensor("op_46597_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_46597_end_0 = const()[name = tensor("op_46597_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_46597_end_mask_0 = const()[name = tensor("op_46597_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46597_cast_fp16 = slice_by_index(begin = var_46597_begin_0, end = var_46597_end_0, end_mask = var_46597_end_mask_0, x = var_46128_cast_fp16)[name = tensor("op_46597_cast_fp16")]; + tensor var_46604_begin_0 = const()[name = tensor("op_46604_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_46604_end_0 = const()[name = tensor("op_46604_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_46604_end_mask_0 = const()[name = tensor("op_46604_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46604_cast_fp16 = slice_by_index(begin = var_46604_begin_0, end = var_46604_end_0, end_mask = var_46604_end_mask_0, x = var_46128_cast_fp16)[name = tensor("op_46604_cast_fp16")]; + tensor var_46611_begin_0 = const()[name = tensor("op_46611_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_46611_end_0 = const()[name = tensor("op_46611_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_46611_end_mask_0 = const()[name = tensor("op_46611_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46611_cast_fp16 = slice_by_index(begin = var_46611_begin_0, end = var_46611_end_0, end_mask = var_46611_end_mask_0, x = var_46128_cast_fp16)[name = tensor("op_46611_cast_fp16")]; + tensor var_46618_begin_0 = const()[name = tensor("op_46618_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_46618_end_0 = const()[name = tensor("op_46618_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_46618_end_mask_0 = const()[name = tensor("op_46618_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46618_cast_fp16 = slice_by_index(begin = var_46618_begin_0, end = var_46618_end_0, end_mask = var_46618_end_mask_0, x = var_46128_cast_fp16)[name = tensor("op_46618_cast_fp16")]; + tensor var_46625_begin_0 = const()[name = tensor("op_46625_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_46625_end_0 = const()[name = tensor("op_46625_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_46625_end_mask_0 = const()[name = tensor("op_46625_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46625_cast_fp16 = slice_by_index(begin = var_46625_begin_0, end = var_46625_end_0, end_mask = var_46625_end_mask_0, x = var_46132_cast_fp16)[name = tensor("op_46625_cast_fp16")]; + tensor var_46632_begin_0 = const()[name = tensor("op_46632_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_46632_end_0 = const()[name = tensor("op_46632_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_46632_end_mask_0 = const()[name = tensor("op_46632_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46632_cast_fp16 = slice_by_index(begin = var_46632_begin_0, end = var_46632_end_0, end_mask = var_46632_end_mask_0, x = var_46132_cast_fp16)[name = tensor("op_46632_cast_fp16")]; + tensor var_46639_begin_0 = const()[name = tensor("op_46639_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_46639_end_0 = const()[name = tensor("op_46639_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_46639_end_mask_0 = const()[name = tensor("op_46639_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46639_cast_fp16 = slice_by_index(begin = var_46639_begin_0, end = var_46639_end_0, end_mask = var_46639_end_mask_0, x = var_46132_cast_fp16)[name = tensor("op_46639_cast_fp16")]; + tensor var_46646_begin_0 = const()[name = tensor("op_46646_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_46646_end_0 = const()[name = tensor("op_46646_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_46646_end_mask_0 = const()[name = tensor("op_46646_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46646_cast_fp16 = slice_by_index(begin = var_46646_begin_0, end = var_46646_end_0, end_mask = var_46646_end_mask_0, x = var_46132_cast_fp16)[name = tensor("op_46646_cast_fp16")]; + tensor var_46653_begin_0 = const()[name = tensor("op_46653_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_46653_end_0 = const()[name = tensor("op_46653_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_46653_end_mask_0 = const()[name = tensor("op_46653_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46653_cast_fp16 = slice_by_index(begin = var_46653_begin_0, end = var_46653_end_0, end_mask = var_46653_end_mask_0, x = var_46136_cast_fp16)[name = tensor("op_46653_cast_fp16")]; + tensor var_46660_begin_0 = const()[name = tensor("op_46660_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_46660_end_0 = const()[name = tensor("op_46660_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_46660_end_mask_0 = const()[name = tensor("op_46660_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46660_cast_fp16 = slice_by_index(begin = var_46660_begin_0, end = var_46660_end_0, end_mask = var_46660_end_mask_0, x = var_46136_cast_fp16)[name = tensor("op_46660_cast_fp16")]; + tensor var_46667_begin_0 = const()[name = tensor("op_46667_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_46667_end_0 = const()[name = tensor("op_46667_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_46667_end_mask_0 = const()[name = tensor("op_46667_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46667_cast_fp16 = slice_by_index(begin = var_46667_begin_0, end = var_46667_end_0, end_mask = var_46667_end_mask_0, x = var_46136_cast_fp16)[name = tensor("op_46667_cast_fp16")]; + tensor var_46674_begin_0 = const()[name = tensor("op_46674_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_46674_end_0 = const()[name = tensor("op_46674_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_46674_end_mask_0 = const()[name = tensor("op_46674_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46674_cast_fp16 = slice_by_index(begin = var_46674_begin_0, end = var_46674_end_0, end_mask = var_46674_end_mask_0, x = var_46136_cast_fp16)[name = tensor("op_46674_cast_fp16")]; + tensor var_46681_begin_0 = const()[name = tensor("op_46681_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_46681_end_0 = const()[name = tensor("op_46681_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_46681_end_mask_0 = const()[name = tensor("op_46681_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46681_cast_fp16 = slice_by_index(begin = var_46681_begin_0, end = var_46681_end_0, end_mask = var_46681_end_mask_0, x = var_46140_cast_fp16)[name = tensor("op_46681_cast_fp16")]; + tensor var_46688_begin_0 = const()[name = tensor("op_46688_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_46688_end_0 = const()[name = tensor("op_46688_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_46688_end_mask_0 = const()[name = tensor("op_46688_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46688_cast_fp16 = slice_by_index(begin = var_46688_begin_0, end = var_46688_end_0, end_mask = var_46688_end_mask_0, x = var_46140_cast_fp16)[name = tensor("op_46688_cast_fp16")]; + tensor var_46695_begin_0 = const()[name = tensor("op_46695_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_46695_end_0 = const()[name = tensor("op_46695_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_46695_end_mask_0 = const()[name = tensor("op_46695_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46695_cast_fp16 = slice_by_index(begin = var_46695_begin_0, end = var_46695_end_0, end_mask = var_46695_end_mask_0, x = var_46140_cast_fp16)[name = tensor("op_46695_cast_fp16")]; + tensor var_46702_begin_0 = const()[name = tensor("op_46702_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_46702_end_0 = const()[name = tensor("op_46702_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_46702_end_mask_0 = const()[name = tensor("op_46702_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46702_cast_fp16 = slice_by_index(begin = var_46702_begin_0, end = var_46702_end_0, end_mask = var_46702_end_mask_0, x = var_46140_cast_fp16)[name = tensor("op_46702_cast_fp16")]; + tensor k_61_perm_0 = const()[name = tensor("k_61_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_46707_begin_0 = const()[name = tensor("op_46707_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_46707_end_0 = const()[name = tensor("op_46707_end_0"), val = tensor([1, 1500, 1, 64])]; + tensor var_46707_end_mask_0 = const()[name = tensor("op_46707_end_mask_0"), val = tensor([true, true, true, false])]; + tensor transpose_1 = transpose(perm = k_61_perm_0, x = key_61_cast_fp16)[name = tensor("transpose_1")]; + tensor var_46707_cast_fp16 = slice_by_index(begin = var_46707_begin_0, end = var_46707_end_0, end_mask = var_46707_end_mask_0, x = transpose_1)[name = tensor("op_46707_cast_fp16")]; + tensor var_46711_begin_0 = const()[name = tensor("op_46711_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor var_46711_end_0 = const()[name = tensor("op_46711_end_0"), val = tensor([1, 1500, 1, 128])]; + tensor var_46711_end_mask_0 = const()[name = tensor("op_46711_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46711_cast_fp16 = slice_by_index(begin = var_46711_begin_0, end = var_46711_end_0, end_mask = var_46711_end_mask_0, x = transpose_1)[name = tensor("op_46711_cast_fp16")]; + tensor var_46715_begin_0 = const()[name = tensor("op_46715_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_46715_end_0 = const()[name = tensor("op_46715_end_0"), val = tensor([1, 1500, 1, 192])]; + tensor var_46715_end_mask_0 = const()[name = tensor("op_46715_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46715_cast_fp16 = slice_by_index(begin = var_46715_begin_0, end = var_46715_end_0, end_mask = var_46715_end_mask_0, x = transpose_1)[name = tensor("op_46715_cast_fp16")]; + tensor var_46719_begin_0 = const()[name = tensor("op_46719_begin_0"), val = tensor([0, 0, 0, 192])]; + tensor var_46719_end_0 = const()[name = tensor("op_46719_end_0"), val = tensor([1, 1500, 1, 256])]; + tensor var_46719_end_mask_0 = const()[name = tensor("op_46719_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46719_cast_fp16 = slice_by_index(begin = var_46719_begin_0, end = var_46719_end_0, end_mask = var_46719_end_mask_0, x = transpose_1)[name = tensor("op_46719_cast_fp16")]; + tensor var_46723_begin_0 = const()[name = tensor("op_46723_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_46723_end_0 = const()[name = tensor("op_46723_end_0"), val = tensor([1, 1500, 1, 320])]; + tensor var_46723_end_mask_0 = const()[name = tensor("op_46723_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46723_cast_fp16 = slice_by_index(begin = var_46723_begin_0, end = var_46723_end_0, end_mask = var_46723_end_mask_0, x = transpose_1)[name = tensor("op_46723_cast_fp16")]; + tensor var_46727_begin_0 = const()[name = tensor("op_46727_begin_0"), val = tensor([0, 0, 0, 320])]; + tensor var_46727_end_0 = const()[name = tensor("op_46727_end_0"), val = tensor([1, 1500, 1, 384])]; + tensor var_46727_end_mask_0 = const()[name = tensor("op_46727_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46727_cast_fp16 = slice_by_index(begin = var_46727_begin_0, end = var_46727_end_0, end_mask = var_46727_end_mask_0, x = transpose_1)[name = tensor("op_46727_cast_fp16")]; + tensor var_46731_begin_0 = const()[name = tensor("op_46731_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_46731_end_0 = const()[name = tensor("op_46731_end_0"), val = tensor([1, 1500, 1, 448])]; + tensor var_46731_end_mask_0 = const()[name = tensor("op_46731_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46731_cast_fp16 = slice_by_index(begin = var_46731_begin_0, end = var_46731_end_0, end_mask = var_46731_end_mask_0, x = transpose_1)[name = tensor("op_46731_cast_fp16")]; + tensor var_46735_begin_0 = const()[name = tensor("op_46735_begin_0"), val = tensor([0, 0, 0, 448])]; + tensor var_46735_end_0 = const()[name = tensor("op_46735_end_0"), val = tensor([1, 1500, 1, 512])]; + tensor var_46735_end_mask_0 = const()[name = tensor("op_46735_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46735_cast_fp16 = slice_by_index(begin = var_46735_begin_0, end = var_46735_end_0, end_mask = var_46735_end_mask_0, x = transpose_1)[name = tensor("op_46735_cast_fp16")]; + tensor var_46739_begin_0 = const()[name = tensor("op_46739_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_46739_end_0 = const()[name = tensor("op_46739_end_0"), val = tensor([1, 1500, 1, 576])]; + tensor var_46739_end_mask_0 = const()[name = tensor("op_46739_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46739_cast_fp16 = slice_by_index(begin = var_46739_begin_0, end = var_46739_end_0, end_mask = var_46739_end_mask_0, x = transpose_1)[name = tensor("op_46739_cast_fp16")]; + tensor var_46743_begin_0 = const()[name = tensor("op_46743_begin_0"), val = tensor([0, 0, 0, 576])]; + tensor var_46743_end_0 = const()[name = tensor("op_46743_end_0"), val = tensor([1, 1500, 1, 640])]; + tensor var_46743_end_mask_0 = const()[name = tensor("op_46743_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46743_cast_fp16 = slice_by_index(begin = var_46743_begin_0, end = var_46743_end_0, end_mask = var_46743_end_mask_0, x = transpose_1)[name = tensor("op_46743_cast_fp16")]; + tensor var_46747_begin_0 = const()[name = tensor("op_46747_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_46747_end_0 = const()[name = tensor("op_46747_end_0"), val = tensor([1, 1500, 1, 704])]; + tensor var_46747_end_mask_0 = const()[name = tensor("op_46747_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46747_cast_fp16 = slice_by_index(begin = var_46747_begin_0, end = var_46747_end_0, end_mask = var_46747_end_mask_0, x = transpose_1)[name = tensor("op_46747_cast_fp16")]; + tensor var_46751_begin_0 = const()[name = tensor("op_46751_begin_0"), val = tensor([0, 0, 0, 704])]; + tensor var_46751_end_0 = const()[name = tensor("op_46751_end_0"), val = tensor([1, 1500, 1, 768])]; + tensor var_46751_end_mask_0 = const()[name = tensor("op_46751_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46751_cast_fp16 = slice_by_index(begin = var_46751_begin_0, end = var_46751_end_0, end_mask = var_46751_end_mask_0, x = transpose_1)[name = tensor("op_46751_cast_fp16")]; + tensor var_46755_begin_0 = const()[name = tensor("op_46755_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_46755_end_0 = const()[name = tensor("op_46755_end_0"), val = tensor([1, 1500, 1, 832])]; + tensor var_46755_end_mask_0 = const()[name = tensor("op_46755_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46755_cast_fp16 = slice_by_index(begin = var_46755_begin_0, end = var_46755_end_0, end_mask = var_46755_end_mask_0, x = transpose_1)[name = tensor("op_46755_cast_fp16")]; + tensor var_46759_begin_0 = const()[name = tensor("op_46759_begin_0"), val = tensor([0, 0, 0, 832])]; + tensor var_46759_end_0 = const()[name = tensor("op_46759_end_0"), val = tensor([1, 1500, 1, 896])]; + tensor var_46759_end_mask_0 = const()[name = tensor("op_46759_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46759_cast_fp16 = slice_by_index(begin = var_46759_begin_0, end = var_46759_end_0, end_mask = var_46759_end_mask_0, x = transpose_1)[name = tensor("op_46759_cast_fp16")]; + tensor var_46763_begin_0 = const()[name = tensor("op_46763_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_46763_end_0 = const()[name = tensor("op_46763_end_0"), val = tensor([1, 1500, 1, 960])]; + tensor var_46763_end_mask_0 = const()[name = tensor("op_46763_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46763_cast_fp16 = slice_by_index(begin = var_46763_begin_0, end = var_46763_end_0, end_mask = var_46763_end_mask_0, x = transpose_1)[name = tensor("op_46763_cast_fp16")]; + tensor var_46767_begin_0 = const()[name = tensor("op_46767_begin_0"), val = tensor([0, 0, 0, 960])]; + tensor var_46767_end_0 = const()[name = tensor("op_46767_end_0"), val = tensor([1, 1500, 1, 1024])]; + tensor var_46767_end_mask_0 = const()[name = tensor("op_46767_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46767_cast_fp16 = slice_by_index(begin = var_46767_begin_0, end = var_46767_end_0, end_mask = var_46767_end_mask_0, x = transpose_1)[name = tensor("op_46767_cast_fp16")]; + tensor var_46771_begin_0 = const()[name = tensor("op_46771_begin_0"), val = tensor([0, 0, 0, 1024])]; + tensor var_46771_end_0 = const()[name = tensor("op_46771_end_0"), val = tensor([1, 1500, 1, 1088])]; + tensor var_46771_end_mask_0 = const()[name = tensor("op_46771_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46771_cast_fp16 = slice_by_index(begin = var_46771_begin_0, end = var_46771_end_0, end_mask = var_46771_end_mask_0, x = transpose_1)[name = tensor("op_46771_cast_fp16")]; + tensor var_46775_begin_0 = const()[name = tensor("op_46775_begin_0"), val = tensor([0, 0, 0, 1088])]; + tensor var_46775_end_0 = const()[name = tensor("op_46775_end_0"), val = tensor([1, 1500, 1, 1152])]; + tensor var_46775_end_mask_0 = const()[name = tensor("op_46775_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46775_cast_fp16 = slice_by_index(begin = var_46775_begin_0, end = var_46775_end_0, end_mask = var_46775_end_mask_0, x = transpose_1)[name = tensor("op_46775_cast_fp16")]; + tensor var_46779_begin_0 = const()[name = tensor("op_46779_begin_0"), val = tensor([0, 0, 0, 1152])]; + tensor var_46779_end_0 = const()[name = tensor("op_46779_end_0"), val = tensor([1, 1500, 1, 1216])]; + tensor var_46779_end_mask_0 = const()[name = tensor("op_46779_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46779_cast_fp16 = slice_by_index(begin = var_46779_begin_0, end = var_46779_end_0, end_mask = var_46779_end_mask_0, x = transpose_1)[name = tensor("op_46779_cast_fp16")]; + tensor var_46783_begin_0 = const()[name = tensor("op_46783_begin_0"), val = tensor([0, 0, 0, 1216])]; + tensor var_46783_end_0 = const()[name = tensor("op_46783_end_0"), val = tensor([1, 1500, 1, 1280])]; + tensor var_46783_end_mask_0 = const()[name = tensor("op_46783_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_46783_cast_fp16 = slice_by_index(begin = var_46783_begin_0, end = var_46783_end_0, end_mask = var_46783_end_mask_0, x = transpose_1)[name = tensor("op_46783_cast_fp16")]; + tensor var_46785_begin_0 = const()[name = tensor("op_46785_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_46785_end_0 = const()[name = tensor("op_46785_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_46785_end_mask_0 = const()[name = tensor("op_46785_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_46785_cast_fp16 = slice_by_index(begin = var_46785_begin_0, end = var_46785_end_0, end_mask = var_46785_end_mask_0, x = value_61_cast_fp16)[name = tensor("op_46785_cast_fp16")]; + tensor var_46789_begin_0 = const()[name = tensor("op_46789_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_46789_end_0 = const()[name = tensor("op_46789_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_46789_end_mask_0 = const()[name = tensor("op_46789_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_46789_cast_fp16 = slice_by_index(begin = var_46789_begin_0, end = var_46789_end_0, end_mask = var_46789_end_mask_0, x = value_61_cast_fp16)[name = tensor("op_46789_cast_fp16")]; + tensor var_46793_begin_0 = const()[name = tensor("op_46793_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_46793_end_0 = const()[name = tensor("op_46793_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_46793_end_mask_0 = const()[name = tensor("op_46793_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_46793_cast_fp16 = slice_by_index(begin = var_46793_begin_0, end = var_46793_end_0, end_mask = var_46793_end_mask_0, x = value_61_cast_fp16)[name = tensor("op_46793_cast_fp16")]; + tensor var_46797_begin_0 = const()[name = tensor("op_46797_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_46797_end_0 = const()[name = tensor("op_46797_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_46797_end_mask_0 = const()[name = tensor("op_46797_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_46797_cast_fp16 = slice_by_index(begin = var_46797_begin_0, end = var_46797_end_0, end_mask = var_46797_end_mask_0, x = value_61_cast_fp16)[name = tensor("op_46797_cast_fp16")]; + tensor var_46801_begin_0 = const()[name = tensor("op_46801_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_46801_end_0 = const()[name = tensor("op_46801_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_46801_end_mask_0 = const()[name = tensor("op_46801_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_46801_cast_fp16 = slice_by_index(begin = var_46801_begin_0, end = var_46801_end_0, end_mask = var_46801_end_mask_0, x = value_61_cast_fp16)[name = tensor("op_46801_cast_fp16")]; + tensor var_46805_begin_0 = const()[name = tensor("op_46805_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_46805_end_0 = const()[name = tensor("op_46805_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_46805_end_mask_0 = const()[name = tensor("op_46805_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_46805_cast_fp16 = slice_by_index(begin = var_46805_begin_0, end = var_46805_end_0, end_mask = var_46805_end_mask_0, x = value_61_cast_fp16)[name = tensor("op_46805_cast_fp16")]; + tensor var_46809_begin_0 = const()[name = tensor("op_46809_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_46809_end_0 = const()[name = tensor("op_46809_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_46809_end_mask_0 = const()[name = tensor("op_46809_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_46809_cast_fp16 = slice_by_index(begin = var_46809_begin_0, end = var_46809_end_0, end_mask = var_46809_end_mask_0, x = value_61_cast_fp16)[name = tensor("op_46809_cast_fp16")]; + tensor var_46813_begin_0 = const()[name = tensor("op_46813_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_46813_end_0 = const()[name = tensor("op_46813_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_46813_end_mask_0 = const()[name = tensor("op_46813_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_46813_cast_fp16 = slice_by_index(begin = var_46813_begin_0, end = var_46813_end_0, end_mask = var_46813_end_mask_0, x = value_61_cast_fp16)[name = tensor("op_46813_cast_fp16")]; + tensor var_46817_begin_0 = const()[name = tensor("op_46817_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_46817_end_0 = const()[name = tensor("op_46817_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_46817_end_mask_0 = const()[name = tensor("op_46817_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_46817_cast_fp16 = slice_by_index(begin = var_46817_begin_0, end = var_46817_end_0, end_mask = var_46817_end_mask_0, x = value_61_cast_fp16)[name = tensor("op_46817_cast_fp16")]; + tensor var_46821_begin_0 = const()[name = tensor("op_46821_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_46821_end_0 = const()[name = tensor("op_46821_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_46821_end_mask_0 = const()[name = tensor("op_46821_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_46821_cast_fp16 = slice_by_index(begin = var_46821_begin_0, end = var_46821_end_0, end_mask = var_46821_end_mask_0, x = value_61_cast_fp16)[name = tensor("op_46821_cast_fp16")]; + tensor var_46825_begin_0 = const()[name = tensor("op_46825_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_46825_end_0 = const()[name = tensor("op_46825_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_46825_end_mask_0 = const()[name = tensor("op_46825_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_46825_cast_fp16 = slice_by_index(begin = var_46825_begin_0, end = var_46825_end_0, end_mask = var_46825_end_mask_0, x = value_61_cast_fp16)[name = tensor("op_46825_cast_fp16")]; + tensor var_46829_begin_0 = const()[name = tensor("op_46829_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_46829_end_0 = const()[name = tensor("op_46829_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_46829_end_mask_0 = const()[name = tensor("op_46829_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_46829_cast_fp16 = slice_by_index(begin = var_46829_begin_0, end = var_46829_end_0, end_mask = var_46829_end_mask_0, x = value_61_cast_fp16)[name = tensor("op_46829_cast_fp16")]; + tensor var_46833_begin_0 = const()[name = tensor("op_46833_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_46833_end_0 = const()[name = tensor("op_46833_end_0"), val = tensor([1, 832, 1, 1500])]; + tensor var_46833_end_mask_0 = const()[name = tensor("op_46833_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_46833_cast_fp16 = slice_by_index(begin = var_46833_begin_0, end = var_46833_end_0, end_mask = var_46833_end_mask_0, x = value_61_cast_fp16)[name = tensor("op_46833_cast_fp16")]; + tensor var_46837_begin_0 = const()[name = tensor("op_46837_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_46837_end_0 = const()[name = tensor("op_46837_end_0"), val = tensor([1, 896, 1, 1500])]; + tensor var_46837_end_mask_0 = const()[name = tensor("op_46837_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_46837_cast_fp16 = slice_by_index(begin = var_46837_begin_0, end = var_46837_end_0, end_mask = var_46837_end_mask_0, x = value_61_cast_fp16)[name = tensor("op_46837_cast_fp16")]; + tensor var_46841_begin_0 = const()[name = tensor("op_46841_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_46841_end_0 = const()[name = tensor("op_46841_end_0"), val = tensor([1, 960, 1, 1500])]; + tensor var_46841_end_mask_0 = const()[name = tensor("op_46841_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_46841_cast_fp16 = slice_by_index(begin = var_46841_begin_0, end = var_46841_end_0, end_mask = var_46841_end_mask_0, x = value_61_cast_fp16)[name = tensor("op_46841_cast_fp16")]; + tensor var_46845_begin_0 = const()[name = tensor("op_46845_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_46845_end_0 = const()[name = tensor("op_46845_end_0"), val = tensor([1, 1024, 1, 1500])]; + tensor var_46845_end_mask_0 = const()[name = tensor("op_46845_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_46845_cast_fp16 = slice_by_index(begin = var_46845_begin_0, end = var_46845_end_0, end_mask = var_46845_end_mask_0, x = value_61_cast_fp16)[name = tensor("op_46845_cast_fp16")]; + tensor var_46849_begin_0 = const()[name = tensor("op_46849_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_46849_end_0 = const()[name = tensor("op_46849_end_0"), val = tensor([1, 1088, 1, 1500])]; + tensor var_46849_end_mask_0 = const()[name = tensor("op_46849_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_46849_cast_fp16 = slice_by_index(begin = var_46849_begin_0, end = var_46849_end_0, end_mask = var_46849_end_mask_0, x = value_61_cast_fp16)[name = tensor("op_46849_cast_fp16")]; + tensor var_46853_begin_0 = const()[name = tensor("op_46853_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_46853_end_0 = const()[name = tensor("op_46853_end_0"), val = tensor([1, 1152, 1, 1500])]; + tensor var_46853_end_mask_0 = const()[name = tensor("op_46853_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_46853_cast_fp16 = slice_by_index(begin = var_46853_begin_0, end = var_46853_end_0, end_mask = var_46853_end_mask_0, x = value_61_cast_fp16)[name = tensor("op_46853_cast_fp16")]; + tensor var_46857_begin_0 = const()[name = tensor("op_46857_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_46857_end_0 = const()[name = tensor("op_46857_end_0"), val = tensor([1, 1216, 1, 1500])]; + tensor var_46857_end_mask_0 = const()[name = tensor("op_46857_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_46857_cast_fp16 = slice_by_index(begin = var_46857_begin_0, end = var_46857_end_0, end_mask = var_46857_end_mask_0, x = value_61_cast_fp16)[name = tensor("op_46857_cast_fp16")]; + tensor var_46861_begin_0 = const()[name = tensor("op_46861_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_46861_end_0 = const()[name = tensor("op_46861_end_0"), val = tensor([1, 1280, 1, 1500])]; + tensor var_46861_end_mask_0 = const()[name = tensor("op_46861_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_46861_cast_fp16 = slice_by_index(begin = var_46861_begin_0, end = var_46861_end_0, end_mask = var_46861_end_mask_0, x = value_61_cast_fp16)[name = tensor("op_46861_cast_fp16")]; + tensor var_46865_equation_0 = const()[name = tensor("op_46865_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_46865_cast_fp16 = einsum(equation = var_46865_equation_0, values = (var_46707_cast_fp16, var_46149_cast_fp16))[name = tensor("op_46865_cast_fp16")]; + tensor var_46866_to_fp16 = const()[name = tensor("op_46866_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4801_cast_fp16 = mul(x = var_46865_cast_fp16, y = var_46866_to_fp16)[name = tensor("aw_chunk_4801_cast_fp16")]; + tensor var_46869_equation_0 = const()[name = tensor("op_46869_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_46869_cast_fp16 = einsum(equation = var_46869_equation_0, values = (var_46707_cast_fp16, var_46156_cast_fp16))[name = tensor("op_46869_cast_fp16")]; + tensor var_46870_to_fp16 = const()[name = tensor("op_46870_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4803_cast_fp16 = mul(x = var_46869_cast_fp16, y = var_46870_to_fp16)[name = tensor("aw_chunk_4803_cast_fp16")]; + tensor var_46873_equation_0 = const()[name = tensor("op_46873_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_46873_cast_fp16 = einsum(equation = var_46873_equation_0, values = (var_46707_cast_fp16, var_46163_cast_fp16))[name = tensor("op_46873_cast_fp16")]; + tensor var_46874_to_fp16 = const()[name = tensor("op_46874_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4805_cast_fp16 = mul(x = var_46873_cast_fp16, y = var_46874_to_fp16)[name = tensor("aw_chunk_4805_cast_fp16")]; + tensor var_46877_equation_0 = const()[name = tensor("op_46877_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_46877_cast_fp16 = einsum(equation = var_46877_equation_0, values = (var_46707_cast_fp16, var_46170_cast_fp16))[name = tensor("op_46877_cast_fp16")]; + tensor var_46878_to_fp16 = const()[name = tensor("op_46878_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4807_cast_fp16 = mul(x = var_46877_cast_fp16, y = var_46878_to_fp16)[name = tensor("aw_chunk_4807_cast_fp16")]; + tensor var_46881_equation_0 = const()[name = tensor("op_46881_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_46881_cast_fp16 = einsum(equation = var_46881_equation_0, values = (var_46711_cast_fp16, var_46177_cast_fp16))[name = tensor("op_46881_cast_fp16")]; + tensor var_46882_to_fp16 = const()[name = tensor("op_46882_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4809_cast_fp16 = mul(x = var_46881_cast_fp16, y = var_46882_to_fp16)[name = tensor("aw_chunk_4809_cast_fp16")]; + tensor var_46885_equation_0 = const()[name = tensor("op_46885_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_46885_cast_fp16 = einsum(equation = var_46885_equation_0, values = (var_46711_cast_fp16, var_46184_cast_fp16))[name = tensor("op_46885_cast_fp16")]; + tensor var_46886_to_fp16 = const()[name = tensor("op_46886_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4811_cast_fp16 = mul(x = var_46885_cast_fp16, y = var_46886_to_fp16)[name = tensor("aw_chunk_4811_cast_fp16")]; + tensor var_46889_equation_0 = const()[name = tensor("op_46889_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_46889_cast_fp16 = einsum(equation = var_46889_equation_0, values = (var_46711_cast_fp16, var_46191_cast_fp16))[name = tensor("op_46889_cast_fp16")]; + tensor var_46890_to_fp16 = const()[name = tensor("op_46890_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4813_cast_fp16 = mul(x = var_46889_cast_fp16, y = var_46890_to_fp16)[name = tensor("aw_chunk_4813_cast_fp16")]; + tensor var_46893_equation_0 = const()[name = tensor("op_46893_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_46893_cast_fp16 = einsum(equation = var_46893_equation_0, values = (var_46711_cast_fp16, var_46198_cast_fp16))[name = tensor("op_46893_cast_fp16")]; + tensor var_46894_to_fp16 = const()[name = tensor("op_46894_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4815_cast_fp16 = mul(x = var_46893_cast_fp16, y = var_46894_to_fp16)[name = tensor("aw_chunk_4815_cast_fp16")]; + tensor var_46897_equation_0 = const()[name = tensor("op_46897_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_46897_cast_fp16 = einsum(equation = var_46897_equation_0, values = (var_46715_cast_fp16, var_46205_cast_fp16))[name = tensor("op_46897_cast_fp16")]; + tensor var_46898_to_fp16 = const()[name = tensor("op_46898_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4817_cast_fp16 = mul(x = var_46897_cast_fp16, y = var_46898_to_fp16)[name = tensor("aw_chunk_4817_cast_fp16")]; + tensor var_46901_equation_0 = const()[name = tensor("op_46901_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_46901_cast_fp16 = einsum(equation = var_46901_equation_0, values = (var_46715_cast_fp16, var_46212_cast_fp16))[name = tensor("op_46901_cast_fp16")]; + tensor var_46902_to_fp16 = const()[name = tensor("op_46902_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4819_cast_fp16 = mul(x = var_46901_cast_fp16, y = var_46902_to_fp16)[name = tensor("aw_chunk_4819_cast_fp16")]; + tensor var_46905_equation_0 = const()[name = tensor("op_46905_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_46905_cast_fp16 = einsum(equation = var_46905_equation_0, values = (var_46715_cast_fp16, var_46219_cast_fp16))[name = tensor("op_46905_cast_fp16")]; + tensor var_46906_to_fp16 = const()[name = tensor("op_46906_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4821_cast_fp16 = mul(x = var_46905_cast_fp16, y = var_46906_to_fp16)[name = tensor("aw_chunk_4821_cast_fp16")]; + tensor var_46909_equation_0 = const()[name = tensor("op_46909_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_46909_cast_fp16 = einsum(equation = var_46909_equation_0, values = (var_46715_cast_fp16, var_46226_cast_fp16))[name = tensor("op_46909_cast_fp16")]; + tensor var_46910_to_fp16 = const()[name = tensor("op_46910_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4823_cast_fp16 = mul(x = var_46909_cast_fp16, y = var_46910_to_fp16)[name = tensor("aw_chunk_4823_cast_fp16")]; + tensor var_46913_equation_0 = const()[name = tensor("op_46913_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_46913_cast_fp16 = einsum(equation = var_46913_equation_0, values = (var_46719_cast_fp16, var_46233_cast_fp16))[name = tensor("op_46913_cast_fp16")]; + tensor var_46914_to_fp16 = const()[name = tensor("op_46914_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4825_cast_fp16 = mul(x = var_46913_cast_fp16, y = var_46914_to_fp16)[name = tensor("aw_chunk_4825_cast_fp16")]; + tensor var_46917_equation_0 = const()[name = tensor("op_46917_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_46917_cast_fp16 = einsum(equation = var_46917_equation_0, values = (var_46719_cast_fp16, var_46240_cast_fp16))[name = tensor("op_46917_cast_fp16")]; + tensor var_46918_to_fp16 = const()[name = tensor("op_46918_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4827_cast_fp16 = mul(x = var_46917_cast_fp16, y = var_46918_to_fp16)[name = tensor("aw_chunk_4827_cast_fp16")]; + tensor var_46921_equation_0 = const()[name = tensor("op_46921_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_46921_cast_fp16 = einsum(equation = var_46921_equation_0, values = (var_46719_cast_fp16, var_46247_cast_fp16))[name = tensor("op_46921_cast_fp16")]; + tensor var_46922_to_fp16 = const()[name = tensor("op_46922_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4829_cast_fp16 = mul(x = var_46921_cast_fp16, y = var_46922_to_fp16)[name = tensor("aw_chunk_4829_cast_fp16")]; + tensor var_46925_equation_0 = const()[name = tensor("op_46925_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_46925_cast_fp16 = einsum(equation = var_46925_equation_0, values = (var_46719_cast_fp16, var_46254_cast_fp16))[name = tensor("op_46925_cast_fp16")]; + tensor var_46926_to_fp16 = const()[name = tensor("op_46926_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4831_cast_fp16 = mul(x = var_46925_cast_fp16, y = var_46926_to_fp16)[name = tensor("aw_chunk_4831_cast_fp16")]; + tensor var_46929_equation_0 = const()[name = tensor("op_46929_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_46929_cast_fp16 = einsum(equation = var_46929_equation_0, values = (var_46723_cast_fp16, var_46261_cast_fp16))[name = tensor("op_46929_cast_fp16")]; + tensor var_46930_to_fp16 = const()[name = tensor("op_46930_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4833_cast_fp16 = mul(x = var_46929_cast_fp16, y = var_46930_to_fp16)[name = tensor("aw_chunk_4833_cast_fp16")]; + tensor var_46933_equation_0 = const()[name = tensor("op_46933_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_46933_cast_fp16 = einsum(equation = var_46933_equation_0, values = (var_46723_cast_fp16, var_46268_cast_fp16))[name = tensor("op_46933_cast_fp16")]; + tensor var_46934_to_fp16 = const()[name = tensor("op_46934_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4835_cast_fp16 = mul(x = var_46933_cast_fp16, y = var_46934_to_fp16)[name = tensor("aw_chunk_4835_cast_fp16")]; + tensor var_46937_equation_0 = const()[name = tensor("op_46937_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_46937_cast_fp16 = einsum(equation = var_46937_equation_0, values = (var_46723_cast_fp16, var_46275_cast_fp16))[name = tensor("op_46937_cast_fp16")]; + tensor var_46938_to_fp16 = const()[name = tensor("op_46938_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4837_cast_fp16 = mul(x = var_46937_cast_fp16, y = var_46938_to_fp16)[name = tensor("aw_chunk_4837_cast_fp16")]; + tensor var_46941_equation_0 = const()[name = tensor("op_46941_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_46941_cast_fp16 = einsum(equation = var_46941_equation_0, values = (var_46723_cast_fp16, var_46282_cast_fp16))[name = tensor("op_46941_cast_fp16")]; + tensor var_46942_to_fp16 = const()[name = tensor("op_46942_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4839_cast_fp16 = mul(x = var_46941_cast_fp16, y = var_46942_to_fp16)[name = tensor("aw_chunk_4839_cast_fp16")]; + tensor var_46945_equation_0 = const()[name = tensor("op_46945_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_46945_cast_fp16 = einsum(equation = var_46945_equation_0, values = (var_46727_cast_fp16, var_46289_cast_fp16))[name = tensor("op_46945_cast_fp16")]; + tensor var_46946_to_fp16 = const()[name = tensor("op_46946_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4841_cast_fp16 = mul(x = var_46945_cast_fp16, y = var_46946_to_fp16)[name = tensor("aw_chunk_4841_cast_fp16")]; + tensor var_46949_equation_0 = const()[name = tensor("op_46949_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_46949_cast_fp16 = einsum(equation = var_46949_equation_0, values = (var_46727_cast_fp16, var_46296_cast_fp16))[name = tensor("op_46949_cast_fp16")]; + tensor var_46950_to_fp16 = const()[name = tensor("op_46950_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4843_cast_fp16 = mul(x = var_46949_cast_fp16, y = var_46950_to_fp16)[name = tensor("aw_chunk_4843_cast_fp16")]; + tensor var_46953_equation_0 = const()[name = tensor("op_46953_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_46953_cast_fp16 = einsum(equation = var_46953_equation_0, values = (var_46727_cast_fp16, var_46303_cast_fp16))[name = tensor("op_46953_cast_fp16")]; + tensor var_46954_to_fp16 = const()[name = tensor("op_46954_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4845_cast_fp16 = mul(x = var_46953_cast_fp16, y = var_46954_to_fp16)[name = tensor("aw_chunk_4845_cast_fp16")]; + tensor var_46957_equation_0 = const()[name = tensor("op_46957_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_46957_cast_fp16 = einsum(equation = var_46957_equation_0, values = (var_46727_cast_fp16, var_46310_cast_fp16))[name = tensor("op_46957_cast_fp16")]; + tensor var_46958_to_fp16 = const()[name = tensor("op_46958_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4847_cast_fp16 = mul(x = var_46957_cast_fp16, y = var_46958_to_fp16)[name = tensor("aw_chunk_4847_cast_fp16")]; + tensor var_46961_equation_0 = const()[name = tensor("op_46961_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_46961_cast_fp16 = einsum(equation = var_46961_equation_0, values = (var_46731_cast_fp16, var_46317_cast_fp16))[name = tensor("op_46961_cast_fp16")]; + tensor var_46962_to_fp16 = const()[name = tensor("op_46962_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4849_cast_fp16 = mul(x = var_46961_cast_fp16, y = var_46962_to_fp16)[name = tensor("aw_chunk_4849_cast_fp16")]; + tensor var_46965_equation_0 = const()[name = tensor("op_46965_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_46965_cast_fp16 = einsum(equation = var_46965_equation_0, values = (var_46731_cast_fp16, var_46324_cast_fp16))[name = tensor("op_46965_cast_fp16")]; + tensor var_46966_to_fp16 = const()[name = tensor("op_46966_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4851_cast_fp16 = mul(x = var_46965_cast_fp16, y = var_46966_to_fp16)[name = tensor("aw_chunk_4851_cast_fp16")]; + tensor var_46969_equation_0 = const()[name = tensor("op_46969_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_46969_cast_fp16 = einsum(equation = var_46969_equation_0, values = (var_46731_cast_fp16, var_46331_cast_fp16))[name = tensor("op_46969_cast_fp16")]; + tensor var_46970_to_fp16 = const()[name = tensor("op_46970_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4853_cast_fp16 = mul(x = var_46969_cast_fp16, y = var_46970_to_fp16)[name = tensor("aw_chunk_4853_cast_fp16")]; + tensor var_46973_equation_0 = const()[name = tensor("op_46973_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_46973_cast_fp16 = einsum(equation = var_46973_equation_0, values = (var_46731_cast_fp16, var_46338_cast_fp16))[name = tensor("op_46973_cast_fp16")]; + tensor var_46974_to_fp16 = const()[name = tensor("op_46974_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4855_cast_fp16 = mul(x = var_46973_cast_fp16, y = var_46974_to_fp16)[name = tensor("aw_chunk_4855_cast_fp16")]; + tensor var_46977_equation_0 = const()[name = tensor("op_46977_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_46977_cast_fp16 = einsum(equation = var_46977_equation_0, values = (var_46735_cast_fp16, var_46345_cast_fp16))[name = tensor("op_46977_cast_fp16")]; + tensor var_46978_to_fp16 = const()[name = tensor("op_46978_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4857_cast_fp16 = mul(x = var_46977_cast_fp16, y = var_46978_to_fp16)[name = tensor("aw_chunk_4857_cast_fp16")]; + tensor var_46981_equation_0 = const()[name = tensor("op_46981_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_46981_cast_fp16 = einsum(equation = var_46981_equation_0, values = (var_46735_cast_fp16, var_46352_cast_fp16))[name = tensor("op_46981_cast_fp16")]; + tensor var_46982_to_fp16 = const()[name = tensor("op_46982_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4859_cast_fp16 = mul(x = var_46981_cast_fp16, y = var_46982_to_fp16)[name = tensor("aw_chunk_4859_cast_fp16")]; + tensor var_46985_equation_0 = const()[name = tensor("op_46985_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_46985_cast_fp16 = einsum(equation = var_46985_equation_0, values = (var_46735_cast_fp16, var_46359_cast_fp16))[name = tensor("op_46985_cast_fp16")]; + tensor var_46986_to_fp16 = const()[name = tensor("op_46986_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4861_cast_fp16 = mul(x = var_46985_cast_fp16, y = var_46986_to_fp16)[name = tensor("aw_chunk_4861_cast_fp16")]; + tensor var_46989_equation_0 = const()[name = tensor("op_46989_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_46989_cast_fp16 = einsum(equation = var_46989_equation_0, values = (var_46735_cast_fp16, var_46366_cast_fp16))[name = tensor("op_46989_cast_fp16")]; + tensor var_46990_to_fp16 = const()[name = tensor("op_46990_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4863_cast_fp16 = mul(x = var_46989_cast_fp16, y = var_46990_to_fp16)[name = tensor("aw_chunk_4863_cast_fp16")]; + tensor var_46993_equation_0 = const()[name = tensor("op_46993_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_46993_cast_fp16 = einsum(equation = var_46993_equation_0, values = (var_46739_cast_fp16, var_46373_cast_fp16))[name = tensor("op_46993_cast_fp16")]; + tensor var_46994_to_fp16 = const()[name = tensor("op_46994_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4865_cast_fp16 = mul(x = var_46993_cast_fp16, y = var_46994_to_fp16)[name = tensor("aw_chunk_4865_cast_fp16")]; + tensor var_46997_equation_0 = const()[name = tensor("op_46997_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_46997_cast_fp16 = einsum(equation = var_46997_equation_0, values = (var_46739_cast_fp16, var_46380_cast_fp16))[name = tensor("op_46997_cast_fp16")]; + tensor var_46998_to_fp16 = const()[name = tensor("op_46998_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4867_cast_fp16 = mul(x = var_46997_cast_fp16, y = var_46998_to_fp16)[name = tensor("aw_chunk_4867_cast_fp16")]; + tensor var_47001_equation_0 = const()[name = tensor("op_47001_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_47001_cast_fp16 = einsum(equation = var_47001_equation_0, values = (var_46739_cast_fp16, var_46387_cast_fp16))[name = tensor("op_47001_cast_fp16")]; + tensor var_47002_to_fp16 = const()[name = tensor("op_47002_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4869_cast_fp16 = mul(x = var_47001_cast_fp16, y = var_47002_to_fp16)[name = tensor("aw_chunk_4869_cast_fp16")]; + tensor var_47005_equation_0 = const()[name = tensor("op_47005_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_47005_cast_fp16 = einsum(equation = var_47005_equation_0, values = (var_46739_cast_fp16, var_46394_cast_fp16))[name = tensor("op_47005_cast_fp16")]; + tensor var_47006_to_fp16 = const()[name = tensor("op_47006_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4871_cast_fp16 = mul(x = var_47005_cast_fp16, y = var_47006_to_fp16)[name = tensor("aw_chunk_4871_cast_fp16")]; + tensor var_47009_equation_0 = const()[name = tensor("op_47009_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_47009_cast_fp16 = einsum(equation = var_47009_equation_0, values = (var_46743_cast_fp16, var_46401_cast_fp16))[name = tensor("op_47009_cast_fp16")]; + tensor var_47010_to_fp16 = const()[name = tensor("op_47010_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4873_cast_fp16 = mul(x = var_47009_cast_fp16, y = var_47010_to_fp16)[name = tensor("aw_chunk_4873_cast_fp16")]; + tensor var_47013_equation_0 = const()[name = tensor("op_47013_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_47013_cast_fp16 = einsum(equation = var_47013_equation_0, values = (var_46743_cast_fp16, var_46408_cast_fp16))[name = tensor("op_47013_cast_fp16")]; + tensor var_47014_to_fp16 = const()[name = tensor("op_47014_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4875_cast_fp16 = mul(x = var_47013_cast_fp16, y = var_47014_to_fp16)[name = tensor("aw_chunk_4875_cast_fp16")]; + tensor var_47017_equation_0 = const()[name = tensor("op_47017_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_47017_cast_fp16 = einsum(equation = var_47017_equation_0, values = (var_46743_cast_fp16, var_46415_cast_fp16))[name = tensor("op_47017_cast_fp16")]; + tensor var_47018_to_fp16 = const()[name = tensor("op_47018_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4877_cast_fp16 = mul(x = var_47017_cast_fp16, y = var_47018_to_fp16)[name = tensor("aw_chunk_4877_cast_fp16")]; + tensor var_47021_equation_0 = const()[name = tensor("op_47021_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_47021_cast_fp16 = einsum(equation = var_47021_equation_0, values = (var_46743_cast_fp16, var_46422_cast_fp16))[name = tensor("op_47021_cast_fp16")]; + tensor var_47022_to_fp16 = const()[name = tensor("op_47022_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4879_cast_fp16 = mul(x = var_47021_cast_fp16, y = var_47022_to_fp16)[name = tensor("aw_chunk_4879_cast_fp16")]; + tensor var_47025_equation_0 = const()[name = tensor("op_47025_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_47025_cast_fp16 = einsum(equation = var_47025_equation_0, values = (var_46747_cast_fp16, var_46429_cast_fp16))[name = tensor("op_47025_cast_fp16")]; + tensor var_47026_to_fp16 = const()[name = tensor("op_47026_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4881_cast_fp16 = mul(x = var_47025_cast_fp16, y = var_47026_to_fp16)[name = tensor("aw_chunk_4881_cast_fp16")]; + tensor var_47029_equation_0 = const()[name = tensor("op_47029_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_47029_cast_fp16 = einsum(equation = var_47029_equation_0, values = (var_46747_cast_fp16, var_46436_cast_fp16))[name = tensor("op_47029_cast_fp16")]; + tensor var_47030_to_fp16 = const()[name = tensor("op_47030_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4883_cast_fp16 = mul(x = var_47029_cast_fp16, y = var_47030_to_fp16)[name = tensor("aw_chunk_4883_cast_fp16")]; + tensor var_47033_equation_0 = const()[name = tensor("op_47033_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_47033_cast_fp16 = einsum(equation = var_47033_equation_0, values = (var_46747_cast_fp16, var_46443_cast_fp16))[name = tensor("op_47033_cast_fp16")]; + tensor var_47034_to_fp16 = const()[name = tensor("op_47034_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4885_cast_fp16 = mul(x = var_47033_cast_fp16, y = var_47034_to_fp16)[name = tensor("aw_chunk_4885_cast_fp16")]; + tensor var_47037_equation_0 = const()[name = tensor("op_47037_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_47037_cast_fp16 = einsum(equation = var_47037_equation_0, values = (var_46747_cast_fp16, var_46450_cast_fp16))[name = tensor("op_47037_cast_fp16")]; + tensor var_47038_to_fp16 = const()[name = tensor("op_47038_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4887_cast_fp16 = mul(x = var_47037_cast_fp16, y = var_47038_to_fp16)[name = tensor("aw_chunk_4887_cast_fp16")]; + tensor var_47041_equation_0 = const()[name = tensor("op_47041_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_47041_cast_fp16 = einsum(equation = var_47041_equation_0, values = (var_46751_cast_fp16, var_46457_cast_fp16))[name = tensor("op_47041_cast_fp16")]; + tensor var_47042_to_fp16 = const()[name = tensor("op_47042_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4889_cast_fp16 = mul(x = var_47041_cast_fp16, y = var_47042_to_fp16)[name = tensor("aw_chunk_4889_cast_fp16")]; + tensor var_47045_equation_0 = const()[name = tensor("op_47045_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_47045_cast_fp16 = einsum(equation = var_47045_equation_0, values = (var_46751_cast_fp16, var_46464_cast_fp16))[name = tensor("op_47045_cast_fp16")]; + tensor var_47046_to_fp16 = const()[name = tensor("op_47046_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4891_cast_fp16 = mul(x = var_47045_cast_fp16, y = var_47046_to_fp16)[name = tensor("aw_chunk_4891_cast_fp16")]; + tensor var_47049_equation_0 = const()[name = tensor("op_47049_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_47049_cast_fp16 = einsum(equation = var_47049_equation_0, values = (var_46751_cast_fp16, var_46471_cast_fp16))[name = tensor("op_47049_cast_fp16")]; + tensor var_47050_to_fp16 = const()[name = tensor("op_47050_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4893_cast_fp16 = mul(x = var_47049_cast_fp16, y = var_47050_to_fp16)[name = tensor("aw_chunk_4893_cast_fp16")]; + tensor var_47053_equation_0 = const()[name = tensor("op_47053_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_47053_cast_fp16 = einsum(equation = var_47053_equation_0, values = (var_46751_cast_fp16, var_46478_cast_fp16))[name = tensor("op_47053_cast_fp16")]; + tensor var_47054_to_fp16 = const()[name = tensor("op_47054_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4895_cast_fp16 = mul(x = var_47053_cast_fp16, y = var_47054_to_fp16)[name = tensor("aw_chunk_4895_cast_fp16")]; + tensor var_47057_equation_0 = const()[name = tensor("op_47057_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_47057_cast_fp16 = einsum(equation = var_47057_equation_0, values = (var_46755_cast_fp16, var_46485_cast_fp16))[name = tensor("op_47057_cast_fp16")]; + tensor var_47058_to_fp16 = const()[name = tensor("op_47058_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4897_cast_fp16 = mul(x = var_47057_cast_fp16, y = var_47058_to_fp16)[name = tensor("aw_chunk_4897_cast_fp16")]; + tensor var_47061_equation_0 = const()[name = tensor("op_47061_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_47061_cast_fp16 = einsum(equation = var_47061_equation_0, values = (var_46755_cast_fp16, var_46492_cast_fp16))[name = tensor("op_47061_cast_fp16")]; + tensor var_47062_to_fp16 = const()[name = tensor("op_47062_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4899_cast_fp16 = mul(x = var_47061_cast_fp16, y = var_47062_to_fp16)[name = tensor("aw_chunk_4899_cast_fp16")]; + tensor var_47065_equation_0 = const()[name = tensor("op_47065_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_47065_cast_fp16 = einsum(equation = var_47065_equation_0, values = (var_46755_cast_fp16, var_46499_cast_fp16))[name = tensor("op_47065_cast_fp16")]; + tensor var_47066_to_fp16 = const()[name = tensor("op_47066_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4901_cast_fp16 = mul(x = var_47065_cast_fp16, y = var_47066_to_fp16)[name = tensor("aw_chunk_4901_cast_fp16")]; + tensor var_47069_equation_0 = const()[name = tensor("op_47069_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_47069_cast_fp16 = einsum(equation = var_47069_equation_0, values = (var_46755_cast_fp16, var_46506_cast_fp16))[name = tensor("op_47069_cast_fp16")]; + tensor var_47070_to_fp16 = const()[name = tensor("op_47070_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4903_cast_fp16 = mul(x = var_47069_cast_fp16, y = var_47070_to_fp16)[name = tensor("aw_chunk_4903_cast_fp16")]; + tensor var_47073_equation_0 = const()[name = tensor("op_47073_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_47073_cast_fp16 = einsum(equation = var_47073_equation_0, values = (var_46759_cast_fp16, var_46513_cast_fp16))[name = tensor("op_47073_cast_fp16")]; + tensor var_47074_to_fp16 = const()[name = tensor("op_47074_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4905_cast_fp16 = mul(x = var_47073_cast_fp16, y = var_47074_to_fp16)[name = tensor("aw_chunk_4905_cast_fp16")]; + tensor var_47077_equation_0 = const()[name = tensor("op_47077_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_47077_cast_fp16 = einsum(equation = var_47077_equation_0, values = (var_46759_cast_fp16, var_46520_cast_fp16))[name = tensor("op_47077_cast_fp16")]; + tensor var_47078_to_fp16 = const()[name = tensor("op_47078_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4907_cast_fp16 = mul(x = var_47077_cast_fp16, y = var_47078_to_fp16)[name = tensor("aw_chunk_4907_cast_fp16")]; + tensor var_47081_equation_0 = const()[name = tensor("op_47081_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_47081_cast_fp16 = einsum(equation = var_47081_equation_0, values = (var_46759_cast_fp16, var_46527_cast_fp16))[name = tensor("op_47081_cast_fp16")]; + tensor var_47082_to_fp16 = const()[name = tensor("op_47082_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4909_cast_fp16 = mul(x = var_47081_cast_fp16, y = var_47082_to_fp16)[name = tensor("aw_chunk_4909_cast_fp16")]; + tensor var_47085_equation_0 = const()[name = tensor("op_47085_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_47085_cast_fp16 = einsum(equation = var_47085_equation_0, values = (var_46759_cast_fp16, var_46534_cast_fp16))[name = tensor("op_47085_cast_fp16")]; + tensor var_47086_to_fp16 = const()[name = tensor("op_47086_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4911_cast_fp16 = mul(x = var_47085_cast_fp16, y = var_47086_to_fp16)[name = tensor("aw_chunk_4911_cast_fp16")]; + tensor var_47089_equation_0 = const()[name = tensor("op_47089_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_47089_cast_fp16 = einsum(equation = var_47089_equation_0, values = (var_46763_cast_fp16, var_46541_cast_fp16))[name = tensor("op_47089_cast_fp16")]; + tensor var_47090_to_fp16 = const()[name = tensor("op_47090_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4913_cast_fp16 = mul(x = var_47089_cast_fp16, y = var_47090_to_fp16)[name = tensor("aw_chunk_4913_cast_fp16")]; + tensor var_47093_equation_0 = const()[name = tensor("op_47093_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_47093_cast_fp16 = einsum(equation = var_47093_equation_0, values = (var_46763_cast_fp16, var_46548_cast_fp16))[name = tensor("op_47093_cast_fp16")]; + tensor var_47094_to_fp16 = const()[name = tensor("op_47094_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4915_cast_fp16 = mul(x = var_47093_cast_fp16, y = var_47094_to_fp16)[name = tensor("aw_chunk_4915_cast_fp16")]; + tensor var_47097_equation_0 = const()[name = tensor("op_47097_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_47097_cast_fp16 = einsum(equation = var_47097_equation_0, values = (var_46763_cast_fp16, var_46555_cast_fp16))[name = tensor("op_47097_cast_fp16")]; + tensor var_47098_to_fp16 = const()[name = tensor("op_47098_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4917_cast_fp16 = mul(x = var_47097_cast_fp16, y = var_47098_to_fp16)[name = tensor("aw_chunk_4917_cast_fp16")]; + tensor var_47101_equation_0 = const()[name = tensor("op_47101_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_47101_cast_fp16 = einsum(equation = var_47101_equation_0, values = (var_46763_cast_fp16, var_46562_cast_fp16))[name = tensor("op_47101_cast_fp16")]; + tensor var_47102_to_fp16 = const()[name = tensor("op_47102_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4919_cast_fp16 = mul(x = var_47101_cast_fp16, y = var_47102_to_fp16)[name = tensor("aw_chunk_4919_cast_fp16")]; + tensor var_47105_equation_0 = const()[name = tensor("op_47105_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_47105_cast_fp16 = einsum(equation = var_47105_equation_0, values = (var_46767_cast_fp16, var_46569_cast_fp16))[name = tensor("op_47105_cast_fp16")]; + tensor var_47106_to_fp16 = const()[name = tensor("op_47106_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4921_cast_fp16 = mul(x = var_47105_cast_fp16, y = var_47106_to_fp16)[name = tensor("aw_chunk_4921_cast_fp16")]; + tensor var_47109_equation_0 = const()[name = tensor("op_47109_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_47109_cast_fp16 = einsum(equation = var_47109_equation_0, values = (var_46767_cast_fp16, var_46576_cast_fp16))[name = tensor("op_47109_cast_fp16")]; + tensor var_47110_to_fp16 = const()[name = tensor("op_47110_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4923_cast_fp16 = mul(x = var_47109_cast_fp16, y = var_47110_to_fp16)[name = tensor("aw_chunk_4923_cast_fp16")]; + tensor var_47113_equation_0 = const()[name = tensor("op_47113_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_47113_cast_fp16 = einsum(equation = var_47113_equation_0, values = (var_46767_cast_fp16, var_46583_cast_fp16))[name = tensor("op_47113_cast_fp16")]; + tensor var_47114_to_fp16 = const()[name = tensor("op_47114_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4925_cast_fp16 = mul(x = var_47113_cast_fp16, y = var_47114_to_fp16)[name = tensor("aw_chunk_4925_cast_fp16")]; + tensor var_47117_equation_0 = const()[name = tensor("op_47117_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_47117_cast_fp16 = einsum(equation = var_47117_equation_0, values = (var_46767_cast_fp16, var_46590_cast_fp16))[name = tensor("op_47117_cast_fp16")]; + tensor var_47118_to_fp16 = const()[name = tensor("op_47118_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4927_cast_fp16 = mul(x = var_47117_cast_fp16, y = var_47118_to_fp16)[name = tensor("aw_chunk_4927_cast_fp16")]; + tensor var_47121_equation_0 = const()[name = tensor("op_47121_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_47121_cast_fp16 = einsum(equation = var_47121_equation_0, values = (var_46771_cast_fp16, var_46597_cast_fp16))[name = tensor("op_47121_cast_fp16")]; + tensor var_47122_to_fp16 = const()[name = tensor("op_47122_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4929_cast_fp16 = mul(x = var_47121_cast_fp16, y = var_47122_to_fp16)[name = tensor("aw_chunk_4929_cast_fp16")]; + tensor var_47125_equation_0 = const()[name = tensor("op_47125_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_47125_cast_fp16 = einsum(equation = var_47125_equation_0, values = (var_46771_cast_fp16, var_46604_cast_fp16))[name = tensor("op_47125_cast_fp16")]; + tensor var_47126_to_fp16 = const()[name = tensor("op_47126_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4931_cast_fp16 = mul(x = var_47125_cast_fp16, y = var_47126_to_fp16)[name = tensor("aw_chunk_4931_cast_fp16")]; + tensor var_47129_equation_0 = const()[name = tensor("op_47129_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_47129_cast_fp16 = einsum(equation = var_47129_equation_0, values = (var_46771_cast_fp16, var_46611_cast_fp16))[name = tensor("op_47129_cast_fp16")]; + tensor var_47130_to_fp16 = const()[name = tensor("op_47130_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4933_cast_fp16 = mul(x = var_47129_cast_fp16, y = var_47130_to_fp16)[name = tensor("aw_chunk_4933_cast_fp16")]; + tensor var_47133_equation_0 = const()[name = tensor("op_47133_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_47133_cast_fp16 = einsum(equation = var_47133_equation_0, values = (var_46771_cast_fp16, var_46618_cast_fp16))[name = tensor("op_47133_cast_fp16")]; + tensor var_47134_to_fp16 = const()[name = tensor("op_47134_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4935_cast_fp16 = mul(x = var_47133_cast_fp16, y = var_47134_to_fp16)[name = tensor("aw_chunk_4935_cast_fp16")]; + tensor var_47137_equation_0 = const()[name = tensor("op_47137_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_47137_cast_fp16 = einsum(equation = var_47137_equation_0, values = (var_46775_cast_fp16, var_46625_cast_fp16))[name = tensor("op_47137_cast_fp16")]; + tensor var_47138_to_fp16 = const()[name = tensor("op_47138_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4937_cast_fp16 = mul(x = var_47137_cast_fp16, y = var_47138_to_fp16)[name = tensor("aw_chunk_4937_cast_fp16")]; + tensor var_47141_equation_0 = const()[name = tensor("op_47141_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_47141_cast_fp16 = einsum(equation = var_47141_equation_0, values = (var_46775_cast_fp16, var_46632_cast_fp16))[name = tensor("op_47141_cast_fp16")]; + tensor var_47142_to_fp16 = const()[name = tensor("op_47142_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4939_cast_fp16 = mul(x = var_47141_cast_fp16, y = var_47142_to_fp16)[name = tensor("aw_chunk_4939_cast_fp16")]; + tensor var_47145_equation_0 = const()[name = tensor("op_47145_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_47145_cast_fp16 = einsum(equation = var_47145_equation_0, values = (var_46775_cast_fp16, var_46639_cast_fp16))[name = tensor("op_47145_cast_fp16")]; + tensor var_47146_to_fp16 = const()[name = tensor("op_47146_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4941_cast_fp16 = mul(x = var_47145_cast_fp16, y = var_47146_to_fp16)[name = tensor("aw_chunk_4941_cast_fp16")]; + tensor var_47149_equation_0 = const()[name = tensor("op_47149_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_47149_cast_fp16 = einsum(equation = var_47149_equation_0, values = (var_46775_cast_fp16, var_46646_cast_fp16))[name = tensor("op_47149_cast_fp16")]; + tensor var_47150_to_fp16 = const()[name = tensor("op_47150_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4943_cast_fp16 = mul(x = var_47149_cast_fp16, y = var_47150_to_fp16)[name = tensor("aw_chunk_4943_cast_fp16")]; + tensor var_47153_equation_0 = const()[name = tensor("op_47153_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_47153_cast_fp16 = einsum(equation = var_47153_equation_0, values = (var_46779_cast_fp16, var_46653_cast_fp16))[name = tensor("op_47153_cast_fp16")]; + tensor var_47154_to_fp16 = const()[name = tensor("op_47154_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4945_cast_fp16 = mul(x = var_47153_cast_fp16, y = var_47154_to_fp16)[name = tensor("aw_chunk_4945_cast_fp16")]; + tensor var_47157_equation_0 = const()[name = tensor("op_47157_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_47157_cast_fp16 = einsum(equation = var_47157_equation_0, values = (var_46779_cast_fp16, var_46660_cast_fp16))[name = tensor("op_47157_cast_fp16")]; + tensor var_47158_to_fp16 = const()[name = tensor("op_47158_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4947_cast_fp16 = mul(x = var_47157_cast_fp16, y = var_47158_to_fp16)[name = tensor("aw_chunk_4947_cast_fp16")]; + tensor var_47161_equation_0 = const()[name = tensor("op_47161_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_47161_cast_fp16 = einsum(equation = var_47161_equation_0, values = (var_46779_cast_fp16, var_46667_cast_fp16))[name = tensor("op_47161_cast_fp16")]; + tensor var_47162_to_fp16 = const()[name = tensor("op_47162_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4949_cast_fp16 = mul(x = var_47161_cast_fp16, y = var_47162_to_fp16)[name = tensor("aw_chunk_4949_cast_fp16")]; + tensor var_47165_equation_0 = const()[name = tensor("op_47165_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_47165_cast_fp16 = einsum(equation = var_47165_equation_0, values = (var_46779_cast_fp16, var_46674_cast_fp16))[name = tensor("op_47165_cast_fp16")]; + tensor var_47166_to_fp16 = const()[name = tensor("op_47166_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4951_cast_fp16 = mul(x = var_47165_cast_fp16, y = var_47166_to_fp16)[name = tensor("aw_chunk_4951_cast_fp16")]; + tensor var_47169_equation_0 = const()[name = tensor("op_47169_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_47169_cast_fp16 = einsum(equation = var_47169_equation_0, values = (var_46783_cast_fp16, var_46681_cast_fp16))[name = tensor("op_47169_cast_fp16")]; + tensor var_47170_to_fp16 = const()[name = tensor("op_47170_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4953_cast_fp16 = mul(x = var_47169_cast_fp16, y = var_47170_to_fp16)[name = tensor("aw_chunk_4953_cast_fp16")]; + tensor var_47173_equation_0 = const()[name = tensor("op_47173_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_47173_cast_fp16 = einsum(equation = var_47173_equation_0, values = (var_46783_cast_fp16, var_46688_cast_fp16))[name = tensor("op_47173_cast_fp16")]; + tensor var_47174_to_fp16 = const()[name = tensor("op_47174_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4955_cast_fp16 = mul(x = var_47173_cast_fp16, y = var_47174_to_fp16)[name = tensor("aw_chunk_4955_cast_fp16")]; + tensor var_47177_equation_0 = const()[name = tensor("op_47177_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_47177_cast_fp16 = einsum(equation = var_47177_equation_0, values = (var_46783_cast_fp16, var_46695_cast_fp16))[name = tensor("op_47177_cast_fp16")]; + tensor var_47178_to_fp16 = const()[name = tensor("op_47178_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4957_cast_fp16 = mul(x = var_47177_cast_fp16, y = var_47178_to_fp16)[name = tensor("aw_chunk_4957_cast_fp16")]; + tensor var_47181_equation_0 = const()[name = tensor("op_47181_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_47181_cast_fp16 = einsum(equation = var_47181_equation_0, values = (var_46783_cast_fp16, var_46702_cast_fp16))[name = tensor("op_47181_cast_fp16")]; + tensor var_47182_to_fp16 = const()[name = tensor("op_47182_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4959_cast_fp16 = mul(x = var_47181_cast_fp16, y = var_47182_to_fp16)[name = tensor("aw_chunk_4959_cast_fp16")]; + tensor var_47184_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4801_cast_fp16)[name = tensor("op_47184_cast_fp16")]; + tensor var_47185_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4803_cast_fp16)[name = tensor("op_47185_cast_fp16")]; + tensor var_47186_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4805_cast_fp16)[name = tensor("op_47186_cast_fp16")]; + tensor var_47187_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4807_cast_fp16)[name = tensor("op_47187_cast_fp16")]; + tensor var_47188_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4809_cast_fp16)[name = tensor("op_47188_cast_fp16")]; + tensor var_47189_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4811_cast_fp16)[name = tensor("op_47189_cast_fp16")]; + tensor var_47190_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4813_cast_fp16)[name = tensor("op_47190_cast_fp16")]; + tensor var_47191_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4815_cast_fp16)[name = tensor("op_47191_cast_fp16")]; + tensor var_47192_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4817_cast_fp16)[name = tensor("op_47192_cast_fp16")]; + tensor var_47193_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4819_cast_fp16)[name = tensor("op_47193_cast_fp16")]; + tensor var_47194_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4821_cast_fp16)[name = tensor("op_47194_cast_fp16")]; + tensor var_47195_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4823_cast_fp16)[name = tensor("op_47195_cast_fp16")]; + tensor var_47196_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4825_cast_fp16)[name = tensor("op_47196_cast_fp16")]; + tensor var_47197_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4827_cast_fp16)[name = tensor("op_47197_cast_fp16")]; + tensor var_47198_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4829_cast_fp16)[name = tensor("op_47198_cast_fp16")]; + tensor var_47199_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4831_cast_fp16)[name = tensor("op_47199_cast_fp16")]; + tensor var_47200_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4833_cast_fp16)[name = tensor("op_47200_cast_fp16")]; + tensor var_47201_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4835_cast_fp16)[name = tensor("op_47201_cast_fp16")]; + tensor var_47202_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4837_cast_fp16)[name = tensor("op_47202_cast_fp16")]; + tensor var_47203_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4839_cast_fp16)[name = tensor("op_47203_cast_fp16")]; + tensor var_47204_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4841_cast_fp16)[name = tensor("op_47204_cast_fp16")]; + tensor var_47205_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4843_cast_fp16)[name = tensor("op_47205_cast_fp16")]; + tensor var_47206_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4845_cast_fp16)[name = tensor("op_47206_cast_fp16")]; + tensor var_47207_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4847_cast_fp16)[name = tensor("op_47207_cast_fp16")]; + tensor var_47208_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4849_cast_fp16)[name = tensor("op_47208_cast_fp16")]; + tensor var_47209_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4851_cast_fp16)[name = tensor("op_47209_cast_fp16")]; + tensor var_47210_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4853_cast_fp16)[name = tensor("op_47210_cast_fp16")]; + tensor var_47211_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4855_cast_fp16)[name = tensor("op_47211_cast_fp16")]; + tensor var_47212_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4857_cast_fp16)[name = tensor("op_47212_cast_fp16")]; + tensor var_47213_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4859_cast_fp16)[name = tensor("op_47213_cast_fp16")]; + tensor var_47214_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4861_cast_fp16)[name = tensor("op_47214_cast_fp16")]; + tensor var_47215_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4863_cast_fp16)[name = tensor("op_47215_cast_fp16")]; + tensor var_47216_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4865_cast_fp16)[name = tensor("op_47216_cast_fp16")]; + tensor var_47217_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4867_cast_fp16)[name = tensor("op_47217_cast_fp16")]; + tensor var_47218_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4869_cast_fp16)[name = tensor("op_47218_cast_fp16")]; + tensor var_47219_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4871_cast_fp16)[name = tensor("op_47219_cast_fp16")]; + tensor var_47220_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4873_cast_fp16)[name = tensor("op_47220_cast_fp16")]; + tensor var_47221_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4875_cast_fp16)[name = tensor("op_47221_cast_fp16")]; + tensor var_47222_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4877_cast_fp16)[name = tensor("op_47222_cast_fp16")]; + tensor var_47223_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4879_cast_fp16)[name = tensor("op_47223_cast_fp16")]; + tensor var_47224_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4881_cast_fp16)[name = tensor("op_47224_cast_fp16")]; + tensor var_47225_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4883_cast_fp16)[name = tensor("op_47225_cast_fp16")]; + tensor var_47226_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4885_cast_fp16)[name = tensor("op_47226_cast_fp16")]; + tensor var_47227_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4887_cast_fp16)[name = tensor("op_47227_cast_fp16")]; + tensor var_47228_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4889_cast_fp16)[name = tensor("op_47228_cast_fp16")]; + tensor var_47229_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4891_cast_fp16)[name = tensor("op_47229_cast_fp16")]; + tensor var_47230_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4893_cast_fp16)[name = tensor("op_47230_cast_fp16")]; + tensor var_47231_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4895_cast_fp16)[name = tensor("op_47231_cast_fp16")]; + tensor var_47232_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4897_cast_fp16)[name = tensor("op_47232_cast_fp16")]; + tensor var_47233_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4899_cast_fp16)[name = tensor("op_47233_cast_fp16")]; + tensor var_47234_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4901_cast_fp16)[name = tensor("op_47234_cast_fp16")]; + tensor var_47235_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4903_cast_fp16)[name = tensor("op_47235_cast_fp16")]; + tensor var_47236_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4905_cast_fp16)[name = tensor("op_47236_cast_fp16")]; + tensor var_47237_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4907_cast_fp16)[name = tensor("op_47237_cast_fp16")]; + tensor var_47238_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4909_cast_fp16)[name = tensor("op_47238_cast_fp16")]; + tensor var_47239_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4911_cast_fp16)[name = tensor("op_47239_cast_fp16")]; + tensor var_47240_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4913_cast_fp16)[name = tensor("op_47240_cast_fp16")]; + tensor var_47241_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4915_cast_fp16)[name = tensor("op_47241_cast_fp16")]; + tensor var_47242_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4917_cast_fp16)[name = tensor("op_47242_cast_fp16")]; + tensor var_47243_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4919_cast_fp16)[name = tensor("op_47243_cast_fp16")]; + tensor var_47244_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4921_cast_fp16)[name = tensor("op_47244_cast_fp16")]; + tensor var_47245_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4923_cast_fp16)[name = tensor("op_47245_cast_fp16")]; + tensor var_47246_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4925_cast_fp16)[name = tensor("op_47246_cast_fp16")]; + tensor var_47247_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4927_cast_fp16)[name = tensor("op_47247_cast_fp16")]; + tensor var_47248_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4929_cast_fp16)[name = tensor("op_47248_cast_fp16")]; + tensor var_47249_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4931_cast_fp16)[name = tensor("op_47249_cast_fp16")]; + tensor var_47250_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4933_cast_fp16)[name = tensor("op_47250_cast_fp16")]; + tensor var_47251_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4935_cast_fp16)[name = tensor("op_47251_cast_fp16")]; + tensor var_47252_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4937_cast_fp16)[name = tensor("op_47252_cast_fp16")]; + tensor var_47253_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4939_cast_fp16)[name = tensor("op_47253_cast_fp16")]; + tensor var_47254_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4941_cast_fp16)[name = tensor("op_47254_cast_fp16")]; + tensor var_47255_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4943_cast_fp16)[name = tensor("op_47255_cast_fp16")]; + tensor var_47256_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4945_cast_fp16)[name = tensor("op_47256_cast_fp16")]; + tensor var_47257_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4947_cast_fp16)[name = tensor("op_47257_cast_fp16")]; + tensor var_47258_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4949_cast_fp16)[name = tensor("op_47258_cast_fp16")]; + tensor var_47259_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4951_cast_fp16)[name = tensor("op_47259_cast_fp16")]; + tensor var_47260_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4953_cast_fp16)[name = tensor("op_47260_cast_fp16")]; + tensor var_47261_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4955_cast_fp16)[name = tensor("op_47261_cast_fp16")]; + tensor var_47262_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4957_cast_fp16)[name = tensor("op_47262_cast_fp16")]; + tensor var_47263_cast_fp16 = softmax(axis = var_46009, x = aw_chunk_4959_cast_fp16)[name = tensor("op_47263_cast_fp16")]; + tensor var_47265_equation_0 = const()[name = tensor("op_47265_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47265_cast_fp16 = einsum(equation = var_47265_equation_0, values = (var_46785_cast_fp16, var_47184_cast_fp16))[name = tensor("op_47265_cast_fp16")]; + tensor var_47267_equation_0 = const()[name = tensor("op_47267_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47267_cast_fp16 = einsum(equation = var_47267_equation_0, values = (var_46785_cast_fp16, var_47185_cast_fp16))[name = tensor("op_47267_cast_fp16")]; + tensor var_47269_equation_0 = const()[name = tensor("op_47269_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47269_cast_fp16 = einsum(equation = var_47269_equation_0, values = (var_46785_cast_fp16, var_47186_cast_fp16))[name = tensor("op_47269_cast_fp16")]; + tensor var_47271_equation_0 = const()[name = tensor("op_47271_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47271_cast_fp16 = einsum(equation = var_47271_equation_0, values = (var_46785_cast_fp16, var_47187_cast_fp16))[name = tensor("op_47271_cast_fp16")]; + tensor var_47273_equation_0 = const()[name = tensor("op_47273_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47273_cast_fp16 = einsum(equation = var_47273_equation_0, values = (var_46789_cast_fp16, var_47188_cast_fp16))[name = tensor("op_47273_cast_fp16")]; + tensor var_47275_equation_0 = const()[name = tensor("op_47275_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47275_cast_fp16 = einsum(equation = var_47275_equation_0, values = (var_46789_cast_fp16, var_47189_cast_fp16))[name = tensor("op_47275_cast_fp16")]; + tensor var_47277_equation_0 = const()[name = tensor("op_47277_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47277_cast_fp16 = einsum(equation = var_47277_equation_0, values = (var_46789_cast_fp16, var_47190_cast_fp16))[name = tensor("op_47277_cast_fp16")]; + tensor var_47279_equation_0 = const()[name = tensor("op_47279_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47279_cast_fp16 = einsum(equation = var_47279_equation_0, values = (var_46789_cast_fp16, var_47191_cast_fp16))[name = tensor("op_47279_cast_fp16")]; + tensor var_47281_equation_0 = const()[name = tensor("op_47281_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47281_cast_fp16 = einsum(equation = var_47281_equation_0, values = (var_46793_cast_fp16, var_47192_cast_fp16))[name = tensor("op_47281_cast_fp16")]; + tensor var_47283_equation_0 = const()[name = tensor("op_47283_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47283_cast_fp16 = einsum(equation = var_47283_equation_0, values = (var_46793_cast_fp16, var_47193_cast_fp16))[name = tensor("op_47283_cast_fp16")]; + tensor var_47285_equation_0 = const()[name = tensor("op_47285_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47285_cast_fp16 = einsum(equation = var_47285_equation_0, values = (var_46793_cast_fp16, var_47194_cast_fp16))[name = tensor("op_47285_cast_fp16")]; + tensor var_47287_equation_0 = const()[name = tensor("op_47287_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47287_cast_fp16 = einsum(equation = var_47287_equation_0, values = (var_46793_cast_fp16, var_47195_cast_fp16))[name = tensor("op_47287_cast_fp16")]; + tensor var_47289_equation_0 = const()[name = tensor("op_47289_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47289_cast_fp16 = einsum(equation = var_47289_equation_0, values = (var_46797_cast_fp16, var_47196_cast_fp16))[name = tensor("op_47289_cast_fp16")]; + tensor var_47291_equation_0 = const()[name = tensor("op_47291_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47291_cast_fp16 = einsum(equation = var_47291_equation_0, values = (var_46797_cast_fp16, var_47197_cast_fp16))[name = tensor("op_47291_cast_fp16")]; + tensor var_47293_equation_0 = const()[name = tensor("op_47293_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47293_cast_fp16 = einsum(equation = var_47293_equation_0, values = (var_46797_cast_fp16, var_47198_cast_fp16))[name = tensor("op_47293_cast_fp16")]; + tensor var_47295_equation_0 = const()[name = tensor("op_47295_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47295_cast_fp16 = einsum(equation = var_47295_equation_0, values = (var_46797_cast_fp16, var_47199_cast_fp16))[name = tensor("op_47295_cast_fp16")]; + tensor var_47297_equation_0 = const()[name = tensor("op_47297_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47297_cast_fp16 = einsum(equation = var_47297_equation_0, values = (var_46801_cast_fp16, var_47200_cast_fp16))[name = tensor("op_47297_cast_fp16")]; + tensor var_47299_equation_0 = const()[name = tensor("op_47299_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47299_cast_fp16 = einsum(equation = var_47299_equation_0, values = (var_46801_cast_fp16, var_47201_cast_fp16))[name = tensor("op_47299_cast_fp16")]; + tensor var_47301_equation_0 = const()[name = tensor("op_47301_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47301_cast_fp16 = einsum(equation = var_47301_equation_0, values = (var_46801_cast_fp16, var_47202_cast_fp16))[name = tensor("op_47301_cast_fp16")]; + tensor var_47303_equation_0 = const()[name = tensor("op_47303_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47303_cast_fp16 = einsum(equation = var_47303_equation_0, values = (var_46801_cast_fp16, var_47203_cast_fp16))[name = tensor("op_47303_cast_fp16")]; + tensor var_47305_equation_0 = const()[name = tensor("op_47305_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47305_cast_fp16 = einsum(equation = var_47305_equation_0, values = (var_46805_cast_fp16, var_47204_cast_fp16))[name = tensor("op_47305_cast_fp16")]; + tensor var_47307_equation_0 = const()[name = tensor("op_47307_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47307_cast_fp16 = einsum(equation = var_47307_equation_0, values = (var_46805_cast_fp16, var_47205_cast_fp16))[name = tensor("op_47307_cast_fp16")]; + tensor var_47309_equation_0 = const()[name = tensor("op_47309_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47309_cast_fp16 = einsum(equation = var_47309_equation_0, values = (var_46805_cast_fp16, var_47206_cast_fp16))[name = tensor("op_47309_cast_fp16")]; + tensor var_47311_equation_0 = const()[name = tensor("op_47311_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47311_cast_fp16 = einsum(equation = var_47311_equation_0, values = (var_46805_cast_fp16, var_47207_cast_fp16))[name = tensor("op_47311_cast_fp16")]; + tensor var_47313_equation_0 = const()[name = tensor("op_47313_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47313_cast_fp16 = einsum(equation = var_47313_equation_0, values = (var_46809_cast_fp16, var_47208_cast_fp16))[name = tensor("op_47313_cast_fp16")]; + tensor var_47315_equation_0 = const()[name = tensor("op_47315_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47315_cast_fp16 = einsum(equation = var_47315_equation_0, values = (var_46809_cast_fp16, var_47209_cast_fp16))[name = tensor("op_47315_cast_fp16")]; + tensor var_47317_equation_0 = const()[name = tensor("op_47317_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47317_cast_fp16 = einsum(equation = var_47317_equation_0, values = (var_46809_cast_fp16, var_47210_cast_fp16))[name = tensor("op_47317_cast_fp16")]; + tensor var_47319_equation_0 = const()[name = tensor("op_47319_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47319_cast_fp16 = einsum(equation = var_47319_equation_0, values = (var_46809_cast_fp16, var_47211_cast_fp16))[name = tensor("op_47319_cast_fp16")]; + tensor var_47321_equation_0 = const()[name = tensor("op_47321_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47321_cast_fp16 = einsum(equation = var_47321_equation_0, values = (var_46813_cast_fp16, var_47212_cast_fp16))[name = tensor("op_47321_cast_fp16")]; + tensor var_47323_equation_0 = const()[name = tensor("op_47323_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47323_cast_fp16 = einsum(equation = var_47323_equation_0, values = (var_46813_cast_fp16, var_47213_cast_fp16))[name = tensor("op_47323_cast_fp16")]; + tensor var_47325_equation_0 = const()[name = tensor("op_47325_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47325_cast_fp16 = einsum(equation = var_47325_equation_0, values = (var_46813_cast_fp16, var_47214_cast_fp16))[name = tensor("op_47325_cast_fp16")]; + tensor var_47327_equation_0 = const()[name = tensor("op_47327_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47327_cast_fp16 = einsum(equation = var_47327_equation_0, values = (var_46813_cast_fp16, var_47215_cast_fp16))[name = tensor("op_47327_cast_fp16")]; + tensor var_47329_equation_0 = const()[name = tensor("op_47329_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47329_cast_fp16 = einsum(equation = var_47329_equation_0, values = (var_46817_cast_fp16, var_47216_cast_fp16))[name = tensor("op_47329_cast_fp16")]; + tensor var_47331_equation_0 = const()[name = tensor("op_47331_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47331_cast_fp16 = einsum(equation = var_47331_equation_0, values = (var_46817_cast_fp16, var_47217_cast_fp16))[name = tensor("op_47331_cast_fp16")]; + tensor var_47333_equation_0 = const()[name = tensor("op_47333_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47333_cast_fp16 = einsum(equation = var_47333_equation_0, values = (var_46817_cast_fp16, var_47218_cast_fp16))[name = tensor("op_47333_cast_fp16")]; + tensor var_47335_equation_0 = const()[name = tensor("op_47335_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47335_cast_fp16 = einsum(equation = var_47335_equation_0, values = (var_46817_cast_fp16, var_47219_cast_fp16))[name = tensor("op_47335_cast_fp16")]; + tensor var_47337_equation_0 = const()[name = tensor("op_47337_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47337_cast_fp16 = einsum(equation = var_47337_equation_0, values = (var_46821_cast_fp16, var_47220_cast_fp16))[name = tensor("op_47337_cast_fp16")]; + tensor var_47339_equation_0 = const()[name = tensor("op_47339_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47339_cast_fp16 = einsum(equation = var_47339_equation_0, values = (var_46821_cast_fp16, var_47221_cast_fp16))[name = tensor("op_47339_cast_fp16")]; + tensor var_47341_equation_0 = const()[name = tensor("op_47341_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47341_cast_fp16 = einsum(equation = var_47341_equation_0, values = (var_46821_cast_fp16, var_47222_cast_fp16))[name = tensor("op_47341_cast_fp16")]; + tensor var_47343_equation_0 = const()[name = tensor("op_47343_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47343_cast_fp16 = einsum(equation = var_47343_equation_0, values = (var_46821_cast_fp16, var_47223_cast_fp16))[name = tensor("op_47343_cast_fp16")]; + tensor var_47345_equation_0 = const()[name = tensor("op_47345_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47345_cast_fp16 = einsum(equation = var_47345_equation_0, values = (var_46825_cast_fp16, var_47224_cast_fp16))[name = tensor("op_47345_cast_fp16")]; + tensor var_47347_equation_0 = const()[name = tensor("op_47347_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47347_cast_fp16 = einsum(equation = var_47347_equation_0, values = (var_46825_cast_fp16, var_47225_cast_fp16))[name = tensor("op_47347_cast_fp16")]; + tensor var_47349_equation_0 = const()[name = tensor("op_47349_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47349_cast_fp16 = einsum(equation = var_47349_equation_0, values = (var_46825_cast_fp16, var_47226_cast_fp16))[name = tensor("op_47349_cast_fp16")]; + tensor var_47351_equation_0 = const()[name = tensor("op_47351_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47351_cast_fp16 = einsum(equation = var_47351_equation_0, values = (var_46825_cast_fp16, var_47227_cast_fp16))[name = tensor("op_47351_cast_fp16")]; + tensor var_47353_equation_0 = const()[name = tensor("op_47353_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47353_cast_fp16 = einsum(equation = var_47353_equation_0, values = (var_46829_cast_fp16, var_47228_cast_fp16))[name = tensor("op_47353_cast_fp16")]; + tensor var_47355_equation_0 = const()[name = tensor("op_47355_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47355_cast_fp16 = einsum(equation = var_47355_equation_0, values = (var_46829_cast_fp16, var_47229_cast_fp16))[name = tensor("op_47355_cast_fp16")]; + tensor var_47357_equation_0 = const()[name = tensor("op_47357_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47357_cast_fp16 = einsum(equation = var_47357_equation_0, values = (var_46829_cast_fp16, var_47230_cast_fp16))[name = tensor("op_47357_cast_fp16")]; + tensor var_47359_equation_0 = const()[name = tensor("op_47359_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47359_cast_fp16 = einsum(equation = var_47359_equation_0, values = (var_46829_cast_fp16, var_47231_cast_fp16))[name = tensor("op_47359_cast_fp16")]; + tensor var_47361_equation_0 = const()[name = tensor("op_47361_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47361_cast_fp16 = einsum(equation = var_47361_equation_0, values = (var_46833_cast_fp16, var_47232_cast_fp16))[name = tensor("op_47361_cast_fp16")]; + tensor var_47363_equation_0 = const()[name = tensor("op_47363_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47363_cast_fp16 = einsum(equation = var_47363_equation_0, values = (var_46833_cast_fp16, var_47233_cast_fp16))[name = tensor("op_47363_cast_fp16")]; + tensor var_47365_equation_0 = const()[name = tensor("op_47365_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47365_cast_fp16 = einsum(equation = var_47365_equation_0, values = (var_46833_cast_fp16, var_47234_cast_fp16))[name = tensor("op_47365_cast_fp16")]; + tensor var_47367_equation_0 = const()[name = tensor("op_47367_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47367_cast_fp16 = einsum(equation = var_47367_equation_0, values = (var_46833_cast_fp16, var_47235_cast_fp16))[name = tensor("op_47367_cast_fp16")]; + tensor var_47369_equation_0 = const()[name = tensor("op_47369_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47369_cast_fp16 = einsum(equation = var_47369_equation_0, values = (var_46837_cast_fp16, var_47236_cast_fp16))[name = tensor("op_47369_cast_fp16")]; + tensor var_47371_equation_0 = const()[name = tensor("op_47371_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47371_cast_fp16 = einsum(equation = var_47371_equation_0, values = (var_46837_cast_fp16, var_47237_cast_fp16))[name = tensor("op_47371_cast_fp16")]; + tensor var_47373_equation_0 = const()[name = tensor("op_47373_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47373_cast_fp16 = einsum(equation = var_47373_equation_0, values = (var_46837_cast_fp16, var_47238_cast_fp16))[name = tensor("op_47373_cast_fp16")]; + tensor var_47375_equation_0 = const()[name = tensor("op_47375_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47375_cast_fp16 = einsum(equation = var_47375_equation_0, values = (var_46837_cast_fp16, var_47239_cast_fp16))[name = tensor("op_47375_cast_fp16")]; + tensor var_47377_equation_0 = const()[name = tensor("op_47377_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47377_cast_fp16 = einsum(equation = var_47377_equation_0, values = (var_46841_cast_fp16, var_47240_cast_fp16))[name = tensor("op_47377_cast_fp16")]; + tensor var_47379_equation_0 = const()[name = tensor("op_47379_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47379_cast_fp16 = einsum(equation = var_47379_equation_0, values = (var_46841_cast_fp16, var_47241_cast_fp16))[name = tensor("op_47379_cast_fp16")]; + tensor var_47381_equation_0 = const()[name = tensor("op_47381_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47381_cast_fp16 = einsum(equation = var_47381_equation_0, values = (var_46841_cast_fp16, var_47242_cast_fp16))[name = tensor("op_47381_cast_fp16")]; + tensor var_47383_equation_0 = const()[name = tensor("op_47383_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47383_cast_fp16 = einsum(equation = var_47383_equation_0, values = (var_46841_cast_fp16, var_47243_cast_fp16))[name = tensor("op_47383_cast_fp16")]; + tensor var_47385_equation_0 = const()[name = tensor("op_47385_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47385_cast_fp16 = einsum(equation = var_47385_equation_0, values = (var_46845_cast_fp16, var_47244_cast_fp16))[name = tensor("op_47385_cast_fp16")]; + tensor var_47387_equation_0 = const()[name = tensor("op_47387_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47387_cast_fp16 = einsum(equation = var_47387_equation_0, values = (var_46845_cast_fp16, var_47245_cast_fp16))[name = tensor("op_47387_cast_fp16")]; + tensor var_47389_equation_0 = const()[name = tensor("op_47389_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47389_cast_fp16 = einsum(equation = var_47389_equation_0, values = (var_46845_cast_fp16, var_47246_cast_fp16))[name = tensor("op_47389_cast_fp16")]; + tensor var_47391_equation_0 = const()[name = tensor("op_47391_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47391_cast_fp16 = einsum(equation = var_47391_equation_0, values = (var_46845_cast_fp16, var_47247_cast_fp16))[name = tensor("op_47391_cast_fp16")]; + tensor var_47393_equation_0 = const()[name = tensor("op_47393_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47393_cast_fp16 = einsum(equation = var_47393_equation_0, values = (var_46849_cast_fp16, var_47248_cast_fp16))[name = tensor("op_47393_cast_fp16")]; + tensor var_47395_equation_0 = const()[name = tensor("op_47395_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47395_cast_fp16 = einsum(equation = var_47395_equation_0, values = (var_46849_cast_fp16, var_47249_cast_fp16))[name = tensor("op_47395_cast_fp16")]; + tensor var_47397_equation_0 = const()[name = tensor("op_47397_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47397_cast_fp16 = einsum(equation = var_47397_equation_0, values = (var_46849_cast_fp16, var_47250_cast_fp16))[name = tensor("op_47397_cast_fp16")]; + tensor var_47399_equation_0 = const()[name = tensor("op_47399_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47399_cast_fp16 = einsum(equation = var_47399_equation_0, values = (var_46849_cast_fp16, var_47251_cast_fp16))[name = tensor("op_47399_cast_fp16")]; + tensor var_47401_equation_0 = const()[name = tensor("op_47401_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47401_cast_fp16 = einsum(equation = var_47401_equation_0, values = (var_46853_cast_fp16, var_47252_cast_fp16))[name = tensor("op_47401_cast_fp16")]; + tensor var_47403_equation_0 = const()[name = tensor("op_47403_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47403_cast_fp16 = einsum(equation = var_47403_equation_0, values = (var_46853_cast_fp16, var_47253_cast_fp16))[name = tensor("op_47403_cast_fp16")]; + tensor var_47405_equation_0 = const()[name = tensor("op_47405_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47405_cast_fp16 = einsum(equation = var_47405_equation_0, values = (var_46853_cast_fp16, var_47254_cast_fp16))[name = tensor("op_47405_cast_fp16")]; + tensor var_47407_equation_0 = const()[name = tensor("op_47407_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47407_cast_fp16 = einsum(equation = var_47407_equation_0, values = (var_46853_cast_fp16, var_47255_cast_fp16))[name = tensor("op_47407_cast_fp16")]; + tensor var_47409_equation_0 = const()[name = tensor("op_47409_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47409_cast_fp16 = einsum(equation = var_47409_equation_0, values = (var_46857_cast_fp16, var_47256_cast_fp16))[name = tensor("op_47409_cast_fp16")]; + tensor var_47411_equation_0 = const()[name = tensor("op_47411_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47411_cast_fp16 = einsum(equation = var_47411_equation_0, values = (var_46857_cast_fp16, var_47257_cast_fp16))[name = tensor("op_47411_cast_fp16")]; + tensor var_47413_equation_0 = const()[name = tensor("op_47413_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47413_cast_fp16 = einsum(equation = var_47413_equation_0, values = (var_46857_cast_fp16, var_47258_cast_fp16))[name = tensor("op_47413_cast_fp16")]; + tensor var_47415_equation_0 = const()[name = tensor("op_47415_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47415_cast_fp16 = einsum(equation = var_47415_equation_0, values = (var_46857_cast_fp16, var_47259_cast_fp16))[name = tensor("op_47415_cast_fp16")]; + tensor var_47417_equation_0 = const()[name = tensor("op_47417_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47417_cast_fp16 = einsum(equation = var_47417_equation_0, values = (var_46861_cast_fp16, var_47260_cast_fp16))[name = tensor("op_47417_cast_fp16")]; + tensor var_47419_equation_0 = const()[name = tensor("op_47419_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47419_cast_fp16 = einsum(equation = var_47419_equation_0, values = (var_46861_cast_fp16, var_47261_cast_fp16))[name = tensor("op_47419_cast_fp16")]; + tensor var_47421_equation_0 = const()[name = tensor("op_47421_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47421_cast_fp16 = einsum(equation = var_47421_equation_0, values = (var_46861_cast_fp16, var_47262_cast_fp16))[name = tensor("op_47421_cast_fp16")]; + tensor var_47423_equation_0 = const()[name = tensor("op_47423_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_47423_cast_fp16 = einsum(equation = var_47423_equation_0, values = (var_46861_cast_fp16, var_47263_cast_fp16))[name = tensor("op_47423_cast_fp16")]; + tensor var_47425_interleave_0 = const()[name = tensor("op_47425_interleave_0"), val = tensor(false)]; + tensor var_47425_cast_fp16 = concat(axis = var_45984, interleave = var_47425_interleave_0, values = (var_47265_cast_fp16, var_47267_cast_fp16, var_47269_cast_fp16, var_47271_cast_fp16))[name = tensor("op_47425_cast_fp16")]; + tensor var_47427_interleave_0 = const()[name = tensor("op_47427_interleave_0"), val = tensor(false)]; + tensor var_47427_cast_fp16 = concat(axis = var_45984, interleave = var_47427_interleave_0, values = (var_47273_cast_fp16, var_47275_cast_fp16, var_47277_cast_fp16, var_47279_cast_fp16))[name = tensor("op_47427_cast_fp16")]; + tensor var_47429_interleave_0 = const()[name = tensor("op_47429_interleave_0"), val = tensor(false)]; + tensor var_47429_cast_fp16 = concat(axis = var_45984, interleave = var_47429_interleave_0, values = (var_47281_cast_fp16, var_47283_cast_fp16, var_47285_cast_fp16, var_47287_cast_fp16))[name = tensor("op_47429_cast_fp16")]; + tensor var_47431_interleave_0 = const()[name = tensor("op_47431_interleave_0"), val = tensor(false)]; + tensor var_47431_cast_fp16 = concat(axis = var_45984, interleave = var_47431_interleave_0, values = (var_47289_cast_fp16, var_47291_cast_fp16, var_47293_cast_fp16, var_47295_cast_fp16))[name = tensor("op_47431_cast_fp16")]; + tensor var_47433_interleave_0 = const()[name = tensor("op_47433_interleave_0"), val = tensor(false)]; + tensor var_47433_cast_fp16 = concat(axis = var_45984, interleave = var_47433_interleave_0, values = (var_47297_cast_fp16, var_47299_cast_fp16, var_47301_cast_fp16, var_47303_cast_fp16))[name = tensor("op_47433_cast_fp16")]; + tensor var_47435_interleave_0 = const()[name = tensor("op_47435_interleave_0"), val = tensor(false)]; + tensor var_47435_cast_fp16 = concat(axis = var_45984, interleave = var_47435_interleave_0, values = (var_47305_cast_fp16, var_47307_cast_fp16, var_47309_cast_fp16, var_47311_cast_fp16))[name = tensor("op_47435_cast_fp16")]; + tensor var_47437_interleave_0 = const()[name = tensor("op_47437_interleave_0"), val = tensor(false)]; + tensor var_47437_cast_fp16 = concat(axis = var_45984, interleave = var_47437_interleave_0, values = (var_47313_cast_fp16, var_47315_cast_fp16, var_47317_cast_fp16, var_47319_cast_fp16))[name = tensor("op_47437_cast_fp16")]; + tensor var_47439_interleave_0 = const()[name = tensor("op_47439_interleave_0"), val = tensor(false)]; + tensor var_47439_cast_fp16 = concat(axis = var_45984, interleave = var_47439_interleave_0, values = (var_47321_cast_fp16, var_47323_cast_fp16, var_47325_cast_fp16, var_47327_cast_fp16))[name = tensor("op_47439_cast_fp16")]; + tensor var_47441_interleave_0 = const()[name = tensor("op_47441_interleave_0"), val = tensor(false)]; + tensor var_47441_cast_fp16 = concat(axis = var_45984, interleave = var_47441_interleave_0, values = (var_47329_cast_fp16, var_47331_cast_fp16, var_47333_cast_fp16, var_47335_cast_fp16))[name = tensor("op_47441_cast_fp16")]; + tensor var_47443_interleave_0 = const()[name = tensor("op_47443_interleave_0"), val = tensor(false)]; + tensor var_47443_cast_fp16 = concat(axis = var_45984, interleave = var_47443_interleave_0, values = (var_47337_cast_fp16, var_47339_cast_fp16, var_47341_cast_fp16, var_47343_cast_fp16))[name = tensor("op_47443_cast_fp16")]; + tensor var_47445_interleave_0 = const()[name = tensor("op_47445_interleave_0"), val = tensor(false)]; + tensor var_47445_cast_fp16 = concat(axis = var_45984, interleave = var_47445_interleave_0, values = (var_47345_cast_fp16, var_47347_cast_fp16, var_47349_cast_fp16, var_47351_cast_fp16))[name = tensor("op_47445_cast_fp16")]; + tensor var_47447_interleave_0 = const()[name = tensor("op_47447_interleave_0"), val = tensor(false)]; + tensor var_47447_cast_fp16 = concat(axis = var_45984, interleave = var_47447_interleave_0, values = (var_47353_cast_fp16, var_47355_cast_fp16, var_47357_cast_fp16, var_47359_cast_fp16))[name = tensor("op_47447_cast_fp16")]; + tensor var_47449_interleave_0 = const()[name = tensor("op_47449_interleave_0"), val = tensor(false)]; + tensor var_47449_cast_fp16 = concat(axis = var_45984, interleave = var_47449_interleave_0, values = (var_47361_cast_fp16, var_47363_cast_fp16, var_47365_cast_fp16, var_47367_cast_fp16))[name = tensor("op_47449_cast_fp16")]; + tensor var_47451_interleave_0 = const()[name = tensor("op_47451_interleave_0"), val = tensor(false)]; + tensor var_47451_cast_fp16 = concat(axis = var_45984, interleave = var_47451_interleave_0, values = (var_47369_cast_fp16, var_47371_cast_fp16, var_47373_cast_fp16, var_47375_cast_fp16))[name = tensor("op_47451_cast_fp16")]; + tensor var_47453_interleave_0 = const()[name = tensor("op_47453_interleave_0"), val = tensor(false)]; + tensor var_47453_cast_fp16 = concat(axis = var_45984, interleave = var_47453_interleave_0, values = (var_47377_cast_fp16, var_47379_cast_fp16, var_47381_cast_fp16, var_47383_cast_fp16))[name = tensor("op_47453_cast_fp16")]; + tensor var_47455_interleave_0 = const()[name = tensor("op_47455_interleave_0"), val = tensor(false)]; + tensor var_47455_cast_fp16 = concat(axis = var_45984, interleave = var_47455_interleave_0, values = (var_47385_cast_fp16, var_47387_cast_fp16, var_47389_cast_fp16, var_47391_cast_fp16))[name = tensor("op_47455_cast_fp16")]; + tensor var_47457_interleave_0 = const()[name = tensor("op_47457_interleave_0"), val = tensor(false)]; + tensor var_47457_cast_fp16 = concat(axis = var_45984, interleave = var_47457_interleave_0, values = (var_47393_cast_fp16, var_47395_cast_fp16, var_47397_cast_fp16, var_47399_cast_fp16))[name = tensor("op_47457_cast_fp16")]; + tensor var_47459_interleave_0 = const()[name = tensor("op_47459_interleave_0"), val = tensor(false)]; + tensor var_47459_cast_fp16 = concat(axis = var_45984, interleave = var_47459_interleave_0, values = (var_47401_cast_fp16, var_47403_cast_fp16, var_47405_cast_fp16, var_47407_cast_fp16))[name = tensor("op_47459_cast_fp16")]; + tensor var_47461_interleave_0 = const()[name = tensor("op_47461_interleave_0"), val = tensor(false)]; + tensor var_47461_cast_fp16 = concat(axis = var_45984, interleave = var_47461_interleave_0, values = (var_47409_cast_fp16, var_47411_cast_fp16, var_47413_cast_fp16, var_47415_cast_fp16))[name = tensor("op_47461_cast_fp16")]; + tensor var_47463_interleave_0 = const()[name = tensor("op_47463_interleave_0"), val = tensor(false)]; + tensor var_47463_cast_fp16 = concat(axis = var_45984, interleave = var_47463_interleave_0, values = (var_47417_cast_fp16, var_47419_cast_fp16, var_47421_cast_fp16, var_47423_cast_fp16))[name = tensor("op_47463_cast_fp16")]; + tensor input_241_interleave_0 = const()[name = tensor("input_241_interleave_0"), val = tensor(false)]; + tensor input_241_cast_fp16 = concat(axis = var_46009, interleave = input_241_interleave_0, values = (var_47425_cast_fp16, var_47427_cast_fp16, var_47429_cast_fp16, var_47431_cast_fp16, var_47433_cast_fp16, var_47435_cast_fp16, var_47437_cast_fp16, var_47439_cast_fp16, var_47441_cast_fp16, var_47443_cast_fp16, var_47445_cast_fp16, var_47447_cast_fp16, var_47449_cast_fp16, var_47451_cast_fp16, var_47453_cast_fp16, var_47455_cast_fp16, var_47457_cast_fp16, var_47459_cast_fp16, var_47461_cast_fp16, var_47463_cast_fp16))[name = tensor("input_241_cast_fp16")]; + tensor var_47468 = const()[name = tensor("op_47468"), val = tensor([1, 1])]; + tensor var_47470 = const()[name = tensor("op_47470"), val = tensor([1, 1])]; + tensor obj_123_pad_type_0 = const()[name = tensor("obj_123_pad_type_0"), val = tensor("custom")]; + tensor obj_123_pad_0 = const()[name = tensor("obj_123_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_30_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_30_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1204735040)))]; + tensor layers_30_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_30_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1208011904)))]; + tensor obj_123_cast_fp16 = conv(bias = layers_30_self_attn_o_proj_bias_to_fp16, dilations = var_47470, groups = var_46009, pad = obj_123_pad_0, pad_type = obj_123_pad_type_0, strides = var_47468, weight = layers_30_self_attn_o_proj_weight_to_fp16, x = input_241_cast_fp16)[name = tensor("obj_123_cast_fp16")]; + tensor inputs_123_cast_fp16 = add(x = inputs_121_cast_fp16, y = obj_123_cast_fp16)[name = tensor("inputs_123_cast_fp16")]; + tensor var_47476 = const()[name = tensor("op_47476"), val = tensor([1])]; + tensor channels_mean_123_cast_fp16 = reduce_mean(axes = var_47476, keep_dims = var_46010, x = inputs_123_cast_fp16)[name = tensor("channels_mean_123_cast_fp16")]; + tensor zero_mean_123_cast_fp16 = sub(x = inputs_123_cast_fp16, y = channels_mean_123_cast_fp16)[name = tensor("zero_mean_123_cast_fp16")]; + tensor zero_mean_sq_123_cast_fp16 = mul(x = zero_mean_123_cast_fp16, y = zero_mean_123_cast_fp16)[name = tensor("zero_mean_sq_123_cast_fp16")]; + tensor var_47480 = const()[name = tensor("op_47480"), val = tensor([1])]; + tensor var_47481_cast_fp16 = reduce_mean(axes = var_47480, keep_dims = var_46010, x = zero_mean_sq_123_cast_fp16)[name = tensor("op_47481_cast_fp16")]; + tensor var_47482_to_fp16 = const()[name = tensor("op_47482_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_47483_cast_fp16 = add(x = var_47481_cast_fp16, y = var_47482_to_fp16)[name = tensor("op_47483_cast_fp16")]; + tensor denom_123_epsilon_0_to_fp16 = const()[name = tensor("denom_123_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_123_cast_fp16 = rsqrt(epsilon = denom_123_epsilon_0_to_fp16, x = var_47483_cast_fp16)[name = tensor("denom_123_cast_fp16")]; + tensor out_123_cast_fp16 = mul(x = zero_mean_123_cast_fp16, y = denom_123_cast_fp16)[name = tensor("out_123_cast_fp16")]; + tensor input_243_gamma_0_to_fp16 = const()[name = tensor("input_243_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1208014528)))]; + tensor input_243_beta_0_to_fp16 = const()[name = tensor("input_243_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1208017152)))]; + tensor input_243_epsilon_0_to_fp16 = const()[name = tensor("input_243_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_243_cast_fp16 = batch_norm(beta = input_243_beta_0_to_fp16, epsilon = input_243_epsilon_0_to_fp16, gamma = input_243_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_123_cast_fp16)[name = tensor("input_243_cast_fp16")]; + tensor var_47494 = const()[name = tensor("op_47494"), val = tensor([1, 1])]; + tensor var_47496 = const()[name = tensor("op_47496"), val = tensor([1, 1])]; + tensor input_245_pad_type_0 = const()[name = tensor("input_245_pad_type_0"), val = tensor("custom")]; + tensor input_245_pad_0 = const()[name = tensor("input_245_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_30_fc1_weight_to_fp16 = const()[name = tensor("layers_30_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1208019776)))]; + tensor layers_30_fc1_bias_to_fp16 = const()[name = tensor("layers_30_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1221127040)))]; + tensor input_245_cast_fp16 = conv(bias = layers_30_fc1_bias_to_fp16, dilations = var_47496, groups = var_46009, pad = input_245_pad_0, pad_type = input_245_pad_type_0, strides = var_47494, weight = layers_30_fc1_weight_to_fp16, x = input_243_cast_fp16)[name = tensor("input_245_cast_fp16")]; + tensor input_247_mode_0 = const()[name = tensor("input_247_mode_0"), val = tensor("EXACT")]; + tensor input_247_cast_fp16 = gelu(mode = input_247_mode_0, x = input_245_cast_fp16)[name = tensor("input_247_cast_fp16")]; + tensor var_47502 = const()[name = tensor("op_47502"), val = tensor([1, 1])]; + tensor var_47504 = const()[name = tensor("op_47504"), val = tensor([1, 1])]; + tensor hidden_states_65_pad_type_0 = const()[name = tensor("hidden_states_65_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_65_pad_0 = const()[name = tensor("hidden_states_65_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_30_fc2_weight_to_fp16 = const()[name = tensor("layers_30_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1221137344)))]; + tensor layers_30_fc2_bias_to_fp16 = const()[name = tensor("layers_30_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1234244608)))]; + tensor hidden_states_65_cast_fp16 = conv(bias = layers_30_fc2_bias_to_fp16, dilations = var_47504, groups = var_46009, pad = hidden_states_65_pad_0, pad_type = hidden_states_65_pad_type_0, strides = var_47502, weight = layers_30_fc2_weight_to_fp16, x = input_247_cast_fp16)[name = tensor("hidden_states_65_cast_fp16")]; + tensor inputs_125_cast_fp16 = add(x = inputs_123_cast_fp16, y = hidden_states_65_cast_fp16)[name = tensor("inputs_125_cast_fp16")]; + tensor var_47511 = const()[name = tensor("op_47511"), val = tensor(3)]; + tensor var_47536 = const()[name = tensor("op_47536"), val = tensor(1)]; + tensor var_47537 = const()[name = tensor("op_47537"), val = tensor(true)]; + tensor var_47547 = const()[name = tensor("op_47547"), val = tensor([1])]; + tensor channels_mean_125_cast_fp16 = reduce_mean(axes = var_47547, keep_dims = var_47537, x = inputs_125_cast_fp16)[name = tensor("channels_mean_125_cast_fp16")]; + tensor zero_mean_125_cast_fp16 = sub(x = inputs_125_cast_fp16, y = channels_mean_125_cast_fp16)[name = tensor("zero_mean_125_cast_fp16")]; + tensor zero_mean_sq_125_cast_fp16 = mul(x = zero_mean_125_cast_fp16, y = zero_mean_125_cast_fp16)[name = tensor("zero_mean_sq_125_cast_fp16")]; + tensor var_47551 = const()[name = tensor("op_47551"), val = tensor([1])]; + tensor var_47552_cast_fp16 = reduce_mean(axes = var_47551, keep_dims = var_47537, x = zero_mean_sq_125_cast_fp16)[name = tensor("op_47552_cast_fp16")]; + tensor var_47553_to_fp16 = const()[name = tensor("op_47553_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_47554_cast_fp16 = add(x = var_47552_cast_fp16, y = var_47553_to_fp16)[name = tensor("op_47554_cast_fp16")]; + tensor denom_125_epsilon_0_to_fp16 = const()[name = tensor("denom_125_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_125_cast_fp16 = rsqrt(epsilon = denom_125_epsilon_0_to_fp16, x = var_47554_cast_fp16)[name = tensor("denom_125_cast_fp16")]; + tensor out_125_cast_fp16 = mul(x = zero_mean_125_cast_fp16, y = denom_125_cast_fp16)[name = tensor("out_125_cast_fp16")]; + tensor obj_125_gamma_0_to_fp16 = const()[name = tensor("obj_125_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1234247232)))]; + tensor obj_125_beta_0_to_fp16 = const()[name = tensor("obj_125_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1234249856)))]; + tensor obj_125_epsilon_0_to_fp16 = const()[name = tensor("obj_125_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor obj_125_cast_fp16 = batch_norm(beta = obj_125_beta_0_to_fp16, epsilon = obj_125_epsilon_0_to_fp16, gamma = obj_125_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_125_cast_fp16)[name = tensor("obj_125_cast_fp16")]; + tensor var_47569 = const()[name = tensor("op_47569"), val = tensor([1, 1])]; + tensor var_47571 = const()[name = tensor("op_47571"), val = tensor([1, 1])]; + tensor query_pad_type_0 = const()[name = tensor("query_pad_type_0"), val = tensor("custom")]; + tensor query_pad_0 = const()[name = tensor("query_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_31_self_attn_q_proj_weight_to_fp16 = const()[name = tensor("layers_31_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1234252480)))]; + tensor layers_31_self_attn_q_proj_bias_to_fp16 = const()[name = tensor("layers_31_self_attn_q_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1237529344)))]; + tensor query_cast_fp16 = conv(bias = layers_31_self_attn_q_proj_bias_to_fp16, dilations = var_47571, groups = var_47536, pad = query_pad_0, pad_type = query_pad_type_0, strides = var_47569, weight = layers_31_self_attn_q_proj_weight_to_fp16, x = obj_125_cast_fp16)[name = tensor("query_cast_fp16")]; + tensor var_47575 = const()[name = tensor("op_47575"), val = tensor([1, 1])]; + tensor var_47577 = const()[name = tensor("op_47577"), val = tensor([1, 1])]; + tensor key_pad_type_0 = const()[name = tensor("key_pad_type_0"), val = tensor("custom")]; + tensor key_pad_0 = const()[name = tensor("key_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_31_self_attn_k_proj_weight_to_fp16 = const()[name = tensor("layers_31_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1237531968)))]; + tensor key_cast_fp16 = conv(dilations = var_47577, groups = var_47536, pad = key_pad_0, pad_type = key_pad_type_0, strides = var_47575, weight = layers_31_self_attn_k_proj_weight_to_fp16, x = obj_125_cast_fp16)[name = tensor("key_cast_fp16")]; + tensor var_47582 = const()[name = tensor("op_47582"), val = tensor([1, 1])]; + tensor var_47584 = const()[name = tensor("op_47584"), val = tensor([1, 1])]; + tensor value_pad_type_0 = const()[name = tensor("value_pad_type_0"), val = tensor("custom")]; + tensor value_pad_0 = const()[name = tensor("value_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_31_self_attn_v_proj_weight_to_fp16 = const()[name = tensor("layers_31_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1240808832)))]; + tensor layers_31_self_attn_v_proj_bias_to_fp16 = const()[name = tensor("layers_31_self_attn_v_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1244085696)))]; + tensor value_cast_fp16 = conv(bias = layers_31_self_attn_v_proj_bias_to_fp16, dilations = var_47584, groups = var_47536, pad = value_pad_0, pad_type = value_pad_type_0, strides = var_47582, weight = layers_31_self_attn_v_proj_weight_to_fp16, x = obj_125_cast_fp16)[name = tensor("value_cast_fp16")]; + tensor var_47591_begin_0 = const()[name = tensor("op_47591_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_47591_end_0 = const()[name = tensor("op_47591_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_47591_end_mask_0 = const()[name = tensor("op_47591_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_47591_cast_fp16 = slice_by_index(begin = var_47591_begin_0, end = var_47591_end_0, end_mask = var_47591_end_mask_0, x = query_cast_fp16)[name = tensor("op_47591_cast_fp16")]; + tensor var_47595_begin_0 = const()[name = tensor("op_47595_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_47595_end_0 = const()[name = tensor("op_47595_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_47595_end_mask_0 = const()[name = tensor("op_47595_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_47595_cast_fp16 = slice_by_index(begin = var_47595_begin_0, end = var_47595_end_0, end_mask = var_47595_end_mask_0, x = query_cast_fp16)[name = tensor("op_47595_cast_fp16")]; + tensor var_47599_begin_0 = const()[name = tensor("op_47599_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_47599_end_0 = const()[name = tensor("op_47599_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_47599_end_mask_0 = const()[name = tensor("op_47599_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_47599_cast_fp16 = slice_by_index(begin = var_47599_begin_0, end = var_47599_end_0, end_mask = var_47599_end_mask_0, x = query_cast_fp16)[name = tensor("op_47599_cast_fp16")]; + tensor var_47603_begin_0 = const()[name = tensor("op_47603_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_47603_end_0 = const()[name = tensor("op_47603_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_47603_end_mask_0 = const()[name = tensor("op_47603_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_47603_cast_fp16 = slice_by_index(begin = var_47603_begin_0, end = var_47603_end_0, end_mask = var_47603_end_mask_0, x = query_cast_fp16)[name = tensor("op_47603_cast_fp16")]; + tensor var_47607_begin_0 = const()[name = tensor("op_47607_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_47607_end_0 = const()[name = tensor("op_47607_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_47607_end_mask_0 = const()[name = tensor("op_47607_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_47607_cast_fp16 = slice_by_index(begin = var_47607_begin_0, end = var_47607_end_0, end_mask = var_47607_end_mask_0, x = query_cast_fp16)[name = tensor("op_47607_cast_fp16")]; + tensor var_47611_begin_0 = const()[name = tensor("op_47611_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_47611_end_0 = const()[name = tensor("op_47611_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_47611_end_mask_0 = const()[name = tensor("op_47611_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_47611_cast_fp16 = slice_by_index(begin = var_47611_begin_0, end = var_47611_end_0, end_mask = var_47611_end_mask_0, x = query_cast_fp16)[name = tensor("op_47611_cast_fp16")]; + tensor var_47615_begin_0 = const()[name = tensor("op_47615_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_47615_end_0 = const()[name = tensor("op_47615_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_47615_end_mask_0 = const()[name = tensor("op_47615_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_47615_cast_fp16 = slice_by_index(begin = var_47615_begin_0, end = var_47615_end_0, end_mask = var_47615_end_mask_0, x = query_cast_fp16)[name = tensor("op_47615_cast_fp16")]; + tensor var_47619_begin_0 = const()[name = tensor("op_47619_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_47619_end_0 = const()[name = tensor("op_47619_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_47619_end_mask_0 = const()[name = tensor("op_47619_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_47619_cast_fp16 = slice_by_index(begin = var_47619_begin_0, end = var_47619_end_0, end_mask = var_47619_end_mask_0, x = query_cast_fp16)[name = tensor("op_47619_cast_fp16")]; + tensor var_47623_begin_0 = const()[name = tensor("op_47623_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_47623_end_0 = const()[name = tensor("op_47623_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_47623_end_mask_0 = const()[name = tensor("op_47623_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_47623_cast_fp16 = slice_by_index(begin = var_47623_begin_0, end = var_47623_end_0, end_mask = var_47623_end_mask_0, x = query_cast_fp16)[name = tensor("op_47623_cast_fp16")]; + tensor var_47627_begin_0 = const()[name = tensor("op_47627_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_47627_end_0 = const()[name = tensor("op_47627_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_47627_end_mask_0 = const()[name = tensor("op_47627_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_47627_cast_fp16 = slice_by_index(begin = var_47627_begin_0, end = var_47627_end_0, end_mask = var_47627_end_mask_0, x = query_cast_fp16)[name = tensor("op_47627_cast_fp16")]; + tensor var_47631_begin_0 = const()[name = tensor("op_47631_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_47631_end_0 = const()[name = tensor("op_47631_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_47631_end_mask_0 = const()[name = tensor("op_47631_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_47631_cast_fp16 = slice_by_index(begin = var_47631_begin_0, end = var_47631_end_0, end_mask = var_47631_end_mask_0, x = query_cast_fp16)[name = tensor("op_47631_cast_fp16")]; + tensor var_47635_begin_0 = const()[name = tensor("op_47635_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_47635_end_0 = const()[name = tensor("op_47635_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_47635_end_mask_0 = const()[name = tensor("op_47635_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_47635_cast_fp16 = slice_by_index(begin = var_47635_begin_0, end = var_47635_end_0, end_mask = var_47635_end_mask_0, x = query_cast_fp16)[name = tensor("op_47635_cast_fp16")]; + tensor var_47639_begin_0 = const()[name = tensor("op_47639_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_47639_end_0 = const()[name = tensor("op_47639_end_0"), val = tensor([1, 832, 1, 1500])]; + tensor var_47639_end_mask_0 = const()[name = tensor("op_47639_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_47639_cast_fp16 = slice_by_index(begin = var_47639_begin_0, end = var_47639_end_0, end_mask = var_47639_end_mask_0, x = query_cast_fp16)[name = tensor("op_47639_cast_fp16")]; + tensor var_47643_begin_0 = const()[name = tensor("op_47643_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_47643_end_0 = const()[name = tensor("op_47643_end_0"), val = tensor([1, 896, 1, 1500])]; + tensor var_47643_end_mask_0 = const()[name = tensor("op_47643_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_47643_cast_fp16 = slice_by_index(begin = var_47643_begin_0, end = var_47643_end_0, end_mask = var_47643_end_mask_0, x = query_cast_fp16)[name = tensor("op_47643_cast_fp16")]; + tensor var_47647_begin_0 = const()[name = tensor("op_47647_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_47647_end_0 = const()[name = tensor("op_47647_end_0"), val = tensor([1, 960, 1, 1500])]; + tensor var_47647_end_mask_0 = const()[name = tensor("op_47647_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_47647_cast_fp16 = slice_by_index(begin = var_47647_begin_0, end = var_47647_end_0, end_mask = var_47647_end_mask_0, x = query_cast_fp16)[name = tensor("op_47647_cast_fp16")]; + tensor var_47651_begin_0 = const()[name = tensor("op_47651_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_47651_end_0 = const()[name = tensor("op_47651_end_0"), val = tensor([1, 1024, 1, 1500])]; + tensor var_47651_end_mask_0 = const()[name = tensor("op_47651_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_47651_cast_fp16 = slice_by_index(begin = var_47651_begin_0, end = var_47651_end_0, end_mask = var_47651_end_mask_0, x = query_cast_fp16)[name = tensor("op_47651_cast_fp16")]; + tensor var_47655_begin_0 = const()[name = tensor("op_47655_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_47655_end_0 = const()[name = tensor("op_47655_end_0"), val = tensor([1, 1088, 1, 1500])]; + tensor var_47655_end_mask_0 = const()[name = tensor("op_47655_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_47655_cast_fp16 = slice_by_index(begin = var_47655_begin_0, end = var_47655_end_0, end_mask = var_47655_end_mask_0, x = query_cast_fp16)[name = tensor("op_47655_cast_fp16")]; + tensor var_47659_begin_0 = const()[name = tensor("op_47659_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_47659_end_0 = const()[name = tensor("op_47659_end_0"), val = tensor([1, 1152, 1, 1500])]; + tensor var_47659_end_mask_0 = const()[name = tensor("op_47659_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_47659_cast_fp16 = slice_by_index(begin = var_47659_begin_0, end = var_47659_end_0, end_mask = var_47659_end_mask_0, x = query_cast_fp16)[name = tensor("op_47659_cast_fp16")]; + tensor var_47663_begin_0 = const()[name = tensor("op_47663_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_47663_end_0 = const()[name = tensor("op_47663_end_0"), val = tensor([1, 1216, 1, 1500])]; + tensor var_47663_end_mask_0 = const()[name = tensor("op_47663_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_47663_cast_fp16 = slice_by_index(begin = var_47663_begin_0, end = var_47663_end_0, end_mask = var_47663_end_mask_0, x = query_cast_fp16)[name = tensor("op_47663_cast_fp16")]; + tensor var_47667_begin_0 = const()[name = tensor("op_47667_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_47667_end_0 = const()[name = tensor("op_47667_end_0"), val = tensor([1, 1280, 1, 1500])]; + tensor var_47667_end_mask_0 = const()[name = tensor("op_47667_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_47667_cast_fp16 = slice_by_index(begin = var_47667_begin_0, end = var_47667_end_0, end_mask = var_47667_end_mask_0, x = query_cast_fp16)[name = tensor("op_47667_cast_fp16")]; + tensor var_47676_begin_0 = const()[name = tensor("op_47676_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_47676_end_0 = const()[name = tensor("op_47676_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_47676_end_mask_0 = const()[name = tensor("op_47676_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_47676_cast_fp16 = slice_by_index(begin = var_47676_begin_0, end = var_47676_end_0, end_mask = var_47676_end_mask_0, x = var_47591_cast_fp16)[name = tensor("op_47676_cast_fp16")]; + tensor var_47683_begin_0 = const()[name = tensor("op_47683_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_47683_end_0 = const()[name = tensor("op_47683_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_47683_end_mask_0 = const()[name = tensor("op_47683_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_47683_cast_fp16 = slice_by_index(begin = var_47683_begin_0, end = var_47683_end_0, end_mask = var_47683_end_mask_0, x = var_47591_cast_fp16)[name = tensor("op_47683_cast_fp16")]; + tensor var_47690_begin_0 = const()[name = tensor("op_47690_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_47690_end_0 = const()[name = tensor("op_47690_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_47690_end_mask_0 = const()[name = tensor("op_47690_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_47690_cast_fp16 = slice_by_index(begin = var_47690_begin_0, end = var_47690_end_0, end_mask = var_47690_end_mask_0, x = var_47591_cast_fp16)[name = tensor("op_47690_cast_fp16")]; + tensor var_47697_begin_0 = const()[name = tensor("op_47697_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_47697_end_0 = const()[name = tensor("op_47697_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_47697_end_mask_0 = const()[name = tensor("op_47697_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_47697_cast_fp16 = slice_by_index(begin = var_47697_begin_0, end = var_47697_end_0, end_mask = var_47697_end_mask_0, x = var_47591_cast_fp16)[name = tensor("op_47697_cast_fp16")]; + tensor var_47704_begin_0 = const()[name = tensor("op_47704_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_47704_end_0 = const()[name = tensor("op_47704_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_47704_end_mask_0 = const()[name = tensor("op_47704_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_47704_cast_fp16 = slice_by_index(begin = var_47704_begin_0, end = var_47704_end_0, end_mask = var_47704_end_mask_0, x = var_47595_cast_fp16)[name = tensor("op_47704_cast_fp16")]; + tensor var_47711_begin_0 = const()[name = tensor("op_47711_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_47711_end_0 = const()[name = tensor("op_47711_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_47711_end_mask_0 = const()[name = tensor("op_47711_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_47711_cast_fp16 = slice_by_index(begin = var_47711_begin_0, end = var_47711_end_0, end_mask = var_47711_end_mask_0, x = var_47595_cast_fp16)[name = tensor("op_47711_cast_fp16")]; + tensor var_47718_begin_0 = const()[name = tensor("op_47718_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_47718_end_0 = const()[name = tensor("op_47718_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_47718_end_mask_0 = const()[name = tensor("op_47718_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_47718_cast_fp16 = slice_by_index(begin = var_47718_begin_0, end = var_47718_end_0, end_mask = var_47718_end_mask_0, x = var_47595_cast_fp16)[name = tensor("op_47718_cast_fp16")]; + tensor var_47725_begin_0 = const()[name = tensor("op_47725_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_47725_end_0 = const()[name = tensor("op_47725_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_47725_end_mask_0 = const()[name = tensor("op_47725_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_47725_cast_fp16 = slice_by_index(begin = var_47725_begin_0, end = var_47725_end_0, end_mask = var_47725_end_mask_0, x = var_47595_cast_fp16)[name = tensor("op_47725_cast_fp16")]; + tensor var_47732_begin_0 = const()[name = tensor("op_47732_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_47732_end_0 = const()[name = tensor("op_47732_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_47732_end_mask_0 = const()[name = tensor("op_47732_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_47732_cast_fp16 = slice_by_index(begin = var_47732_begin_0, end = var_47732_end_0, end_mask = var_47732_end_mask_0, x = var_47599_cast_fp16)[name = tensor("op_47732_cast_fp16")]; + tensor var_47739_begin_0 = const()[name = tensor("op_47739_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_47739_end_0 = const()[name = tensor("op_47739_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_47739_end_mask_0 = const()[name = tensor("op_47739_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_47739_cast_fp16 = slice_by_index(begin = var_47739_begin_0, end = var_47739_end_0, end_mask = var_47739_end_mask_0, x = var_47599_cast_fp16)[name = tensor("op_47739_cast_fp16")]; + tensor var_47746_begin_0 = const()[name = tensor("op_47746_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_47746_end_0 = const()[name = tensor("op_47746_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_47746_end_mask_0 = const()[name = tensor("op_47746_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_47746_cast_fp16 = slice_by_index(begin = var_47746_begin_0, end = var_47746_end_0, end_mask = var_47746_end_mask_0, x = var_47599_cast_fp16)[name = tensor("op_47746_cast_fp16")]; + tensor var_47753_begin_0 = const()[name = tensor("op_47753_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_47753_end_0 = const()[name = tensor("op_47753_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_47753_end_mask_0 = const()[name = tensor("op_47753_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_47753_cast_fp16 = slice_by_index(begin = var_47753_begin_0, end = var_47753_end_0, end_mask = var_47753_end_mask_0, x = var_47599_cast_fp16)[name = tensor("op_47753_cast_fp16")]; + tensor var_47760_begin_0 = const()[name = tensor("op_47760_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_47760_end_0 = const()[name = tensor("op_47760_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_47760_end_mask_0 = const()[name = tensor("op_47760_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_47760_cast_fp16 = slice_by_index(begin = var_47760_begin_0, end = var_47760_end_0, end_mask = var_47760_end_mask_0, x = var_47603_cast_fp16)[name = tensor("op_47760_cast_fp16")]; + tensor var_47767_begin_0 = const()[name = tensor("op_47767_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_47767_end_0 = const()[name = tensor("op_47767_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_47767_end_mask_0 = const()[name = tensor("op_47767_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_47767_cast_fp16 = slice_by_index(begin = var_47767_begin_0, end = var_47767_end_0, end_mask = var_47767_end_mask_0, x = var_47603_cast_fp16)[name = tensor("op_47767_cast_fp16")]; + tensor var_47774_begin_0 = const()[name = tensor("op_47774_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_47774_end_0 = const()[name = tensor("op_47774_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_47774_end_mask_0 = const()[name = tensor("op_47774_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_47774_cast_fp16 = slice_by_index(begin = var_47774_begin_0, end = var_47774_end_0, end_mask = var_47774_end_mask_0, x = var_47603_cast_fp16)[name = tensor("op_47774_cast_fp16")]; + tensor var_47781_begin_0 = const()[name = tensor("op_47781_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_47781_end_0 = const()[name = tensor("op_47781_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_47781_end_mask_0 = const()[name = tensor("op_47781_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_47781_cast_fp16 = slice_by_index(begin = var_47781_begin_0, end = var_47781_end_0, end_mask = var_47781_end_mask_0, x = var_47603_cast_fp16)[name = tensor("op_47781_cast_fp16")]; + tensor var_47788_begin_0 = const()[name = tensor("op_47788_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_47788_end_0 = const()[name = tensor("op_47788_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_47788_end_mask_0 = const()[name = tensor("op_47788_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_47788_cast_fp16 = slice_by_index(begin = var_47788_begin_0, end = var_47788_end_0, end_mask = var_47788_end_mask_0, x = var_47607_cast_fp16)[name = tensor("op_47788_cast_fp16")]; + tensor var_47795_begin_0 = const()[name = tensor("op_47795_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_47795_end_0 = const()[name = tensor("op_47795_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_47795_end_mask_0 = const()[name = tensor("op_47795_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_47795_cast_fp16 = slice_by_index(begin = var_47795_begin_0, end = var_47795_end_0, end_mask = var_47795_end_mask_0, x = var_47607_cast_fp16)[name = tensor("op_47795_cast_fp16")]; + tensor var_47802_begin_0 = const()[name = tensor("op_47802_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_47802_end_0 = const()[name = tensor("op_47802_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_47802_end_mask_0 = const()[name = tensor("op_47802_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_47802_cast_fp16 = slice_by_index(begin = var_47802_begin_0, end = var_47802_end_0, end_mask = var_47802_end_mask_0, x = var_47607_cast_fp16)[name = tensor("op_47802_cast_fp16")]; + tensor var_47809_begin_0 = const()[name = tensor("op_47809_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_47809_end_0 = const()[name = tensor("op_47809_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_47809_end_mask_0 = const()[name = tensor("op_47809_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_47809_cast_fp16 = slice_by_index(begin = var_47809_begin_0, end = var_47809_end_0, end_mask = var_47809_end_mask_0, x = var_47607_cast_fp16)[name = tensor("op_47809_cast_fp16")]; + tensor var_47816_begin_0 = const()[name = tensor("op_47816_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_47816_end_0 = const()[name = tensor("op_47816_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_47816_end_mask_0 = const()[name = tensor("op_47816_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_47816_cast_fp16 = slice_by_index(begin = var_47816_begin_0, end = var_47816_end_0, end_mask = var_47816_end_mask_0, x = var_47611_cast_fp16)[name = tensor("op_47816_cast_fp16")]; + tensor var_47823_begin_0 = const()[name = tensor("op_47823_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_47823_end_0 = const()[name = tensor("op_47823_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_47823_end_mask_0 = const()[name = tensor("op_47823_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_47823_cast_fp16 = slice_by_index(begin = var_47823_begin_0, end = var_47823_end_0, end_mask = var_47823_end_mask_0, x = var_47611_cast_fp16)[name = tensor("op_47823_cast_fp16")]; + tensor var_47830_begin_0 = const()[name = tensor("op_47830_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_47830_end_0 = const()[name = tensor("op_47830_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_47830_end_mask_0 = const()[name = tensor("op_47830_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_47830_cast_fp16 = slice_by_index(begin = var_47830_begin_0, end = var_47830_end_0, end_mask = var_47830_end_mask_0, x = var_47611_cast_fp16)[name = tensor("op_47830_cast_fp16")]; + tensor var_47837_begin_0 = const()[name = tensor("op_47837_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_47837_end_0 = const()[name = tensor("op_47837_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_47837_end_mask_0 = const()[name = tensor("op_47837_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_47837_cast_fp16 = slice_by_index(begin = var_47837_begin_0, end = var_47837_end_0, end_mask = var_47837_end_mask_0, x = var_47611_cast_fp16)[name = tensor("op_47837_cast_fp16")]; + tensor var_47844_begin_0 = const()[name = tensor("op_47844_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_47844_end_0 = const()[name = tensor("op_47844_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_47844_end_mask_0 = const()[name = tensor("op_47844_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_47844_cast_fp16 = slice_by_index(begin = var_47844_begin_0, end = var_47844_end_0, end_mask = var_47844_end_mask_0, x = var_47615_cast_fp16)[name = tensor("op_47844_cast_fp16")]; + tensor var_47851_begin_0 = const()[name = tensor("op_47851_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_47851_end_0 = const()[name = tensor("op_47851_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_47851_end_mask_0 = const()[name = tensor("op_47851_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_47851_cast_fp16 = slice_by_index(begin = var_47851_begin_0, end = var_47851_end_0, end_mask = var_47851_end_mask_0, x = var_47615_cast_fp16)[name = tensor("op_47851_cast_fp16")]; + tensor var_47858_begin_0 = const()[name = tensor("op_47858_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_47858_end_0 = const()[name = tensor("op_47858_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_47858_end_mask_0 = const()[name = tensor("op_47858_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_47858_cast_fp16 = slice_by_index(begin = var_47858_begin_0, end = var_47858_end_0, end_mask = var_47858_end_mask_0, x = var_47615_cast_fp16)[name = tensor("op_47858_cast_fp16")]; + tensor var_47865_begin_0 = const()[name = tensor("op_47865_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_47865_end_0 = const()[name = tensor("op_47865_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_47865_end_mask_0 = const()[name = tensor("op_47865_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_47865_cast_fp16 = slice_by_index(begin = var_47865_begin_0, end = var_47865_end_0, end_mask = var_47865_end_mask_0, x = var_47615_cast_fp16)[name = tensor("op_47865_cast_fp16")]; + tensor var_47872_begin_0 = const()[name = tensor("op_47872_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_47872_end_0 = const()[name = tensor("op_47872_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_47872_end_mask_0 = const()[name = tensor("op_47872_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_47872_cast_fp16 = slice_by_index(begin = var_47872_begin_0, end = var_47872_end_0, end_mask = var_47872_end_mask_0, x = var_47619_cast_fp16)[name = tensor("op_47872_cast_fp16")]; + tensor var_47879_begin_0 = const()[name = tensor("op_47879_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_47879_end_0 = const()[name = tensor("op_47879_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_47879_end_mask_0 = const()[name = tensor("op_47879_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_47879_cast_fp16 = slice_by_index(begin = var_47879_begin_0, end = var_47879_end_0, end_mask = var_47879_end_mask_0, x = var_47619_cast_fp16)[name = tensor("op_47879_cast_fp16")]; + tensor var_47886_begin_0 = const()[name = tensor("op_47886_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_47886_end_0 = const()[name = tensor("op_47886_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_47886_end_mask_0 = const()[name = tensor("op_47886_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_47886_cast_fp16 = slice_by_index(begin = var_47886_begin_0, end = var_47886_end_0, end_mask = var_47886_end_mask_0, x = var_47619_cast_fp16)[name = tensor("op_47886_cast_fp16")]; + tensor var_47893_begin_0 = const()[name = tensor("op_47893_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_47893_end_0 = const()[name = tensor("op_47893_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_47893_end_mask_0 = const()[name = tensor("op_47893_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_47893_cast_fp16 = slice_by_index(begin = var_47893_begin_0, end = var_47893_end_0, end_mask = var_47893_end_mask_0, x = var_47619_cast_fp16)[name = tensor("op_47893_cast_fp16")]; + tensor var_47900_begin_0 = const()[name = tensor("op_47900_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_47900_end_0 = const()[name = tensor("op_47900_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_47900_end_mask_0 = const()[name = tensor("op_47900_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_47900_cast_fp16 = slice_by_index(begin = var_47900_begin_0, end = var_47900_end_0, end_mask = var_47900_end_mask_0, x = var_47623_cast_fp16)[name = tensor("op_47900_cast_fp16")]; + tensor var_47907_begin_0 = const()[name = tensor("op_47907_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_47907_end_0 = const()[name = tensor("op_47907_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_47907_end_mask_0 = const()[name = tensor("op_47907_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_47907_cast_fp16 = slice_by_index(begin = var_47907_begin_0, end = var_47907_end_0, end_mask = var_47907_end_mask_0, x = var_47623_cast_fp16)[name = tensor("op_47907_cast_fp16")]; + tensor var_47914_begin_0 = const()[name = tensor("op_47914_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_47914_end_0 = const()[name = tensor("op_47914_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_47914_end_mask_0 = const()[name = tensor("op_47914_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_47914_cast_fp16 = slice_by_index(begin = var_47914_begin_0, end = var_47914_end_0, end_mask = var_47914_end_mask_0, x = var_47623_cast_fp16)[name = tensor("op_47914_cast_fp16")]; + tensor var_47921_begin_0 = const()[name = tensor("op_47921_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_47921_end_0 = const()[name = tensor("op_47921_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_47921_end_mask_0 = const()[name = tensor("op_47921_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_47921_cast_fp16 = slice_by_index(begin = var_47921_begin_0, end = var_47921_end_0, end_mask = var_47921_end_mask_0, x = var_47623_cast_fp16)[name = tensor("op_47921_cast_fp16")]; + tensor var_47928_begin_0 = const()[name = tensor("op_47928_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_47928_end_0 = const()[name = tensor("op_47928_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_47928_end_mask_0 = const()[name = tensor("op_47928_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_47928_cast_fp16 = slice_by_index(begin = var_47928_begin_0, end = var_47928_end_0, end_mask = var_47928_end_mask_0, x = var_47627_cast_fp16)[name = tensor("op_47928_cast_fp16")]; + tensor var_47935_begin_0 = const()[name = tensor("op_47935_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_47935_end_0 = const()[name = tensor("op_47935_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_47935_end_mask_0 = const()[name = tensor("op_47935_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_47935_cast_fp16 = slice_by_index(begin = var_47935_begin_0, end = var_47935_end_0, end_mask = var_47935_end_mask_0, x = var_47627_cast_fp16)[name = tensor("op_47935_cast_fp16")]; + tensor var_47942_begin_0 = const()[name = tensor("op_47942_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_47942_end_0 = const()[name = tensor("op_47942_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_47942_end_mask_0 = const()[name = tensor("op_47942_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_47942_cast_fp16 = slice_by_index(begin = var_47942_begin_0, end = var_47942_end_0, end_mask = var_47942_end_mask_0, x = var_47627_cast_fp16)[name = tensor("op_47942_cast_fp16")]; + tensor var_47949_begin_0 = const()[name = tensor("op_47949_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_47949_end_0 = const()[name = tensor("op_47949_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_47949_end_mask_0 = const()[name = tensor("op_47949_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_47949_cast_fp16 = slice_by_index(begin = var_47949_begin_0, end = var_47949_end_0, end_mask = var_47949_end_mask_0, x = var_47627_cast_fp16)[name = tensor("op_47949_cast_fp16")]; + tensor var_47956_begin_0 = const()[name = tensor("op_47956_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_47956_end_0 = const()[name = tensor("op_47956_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_47956_end_mask_0 = const()[name = tensor("op_47956_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_47956_cast_fp16 = slice_by_index(begin = var_47956_begin_0, end = var_47956_end_0, end_mask = var_47956_end_mask_0, x = var_47631_cast_fp16)[name = tensor("op_47956_cast_fp16")]; + tensor var_47963_begin_0 = const()[name = tensor("op_47963_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_47963_end_0 = const()[name = tensor("op_47963_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_47963_end_mask_0 = const()[name = tensor("op_47963_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_47963_cast_fp16 = slice_by_index(begin = var_47963_begin_0, end = var_47963_end_0, end_mask = var_47963_end_mask_0, x = var_47631_cast_fp16)[name = tensor("op_47963_cast_fp16")]; + tensor var_47970_begin_0 = const()[name = tensor("op_47970_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_47970_end_0 = const()[name = tensor("op_47970_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_47970_end_mask_0 = const()[name = tensor("op_47970_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_47970_cast_fp16 = slice_by_index(begin = var_47970_begin_0, end = var_47970_end_0, end_mask = var_47970_end_mask_0, x = var_47631_cast_fp16)[name = tensor("op_47970_cast_fp16")]; + tensor var_47977_begin_0 = const()[name = tensor("op_47977_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_47977_end_0 = const()[name = tensor("op_47977_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_47977_end_mask_0 = const()[name = tensor("op_47977_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_47977_cast_fp16 = slice_by_index(begin = var_47977_begin_0, end = var_47977_end_0, end_mask = var_47977_end_mask_0, x = var_47631_cast_fp16)[name = tensor("op_47977_cast_fp16")]; + tensor var_47984_begin_0 = const()[name = tensor("op_47984_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_47984_end_0 = const()[name = tensor("op_47984_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_47984_end_mask_0 = const()[name = tensor("op_47984_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_47984_cast_fp16 = slice_by_index(begin = var_47984_begin_0, end = var_47984_end_0, end_mask = var_47984_end_mask_0, x = var_47635_cast_fp16)[name = tensor("op_47984_cast_fp16")]; + tensor var_47991_begin_0 = const()[name = tensor("op_47991_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_47991_end_0 = const()[name = tensor("op_47991_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_47991_end_mask_0 = const()[name = tensor("op_47991_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_47991_cast_fp16 = slice_by_index(begin = var_47991_begin_0, end = var_47991_end_0, end_mask = var_47991_end_mask_0, x = var_47635_cast_fp16)[name = tensor("op_47991_cast_fp16")]; + tensor var_47998_begin_0 = const()[name = tensor("op_47998_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_47998_end_0 = const()[name = tensor("op_47998_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_47998_end_mask_0 = const()[name = tensor("op_47998_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_47998_cast_fp16 = slice_by_index(begin = var_47998_begin_0, end = var_47998_end_0, end_mask = var_47998_end_mask_0, x = var_47635_cast_fp16)[name = tensor("op_47998_cast_fp16")]; + tensor var_48005_begin_0 = const()[name = tensor("op_48005_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_48005_end_0 = const()[name = tensor("op_48005_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_48005_end_mask_0 = const()[name = tensor("op_48005_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_48005_cast_fp16 = slice_by_index(begin = var_48005_begin_0, end = var_48005_end_0, end_mask = var_48005_end_mask_0, x = var_47635_cast_fp16)[name = tensor("op_48005_cast_fp16")]; + tensor var_48012_begin_0 = const()[name = tensor("op_48012_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_48012_end_0 = const()[name = tensor("op_48012_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_48012_end_mask_0 = const()[name = tensor("op_48012_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_48012_cast_fp16 = slice_by_index(begin = var_48012_begin_0, end = var_48012_end_0, end_mask = var_48012_end_mask_0, x = var_47639_cast_fp16)[name = tensor("op_48012_cast_fp16")]; + tensor var_48019_begin_0 = const()[name = tensor("op_48019_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_48019_end_0 = const()[name = tensor("op_48019_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_48019_end_mask_0 = const()[name = tensor("op_48019_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_48019_cast_fp16 = slice_by_index(begin = var_48019_begin_0, end = var_48019_end_0, end_mask = var_48019_end_mask_0, x = var_47639_cast_fp16)[name = tensor("op_48019_cast_fp16")]; + tensor var_48026_begin_0 = const()[name = tensor("op_48026_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_48026_end_0 = const()[name = tensor("op_48026_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_48026_end_mask_0 = const()[name = tensor("op_48026_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_48026_cast_fp16 = slice_by_index(begin = var_48026_begin_0, end = var_48026_end_0, end_mask = var_48026_end_mask_0, x = var_47639_cast_fp16)[name = tensor("op_48026_cast_fp16")]; + tensor var_48033_begin_0 = const()[name = tensor("op_48033_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_48033_end_0 = const()[name = tensor("op_48033_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_48033_end_mask_0 = const()[name = tensor("op_48033_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_48033_cast_fp16 = slice_by_index(begin = var_48033_begin_0, end = var_48033_end_0, end_mask = var_48033_end_mask_0, x = var_47639_cast_fp16)[name = tensor("op_48033_cast_fp16")]; + tensor var_48040_begin_0 = const()[name = tensor("op_48040_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_48040_end_0 = const()[name = tensor("op_48040_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_48040_end_mask_0 = const()[name = tensor("op_48040_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_48040_cast_fp16 = slice_by_index(begin = var_48040_begin_0, end = var_48040_end_0, end_mask = var_48040_end_mask_0, x = var_47643_cast_fp16)[name = tensor("op_48040_cast_fp16")]; + tensor var_48047_begin_0 = const()[name = tensor("op_48047_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_48047_end_0 = const()[name = tensor("op_48047_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_48047_end_mask_0 = const()[name = tensor("op_48047_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_48047_cast_fp16 = slice_by_index(begin = var_48047_begin_0, end = var_48047_end_0, end_mask = var_48047_end_mask_0, x = var_47643_cast_fp16)[name = tensor("op_48047_cast_fp16")]; + tensor var_48054_begin_0 = const()[name = tensor("op_48054_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_48054_end_0 = const()[name = tensor("op_48054_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_48054_end_mask_0 = const()[name = tensor("op_48054_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_48054_cast_fp16 = slice_by_index(begin = var_48054_begin_0, end = var_48054_end_0, end_mask = var_48054_end_mask_0, x = var_47643_cast_fp16)[name = tensor("op_48054_cast_fp16")]; + tensor var_48061_begin_0 = const()[name = tensor("op_48061_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_48061_end_0 = const()[name = tensor("op_48061_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_48061_end_mask_0 = const()[name = tensor("op_48061_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_48061_cast_fp16 = slice_by_index(begin = var_48061_begin_0, end = var_48061_end_0, end_mask = var_48061_end_mask_0, x = var_47643_cast_fp16)[name = tensor("op_48061_cast_fp16")]; + tensor var_48068_begin_0 = const()[name = tensor("op_48068_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_48068_end_0 = const()[name = tensor("op_48068_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_48068_end_mask_0 = const()[name = tensor("op_48068_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_48068_cast_fp16 = slice_by_index(begin = var_48068_begin_0, end = var_48068_end_0, end_mask = var_48068_end_mask_0, x = var_47647_cast_fp16)[name = tensor("op_48068_cast_fp16")]; + tensor var_48075_begin_0 = const()[name = tensor("op_48075_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_48075_end_0 = const()[name = tensor("op_48075_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_48075_end_mask_0 = const()[name = tensor("op_48075_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_48075_cast_fp16 = slice_by_index(begin = var_48075_begin_0, end = var_48075_end_0, end_mask = var_48075_end_mask_0, x = var_47647_cast_fp16)[name = tensor("op_48075_cast_fp16")]; + tensor var_48082_begin_0 = const()[name = tensor("op_48082_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_48082_end_0 = const()[name = tensor("op_48082_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_48082_end_mask_0 = const()[name = tensor("op_48082_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_48082_cast_fp16 = slice_by_index(begin = var_48082_begin_0, end = var_48082_end_0, end_mask = var_48082_end_mask_0, x = var_47647_cast_fp16)[name = tensor("op_48082_cast_fp16")]; + tensor var_48089_begin_0 = const()[name = tensor("op_48089_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_48089_end_0 = const()[name = tensor("op_48089_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_48089_end_mask_0 = const()[name = tensor("op_48089_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_48089_cast_fp16 = slice_by_index(begin = var_48089_begin_0, end = var_48089_end_0, end_mask = var_48089_end_mask_0, x = var_47647_cast_fp16)[name = tensor("op_48089_cast_fp16")]; + tensor var_48096_begin_0 = const()[name = tensor("op_48096_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_48096_end_0 = const()[name = tensor("op_48096_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_48096_end_mask_0 = const()[name = tensor("op_48096_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_48096_cast_fp16 = slice_by_index(begin = var_48096_begin_0, end = var_48096_end_0, end_mask = var_48096_end_mask_0, x = var_47651_cast_fp16)[name = tensor("op_48096_cast_fp16")]; + tensor var_48103_begin_0 = const()[name = tensor("op_48103_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_48103_end_0 = const()[name = tensor("op_48103_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_48103_end_mask_0 = const()[name = tensor("op_48103_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_48103_cast_fp16 = slice_by_index(begin = var_48103_begin_0, end = var_48103_end_0, end_mask = var_48103_end_mask_0, x = var_47651_cast_fp16)[name = tensor("op_48103_cast_fp16")]; + tensor var_48110_begin_0 = const()[name = tensor("op_48110_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_48110_end_0 = const()[name = tensor("op_48110_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_48110_end_mask_0 = const()[name = tensor("op_48110_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_48110_cast_fp16 = slice_by_index(begin = var_48110_begin_0, end = var_48110_end_0, end_mask = var_48110_end_mask_0, x = var_47651_cast_fp16)[name = tensor("op_48110_cast_fp16")]; + tensor var_48117_begin_0 = const()[name = tensor("op_48117_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_48117_end_0 = const()[name = tensor("op_48117_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_48117_end_mask_0 = const()[name = tensor("op_48117_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_48117_cast_fp16 = slice_by_index(begin = var_48117_begin_0, end = var_48117_end_0, end_mask = var_48117_end_mask_0, x = var_47651_cast_fp16)[name = tensor("op_48117_cast_fp16")]; + tensor var_48124_begin_0 = const()[name = tensor("op_48124_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_48124_end_0 = const()[name = tensor("op_48124_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_48124_end_mask_0 = const()[name = tensor("op_48124_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_48124_cast_fp16 = slice_by_index(begin = var_48124_begin_0, end = var_48124_end_0, end_mask = var_48124_end_mask_0, x = var_47655_cast_fp16)[name = tensor("op_48124_cast_fp16")]; + tensor var_48131_begin_0 = const()[name = tensor("op_48131_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_48131_end_0 = const()[name = tensor("op_48131_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_48131_end_mask_0 = const()[name = tensor("op_48131_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_48131_cast_fp16 = slice_by_index(begin = var_48131_begin_0, end = var_48131_end_0, end_mask = var_48131_end_mask_0, x = var_47655_cast_fp16)[name = tensor("op_48131_cast_fp16")]; + tensor var_48138_begin_0 = const()[name = tensor("op_48138_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_48138_end_0 = const()[name = tensor("op_48138_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_48138_end_mask_0 = const()[name = tensor("op_48138_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_48138_cast_fp16 = slice_by_index(begin = var_48138_begin_0, end = var_48138_end_0, end_mask = var_48138_end_mask_0, x = var_47655_cast_fp16)[name = tensor("op_48138_cast_fp16")]; + tensor var_48145_begin_0 = const()[name = tensor("op_48145_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_48145_end_0 = const()[name = tensor("op_48145_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_48145_end_mask_0 = const()[name = tensor("op_48145_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_48145_cast_fp16 = slice_by_index(begin = var_48145_begin_0, end = var_48145_end_0, end_mask = var_48145_end_mask_0, x = var_47655_cast_fp16)[name = tensor("op_48145_cast_fp16")]; + tensor var_48152_begin_0 = const()[name = tensor("op_48152_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_48152_end_0 = const()[name = tensor("op_48152_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_48152_end_mask_0 = const()[name = tensor("op_48152_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_48152_cast_fp16 = slice_by_index(begin = var_48152_begin_0, end = var_48152_end_0, end_mask = var_48152_end_mask_0, x = var_47659_cast_fp16)[name = tensor("op_48152_cast_fp16")]; + tensor var_48159_begin_0 = const()[name = tensor("op_48159_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_48159_end_0 = const()[name = tensor("op_48159_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_48159_end_mask_0 = const()[name = tensor("op_48159_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_48159_cast_fp16 = slice_by_index(begin = var_48159_begin_0, end = var_48159_end_0, end_mask = var_48159_end_mask_0, x = var_47659_cast_fp16)[name = tensor("op_48159_cast_fp16")]; + tensor var_48166_begin_0 = const()[name = tensor("op_48166_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_48166_end_0 = const()[name = tensor("op_48166_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_48166_end_mask_0 = const()[name = tensor("op_48166_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_48166_cast_fp16 = slice_by_index(begin = var_48166_begin_0, end = var_48166_end_0, end_mask = var_48166_end_mask_0, x = var_47659_cast_fp16)[name = tensor("op_48166_cast_fp16")]; + tensor var_48173_begin_0 = const()[name = tensor("op_48173_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_48173_end_0 = const()[name = tensor("op_48173_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_48173_end_mask_0 = const()[name = tensor("op_48173_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_48173_cast_fp16 = slice_by_index(begin = var_48173_begin_0, end = var_48173_end_0, end_mask = var_48173_end_mask_0, x = var_47659_cast_fp16)[name = tensor("op_48173_cast_fp16")]; + tensor var_48180_begin_0 = const()[name = tensor("op_48180_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_48180_end_0 = const()[name = tensor("op_48180_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_48180_end_mask_0 = const()[name = tensor("op_48180_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_48180_cast_fp16 = slice_by_index(begin = var_48180_begin_0, end = var_48180_end_0, end_mask = var_48180_end_mask_0, x = var_47663_cast_fp16)[name = tensor("op_48180_cast_fp16")]; + tensor var_48187_begin_0 = const()[name = tensor("op_48187_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_48187_end_0 = const()[name = tensor("op_48187_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_48187_end_mask_0 = const()[name = tensor("op_48187_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_48187_cast_fp16 = slice_by_index(begin = var_48187_begin_0, end = var_48187_end_0, end_mask = var_48187_end_mask_0, x = var_47663_cast_fp16)[name = tensor("op_48187_cast_fp16")]; + tensor var_48194_begin_0 = const()[name = tensor("op_48194_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_48194_end_0 = const()[name = tensor("op_48194_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_48194_end_mask_0 = const()[name = tensor("op_48194_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_48194_cast_fp16 = slice_by_index(begin = var_48194_begin_0, end = var_48194_end_0, end_mask = var_48194_end_mask_0, x = var_47663_cast_fp16)[name = tensor("op_48194_cast_fp16")]; + tensor var_48201_begin_0 = const()[name = tensor("op_48201_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_48201_end_0 = const()[name = tensor("op_48201_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_48201_end_mask_0 = const()[name = tensor("op_48201_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_48201_cast_fp16 = slice_by_index(begin = var_48201_begin_0, end = var_48201_end_0, end_mask = var_48201_end_mask_0, x = var_47663_cast_fp16)[name = tensor("op_48201_cast_fp16")]; + tensor var_48208_begin_0 = const()[name = tensor("op_48208_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_48208_end_0 = const()[name = tensor("op_48208_end_0"), val = tensor([1, 64, 1, 375])]; + tensor var_48208_end_mask_0 = const()[name = tensor("op_48208_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_48208_cast_fp16 = slice_by_index(begin = var_48208_begin_0, end = var_48208_end_0, end_mask = var_48208_end_mask_0, x = var_47667_cast_fp16)[name = tensor("op_48208_cast_fp16")]; + tensor var_48215_begin_0 = const()[name = tensor("op_48215_begin_0"), val = tensor([0, 0, 0, 375])]; + tensor var_48215_end_0 = const()[name = tensor("op_48215_end_0"), val = tensor([1, 64, 1, 750])]; + tensor var_48215_end_mask_0 = const()[name = tensor("op_48215_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_48215_cast_fp16 = slice_by_index(begin = var_48215_begin_0, end = var_48215_end_0, end_mask = var_48215_end_mask_0, x = var_47667_cast_fp16)[name = tensor("op_48215_cast_fp16")]; + tensor var_48222_begin_0 = const()[name = tensor("op_48222_begin_0"), val = tensor([0, 0, 0, 750])]; + tensor var_48222_end_0 = const()[name = tensor("op_48222_end_0"), val = tensor([1, 64, 1, 1125])]; + tensor var_48222_end_mask_0 = const()[name = tensor("op_48222_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_48222_cast_fp16 = slice_by_index(begin = var_48222_begin_0, end = var_48222_end_0, end_mask = var_48222_end_mask_0, x = var_47667_cast_fp16)[name = tensor("op_48222_cast_fp16")]; + tensor var_48229_begin_0 = const()[name = tensor("op_48229_begin_0"), val = tensor([0, 0, 0, 1125])]; + tensor var_48229_end_0 = const()[name = tensor("op_48229_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_48229_end_mask_0 = const()[name = tensor("op_48229_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_48229_cast_fp16 = slice_by_index(begin = var_48229_begin_0, end = var_48229_end_0, end_mask = var_48229_end_mask_0, x = var_47667_cast_fp16)[name = tensor("op_48229_cast_fp16")]; + tensor k_perm_0 = const()[name = tensor("k_perm_0"), val = tensor([0, 3, 2, 1])]; + tensor var_48234_begin_0 = const()[name = tensor("op_48234_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_48234_end_0 = const()[name = tensor("op_48234_end_0"), val = tensor([1, 1500, 1, 64])]; + tensor var_48234_end_mask_0 = const()[name = tensor("op_48234_end_mask_0"), val = tensor([true, true, true, false])]; + tensor transpose_0 = transpose(perm = k_perm_0, x = key_cast_fp16)[name = tensor("transpose_0")]; + tensor var_48234_cast_fp16 = slice_by_index(begin = var_48234_begin_0, end = var_48234_end_0, end_mask = var_48234_end_mask_0, x = transpose_0)[name = tensor("op_48234_cast_fp16")]; + tensor var_48238_begin_0 = const()[name = tensor("op_48238_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor var_48238_end_0 = const()[name = tensor("op_48238_end_0"), val = tensor([1, 1500, 1, 128])]; + tensor var_48238_end_mask_0 = const()[name = tensor("op_48238_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_48238_cast_fp16 = slice_by_index(begin = var_48238_begin_0, end = var_48238_end_0, end_mask = var_48238_end_mask_0, x = transpose_0)[name = tensor("op_48238_cast_fp16")]; + tensor var_48242_begin_0 = const()[name = tensor("op_48242_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_48242_end_0 = const()[name = tensor("op_48242_end_0"), val = tensor([1, 1500, 1, 192])]; + tensor var_48242_end_mask_0 = const()[name = tensor("op_48242_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_48242_cast_fp16 = slice_by_index(begin = var_48242_begin_0, end = var_48242_end_0, end_mask = var_48242_end_mask_0, x = transpose_0)[name = tensor("op_48242_cast_fp16")]; + tensor var_48246_begin_0 = const()[name = tensor("op_48246_begin_0"), val = tensor([0, 0, 0, 192])]; + tensor var_48246_end_0 = const()[name = tensor("op_48246_end_0"), val = tensor([1, 1500, 1, 256])]; + tensor var_48246_end_mask_0 = const()[name = tensor("op_48246_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_48246_cast_fp16 = slice_by_index(begin = var_48246_begin_0, end = var_48246_end_0, end_mask = var_48246_end_mask_0, x = transpose_0)[name = tensor("op_48246_cast_fp16")]; + tensor var_48250_begin_0 = const()[name = tensor("op_48250_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_48250_end_0 = const()[name = tensor("op_48250_end_0"), val = tensor([1, 1500, 1, 320])]; + tensor var_48250_end_mask_0 = const()[name = tensor("op_48250_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_48250_cast_fp16 = slice_by_index(begin = var_48250_begin_0, end = var_48250_end_0, end_mask = var_48250_end_mask_0, x = transpose_0)[name = tensor("op_48250_cast_fp16")]; + tensor var_48254_begin_0 = const()[name = tensor("op_48254_begin_0"), val = tensor([0, 0, 0, 320])]; + tensor var_48254_end_0 = const()[name = tensor("op_48254_end_0"), val = tensor([1, 1500, 1, 384])]; + tensor var_48254_end_mask_0 = const()[name = tensor("op_48254_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_48254_cast_fp16 = slice_by_index(begin = var_48254_begin_0, end = var_48254_end_0, end_mask = var_48254_end_mask_0, x = transpose_0)[name = tensor("op_48254_cast_fp16")]; + tensor var_48258_begin_0 = const()[name = tensor("op_48258_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_48258_end_0 = const()[name = tensor("op_48258_end_0"), val = tensor([1, 1500, 1, 448])]; + tensor var_48258_end_mask_0 = const()[name = tensor("op_48258_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_48258_cast_fp16 = slice_by_index(begin = var_48258_begin_0, end = var_48258_end_0, end_mask = var_48258_end_mask_0, x = transpose_0)[name = tensor("op_48258_cast_fp16")]; + tensor var_48262_begin_0 = const()[name = tensor("op_48262_begin_0"), val = tensor([0, 0, 0, 448])]; + tensor var_48262_end_0 = const()[name = tensor("op_48262_end_0"), val = tensor([1, 1500, 1, 512])]; + tensor var_48262_end_mask_0 = const()[name = tensor("op_48262_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_48262_cast_fp16 = slice_by_index(begin = var_48262_begin_0, end = var_48262_end_0, end_mask = var_48262_end_mask_0, x = transpose_0)[name = tensor("op_48262_cast_fp16")]; + tensor var_48266_begin_0 = const()[name = tensor("op_48266_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_48266_end_0 = const()[name = tensor("op_48266_end_0"), val = tensor([1, 1500, 1, 576])]; + tensor var_48266_end_mask_0 = const()[name = tensor("op_48266_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_48266_cast_fp16 = slice_by_index(begin = var_48266_begin_0, end = var_48266_end_0, end_mask = var_48266_end_mask_0, x = transpose_0)[name = tensor("op_48266_cast_fp16")]; + tensor var_48270_begin_0 = const()[name = tensor("op_48270_begin_0"), val = tensor([0, 0, 0, 576])]; + tensor var_48270_end_0 = const()[name = tensor("op_48270_end_0"), val = tensor([1, 1500, 1, 640])]; + tensor var_48270_end_mask_0 = const()[name = tensor("op_48270_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_48270_cast_fp16 = slice_by_index(begin = var_48270_begin_0, end = var_48270_end_0, end_mask = var_48270_end_mask_0, x = transpose_0)[name = tensor("op_48270_cast_fp16")]; + tensor var_48274_begin_0 = const()[name = tensor("op_48274_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_48274_end_0 = const()[name = tensor("op_48274_end_0"), val = tensor([1, 1500, 1, 704])]; + tensor var_48274_end_mask_0 = const()[name = tensor("op_48274_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_48274_cast_fp16 = slice_by_index(begin = var_48274_begin_0, end = var_48274_end_0, end_mask = var_48274_end_mask_0, x = transpose_0)[name = tensor("op_48274_cast_fp16")]; + tensor var_48278_begin_0 = const()[name = tensor("op_48278_begin_0"), val = tensor([0, 0, 0, 704])]; + tensor var_48278_end_0 = const()[name = tensor("op_48278_end_0"), val = tensor([1, 1500, 1, 768])]; + tensor var_48278_end_mask_0 = const()[name = tensor("op_48278_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_48278_cast_fp16 = slice_by_index(begin = var_48278_begin_0, end = var_48278_end_0, end_mask = var_48278_end_mask_0, x = transpose_0)[name = tensor("op_48278_cast_fp16")]; + tensor var_48282_begin_0 = const()[name = tensor("op_48282_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_48282_end_0 = const()[name = tensor("op_48282_end_0"), val = tensor([1, 1500, 1, 832])]; + tensor var_48282_end_mask_0 = const()[name = tensor("op_48282_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_48282_cast_fp16 = slice_by_index(begin = var_48282_begin_0, end = var_48282_end_0, end_mask = var_48282_end_mask_0, x = transpose_0)[name = tensor("op_48282_cast_fp16")]; + tensor var_48286_begin_0 = const()[name = tensor("op_48286_begin_0"), val = tensor([0, 0, 0, 832])]; + tensor var_48286_end_0 = const()[name = tensor("op_48286_end_0"), val = tensor([1, 1500, 1, 896])]; + tensor var_48286_end_mask_0 = const()[name = tensor("op_48286_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_48286_cast_fp16 = slice_by_index(begin = var_48286_begin_0, end = var_48286_end_0, end_mask = var_48286_end_mask_0, x = transpose_0)[name = tensor("op_48286_cast_fp16")]; + tensor var_48290_begin_0 = const()[name = tensor("op_48290_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_48290_end_0 = const()[name = tensor("op_48290_end_0"), val = tensor([1, 1500, 1, 960])]; + tensor var_48290_end_mask_0 = const()[name = tensor("op_48290_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_48290_cast_fp16 = slice_by_index(begin = var_48290_begin_0, end = var_48290_end_0, end_mask = var_48290_end_mask_0, x = transpose_0)[name = tensor("op_48290_cast_fp16")]; + tensor var_48294_begin_0 = const()[name = tensor("op_48294_begin_0"), val = tensor([0, 0, 0, 960])]; + tensor var_48294_end_0 = const()[name = tensor("op_48294_end_0"), val = tensor([1, 1500, 1, 1024])]; + tensor var_48294_end_mask_0 = const()[name = tensor("op_48294_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_48294_cast_fp16 = slice_by_index(begin = var_48294_begin_0, end = var_48294_end_0, end_mask = var_48294_end_mask_0, x = transpose_0)[name = tensor("op_48294_cast_fp16")]; + tensor var_48298_begin_0 = const()[name = tensor("op_48298_begin_0"), val = tensor([0, 0, 0, 1024])]; + tensor var_48298_end_0 = const()[name = tensor("op_48298_end_0"), val = tensor([1, 1500, 1, 1088])]; + tensor var_48298_end_mask_0 = const()[name = tensor("op_48298_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_48298_cast_fp16 = slice_by_index(begin = var_48298_begin_0, end = var_48298_end_0, end_mask = var_48298_end_mask_0, x = transpose_0)[name = tensor("op_48298_cast_fp16")]; + tensor var_48302_begin_0 = const()[name = tensor("op_48302_begin_0"), val = tensor([0, 0, 0, 1088])]; + tensor var_48302_end_0 = const()[name = tensor("op_48302_end_0"), val = tensor([1, 1500, 1, 1152])]; + tensor var_48302_end_mask_0 = const()[name = tensor("op_48302_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_48302_cast_fp16 = slice_by_index(begin = var_48302_begin_0, end = var_48302_end_0, end_mask = var_48302_end_mask_0, x = transpose_0)[name = tensor("op_48302_cast_fp16")]; + tensor var_48306_begin_0 = const()[name = tensor("op_48306_begin_0"), val = tensor([0, 0, 0, 1152])]; + tensor var_48306_end_0 = const()[name = tensor("op_48306_end_0"), val = tensor([1, 1500, 1, 1216])]; + tensor var_48306_end_mask_0 = const()[name = tensor("op_48306_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_48306_cast_fp16 = slice_by_index(begin = var_48306_begin_0, end = var_48306_end_0, end_mask = var_48306_end_mask_0, x = transpose_0)[name = tensor("op_48306_cast_fp16")]; + tensor var_48310_begin_0 = const()[name = tensor("op_48310_begin_0"), val = tensor([0, 0, 0, 1216])]; + tensor var_48310_end_0 = const()[name = tensor("op_48310_end_0"), val = tensor([1, 1500, 1, 1280])]; + tensor var_48310_end_mask_0 = const()[name = tensor("op_48310_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_48310_cast_fp16 = slice_by_index(begin = var_48310_begin_0, end = var_48310_end_0, end_mask = var_48310_end_mask_0, x = transpose_0)[name = tensor("op_48310_cast_fp16")]; + tensor var_48312_begin_0 = const()[name = tensor("op_48312_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_48312_end_0 = const()[name = tensor("op_48312_end_0"), val = tensor([1, 64, 1, 1500])]; + tensor var_48312_end_mask_0 = const()[name = tensor("op_48312_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_48312_cast_fp16 = slice_by_index(begin = var_48312_begin_0, end = var_48312_end_0, end_mask = var_48312_end_mask_0, x = value_cast_fp16)[name = tensor("op_48312_cast_fp16")]; + tensor var_48316_begin_0 = const()[name = tensor("op_48316_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_48316_end_0 = const()[name = tensor("op_48316_end_0"), val = tensor([1, 128, 1, 1500])]; + tensor var_48316_end_mask_0 = const()[name = tensor("op_48316_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_48316_cast_fp16 = slice_by_index(begin = var_48316_begin_0, end = var_48316_end_0, end_mask = var_48316_end_mask_0, x = value_cast_fp16)[name = tensor("op_48316_cast_fp16")]; + tensor var_48320_begin_0 = const()[name = tensor("op_48320_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_48320_end_0 = const()[name = tensor("op_48320_end_0"), val = tensor([1, 192, 1, 1500])]; + tensor var_48320_end_mask_0 = const()[name = tensor("op_48320_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_48320_cast_fp16 = slice_by_index(begin = var_48320_begin_0, end = var_48320_end_0, end_mask = var_48320_end_mask_0, x = value_cast_fp16)[name = tensor("op_48320_cast_fp16")]; + tensor var_48324_begin_0 = const()[name = tensor("op_48324_begin_0"), val = tensor([0, 192, 0, 0])]; + tensor var_48324_end_0 = const()[name = tensor("op_48324_end_0"), val = tensor([1, 256, 1, 1500])]; + tensor var_48324_end_mask_0 = const()[name = tensor("op_48324_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_48324_cast_fp16 = slice_by_index(begin = var_48324_begin_0, end = var_48324_end_0, end_mask = var_48324_end_mask_0, x = value_cast_fp16)[name = tensor("op_48324_cast_fp16")]; + tensor var_48328_begin_0 = const()[name = tensor("op_48328_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_48328_end_0 = const()[name = tensor("op_48328_end_0"), val = tensor([1, 320, 1, 1500])]; + tensor var_48328_end_mask_0 = const()[name = tensor("op_48328_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_48328_cast_fp16 = slice_by_index(begin = var_48328_begin_0, end = var_48328_end_0, end_mask = var_48328_end_mask_0, x = value_cast_fp16)[name = tensor("op_48328_cast_fp16")]; + tensor var_48332_begin_0 = const()[name = tensor("op_48332_begin_0"), val = tensor([0, 320, 0, 0])]; + tensor var_48332_end_0 = const()[name = tensor("op_48332_end_0"), val = tensor([1, 384, 1, 1500])]; + tensor var_48332_end_mask_0 = const()[name = tensor("op_48332_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_48332_cast_fp16 = slice_by_index(begin = var_48332_begin_0, end = var_48332_end_0, end_mask = var_48332_end_mask_0, x = value_cast_fp16)[name = tensor("op_48332_cast_fp16")]; + tensor var_48336_begin_0 = const()[name = tensor("op_48336_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_48336_end_0 = const()[name = tensor("op_48336_end_0"), val = tensor([1, 448, 1, 1500])]; + tensor var_48336_end_mask_0 = const()[name = tensor("op_48336_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_48336_cast_fp16 = slice_by_index(begin = var_48336_begin_0, end = var_48336_end_0, end_mask = var_48336_end_mask_0, x = value_cast_fp16)[name = tensor("op_48336_cast_fp16")]; + tensor var_48340_begin_0 = const()[name = tensor("op_48340_begin_0"), val = tensor([0, 448, 0, 0])]; + tensor var_48340_end_0 = const()[name = tensor("op_48340_end_0"), val = tensor([1, 512, 1, 1500])]; + tensor var_48340_end_mask_0 = const()[name = tensor("op_48340_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_48340_cast_fp16 = slice_by_index(begin = var_48340_begin_0, end = var_48340_end_0, end_mask = var_48340_end_mask_0, x = value_cast_fp16)[name = tensor("op_48340_cast_fp16")]; + tensor var_48344_begin_0 = const()[name = tensor("op_48344_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_48344_end_0 = const()[name = tensor("op_48344_end_0"), val = tensor([1, 576, 1, 1500])]; + tensor var_48344_end_mask_0 = const()[name = tensor("op_48344_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_48344_cast_fp16 = slice_by_index(begin = var_48344_begin_0, end = var_48344_end_0, end_mask = var_48344_end_mask_0, x = value_cast_fp16)[name = tensor("op_48344_cast_fp16")]; + tensor var_48348_begin_0 = const()[name = tensor("op_48348_begin_0"), val = tensor([0, 576, 0, 0])]; + tensor var_48348_end_0 = const()[name = tensor("op_48348_end_0"), val = tensor([1, 640, 1, 1500])]; + tensor var_48348_end_mask_0 = const()[name = tensor("op_48348_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_48348_cast_fp16 = slice_by_index(begin = var_48348_begin_0, end = var_48348_end_0, end_mask = var_48348_end_mask_0, x = value_cast_fp16)[name = tensor("op_48348_cast_fp16")]; + tensor var_48352_begin_0 = const()[name = tensor("op_48352_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_48352_end_0 = const()[name = tensor("op_48352_end_0"), val = tensor([1, 704, 1, 1500])]; + tensor var_48352_end_mask_0 = const()[name = tensor("op_48352_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_48352_cast_fp16 = slice_by_index(begin = var_48352_begin_0, end = var_48352_end_0, end_mask = var_48352_end_mask_0, x = value_cast_fp16)[name = tensor("op_48352_cast_fp16")]; + tensor var_48356_begin_0 = const()[name = tensor("op_48356_begin_0"), val = tensor([0, 704, 0, 0])]; + tensor var_48356_end_0 = const()[name = tensor("op_48356_end_0"), val = tensor([1, 768, 1, 1500])]; + tensor var_48356_end_mask_0 = const()[name = tensor("op_48356_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_48356_cast_fp16 = slice_by_index(begin = var_48356_begin_0, end = var_48356_end_0, end_mask = var_48356_end_mask_0, x = value_cast_fp16)[name = tensor("op_48356_cast_fp16")]; + tensor var_48360_begin_0 = const()[name = tensor("op_48360_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_48360_end_0 = const()[name = tensor("op_48360_end_0"), val = tensor([1, 832, 1, 1500])]; + tensor var_48360_end_mask_0 = const()[name = tensor("op_48360_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_48360_cast_fp16 = slice_by_index(begin = var_48360_begin_0, end = var_48360_end_0, end_mask = var_48360_end_mask_0, x = value_cast_fp16)[name = tensor("op_48360_cast_fp16")]; + tensor var_48364_begin_0 = const()[name = tensor("op_48364_begin_0"), val = tensor([0, 832, 0, 0])]; + tensor var_48364_end_0 = const()[name = tensor("op_48364_end_0"), val = tensor([1, 896, 1, 1500])]; + tensor var_48364_end_mask_0 = const()[name = tensor("op_48364_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_48364_cast_fp16 = slice_by_index(begin = var_48364_begin_0, end = var_48364_end_0, end_mask = var_48364_end_mask_0, x = value_cast_fp16)[name = tensor("op_48364_cast_fp16")]; + tensor var_48368_begin_0 = const()[name = tensor("op_48368_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_48368_end_0 = const()[name = tensor("op_48368_end_0"), val = tensor([1, 960, 1, 1500])]; + tensor var_48368_end_mask_0 = const()[name = tensor("op_48368_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_48368_cast_fp16 = slice_by_index(begin = var_48368_begin_0, end = var_48368_end_0, end_mask = var_48368_end_mask_0, x = value_cast_fp16)[name = tensor("op_48368_cast_fp16")]; + tensor var_48372_begin_0 = const()[name = tensor("op_48372_begin_0"), val = tensor([0, 960, 0, 0])]; + tensor var_48372_end_0 = const()[name = tensor("op_48372_end_0"), val = tensor([1, 1024, 1, 1500])]; + tensor var_48372_end_mask_0 = const()[name = tensor("op_48372_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_48372_cast_fp16 = slice_by_index(begin = var_48372_begin_0, end = var_48372_end_0, end_mask = var_48372_end_mask_0, x = value_cast_fp16)[name = tensor("op_48372_cast_fp16")]; + tensor var_48376_begin_0 = const()[name = tensor("op_48376_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_48376_end_0 = const()[name = tensor("op_48376_end_0"), val = tensor([1, 1088, 1, 1500])]; + tensor var_48376_end_mask_0 = const()[name = tensor("op_48376_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_48376_cast_fp16 = slice_by_index(begin = var_48376_begin_0, end = var_48376_end_0, end_mask = var_48376_end_mask_0, x = value_cast_fp16)[name = tensor("op_48376_cast_fp16")]; + tensor var_48380_begin_0 = const()[name = tensor("op_48380_begin_0"), val = tensor([0, 1088, 0, 0])]; + tensor var_48380_end_0 = const()[name = tensor("op_48380_end_0"), val = tensor([1, 1152, 1, 1500])]; + tensor var_48380_end_mask_0 = const()[name = tensor("op_48380_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_48380_cast_fp16 = slice_by_index(begin = var_48380_begin_0, end = var_48380_end_0, end_mask = var_48380_end_mask_0, x = value_cast_fp16)[name = tensor("op_48380_cast_fp16")]; + tensor var_48384_begin_0 = const()[name = tensor("op_48384_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_48384_end_0 = const()[name = tensor("op_48384_end_0"), val = tensor([1, 1216, 1, 1500])]; + tensor var_48384_end_mask_0 = const()[name = tensor("op_48384_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_48384_cast_fp16 = slice_by_index(begin = var_48384_begin_0, end = var_48384_end_0, end_mask = var_48384_end_mask_0, x = value_cast_fp16)[name = tensor("op_48384_cast_fp16")]; + tensor var_48388_begin_0 = const()[name = tensor("op_48388_begin_0"), val = tensor([0, 1216, 0, 0])]; + tensor var_48388_end_0 = const()[name = tensor("op_48388_end_0"), val = tensor([1, 1280, 1, 1500])]; + tensor var_48388_end_mask_0 = const()[name = tensor("op_48388_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_48388_cast_fp16 = slice_by_index(begin = var_48388_begin_0, end = var_48388_end_0, end_mask = var_48388_end_mask_0, x = value_cast_fp16)[name = tensor("op_48388_cast_fp16")]; + tensor var_48392_equation_0 = const()[name = tensor("op_48392_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48392_cast_fp16 = einsum(equation = var_48392_equation_0, values = (var_48234_cast_fp16, var_47676_cast_fp16))[name = tensor("op_48392_cast_fp16")]; + tensor var_48393_to_fp16 = const()[name = tensor("op_48393_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4961_cast_fp16 = mul(x = var_48392_cast_fp16, y = var_48393_to_fp16)[name = tensor("aw_chunk_4961_cast_fp16")]; + tensor var_48396_equation_0 = const()[name = tensor("op_48396_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48396_cast_fp16 = einsum(equation = var_48396_equation_0, values = (var_48234_cast_fp16, var_47683_cast_fp16))[name = tensor("op_48396_cast_fp16")]; + tensor var_48397_to_fp16 = const()[name = tensor("op_48397_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4963_cast_fp16 = mul(x = var_48396_cast_fp16, y = var_48397_to_fp16)[name = tensor("aw_chunk_4963_cast_fp16")]; + tensor var_48400_equation_0 = const()[name = tensor("op_48400_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48400_cast_fp16 = einsum(equation = var_48400_equation_0, values = (var_48234_cast_fp16, var_47690_cast_fp16))[name = tensor("op_48400_cast_fp16")]; + tensor var_48401_to_fp16 = const()[name = tensor("op_48401_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4965_cast_fp16 = mul(x = var_48400_cast_fp16, y = var_48401_to_fp16)[name = tensor("aw_chunk_4965_cast_fp16")]; + tensor var_48404_equation_0 = const()[name = tensor("op_48404_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48404_cast_fp16 = einsum(equation = var_48404_equation_0, values = (var_48234_cast_fp16, var_47697_cast_fp16))[name = tensor("op_48404_cast_fp16")]; + tensor var_48405_to_fp16 = const()[name = tensor("op_48405_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4967_cast_fp16 = mul(x = var_48404_cast_fp16, y = var_48405_to_fp16)[name = tensor("aw_chunk_4967_cast_fp16")]; + tensor var_48408_equation_0 = const()[name = tensor("op_48408_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48408_cast_fp16 = einsum(equation = var_48408_equation_0, values = (var_48238_cast_fp16, var_47704_cast_fp16))[name = tensor("op_48408_cast_fp16")]; + tensor var_48409_to_fp16 = const()[name = tensor("op_48409_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4969_cast_fp16 = mul(x = var_48408_cast_fp16, y = var_48409_to_fp16)[name = tensor("aw_chunk_4969_cast_fp16")]; + tensor var_48412_equation_0 = const()[name = tensor("op_48412_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48412_cast_fp16 = einsum(equation = var_48412_equation_0, values = (var_48238_cast_fp16, var_47711_cast_fp16))[name = tensor("op_48412_cast_fp16")]; + tensor var_48413_to_fp16 = const()[name = tensor("op_48413_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4971_cast_fp16 = mul(x = var_48412_cast_fp16, y = var_48413_to_fp16)[name = tensor("aw_chunk_4971_cast_fp16")]; + tensor var_48416_equation_0 = const()[name = tensor("op_48416_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48416_cast_fp16 = einsum(equation = var_48416_equation_0, values = (var_48238_cast_fp16, var_47718_cast_fp16))[name = tensor("op_48416_cast_fp16")]; + tensor var_48417_to_fp16 = const()[name = tensor("op_48417_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4973_cast_fp16 = mul(x = var_48416_cast_fp16, y = var_48417_to_fp16)[name = tensor("aw_chunk_4973_cast_fp16")]; + tensor var_48420_equation_0 = const()[name = tensor("op_48420_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48420_cast_fp16 = einsum(equation = var_48420_equation_0, values = (var_48238_cast_fp16, var_47725_cast_fp16))[name = tensor("op_48420_cast_fp16")]; + tensor var_48421_to_fp16 = const()[name = tensor("op_48421_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4975_cast_fp16 = mul(x = var_48420_cast_fp16, y = var_48421_to_fp16)[name = tensor("aw_chunk_4975_cast_fp16")]; + tensor var_48424_equation_0 = const()[name = tensor("op_48424_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48424_cast_fp16 = einsum(equation = var_48424_equation_0, values = (var_48242_cast_fp16, var_47732_cast_fp16))[name = tensor("op_48424_cast_fp16")]; + tensor var_48425_to_fp16 = const()[name = tensor("op_48425_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4977_cast_fp16 = mul(x = var_48424_cast_fp16, y = var_48425_to_fp16)[name = tensor("aw_chunk_4977_cast_fp16")]; + tensor var_48428_equation_0 = const()[name = tensor("op_48428_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48428_cast_fp16 = einsum(equation = var_48428_equation_0, values = (var_48242_cast_fp16, var_47739_cast_fp16))[name = tensor("op_48428_cast_fp16")]; + tensor var_48429_to_fp16 = const()[name = tensor("op_48429_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4979_cast_fp16 = mul(x = var_48428_cast_fp16, y = var_48429_to_fp16)[name = tensor("aw_chunk_4979_cast_fp16")]; + tensor var_48432_equation_0 = const()[name = tensor("op_48432_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48432_cast_fp16 = einsum(equation = var_48432_equation_0, values = (var_48242_cast_fp16, var_47746_cast_fp16))[name = tensor("op_48432_cast_fp16")]; + tensor var_48433_to_fp16 = const()[name = tensor("op_48433_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4981_cast_fp16 = mul(x = var_48432_cast_fp16, y = var_48433_to_fp16)[name = tensor("aw_chunk_4981_cast_fp16")]; + tensor var_48436_equation_0 = const()[name = tensor("op_48436_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48436_cast_fp16 = einsum(equation = var_48436_equation_0, values = (var_48242_cast_fp16, var_47753_cast_fp16))[name = tensor("op_48436_cast_fp16")]; + tensor var_48437_to_fp16 = const()[name = tensor("op_48437_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4983_cast_fp16 = mul(x = var_48436_cast_fp16, y = var_48437_to_fp16)[name = tensor("aw_chunk_4983_cast_fp16")]; + tensor var_48440_equation_0 = const()[name = tensor("op_48440_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48440_cast_fp16 = einsum(equation = var_48440_equation_0, values = (var_48246_cast_fp16, var_47760_cast_fp16))[name = tensor("op_48440_cast_fp16")]; + tensor var_48441_to_fp16 = const()[name = tensor("op_48441_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4985_cast_fp16 = mul(x = var_48440_cast_fp16, y = var_48441_to_fp16)[name = tensor("aw_chunk_4985_cast_fp16")]; + tensor var_48444_equation_0 = const()[name = tensor("op_48444_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48444_cast_fp16 = einsum(equation = var_48444_equation_0, values = (var_48246_cast_fp16, var_47767_cast_fp16))[name = tensor("op_48444_cast_fp16")]; + tensor var_48445_to_fp16 = const()[name = tensor("op_48445_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4987_cast_fp16 = mul(x = var_48444_cast_fp16, y = var_48445_to_fp16)[name = tensor("aw_chunk_4987_cast_fp16")]; + tensor var_48448_equation_0 = const()[name = tensor("op_48448_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48448_cast_fp16 = einsum(equation = var_48448_equation_0, values = (var_48246_cast_fp16, var_47774_cast_fp16))[name = tensor("op_48448_cast_fp16")]; + tensor var_48449_to_fp16 = const()[name = tensor("op_48449_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4989_cast_fp16 = mul(x = var_48448_cast_fp16, y = var_48449_to_fp16)[name = tensor("aw_chunk_4989_cast_fp16")]; + tensor var_48452_equation_0 = const()[name = tensor("op_48452_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48452_cast_fp16 = einsum(equation = var_48452_equation_0, values = (var_48246_cast_fp16, var_47781_cast_fp16))[name = tensor("op_48452_cast_fp16")]; + tensor var_48453_to_fp16 = const()[name = tensor("op_48453_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4991_cast_fp16 = mul(x = var_48452_cast_fp16, y = var_48453_to_fp16)[name = tensor("aw_chunk_4991_cast_fp16")]; + tensor var_48456_equation_0 = const()[name = tensor("op_48456_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48456_cast_fp16 = einsum(equation = var_48456_equation_0, values = (var_48250_cast_fp16, var_47788_cast_fp16))[name = tensor("op_48456_cast_fp16")]; + tensor var_48457_to_fp16 = const()[name = tensor("op_48457_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4993_cast_fp16 = mul(x = var_48456_cast_fp16, y = var_48457_to_fp16)[name = tensor("aw_chunk_4993_cast_fp16")]; + tensor var_48460_equation_0 = const()[name = tensor("op_48460_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48460_cast_fp16 = einsum(equation = var_48460_equation_0, values = (var_48250_cast_fp16, var_47795_cast_fp16))[name = tensor("op_48460_cast_fp16")]; + tensor var_48461_to_fp16 = const()[name = tensor("op_48461_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4995_cast_fp16 = mul(x = var_48460_cast_fp16, y = var_48461_to_fp16)[name = tensor("aw_chunk_4995_cast_fp16")]; + tensor var_48464_equation_0 = const()[name = tensor("op_48464_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48464_cast_fp16 = einsum(equation = var_48464_equation_0, values = (var_48250_cast_fp16, var_47802_cast_fp16))[name = tensor("op_48464_cast_fp16")]; + tensor var_48465_to_fp16 = const()[name = tensor("op_48465_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4997_cast_fp16 = mul(x = var_48464_cast_fp16, y = var_48465_to_fp16)[name = tensor("aw_chunk_4997_cast_fp16")]; + tensor var_48468_equation_0 = const()[name = tensor("op_48468_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48468_cast_fp16 = einsum(equation = var_48468_equation_0, values = (var_48250_cast_fp16, var_47809_cast_fp16))[name = tensor("op_48468_cast_fp16")]; + tensor var_48469_to_fp16 = const()[name = tensor("op_48469_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_4999_cast_fp16 = mul(x = var_48468_cast_fp16, y = var_48469_to_fp16)[name = tensor("aw_chunk_4999_cast_fp16")]; + tensor var_48472_equation_0 = const()[name = tensor("op_48472_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48472_cast_fp16 = einsum(equation = var_48472_equation_0, values = (var_48254_cast_fp16, var_47816_cast_fp16))[name = tensor("op_48472_cast_fp16")]; + tensor var_48473_to_fp16 = const()[name = tensor("op_48473_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_5001_cast_fp16 = mul(x = var_48472_cast_fp16, y = var_48473_to_fp16)[name = tensor("aw_chunk_5001_cast_fp16")]; + tensor var_48476_equation_0 = const()[name = tensor("op_48476_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48476_cast_fp16 = einsum(equation = var_48476_equation_0, values = (var_48254_cast_fp16, var_47823_cast_fp16))[name = tensor("op_48476_cast_fp16")]; + tensor var_48477_to_fp16 = const()[name = tensor("op_48477_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_5003_cast_fp16 = mul(x = var_48476_cast_fp16, y = var_48477_to_fp16)[name = tensor("aw_chunk_5003_cast_fp16")]; + tensor var_48480_equation_0 = const()[name = tensor("op_48480_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48480_cast_fp16 = einsum(equation = var_48480_equation_0, values = (var_48254_cast_fp16, var_47830_cast_fp16))[name = tensor("op_48480_cast_fp16")]; + tensor var_48481_to_fp16 = const()[name = tensor("op_48481_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_5005_cast_fp16 = mul(x = var_48480_cast_fp16, y = var_48481_to_fp16)[name = tensor("aw_chunk_5005_cast_fp16")]; + tensor var_48484_equation_0 = const()[name = tensor("op_48484_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48484_cast_fp16 = einsum(equation = var_48484_equation_0, values = (var_48254_cast_fp16, var_47837_cast_fp16))[name = tensor("op_48484_cast_fp16")]; + tensor var_48485_to_fp16 = const()[name = tensor("op_48485_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_5007_cast_fp16 = mul(x = var_48484_cast_fp16, y = var_48485_to_fp16)[name = tensor("aw_chunk_5007_cast_fp16")]; + tensor var_48488_equation_0 = const()[name = tensor("op_48488_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48488_cast_fp16 = einsum(equation = var_48488_equation_0, values = (var_48258_cast_fp16, var_47844_cast_fp16))[name = tensor("op_48488_cast_fp16")]; + tensor var_48489_to_fp16 = const()[name = tensor("op_48489_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_5009_cast_fp16 = mul(x = var_48488_cast_fp16, y = var_48489_to_fp16)[name = tensor("aw_chunk_5009_cast_fp16")]; + tensor var_48492_equation_0 = const()[name = tensor("op_48492_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48492_cast_fp16 = einsum(equation = var_48492_equation_0, values = (var_48258_cast_fp16, var_47851_cast_fp16))[name = tensor("op_48492_cast_fp16")]; + tensor var_48493_to_fp16 = const()[name = tensor("op_48493_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_5011_cast_fp16 = mul(x = var_48492_cast_fp16, y = var_48493_to_fp16)[name = tensor("aw_chunk_5011_cast_fp16")]; + tensor var_48496_equation_0 = const()[name = tensor("op_48496_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48496_cast_fp16 = einsum(equation = var_48496_equation_0, values = (var_48258_cast_fp16, var_47858_cast_fp16))[name = tensor("op_48496_cast_fp16")]; + tensor var_48497_to_fp16 = const()[name = tensor("op_48497_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_5013_cast_fp16 = mul(x = var_48496_cast_fp16, y = var_48497_to_fp16)[name = tensor("aw_chunk_5013_cast_fp16")]; + tensor var_48500_equation_0 = const()[name = tensor("op_48500_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48500_cast_fp16 = einsum(equation = var_48500_equation_0, values = (var_48258_cast_fp16, var_47865_cast_fp16))[name = tensor("op_48500_cast_fp16")]; + tensor var_48501_to_fp16 = const()[name = tensor("op_48501_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_5015_cast_fp16 = mul(x = var_48500_cast_fp16, y = var_48501_to_fp16)[name = tensor("aw_chunk_5015_cast_fp16")]; + tensor var_48504_equation_0 = const()[name = tensor("op_48504_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48504_cast_fp16 = einsum(equation = var_48504_equation_0, values = (var_48262_cast_fp16, var_47872_cast_fp16))[name = tensor("op_48504_cast_fp16")]; + tensor var_48505_to_fp16 = const()[name = tensor("op_48505_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_5017_cast_fp16 = mul(x = var_48504_cast_fp16, y = var_48505_to_fp16)[name = tensor("aw_chunk_5017_cast_fp16")]; + tensor var_48508_equation_0 = const()[name = tensor("op_48508_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48508_cast_fp16 = einsum(equation = var_48508_equation_0, values = (var_48262_cast_fp16, var_47879_cast_fp16))[name = tensor("op_48508_cast_fp16")]; + tensor var_48509_to_fp16 = const()[name = tensor("op_48509_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_5019_cast_fp16 = mul(x = var_48508_cast_fp16, y = var_48509_to_fp16)[name = tensor("aw_chunk_5019_cast_fp16")]; + tensor var_48512_equation_0 = const()[name = tensor("op_48512_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48512_cast_fp16 = einsum(equation = var_48512_equation_0, values = (var_48262_cast_fp16, var_47886_cast_fp16))[name = tensor("op_48512_cast_fp16")]; + tensor var_48513_to_fp16 = const()[name = tensor("op_48513_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_5021_cast_fp16 = mul(x = var_48512_cast_fp16, y = var_48513_to_fp16)[name = tensor("aw_chunk_5021_cast_fp16")]; + tensor var_48516_equation_0 = const()[name = tensor("op_48516_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48516_cast_fp16 = einsum(equation = var_48516_equation_0, values = (var_48262_cast_fp16, var_47893_cast_fp16))[name = tensor("op_48516_cast_fp16")]; + tensor var_48517_to_fp16 = const()[name = tensor("op_48517_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_5023_cast_fp16 = mul(x = var_48516_cast_fp16, y = var_48517_to_fp16)[name = tensor("aw_chunk_5023_cast_fp16")]; + tensor var_48520_equation_0 = const()[name = tensor("op_48520_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48520_cast_fp16 = einsum(equation = var_48520_equation_0, values = (var_48266_cast_fp16, var_47900_cast_fp16))[name = tensor("op_48520_cast_fp16")]; + tensor var_48521_to_fp16 = const()[name = tensor("op_48521_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_5025_cast_fp16 = mul(x = var_48520_cast_fp16, y = var_48521_to_fp16)[name = tensor("aw_chunk_5025_cast_fp16")]; + tensor var_48524_equation_0 = const()[name = tensor("op_48524_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48524_cast_fp16 = einsum(equation = var_48524_equation_0, values = (var_48266_cast_fp16, var_47907_cast_fp16))[name = tensor("op_48524_cast_fp16")]; + tensor var_48525_to_fp16 = const()[name = tensor("op_48525_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_5027_cast_fp16 = mul(x = var_48524_cast_fp16, y = var_48525_to_fp16)[name = tensor("aw_chunk_5027_cast_fp16")]; + tensor var_48528_equation_0 = const()[name = tensor("op_48528_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48528_cast_fp16 = einsum(equation = var_48528_equation_0, values = (var_48266_cast_fp16, var_47914_cast_fp16))[name = tensor("op_48528_cast_fp16")]; + tensor var_48529_to_fp16 = const()[name = tensor("op_48529_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_5029_cast_fp16 = mul(x = var_48528_cast_fp16, y = var_48529_to_fp16)[name = tensor("aw_chunk_5029_cast_fp16")]; + tensor var_48532_equation_0 = const()[name = tensor("op_48532_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48532_cast_fp16 = einsum(equation = var_48532_equation_0, values = (var_48266_cast_fp16, var_47921_cast_fp16))[name = tensor("op_48532_cast_fp16")]; + tensor var_48533_to_fp16 = const()[name = tensor("op_48533_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_5031_cast_fp16 = mul(x = var_48532_cast_fp16, y = var_48533_to_fp16)[name = tensor("aw_chunk_5031_cast_fp16")]; + tensor var_48536_equation_0 = const()[name = tensor("op_48536_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48536_cast_fp16 = einsum(equation = var_48536_equation_0, values = (var_48270_cast_fp16, var_47928_cast_fp16))[name = tensor("op_48536_cast_fp16")]; + tensor var_48537_to_fp16 = const()[name = tensor("op_48537_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_5033_cast_fp16 = mul(x = var_48536_cast_fp16, y = var_48537_to_fp16)[name = tensor("aw_chunk_5033_cast_fp16")]; + tensor var_48540_equation_0 = const()[name = tensor("op_48540_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48540_cast_fp16 = einsum(equation = var_48540_equation_0, values = (var_48270_cast_fp16, var_47935_cast_fp16))[name = tensor("op_48540_cast_fp16")]; + tensor var_48541_to_fp16 = const()[name = tensor("op_48541_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_5035_cast_fp16 = mul(x = var_48540_cast_fp16, y = var_48541_to_fp16)[name = tensor("aw_chunk_5035_cast_fp16")]; + tensor var_48544_equation_0 = const()[name = tensor("op_48544_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48544_cast_fp16 = einsum(equation = var_48544_equation_0, values = (var_48270_cast_fp16, var_47942_cast_fp16))[name = tensor("op_48544_cast_fp16")]; + tensor var_48545_to_fp16 = const()[name = tensor("op_48545_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_5037_cast_fp16 = mul(x = var_48544_cast_fp16, y = var_48545_to_fp16)[name = tensor("aw_chunk_5037_cast_fp16")]; + tensor var_48548_equation_0 = const()[name = tensor("op_48548_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48548_cast_fp16 = einsum(equation = var_48548_equation_0, values = (var_48270_cast_fp16, var_47949_cast_fp16))[name = tensor("op_48548_cast_fp16")]; + tensor var_48549_to_fp16 = const()[name = tensor("op_48549_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_5039_cast_fp16 = mul(x = var_48548_cast_fp16, y = var_48549_to_fp16)[name = tensor("aw_chunk_5039_cast_fp16")]; + tensor var_48552_equation_0 = const()[name = tensor("op_48552_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48552_cast_fp16 = einsum(equation = var_48552_equation_0, values = (var_48274_cast_fp16, var_47956_cast_fp16))[name = tensor("op_48552_cast_fp16")]; + tensor var_48553_to_fp16 = const()[name = tensor("op_48553_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_5041_cast_fp16 = mul(x = var_48552_cast_fp16, y = var_48553_to_fp16)[name = tensor("aw_chunk_5041_cast_fp16")]; + tensor var_48556_equation_0 = const()[name = tensor("op_48556_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48556_cast_fp16 = einsum(equation = var_48556_equation_0, values = (var_48274_cast_fp16, var_47963_cast_fp16))[name = tensor("op_48556_cast_fp16")]; + tensor var_48557_to_fp16 = const()[name = tensor("op_48557_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_5043_cast_fp16 = mul(x = var_48556_cast_fp16, y = var_48557_to_fp16)[name = tensor("aw_chunk_5043_cast_fp16")]; + tensor var_48560_equation_0 = const()[name = tensor("op_48560_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48560_cast_fp16 = einsum(equation = var_48560_equation_0, values = (var_48274_cast_fp16, var_47970_cast_fp16))[name = tensor("op_48560_cast_fp16")]; + tensor var_48561_to_fp16 = const()[name = tensor("op_48561_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_5045_cast_fp16 = mul(x = var_48560_cast_fp16, y = var_48561_to_fp16)[name = tensor("aw_chunk_5045_cast_fp16")]; + tensor var_48564_equation_0 = const()[name = tensor("op_48564_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48564_cast_fp16 = einsum(equation = var_48564_equation_0, values = (var_48274_cast_fp16, var_47977_cast_fp16))[name = tensor("op_48564_cast_fp16")]; + tensor var_48565_to_fp16 = const()[name = tensor("op_48565_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_5047_cast_fp16 = mul(x = var_48564_cast_fp16, y = var_48565_to_fp16)[name = tensor("aw_chunk_5047_cast_fp16")]; + tensor var_48568_equation_0 = const()[name = tensor("op_48568_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48568_cast_fp16 = einsum(equation = var_48568_equation_0, values = (var_48278_cast_fp16, var_47984_cast_fp16))[name = tensor("op_48568_cast_fp16")]; + tensor var_48569_to_fp16 = const()[name = tensor("op_48569_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_5049_cast_fp16 = mul(x = var_48568_cast_fp16, y = var_48569_to_fp16)[name = tensor("aw_chunk_5049_cast_fp16")]; + tensor var_48572_equation_0 = const()[name = tensor("op_48572_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48572_cast_fp16 = einsum(equation = var_48572_equation_0, values = (var_48278_cast_fp16, var_47991_cast_fp16))[name = tensor("op_48572_cast_fp16")]; + tensor var_48573_to_fp16 = const()[name = tensor("op_48573_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_5051_cast_fp16 = mul(x = var_48572_cast_fp16, y = var_48573_to_fp16)[name = tensor("aw_chunk_5051_cast_fp16")]; + tensor var_48576_equation_0 = const()[name = tensor("op_48576_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48576_cast_fp16 = einsum(equation = var_48576_equation_0, values = (var_48278_cast_fp16, var_47998_cast_fp16))[name = tensor("op_48576_cast_fp16")]; + tensor var_48577_to_fp16 = const()[name = tensor("op_48577_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_5053_cast_fp16 = mul(x = var_48576_cast_fp16, y = var_48577_to_fp16)[name = tensor("aw_chunk_5053_cast_fp16")]; + tensor var_48580_equation_0 = const()[name = tensor("op_48580_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48580_cast_fp16 = einsum(equation = var_48580_equation_0, values = (var_48278_cast_fp16, var_48005_cast_fp16))[name = tensor("op_48580_cast_fp16")]; + tensor var_48581_to_fp16 = const()[name = tensor("op_48581_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_5055_cast_fp16 = mul(x = var_48580_cast_fp16, y = var_48581_to_fp16)[name = tensor("aw_chunk_5055_cast_fp16")]; + tensor var_48584_equation_0 = const()[name = tensor("op_48584_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48584_cast_fp16 = einsum(equation = var_48584_equation_0, values = (var_48282_cast_fp16, var_48012_cast_fp16))[name = tensor("op_48584_cast_fp16")]; + tensor var_48585_to_fp16 = const()[name = tensor("op_48585_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_5057_cast_fp16 = mul(x = var_48584_cast_fp16, y = var_48585_to_fp16)[name = tensor("aw_chunk_5057_cast_fp16")]; + tensor var_48588_equation_0 = const()[name = tensor("op_48588_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48588_cast_fp16 = einsum(equation = var_48588_equation_0, values = (var_48282_cast_fp16, var_48019_cast_fp16))[name = tensor("op_48588_cast_fp16")]; + tensor var_48589_to_fp16 = const()[name = tensor("op_48589_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_5059_cast_fp16 = mul(x = var_48588_cast_fp16, y = var_48589_to_fp16)[name = tensor("aw_chunk_5059_cast_fp16")]; + tensor var_48592_equation_0 = const()[name = tensor("op_48592_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48592_cast_fp16 = einsum(equation = var_48592_equation_0, values = (var_48282_cast_fp16, var_48026_cast_fp16))[name = tensor("op_48592_cast_fp16")]; + tensor var_48593_to_fp16 = const()[name = tensor("op_48593_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_5061_cast_fp16 = mul(x = var_48592_cast_fp16, y = var_48593_to_fp16)[name = tensor("aw_chunk_5061_cast_fp16")]; + tensor var_48596_equation_0 = const()[name = tensor("op_48596_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48596_cast_fp16 = einsum(equation = var_48596_equation_0, values = (var_48282_cast_fp16, var_48033_cast_fp16))[name = tensor("op_48596_cast_fp16")]; + tensor var_48597_to_fp16 = const()[name = tensor("op_48597_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_5063_cast_fp16 = mul(x = var_48596_cast_fp16, y = var_48597_to_fp16)[name = tensor("aw_chunk_5063_cast_fp16")]; + tensor var_48600_equation_0 = const()[name = tensor("op_48600_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48600_cast_fp16 = einsum(equation = var_48600_equation_0, values = (var_48286_cast_fp16, var_48040_cast_fp16))[name = tensor("op_48600_cast_fp16")]; + tensor var_48601_to_fp16 = const()[name = tensor("op_48601_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_5065_cast_fp16 = mul(x = var_48600_cast_fp16, y = var_48601_to_fp16)[name = tensor("aw_chunk_5065_cast_fp16")]; + tensor var_48604_equation_0 = const()[name = tensor("op_48604_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48604_cast_fp16 = einsum(equation = var_48604_equation_0, values = (var_48286_cast_fp16, var_48047_cast_fp16))[name = tensor("op_48604_cast_fp16")]; + tensor var_48605_to_fp16 = const()[name = tensor("op_48605_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_5067_cast_fp16 = mul(x = var_48604_cast_fp16, y = var_48605_to_fp16)[name = tensor("aw_chunk_5067_cast_fp16")]; + tensor var_48608_equation_0 = const()[name = tensor("op_48608_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48608_cast_fp16 = einsum(equation = var_48608_equation_0, values = (var_48286_cast_fp16, var_48054_cast_fp16))[name = tensor("op_48608_cast_fp16")]; + tensor var_48609_to_fp16 = const()[name = tensor("op_48609_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_5069_cast_fp16 = mul(x = var_48608_cast_fp16, y = var_48609_to_fp16)[name = tensor("aw_chunk_5069_cast_fp16")]; + tensor var_48612_equation_0 = const()[name = tensor("op_48612_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48612_cast_fp16 = einsum(equation = var_48612_equation_0, values = (var_48286_cast_fp16, var_48061_cast_fp16))[name = tensor("op_48612_cast_fp16")]; + tensor var_48613_to_fp16 = const()[name = tensor("op_48613_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_5071_cast_fp16 = mul(x = var_48612_cast_fp16, y = var_48613_to_fp16)[name = tensor("aw_chunk_5071_cast_fp16")]; + tensor var_48616_equation_0 = const()[name = tensor("op_48616_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48616_cast_fp16 = einsum(equation = var_48616_equation_0, values = (var_48290_cast_fp16, var_48068_cast_fp16))[name = tensor("op_48616_cast_fp16")]; + tensor var_48617_to_fp16 = const()[name = tensor("op_48617_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_5073_cast_fp16 = mul(x = var_48616_cast_fp16, y = var_48617_to_fp16)[name = tensor("aw_chunk_5073_cast_fp16")]; + tensor var_48620_equation_0 = const()[name = tensor("op_48620_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48620_cast_fp16 = einsum(equation = var_48620_equation_0, values = (var_48290_cast_fp16, var_48075_cast_fp16))[name = tensor("op_48620_cast_fp16")]; + tensor var_48621_to_fp16 = const()[name = tensor("op_48621_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_5075_cast_fp16 = mul(x = var_48620_cast_fp16, y = var_48621_to_fp16)[name = tensor("aw_chunk_5075_cast_fp16")]; + tensor var_48624_equation_0 = const()[name = tensor("op_48624_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48624_cast_fp16 = einsum(equation = var_48624_equation_0, values = (var_48290_cast_fp16, var_48082_cast_fp16))[name = tensor("op_48624_cast_fp16")]; + tensor var_48625_to_fp16 = const()[name = tensor("op_48625_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_5077_cast_fp16 = mul(x = var_48624_cast_fp16, y = var_48625_to_fp16)[name = tensor("aw_chunk_5077_cast_fp16")]; + tensor var_48628_equation_0 = const()[name = tensor("op_48628_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48628_cast_fp16 = einsum(equation = var_48628_equation_0, values = (var_48290_cast_fp16, var_48089_cast_fp16))[name = tensor("op_48628_cast_fp16")]; + tensor var_48629_to_fp16 = const()[name = tensor("op_48629_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_5079_cast_fp16 = mul(x = var_48628_cast_fp16, y = var_48629_to_fp16)[name = tensor("aw_chunk_5079_cast_fp16")]; + tensor var_48632_equation_0 = const()[name = tensor("op_48632_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48632_cast_fp16 = einsum(equation = var_48632_equation_0, values = (var_48294_cast_fp16, var_48096_cast_fp16))[name = tensor("op_48632_cast_fp16")]; + tensor var_48633_to_fp16 = const()[name = tensor("op_48633_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_5081_cast_fp16 = mul(x = var_48632_cast_fp16, y = var_48633_to_fp16)[name = tensor("aw_chunk_5081_cast_fp16")]; + tensor var_48636_equation_0 = const()[name = tensor("op_48636_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48636_cast_fp16 = einsum(equation = var_48636_equation_0, values = (var_48294_cast_fp16, var_48103_cast_fp16))[name = tensor("op_48636_cast_fp16")]; + tensor var_48637_to_fp16 = const()[name = tensor("op_48637_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_5083_cast_fp16 = mul(x = var_48636_cast_fp16, y = var_48637_to_fp16)[name = tensor("aw_chunk_5083_cast_fp16")]; + tensor var_48640_equation_0 = const()[name = tensor("op_48640_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48640_cast_fp16 = einsum(equation = var_48640_equation_0, values = (var_48294_cast_fp16, var_48110_cast_fp16))[name = tensor("op_48640_cast_fp16")]; + tensor var_48641_to_fp16 = const()[name = tensor("op_48641_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_5085_cast_fp16 = mul(x = var_48640_cast_fp16, y = var_48641_to_fp16)[name = tensor("aw_chunk_5085_cast_fp16")]; + tensor var_48644_equation_0 = const()[name = tensor("op_48644_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48644_cast_fp16 = einsum(equation = var_48644_equation_0, values = (var_48294_cast_fp16, var_48117_cast_fp16))[name = tensor("op_48644_cast_fp16")]; + tensor var_48645_to_fp16 = const()[name = tensor("op_48645_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_5087_cast_fp16 = mul(x = var_48644_cast_fp16, y = var_48645_to_fp16)[name = tensor("aw_chunk_5087_cast_fp16")]; + tensor var_48648_equation_0 = const()[name = tensor("op_48648_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48648_cast_fp16 = einsum(equation = var_48648_equation_0, values = (var_48298_cast_fp16, var_48124_cast_fp16))[name = tensor("op_48648_cast_fp16")]; + tensor var_48649_to_fp16 = const()[name = tensor("op_48649_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_5089_cast_fp16 = mul(x = var_48648_cast_fp16, y = var_48649_to_fp16)[name = tensor("aw_chunk_5089_cast_fp16")]; + tensor var_48652_equation_0 = const()[name = tensor("op_48652_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48652_cast_fp16 = einsum(equation = var_48652_equation_0, values = (var_48298_cast_fp16, var_48131_cast_fp16))[name = tensor("op_48652_cast_fp16")]; + tensor var_48653_to_fp16 = const()[name = tensor("op_48653_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_5091_cast_fp16 = mul(x = var_48652_cast_fp16, y = var_48653_to_fp16)[name = tensor("aw_chunk_5091_cast_fp16")]; + tensor var_48656_equation_0 = const()[name = tensor("op_48656_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48656_cast_fp16 = einsum(equation = var_48656_equation_0, values = (var_48298_cast_fp16, var_48138_cast_fp16))[name = tensor("op_48656_cast_fp16")]; + tensor var_48657_to_fp16 = const()[name = tensor("op_48657_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_5093_cast_fp16 = mul(x = var_48656_cast_fp16, y = var_48657_to_fp16)[name = tensor("aw_chunk_5093_cast_fp16")]; + tensor var_48660_equation_0 = const()[name = tensor("op_48660_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48660_cast_fp16 = einsum(equation = var_48660_equation_0, values = (var_48298_cast_fp16, var_48145_cast_fp16))[name = tensor("op_48660_cast_fp16")]; + tensor var_48661_to_fp16 = const()[name = tensor("op_48661_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_5095_cast_fp16 = mul(x = var_48660_cast_fp16, y = var_48661_to_fp16)[name = tensor("aw_chunk_5095_cast_fp16")]; + tensor var_48664_equation_0 = const()[name = tensor("op_48664_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48664_cast_fp16 = einsum(equation = var_48664_equation_0, values = (var_48302_cast_fp16, var_48152_cast_fp16))[name = tensor("op_48664_cast_fp16")]; + tensor var_48665_to_fp16 = const()[name = tensor("op_48665_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_5097_cast_fp16 = mul(x = var_48664_cast_fp16, y = var_48665_to_fp16)[name = tensor("aw_chunk_5097_cast_fp16")]; + tensor var_48668_equation_0 = const()[name = tensor("op_48668_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48668_cast_fp16 = einsum(equation = var_48668_equation_0, values = (var_48302_cast_fp16, var_48159_cast_fp16))[name = tensor("op_48668_cast_fp16")]; + tensor var_48669_to_fp16 = const()[name = tensor("op_48669_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_5099_cast_fp16 = mul(x = var_48668_cast_fp16, y = var_48669_to_fp16)[name = tensor("aw_chunk_5099_cast_fp16")]; + tensor var_48672_equation_0 = const()[name = tensor("op_48672_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48672_cast_fp16 = einsum(equation = var_48672_equation_0, values = (var_48302_cast_fp16, var_48166_cast_fp16))[name = tensor("op_48672_cast_fp16")]; + tensor var_48673_to_fp16 = const()[name = tensor("op_48673_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_5101_cast_fp16 = mul(x = var_48672_cast_fp16, y = var_48673_to_fp16)[name = tensor("aw_chunk_5101_cast_fp16")]; + tensor var_48676_equation_0 = const()[name = tensor("op_48676_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48676_cast_fp16 = einsum(equation = var_48676_equation_0, values = (var_48302_cast_fp16, var_48173_cast_fp16))[name = tensor("op_48676_cast_fp16")]; + tensor var_48677_to_fp16 = const()[name = tensor("op_48677_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_5103_cast_fp16 = mul(x = var_48676_cast_fp16, y = var_48677_to_fp16)[name = tensor("aw_chunk_5103_cast_fp16")]; + tensor var_48680_equation_0 = const()[name = tensor("op_48680_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48680_cast_fp16 = einsum(equation = var_48680_equation_0, values = (var_48306_cast_fp16, var_48180_cast_fp16))[name = tensor("op_48680_cast_fp16")]; + tensor var_48681_to_fp16 = const()[name = tensor("op_48681_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_5105_cast_fp16 = mul(x = var_48680_cast_fp16, y = var_48681_to_fp16)[name = tensor("aw_chunk_5105_cast_fp16")]; + tensor var_48684_equation_0 = const()[name = tensor("op_48684_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48684_cast_fp16 = einsum(equation = var_48684_equation_0, values = (var_48306_cast_fp16, var_48187_cast_fp16))[name = tensor("op_48684_cast_fp16")]; + tensor var_48685_to_fp16 = const()[name = tensor("op_48685_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_5107_cast_fp16 = mul(x = var_48684_cast_fp16, y = var_48685_to_fp16)[name = tensor("aw_chunk_5107_cast_fp16")]; + tensor var_48688_equation_0 = const()[name = tensor("op_48688_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48688_cast_fp16 = einsum(equation = var_48688_equation_0, values = (var_48306_cast_fp16, var_48194_cast_fp16))[name = tensor("op_48688_cast_fp16")]; + tensor var_48689_to_fp16 = const()[name = tensor("op_48689_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_5109_cast_fp16 = mul(x = var_48688_cast_fp16, y = var_48689_to_fp16)[name = tensor("aw_chunk_5109_cast_fp16")]; + tensor var_48692_equation_0 = const()[name = tensor("op_48692_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48692_cast_fp16 = einsum(equation = var_48692_equation_0, values = (var_48306_cast_fp16, var_48201_cast_fp16))[name = tensor("op_48692_cast_fp16")]; + tensor var_48693_to_fp16 = const()[name = tensor("op_48693_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_5111_cast_fp16 = mul(x = var_48692_cast_fp16, y = var_48693_to_fp16)[name = tensor("aw_chunk_5111_cast_fp16")]; + tensor var_48696_equation_0 = const()[name = tensor("op_48696_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48696_cast_fp16 = einsum(equation = var_48696_equation_0, values = (var_48310_cast_fp16, var_48208_cast_fp16))[name = tensor("op_48696_cast_fp16")]; + tensor var_48697_to_fp16 = const()[name = tensor("op_48697_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_5113_cast_fp16 = mul(x = var_48696_cast_fp16, y = var_48697_to_fp16)[name = tensor("aw_chunk_5113_cast_fp16")]; + tensor var_48700_equation_0 = const()[name = tensor("op_48700_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48700_cast_fp16 = einsum(equation = var_48700_equation_0, values = (var_48310_cast_fp16, var_48215_cast_fp16))[name = tensor("op_48700_cast_fp16")]; + tensor var_48701_to_fp16 = const()[name = tensor("op_48701_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_5115_cast_fp16 = mul(x = var_48700_cast_fp16, y = var_48701_to_fp16)[name = tensor("aw_chunk_5115_cast_fp16")]; + tensor var_48704_equation_0 = const()[name = tensor("op_48704_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48704_cast_fp16 = einsum(equation = var_48704_equation_0, values = (var_48310_cast_fp16, var_48222_cast_fp16))[name = tensor("op_48704_cast_fp16")]; + tensor var_48705_to_fp16 = const()[name = tensor("op_48705_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_5117_cast_fp16 = mul(x = var_48704_cast_fp16, y = var_48705_to_fp16)[name = tensor("aw_chunk_5117_cast_fp16")]; + tensor var_48708_equation_0 = const()[name = tensor("op_48708_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_48708_cast_fp16 = einsum(equation = var_48708_equation_0, values = (var_48310_cast_fp16, var_48229_cast_fp16))[name = tensor("op_48708_cast_fp16")]; + tensor var_48709_to_fp16 = const()[name = tensor("op_48709_to_fp16"), val = tensor(0x1p-3)]; + tensor aw_chunk_cast_fp16 = mul(x = var_48708_cast_fp16, y = var_48709_to_fp16)[name = tensor("aw_chunk_cast_fp16")]; + tensor var_48711_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_4961_cast_fp16)[name = tensor("op_48711_cast_fp16")]; + tensor var_48712_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_4963_cast_fp16)[name = tensor("op_48712_cast_fp16")]; + tensor var_48713_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_4965_cast_fp16)[name = tensor("op_48713_cast_fp16")]; + tensor var_48714_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_4967_cast_fp16)[name = tensor("op_48714_cast_fp16")]; + tensor var_48715_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_4969_cast_fp16)[name = tensor("op_48715_cast_fp16")]; + tensor var_48716_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_4971_cast_fp16)[name = tensor("op_48716_cast_fp16")]; + tensor var_48717_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_4973_cast_fp16)[name = tensor("op_48717_cast_fp16")]; + tensor var_48718_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_4975_cast_fp16)[name = tensor("op_48718_cast_fp16")]; + tensor var_48719_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_4977_cast_fp16)[name = tensor("op_48719_cast_fp16")]; + tensor var_48720_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_4979_cast_fp16)[name = tensor("op_48720_cast_fp16")]; + tensor var_48721_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_4981_cast_fp16)[name = tensor("op_48721_cast_fp16")]; + tensor var_48722_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_4983_cast_fp16)[name = tensor("op_48722_cast_fp16")]; + tensor var_48723_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_4985_cast_fp16)[name = tensor("op_48723_cast_fp16")]; + tensor var_48724_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_4987_cast_fp16)[name = tensor("op_48724_cast_fp16")]; + tensor var_48725_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_4989_cast_fp16)[name = tensor("op_48725_cast_fp16")]; + tensor var_48726_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_4991_cast_fp16)[name = tensor("op_48726_cast_fp16")]; + tensor var_48727_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_4993_cast_fp16)[name = tensor("op_48727_cast_fp16")]; + tensor var_48728_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_4995_cast_fp16)[name = tensor("op_48728_cast_fp16")]; + tensor var_48729_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_4997_cast_fp16)[name = tensor("op_48729_cast_fp16")]; + tensor var_48730_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_4999_cast_fp16)[name = tensor("op_48730_cast_fp16")]; + tensor var_48731_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_5001_cast_fp16)[name = tensor("op_48731_cast_fp16")]; + tensor var_48732_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_5003_cast_fp16)[name = tensor("op_48732_cast_fp16")]; + tensor var_48733_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_5005_cast_fp16)[name = tensor("op_48733_cast_fp16")]; + tensor var_48734_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_5007_cast_fp16)[name = tensor("op_48734_cast_fp16")]; + tensor var_48735_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_5009_cast_fp16)[name = tensor("op_48735_cast_fp16")]; + tensor var_48736_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_5011_cast_fp16)[name = tensor("op_48736_cast_fp16")]; + tensor var_48737_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_5013_cast_fp16)[name = tensor("op_48737_cast_fp16")]; + tensor var_48738_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_5015_cast_fp16)[name = tensor("op_48738_cast_fp16")]; + tensor var_48739_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_5017_cast_fp16)[name = tensor("op_48739_cast_fp16")]; + tensor var_48740_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_5019_cast_fp16)[name = tensor("op_48740_cast_fp16")]; + tensor var_48741_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_5021_cast_fp16)[name = tensor("op_48741_cast_fp16")]; + tensor var_48742_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_5023_cast_fp16)[name = tensor("op_48742_cast_fp16")]; + tensor var_48743_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_5025_cast_fp16)[name = tensor("op_48743_cast_fp16")]; + tensor var_48744_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_5027_cast_fp16)[name = tensor("op_48744_cast_fp16")]; + tensor var_48745_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_5029_cast_fp16)[name = tensor("op_48745_cast_fp16")]; + tensor var_48746_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_5031_cast_fp16)[name = tensor("op_48746_cast_fp16")]; + tensor var_48747_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_5033_cast_fp16)[name = tensor("op_48747_cast_fp16")]; + tensor var_48748_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_5035_cast_fp16)[name = tensor("op_48748_cast_fp16")]; + tensor var_48749_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_5037_cast_fp16)[name = tensor("op_48749_cast_fp16")]; + tensor var_48750_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_5039_cast_fp16)[name = tensor("op_48750_cast_fp16")]; + tensor var_48751_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_5041_cast_fp16)[name = tensor("op_48751_cast_fp16")]; + tensor var_48752_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_5043_cast_fp16)[name = tensor("op_48752_cast_fp16")]; + tensor var_48753_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_5045_cast_fp16)[name = tensor("op_48753_cast_fp16")]; + tensor var_48754_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_5047_cast_fp16)[name = tensor("op_48754_cast_fp16")]; + tensor var_48755_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_5049_cast_fp16)[name = tensor("op_48755_cast_fp16")]; + tensor var_48756_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_5051_cast_fp16)[name = tensor("op_48756_cast_fp16")]; + tensor var_48757_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_5053_cast_fp16)[name = tensor("op_48757_cast_fp16")]; + tensor var_48758_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_5055_cast_fp16)[name = tensor("op_48758_cast_fp16")]; + tensor var_48759_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_5057_cast_fp16)[name = tensor("op_48759_cast_fp16")]; + tensor var_48760_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_5059_cast_fp16)[name = tensor("op_48760_cast_fp16")]; + tensor var_48761_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_5061_cast_fp16)[name = tensor("op_48761_cast_fp16")]; + tensor var_48762_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_5063_cast_fp16)[name = tensor("op_48762_cast_fp16")]; + tensor var_48763_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_5065_cast_fp16)[name = tensor("op_48763_cast_fp16")]; + tensor var_48764_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_5067_cast_fp16)[name = tensor("op_48764_cast_fp16")]; + tensor var_48765_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_5069_cast_fp16)[name = tensor("op_48765_cast_fp16")]; + tensor var_48766_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_5071_cast_fp16)[name = tensor("op_48766_cast_fp16")]; + tensor var_48767_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_5073_cast_fp16)[name = tensor("op_48767_cast_fp16")]; + tensor var_48768_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_5075_cast_fp16)[name = tensor("op_48768_cast_fp16")]; + tensor var_48769_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_5077_cast_fp16)[name = tensor("op_48769_cast_fp16")]; + tensor var_48770_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_5079_cast_fp16)[name = tensor("op_48770_cast_fp16")]; + tensor var_48771_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_5081_cast_fp16)[name = tensor("op_48771_cast_fp16")]; + tensor var_48772_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_5083_cast_fp16)[name = tensor("op_48772_cast_fp16")]; + tensor var_48773_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_5085_cast_fp16)[name = tensor("op_48773_cast_fp16")]; + tensor var_48774_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_5087_cast_fp16)[name = tensor("op_48774_cast_fp16")]; + tensor var_48775_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_5089_cast_fp16)[name = tensor("op_48775_cast_fp16")]; + tensor var_48776_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_5091_cast_fp16)[name = tensor("op_48776_cast_fp16")]; + tensor var_48777_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_5093_cast_fp16)[name = tensor("op_48777_cast_fp16")]; + tensor var_48778_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_5095_cast_fp16)[name = tensor("op_48778_cast_fp16")]; + tensor var_48779_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_5097_cast_fp16)[name = tensor("op_48779_cast_fp16")]; + tensor var_48780_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_5099_cast_fp16)[name = tensor("op_48780_cast_fp16")]; + tensor var_48781_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_5101_cast_fp16)[name = tensor("op_48781_cast_fp16")]; + tensor var_48782_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_5103_cast_fp16)[name = tensor("op_48782_cast_fp16")]; + tensor var_48783_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_5105_cast_fp16)[name = tensor("op_48783_cast_fp16")]; + tensor var_48784_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_5107_cast_fp16)[name = tensor("op_48784_cast_fp16")]; + tensor var_48785_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_5109_cast_fp16)[name = tensor("op_48785_cast_fp16")]; + tensor var_48786_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_5111_cast_fp16)[name = tensor("op_48786_cast_fp16")]; + tensor var_48787_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_5113_cast_fp16)[name = tensor("op_48787_cast_fp16")]; + tensor var_48788_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_5115_cast_fp16)[name = tensor("op_48788_cast_fp16")]; + tensor var_48789_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_5117_cast_fp16)[name = tensor("op_48789_cast_fp16")]; + tensor var_48790_cast_fp16 = softmax(axis = var_47536, x = aw_chunk_cast_fp16)[name = tensor("op_48790_cast_fp16")]; + tensor var_48792_equation_0 = const()[name = tensor("op_48792_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48792_cast_fp16 = einsum(equation = var_48792_equation_0, values = (var_48312_cast_fp16, var_48711_cast_fp16))[name = tensor("op_48792_cast_fp16")]; + tensor var_48794_equation_0 = const()[name = tensor("op_48794_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48794_cast_fp16 = einsum(equation = var_48794_equation_0, values = (var_48312_cast_fp16, var_48712_cast_fp16))[name = tensor("op_48794_cast_fp16")]; + tensor var_48796_equation_0 = const()[name = tensor("op_48796_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48796_cast_fp16 = einsum(equation = var_48796_equation_0, values = (var_48312_cast_fp16, var_48713_cast_fp16))[name = tensor("op_48796_cast_fp16")]; + tensor var_48798_equation_0 = const()[name = tensor("op_48798_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48798_cast_fp16 = einsum(equation = var_48798_equation_0, values = (var_48312_cast_fp16, var_48714_cast_fp16))[name = tensor("op_48798_cast_fp16")]; + tensor var_48800_equation_0 = const()[name = tensor("op_48800_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48800_cast_fp16 = einsum(equation = var_48800_equation_0, values = (var_48316_cast_fp16, var_48715_cast_fp16))[name = tensor("op_48800_cast_fp16")]; + tensor var_48802_equation_0 = const()[name = tensor("op_48802_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48802_cast_fp16 = einsum(equation = var_48802_equation_0, values = (var_48316_cast_fp16, var_48716_cast_fp16))[name = tensor("op_48802_cast_fp16")]; + tensor var_48804_equation_0 = const()[name = tensor("op_48804_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48804_cast_fp16 = einsum(equation = var_48804_equation_0, values = (var_48316_cast_fp16, var_48717_cast_fp16))[name = tensor("op_48804_cast_fp16")]; + tensor var_48806_equation_0 = const()[name = tensor("op_48806_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48806_cast_fp16 = einsum(equation = var_48806_equation_0, values = (var_48316_cast_fp16, var_48718_cast_fp16))[name = tensor("op_48806_cast_fp16")]; + tensor var_48808_equation_0 = const()[name = tensor("op_48808_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48808_cast_fp16 = einsum(equation = var_48808_equation_0, values = (var_48320_cast_fp16, var_48719_cast_fp16))[name = tensor("op_48808_cast_fp16")]; + tensor var_48810_equation_0 = const()[name = tensor("op_48810_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48810_cast_fp16 = einsum(equation = var_48810_equation_0, values = (var_48320_cast_fp16, var_48720_cast_fp16))[name = tensor("op_48810_cast_fp16")]; + tensor var_48812_equation_0 = const()[name = tensor("op_48812_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48812_cast_fp16 = einsum(equation = var_48812_equation_0, values = (var_48320_cast_fp16, var_48721_cast_fp16))[name = tensor("op_48812_cast_fp16")]; + tensor var_48814_equation_0 = const()[name = tensor("op_48814_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48814_cast_fp16 = einsum(equation = var_48814_equation_0, values = (var_48320_cast_fp16, var_48722_cast_fp16))[name = tensor("op_48814_cast_fp16")]; + tensor var_48816_equation_0 = const()[name = tensor("op_48816_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48816_cast_fp16 = einsum(equation = var_48816_equation_0, values = (var_48324_cast_fp16, var_48723_cast_fp16))[name = tensor("op_48816_cast_fp16")]; + tensor var_48818_equation_0 = const()[name = tensor("op_48818_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48818_cast_fp16 = einsum(equation = var_48818_equation_0, values = (var_48324_cast_fp16, var_48724_cast_fp16))[name = tensor("op_48818_cast_fp16")]; + tensor var_48820_equation_0 = const()[name = tensor("op_48820_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48820_cast_fp16 = einsum(equation = var_48820_equation_0, values = (var_48324_cast_fp16, var_48725_cast_fp16))[name = tensor("op_48820_cast_fp16")]; + tensor var_48822_equation_0 = const()[name = tensor("op_48822_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48822_cast_fp16 = einsum(equation = var_48822_equation_0, values = (var_48324_cast_fp16, var_48726_cast_fp16))[name = tensor("op_48822_cast_fp16")]; + tensor var_48824_equation_0 = const()[name = tensor("op_48824_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48824_cast_fp16 = einsum(equation = var_48824_equation_0, values = (var_48328_cast_fp16, var_48727_cast_fp16))[name = tensor("op_48824_cast_fp16")]; + tensor var_48826_equation_0 = const()[name = tensor("op_48826_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48826_cast_fp16 = einsum(equation = var_48826_equation_0, values = (var_48328_cast_fp16, var_48728_cast_fp16))[name = tensor("op_48826_cast_fp16")]; + tensor var_48828_equation_0 = const()[name = tensor("op_48828_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48828_cast_fp16 = einsum(equation = var_48828_equation_0, values = (var_48328_cast_fp16, var_48729_cast_fp16))[name = tensor("op_48828_cast_fp16")]; + tensor var_48830_equation_0 = const()[name = tensor("op_48830_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48830_cast_fp16 = einsum(equation = var_48830_equation_0, values = (var_48328_cast_fp16, var_48730_cast_fp16))[name = tensor("op_48830_cast_fp16")]; + tensor var_48832_equation_0 = const()[name = tensor("op_48832_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48832_cast_fp16 = einsum(equation = var_48832_equation_0, values = (var_48332_cast_fp16, var_48731_cast_fp16))[name = tensor("op_48832_cast_fp16")]; + tensor var_48834_equation_0 = const()[name = tensor("op_48834_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48834_cast_fp16 = einsum(equation = var_48834_equation_0, values = (var_48332_cast_fp16, var_48732_cast_fp16))[name = tensor("op_48834_cast_fp16")]; + tensor var_48836_equation_0 = const()[name = tensor("op_48836_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48836_cast_fp16 = einsum(equation = var_48836_equation_0, values = (var_48332_cast_fp16, var_48733_cast_fp16))[name = tensor("op_48836_cast_fp16")]; + tensor var_48838_equation_0 = const()[name = tensor("op_48838_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48838_cast_fp16 = einsum(equation = var_48838_equation_0, values = (var_48332_cast_fp16, var_48734_cast_fp16))[name = tensor("op_48838_cast_fp16")]; + tensor var_48840_equation_0 = const()[name = tensor("op_48840_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48840_cast_fp16 = einsum(equation = var_48840_equation_0, values = (var_48336_cast_fp16, var_48735_cast_fp16))[name = tensor("op_48840_cast_fp16")]; + tensor var_48842_equation_0 = const()[name = tensor("op_48842_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48842_cast_fp16 = einsum(equation = var_48842_equation_0, values = (var_48336_cast_fp16, var_48736_cast_fp16))[name = tensor("op_48842_cast_fp16")]; + tensor var_48844_equation_0 = const()[name = tensor("op_48844_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48844_cast_fp16 = einsum(equation = var_48844_equation_0, values = (var_48336_cast_fp16, var_48737_cast_fp16))[name = tensor("op_48844_cast_fp16")]; + tensor var_48846_equation_0 = const()[name = tensor("op_48846_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48846_cast_fp16 = einsum(equation = var_48846_equation_0, values = (var_48336_cast_fp16, var_48738_cast_fp16))[name = tensor("op_48846_cast_fp16")]; + tensor var_48848_equation_0 = const()[name = tensor("op_48848_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48848_cast_fp16 = einsum(equation = var_48848_equation_0, values = (var_48340_cast_fp16, var_48739_cast_fp16))[name = tensor("op_48848_cast_fp16")]; + tensor var_48850_equation_0 = const()[name = tensor("op_48850_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48850_cast_fp16 = einsum(equation = var_48850_equation_0, values = (var_48340_cast_fp16, var_48740_cast_fp16))[name = tensor("op_48850_cast_fp16")]; + tensor var_48852_equation_0 = const()[name = tensor("op_48852_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48852_cast_fp16 = einsum(equation = var_48852_equation_0, values = (var_48340_cast_fp16, var_48741_cast_fp16))[name = tensor("op_48852_cast_fp16")]; + tensor var_48854_equation_0 = const()[name = tensor("op_48854_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48854_cast_fp16 = einsum(equation = var_48854_equation_0, values = (var_48340_cast_fp16, var_48742_cast_fp16))[name = tensor("op_48854_cast_fp16")]; + tensor var_48856_equation_0 = const()[name = tensor("op_48856_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48856_cast_fp16 = einsum(equation = var_48856_equation_0, values = (var_48344_cast_fp16, var_48743_cast_fp16))[name = tensor("op_48856_cast_fp16")]; + tensor var_48858_equation_0 = const()[name = tensor("op_48858_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48858_cast_fp16 = einsum(equation = var_48858_equation_0, values = (var_48344_cast_fp16, var_48744_cast_fp16))[name = tensor("op_48858_cast_fp16")]; + tensor var_48860_equation_0 = const()[name = tensor("op_48860_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48860_cast_fp16 = einsum(equation = var_48860_equation_0, values = (var_48344_cast_fp16, var_48745_cast_fp16))[name = tensor("op_48860_cast_fp16")]; + tensor var_48862_equation_0 = const()[name = tensor("op_48862_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48862_cast_fp16 = einsum(equation = var_48862_equation_0, values = (var_48344_cast_fp16, var_48746_cast_fp16))[name = tensor("op_48862_cast_fp16")]; + tensor var_48864_equation_0 = const()[name = tensor("op_48864_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48864_cast_fp16 = einsum(equation = var_48864_equation_0, values = (var_48348_cast_fp16, var_48747_cast_fp16))[name = tensor("op_48864_cast_fp16")]; + tensor var_48866_equation_0 = const()[name = tensor("op_48866_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48866_cast_fp16 = einsum(equation = var_48866_equation_0, values = (var_48348_cast_fp16, var_48748_cast_fp16))[name = tensor("op_48866_cast_fp16")]; + tensor var_48868_equation_0 = const()[name = tensor("op_48868_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48868_cast_fp16 = einsum(equation = var_48868_equation_0, values = (var_48348_cast_fp16, var_48749_cast_fp16))[name = tensor("op_48868_cast_fp16")]; + tensor var_48870_equation_0 = const()[name = tensor("op_48870_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48870_cast_fp16 = einsum(equation = var_48870_equation_0, values = (var_48348_cast_fp16, var_48750_cast_fp16))[name = tensor("op_48870_cast_fp16")]; + tensor var_48872_equation_0 = const()[name = tensor("op_48872_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48872_cast_fp16 = einsum(equation = var_48872_equation_0, values = (var_48352_cast_fp16, var_48751_cast_fp16))[name = tensor("op_48872_cast_fp16")]; + tensor var_48874_equation_0 = const()[name = tensor("op_48874_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48874_cast_fp16 = einsum(equation = var_48874_equation_0, values = (var_48352_cast_fp16, var_48752_cast_fp16))[name = tensor("op_48874_cast_fp16")]; + tensor var_48876_equation_0 = const()[name = tensor("op_48876_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48876_cast_fp16 = einsum(equation = var_48876_equation_0, values = (var_48352_cast_fp16, var_48753_cast_fp16))[name = tensor("op_48876_cast_fp16")]; + tensor var_48878_equation_0 = const()[name = tensor("op_48878_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48878_cast_fp16 = einsum(equation = var_48878_equation_0, values = (var_48352_cast_fp16, var_48754_cast_fp16))[name = tensor("op_48878_cast_fp16")]; + tensor var_48880_equation_0 = const()[name = tensor("op_48880_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48880_cast_fp16 = einsum(equation = var_48880_equation_0, values = (var_48356_cast_fp16, var_48755_cast_fp16))[name = tensor("op_48880_cast_fp16")]; + tensor var_48882_equation_0 = const()[name = tensor("op_48882_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48882_cast_fp16 = einsum(equation = var_48882_equation_0, values = (var_48356_cast_fp16, var_48756_cast_fp16))[name = tensor("op_48882_cast_fp16")]; + tensor var_48884_equation_0 = const()[name = tensor("op_48884_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48884_cast_fp16 = einsum(equation = var_48884_equation_0, values = (var_48356_cast_fp16, var_48757_cast_fp16))[name = tensor("op_48884_cast_fp16")]; + tensor var_48886_equation_0 = const()[name = tensor("op_48886_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48886_cast_fp16 = einsum(equation = var_48886_equation_0, values = (var_48356_cast_fp16, var_48758_cast_fp16))[name = tensor("op_48886_cast_fp16")]; + tensor var_48888_equation_0 = const()[name = tensor("op_48888_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48888_cast_fp16 = einsum(equation = var_48888_equation_0, values = (var_48360_cast_fp16, var_48759_cast_fp16))[name = tensor("op_48888_cast_fp16")]; + tensor var_48890_equation_0 = const()[name = tensor("op_48890_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48890_cast_fp16 = einsum(equation = var_48890_equation_0, values = (var_48360_cast_fp16, var_48760_cast_fp16))[name = tensor("op_48890_cast_fp16")]; + tensor var_48892_equation_0 = const()[name = tensor("op_48892_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48892_cast_fp16 = einsum(equation = var_48892_equation_0, values = (var_48360_cast_fp16, var_48761_cast_fp16))[name = tensor("op_48892_cast_fp16")]; + tensor var_48894_equation_0 = const()[name = tensor("op_48894_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48894_cast_fp16 = einsum(equation = var_48894_equation_0, values = (var_48360_cast_fp16, var_48762_cast_fp16))[name = tensor("op_48894_cast_fp16")]; + tensor var_48896_equation_0 = const()[name = tensor("op_48896_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48896_cast_fp16 = einsum(equation = var_48896_equation_0, values = (var_48364_cast_fp16, var_48763_cast_fp16))[name = tensor("op_48896_cast_fp16")]; + tensor var_48898_equation_0 = const()[name = tensor("op_48898_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48898_cast_fp16 = einsum(equation = var_48898_equation_0, values = (var_48364_cast_fp16, var_48764_cast_fp16))[name = tensor("op_48898_cast_fp16")]; + tensor var_48900_equation_0 = const()[name = tensor("op_48900_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48900_cast_fp16 = einsum(equation = var_48900_equation_0, values = (var_48364_cast_fp16, var_48765_cast_fp16))[name = tensor("op_48900_cast_fp16")]; + tensor var_48902_equation_0 = const()[name = tensor("op_48902_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48902_cast_fp16 = einsum(equation = var_48902_equation_0, values = (var_48364_cast_fp16, var_48766_cast_fp16))[name = tensor("op_48902_cast_fp16")]; + tensor var_48904_equation_0 = const()[name = tensor("op_48904_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48904_cast_fp16 = einsum(equation = var_48904_equation_0, values = (var_48368_cast_fp16, var_48767_cast_fp16))[name = tensor("op_48904_cast_fp16")]; + tensor var_48906_equation_0 = const()[name = tensor("op_48906_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48906_cast_fp16 = einsum(equation = var_48906_equation_0, values = (var_48368_cast_fp16, var_48768_cast_fp16))[name = tensor("op_48906_cast_fp16")]; + tensor var_48908_equation_0 = const()[name = tensor("op_48908_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48908_cast_fp16 = einsum(equation = var_48908_equation_0, values = (var_48368_cast_fp16, var_48769_cast_fp16))[name = tensor("op_48908_cast_fp16")]; + tensor var_48910_equation_0 = const()[name = tensor("op_48910_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48910_cast_fp16 = einsum(equation = var_48910_equation_0, values = (var_48368_cast_fp16, var_48770_cast_fp16))[name = tensor("op_48910_cast_fp16")]; + tensor var_48912_equation_0 = const()[name = tensor("op_48912_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48912_cast_fp16 = einsum(equation = var_48912_equation_0, values = (var_48372_cast_fp16, var_48771_cast_fp16))[name = tensor("op_48912_cast_fp16")]; + tensor var_48914_equation_0 = const()[name = tensor("op_48914_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48914_cast_fp16 = einsum(equation = var_48914_equation_0, values = (var_48372_cast_fp16, var_48772_cast_fp16))[name = tensor("op_48914_cast_fp16")]; + tensor var_48916_equation_0 = const()[name = tensor("op_48916_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48916_cast_fp16 = einsum(equation = var_48916_equation_0, values = (var_48372_cast_fp16, var_48773_cast_fp16))[name = tensor("op_48916_cast_fp16")]; + tensor var_48918_equation_0 = const()[name = tensor("op_48918_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48918_cast_fp16 = einsum(equation = var_48918_equation_0, values = (var_48372_cast_fp16, var_48774_cast_fp16))[name = tensor("op_48918_cast_fp16")]; + tensor var_48920_equation_0 = const()[name = tensor("op_48920_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48920_cast_fp16 = einsum(equation = var_48920_equation_0, values = (var_48376_cast_fp16, var_48775_cast_fp16))[name = tensor("op_48920_cast_fp16")]; + tensor var_48922_equation_0 = const()[name = tensor("op_48922_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48922_cast_fp16 = einsum(equation = var_48922_equation_0, values = (var_48376_cast_fp16, var_48776_cast_fp16))[name = tensor("op_48922_cast_fp16")]; + tensor var_48924_equation_0 = const()[name = tensor("op_48924_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48924_cast_fp16 = einsum(equation = var_48924_equation_0, values = (var_48376_cast_fp16, var_48777_cast_fp16))[name = tensor("op_48924_cast_fp16")]; + tensor var_48926_equation_0 = const()[name = tensor("op_48926_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48926_cast_fp16 = einsum(equation = var_48926_equation_0, values = (var_48376_cast_fp16, var_48778_cast_fp16))[name = tensor("op_48926_cast_fp16")]; + tensor var_48928_equation_0 = const()[name = tensor("op_48928_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48928_cast_fp16 = einsum(equation = var_48928_equation_0, values = (var_48380_cast_fp16, var_48779_cast_fp16))[name = tensor("op_48928_cast_fp16")]; + tensor var_48930_equation_0 = const()[name = tensor("op_48930_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48930_cast_fp16 = einsum(equation = var_48930_equation_0, values = (var_48380_cast_fp16, var_48780_cast_fp16))[name = tensor("op_48930_cast_fp16")]; + tensor var_48932_equation_0 = const()[name = tensor("op_48932_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48932_cast_fp16 = einsum(equation = var_48932_equation_0, values = (var_48380_cast_fp16, var_48781_cast_fp16))[name = tensor("op_48932_cast_fp16")]; + tensor var_48934_equation_0 = const()[name = tensor("op_48934_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48934_cast_fp16 = einsum(equation = var_48934_equation_0, values = (var_48380_cast_fp16, var_48782_cast_fp16))[name = tensor("op_48934_cast_fp16")]; + tensor var_48936_equation_0 = const()[name = tensor("op_48936_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48936_cast_fp16 = einsum(equation = var_48936_equation_0, values = (var_48384_cast_fp16, var_48783_cast_fp16))[name = tensor("op_48936_cast_fp16")]; + tensor var_48938_equation_0 = const()[name = tensor("op_48938_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48938_cast_fp16 = einsum(equation = var_48938_equation_0, values = (var_48384_cast_fp16, var_48784_cast_fp16))[name = tensor("op_48938_cast_fp16")]; + tensor var_48940_equation_0 = const()[name = tensor("op_48940_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48940_cast_fp16 = einsum(equation = var_48940_equation_0, values = (var_48384_cast_fp16, var_48785_cast_fp16))[name = tensor("op_48940_cast_fp16")]; + tensor var_48942_equation_0 = const()[name = tensor("op_48942_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48942_cast_fp16 = einsum(equation = var_48942_equation_0, values = (var_48384_cast_fp16, var_48786_cast_fp16))[name = tensor("op_48942_cast_fp16")]; + tensor var_48944_equation_0 = const()[name = tensor("op_48944_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48944_cast_fp16 = einsum(equation = var_48944_equation_0, values = (var_48388_cast_fp16, var_48787_cast_fp16))[name = tensor("op_48944_cast_fp16")]; + tensor var_48946_equation_0 = const()[name = tensor("op_48946_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48946_cast_fp16 = einsum(equation = var_48946_equation_0, values = (var_48388_cast_fp16, var_48788_cast_fp16))[name = tensor("op_48946_cast_fp16")]; + tensor var_48948_equation_0 = const()[name = tensor("op_48948_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48948_cast_fp16 = einsum(equation = var_48948_equation_0, values = (var_48388_cast_fp16, var_48789_cast_fp16))[name = tensor("op_48948_cast_fp16")]; + tensor var_48950_equation_0 = const()[name = tensor("op_48950_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_48950_cast_fp16 = einsum(equation = var_48950_equation_0, values = (var_48388_cast_fp16, var_48790_cast_fp16))[name = tensor("op_48950_cast_fp16")]; + tensor var_48952_interleave_0 = const()[name = tensor("op_48952_interleave_0"), val = tensor(false)]; + tensor var_48952_cast_fp16 = concat(axis = var_47511, interleave = var_48952_interleave_0, values = (var_48792_cast_fp16, var_48794_cast_fp16, var_48796_cast_fp16, var_48798_cast_fp16))[name = tensor("op_48952_cast_fp16")]; + tensor var_48954_interleave_0 = const()[name = tensor("op_48954_interleave_0"), val = tensor(false)]; + tensor var_48954_cast_fp16 = concat(axis = var_47511, interleave = var_48954_interleave_0, values = (var_48800_cast_fp16, var_48802_cast_fp16, var_48804_cast_fp16, var_48806_cast_fp16))[name = tensor("op_48954_cast_fp16")]; + tensor var_48956_interleave_0 = const()[name = tensor("op_48956_interleave_0"), val = tensor(false)]; + tensor var_48956_cast_fp16 = concat(axis = var_47511, interleave = var_48956_interleave_0, values = (var_48808_cast_fp16, var_48810_cast_fp16, var_48812_cast_fp16, var_48814_cast_fp16))[name = tensor("op_48956_cast_fp16")]; + tensor var_48958_interleave_0 = const()[name = tensor("op_48958_interleave_0"), val = tensor(false)]; + tensor var_48958_cast_fp16 = concat(axis = var_47511, interleave = var_48958_interleave_0, values = (var_48816_cast_fp16, var_48818_cast_fp16, var_48820_cast_fp16, var_48822_cast_fp16))[name = tensor("op_48958_cast_fp16")]; + tensor var_48960_interleave_0 = const()[name = tensor("op_48960_interleave_0"), val = tensor(false)]; + tensor var_48960_cast_fp16 = concat(axis = var_47511, interleave = var_48960_interleave_0, values = (var_48824_cast_fp16, var_48826_cast_fp16, var_48828_cast_fp16, var_48830_cast_fp16))[name = tensor("op_48960_cast_fp16")]; + tensor var_48962_interleave_0 = const()[name = tensor("op_48962_interleave_0"), val = tensor(false)]; + tensor var_48962_cast_fp16 = concat(axis = var_47511, interleave = var_48962_interleave_0, values = (var_48832_cast_fp16, var_48834_cast_fp16, var_48836_cast_fp16, var_48838_cast_fp16))[name = tensor("op_48962_cast_fp16")]; + tensor var_48964_interleave_0 = const()[name = tensor("op_48964_interleave_0"), val = tensor(false)]; + tensor var_48964_cast_fp16 = concat(axis = var_47511, interleave = var_48964_interleave_0, values = (var_48840_cast_fp16, var_48842_cast_fp16, var_48844_cast_fp16, var_48846_cast_fp16))[name = tensor("op_48964_cast_fp16")]; + tensor var_48966_interleave_0 = const()[name = tensor("op_48966_interleave_0"), val = tensor(false)]; + tensor var_48966_cast_fp16 = concat(axis = var_47511, interleave = var_48966_interleave_0, values = (var_48848_cast_fp16, var_48850_cast_fp16, var_48852_cast_fp16, var_48854_cast_fp16))[name = tensor("op_48966_cast_fp16")]; + tensor var_48968_interleave_0 = const()[name = tensor("op_48968_interleave_0"), val = tensor(false)]; + tensor var_48968_cast_fp16 = concat(axis = var_47511, interleave = var_48968_interleave_0, values = (var_48856_cast_fp16, var_48858_cast_fp16, var_48860_cast_fp16, var_48862_cast_fp16))[name = tensor("op_48968_cast_fp16")]; + tensor var_48970_interleave_0 = const()[name = tensor("op_48970_interleave_0"), val = tensor(false)]; + tensor var_48970_cast_fp16 = concat(axis = var_47511, interleave = var_48970_interleave_0, values = (var_48864_cast_fp16, var_48866_cast_fp16, var_48868_cast_fp16, var_48870_cast_fp16))[name = tensor("op_48970_cast_fp16")]; + tensor var_48972_interleave_0 = const()[name = tensor("op_48972_interleave_0"), val = tensor(false)]; + tensor var_48972_cast_fp16 = concat(axis = var_47511, interleave = var_48972_interleave_0, values = (var_48872_cast_fp16, var_48874_cast_fp16, var_48876_cast_fp16, var_48878_cast_fp16))[name = tensor("op_48972_cast_fp16")]; + tensor var_48974_interleave_0 = const()[name = tensor("op_48974_interleave_0"), val = tensor(false)]; + tensor var_48974_cast_fp16 = concat(axis = var_47511, interleave = var_48974_interleave_0, values = (var_48880_cast_fp16, var_48882_cast_fp16, var_48884_cast_fp16, var_48886_cast_fp16))[name = tensor("op_48974_cast_fp16")]; + tensor var_48976_interleave_0 = const()[name = tensor("op_48976_interleave_0"), val = tensor(false)]; + tensor var_48976_cast_fp16 = concat(axis = var_47511, interleave = var_48976_interleave_0, values = (var_48888_cast_fp16, var_48890_cast_fp16, var_48892_cast_fp16, var_48894_cast_fp16))[name = tensor("op_48976_cast_fp16")]; + tensor var_48978_interleave_0 = const()[name = tensor("op_48978_interleave_0"), val = tensor(false)]; + tensor var_48978_cast_fp16 = concat(axis = var_47511, interleave = var_48978_interleave_0, values = (var_48896_cast_fp16, var_48898_cast_fp16, var_48900_cast_fp16, var_48902_cast_fp16))[name = tensor("op_48978_cast_fp16")]; + tensor var_48980_interleave_0 = const()[name = tensor("op_48980_interleave_0"), val = tensor(false)]; + tensor var_48980_cast_fp16 = concat(axis = var_47511, interleave = var_48980_interleave_0, values = (var_48904_cast_fp16, var_48906_cast_fp16, var_48908_cast_fp16, var_48910_cast_fp16))[name = tensor("op_48980_cast_fp16")]; + tensor var_48982_interleave_0 = const()[name = tensor("op_48982_interleave_0"), val = tensor(false)]; + tensor var_48982_cast_fp16 = concat(axis = var_47511, interleave = var_48982_interleave_0, values = (var_48912_cast_fp16, var_48914_cast_fp16, var_48916_cast_fp16, var_48918_cast_fp16))[name = tensor("op_48982_cast_fp16")]; + tensor var_48984_interleave_0 = const()[name = tensor("op_48984_interleave_0"), val = tensor(false)]; + tensor var_48984_cast_fp16 = concat(axis = var_47511, interleave = var_48984_interleave_0, values = (var_48920_cast_fp16, var_48922_cast_fp16, var_48924_cast_fp16, var_48926_cast_fp16))[name = tensor("op_48984_cast_fp16")]; + tensor var_48986_interleave_0 = const()[name = tensor("op_48986_interleave_0"), val = tensor(false)]; + tensor var_48986_cast_fp16 = concat(axis = var_47511, interleave = var_48986_interleave_0, values = (var_48928_cast_fp16, var_48930_cast_fp16, var_48932_cast_fp16, var_48934_cast_fp16))[name = tensor("op_48986_cast_fp16")]; + tensor var_48988_interleave_0 = const()[name = tensor("op_48988_interleave_0"), val = tensor(false)]; + tensor var_48988_cast_fp16 = concat(axis = var_47511, interleave = var_48988_interleave_0, values = (var_48936_cast_fp16, var_48938_cast_fp16, var_48940_cast_fp16, var_48942_cast_fp16))[name = tensor("op_48988_cast_fp16")]; + tensor var_48990_interleave_0 = const()[name = tensor("op_48990_interleave_0"), val = tensor(false)]; + tensor var_48990_cast_fp16 = concat(axis = var_47511, interleave = var_48990_interleave_0, values = (var_48944_cast_fp16, var_48946_cast_fp16, var_48948_cast_fp16, var_48950_cast_fp16))[name = tensor("op_48990_cast_fp16")]; + tensor input_249_interleave_0 = const()[name = tensor("input_249_interleave_0"), val = tensor(false)]; + tensor input_249_cast_fp16 = concat(axis = var_47536, interleave = input_249_interleave_0, values = (var_48952_cast_fp16, var_48954_cast_fp16, var_48956_cast_fp16, var_48958_cast_fp16, var_48960_cast_fp16, var_48962_cast_fp16, var_48964_cast_fp16, var_48966_cast_fp16, var_48968_cast_fp16, var_48970_cast_fp16, var_48972_cast_fp16, var_48974_cast_fp16, var_48976_cast_fp16, var_48978_cast_fp16, var_48980_cast_fp16, var_48982_cast_fp16, var_48984_cast_fp16, var_48986_cast_fp16, var_48988_cast_fp16, var_48990_cast_fp16))[name = tensor("input_249_cast_fp16")]; + tensor var_48995 = const()[name = tensor("op_48995"), val = tensor([1, 1])]; + tensor var_48997 = const()[name = tensor("op_48997"), val = tensor([1, 1])]; + tensor obj_pad_type_0 = const()[name = tensor("obj_pad_type_0"), val = tensor("custom")]; + tensor obj_pad_0 = const()[name = tensor("obj_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_31_self_attn_o_proj_weight_to_fp16 = const()[name = tensor("layers_31_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1244088320)))]; + tensor layers_31_self_attn_o_proj_bias_to_fp16 = const()[name = tensor("layers_31_self_attn_o_proj_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1247365184)))]; + tensor obj_cast_fp16 = conv(bias = layers_31_self_attn_o_proj_bias_to_fp16, dilations = var_48997, groups = var_47536, pad = obj_pad_0, pad_type = obj_pad_type_0, strides = var_48995, weight = layers_31_self_attn_o_proj_weight_to_fp16, x = input_249_cast_fp16)[name = tensor("obj_cast_fp16")]; + tensor inputs_127_cast_fp16 = add(x = inputs_125_cast_fp16, y = obj_cast_fp16)[name = tensor("inputs_127_cast_fp16")]; + tensor var_49003 = const()[name = tensor("op_49003"), val = tensor([1])]; + tensor channels_mean_127_cast_fp16 = reduce_mean(axes = var_49003, keep_dims = var_47537, x = inputs_127_cast_fp16)[name = tensor("channels_mean_127_cast_fp16")]; + tensor zero_mean_127_cast_fp16 = sub(x = inputs_127_cast_fp16, y = channels_mean_127_cast_fp16)[name = tensor("zero_mean_127_cast_fp16")]; + tensor zero_mean_sq_127_cast_fp16 = mul(x = zero_mean_127_cast_fp16, y = zero_mean_127_cast_fp16)[name = tensor("zero_mean_sq_127_cast_fp16")]; + tensor var_49007 = const()[name = tensor("op_49007"), val = tensor([1])]; + tensor var_49008_cast_fp16 = reduce_mean(axes = var_49007, keep_dims = var_47537, x = zero_mean_sq_127_cast_fp16)[name = tensor("op_49008_cast_fp16")]; + tensor var_49009_to_fp16 = const()[name = tensor("op_49009_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_49010_cast_fp16 = add(x = var_49008_cast_fp16, y = var_49009_to_fp16)[name = tensor("op_49010_cast_fp16")]; + tensor denom_127_epsilon_0_to_fp16 = const()[name = tensor("denom_127_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_127_cast_fp16 = rsqrt(epsilon = denom_127_epsilon_0_to_fp16, x = var_49010_cast_fp16)[name = tensor("denom_127_cast_fp16")]; + tensor out_127_cast_fp16 = mul(x = zero_mean_127_cast_fp16, y = denom_127_cast_fp16)[name = tensor("out_127_cast_fp16")]; + tensor input_251_gamma_0_to_fp16 = const()[name = tensor("input_251_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1247367808)))]; + tensor input_251_beta_0_to_fp16 = const()[name = tensor("input_251_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1247370432)))]; + tensor input_251_epsilon_0_to_fp16 = const()[name = tensor("input_251_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor input_251_cast_fp16 = batch_norm(beta = input_251_beta_0_to_fp16, epsilon = input_251_epsilon_0_to_fp16, gamma = input_251_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_127_cast_fp16)[name = tensor("input_251_cast_fp16")]; + tensor var_49021 = const()[name = tensor("op_49021"), val = tensor([1, 1])]; + tensor var_49023 = const()[name = tensor("op_49023"), val = tensor([1, 1])]; + tensor input_253_pad_type_0 = const()[name = tensor("input_253_pad_type_0"), val = tensor("custom")]; + tensor input_253_pad_0 = const()[name = tensor("input_253_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_31_fc1_weight_to_fp16 = const()[name = tensor("layers_31_fc1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1247373056)))]; + tensor layers_31_fc1_bias_to_fp16 = const()[name = tensor("layers_31_fc1_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1260480320)))]; + tensor input_253_cast_fp16 = conv(bias = layers_31_fc1_bias_to_fp16, dilations = var_49023, groups = var_47536, pad = input_253_pad_0, pad_type = input_253_pad_type_0, strides = var_49021, weight = layers_31_fc1_weight_to_fp16, x = input_251_cast_fp16)[name = tensor("input_253_cast_fp16")]; + tensor input_mode_0 = const()[name = tensor("input_mode_0"), val = tensor("EXACT")]; + tensor input_cast_fp16 = gelu(mode = input_mode_0, x = input_253_cast_fp16)[name = tensor("input_cast_fp16")]; + tensor var_49029 = const()[name = tensor("op_49029"), val = tensor([1, 1])]; + tensor var_49031 = const()[name = tensor("op_49031"), val = tensor([1, 1])]; + tensor hidden_states_pad_type_0 = const()[name = tensor("hidden_states_pad_type_0"), val = tensor("custom")]; + tensor hidden_states_pad_0 = const()[name = tensor("hidden_states_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor layers_31_fc2_weight_to_fp16 = const()[name = tensor("layers_31_fc2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1260490624)))]; + tensor layers_31_fc2_bias_to_fp16 = const()[name = tensor("layers_31_fc2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1273597888)))]; + tensor hidden_states_cast_fp16 = conv(bias = layers_31_fc2_bias_to_fp16, dilations = var_49031, groups = var_47536, pad = hidden_states_pad_0, pad_type = hidden_states_pad_type_0, strides = var_49029, weight = layers_31_fc2_weight_to_fp16, x = input_cast_fp16)[name = tensor("hidden_states_cast_fp16")]; + tensor inputs_cast_fp16 = add(x = inputs_127_cast_fp16, y = hidden_states_cast_fp16)[name = tensor("inputs_cast_fp16")]; + tensor var_49037 = const()[name = tensor("op_49037"), val = tensor(true)]; + tensor var_49041 = const()[name = tensor("op_49041"), val = tensor([1])]; + tensor channels_mean_cast_fp16 = reduce_mean(axes = var_49041, keep_dims = var_49037, x = inputs_cast_fp16)[name = tensor("channels_mean_cast_fp16")]; + tensor zero_mean_cast_fp16 = sub(x = inputs_cast_fp16, y = channels_mean_cast_fp16)[name = tensor("zero_mean_cast_fp16")]; + tensor zero_mean_sq_cast_fp16 = mul(x = zero_mean_cast_fp16, y = zero_mean_cast_fp16)[name = tensor("zero_mean_sq_cast_fp16")]; + tensor var_49045 = const()[name = tensor("op_49045"), val = tensor([1])]; + tensor var_49046_cast_fp16 = reduce_mean(axes = var_49045, keep_dims = var_49037, x = zero_mean_sq_cast_fp16)[name = tensor("op_49046_cast_fp16")]; + tensor var_49047_to_fp16 = const()[name = tensor("op_49047_to_fp16"), val = tensor(0x1.5p-17)]; + tensor var_49048_cast_fp16 = add(x = var_49046_cast_fp16, y = var_49047_to_fp16)[name = tensor("op_49048_cast_fp16")]; + tensor denom_epsilon_0_to_fp16 = const()[name = tensor("denom_epsilon_0_to_fp16"), val = tensor(0x1p-24)]; + tensor denom_cast_fp16 = rsqrt(epsilon = denom_epsilon_0_to_fp16, x = var_49048_cast_fp16)[name = tensor("denom_cast_fp16")]; + tensor out_cast_fp16 = mul(x = zero_mean_cast_fp16, y = denom_cast_fp16)[name = tensor("out_cast_fp16")]; + tensor encoder_output_embeds_type_fp32_gamma_0_to_fp16 = const()[name = tensor("encoder_output_embeds_type_fp32_gamma_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1273600512)))]; + tensor encoder_output_embeds_type_fp32_beta_0_to_fp16 = const()[name = tensor("encoder_output_embeds_type_fp32_beta_0_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(1273603136)))]; + tensor encoder_output_embeds_type_fp32_epsilon_0_to_fp16 = const()[name = tensor("encoder_output_embeds_type_fp32_epsilon_0_to_fp16"), val = tensor(0x1.5p-17)]; + tensor encoder_output_embeds = batch_norm(beta = encoder_output_embeds_type_fp32_beta_0_to_fp16, epsilon = encoder_output_embeds_type_fp32_epsilon_0_to_fp16, gamma = encoder_output_embeds_type_fp32_gamma_0_to_fp16, mean = obj_1_mean_0_to_fp16, variance = obj_1_variance_0_to_fp16, x = out_cast_fp16)[name = tensor("encoder_output_embeds_type_fp32_cast_fp16")]; + } -> (encoder_output_embeds); +} \ No newline at end of file